コード例 #1
0
ファイル: levenshtein_test.go プロジェクト: aarzilli/tools
// inner feeds every test case through a Levenshtein matrix built with opt.
// For each case it checks the computed distance against
// tc.distances[wantIdx] and verifies via CompareToCol that applying the
// generated edit script yields an accepted result.
// Failures are recorded on t without aborting, so all cases are visited.
// The matrix is printed to stdout for every case.
func inner(t *testing.T, cases *[]TestCase, wantIdx int, opt ls_core.Options, sortIt bool) {
	for idx, testCase := range *cases {
		matrix := ls_core.New(
			WrapAsEqualer(testCase.src, sortIt),
			WrapAsEqualer(testCase.dst, sortIt),
			opt,
		)
		// The relative distance is not part of the assertion.
		dist, _ := matrix.Distance()

		srcStr := fmt.Sprintf("%v", testCase.src)
		dstStr := fmt.Sprintf("%v", testCase.dst)
		if dist != testCase.distances[wantIdx] {
			t.Errorf(
				"%2v: Distance between %20v and %20v should be %v - but got %v (sorted %v)",
				idx, stringspb.Ellipsoider(srcStr, 8), stringspb.Ellipsoider(dstStr, 8),
				testCase.distances[wantIdx], dist, sortIt)
		}

		matrix.Print()
		fmt.Printf("\n")

		// Round trip: derive the edit script, apply it, compare the outcome.
		script := matrix.EditScript()
		applied := matrix.ApplyEditScript(script)
		if !matrix.CompareToCol(applied) {
			t.Errorf("\nwnt %v \ngot %v ", WrapAsEqualer(testCase.dst, sortIt), applied)
		}

		fmt.Printf("\n\n")
	}
}
コード例 #2
0
ファイル: 12_funcs_textify.go プロジェクト: aarzilli/tools
// inlineNodeToText renders selected inline elements as a short text marker.
// The elements br, input, img and a are converted; for every other node -
// including all non-element nodes - it returns ("", false).
func inlineNodeToText(n *html.Node) (ct string, ok bool) {
	if n.Type != html.ElementNode {
		return "", false
	}

	switch n.Data {

	case "br":
		return "sbr ", true

	case "input":
		name := attrX(n.Attr, "name")
		inputType := attrX(n.Attr, "type")
		value := attrX(n.Attr, "value")
		return spf("[inp] %v %v %v", name, inputType, value), true

	case "img":
		src := stringspb.Ellipsoider(attrX(n.Attr, "src"), 5)
		alt := attrX(n.Attr, "alt")
		title := attrX(n.Attr, "title")

		// The more attributes are present, the more fields are emitted.
		switch {
		case alt == "" && title == "":
			ct = spf("[img] %v ", src)
		case alt == "":
			ct = spf("[img] %v hbr %v ", src, title)
		default:
			ct = spf("[img] %v hbr %v hbr %v ", src, title, alt)
		}
		return ct, true

	case "a":
		href := stringspb.Ellipsoider(attrX(n.Attr, "href"), 5)
		if title := attrX(n.Attr, "title"); title != "" {
			return spf("[a] %v hbr %v ", href, title), true
		}
		return spf("[a] %v ", href), true
	}

	return "", false
}
コード例 #3
0
ファイル: 1_fileserver.go プロジェクト: aarzilli/tools
// dirListHtml writes a simple HTML listing of the directory f into w.
// Each entry becomes one link followed by its modification time.
// Directories get a trailing slash in the link target, and every link
// carries the query string "?fmt=html".
func dirListHtml(w http.ResponseWriter, r *http.Request, f fsi.File) {

	w.Header().Set("Content-Type", "text/html; charset=utf-8")

	for {
		// Read in batches of 100; an error or an empty batch ends the listing.
		dirs, err := f.Readdir(100)
		if err != nil || len(dirs) == 0 {
			break
		}
		for _, d := range dirs {
			name := d.Name()

			suffix := ""
			if d.IsDir() {
				suffix = "/"
			}

			linktitle := htmlReplacer.Replace(name)
			linktitle = stringspb.Ellipsoider(linktitle, 40)
			if d.IsDir() {
				linktitle = common.Directorify(linktitle)
			}

			// NOTE(review): surl is inserted into the href attribute unescaped;
			// names containing quotes would break the markup - confirm whether
			// such names can occur.
			surl := path.Join(r.URL.Path, name) + suffix + "?fmt=html"

			oneLine := spf("<a  style='display:inline-block;min-width:600px;' href=\"%s\">%s</a>", surl, linktitle)
			// wpf(w, " %v", d.ModTime().Format("2006-01-02 15:04:05 MST"))
			oneLine += spf(" %v<br>", d.ModTime().Format(time.RFC1123Z))

			// oneLine contains file names, so it must be passed as an argument
			// and never as the format string: a '%' in a name would otherwise
			// be interpreted as a formatting verb.
			wpf(w, "%s", oneLine)
		}
	}

}
コード例 #4
0
ファイル: fs.go プロジェクト: aarzilli/tools
// Open opens name on the wrapped h.SourceFs and returns it as http.File.
//
// Requests ending in "favicon.ico" are answered with os.ErrNotExist without
// touching the source file system. Every successful or failed open is logged.
// If the opened file does not implement http.File, the file is closed and an
// error is returned.
func (h HttpFs) Open(name string) (http.File, error) {

	if strings.HasSuffix(name, "favicon.ico") {
		return nil, os.ErrNotExist
	}

	// logFailure writes the two-line failure record.
	logFailure := func(e error) {
		log.Printf("httpfs open      %-22v     %-22v %v", name, "", h.Name())
		log.Printf("             err %-22v", stringspb.Ellipsoider(e.Error(), 24))
	}

	f, err := h.SourceFs.Open(name)
	if err != nil {
		logFailure(err)
		return nil, err
	}

	// closeFile releases f on the failure paths below.
	// The assertion avoids assuming more about f's type than this block shows.
	closeFile := func() {
		if cl, ok := f.(interface{ Close() error }); ok {
			_ = cl.Close() // best effort; the primary error is reported instead
		}
	}

	// Gather some info
	stat, err := f.Stat()
	if err != nil {
		closeFile()
		return nil, err
	}
	tp := "F"
	if stat.IsDir() {
		tp = "D"
	}
	fn := fmt.Sprintf("%v %v", f.Name(), tp)

	// report info
	log.Printf("httpfs open      %-22v fnd %-22v %v", name, fn, h.Name())

	// return f as http.File
	httpfile, ok := f.(http.File)
	if !ok {
		// Previously this path fell through to err.Error() with a nil err
		// and panicked; report a proper error instead.
		closeFile()
		err = fmt.Errorf("httpfs: file of type %T does not implement http.File", f)
		logFailure(err)
		return nil, err
	}

	return httpfile, nil
}
コード例 #5
0
ファイル: html.go プロジェクト: aarzilli/tools
// CookieDump returns all cookies of the request as HTML text.
// Each cookie is formatted with %v, shortened by stringspb.Ellipsoider
// with an argument of 50, and terminated with "<br>" and a newline.
func CookieDump(r *http.Request) string {
	dump := ""
	for _, cookie := range r.Cookies() {
		line := stringspb.Ellipsoider(fmt.Sprintf("%v", cookie), 50)
		dump += line + "<br>\n"
	}
	return dump
}
コード例 #6
0
ファイル: 4_fetch.go プロジェクト: aarzilli/tools
// stuffStage1 ranges over the RSS entries and filters out unwanted directories.
// Wanted urls are sent to the stage one channel.
//
// It descends into dirTree as far as uriPrefixIncl matches, collects up to
// nWant articles below that point (excluding uriPrefixExcl) and sends each one
// into inn, prefixed with config.Host.
// Sending stops early when fin is closed by a downstream stage.
// The return value is the number of articles actually sent.
func stuffStage1(w http.ResponseWriter, r *http.Request, config FetchCommand,
	inn chan *FullArticle, fin chan struct{}, dirTree *DirTree,
	uriPrefixExcl, uriPrefixIncl string, nWant int) (nFound int) {

	lg, _ := loghttp.Logger(w, r)

	subtree, head := DiveToDeepestMatch(dirTree, uriPrefixIncl)
	if subtree == nil {
		lg("      does not exist in dirtree: %q", uriPrefixIncl)
		return nFound
	}

	opt := LevelWiseDeeperOptions{}
	opt.Rump = head
	opt.ExcludeDir = uriPrefixExcl
	opt.MaxDepthDiff = config.DepthTolerance
	opt.CondenseTrailingDirs = config.CondenseTrailingDirs
	opt.MaxNumber = nWant
	articles := LevelWiseDeeper(w, r, subtree, opt)
	// lg("      levelwise deeper found %v articles", len(articles))

	for _, art := range articles {
		// Private copy per iteration: the address is handed to another
		// goroutine. Before Go 1.22 the range variable is shared between
		// iterations, so &art would be overwritten while the receiver is
		// still working on it.
		art := art

		lg("    feed #%02v: %v - %v", nFound, art.Mod.Format("15:04:05"), stringspb.Ellipsoider(art.Url, 50))

		art.Url = config.Host + art.Url

		select {
		case inn <- &art:
			// stage 1 loading
		case <-fin:
			lg("downstream stage has shut down, stop stuffing stage1")
			return nFound
		}

		nFound++
		if nFound > nWant-1 {
			return nFound
		}
	}

	return nFound
}
コード例 #7
0
ファイル: query with cursor.go プロジェクト: aarzilli/tools
// guestViewCursor demonstrates cursor based paging over guest book entries.
//
// Each request reads at most two entries, ordered by descending Date,
// starting at the cursor stored in memcache under "greeting_cursor" (if any).
// Afterwards the cursor of the end position is written back to memcache with
// an expiration of 60 seconds. A protocol of all steps is written to w as HTML.
func guestViewCursor(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	c := appengine.NewContext(r)

	// Query methods return a derived query (compare q.Start below).
	// The result of Order must be kept, otherwise the ordering is dropped.
	q := ds.NewQuery(gbp.GbEntryKind).Order("-Date")

	b1 := new(bytes.Buffer)

	// Resume from a previously stored cursor, if there is a usable one.
	cur_start, err := memcache.Get(c, "greeting_cursor")
	if err == nil {
		str_curs := string(cur_start.Value)
		if len(cur_start.Value) > 0 {
			cursor, err := ds.DecodeCursor(str_curs) //  inverse is string()
			loghttp.E(w, r, err, false)
			if err == nil {
				b1.WriteString("found cursor from memcache -" + stringspb.Ellipsoider(str_curs, 10) + "-<br>\n")
				q = q.Start(cursor)
			}
		}
	}

	iter := q.Run(c)
	cntr := 0
	for {
		var g gbp.GbEntryRetr
		cntr++
		if cntr > 2 {
			// fmt.Sprint renders the decimal number;
			// string(int) would yield the rune with that code point.
			b1.WriteString("  batch complete -" + fmt.Sprint(cntr) + "-<br>\n")
			break
		}

		_, err := iter.Next(&g)
		if err == ds.Done {
			b1.WriteString("scan complete -" + fmt.Sprint(cntr) + "-<br>\n")
			break
		}

		if _, isMismatch := err.(*ds.ErrFieldMismatch); isMismatch {
			err = nil // ignore this one - it's caused by our deliberate differences between gbsaveEntry and gbEntrieRetr
		}

		if err != nil {
			b1.WriteString("error fetching next: " + err.Error() + "<br>\n")
			break
		}

		b1.WriteString("  - " + g.String())
	}

	// Get updated cursor and store it for next time.
	if cur_end, err := iter.Cursor(); err == nil {

		str_c_end := cur_end.String() //  inverse is decode()

		mi_save := &memcache.Item{
			Key:        "greeting_cursor",
			Value:      []byte(str_c_end),
			Expiration: 60 * time.Second,
		}

		if err := memcache.Set(c, mi_save); err != nil {
			b1.WriteString("error adding memcache item " + err.Error() + "<br>\n")
		} else {
			b1.WriteString("wrote cursor to memcache -" + stringspb.Ellipsoider(str_c_end, 10) + "-<br>\n")
		}

	} else {
		b1.WriteString("could not retrieve cursor_end " + err.Error() + "<br>\n")
	}

	w.Header().Set("Content-Type", "text/html")
	w.Write(b1.Bytes())

	w.Write([]byte("<br>----<br>"))

}
コード例 #8
0
ファイル: tokenize.go プロジェクト: aarzilli/tools
// cleanseHtml re-serializes the HTML read from r into a buffer and strips
// unwanted parts on the way:
//
//   - script, noscript and iframe elements are removed including their content
//   - link and meta tags are removed
//   - comments are removed
//   - the doctype is followed by a utf-8 content type meta tag
//
// Remaining tags are written on separate lines.
// On a regular end of input the buffer and a nil error are returned.
// If the tokenizer keeps reporting another error, the partial buffer is
// returned together with an error.
func cleanseHtml(r io.Reader) (*bytes.Buffer, error) {

	// Elements that are dropped together with everything up to their end tag.
	dropWithContent := map[string]bool{
		"script":   true,
		"noscript": true,
		"iframe":   true,
	}
	// Void elements: they never have an end tag, so only the tag itself is
	// dropped. Treating them like the elements above would leave skipping
	// switched on until some unrelated end tag switches it off again.
	dropTagOnly := map[string]bool{
		"link": true,
		"meta": true,
	}

	b := new(bytes.Buffer)

	d := html.NewTokenizer(r)
	cntrErr := 0
	cntrTkn := 0
	skipping := false
	for {
		tokenType := d.Next()
		cntrTkn++

		if tokenType == html.ErrorToken {
			// io.EOF is the regular end of tokenization, not a failure.
			if d.Err() == io.EOF {
				return b, nil
			}
			cntrErr++
			if cntrErr > 5 {
				return b, errors.New(spf("error loop at pos %v: %v", cntrTkn, d.Err()))
			}
			continue
		}

		token := d.Token()
		s2 := strings.TrimSpace(string(token.Data))
		attr := getAttr(token.Attr)

		cntrErr = 0
		switch tokenType {
		case html.StartTagToken:
			switch {
			case dropWithContent[s2]:
				skipping = true // s2 keeps the tag name for the "skipped" message
			case dropTagOnly[s2]:
				s2 = ""
			default:
				s2 = "\n<" + s2 + attr + ">"
			}
		case html.EndTagToken: // </tag>
			switch {
			case dropWithContent[s2]:
				skipping = false
				s2 = ""
			case dropTagOnly[s2]:
				s2 = ""
			default:
				s2 = "\n</" + s2 + ">\n"
			}
		case html.SelfClosingTagToken:
			if dropWithContent[s2] || dropTagOnly[s2] {
				s2 = ""
			} else {
				s2 = "\n<" + s2 + attr + "/>\n"
			}
		case html.DoctypeToken:
			s2 = "<!DOCTYPE " + s2 + `><meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>`

		case html.TextToken:
			// text is passed through unchanged
		case html.CommentToken:
			s2 = ""
		default:
			// nothing
		}

		if !skipping {
			b.WriteString(s2)
			continue
		}

		// Report what is being dropped.
		if s2 != "" {
			s2 = strings.Replace(s2, "\n", "", -1)
			s2 = stringspb.Ellipsoider(s2, 30)
			pf("skipped %v \n", s2)
		}
	}

}
コード例 #9
0
ファイル: tokenize.go プロジェクト: aarzilli/tools
// decomposeHtml prints an outline of the HTML read from r:
// one line per start tag (or self closing tag) that is not on the skip list,
// followed by shortened text content. At the end a histogram of the tag
// names is printed, sorted by sortmap.StringKeysToSortedArray.
//
// src http://golang-examples.tumblr.com/page/2
func decomposeHtml(r io.Reader) {

	// For reference, the tokenizer delivers
	//
	// type Token struct {
	//     Type     TokenType
	//     DataAtom atom.Atom
	//     Data     string
	//     Attr     []Attribute
	// }
	// type Attribute struct {
	//     Namespace, Key, Val string
	// }

	// Tags that are neither printed nor counted.
	skip := map[string]string{
		"meta":       "skip",
		"html":       "skip",
		"head":       "skip",
		"title":      "skip",
		"body":       "skip",
		"link":       "skip",
		"script":     "skip",
		"noscript":   "skip",
		"----------": "skip",
		"iframe":     "skip",
		"nav":        "skip",
		"form":       "skip",
	}
	histogram := map[string]interface{}{}

	d := html.NewTokenizer(r)
	cntrErr := 0
	cntrTkn := 0
	for {
		tokenType := d.Next()
		cntrTkn++

		if tokenType == html.ErrorToken {
			// io.EOF is the regular end of input; leave without reporting it
			// as an error.
			if d.Err() == io.EOF {
				break
			}
			pf("#%v err ", cntrTkn)
			cntrErr++
			if cntrErr > 5 {
				break
			}
			continue
		}

		token := d.Token()
		cntrErr = 0
		s1 := strings.TrimSpace(spf(" %#v", token)) // full dump, for unexpected token types
		s2 := strings.TrimSpace(string(token.Data))

		switch tokenType {
		case html.StartTagToken, html.SelfClosingTagToken:
			if _, ok := skip[s2]; !ok {
				pf("\n%v ", s2)
				// A missing key fails the assertion and yields a count of zero.
				cnt, _ := histogram[s2].(int)
				histogram[s2] = cnt + 1
			}
		case html.TextToken:
			if len(s2) > 1 && !strings.HasPrefix(s2, `//`) {
				s2 = strings.Replace(s2, "\n", "", -1)
				pf("\t%v", stringspb.Ellipsoider(s2, 22))
			}
		case html.EndTagToken: // </tag>
			// pf("/%v ", s2)
		case html.CommentToken:
			// pf("comment ")
		case html.DoctypeToken:

		default:
			pf("default case %v\n", s1)
		}
	}

	hSort := sortmap.StringKeysToSortedArray(histogram)

	pf("\n\n")
	for _, v := range hSort {
		pf("%10s %4v\n", v, histogram[v])
	}

}
コード例 #10
0
ファイル: get.go プロジェクト: aarzilli/tools
// GetByContext returns the instance info ii, a variable declared outside this
// function, after refreshing its fields from the appengine APIs if needed.
//
// ii is returned without any update when the last update is younger than
// 200 ms, or younger than one hour while ii.Hostname is longer than two
// characters. Otherwise all fields are recomputed and ii.LastUpdated is set
// to the current time.
//
// The function panics when the version ID reported by appengine does not
// consist of exactly two dot-separated parts.
//
// Todo: When c==nil we are in a non-appengine environment.
// We still want to return at least ii.PureHostname
func GetByContext(c context.Context) *Instance {

	tstart := time.Now()

	// A zero LastUpdated means ii was never filled; skip the age checks then.
	if !ii.LastUpdated.IsZero() {

		age := tstart.Sub(ii.LastUpdated)

		// Updated a moment ago: return as is, whatever the content.
		if age < 200*time.Millisecond {
			aelog.Infof(c, "instance info update too recently: %v, skipping.\n", age)
			return ii
		}

		// Younger than an hour: reuse only if a hostname had been obtained.
		if age < 1*time.Hour {
			if len(ii.Hostname) > 2 {
				return ii
			}

		}

		aelog.Infof(c, "instance info update too old: %v, recomputing.\n", age)
	}

	ii.ModuleName = appengine.ModuleName(c)
	ii.InstanceID = appengine.InstanceID()
	ii.VersionFull = appengine.VersionID(c)

	// The version ID is expected as "major.minor".
	majorMinor := strings.Split(ii.VersionFull, ".")
	if len(majorMinor) != 2 {
		panic("we need a version string of format X.Y")
	}

	ii.VersionMajor = majorMinor[0]
	ii.VersionMinor = majorMinor[1]

	// The initial value only declares err; it is overwritten by the next call.
	var err = errors.New("dummy creation error message")

	ii.NumInstances, err = module.NumInstances(c, ii.ModuleName, ii.VersionFull)
	if err != nil {
		// this never works with version full
		// we do not log this - but try version major
		err = nil

		// The retry with the major version is only attempted outside the
		// local environment.
		if !util_appengine.IsLocalEnviron() {
			ii.NumInstances, err = module.NumInstances(c, ii.ModuleName, ii.VersionMajor)

			if err != nil {
				// An error containing all three fragments is logged as info
				// only; everything else is logged as an error.
				eStr := err.Error()
				eCmp1, eCmp2, eCmp3 := "API error", "INVALID_VERSION)", "Could not find the given version"
				if strings.Contains(eStr, eCmp1) && strings.Contains(eStr, eCmp2) && strings.Contains(eStr, eCmp3) {
					aelog.Infof(c, "get num instances works only live and without autoscale; %v", err)
				} else {
					aelog.Errorf(c, "get num instances error; %v", err)
				}
			}

		}

	}

	// in auto scaling, google reports "zero" - which can not be true
	// we assume at least 1
	if ii.NumInstances == 0 {
		ii.NumInstances = 1
	}

	// http://[0-2].1.default.libertarian-islands.appspot.com/instance-info

	// Hostname of the module version, without a specific instance.
	ii.Hostname, err = appengine.ModuleHostname(c, ii.ModuleName, ii.VersionMajor, "")
	if err != nil {
		aelog.Errorf(c, "ModuleHostname1: %v", err)
	}

	ii.PureHostname = appengine.DefaultVersionHostname(c)

	// Per-instance hostnames are only requested outside the dev app server.
	if !appengine.IsDevAppServer() {
		// Instance "0": the two auto scaling errors are downgraded to an
		// info message, any other error is logged as an error.
		ii.HostnameInst0, err = appengine.ModuleHostname(c, ii.ModuleName, ii.VersionMajor, "0")
		if err != nil && (err.Error() == autoScalingErr1 || err.Error() == autoScalingErr2) {
			aelog.Infof(c, "inst 0: "+autoScalingErrMsg)
			err = nil
		}
		if err != nil {
			aelog.Errorf(c, "ModuleHostname2: %v", err)
		}

		// Instance "1": same treatment as instance "0".
		ii.HostnameInst1, err = appengine.ModuleHostname(c, ii.ModuleName, ii.VersionMajor, "1")
		if err != nil && (err.Error() == autoScalingErr1 || err.Error() == autoScalingErr2) {
			aelog.Infof(c, "inst 1: "+autoScalingErrMsg)
			err = nil
		}
		if err != nil {
			aelog.Errorf(c, "ModuleHostname3: %v", err)
		}

		// Hostname of the module "mod02"; a failure is logged as info only.
		ii.HostnameMod02, err = appengine.ModuleHostname(c, "mod02", "", "")
		if err != nil {
			aelog.Infof(c, "ModuleHostname4: %v", err)
		}

	}

	ii.LastUpdated = time.Now()

	aelog.Infof(c, "collectInfo() completed, %v  - %v - %v - %v - %v, took %v",
		stringspb.Ellipsoider(ii.InstanceID, 4), ii.VersionMajor, ii.ModuleName,
		ii.Hostname, ii.PureHostname, time.Now().Sub(tstart))

	return ii
}
コード例 #11
0
ファイル: view.go プロジェクト: aarzilli/tools
// parseFurther reads the latest blob from memcache (key keyLatest), splits its
// body at the multipart boundary wb.S and writes a plain text report about
// every part into the response.
//
// Parts starting with an image/png or image/jpeg content type header are
// separated into header and content; the file name is taken from a header
// line starting with "name=". All other parts are reported as unparseable.
// With saveImages set, each recognized image is stored in the Images
// reservoir, overwriting slots in round robin order.
func parseFurther(w http.ResponseWriter, r *http.Request, saveImages bool) {

	c := appengine.NewContext(r)

	report := new(bytes.Buffer)
	defer func() {
		w.Header().Set("Content-type", "text/plain; charset=utf-8")
		w.Write(report.Bytes())
	}()

	// Get the item from the memcache
	blob := new(dsu.WrapBlob)
	ok := dsu.McacheGet(c, keyLatest, blob)
	loghttp.E(w, r, ok, true)
	if !ok {
		return
	}

	report.WriteString(sp("name %v\n", blob.Name))
	report.WriteString(sp("S (boundary): %q\n", blob.S))

	// Instead of dumping the entire body, split it by multipart mime boundary.
	parts := bytes.Split(blob.VByte, []byte("--"+blob.S))
	for idx, part := range parts {
		fn := ""    // filename
		ctype := "" // content type; stays empty for unrecognized parts

		// content must be a fresh variable in every iteration,
		// since its address is stored in the reservoir below.
		content := strings.Trim(string(part), "\r \n")

		report.WriteString(sp("\n___________mime boundary index %v___________\n", idx))

		isImage := strings.HasPrefix(content, "Content-Type: image/png;") ||
			strings.HasPrefix(content, "Content-Type: image/jpeg;")
		if isImage {
			if start := strings.Index(content, sepHeaderContent); start > 0 {
				header := content[:start]
				for _, line := range strings.Split(header, "\r\n") {
					line = strings.TrimSpace(line)
					if strings.HasPrefix(line, "name=") {
						fn = stringspb.LowerCasedUnderscored(strings.Split(line, "=")[1])
					}
				}
				content = content[start+len(sepHeaderContent):]
				if semi := strings.Index(header, ";"); semi > 0 {
					ctype = header[:semi]
				}
			}
		}

		if ctype == "" {
			report.WriteString("unparseable: " + stringspb.Ellipsoider(content, 400))
			continue
		}

		report.WriteString(sp("\n\tctype=%v\n\t------------", ctype))
		if fn != "" {
			report.WriteString(sp("\n\tfilename=%v\n\t------------", fn))
		}
		if saveImages {
			Images[reservoirRevolver%reservoirSize] = resEntry{
				when:        util.TimeMarker(),
				contentType: ctype,
				fn:          fn,
				b64Img:      &content,
			}
			reservoirRevolver++
			aelog.Infof(c, "Put image into reservoir %v %v", fn, ctype)
		}
	}

}
コード例 #12
0
ファイル: 4_fetch.go プロジェクト: aarzilli/tools
// FetchUsingRSS takes a RSS XML uri and fetches some of its documents.
// It uses a three staged pipeline for parallel fetching.
// Results are stored into the given filesystem fs.
// Config points to the source of RSS XML,
// and has some rules for conflating URI directories.
// uriPrefix and config.DesiredNumber tell the func
// which subdirs of the RSS dir should be fetched - and how many at max.
//
// The pipeline consists of
//
//	stage 1: stuffStage1 sends wanted articles into the channel inn
//	stage 2: numWorkers goroutines fetch the article bodies
//	stage 3: one collector goroutine gathers the results
//
// The collector ends the pipeline by closing fin once no article arrived
// within its timeout. After that, the fetched articles are written to fs,
// along with a JSON digest of the number of articles per directory.
func FetchUsingRSS(w http.ResponseWriter, r *http.Request,
	fs fsi.FileSystem, config FetchCommand,
) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	// Hand the buffered log over to loghttp.Pf when the function returns.
	defer func() {
		loghttp.Pf(w, r, b.String())
	}()

	if config.Host == "" {
		lg(" empty host; returning")
		return
	}

	config = addDefaults(config)

	// Fetching the rssXML takes time.
	// We do it before the timeouts of the pipeline stages are set off.
	lg(" ")
	lg(config.Host)
	if config.Host == "test.economist.com" {
		switchTData(w, r)
	}

	// lg(stringspb.IndentedDump(config))
	dirTree := &DirTree{Name: "/", Dirs: map[string]DirTree{}, EndPoint: true}

	fnDigest := path.Join(docRoot, config.Host, "digest2.json")
	loadDigest(w, r, lg, fs, fnDigest, dirTree) // previous

	// Refresh the directory tree unless the stored one is practically new.
	age := time.Since(dirTree.LastFound)
	lg("DirTree is %5.2v hours old (%v)", age.Hours(), dirTree.LastFound.Format(time.ANSIC))
	if age.Hours() > 0.001 {

		rssUrl := matchingRSSURI(w, r, config)
		if rssUrl == "" {
			// No RSS uri available: fall back to fetchSave on the search prefix.
			m := new(MyWorker)
			m.r = r
			m.lg = lg
			m.fs1 = fs
			m.SURL = path.Join(config.Host, config.SearchPrefix)
			_, _, _, err := fetchSave(m)
			lg(err)
			if err != nil {
				return
			}
		} else {
			rssUrl = path.Join(config.Host, rssUrl)
			rssDoc, _ := rssXMLFile(w, r, fs, rssUrl)
			rssDoc2DirTree(w, r, dirTree, rssDoc, config.Host)
		}

		saveDigest(lg, fs, fnDigest, dirTree)
	}

	// lg(dirTree.String())
	//
	//
	// setting up a 3 staged pipeline from bottom up
	//
	var fullArticles []FullArticle

	inn := make(chan *FullArticle) // jobs are stuffed in here
	out := make(chan *FullArticle) // completed jobs are delivered here
	fin := make(chan struct{})     // downstream signals end to upstream
	var stage3Wait sync.WaitGroup

	// stage 3
	// fire up the "collector", a fan-in
	//
	// Add must happen before the goroutine is started. Calling it inside the
	// goroutine races with Wait below, which could then return before the
	// collector has even been counted.
	stage3Wait.Add(1)
	go func() {
		defer stage3Wait.Done()
		// 400 good value; critical point at 35
		// economist.com required 800 ms
		const delayInitial = 1200
		const delayRefresh = 800
		cout := time.After(time.Millisecond * delayInitial)
		for {
			select {

			case fa := <-out:
				fullArticles = append(fullArticles, *fa)
				pth := fetch.PathFromStringUrl(fa.Url)
				lg("    fetched   %v - %v ", fa.Mod.Format("15:04:05"), stringspb.Ellipsoider(pth, 50))
				cout = time.After(time.Millisecond * delayRefresh) // refresh timeout
			case <-cout:
				lg("timeout after %v articles", len(fullArticles))
				// we are using channel == nil - channel closed combinations
				// inspired by http://dave.cheney.net/2013/04/30/curious-channels
				//
				// NOTE(review): out is assigned here while the workers read
				// the same variable without synchronization - confirm whether
				// this is acceptable.
				out = nil // not close(out) => case above is now blocked
				close(fin)
				lg("fin closed; out nilled")
				return
			}
		}
	}()

	//
	// stage 2
	for i := 0; i < numWorkers; i++ {
		// fire up a dedicated fetcher routine, a worker
		// we are using channel == nil - channel closed combinations
		// inspired by http://dave.cheney.net/2013/04/30/curious-channels
		go func() {
			var a *FullArticle
			for {
				select {
				case a = <-inn:
					var err error
					var inf fetch.Info
					a.Body, inf, err = fetch.UrlGetter(r, fetch.Options{URL: a.Url})
					lg(err)
					if a.Mod.IsZero() {
						a.Mod = inf.Mod
					}
					select {
					case out <- a:
					case <-fin:
						lg("    worker spinning down; branch 1; abandoning %v", a.Url)
						return
					}
					a = new(FullArticle)
				case <-fin:
					if a != nil && a.Url != "" {
						u, _ := url.Parse(a.Url)
						lg("    abandoned %v", u.Path)
					} else {
						lg("    worker spinning down; branch 2")
					}
					return
				}
			}
		}()
	}

	//
	//
	//
	// loading stage 1
	//
	// Start at the search prefix. As long as not enough articles were found,
	// move one directory up and search again, excluding the directory that
	// has just been searched.
	uriPrefix := config.SearchPrefix
	found := 0
	uriPrefixExcl := "impossible"
	for i := 0; i < 15; i++ {
		lg("  searching for prefix   %v    - excl %q    - %v of %v", uriPrefix, uriPrefixExcl, found, config.DesiredNumber)
		found += stuffStage1(w, r, config, inn, fin, dirTree,
			uriPrefixExcl, uriPrefix, config.DesiredNumber-found)

		if found >= config.DesiredNumber {
			break
		}

		if uriPrefix == "/" || uriPrefix == "." {
			lg("  root exhausted")
			break
		}

		newPrefix := path.Dir(uriPrefix)
		uriPrefixExcl = uriPrefix
		uriPrefix = newPrefix
	}
	lg("  found %v of %v", found, config.DesiredNumber)

	//
	lg("stage3Wait.Wait() before")
	stage3Wait.Wait()
	lg("stage3Wait.Wait() after")

	// workers spin down earlier -
	// but ae log writer and response writer need some time
	// to record the spin-down messages
	time.Sleep(120 * time.Millisecond)

	// compile out directory statistics
	histoDir := map[string]int{}
	for _, a := range fullArticles {
		u, err := url.Parse(a.Url)
		lg(err)
		if err != nil {
			continue // u is nil on error and must not be dereferenced
		}
		semanticUri := condenseTrailingDir(u.Path, config.CondenseTrailingDirs)
		dir := path.Dir(semanticUri)
		histoDir[dir]++
	}
	sr := sortmap.SortMapByCount(histoDir)
	_ = sr

	// Create dirs
	for k := range histoDir {
		dir := path.Join(docRoot, k) // config.Host already contained in k
		err := fs.MkdirAll(dir, 0755)
		lg(err)
		err = fs.Chtimes(dir, time.Now(), time.Now())
		lg(err)
	}

	// Saving as files
	for _, a := range fullArticles {
		if len(a.Body) == 0 {
			continue
		}
		u, err := url.Parse(a.Url)
		lg(err)
		if err != nil {
			continue // u is nil on error and must not be dereferenced
		}
		u.Fragment = ""
		u.RawQuery = ""
		semanticUri := condenseTrailingDir(u.RequestURI(), config.CondenseTrailingDirs)
		p := path.Join(docRoot, semanticUri)
		err = fs.WriteFile(p, a.Body, 0644)
		lg(err)
		err = fs.Chtimes(p, a.Mod, a.Mod)
		lg(err)
	}

	// Save the per-directory counts as JSON next to the documents.
	{
		digest, err := json.MarshalIndent(histoDir, "  ", "\t")
		lg(err)
		fnDigest := path.Join(docRoot, config.Host, "fetchDigest.json")
		err = fs.WriteFile(fnDigest, digest, 0755)
		lg(err)
	}

	// fsm, ok := memfs.Unwrap(fs)
	// if ok {
	// 	fsm.Dump()
	// }

}
コード例 #13
0
ファイル: write methods.go プロジェクト: aarzilli/tools
// writeMethods is a demo handler. It shows several ways of writing into an
// http.ResponseWriter and of reading from an http response body:
// Fprint, raw byte slices, bytes.Buffer, io.WriteString, io.Copy and
// ioutil.ReadAll.
//
// The response bodies are fetched from /write-methods-read on this app's own
// hostnames. If the first fetch fails, the error is logged and only the
// final "end of page" marker is written.
func writeMethods(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	c := appengine.NewContext(r)

	client := urlfetch.Client(c)

	ii := instance_mgt.Get(r)
	resp2, err := client.Get(spf(`http://%s/write-methods-read`, ii.PureHostname))
	loghttp.E(w, r, err, false)

	w.Header().Set("Content-Type", "text/html; charset=utf-8")

	bufDemo := new(bytes.Buffer)
	bufDemo.WriteString("end of page")
	defer func() {
		//w.Header().Set("Content-Type", "text/plain; charset=utf-8")
		w.Write(bufDemo.Bytes())
	}()

	if err != nil {
		// Without a response there is no body to read from or to close.
		return
	}
	defer resp2.Body.Close()

	fmt.Fprint(w, "<pre>")

	//
	//
	fmt.Fprint(w, `Most basic:
		this is written via Fprintln into response writer`+"\n\n\n")

	//
	// byte slice into response writer
	const sz = 20
	var sB []byte
	sB = make([]byte, sz)
	sB[0] = 112
	sB[1] = 111
	sB[2] = '-'
	sB[3] = 112
	sB[4] = 101
	sB[5] = 108
	sB[6] = 32
	for i := 7; i < sz; i++ {
		sB[i] = ' '
	}
	sB[sz-1] = '!'

	w.Write([]byte("Byte slice into response writer: \n\t\t"))
	w.Write(sB)
	w.Write([]byte("\n\n\n"))

	//
	//
	// resp2.Body into byte slice,
	sB2 := make([]byte, sz)
	for i := 0; i < sz; i++ {
		sB2[i] = '-'
	}
	bytesRead, err := resp2.Body.Read(sB2)
	if err == nil {
		fmt.Fprintf(w, "Byte slice - reading %v bytes from response-body\n\t\t%q \n\n\n",
			bytesRead, string(sB2))
	} else {
		fmt.Fprintf(w, "err reading into byte slice  --%v-- \n\n\n", err)
	}

	//
	//
	//
	wpf(w, "operations with a bytes buffer\n")
	var buf1 *bytes.Buffer
	buf1 = new(bytes.Buffer) // not optional on buffer pointer
	buf1.ReadFrom(resp2.Body)

	buf1 = new(bytes.Buffer)
	wpf(buf1, "\t\tbuf1 content %v (filled via Fprintf)\n", 222)

	wpf(w, "FOUR methods of dumping buf1 into resp.w:\n")
	wpf(w, "\tw.Write\n")
	w.Write(buf1.Bytes())
	wpf(w, "\tFprint\n")
	// Buffer content is data and is therefore passed as an argument,
	// not as the format string.
	wpf(w, "%s", buf1.String())
	wpf(w, "\tio.WriteString\n")
	io.WriteString(w, buf1.String())
	wpf(w, "\tio.Copy \n")
	io.Copy(w, buf1) // copy the bytes.Buffer into w
	wpf(w, " \t\t\tio.copy exhausts buf1 - Fprinting again yields %q ", buf1.String())
	wpf(w, "%s", buf1.String())
	wpf(w, "\n\n\n")

	//
	//
	//
	wpf(w, "ioutil.ReadAll\n")
	resp3, err := client.Get(spf(`http://%s/write-methods-read`, ii.Hostname))
	loghttp.E(w, r, err, false)
	if err == nil {
		content, errRead := ioutil.ReadAll(resp3.Body)
		resp3.Body.Close()
		loghttp.E(w, r, errRead, false)
		scont := stringspb.Ellipsoider(string(content), 20)
		w.Write([]byte(scont))
	}

	fmt.Fprint(w, "</pre>")

}