Example #1
func confirmPay(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	/*


	   http://abc.de/ef?input_transaction_hash=46178baf7de078954b5aebb71c12120b33d998faac1c165af195eae90f19b25c&shared=false&address=18tpXf8WWuhJP95JbDASbZvavmZJbrydut&destination_address=18tpXf8WWuhJP95JbDASbZvavmZJbrydut&input_address=1ZTnjSdknZvur9Gc73gvB8XBTWL7nV1m6&test=true&anonymous=false&confirmations=0&value=82493362&transaction_hash=46178baf7de078954b5aebb71c12120b33d998faac1c165af195eae90f19b25c
	*/

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored; the closure reads the outer b
	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Payment confirmation"}))
	defer wpf(b, tplx.Foot)

	wpf(b, "<pre>")
	defer wpf(b, "</pre>")

	err := r.ParseForm()
	lg(err)

	custSecret := ""
	if r.FormValue("customsecret") != "" {
		custSecret = r.FormValue("customsecret")
	}
	lg("custom secret is %q", custSecret)

	val := ""
	if r.FormValue("value") != "" {
		val = r.FormValue("value")
	}
	lg("value is %q", val)

}
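The commented-out callback URL above shows the query parameters the payment processor posts back; confirmPay only reads customsecret and value out of the parsed form. A minimal, standard-library-only sketch of pulling those fields from such a callback URL (the URL below is shortened from the comment, and the values are only illustrative):

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Callback shortened from the comment in confirmPay; the values are only illustrative.
	raw := "http://abc.de/ef?address=18tpXf8WWuhJP95JbDASbZvavmZJbrydut&confirmations=0&value=82493362&test=true"

	u, err := url.Parse(raw)
	if err != nil {
		fmt.Println(err)
		return
	}
	q := u.Query() // url.Values, i.e. a map[string][]string

	// FormValue-style access: Get returns "" if the key is absent.
	fmt.Printf("value         %q\n", q.Get("value"))
	fmt.Printf("confirmations %q\n", q.Get("confirmations"))
	fmt.Printf("customsecret  %q\n", q.Get("customsecret")) // not in this callback => ""
}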
Example #2
// FetchSimilar is an extended version of Fetch.
// It uses a DirTree of crawled *links*, not actual files.
// As it moves up the DOM, it crawls every document for additional links.
// It first moves up to find similar URLs at the same depth
//                        /\
//          /\           /  \
//    /\   /  \         /    \
// It then moves up the ladder again - to accept higher URLs
//                        /\
//          /\
//    /\
func FetchSimilar(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored; the closure reads the outer b

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	start := time.Now()

	wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Find similar HTML URLs"}))
	defer wpf(b, tplx.Foot)

	wpf(b, "<pre>")
	defer wpf(b, "</pre>")

	fs1 := GetFS(appengine.NewContext(r))

	err := r.ParseForm()
	lg(err)

	countSimilar := 3
	sCountSimilar := r.FormValue("cnt")
	if sCountSimilar != "" {
		i, err := strconv.Atoi(strings.TrimSpace(sCountSimilar))
		if err == nil {
			countSimilar = i
		}
	}

	surl := r.FormValue(routes.URLParamKey)
	ourl, err := fetch.URLFromString(surl)
	lg(err)
	if err != nil {
		return
	}
	if ourl.Host == "" {
		lg("host is empty (%v)", surl)
		return
	}

	knownProtocol := ""
	if r.FormValue("prot") != "" {
		knownProtocol = r.FormValue("prot")
	}

	numWorkers := 0
	sNumWorkers := r.FormValue("numworkers")
	if sNumWorkers != "" {
		i, err := strconv.Atoi(strings.TrimSpace(sNumWorkers))
		if err == nil {
			numWorkers = i
		}
	}

	srcDepth := strings.Count(ourl.Path, "/")

	cmd := FetchCommand{}
	cmd.Host = ourl.Host
	cmd.SearchPrefix = ourl.Path
	cmd = addDefaults(cmd)

	dirTree := &DirTree{Name: "/", Dirs: map[string]DirTree{}, EndPoint: true}
	fnDigest := path.Join(docRoot, cmd.Host, "digest2.json")
	loadDigest(w, r, lg, fs1, fnDigest, dirTree) // previous
	lg("dirtree 400 chars is %v end of dirtree\t\t", stringspb.ToLen(dirTree.String(), 400))

	m1 := new(MyWorker)
	m1.r = r
	m1.lg = lg
	m1.fs1 = fs1
	m1.SURL = path.Join(cmd.Host, ourl.Path)
	m1.Protocol = knownProtocol
	btsSrc, modSrc, usedExisting, err := fetchSave(m1)
	lg(err)
	if err != nil {
		return
	}
	if !usedExisting {
		addAnchors(lg, cmd.Host, btsSrc, dirTree)
	}

	lg("\t\t%4.2v secs so far 1", time.Now().Sub(start).Seconds())

	var treePath string
	treePath = "/blogs/freeexchange"
	treePath = "/news/europe"
	treePath = path.Dir(ourl.Path)

	opt := LevelWiseDeeperOptions{}
	opt.Rump = treePath
	opt.ExcludeDir = "/news/americas"
	opt.ExcludeDir = "/blogs/buttonwood"
	opt.ExcludeDir = "/something-impossible"
	opt.MinDepthDiff = 1
	opt.MaxDepthDiff = 1
	opt.CondenseTrailingDirs = cmd.CondenseTrailingDirs
	opt.MaxNumber = cmd.DesiredNumber + 1  // one more for "self"
	opt.MaxNumber = cmd.DesiredNumber + 40 // collect more, 'cause we filter out those too old later

	var subtree *DirTree
	links := []FullArticle{}

	alreadyCrawled := map[string]struct{}{}

MarkOuter:
	for j := 0; j < srcDepth; j++ {
		treePath = path.Dir(ourl.Path)
	MarkInner:
		// for i := 1; i < srcDepth; i++ {
		for i := 1; i < (srcDepth + 5); i++ {

			subtree, treePath = DiveToDeepestMatch(dirTree, treePath)

			lg("Looking from height %v to level %v  - %v", srcDepth-i, srcDepth-j, treePath)

			if _, ok := alreadyCrawled[treePath]; ok {
				// lg("\t already digested %v", treePath)
				continue
			}

			m2 := new(MyWorker)
			m2.r = r
			m2.lg = lg
			m2.fs1 = fs1
			m2.SURL = path.Join(cmd.Host, treePath)
			m2.Protocol = knownProtocol

			btsPar, _, usedExisting, err := fetchSave(m2)
			lg(err)
			if err != nil {
				return
			}
			alreadyCrawled[treePath] = struct{}{}
			if !usedExisting {
				addAnchors(lg, cmd.Host, btsPar, dirTree)
			}

			if subtree == nil {
				lg("\n#%v treePath %q ; subtree is nil", i, treePath)
			} else {
				// lg("\n#%v treePath %q ; subtree exists", i, treePath)

				opt.Rump = treePath
				opt.MinDepthDiff = i - j
				opt.MaxDepthDiff = i - j
				lvlLinks := LevelWiseDeeper(nil, nil, subtree, opt)
				links = append(links, lvlLinks...)
				for _, art := range lvlLinks {
					_ = art
					// lg("#%v fnd    %v", i, stringspb.ToLen(art.Url, 100))
				}

				if len(links) >= opt.MaxNumber {
					lg("found enough links")
					break MarkOuter
				}

				pathPrev := treePath
				treePath = path.Dir(treePath)
				// lg("#%v  bef %v - aft %v", i, pathPrev, treePath)

				if pathPrev == "." && treePath == "." ||
					pathPrev == "/" && treePath == "/" ||
					pathPrev == "" && treePath == "." {
					lg("break to innner")
					break MarkInner
				}
			}

		}
	}

	//
	//
	//
	//
	lg("%v links after %4.2v secs", len(links), time.Now().Sub(start).Seconds())

	lg("============================")
	lg("Now reading/fetching actual similar files - not just the links")
	//
	tried := 0
	selecteds := []FullArticle{}

	nonExisting := []FullArticle{}
	nonExistFetched := []FullArticle{}

	for _, art := range links {

		if art.Url == ourl.Path {
			lg("skipping self\t%v", art.Url)
			continue
		}

		tried++

		useExisting := false

		semanticUri := condenseTrailingDir(art.Url, cmd.CondenseTrailingDirs)
		p := path.Join(docRoot, cmd.Host, semanticUri)

		f, err := fs1.Open(p)
		// lg(err) // it's no error if the file does not exist
		if err != nil {
			// lg("!nstore %q", semanticUri)
		} else {
			// lg("reading %q", semanticUri)

			// let's put this into a func, so that f.Close is called at the end of this inner func;
			// otherwise defer f.Close() would span the entire outer func and prevent
			// overwrites and chmods further down
			readExisting := func() {
				defer f.Close()
				fi, err := f.Stat()
				lg(err)
				if err == nil {
					age := time.Now().Sub(fi.ModTime())
					if age.Hours() < 10 {
						lg("\t\tusing existing file with age %4.2v hrs", age.Hours())
						art.Mod = fi.ModTime()
						bts, err := ioutil.ReadAll(f)
						lg(err)
						art.Body = bts
						if len(bts) < 200 {
							if bytes.Contains(bts, []byte(fetch.MsgNoRdirects)) {
								return
							}
						}
						selecteds = append(selecteds, art)
						useExisting = true
					}
				}
			}
			readExisting()

		}

		if !useExisting {
			nonExisting = append(nonExisting, art)
		}

		if len(selecteds) >= countSimilar {
			break
		}

	}
	lg("============================")
	lg("tried %v links - yielding %v existing similars; not existing in datastore: %v, %v were requested.",
		tried, len(selecteds), len(nonExisting), countSimilar)

	if len(selecteds) < countSimilar {
		jobs := make([]distrib.Worker, 0, len(nonExisting))
		for _, art := range nonExisting {
			surl := path.Join(cmd.Host, art.Url)
			wrkr := MyWorker{SURL: surl}
			wrkr.Protocol = knownProtocol
			wrkr.r = r
			wrkr.lg = lg
			wrkr.fs1 = fs1
			job := distrib.Worker(&wrkr)
			jobs = append(jobs, job)
		}

		opt := distrib.NewDefaultOptions()
		opt.TimeOutDur = 3500 * time.Millisecond
		opt.Want = int32(countSimilar - len(selecteds) + 4) // get some more, in case we have "redirected" bodies
		opt.NumWorkers = int(opt.Want)                      // 5s query limit; => hurry; spawn as many as we want
		if numWorkers > 0 {
			opt.NumWorkers = numWorkers
		}
		lg("Preparing %v simultaneous, wanting %v fetches; at %4.2v secs.", opt.NumWorkers, opt.Want, time.Now().Sub(start).Seconds())
		opt.CollectRemainder = false // 5s query limit; => hurry; don't wait for stragglers

		ret, msg := distrib.Distrib(jobs, opt)
		lg("Distrib returned at %4.2v secs with %v results.", time.Now().Sub(start).Seconds(), len(ret))

		lg("\n" + msg.String())
		for _, v := range ret {
			v1, _ := v.Worker.(*MyWorker)
			if v1.FA != nil {
				age := time.Now().Sub(v1.FA.Mod)
				if age.Hours() < 10 {
					lg("\t\tusing fetched file with age %4.2v hrs", age.Hours())
					nonExistFetched = append(nonExistFetched, *v1.FA)
					if len(nonExistFetched) > (countSimilar - len(selecteds)) {
						break
					}
				}
			}
			if v1.err != nil {
				lg(v1.err)
			}
		}

		lg("tried %v links - yielding %v fetched - jobs %v", len(nonExisting), len(nonExistFetched), len(jobs))
		selecteds = append(selecteds, nonExistFetched...)

		//
		//
		// Extract links
		for _, v := range nonExistFetched {
			// lg("links -> memory dirtree for %q", v.Url)
			addAnchors(lg, cmd.Host, v.Body, dirTree)
		}

	}

	//
	if time.Now().Sub(dirTree.LastFound).Seconds() < 10 {
		lg("saving accumulated (new) links to digest")
		saveDigest(lg, fs1, fnDigest, dirTree)
	}

	lg("\t\t%4.2v secs so far 3", time.Now().Sub(start).Seconds())

	mp := map[string][]byte{}
	mp["msg"] = b.Bytes()
	mp["url_self"] = []byte(condenseTrailingDir(ourl.Path, cmd.CondenseTrailingDirs))
	mp["mod_self"] = []byte(modSrc.Format(http.TimeFormat))
	mp["bod_self"] = btsSrc

	for i, v := range selecteds {
		mp["url__"+spf("%02v", i)] = []byte(v.Url)
		mp["mod__"+spf("%02v", i)] = []byte(v.Mod.Format(http.TimeFormat))
		mp["bod__"+spf("%02v", i)] = v.Body
	}

	mp["lensimilar"] = []byte(spf("%02v", len(selecteds)))

	//
	smp, err := json.MarshalIndent(mp, "", "\t")
	if err != nil {
		lg(b, "marshalling mp to []byte failed\n")
		return
	}

	r.Header.Set("X-Custom-Header-Counter", "nocounter")
	w.Header().Set("Content-Type", "application/json")
	w.Write(smp)

	b.Reset()             // this keeps the  buf pointer intact; outgoing defers are still heeded
	b = new(bytes.Buffer) // creates a *new* buf pointer; outgoing defers write into the *old* buf

	lg("\t\t%4.2v secs so far 4 (json resp written as []byte)", time.Now().Sub(start).Seconds())

	return

}
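The b.Reset() / b = new(bytes.Buffer) lines near the end rely on a defer subtlety: arguments of a deferred call are evaluated at the point of the defer statement, while a deferred closure reads its captured variables only when the surrounding function returns. A small self-contained sketch of just that distinction (not the handler code itself):

package main

import (
	"bytes"
	"fmt"
)

func main() {
	b := new(bytes.Buffer)
	b.WriteString("log output")

	// Argument evaluated at defer time: this call keeps the *old* pointer.
	defer func(old *bytes.Buffer) {
		fmt.Printf("deferred argument sees: %q\n", old.String())
	}(b)

	// Closure: reads the variable b only when the deferred call actually runs.
	defer func() {
		fmt.Printf("deferred closure sees:  %q\n", b.String())
	}()

	// Reassigning b leaves the evaluated argument pointing at the old buffer,
	// while the closure above now sees the new, empty one.
	b = new(bytes.Buffer)
}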
Example #3
// dedupHTTP wraps Dedup()
func dedupHTTP(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored; the closure reads the outer b

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Deduplicating redundant stuff"}))
	defer wpf(b, tplx.Foot)

	wpf(b, "<pre>")
	defer wpf(b, "</pre>")

	err := r.ParseForm()
	lg(err)

	surl := r.FormValue(routes.URLParamKey)
	ourl, err := fetch.URLFromString(surl)
	lg(err)
	if err != nil {
		return
	}
	if ourl.Host == "" {
		lg("host is empty (%v)", surl)
		return
	}

	knownProtocol := ""
	if r.FormValue("prot") != "" {
		knownProtocol = r.FormValue("prot")
	}

	lg("Host %q, Path %q", ourl.Host, ourl.Path)

	fs := GetFS(appengine.NewContext(r), 0)

	least3Files := FetchAndDecodeJSON(r, ourl.String(), knownProtocol, lg, fs)

	lg("Fetched and decoded; found %v", len(least3Files))
	if len(least3Files) > 0 {
		doc := Dedup(ourl, least3Files, lg, fs)

		fNamer := domclean2.FileNamer(logDir, 0)
		fNamer() // first call yields key
		fsPerm := GetFS(appengine.NewContext(r), 0)
		fileDump(lg, fsPerm, doc, fNamer, "_fin.html")

		lg("MapSimiliarCompares: %v SimpleCompares: %v LevenstheinComp: %v\n", breakMapsTooDistinct, appliedLevenshtein, appliedCompare)
		lg("Finish\n")

		var b2 bytes.Buffer
		err := html.Render(&b2, doc)
		lg(err)
		if err != nil {
			return
		}

		b = new(bytes.Buffer)
		// w.Write([]byte("aa"))
		w.Header().Set("Content-type", "text/html; charset=utf-8")
		w.Write(b2.Bytes())

	}

}
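dedupHTTP serializes the deduplicated node tree back to HTML with html.Render from golang.org/x/net/html. A minimal parse-and-render round trip with that package, independent of the Dedup helpers (the input markup is made up):

package main

import (
	"bytes"
	"fmt"
	"strings"

	"golang.org/x/net/html"
)

func main() {
	src := `<html><body><p>hello <b>world</b></p></body></html>`

	// Parse returns the root *html.Node of the document tree.
	doc, err := html.Parse(strings.NewReader(src))
	if err != nil {
		fmt.Println(err)
		return
	}

	// Render writes the tree back out as HTML, as dedupHTTP does into b2.
	var b2 bytes.Buffer
	if err := html.Render(&b2, doc); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(b2.String())
}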
Example #4
func requestPay(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored; the closure reads the outer b
	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	protoc := "https://"
	if appengine.IsDevAppServer() {
		protoc = "http://"
	}

	host := appengine.DefaultVersionHostname(appengine.NewContext(r))
	if appengine.IsDevAppServer() {
		host = "not-loclhost"
	}

	confirmURL := fmt.Sprintf("%v%v%v", protoc, host, uriConfirmPayment)
	confirmURL = url.QueryEscape(confirmURL)

	addrURL := fmt.Sprintf("https://%v/api/receive?method=create&address=%v&callback=%v&customsecret=49&api_code=%v",
		blockChainHost, bitCoinAddress, confirmURL, apiKey)

	req, err := http.NewRequest("GET", addrURL, nil)
	lg(err)
	if err != nil {
		return
	}
	bts, inf, err := fetch.UrlGetter(r, fetch.Options{Req: req})
	bts = bytes.Replace(bts, []byte(`","`), []byte(`", "`), -1)

	if err != nil {
		lg(err)
		lg(inf.Msg)
		return
	}

	lg("response body 1:\n")
	lg("%s\n", string(bts))

	lg("response body 2:\n")
	var data1 map[string]interface{}
	err = json.Unmarshal(bts, &data1)
	lg(err)
	lg(stringspb.IndentedDumpBytes(data1))
	// lg("%#v", data1)

	inputAddress, ok := data1["input_address"].(string)
	if !ok {
		lg("input address could not be casted to string; is type %T", data1["input_address"])
		return
	}
	feePercent, ok := data1["fee_percent"].(float64)
	if !ok {
		lg("fee percent could not be casted to float64; is type %T", data1["fee_percent"])
		return
	}

	lg("Input Adress will be %q; fee percent will be %4.2v", inputAddress, feePercent)

}
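Because the blockchain response is decoded into a map[string]interface{}, every field must be extracted with a checked type assertion, and JSON numbers always arrive as float64. A self-contained sketch of that pattern, limited to the two keys read above (the payload itself is made up):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Illustrative payload with the two fields requestPay reads.
	bts := []byte(`{"input_address": "1ZTnjSdknZvur9Gc73gvB8XBTWL7nV1m6", "fee_percent": 1.5}`)

	var data1 map[string]interface{}
	if err := json.Unmarshal(bts, &data1); err != nil {
		fmt.Println(err)
		return
	}

	// Checked assertions: ok is false if the key is missing or has another type.
	inputAddress, ok := data1["input_address"].(string)
	if !ok {
		fmt.Printf("input address is type %T\n", data1["input_address"])
		return
	}
	feePercent, ok := data1["fee_percent"].(float64) // all JSON numbers decode to float64
	if !ok {
		fmt.Printf("fee percent is type %T\n", data1["fee_percent"])
		return
	}
	fmt.Printf("address %q - fee %.2f%%\n", inputAddress, feePercent)
}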
Example #5
func fetchSimForm(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored; the closure reads the outer b

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	// on live server => always use https
	if r.URL.Scheme != "https" && !util_appengine.IsLocalEnviron() {
		r.URL.Scheme = "https"
		r.URL.Host = r.Host
		lg("lo - redirect %v", r.URL.String())
		http.Redirect(w, r, r.URL.String(), http.StatusFound)
		return
	}

	err := r.ParseForm()
	lg(err)

	rURL := ""
	if r.FormValue(routes.URLParamKey) != "" {
		rURL = r.FormValue(routes.URLParamKey)
	}
	if len(rURL) == 0 {

		wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Find similar HTML URLs"}))
		defer wpf(b, tplx.Foot)

		tm := map[string]string{
			"val":       "www.welt.de/politik/ausland/article146154432/Tuerkische-Bodentruppen-marschieren-im-Nordirak-ein.html",
			"fieldname": routes.URLParamKey,
		}
		tplForm := tt.Must(tt.New("tplName01").Parse(htmlForm))
		tplForm.Execute(b, tm)

	} else {

		fullURL := fmt.Sprintf("https://%s%s?%s=%s&cnt=%s&prot=%s", r.Host, routes.FetchSimilarURI,
			routes.URLParamKey, rURL, r.FormValue("cnt"), r.FormValue("prot"))
		lg("lo - sending to URL 1: %v", fullURL)

		fo := fetch.Options{}
		fo.URL = fullURL
		bts, inf, err := fetch.UrlGetter(r, fo)
		_ = inf
		lg(err)
		if err != nil {
			return
		}

		if len(bts) == 0 {
			lg("empty bts")
			return
		}

		var mp map[string][]byte
		err = json.Unmarshal(bts, &mp)
		lg(err)
		if err != nil {
			lg("%s", bts)
			return
		}

		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		if _, ok := mp["msg"]; ok {
			w.Write(mp["msg"])
		}

		for k, v := range mp {
			if k != "msg" {
				wpf(w, "<br><br>%s:\n", k)
				if true {
					wpf(w, "len %v", len(v))
				} else {
					wpf(w, "%s", html.EscapeString(string(v)))
				}
			}
		}

	}

}
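fetchSimForm unmarshals the response from FetchSimilar into a map[string][]byte; encoding/json encodes []byte values as base64 strings, so the two handlers interoperate without any extra conversion. A small round-trip sketch (the keys mirror FetchSimilar's map, the values are made up):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// What FetchSimilar builds before json.MarshalIndent (values illustrative).
	mp := map[string][]byte{
		"msg":     []byte("<pre>log lines ...</pre>"),
		"url__00": []byte("/news/europe/some-article"),
	}

	smp, err := json.MarshalIndent(mp, "", "\t")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("%s\n", smp) // []byte values appear as base64 strings on the wire

	// What fetchSimForm does with the body it fetched.
	var back map[string][]byte
	if err := json.Unmarshal(smp, &back); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("%s\n", back["msg"]) // decoded back to the original bytes
}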
Example #6
// FetchUsingRSS takes an RSS XML URI and fetches some of its documents.
// It uses a three-stage pipeline for parallel fetching.
// Results are stored into the given filesystem fs.
// Config points to the source of RSS XML,
// and has some rules for conflating URI directories.
// uriPrefix and config.DesiredNumber tell the func
// which subdirs of the RSS dir should be fetched - and how many at max.
func FetchUsingRSS(w http.ResponseWriter, r *http.Request,
	fs fsi.FileSystem, config FetchCommand,
) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored; the closure reads the outer b

	if config.Host == "" {
		lg(" empty host; returning")
		return
	}

	config = addDefaults(config)

	// Fetching the rssXML takes time.
	// We do it before the timeouts of the pipeline stages are set off.
	lg(" ")
	lg(config.Host)
	if config.Host == "test.economist.com" {
		switchTData(w, r)
	}

	// lg(stringspb.IndentedDump(config))
	dirTree := &DirTree{Name: "/", Dirs: map[string]DirTree{}, EndPoint: true}

	fnDigest := path.Join(docRoot, config.Host, "digest2.json")
	loadDigest(w, r, lg, fs, fnDigest, dirTree) // previous

	age := time.Now().Sub(dirTree.LastFound)
	lg("DirTree is %5.2v hours old (%v)", age.Hours(), dirTree.LastFound.Format(time.ANSIC))
	if age.Hours() > 0.001 {

		rssUrl := matchingRSSURI(w, r, config)
		if rssUrl == "" {
			m := new(MyWorker)
			m.r = r
			m.lg = lg
			m.fs1 = fs
			m.SURL = path.Join(config.Host, config.SearchPrefix)
			_, _, _, err := fetchSave(m)
			lg(err)
			if err != nil {
				return
			}
		} else {
			rssUrl = path.Join(config.Host, rssUrl)
			rssDoc, rssUrlObj := rssXMLFile(w, r, fs, rssUrl)
			_ = rssUrlObj
			rssDoc2DirTree(w, r, dirTree, rssDoc, config.Host)
		}

		saveDigest(lg, fs, fnDigest, dirTree)
	}

	// lg(dirTree.String())
	//
	//
	// setting up a 3 staged pipeline from bottom up
	//
	var fullArticles []FullArticle

	var inn chan *FullArticle = make(chan *FullArticle) // jobs are stuffed in here
	var out chan *FullArticle = make(chan *FullArticle) // completed jobs are delivered here
	var fin chan struct{} = make(chan struct{})         // downstream signals end to upstream
	var stage3Wait sync.WaitGroup

	// stage 3
	// fire up the "collector", a fan-in
	stage3Wait.Add(1)
	go func() {
		// 400 good value; critical point at 35
		// economist.com required 800 ms
		const delayInitial = 1200
		const delayRefresh = 800
		cout := time.After(time.Millisecond * delayInitial)
		for {
			select {

			case fa := <-out:
				fullArticles = append(fullArticles, *fa)
				pth := fetch.PathFromStringUrl(fa.Url)
				lg("    fetched   %v - %v ", fa.Mod.Format("15:04:05"), stringspb.Ellipsoider(pth, 50))
				cout = time.After(time.Millisecond * delayRefresh) // refresh timeout
			case <-cout:
				lg("timeout after %v articles", len(fullArticles))
				// we are using channel == nil - channel closed combinations
				// inspired by http://dave.cheney.net/2013/04/30/curious-channels
				out = nil // not close(out) => case above is now blocked
				close(fin)
				lg("fin closed; out nilled")
				stage3Wait.Done()
				return
			}
		}
	}()

	//
	// stage 2
	for i := 0; i < numWorkers; i++ {
		// fire up a dedicated fetcher routine, a worker
		// we are using channel == nil - channel closed combinations
		// inspired by http://dave.cheney.net/2013/04/30/curious-channels
		go func() {
			var a *FullArticle
			for {
				select {
				case a = <-inn:
					var err error
					var inf fetch.Info
					a.Body, inf, err = fetch.UrlGetter(r, fetch.Options{URL: a.Url})
					lg(err)
					if a.Mod.IsZero() {
						a.Mod = inf.Mod
					}
					select {
					case out <- a:
					case <-fin:
						lg("    worker spinning down; branch 1; abandoning %v", a.Url)
						return
					}
					a = new(FullArticle)
				case <-fin:
					if a != nil && a.Url != "" {
						u, _ := url.Parse(a.Url)
						lg("    abandoned %v", u.Path)
					} else {
						lg("    worker spinning down; branch 2")
					}
					return
				}
			}
		}()
	}

	//
	//
	//
	// loading stage 1
	uriPrefix := config.SearchPrefix
	found := 0
	uriPrefixExcl := "impossible"
	for i := 0; i < 15; i++ {
		lg("  searching for prefix   %v    - excl %q    - %v of %v", uriPrefix, uriPrefixExcl, found, config.DesiredNumber)
		found += stuffStage1(w, r, config, inn, fin, dirTree,
			uriPrefixExcl, uriPrefix, config.DesiredNumber-found)

		if found >= config.DesiredNumber {
			break
		}

		if uriPrefix == "/" || uriPrefix == "." {
			lg("  root exhausted")
			break
		}

		newPrefix := path.Dir(uriPrefix)
		uriPrefixExcl = uriPrefix
		uriPrefix = newPrefix
	}
	lg("  found %v of %v", found, config.DesiredNumber)

	//
	lg("stage3Wait.Wait() before")
	stage3Wait.Wait()
	lg("stage3Wait.Wait() after")

	// workers spin down earlier -
	// but the appengine log writer and the response writer need some time
	// to record the spin-down messages
	time.Sleep(120 * time.Millisecond)

	// compile out directory statistics
	histoDir := map[string]int{}
	for _, a := range fullArticles {
		u, err := url.Parse(a.Url)
		lg(err)
		if err != nil {
			continue
		}
		semanticUri := condenseTrailingDir(u.Path, config.CondenseTrailingDirs)
		dir := path.Dir(semanticUri)
		histoDir[dir]++
	}
	sr := sortmap.SortMapByCount(histoDir)
	_ = sr

	// Create dirs
	for k := range histoDir {
		dir := path.Join(docRoot, k) // config.Host already contained in k
		err := fs.MkdirAll(dir, 0755)
		lg(err)
		err = fs.Chtimes(dir, time.Now(), time.Now())
		lg(err)
	}

	// Saving as files
	for _, a := range fullArticles {
		if len(a.Body) == 0 {
			continue
		}
		u, err := url.Parse(a.Url)
		lg(err)
		if err != nil {
			continue
		}
		u.Fragment = ""
		u.RawQuery = ""
		semanticUri := condenseTrailingDir(u.RequestURI(), config.CondenseTrailingDirs)
		p := path.Join(docRoot, semanticUri)
		err = fs.WriteFile(p, a.Body, 0644)
		lg(err)
		err = fs.Chtimes(p, a.Mod, a.Mod)
		lg(err)
	}

	{
		b, err := json.MarshalIndent(histoDir, "  ", "\t")
		lg(err)
		fnDigest := path.Join(docRoot, config.Host, "fetchDigest.json")
		err = fs.WriteFile(fnDigest, b, 0755)
		lg(err)
	}

	// fsm, ok := memfs.Unwrap(fs)
	// if ok {
	// 	fsm.Dump()
	// }

}
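The collector in stage 3 shuts the pipeline down by nilling its input channel (a nil channel blocks that select case forever) and closing fin, which every worker also selects on; that is the "curious channels" pattern referenced in the comments. A stripped-down sketch of just that mechanism, with dummy jobs standing in for the fetches:

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	inn := make(chan string)   // jobs are stuffed in here
	out := make(chan string)   // completed jobs are delivered here
	fin := make(chan struct{}) // downstream signals end to upstream

	var collectorWait sync.WaitGroup
	collectorWait.Add(1)

	// Stage 3 - collector (fan-in): a quiet period on out ends the run.
	go func(results <-chan string) {
		defer collectorWait.Done()
		timeout := time.After(200 * time.Millisecond)
		for {
			select {
			case s := <-results:
				fmt.Println("collected", s)
				timeout = time.After(200 * time.Millisecond) // refresh after every result
			case <-timeout:
				results = nil // not close(results); a nil channel blocks that case forever
				close(fin)    // broadcast shutdown to the workers
				return
			}
		}
	}(out)

	// Stage 2 - workers.
	for i := 0; i < 3; i++ {
		go func() {
			for {
				select {
				case job := <-inn:
					time.Sleep(50 * time.Millisecond) // stand-in for the actual fetch
					select {
					case out <- job + " done":
					case <-fin:
						return // result abandoned, pipeline is shutting down
					}
				case <-fin:
					return
				}
			}
		}()
	}

	// Stage 1 - load a finite set of jobs.
	for i := 0; i < 5; i++ {
		inn <- fmt.Sprintf("job %v", i)
	}

	collectorWait.Wait()
	fmt.Println("pipeline drained")
}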