func confirmPay(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	/*
		Example callback:
		http://abc.de/ef?input_transaction_hash=46178baf7de078954b5aebb71c12120b33d998faac1c165af195eae90f19b25c&shared=false&address=18tpXf8WWuhJP95JbDASbZvavmZJbrydut&destination_address=18tpXf8WWuhJP95JbDASbZvavmZJbrydut&input_address=1ZTnjSdknZvur9Gc73gvB8XBTWL7nV1m6&test=true&anonymous=false&confirmations=0&value=82493362&transaction_hash=46178baf7de078954b5aebb71c12120b33d998faac1c165af195eae90f19b25c
	*/

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Payment confirmation"}))
	defer wpf(b, tplx.Foot)

	wpf(b, "<pre>")
	defer wpf(b, "</pre>")

	err := r.ParseForm()
	lg(err)

	custSecret := ""
	if r.FormValue("customsecret") != "" {
		custSecret = r.FormValue("customsecret")
	}
	lg("custom secret is %q", custSecret)

	val := ""
	if r.FormValue("value") != "" {
		val = r.FormValue("value")
	}
	lg("value is %q", val)
}
// FetchSimilar is an extended version of Fetch.
// It uses a DirTree of crawled *links*, not actual files.
// As it moves up the DOM, it crawls every document for additional links.
// It first moves up to find similar URLs on the same depth:
//
//	/\
//	/\ / \
//	/\ / \ / \
//
// It then moves up the ladder again - to accept higher URLs:
//
//	/\
//	/\
//	/\
//
// A stripped-down sketch of this ladder walk follows after this function.
func FetchSimilar(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	start := time.Now()

	wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Find similar HTML URLs"}))
	defer wpf(b, tplx.Foot)

	wpf(b, "<pre>")
	defer wpf(b, "</pre>")

	fs1 := GetFS(appengine.NewContext(r))

	err := r.ParseForm()
	lg(err)

	countSimilar := 3
	sCountSimilar := r.FormValue("cnt")
	if sCountSimilar != "" {
		i, err := strconv.Atoi(strings.TrimSpace(sCountSimilar))
		if err == nil {
			countSimilar = i
		}
	}

	surl := r.FormValue(routes.URLParamKey)
	ourl, err := fetch.URLFromString(surl)
	lg(err)
	if err != nil {
		return
	}
	if ourl.Host == "" {
		lg("host is empty (%v)", surl)
		return
	}

	knownProtocol := ""
	if r.FormValue("prot") != "" {
		knownProtocol = r.FormValue("prot")
	}

	numWorkers := 0
	sNumWorkers := r.FormValue("numworkers")
	if sNumWorkers != "" {
		i, err := strconv.Atoi(strings.TrimSpace(sNumWorkers))
		if err == nil {
			numWorkers = i
		}
	}

	srcDepth := strings.Count(ourl.Path, "/")

	cmd := FetchCommand{}
	cmd.Host = ourl.Host
	cmd.SearchPrefix = ourl.Path
	cmd = addDefaults(cmd)

	dirTree := &DirTree{Name: "/", Dirs: map[string]DirTree{}, EndPoint: true}
	fnDigest := path.Join(docRoot, cmd.Host, "digest2.json")
	loadDigest(w, r, lg, fs1, fnDigest, dirTree) // previous

	lg("dirtree (first 400 chars): %v", stringspb.ToLen(dirTree.String(), 400))

	m1 := new(MyWorker)
	m1.r = r
	m1.lg = lg
	m1.fs1 = fs1
	m1.SURL = path.Join(cmd.Host, ourl.Path)
	m1.Protocol = knownProtocol

	btsSrc, modSrc, usedExisting, err := fetchSave(m1)
	if !usedExisting {
		addAnchors(lg, cmd.Host, btsSrc, dirTree)
	}
	lg(err)
	if err != nil {
		return
	}

	lg("\t\t%4.2v secs so far 1", time.Now().Sub(start).Seconds())

	var treePath string
	treePath = "/blogs/freeexchange" // dev value, overwritten below
	treePath = "/news/europe"        // dev value, overwritten below
	treePath = path.Dir(ourl.Path)

	opt := LevelWiseDeeperOptions{}
	opt.Rump = treePath
	opt.ExcludeDir = "/news/americas"     // dev value, overwritten below
	opt.ExcludeDir = "/blogs/buttonwood"  // dev value, overwritten below
	opt.ExcludeDir = "/something-impossible"
	opt.MinDepthDiff = 1
	opt.MaxDepthDiff = 1
	opt.CondenseTrailingDirs = cmd.CondenseTrailingDirs
	opt.MaxNumber = cmd.DesiredNumber + 1  // one more for "self"
	opt.MaxNumber = cmd.DesiredNumber + 40 // collect more, 'cause we filter out those too old later

	var subtree *DirTree
	links := []FullArticle{}

	alreadyCrawled := map[string]struct{}{}

MarkOuter:
	for j := 0; j < srcDepth; j++ {
		treePath = path.Dir(ourl.Path)
	MarkInner:
		// for i := 1; i < srcDepth; i++ {
		for i := 1; i < (srcDepth + 5); i++ {

			subtree, treePath = DiveToDeepestMatch(dirTree, treePath)

			lg("Looking from height %v to level %v - %v", srcDepth-i, srcDepth-j, treePath)

			if _, ok := alreadyCrawled[treePath]; ok {
				// lg("\t already digested %v", treePath)
				continue
			}

			m2 := new(MyWorker)
			m2.r = r
			m2.lg = lg
			m2.fs1 = fs1
			m2.SURL = path.Join(cmd.Host, treePath)
			m2.Protocol = knownProtocol

			btsPar, _, usedExisting, err := fetchSave(m2)
			lg(err)
			if err != nil {
				return
			}
			alreadyCrawled[treePath] = struct{}{}

			if !usedExisting {
				addAnchors(lg, cmd.Host, btsPar, dirTree)
			}

			if subtree == nil {
				lg("\n#%v treePath %q ; subtree is nil", i, treePath)
			} else {
				// lg("\n#%v treePath %q ; subtree exists", i, treePath)

				opt.Rump = treePath
				opt.MinDepthDiff = i - j
				opt.MaxDepthDiff = i - j
				lvlLinks := LevelWiseDeeper(nil, nil, subtree, opt)
				links = append(links, lvlLinks...)
				for _, art := range lvlLinks {
					_ = art
					// lg("#%v fnd %v", i, stringspb.ToLen(art.Url, 100))
				}

				if len(links) >= opt.MaxNumber {
					lg("found enough links")
					break MarkOuter
				}

				pathPrev := treePath
				treePath = path.Dir(treePath)
				// lg("#%v bef %v - aft %v", i, pathPrev, treePath)

				if pathPrev == "." && treePath == "." ||
					pathPrev == "/" && treePath == "/" ||
					pathPrev == "" && treePath == "." {
					lg("break to inner")
					break MarkInner
				}
			}
		}
	}

	lg("%v links after %4.2v secs", len(links), time.Now().Sub(start).Seconds())
	lg("============================")
	lg("Now reading/fetching actual similar files - not just the links")

	tried := 0
	selecteds := []FullArticle{}
	nonExisting := []FullArticle{}
	nonExistFetched := []FullArticle{}

	for _, art := range links {

		if art.Url == ourl.Path {
			lg("skipping self\t%v", art.Url)
			continue
		}

		tried++

		useExisting := false

		semanticUri := condenseTrailingDir(art.Url, cmd.CondenseTrailingDirs)
		p := path.Join(docRoot, cmd.Host, semanticUri)
		f, err := fs1.Open(p)
		// lg(err) // it's no error if the file does not exist
		if err != nil {
			// lg("!nstore %q", semanticUri)
		} else {
			// lg("reading %q", semanticUri)

			// Wrap the read in a closure, so that f.Close() is called at the
			// end of *this* loop iteration; a plain defer f.Close() would span
			// the entire func and prevent overwrites/chmods further down.
			readExisting := func() {
				defer f.Close()
				fi, err := f.Stat()
				lg(err)
				if err == nil {
					age := time.Now().Sub(fi.ModTime())
					if age.Hours() < 10 {
						lg("\t\tusing existing file with age %4.2v hrs", age.Hours())
						art.Mod = fi.ModTime()
						bts, err := ioutil.ReadAll(f)
						lg(err)
						art.Body = bts
						if len(bts) < 200 {
							if bytes.Contains(bts, []byte(fetch.MsgNoRdirects)) {
								return
							}
						}
						selecteds = append(selecteds, art)
						useExisting = true
					}
				}
			}
			readExisting()
		}

		if !useExisting {
			nonExisting = append(nonExisting, art)
		}

		if len(selecteds) >= countSimilar {
			break
		}
	}
	lg("============================")
	lg("tried %v links - yielding %v existing similars; not existing in datastore: %v; %v were requested.",
		tried, len(selecteds), len(nonExisting), countSimilar)

	if len(selecteds) < countSimilar {

		jobs := make([]distrib.Worker, 0, len(nonExisting))
		for _, art := range nonExisting {
			surl := path.Join(cmd.Host, art.Url)
			wrkr := MyWorker{SURL: surl}
			wrkr.Protocol = knownProtocol
			wrkr.r = r
			wrkr.lg = lg
			wrkr.fs1 = fs1
			job := distrib.Worker(&wrkr)
			jobs = append(jobs, job)
		}

		opt := distrib.NewDefaultOptions()
		opt.TimeOutDur = 3500 * time.Millisecond
		opt.Want = int32(countSimilar - len(selecteds) + 4) // get some more, in case we have "redirected" bodies
		opt.NumWorkers = int(opt.Want)                      // 5s query limit => hurry; spawn as many as we want
		if numWorkers > 0 {
			opt.NumWorkers = numWorkers
		}
		lg("Preparing %v simultaneous, wanting %v fetches; at %4.2v secs.",
			opt.NumWorkers, opt.Want, time.Now().Sub(start).Seconds())
		opt.CollectRemainder = false // 5s query limit => hurry; don't wait for stragglers

		ret, msg := distrib.Distrib(jobs, opt)
		lg("Distrib returned at %4.2v secs with %v results.", time.Now().Sub(start).Seconds(), len(ret))
		lg("\n" + msg.String())

		for _, v := range ret {
			v1, _ := v.Worker.(*MyWorker)
			if v1.FA != nil {
				age := time.Now().Sub(v1.FA.Mod)
				if age.Hours() < 10 {
					lg("\t\tusing fetched file with age %4.2v hrs", age.Hours())
					nonExistFetched = append(nonExistFetched, *v1.FA)
					if len(nonExistFetched) > (countSimilar - len(selecteds)) {
						break
					}
				}
			}
			if v1.err != nil {
				lg(v1.err)
			}
		}
		lg("tried %v links - yielding %v fetched - jobs %v", len(nonExisting), len(nonExistFetched), len(jobs))

		selecteds = append(selecteds, nonExistFetched...)

		// extract links
		for _, v := range nonExistFetched {
			// lg("links -> memory dirtree for %q", v.Url)
			addAnchors(lg, cmd.Host, v.Body, dirTree)
		}
	}

	if time.Now().Sub(dirTree.LastFound).Seconds() < 10 {
		lg("saving accumulated (new) links to digest")
		saveDigest(lg, fs1, fnDigest, dirTree)
	}

	lg("\t\t%4.2v secs so far 3", time.Now().Sub(start).Seconds())

	mp := map[string][]byte{}
	mp["msg"] = b.Bytes()
	mp["url_self"] = []byte(condenseTrailingDir(ourl.Path, cmd.CondenseTrailingDirs))
	mp["mod_self"] = []byte(modSrc.Format(http.TimeFormat))
	mp["bod_self"] = btsSrc

	for i, v := range selecteds {
		mp["url__"+spf("%02v", i)] = []byte(v.Url)
		mp["mod__"+spf("%02v", i)] = []byte(v.Mod.Format(http.TimeFormat))
		mp["bod__"+spf("%02v", i)] = v.Body
	}
	mp["lensimilar"] = []byte(spf("%02v", len(selecteds)))

	smp, err := json.MarshalIndent(mp, "", "\t")
	if err != nil {
		lg("marshalling mp to []byte failed")
		return
	}

	r.Header.Set("X-Custom-Header-Counter", "nocounter")
	w.Header().Set("Content-Type", "application/json")
	w.Write(smp)

	b.Reset()             // this keeps the buf pointer intact; outgoing defers are still heeded
	b = new(bytes.Buffer) // creates a *new* buf pointer; outgoing defers write into the *old* buf

	lg("\t\t%4.2v secs so far 4 (json resp written as []byte)", time.Now().Sub(start).Seconds())

	return
}
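// The ladder walk in FetchSimilar above is easier to see in isolation.
// Below is a minimal, self-contained sketch - hypothetical, not part of this
// package's API - of the two nested loops: the inner loop climbs to ever
// higher ancestors of the start path, while the outer loop widens the accepted
// depth difference. Printing stands in for fetchSave/LevelWiseDeeper.
func ladderSketch(startPath string) {
	srcDepth := strings.Count(startPath, "/")
	for j := 0; j < srcDepth; j++ { // accepted depth difference between rung and start
		treePath := path.Dir(startPath)
		for i := 1; i < srcDepth+5; i++ { // climb towards the root
			fmt.Printf("rump %-24q - comparing depth %v against %v\n", treePath, srcDepth-i, srcDepth-j)
			prev := treePath
			treePath = path.Dir(treePath)
			if prev == treePath { // path.Dir is idempotent at "/" and "."
				break
			}
		}
	}
}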
// dedupHTTP wraps Dedup()
func dedupHTTP(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Deduplicating redundant stuff"}))
	defer wpf(b, tplx.Foot)

	wpf(b, "<pre>")
	defer wpf(b, "</pre>")

	err := r.ParseForm()
	lg(err)

	surl := r.FormValue(routes.URLParamKey)
	ourl, err := fetch.URLFromString(surl)
	lg(err)
	if err != nil {
		return
	}
	if ourl.Host == "" {
		lg("host is empty (%v)", surl)
		return
	}

	knownProtocol := ""
	if r.FormValue("prot") != "" {
		knownProtocol = r.FormValue("prot")
	}

	lg("Host %q, Path %q", ourl.Host, ourl.Path)

	fs := GetFS(appengine.NewContext(r), 0)

	least3Files := FetchAndDecodeJSON(r, ourl.String(), knownProtocol, lg, fs)
	lg("Fetched and decoded; found %v", len(least3Files))

	if len(least3Files) > 0 {
		doc := Dedup(ourl, least3Files, lg, fs)

		fNamer := domclean2.FileNamer(logDir, 0)
		fNamer() // first call yields key
		fsPerm := GetFS(appengine.NewContext(r), 0)
		fileDump(lg, fsPerm, doc, fNamer, "_fin.html")

		lg("MapSimiliarCompares: %v SimpleCompares: %v LevenstheinComp: %v\n",
			breakMapsTooDistinct, appliedLevenshtein, appliedCompare)
		lg("Finish\n")

		var b2 bytes.Buffer
		err := html.Render(&b2, doc)
		lg(err)
		if err != nil {
			return
		}

		b = new(bytes.Buffer)
		// w.Write([]byte("aa"))
		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		w.Write(b2.Bytes())
	}
}
func requestPay(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	protoc := "https://"
	if appengine.IsDevAppServer() {
		protoc = "http://"
	}

	host := appengine.DefaultVersionHostname(appengine.NewContext(r))
	if appengine.IsDevAppServer() {
		host = "not-loclhost"
	}

	confirmURL := fmt.Sprintf("%v%v%v", protoc, host, uriConfirmPayment)
	confirmURL = url.QueryEscape(confirmURL)

	addrURL := fmt.Sprintf("https://%v/api/receive?method=create&address=%v&callback=%v&customsecret=49&api_code=%v",
		blockChainHost, bitCoinAddress, confirmURL, apiKey)

	req, err := http.NewRequest("GET", addrURL, nil)
	lg(err)
	if err != nil {
		return
	}
	bts, inf, err := fetch.UrlGetter(r, fetch.Options{Req: req})
	bts = bytes.Replace(bts, []byte(`","`), []byte(`", "`), -1)
	if err != nil {
		lg(err)
		lg(inf.Msg)
		return
	}

	lg("response body 1:\n")
	lg("%s\n", string(bts))

	lg("response body 2:\n")
	var data1 map[string]interface{}
	err = json.Unmarshal(bts, &data1)
	lg(err)
	lg(stringspb.IndentedDumpBytes(data1))
	// lg("%#v", data1)

	inputAddress, ok := data1["input_address"].(string)
	if !ok {
		lg("input address could not be cast to string; is type %T", data1["input_address"])
		return
	}
	feePercent, ok := data1["fee_percent"].(float64)
	if !ok {
		lg("fee percent could not be cast to float64; is type %T", data1["fee_percent"])
		return
	}

	lg("Input address will be %q; fee percent will be %4.2v", inputAddress, feePercent)
}
func fetchSimForm(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	// on the live server => always use https
	if r.URL.Scheme != "https" && !util_appengine.IsLocalEnviron() {
		r.URL.Scheme = "https"
		r.URL.Host = r.Host
		lg("lo - redirect %v", r.URL.String())
		http.Redirect(w, r, r.URL.String(), http.StatusFound)
		return // don't fall through after the redirect
	}

	err := r.ParseForm()
	lg(err)

	rURL := ""
	if r.FormValue(routes.URLParamKey) != "" {
		rURL = r.FormValue(routes.URLParamKey)
	}

	if len(rURL) == 0 {
		wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Find similar HTML URLs"}))
		defer wpf(b, tplx.Foot)

		tm := map[string]string{
			"val":       "www.welt.de/politik/ausland/article146154432/Tuerkische-Bodentruppen-marschieren-im-Nordirak-ein.html",
			"fieldname": routes.URLParamKey,
		}
		tplForm := tt.Must(tt.New("tplName01").Parse(htmlForm))
		tplForm.Execute(b, tm)
	} else {
		fullURL := fmt.Sprintf("https://%s%s?%s=%s&cnt=%s&prot=%s",
			r.Host, routes.FetchSimilarURI,
			routes.URLParamKey, rURL,
			r.FormValue("cnt"), r.FormValue("prot"))
		lg("lo - sending to URL 1: %v", fullURL)

		fo := fetch.Options{}
		fo.URL = fullURL
		bts, inf, err := fetch.UrlGetter(r, fo)
		_ = inf
		lg(err)
		if err != nil {
			return
		}
		if len(bts) == 0 {
			lg("empty bts")
			return
		}

		var mp map[string][]byte
		err = json.Unmarshal(bts, &mp)
		lg(err)
		if err != nil {
			lg("%s", bts)
			return
		}

		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		if _, ok := mp["msg"]; ok {
			w.Write(mp["msg"])
		}
		for k, v := range mp {
			if k != "msg" {
				wpf(w, "<br><br>%s:\n", k)
				if true { // debug toggle: lengths only vs. full escaped bodies
					wpf(w, "len %v", len(v))
				} else {
					wpf(w, "%s", html.EscapeString(string(v)))
				}
			}
		}
	}
}
// FetchUsingRSS takes an RSS XML URI and fetches some of its documents.
// It uses a three-staged pipeline for parallel fetching.
// Results are stored in the given filesystem fs.
// Config points to the source of the RSS XML
// and has some rules for conflating URI directories.
// uriPrefix and config.DesiredNumber tell the func
// which subdirs of the RSS dir should be fetched - and how many at most.
func FetchUsingRSS(w http.ResponseWriter, r *http.Request,
	fs fsi.FileSystem, config FetchCommand,
) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored

	if config.Host == "" {
		lg(" empty host; returning")
		return
	}

	config = addDefaults(config)

	// Fetching the rssXML takes time.
	// We do it before the timeouts of the pipeline stages are set off.
	lg(" ")
	lg(config.Host)
	if config.Host == "test.economist.com" {
		switchTData(w, r)
	}

	// lg(stringspb.IndentedDump(config))

	dirTree := &DirTree{Name: "/", Dirs: map[string]DirTree{}, EndPoint: true}
	fnDigest := path.Join(docRoot, config.Host, "digest2.json")
	loadDigest(w, r, lg, fs, fnDigest, dirTree) // previous

	age := time.Now().Sub(dirTree.LastFound)
	lg("DirTree is %5.2v hours old (%v)", age.Hours(), dirTree.LastFound.Format(time.ANSIC))

	if age.Hours() > 0.001 {

		rssUrl := matchingRSSURI(w, r, config)
		if rssUrl == "" {
			m := new(MyWorker)
			m.r = r
			m.lg = lg
			m.fs1 = fs
			m.SURL = path.Join(config.Host, config.SearchPrefix)
			_, _, _, err := fetchSave(m)
			lg(err)
			if err != nil {
				return
			}
		} else {
			rssUrl = path.Join(config.Host, rssUrl)
			rssDoc, rssUrlObj := rssXMLFile(w, r, fs, rssUrl)
			_ = rssUrlObj
			rssDoc2DirTree(w, r, dirTree, rssDoc, config.Host)
		}

		saveDigest(lg, fs, fnDigest, dirTree)
	}

	// lg(dirTree.String())

	//
	// setting up a three-staged pipeline from bottom up
	//
	var fullArticles []FullArticle

	var inn chan *FullArticle = make(chan *FullArticle) // jobs are stuffed in here
	var out chan *FullArticle = make(chan *FullArticle) // completed jobs are delivered here
	var fin chan struct{} = make(chan struct{})         // downstream signals end to upstream
	var stage3Wait sync.WaitGroup

	// stage 3
	// fire up the "collector", a fan-in
	stage3Wait.Add(1) // Add before starting the goroutine, so Wait() below cannot pass early
	go func() {
		// 400 is a good value; critical point at 35.
		// economist.com required 800 ms.
		const delayInitial = 1200
		const delayRefresh = 800
		cout := time.After(time.Millisecond * delayInitial)
		for {
			select {
			case fa := <-out:
				fullArticles = append(fullArticles, *fa)
				pth := fetch.PathFromStringUrl(fa.Url)
				lg("    fetched %v - %v ", fa.Mod.Format("15:04:05"), stringspb.Ellipsoider(pth, 50))
				cout = time.After(time.Millisecond * delayRefresh) // refresh timeout
			case <-cout:
				lg("timeout after %v articles", len(fullArticles))
				// We are using channel == nil / channel closed combinations,
				// inspired by http://dave.cheney.net/2013/04/30/curious-channels
				// (a minimal standalone sketch of this shutdown handshake follows after this function).
				out = nil // not close(out) => case above is now blocked
				close(fin)
				lg("fin closed; out nilled")
				stage3Wait.Done()
				return
			}
		}
	}()

	//
	// stage 2
	for i := 0; i < numWorkers; i++ {
		// fire up a dedicated fetcher routine, a worker
		// we are using channel == nil / channel closed combinations
		// inspired by http://dave.cheney.net/2013/04/30/curious-channels
		go func() {
			var a *FullArticle
			for {
				select {
				case a = <-inn:
					var err error
					var inf fetch.Info
					a.Body, inf, err = fetch.UrlGetter(r, fetch.Options{URL: a.Url})
					lg(err)
					if a.Mod.IsZero() {
						a.Mod = inf.Mod
					}
					select {
					case out <- a:
					case <-fin:
						lg("   worker spinning down; branch 1; abandoning %v", a.Url)
						return
					}
					a = new(FullArticle)
				case <-fin:
					if a != nil && a.Url != "" {
						u, _ := url.Parse(a.Url)
						lg("    abandoned %v", u.Path)
					} else {
						lg("    worker spinning down; branch 2")
					}
					return
				}
			}
		}()
	}

	//
	// loading stage 1
	uriPrefix := config.SearchPrefix
	found := 0
	uriPrefixExcl := "impossible"
	for i := 0; i < 15; i++ {
		lg("  searching for prefix %v - excl %q - %v of %v", uriPrefix, uriPrefixExcl, found, config.DesiredNumber)
		found += stuffStage1(w, r, config, inn, fin, dirTree,
			uriPrefixExcl, uriPrefix, config.DesiredNumber-found)

		if found >= config.DesiredNumber {
			break
		}

		if uriPrefix == "/" || uriPrefix == "." {
			lg("  root exhausted")
			break
		}

		newPrefix := path.Dir(uriPrefix)
		uriPrefixExcl = uriPrefix
		uriPrefix = newPrefix
	}
	lg("  found %v of %v", found, config.DesiredNumber)

	lg("stage3Wait.Wait() before")
	stage3Wait.Wait()
	lg("stage3Wait.Wait() after")

	// Workers spin down earlier -
	// but the appengine log writer and the response writer need some time
	// to record the spin-down messages.
	time.Sleep(120 * time.Millisecond)

	// compile directory statistics
	histoDir := map[string]int{}
	for _, a := range fullArticles {
		u, err := url.Parse(a.Url)
		lg(err)
		semanticUri := condenseTrailingDir(u.Path, config.CondenseTrailingDirs)
		dir := path.Dir(semanticUri)
		histoDir[dir]++
	}
	sr := sortmap.SortMapByCount(histoDir)
	_ = sr

	// create dirs
	for k := range histoDir {
		dir := path.Join(docRoot, k) // config.Host is already contained in k
		err := fs.MkdirAll(dir, 0755)
		lg(err)
		err = fs.Chtimes(dir, time.Now(), time.Now())
		lg(err)
	}

	// saving as files
	for _, a := range fullArticles {
		if len(a.Body) == 0 {
			continue
		}
		u, err := url.Parse(a.Url)
		lg(err)
		u.Fragment = ""
		u.RawQuery = ""
		semanticUri := condenseTrailingDir(u.RequestURI(), config.CondenseTrailingDirs)
		p := path.Join(docRoot, semanticUri)
		err = fs.WriteFile(p, a.Body, 0644)
		lg(err)
		err = fs.Chtimes(p, a.Mod, a.Mod)
		lg(err)
	}

	{
		b, err := json.MarshalIndent(histoDir, "  ", "\t")
		lg(err)
		fnDigest := path.Join(docRoot, config.Host, "fetchDigest.json")
		err = fs.WriteFile(fnDigest, b, 0755)
		lg(err)
	}

	// fsm, ok := memfs.Unwrap(fs)
	// if ok {
	// 	fsm.Dump()
	// }
}
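// The collector above relies on two tricks from the "curious channels" pattern
// (http://dave.cheney.net/2013/04/30/curious-channels): a nil channel blocks
// forever, which disables its select case, while a closed channel never
// blocks, so close(fin) broadcasts shutdown to every worker at once. Below is
// a minimal, self-contained sketch of that handshake - hypothetical names,
// not part of this package. The collector takes the result channel as a
// parameter, so nilling it affects only the collector's own copy.
func fanInSketch() {
	out := make(chan int)
	fin := make(chan struct{})
	var wg sync.WaitGroup

	wg.Add(1) // Add before go, so Wait cannot pass early
	go func(results chan int) { // collector: drains results until a quiet period elapses
		defer wg.Done()
		timeout := time.After(50 * time.Millisecond)
		for {
			select {
			case v := <-results:
				fmt.Println("collected", v)
				timeout = time.After(50 * time.Millisecond) // refresh the quiet-period timer
			case <-timeout:
				results = nil // not close(results): nilling disables the receive case...
				close(fin)    // ...while closing fin releases all senders at once
				return
			}
		}
	}(out)

	for i := 0; i < 3; i++ {
		go func(id int) { // worker: produce until fin is closed
			for {
				select {
				case out <- id:
				case <-fin:
					return
				}
			}
		}(i)
	}

	wg.Wait()
}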