func FetchAndDecodeJSON(r *http.Request, surl, knownProtocol string, lg loghttp.FuncBufUniv, fs fsi.FileSystem) []repo.FullArticle { fullURL := fmt.Sprintf("%s%s?%s=%s&cnt=%v&prot=%v", routes.AppHost(), routes.FetchSimilarURI, routes.URLParamKey, surl, numTotal-1, knownProtocol) // fullURL = fmt.Sprintf("%s%s?%s=%s&cnt=%v", r.URL.Host, repo.routes.FetchSimilarURI, // routes.URLParamKey, surl, numTotal-1) lg("lo fetching %v", fullURL) start := time.Now() fo := fetch.Options{} fo.URL = fullURL bJSON, inf, err := fetch.UrlGetter(r, fo) _ = inf lg(err) if err != nil { lg("msg %v", inf.Msg) return nil } if len(bJSON) == 0 { lg("empty bJSON") return nil } lg("\t\tfetch resp complete after %4.2v secs; %vkB", time.Now().Sub(start).Seconds(), len(bJSON)/1024) var mp map[string][]byte err = json.Unmarshal(bJSON, &mp) lg(err) if err != nil { if _, ok := mp["msg"]; ok { lg("%s", mp["msg"]) } else { lg("%s", bJSON) } return nil } smaxFound := string(mp["lensimilar"]) maxFound := util.Stoi(smaxFound) if maxFound < numTotal-1 { lg("not enough files returned by FetchSimilar 1 - mp[lensimilar] too small: %s", mp["lensimilar"]) return nil } least3Files := make([]repo.FullArticle, maxFound+1) _, ok1 := mp["url_self"] _, ok2 := mp["mod_self"] _, ok3 := mp["bod_self"] if ok1 && ok2 && ok3 { least3Files[0].Url = string(mp["url_self"]) least3Files[0].Mod, err = time.Parse(http.TimeFormat, string(mp["mod_self"])) lg(err) least3Files[0].Body = mp["bod_self"] if len(least3Files[0].Body) < 200 { if !bytes.Contains(least3Files[0].Body, []byte(fetch.MsgNoRdirects)) { lg("found base but its a redirect") return nil } } } lg("found base") for k, v := range mp { if k == "msg" { continue } if strings.HasSuffix(k, "self") { continue } if strings.HasPrefix(k, "url__") { sval := strings.TrimPrefix(k, "url__") val := util.Stoi(sval) // lg("%v %v %s", sval, val, v) least3Files[val+1].Url = string(v) } if strings.HasPrefix(k, "mod__") { sval := strings.TrimPrefix(k, "mod__") val := util.Stoi(sval) // lg("%v %v %s", sval, val, v) least3Files[val+1].Mod, err = time.Parse(http.TimeFormat, string(v)) lg(err) } if strings.HasPrefix(k, "bod__") { sval := strings.TrimPrefix(k, "bod__") val := util.Stoi(sval) least3Files[val+1].Body = v //html.EscapeString(string(v) } } lg("found %v similar; decoding complete after %4.2v secs", maxFound, time.Now().Sub(start).Seconds()) for _, v := range least3Files { lg("%v %v", v.Url, len(v.Body)) } return least3Files }
func fetchSimForm(w http.ResponseWriter, r *http.Request, m map[string]interface{}) { lg, b := loghttp.BuffLoggerUniversal(w, r) closureOverBuf := func(bUnused *bytes.Buffer) { loghttp.Pf(w, r, b.String()) } defer closureOverBuf(b) // the argument is ignored, r.Header.Set("X-Custom-Header-Counter", "nocounter") // on live server => always use https if r.URL.Scheme != "https" && !util_appengine.IsLocalEnviron() { r.URL.Scheme = "https" r.URL.Host = r.Host lg("lo - redirect %v", r.URL.String()) http.Redirect(w, r, r.URL.String(), http.StatusFound) } err := r.ParseForm() lg(err) rURL := "" if r.FormValue(routes.URLParamKey) != "" { rURL = r.FormValue(routes.URLParamKey) } if len(rURL) == 0 { wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Find similar HTML URLs"})) defer wpf(b, tplx.Foot) tm := map[string]string{ "val": "www.welt.de/politik/ausland/article146154432/Tuerkische-Bodentruppen-marschieren-im-Nordirak-ein.html", "fieldname": routes.URLParamKey, } tplForm := tt.Must(tt.New("tplName01").Parse(htmlForm)) tplForm.Execute(b, tm) } else { fullURL := fmt.Sprintf("https://%s%s?%s=%s&cnt=%s&prot=%s", r.Host, routes.FetchSimilarURI, routes.URLParamKey, rURL, r.FormValue("cnt"), r.FormValue("prot")) lg("lo - sending to URL 1: %v", fullURL) fo := fetch.Options{} fo.URL = fullURL bts, inf, err := fetch.UrlGetter(r, fo) _ = inf lg(err) if err != nil { return } if len(bts) == 0 { lg("empty bts") return } var mp map[string][]byte err = json.Unmarshal(bts, &mp) lg(err) if err != nil { lg("%s", bts) return } w.Header().Set("Content-Type", "text/html; charset=utf-8") if _, ok := mp["msg"]; ok { w.Write(mp["msg"]) } for k, v := range mp { if k != "msg" { wpf(w, "<br><br>%s:\n", k) if true { wpf(w, "len %v", len(v)) } else { wpf(w, "%s", html.EscapeString(string(v))) } } } } }