Beispiel #1
0
func FetchAndDecodeJSON(r *http.Request, surl, knownProtocol string, lg loghttp.FuncBufUniv, fs fsi.FileSystem) []repo.FullArticle {

	fullURL := fmt.Sprintf("%s%s?%s=%s&cnt=%v&prot=%v", routes.AppHost(), routes.FetchSimilarURI,
		routes.URLParamKey, surl, numTotal-1, knownProtocol)

	// fullURL = fmt.Sprintf("%s%s?%s=%s&cnt=%v", r.URL.Host, repo.routes.FetchSimilarURI,
	// 	routes.URLParamKey, surl, numTotal-1)

	lg("lo fetching %v", fullURL)
	start := time.Now()

	fo := fetch.Options{}
	fo.URL = fullURL
	bJSON, inf, err := fetch.UrlGetter(r, fo)
	_ = inf
	lg(err)
	if err != nil {
		lg("msg %v", inf.Msg)
		return nil
	}
	if len(bJSON) == 0 {
		lg("empty bJSON")
		return nil
	}

	lg("\t\tfetch resp complete after %4.2v secs; %vkB", time.Now().Sub(start).Seconds(), len(bJSON)/1024)

	var mp map[string][]byte
	err = json.Unmarshal(bJSON, &mp)
	lg(err)
	if err != nil {
		if _, ok := mp["msg"]; ok {
			lg("%s", mp["msg"])
		} else {
			lg("%s", bJSON)
		}
		return nil
	}

	smaxFound := string(mp["lensimilar"])
	maxFound := util.Stoi(smaxFound)
	if maxFound < numTotal-1 {
		lg("not enough files returned by FetchSimilar 1 - mp[lensimilar] too small: %s", mp["lensimilar"])
		return nil
	}
	least3Files := make([]repo.FullArticle, maxFound+1)

	_, ok1 := mp["url_self"]
	_, ok2 := mp["mod_self"]
	_, ok3 := mp["bod_self"]
	if ok1 && ok2 && ok3 {
		least3Files[0].Url = string(mp["url_self"])
		least3Files[0].Mod, err = time.Parse(http.TimeFormat, string(mp["mod_self"]))
		lg(err)
		least3Files[0].Body = mp["bod_self"]
		if len(least3Files[0].Body) < 200 {
			if !bytes.Contains(least3Files[0].Body, []byte(fetch.MsgNoRdirects)) {
				lg("found base but its a redirect")
				return nil
			}
		}
	}
	lg("found base")

	for k, v := range mp {
		if k == "msg" {
			continue
		}
		if strings.HasSuffix(k, "self") {
			continue
		}

		if strings.HasPrefix(k, "url__") {
			sval := strings.TrimPrefix(k, "url__")
			val := util.Stoi(sval)
			// lg("%v %v %s", sval, val, v)
			least3Files[val+1].Url = string(v)
		}
		if strings.HasPrefix(k, "mod__") {
			sval := strings.TrimPrefix(k, "mod__")
			val := util.Stoi(sval)
			// lg("%v %v %s", sval, val, v)
			least3Files[val+1].Mod, err = time.Parse(http.TimeFormat, string(v))
			lg(err)
		}

		if strings.HasPrefix(k, "bod__") {
			sval := strings.TrimPrefix(k, "bod__")
			val := util.Stoi(sval)
			least3Files[val+1].Body = v //html.EscapeString(string(v)
		}

	}

	lg("found %v similar; decoding complete after %4.2v secs", maxFound, time.Now().Sub(start).Seconds())

	for _, v := range least3Files {
		lg("%v %v", v.Url, len(v.Body))
	}

	return least3Files

}
func fetchSimForm(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	lg, b := loghttp.BuffLoggerUniversal(w, r)
	closureOverBuf := func(bUnused *bytes.Buffer) {
		loghttp.Pf(w, r, b.String())
	}
	defer closureOverBuf(b) // the argument is ignored,

	r.Header.Set("X-Custom-Header-Counter", "nocounter")

	// on live server => always use https
	if r.URL.Scheme != "https" && !util_appengine.IsLocalEnviron() {
		r.URL.Scheme = "https"
		r.URL.Host = r.Host
		lg("lo - redirect %v", r.URL.String())
		http.Redirect(w, r, r.URL.String(), http.StatusFound)
	}

	err := r.ParseForm()
	lg(err)

	rURL := ""
	if r.FormValue(routes.URLParamKey) != "" {
		rURL = r.FormValue(routes.URLParamKey)
	}
	if len(rURL) == 0 {

		wpf(b, tplx.ExecTplHelper(tplx.Head, map[string]interface{}{"HtmlTitle": "Find similar HTML URLs"}))
		defer wpf(b, tplx.Foot)

		tm := map[string]string{
			"val":       "www.welt.de/politik/ausland/article146154432/Tuerkische-Bodentruppen-marschieren-im-Nordirak-ein.html",
			"fieldname": routes.URLParamKey,
		}
		tplForm := tt.Must(tt.New("tplName01").Parse(htmlForm))
		tplForm.Execute(b, tm)

	} else {

		fullURL := fmt.Sprintf("https://%s%s?%s=%s&cnt=%s&prot=%s", r.Host, routes.FetchSimilarURI,
			routes.URLParamKey, rURL, r.FormValue("cnt"), r.FormValue("prot"))
		lg("lo - sending to URL 1: %v", fullURL)

		fo := fetch.Options{}
		fo.URL = fullURL
		bts, inf, err := fetch.UrlGetter(r, fo)
		_ = inf
		lg(err)
		if err != nil {
			return
		}

		if len(bts) == 0 {
			lg("empty bts")
			return
		}

		var mp map[string][]byte
		err = json.Unmarshal(bts, &mp)
		lg(err)
		if err != nil {
			lg("%s", bts)
			return
		}

		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		if _, ok := mp["msg"]; ok {
			w.Write(mp["msg"])
		}

		for k, v := range mp {
			if k != "msg" {
				wpf(w, "<br><br>%s:\n", k)
				if true {
					wpf(w, "len %v", len(v))
				} else {
					wpf(w, "%s", html.EscapeString(string(v)))
				}
			}
		}

	}

}