예제 #1
0
파일: tokenize.go 프로젝트: aarzilli/tools
// decomposeHtml tokenizes the HTML read from r and prints a rough outline of
// the document: every start or self-closing tag whose name is not in the skip
// list goes on a new line, followed by shortened snippets of the text tokens
// that come after it. When the input is exhausted it prints a histogram of
// the non-skipped tag names, ordered by sortmap.StringKeysToSortedArray.
//
// Adapted from http://golang-examples.tumblr.com/page/2
//
// For reference, the tokenizer yields values of these shapes:
//
//	type Token struct {
//	    Type     TokenType
//	    DataAtom atom.Atom
//	    Data     string
//	    Attr     []Attribute
//	}
//	type Attribute struct {
//	    Namespace, Key, Val string
//	}
func decomposeHtml(r io.Reader) {

	// Tag names that are neither printed nor counted.
	skip := map[string]bool{
		"meta":       true,
		"html":       true,
		"head":       true,
		"title":      true,
		"body":       true,
		"link":       true,
		"script":     true,
		"noscript":   true,
		"----------": true,
		"iframe":     true,
		"nav":        true,
		"form":       true,
	}
	// Values are always int; the interface{} value type is kept because the
	// map is handed to sortmap.StringKeysToSortedArray below.
	histogram := map[string]interface{}{}

	d := html.NewTokenizer(r)
	cntrTkn := 0
	for {
		tokenType := d.Next()
		cntrTkn++

		if tokenType == html.ErrorToken {
			// A tokenizer error is sticky: every further call to Next
			// returns ErrorToken again, so retrying is pointless.
			// io.EOF is the regular end of input, not a failure.
			if err := d.Err(); err != io.EOF {
				pf("#%v err %v ", cntrTkn, err)
			}
			break
		}

		token := d.Token()
		s2 := strings.TrimSpace(token.Data)

		switch tokenType {
		case html.StartTagToken, html.SelfClosingTagToken:
			if !skip[s2] {
				pf("\n%v ", s2)
				// A missing key yields count == 0, so one lookup covers
				// both the first and any later occurrence.
				count, _ := histogram[s2].(int)
				histogram[s2] = count + 1
			}
		case html.TextToken:
			// Ignore empty/one-byte text and anything starting with "//".
			if len(s2) > 1 && !strings.HasPrefix(s2, `//`) {
				s2 = strings.Replace(s2, "\n", "", -1)
				pf("\t%v", stringspb.Ellipsoider(s2, 22))
			}
		case html.EndTagToken: // </tag>
			// pf("/%v ", s2)
		case html.CommentToken:
			// pf("comment ")
		case html.DoctypeToken:

		default:
			// Only build the verbose dump when it is actually printed.
			pf("default case %v\n", strings.TrimSpace(spf(" %#v", token)))
		}
	}

	hSort := sortmap.StringKeysToSortedArray(histogram)

	pf("\n\n")
	for _, v := range hSort {
		pf("%10s %4v\n", v, histogram[v])
	}

}
예제 #2
0
파일: get_data.go 프로젝트: aarzilli/tools
// regroupFromDatastore02 loads the rows stored under
// "dsu.WrapBlob__res_processed_01", regroups them into a
// period -> language -> count table and passes the result to
// SaveChartDataToDatastore under the key "chart_data_01".
//
// Each row is split on whitespace and read as: language, period, count.
// Rows with fewer than three fields are skipped. Besides the table, the
// function collects the distinct languages and periods (as sorted key lists)
// and the largest count seen.
//
// The parameter m is not used by this function.
func regroupFromDatastore02(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {

	c := appengine.NewContext(r)

	// b1 is never written to here, so the deferred call sends an empty body;
	// it still sets the Content-Type header on the way out.
	b1 := new(bytes.Buffer)
	defer func() {
		w.Header().Set("Content-Type", "text/html")
		w.Write(b1.Bytes())
	}()

	var vVSrc [][]byte
	dsObj1, err := dsu.BufGet(c, "dsu.WrapBlob__res_processed_01")
	// NOTE(review): the error is only handed to loghttp.E; processing
	// continues with whatever dsObj1 holds. If a failed load leaves VVByte
	// empty, empty chart data is saved below - confirm that is intended.
	loghttp.E(w, r, err, false)
	vVSrc = dsObj1.VVByte

	d := make(map[string]map[string]float64)

	// Used as sets; the interface{} value type is kept because the maps are
	// handed to sortmap.StringKeysToSortedArray.
	distinctLangs := make(map[string]interface{})
	distinctPeriods := make(map[string]interface{})
	f_max := 0.0
	for _, row := range vVSrc {
		fields := stringspb.SplitByWhitespace(string(row))
		if len(fields) < 3 {
			// Blank or truncated row: indexing it would panic.
			continue
		}

		lang, period := fields[0], fields[1]
		fCount := util.Stof(fields[2])
		if fCount > f_max {
			f_max = fCount
		}

		distinctLangs[lang] = 1
		distinctPeriods[period] = 1

		if _, ok := d[period]; !ok {
			d[period] = map[string]float64{}
		}
		d[period][lang] = fCount
	}
	//fmt.Fprintf(w,"%#v\n",d2)
	//fmt.Fprintf(w,"%#v\n",f_max)

	cd := CData{
		M:        d,
		VPeriods: sortmap.StringKeysToSortedArray(distinctPeriods),
		VLangs:   sortmap.StringKeysToSortedArray(distinctLangs),
		F_max:    f_max,
	}

	SaveChartDataToDatastore(w, r, cd, "chart_data_01")

	/*
		if r.FormValue("f") == "table" {
			showAsTable(w,r,cd)
		} else {
			showAsChart(w,r,cd)
		}
	*/

}