예제 #1
0
// commonPrase extracts the article HTML from the document held by ctx and
// returns it after passing it through CleanHtml.
//
// Heading levels are probed in priority order (h1, then h2, then h3). The
// first level with at least one match is used: every heading of that level is
// handed to b.findP, and the selection returned for the last heading wins.
// When no heading yields a selection, the <body> element itself is used.
func (b baiduNews) commonPrase(ctx *Context) (infoStr string) {
	body := ctx.GetDom().Find("body")

	var info *goquery.Selection
	for _, tag := range [...]string{"h1", "h2", "h3"} {
		headings := body.Find(tag)
		if len(headings.Nodes) == 0 {
			continue
		}
		for i := 0; i < len(headings.Nodes); i++ {
			info = b.findP(headings.Eq(i))
		}
		// Only the highest-priority heading level that exists is consulted.
		break
	}
	if info == nil {
		// Fall back to the body itself. Find only searches descendants, so
		// body.Find("body") would always produce an empty selection.
		info = body
	}

	// This method has no error return, so a rendering error is dropped and
	// whatever Html returned is cleaned as-is.
	infoStr, _ = info.Html()

	// Clean the HTML.
	infoStr = CleanHtml(infoStr, 5)
	return infoStr
}
예제 #2
0
// commonPrase extracts the article HTML from the document held by resp and
// returns it after passing it through CleanHtml.
//
// Heading levels are probed in priority order (h1, then h2, then h3). The
// first level with at least one match is used: every heading of that level is
// handed to b.findP, and the selection returned for the last heading wins.
// When no heading yields a selection, the <body> element itself is used.
func (b baiduNews) commonPrase(resp *context.Response) (infoStr string) {
	body := resp.GetDom().Find("body")

	var info *goquery.Selection
	for _, tag := range [...]string{"h1", "h2", "h3"} {
		headings := body.Find(tag)
		if len(headings.Nodes) == 0 {
			continue
		}
		for i := 0; i < len(headings.Nodes); i++ {
			info = b.findP(headings.Eq(i))
		}
		// Only the highest-priority heading level that exists is consulted.
		break
	}
	if info == nil {
		// Fall back to the body itself. Find only searches descendants, so
		// body.Find("body") would always produce an empty selection.
		info = body
	}

	// Remove tags (currently disabled).
	// info.RemoveFiltered("script")
	// info.RemoveFiltered("style")

	// This method has no error return, so a rendering error is dropped and
	// whatever Html returned is cleaned as-is.
	infoStr, _ = info.Html()

	// Clean the HTML.
	infoStr = CleanHtml(infoStr, 5)
	return infoStr
}
예제 #3
0
// findSigInTweet reports whether the text of the tweet selection s begins
// with the signature text carried by h, once any leading "@name " mentions
// have been stripped.
//
// It returns nil on a match. It returns a CONTENT_FAILURE proof error when
// the selection's HTML cannot be produced, and a DELETED proof error when the
// remaining text does not start with the expected signature.
func (rc *TwitterChecker) findSigInTweet(h SigHint, s *goquery.Selection) ProofError {
	tweetText := s.Text()
	tweetHTML, err := s.Html()
	wanted := h.checkText

	if err != nil {
		return NewProofError(keybase1.ProofStatus_CONTENT_FAILURE, "No HTML tweet found: %s", err)
	}

	G.Log.Debug("+ Checking tweet '%s' for signature '%s'", tweetText, wanted)
	G.Log.Debug("| HTML is: %s", tweetHTML)

	// Peel off leading mentions one at a time until none remain.
	mentionRE := regexp.MustCompile(`^(@[a-zA-Z0-9_-]+\s+)`)
	for {
		loc := mentionRE.FindStringSubmatchIndex(tweetText)
		if loc == nil {
			break
		}
		mention := tweetText[loc[2]:loc[3]]
		tweetText = tweetText[loc[3]:]
		G.Log.Debug("| Stripping off @prefx: %s", mention)
	}

	if !strings.HasPrefix(tweetText, wanted) {
		return NewProofError(keybase1.ProofStatus_DELETED, "Could not find '%s' in '%s'",
			wanted, tweetText)
	}
	return nil
}
예제 #4
0
파일: list.go 프로젝트: frankMilde/rol
// convertOrderedListSelection replaces sel with its own inner HTML wrapped in
// a LaTeX enumerate environment.
//
// Nested lists are handled first via handleNestedList. The opening line is
// followed by whatever getListStartCounter returns and an \itemsep0em
// setting. The begin/end lines are indented by NEST_DEPTH-1 tabs.
func convertOrderedListSelection(sel *goquery.Selection) {
	handleNestedList(sel)
	startCounter := getListStartCounter(sel)

	pad := strings.Repeat("\t", NEST_DEPTH-1)

	// The Html error is ignored, as before; the inner HTML is used as returned.
	inner, _ := sel.Html()

	beginLine := pad + "\\begin{enumerate}\n"
	opening := beginLine + startCounter + "\t\\itemsep0em"
	closing := pad + "\\end{enumerate}"

	wrapped := wrap(inner, opening, closing)
	sel.ReplaceWithHtml(wrapped)
}
예제 #5
0
파일: list.go 프로젝트: frankMilde/rol
// convertUnorderedListSelection replaces sel with its own inner HTML wrapped
// in a LaTeX itemize environment.
//
// Nested lists are handled first via handleNestedList. The opening line is
// followed by whatever getListStartCounter returns and an \itemsep0em
// setting. The begin/end lines are indented by NEST_DEPTH-1 tabs.
func convertUnorderedListSelection(sel *goquery.Selection) {
	handleNestedList(sel)
	startCounter := getListStartCounter(sel)

	pad := strings.Repeat("\t", NEST_DEPTH-1)

	// The Html error is ignored, as before; the inner HTML is used as returned.
	inner, _ := sel.Html()

	beginLine := pad + "\\begin{itemize}\n"
	opening := beginLine + startCounter + "\t\\itemsep0em"
	closing := pad + "\\end{itemize}"

	wrapped := wrap(inner, opening, closing)
	sel.ReplaceWithHtml(wrapped)
}
예제 #6
0
파일: html2tex.go 프로젝트: frankMilde/rol
// printSelectionHtmlWithTitle writes the inner HTML of sel to the debug
// output, prefixed with title. An error from Html is ignored.
func printSelectionHtmlWithTitle(title string, sel *goquery.Selection) {
	inner, _ := sel.Html()

	Debug("%v selection: %v", title, inner)
}
예제 #7
0
파일: html2tex.go 프로젝트: frankMilde/rol
// printSelectionHtml writes the inner HTML of sel to the debug output.
// An error from Html is ignored.
func printSelectionHtml(sel *goquery.Selection) {
	inner, _ := sel.Html()

	Debug("selection: %v", inner)
}
예제 #8
0
파일: baidunews.go 프로젝트: zydudu/pholcus
						}
					} else if h2s := query.Find("h2"); len(h2s.Nodes) != 0 {
						for i := 0; i < len(h2s.Nodes); i++ {
							info = findP(h2s.Eq(i))
						}
					} else if h3s := query.Find("h3"); len(h3s.Nodes) != 0 {
						for i := 0; i < len(h3s.Nodes); i++ {
							info = findP(h3s.Eq(i))
						}
					} else {
						info = query.Find("body")
					}
					// 去除标签
					// info.RemoveFiltered("script")
					// info.RemoveFiltered("style")
					infoStr, _ := info.Html()

					// 清洗HTML
					infoStr = CleanHtml(infoStr, 5)

					// 结果存入Response中转
					resp.AddItem(map[string]interface{}{
						self.GetOutFeild(resp, 0): title,
						self.GetOutFeild(resp, 1): resp.GetTemp("description"),
						self.GetOutFeild(resp, 2): infoStr,
						self.GetOutFeild(resp, 3): resp.GetTemp("releaseTime"),
						self.GetOutFeild(resp, 4): resp.GetTemp("src"),
						self.GetOutFeild(resp, 5): resp.GetTemp("author"),
					})
				},
			},