func (b baiduNews) commonPrase(ctx *Context) (infoStr string) { body := ctx.GetDom().Find("body") var info *goquery.Selection if h1s := body.Find("h1"); len(h1s.Nodes) != 0 { for i := 0; i < len(h1s.Nodes); i++ { info = b.findP(h1s.Eq(i)) } } else if h2s := body.Find("h2"); len(h2s.Nodes) != 0 { for i := 0; i < len(h2s.Nodes); i++ { info = b.findP(h2s.Eq(i)) } } else if h3s := body.Find("h3"); len(h3s.Nodes) != 0 { for i := 0; i < len(h3s.Nodes); i++ { info = b.findP(h3s.Eq(i)) } } else { info = body.Find("body") } infoStr, _ = info.Html() // 清洗HTML infoStr = CleanHtml(infoStr, 5) return }
func (b baiduNews) commonPrase(resp *context.Response) (infoStr string) { body := resp.GetDom().Find("body") var info *goquery.Selection if h1s := body.Find("h1"); len(h1s.Nodes) != 0 { for i := 0; i < len(h1s.Nodes); i++ { info = b.findP(h1s.Eq(i)) } } else if h2s := body.Find("h2"); len(h2s.Nodes) != 0 { for i := 0; i < len(h2s.Nodes); i++ { info = b.findP(h2s.Eq(i)) } } else if h3s := body.Find("h3"); len(h3s.Nodes) != 0 { for i := 0; i < len(h3s.Nodes); i++ { info = b.findP(h3s.Eq(i)) } } else { info = body.Find("body") } // 去除标签 // info.RemoveFiltered("script") // info.RemoveFiltered("style") infoStr, _ = info.Html() // 清洗HTML infoStr = CleanHtml(infoStr, 5) return }
func (rc *TwitterChecker) findSigInTweet(h SigHint, s *goquery.Selection) ProofError { inside := s.Text() html, err := s.Html() checkText := h.checkText if err != nil { return NewProofError(keybase1.ProofStatus_CONTENT_FAILURE, "No HTML tweet found: %s", err) } G.Log.Debug("+ Checking tweet '%s' for signature '%s'", inside, checkText) G.Log.Debug("| HTML is: %s", html) rxx := regexp.MustCompile(`^(@[a-zA-Z0-9_-]+\s+)`) for { if m := rxx.FindStringSubmatchIndex(inside); m == nil { break } else { prefix := inside[m[2]:m[3]] inside = inside[m[3]:] G.Log.Debug("| Stripping off @prefx: %s", prefix) } } if strings.HasPrefix(inside, checkText) { return nil } return NewProofError(keybase1.ProofStatus_DELETED, "Could not find '%s' in '%s'", checkText, inside) }
func convertOrderedListSelection(sel *goquery.Selection) { handleNestedList(sel) setCounter := getListStartCounter(sel) indentBeginEnd := strings.Repeat("\t", NEST_DEPTH-1) text, _ := sel.Html() left := indentBeginEnd + "\\begin{enumerate}\n" + setCounter + "\t\\itemsep0em" right := indentBeginEnd + "\\end{enumerate}" sel.ReplaceWithHtml(wrap(text, left, right)) }
func convertUnorderedListSelection(sel *goquery.Selection) { handleNestedList(sel) setCounter := getListStartCounter(sel) // indentItems := strings.Repeat("\t", NEST_DEPTH) indentBeginEnd := strings.Repeat("\t", NEST_DEPTH-1) text, _ := sel.Html() //text = strdel.LeadingSpaces(text) left := indentBeginEnd + "\\begin{itemize}\n" + setCounter + "\t\\itemsep0em" right := indentBeginEnd + "\\end{itemize}" sel.ReplaceWithHtml(wrap(text, left, right)) }
func printSelectionHtmlWithTitle(title string, sel *goquery.Selection) { html, _ := sel.Html() Debug("%v selection: %v", title, html) }
func printSelectionHtml(sel *goquery.Selection) { html, _ := sel.Html() Debug("selection: %v", html) }
} } else if h2s := query.Find("h2"); len(h2s.Nodes) != 0 { for i := 0; i < len(h2s.Nodes); i++ { info = findP(h2s.Eq(i)) } } else if h3s := query.Find("h3"); len(h3s.Nodes) != 0 { for i := 0; i < len(h3s.Nodes); i++ { info = findP(h3s.Eq(i)) } } else { info = query.Find("body") } // 去除标签 // info.RemoveFiltered("script") // info.RemoveFiltered("style") infoStr, _ := info.Html() // 清洗HTML infoStr = CleanHtml(infoStr, 5) // 结果存入Response中转 resp.AddItem(map[string]interface{}{ self.GetOutFeild(resp, 0): title, self.GetOutFeild(resp, 1): resp.GetTemp("description"), self.GetOutFeild(resp, 2): infoStr, self.GetOutFeild(resp, 3): resp.GetTemp("releaseTime"), self.GetOutFeild(resp, 4): resp.GetTemp("src"), self.GetOutFeild(resp, 5): resp.GetTemp("author"), }) }, },