func (this *SimpleExecutor) Parse(response api.FetchResponse) (title string, content string, err error) { res := strings.NewReader(string(response.GetBody())) doc, err := goquery.NewDocumentFromReader(res) if nil != err { return "", "", err } else { content = doc.Find("body").Text() title = doc.Find("title").Text() } return }
func (this *RSSFinder) FindUrl(response *api.FetchResponse) (result []api.FetchRequest, err error) { var temp Result err = xml.Unmarshal(response.GetBody(), &temp) result = make([]api.FetchRequest, 0) for _, value := range temp.Channel.Items { link := value.Link cur := api.BuildFetchRequestByStr(link, api.COMM_LINK) result = append(result, *cur) } return }
func (this *CommExecutor) Parse(response api.FetchResponse) (title string, content string, err error) { html := string(response.GetBody()) re := regexp.MustCompile("<!--[\\s\\S]*?-->") html = re.ReplaceAllString(html, "") res := strings.NewReader(html) doc, err := goquery.NewDocumentFromReader(res) if nil != err { return "", "", err } else { doc = this.cleanJunkSpan(doc) content = doc.Find("body").Text() title = doc.Find("title").Text() } return }