func (this *RSSFinder) FindUrl(response *api.FetchResponse) (result []api.FetchRequest, err error) { var temp Result err = xml.Unmarshal(response.GetBody(), &temp) result = make([]api.FetchRequest, 0) for _, value := range temp.Channel.Items { link := value.Link cur := api.BuildFetchRequestByStr(link, api.COMM_LINK) result = append(result, *cur) } return }
func (this *SimpleExecutor) Parse(response api.FetchResponse) (title string, content string, err error) { res := strings.NewReader(string(response.GetBody())) doc, err := goquery.NewDocumentFromReader(res) if nil != err { return "", "", err } else { content = doc.Find("body").Text() title = doc.Find("title").Text() } return }
func Test_Parse(tester *testing.T) { content, _ := ioutil.ReadFile("/Users/gaowei/workSpace/github/remote/coco/res/test/html") if len(content) == 0 { tester.Error("not read content") } response := new(api.FetchResponse) response.SetBody(content) cur := new(CommExecutor) _, text, _ := cur.Parse(*response) fmt.Println(text) }
func (this *CommExecutor) Parse(response api.FetchResponse) (title string, content string, err error) { html := string(response.GetBody()) re := regexp.MustCompile("<!--[\\s\\S]*?-->") html = re.ReplaceAllString(html, "") res := strings.NewReader(html) doc, err := goquery.NewDocumentFromReader(res) if nil != err { return "", "", err } else { doc = this.cleanJunkSpan(doc) content = doc.Find("body").Text() title = doc.Find("title").Text() } return }