Esempio n. 1
0
// FindUrl decodes the response body as XML into a Result and builds one fetch
// request per entry of Result.Channel.Items, using the entry's Link and the
// api.COMM_LINK kind.
//
// When the body cannot be decoded, FindUrl returns a nil slice together with
// the xml.Unmarshal error; no requests are built from a partially decoded
// document. On success the returned slice is non-nil (empty when the channel
// has no items) and err is nil.
func (this *RSSFinder) FindUrl(response *api.FetchResponse) (result []api.FetchRequest, err error) {
	var feed Result
	if err = xml.Unmarshal(response.GetBody(), &feed); err != nil {
		// Stop here: xml.Unmarshal may have filled feed partially before
		// failing, and those entries must not leak out next to an error.
		return nil, err
	}

	items := feed.Channel.Items
	// The final length is known up front, so allocate the backing array once.
	result = make([]api.FetchRequest, 0, len(items))
	for _, item := range items {
		req := api.BuildFetchRequestByStr(item.Link, api.COMM_LINK)
		result = append(result, *req)
	}
	return result, nil
}
Esempio n. 2
0
// Parse reads the response body as an HTML document and extracts its text.
//
// title is the text selected by "title" and content is the text selected by
// "body". If the document cannot be built from the body, both strings are
// empty and the error from goquery is returned.
func (this *SimpleExecutor) Parse(response api.FetchResponse) (title string, content string, err error) {
	body := string(response.GetBody())
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		return "", "", err
	}

	title = doc.Find("title").Text()
	content = doc.Find("body").Text()
	return title, content, nil
}
Esempio n. 3
0
// Test_Parse runs CommExecutor.Parse over a local HTML fixture and prints the
// extracted content so it can be inspected by hand.
//
// The test stops immediately when the fixture cannot be read, is empty, or
// fails to parse, instead of continuing with unusable input.
//
// NOTE(review): the fixture path is absolute and specific to one developer
// machine; the test fails wherever that file is absent. Consider moving the
// fixture into the repository and loading it by relative path.
func Test_Parse(tester *testing.T) {
	content, err := ioutil.ReadFile("/Users/gaowei/workSpace/github/remote/coco/res/test/html")
	if err != nil {
		tester.Fatalf("read fixture: %v", err)
	}
	if len(content) == 0 {
		tester.Fatal("not read content")
	}

	response := new(api.FetchResponse)
	response.SetBody(content)

	cur := new(CommExecutor)
	_, text, err := cur.Parse(*response)
	if err != nil {
		tester.Fatalf("parse fixture: %v", err)
	}
	fmt.Println(text)
}
Esempio n. 4
0
// htmlCommentPattern matches HTML comments ("<!--" up to the nearest "-->"),
// including comments that span several lines. It is compiled once at package
// initialization so Parse does not recompile it on every call.
var htmlCommentPattern = regexp.MustCompile(`<!--[\s\S]*?-->`)

// Parse strips HTML comments from the response body, builds a document from
// the remaining markup, and extracts its text.
//
// title is the text selected by "title" and content is the text selected by
// "body", both read after the document has been passed through
// this.cleanJunkSpan. If the document cannot be built, both strings are empty
// and the error from goquery is returned.
func (this *CommExecutor) Parse(response api.FetchResponse) (title string, content string, err error) {
	// Comments are removed from the raw markup before the document is built.
	markup := htmlCommentPattern.ReplaceAllString(string(response.GetBody()), "")

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(markup))
	if err != nil {
		return "", "", err
	}

	// NOTE(review): cleanJunkSpan is defined elsewhere; presumably it removes
	// unwanted span elements before the text is read — confirm.
	doc = this.cleanJunkSpan(doc)
	content = doc.Find("body").Text()
	title = doc.Find("title").Text()
	return title, content, nil
}