示例#1
0
// DocumentScrap runs the selectors in selector against doc and sends one
// ItemResult, tagged with jobId, on items for every node matched by
// selector.Base.
//
// An error returned by extractId does not stop the run; it is forwarded in
// the Err field of the corresponding result. If a panic occurs while the
// selectors are being applied, it is recovered, logged, and reported to the
// scrap log as a bad CSS selector; the remaining nodes are not processed.
func DocumentScrap(jobId string, selector ScrapSelector, doc *goquery.Document, items chan ItemResult) {
	// Created before the deferred handler so the handler can write to it.
	scrapLog := NewRedisScrapdata()

	defer func() {
		cause := recover()
		if cause == nil {
			return
		}
		log.Printf("ERROR: DocumentScrap Panic applying selectors: '%v'", cause)
		scrapLog.ScrapLogWrite("ERROR: bad CSS Selector, please review the syntax")
	}()

	matches := doc.Find(selector.Base)
	for idx := range matches.Nodes {
		node := matches.Eq(idx)

		// Seed the item with the values copied straight from the selector.
		entry := model.Item{
			ScrapUrl:  selector.Url,
			ScrapTags: selector.ScrapTags,
		}

		// The link must be resolved first: extractId receives it as input.
		var idErr error
		entry.Link = SanitizeURL(entry.ScrapUrl, extractText(node, selector.Link), selector.LinkPathLimit)
		entry.Id, idErr = extractId(node, selector, entry.Link)

		// Remaining fields are read independently from the matched node.
		// Price and Currency are both derived from the same Price selector.
		entry.Image = SanitizeURL(entry.ScrapUrl, extractText(node, selector.Image), 0)
		entry.Title = extractText(node, selector.Title)
		entry.Description = extractText(node, selector.Description)
		entry.Price = extractFloat(node, selector.Price)
		entry.Currency = extractCurrency(node, selector.Price)
		entry.Stars = extractFloat(node, selector.Stars)
		entry.Categories = extractText(node, selector.Categories)

		// Stamp the item with the moment it was scraped, in RFC 3339 form.
		entry.LastScrap = time.Now().Format(time.RFC3339)

		result := ItemResult{
			JobId: jobId,
			Item:  entry,
			Err:   idErr,
		}
		items <- result
	}
}