Example #1
0
// SaveAllAuthors persists every author in authors inside one database
// transaction.
//
// For each input author the record with the same ID is fetched together with
// its Crawl and Images associations, or created if FirstOrCreate finds none.
// Ordering, Name, Title and URL are then overwritten from the input, the input
// images are added through AddImage, and a Crawl with Next set to the current
// time is assigned when the loaded record has no Crawl (Crawl.ID == 0).
//
// The first database error aborts the operation: the transaction is rolled
// back and that error is returned, so either all authors are saved or none.
// It returns nil only after the transaction has been committed successfully.
func (r Repository) SaveAllAuthors(authors []*entity.Author) error {
	tx := r.DB.Begin()
	if err := tx.Error; err != nil {
		return err
	}

	for _, a := range authors {
		author := entity.Author{ID: a.ID}
		if err := tx.Preload("Crawl").Preload("Images").FirstOrCreate(&author).Error; err != nil {
			tx.Rollback()
			return err
		}

		author.Ordering = a.Ordering
		author.Name = a.Name
		author.Title = a.Title
		author.URL = a.URL

		for _, i := range a.Images {
			author.AddImage(i)
		}

		// A zero Crawl ID means no crawl is attached yet; schedule one for now.
		if author.Crawl.ID == 0 {
			author.Crawl = entity.Crawl{Next: time.Now()}
		}

		if err := tx.Save(&author).Error; err != nil {
			tx.Rollback()
			return err
		}
	}

	// Commit can fail too (e.g. a lost connection); surface that to the caller.
	return tx.Commit().Error
}
Example #2
0
// scrapeAuthor downloads the author page at scraper.host + a.URL, extracts the
// biography, social media markup and images from it, and stores the updated
// author in the database.
//
// Before the page is parsed, the author is loaded (or created) together with
// its Images and Crawl associations so that the scraped data is merged into
// the stored record. After scraping, Crawl.Next is pushed into the future by
// a randomised delay.
//
// It returns the first error encountered while fetching the page, reading or
// writing the database, rendering the social media HTML, or parsing the image
// srcset. On error the author is not saved.
func (scraper Scraper) scrapeAuthor(a *entity.Author) error {
	log.Println(scraper.host + a.URL)
	doc, err := goquery.NewDocument(scraper.host + a.URL)
	if err != nil {
		return err
	}

	// a is already a pointer; hand it to GORM directly rather than &a.
	if err := scraper.db.Preload("Images").Preload("Crawl").FirstOrCreate(a).Error; err != nil {
		return err
	}

	authorNode := doc.Find("main .island .author")
	imageNode := authorNode.Find("img.author__img")

	a.Biography = strings.TrimSpace(authorNode.Find("p").First().Text())

	html, err := authorNode.Find("p.meta").Html()
	if err != nil {
		return err
	}
	a.SocialMedia = strings.TrimSpace(html)

	var images []entity.Image
	if imageNode.Length() > 0 {
		// The "exists" flag is deliberately ignored: a missing attribute
		// yields "", which is passed on to ParseAuthorImages unchanged.
		srcset, _ := imageNode.Attr("srcset")
		images, err = ParseAuthorImages(srcset)
		if err != nil {
			return err
		}
	}

	for _, i := range images {
		a.AddImage(i)
	}

	// Next crawl happens after 30 minutes plus a random 0-17999 seconds, so
	// authors are not all re-crawled at the same moment.
	delay := 30*time.Minute + time.Duration(rand.Intn(18000))*time.Second
	a.Crawl.Next = time.Now().Add(delay)

	return scraper.db.Save(a).Error
}