func (r Repository) SaveAllAuthors(authors []*entity.Author) error { tx := r.DB.Begin() for _, a := range authors { author := entity.Author{ID: a.ID} tx.Preload("Crawl").Preload("Images").FirstOrCreate(&author) author.Ordering = a.Ordering author.Name = a.Name author.Title = a.Title author.URL = a.URL for _, i := range a.Images { author.AddImage(i) } if author.Crawl.ID == 0 { author.Crawl = entity.Crawl{Next: time.Now()} } tx.Save(&author) } tx.Commit() return nil }
func (scraper Scraper) scrapeAuthor(a *entity.Author) error { log.Println(scraper.host + a.URL) doc, err := goquery.NewDocument(scraper.host + a.URL) if err != nil { return err } scraper.db.Preload("Images").Preload("Crawl").FirstOrCreate(&a) authorNode := doc.Find("main .island .author") imageNode := authorNode.Find("img.author__img") a.Biography = strings.TrimSpace(authorNode.Find("p").First().Text()) html, err := authorNode.Find("p.meta").Html() if err != nil { return err } a.SocialMedia = strings.TrimSpace(html) var images []entity.Image if imageNode.Length() > 0 { srcset, _ := imageNode.Attr("srcset") images, err = ParseAuthorImages(srcset) if err != nil { return err } } for _, i := range images { a.AddImage(i) } a.Crawl.Next = time.Now().Add(time.Duration(float64(rand.Intn(18000))+30*time.Minute.Seconds()) * time.Second) scraper.db.Save(&a) return nil }