func AddUpdatableChilds(htmlpost *post.Post, dbpost *post.Post, dbmap *gorp.DbMap) (updateNeeded bool, err error) { // Check if there are aleady comments in dbpost // If not get them from the database if len(dbpost.Comments) == 0 { pk := dbpost.Id if pk == 0 { err = errors.New("primary key not set in dbpost") return } var res interface{} res, err = dbmap.GetWithChilds(post.Post{}, pk) if err != nil { err = errors.New("get failed: " + err.Error()) return } if res == nil { err = errors.New(fmt.Sprintf("Get post for id %d did not return any rows ", pk)) return } dbpost := res.(*post.Post) if DebugLevel > 3 { // Print out the update info fmt.Println("----------- DB POST -----------------") fmt.Println(dbpost.String("CHECK DB: ")) fmt.Println("----------- DB POST END -------------------") } } if DebugLevel > 3 { // Print out the update info fmt.Println("----------- HTML POST -----------------") fmt.Println(htmlpost.String("CHECK HTML: ")) fmt.Println("----------- HTML POST END -------------------") } updateNeeded = htmlpost.Hash() != dbpost.Hash() if updateNeeded { var UpdatedComments []*post.Comment var found bool if DebugLevel > 2 { fmt.Printf("**** UpdatedComments len %d\n", len(UpdatedComments)) } for _, h := range htmlpost.Comments { found = false htmlHash := h.Hash() for _, d := range dbpost.Comments { if DebugLevel > 2 { fmt.Printf("**** COMPARE\n") fmt.Printf("**** **** d.Hash():%d htmlHash %d\n", d.Hash(), htmlHash) fmt.Printf("**** **** d.Date '%s' h.Date '%s'\n", d.GetCommentDate().String(), h.GetCommentDate().String()) fmt.Printf("**** COMPARE END\n") } if d.Hash() == htmlHash { // post with identical content has been found - do not store this comment found = true if DebugLevel > 2 { fmt.Printf("**** ***************** MATCH d.Hash() == htmlHash %d\n", d.Hash()) } break } if h.WebCommentId == d.WebCommentId { // external unique comment id found - this comment is already stored // but the comment content has been changed - update needed if DebugLevel > 3 { fmt.Printf("**** COMPARE h.WebCommentId\n") fmt.Printf("**** **** h '%s' d '%s'\n", h.WebCommentId, d.WebCommentId) fmt.Printf("**** COMPARE h.WebCommentId END\n") } h.Id = d.Id h.PostId = d.PostId break } } if !found { UpdatedComments = append(UpdatedComments, h) if DebugLevel > 2 { fmt.Printf("**** UpdatedComments len %d\n", len(UpdatedComments)) fmt.Printf("**** **** append(UpdatedComments, h) %s\n", h.String("APP: ")) } } } fmt.Printf("**** htmlpost.Comments len %d\n", len(htmlpost.Comments)) fmt.Printf("**** UpdatedComments len %d\n", len(UpdatedComments)) dbpost.Comments = make([]*post.Comment, len(UpdatedComments), len(UpdatedComments)) fmt.Printf("**** dbpost.Comments1 len %d\n", len(dbpost.Comments)) copy(dbpost.Comments, UpdatedComments) fmt.Printf("**** dbpost.Comments2 len %d\n", len(dbpost.Comments)) } if (DebugLevel > 3) && updateNeeded { // Print out the update info fmt.Println("----------- UPDATE NEEDED -----------------") for i := range htmlpost.Comments { fmt.Println(htmlpost.Comments[i].String("UPDATE NEEDED HTML: ")) if i < len(dbpost.Comments) { fmt.Println(dbpost.Comments[i].String("UPDATE NEEDED DB: ")) } } //fmt.Println(htmlpost.String("UPDATE NEEDED HTML: ")) //fmt.Println(dbpost.String("UPDATE NEEDED DB: ")) fmt.Println("----------- UPDATE NEEDED END -------------------") } return }
func ParseHtmlComments(p *post.Post) (err error) { if p.WebPostId == "" { return errors.New(fmt.Sprintf("p.WebPostId is empty in post '%s'", p.String("PC: "))) } // Get comments from hackernews geturl := fmt.Sprintf("http://news.ycombinator.com/item?id=%s", p.WebPostId) // DEBUG //geturl := fmt.Sprintf("https://news.ycombinator.com/item?id=9751858") if DebugLevel > 2 { fmt.Printf("START GET COMMENTS FROM '%s'\n", geturl) } body, err := GetHtmlBody(geturl) if err != nil { return errors.New("GetHtmlBody: " + err.Error()) } // Create a qoquery document to parse from an io.Reader doc, err := goquery.NewDocumentFromReader(body) if err != nil { return errors.New("Failed to parse HTML: " + err.Error()) } // Find hackernews comments = elements with class "athing" thing := doc.Find(".athing") for iThing := range thing.Nodes { // use `singlecomment` as a selection of one single post singlecomment := thing.Eq(iThing) comment := post.NewComment() //p.Comments = append(p.Comments, &comment) comheads := singlecomment.Find(".comhead a") for i := range comheads.Nodes { comhead := comheads.Eq(i) t, _ := comhead.Html() s, exists := comhead.Attr("href") if exists { if strings.HasPrefix(s, "user?id") { comment.User = t continue } if strings.HasPrefix(s, "item?id") { if strings.Contains(t, "ago") { var commentDate time.Time commentDate, err = GetDateFromCreatedAgo(t) if err != nil { comment.Err = errors.New(fmt.Sprintf("Failed to convert to date: %s: %s\n", t, err.Error())) err = nil continue } comment.CommentDate = commentDate if len(strings.Split(s, "=")) > 1 { comment.WebCommentId = strings.Split(s, "=")[1] } //comment.Err = err } } } comments := singlecomment.Find("span.comment") removeReplySelection := comments.Find("span div.reply") removeReplySelection.Remove() var sep string for iComment, _ := range comments.Nodes { s := comments.Eq(iComment) h, _ := s.Html() if !utf8.ValidString(s.Text()) { comment.Err = errors.New(fmt.Sprintf("Ignoring invalid UTF-8: '%s'", s.Text())) break } h, err = HtmlToMarkdown(h) if err != nil { comment.Err = errors.New(fmt.Sprintf("Ignoring markdownifier: '%s'", err.Error())) break } if h != "" { comment.Body = comment.Body + sep + h } sep = "\n" } //fmt.Printf("POST %s BODY = %s\n", p.WebPostId, comment.Body) if comment.Err == nil && len(comment.WebCommentId) > 0 && len(comment.Body) > 0 { p.Comments = append(p.Comments, &comment) } else { p.CommentParseErrors = append(p.CommentParseErrors, &comment) } } } if DebugLevel > 0 { fmt.Printf("GET COMMENTS FROM '%s' yielded %d comments\n", geturl, len(p.Comments)) } return err }