// node returns a string representation of the selection. func node(i int, s *goquery.Selection) string { switch node := s.Get(0); { case node.Data == "h1": return fmt.Sprintf(" \033[%dm# %s\033[0m\n\n", blue, text(s)) case node.Data == "h2": return fmt.Sprintf(" \033[%dm## %s\033[0m\n\n", blue, text(s)) case node.Data == "h3": return fmt.Sprintf(" \033[%dm### %s\033[0m\n\n", blue, text(s)) case node.Data == "p": return fmt.Sprintf("\033[%dm%s\033[0m\n\n", none, indent(text(s), 1)) case node.Data == "pre" || s.HasClass("highlight"): return fmt.Sprintf("\033[1m%s\033[0m\n\n", indent(text(s), 2)) case node.Data == "a": return fmt.Sprintf("%s (%s) ", s.Text(), s.AttrOr("href", "missing link")) case node.Data == "li": return fmt.Sprintf(" • %s\n", contents(s)) case node.Data == "ul": return fmt.Sprintf("%s\n", nodes(s)) case node.Data == "code": return fmt.Sprintf("\033[1m%s\033[0m ", s.Text()) case node.Type == html.TextNode: return strings.TrimSpace(node.Data) default: return "" } }
// 处理 Reddit 中的一条资源 func dealRedditOneResource(contentSelection *goquery.Selection) error { aSelection := contentSelection.Find(".title a.title") title := aSelection.Text() if title == "" { return errors.New("title is empty") } resourceUrl, ok := aSelection.Attr("href") if !ok || resourceUrl == "" { return errors.New("resource url is empty") } isReddit := false resource := model.NewResource() // Reddit 自身的内容 if contentSelection.HasClass("self") { isReddit = true resourceUrl = Reddit + resourceUrl } err := resource.Where("url=?", resourceUrl).Find("id") // 已经存在 if resource.Id != 0 { // 如果是 reddit 本身的,可以更新评论信息 if !isReddit { return errors.New("url" + resourceUrl + "has exists!") } } if isReddit { resource.Form = model.ContentForm var doc *goquery.Document if doc, err = goquery.NewDocument(resourceUrl); err != nil { return errors.New("goquery reddit.com/r/golang self newdocument error:" + err.Error()) } content, err := doc.Find("#siteTable .usertext .md").Html() if err != nil { return err } doc.Find(".commentarea .comment .usertext .md").Each(func(i int, contentSel *goquery.Selection) { if i == 0 { content += `<hr/>**评论:**<br/><br/>` } comment, err := contentSel.Html() if err != nil { return } comment = strings.TrimSpace(comment) comment = resourceRe.ReplaceAllLiteralString(comment, "\n") author := contentSel.ParentsFiltered(".usertext").Prev().Find(".author").Text() content += author + ": <pre>" + comment + "</pre>" }) if strings.TrimSpace(content) == "" { return errors.New("goquery reddit.com/r/golang self newdocument(" + resourceUrl + ") error: content is empty") } resource.Content = content // reddit 本身的,当做其他资源 resource.Catid = 4 } else { resource.Form = model.LinkForm // Github,是开源项目 if contentSelection.Find(".title .domain a").Text() == "github.com" { resource.Catid = 2 } else { resource.Catid = 1 } } resource.Title = title resource.Url = resourceUrl resource.Uid = PresetUids[rand.Intn(4)] ctime := util.TimeNow() datetime, ok := contentSelection.Find(".tagline time").Attr("datetime") if ok { dtime, err := time.ParseInLocation(time.RFC3339, datetime, time.UTC) if err != nil { logger.Errorln("parse ctime error:", err) } else { ctime = dtime.Local().Format("2006-01-02 15:04:05") } } resource.Ctime = ctime if resource.Id == 0 { var id int64 id, err = resource.Insert() if err != nil { return errors.New("insert into Resource error:" + err.Error()) } // 存扩展信息 resourceEx := model.NewResourceEx() resourceEx.Id = int(id) if _, err = resourceEx.Insert(); err != nil { return errors.New("insert into ResourceEx error:" + err.Error()) } } else { if err = resource.Persist(resource); err != nil { return errors.New("persist resource:" + strconv.Itoa(resource.Id) + " error:" + err.Error()) } } return nil }