Exemple #1
0
// node returns a string representation of the selection.
func node(i int, s *goquery.Selection) string {
	switch node := s.Get(0); {
	case node.Data == "h1":
		return fmt.Sprintf(" \033[%dm# %s\033[0m\n\n", blue, text(s))
	case node.Data == "h2":
		return fmt.Sprintf(" \033[%dm## %s\033[0m\n\n", blue, text(s))
	case node.Data == "h3":
		return fmt.Sprintf(" \033[%dm### %s\033[0m\n\n", blue, text(s))
	case node.Data == "p":
		return fmt.Sprintf("\033[%dm%s\033[0m\n\n", none, indent(text(s), 1))
	case node.Data == "pre" || s.HasClass("highlight"):
		return fmt.Sprintf("\033[1m%s\033[0m\n\n", indent(text(s), 2))
	case node.Data == "a":
		return fmt.Sprintf("%s (%s) ", s.Text(), s.AttrOr("href", "missing link"))
	case node.Data == "li":
		return fmt.Sprintf("  • %s\n", contents(s))
	case node.Data == "ul":
		return fmt.Sprintf("%s\n", nodes(s))
	case node.Data == "code":
		return fmt.Sprintf("\033[1m%s\033[0m ", s.Text())
	case node.Type == html.TextNode:
		return strings.TrimSpace(node.Data)
	default:
		return ""
	}
}
Exemple #2
0
// 处理 Reddit 中的一条资源
func dealRedditOneResource(contentSelection *goquery.Selection) error {
	aSelection := contentSelection.Find(".title a.title")

	title := aSelection.Text()
	if title == "" {
		return errors.New("title is empty")
	}

	resourceUrl, ok := aSelection.Attr("href")
	if !ok || resourceUrl == "" {
		return errors.New("resource url is empty")
	}

	isReddit := false

	resource := model.NewResource()
	// Reddit 自身的内容
	if contentSelection.HasClass("self") {
		isReddit = true
		resourceUrl = Reddit + resourceUrl
	}

	err := resource.Where("url=?", resourceUrl).Find("id")
	// 已经存在
	if resource.Id != 0 {
		// 如果是 reddit 本身的,可以更新评论信息
		if !isReddit {
			return errors.New("url" + resourceUrl + "has exists!")
		}
	}

	if isReddit {

		resource.Form = model.ContentForm

		var doc *goquery.Document

		if doc, err = goquery.NewDocument(resourceUrl); err != nil {
			return errors.New("goquery reddit.com/r/golang self newdocument error:" + err.Error())
		}

		content, err := doc.Find("#siteTable .usertext .md").Html()
		if err != nil {
			return err
		}

		doc.Find(".commentarea .comment .usertext .md").Each(func(i int, contentSel *goquery.Selection) {
			if i == 0 {
				content += `<hr/>**评论:**<br/><br/>`
			}

			comment, err := contentSel.Html()
			if err != nil {
				return
			}

			comment = strings.TrimSpace(comment)
			comment = resourceRe.ReplaceAllLiteralString(comment, "\n")

			author := contentSel.ParentsFiltered(".usertext").Prev().Find(".author").Text()
			content += author + ": <pre>" + comment + "</pre>"
		})

		if strings.TrimSpace(content) == "" {
			return errors.New("goquery reddit.com/r/golang self newdocument(" + resourceUrl + ") error: content is empty")
		}

		resource.Content = content

		// reddit 本身的,当做其他资源
		resource.Catid = 4
	} else {
		resource.Form = model.LinkForm

		// Github,是开源项目
		if contentSelection.Find(".title .domain a").Text() == "github.com" {
			resource.Catid = 2
		} else {
			resource.Catid = 1
		}
	}

	resource.Title = title
	resource.Url = resourceUrl
	resource.Uid = PresetUids[rand.Intn(4)]

	ctime := util.TimeNow()
	datetime, ok := contentSelection.Find(".tagline time").Attr("datetime")
	if ok {
		dtime, err := time.ParseInLocation(time.RFC3339, datetime, time.UTC)
		if err != nil {
			logger.Errorln("parse ctime error:", err)
		} else {
			ctime = dtime.Local().Format("2006-01-02 15:04:05")
		}
	}
	resource.Ctime = ctime

	if resource.Id == 0 {
		var id int64
		id, err = resource.Insert()

		if err != nil {
			return errors.New("insert into Resource error:" + err.Error())
		}

		// 存扩展信息
		resourceEx := model.NewResourceEx()
		resourceEx.Id = int(id)
		if _, err = resourceEx.Insert(); err != nil {
			return errors.New("insert into ResourceEx error:" + err.Error())
		}
	} else {
		if err = resource.Persist(resource); err != nil {
			return errors.New("persist resource:" + strconv.Itoa(resource.Id) + " error:" + err.Error())
		}
	}

	return nil
}