Example #1
0
func (markdown Markdown) Extract(creativeWork *schema.CreativeWork, path string) error {
	markdownContent, err := ioutil.ReadFile(path)
	if nil != err {
		return err
	}

	html := blackfriday.MarkdownCommon(markdownContent)

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
	if nil != err {
		return err
	}

	doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
		link, _ := s.Attr("href")
		url, _ := url.Parse(link)

		if !url.IsAbs() && strings.HasSuffix(link, ".md") {
			s.SetAttr("href", fmt.Sprint(link[:len(link)-3], ".jsonld"))
		}
	})

	creativeWork.Name = doc.Find("h1").Text()
	creativeWork.Text, err = doc.Find("body").Html()
	if nil != err {
		return err
	}

	return nil
}
Example #2
0
func (markdown Markdown) Extract(creativeWork *schema.CreativeWork, path string) error {
	markdownContent, err := ioutil.ReadFile(path)
	if nil != err {
		return err
	}

	unsafe := blackfriday.MarkdownCommon(markdownContent)
	p := bluemonday.UGCPolicy()
	p.RequireNoFollowOnLinks(false)
	p.AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code")
	html := p.SanitizeBytes(unsafe)

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
	if nil != err {
		return err
	}

	doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
		link, _ := s.Attr("href")
		url, _ := url.Parse(link)

		if !url.IsAbs() {
			s.SetAttr("href", strings.Replace(link, ".md", ".jsonld", 1))
		}
	})

	creativeWork.Name = doc.Find("h1").Text()
	creativeWork.Text, err = doc.Find("body").Html()
	if nil != err {
		return err
	}

	return nil
}