Esempio n. 1
0
func (markdown Markdown) Extract(creativeWork *schema.CreativeWork, path string) error {
	markdownContent, err := ioutil.ReadFile(path)
	if nil != err {
		return err
	}

	html := blackfriday.MarkdownCommon(markdownContent)

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
	if nil != err {
		return err
	}

	doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
		link, _ := s.Attr("href")
		url, _ := url.Parse(link)

		if !url.IsAbs() && strings.HasSuffix(link, ".md") {
			s.SetAttr("href", fmt.Sprint(link[:len(link)-3], ".jsonld"))
		}
	})

	creativeWork.Name = doc.Find("h1").Text()
	creativeWork.Text, err = doc.Find("body").Html()
	if nil != err {
		return err
	}

	return nil
}
Esempio n. 2
0
func (markdown Markdown) Extract(creativeWork *schema.CreativeWork, path string) error {
	markdownContent, err := ioutil.ReadFile(path)
	if nil != err {
		return err
	}

	unsafe := blackfriday.MarkdownCommon(markdownContent)
	p := bluemonday.UGCPolicy()
	p.RequireNoFollowOnLinks(false)
	p.AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code")
	html := p.SanitizeBytes(unsafe)

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
	if nil != err {
		return err
	}

	doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
		link, _ := s.Attr("href")
		url, _ := url.Parse(link)

		if !url.IsAbs() {
			s.SetAttr("href", strings.Replace(link, ".md", ".jsonld", 1))
		}
	})

	creativeWork.Name = doc.Find("h1").Text()
	creativeWork.Text, err = doc.Find("body").Html()
	if nil != err {
		return err
	}

	return nil
}
Esempio n. 3
0
func (git Git) Extract(creativeWork *schema.CreativeWork, path string) error {
	if "" == gitPath {
		return nil
	}

	cmd := exec.Command(gitPath, "log", "--format=%an;%ae;%aI", path)
	stdout, err := cmd.StdoutPipe()
	if nil != err {
		return err
	}

	if err := cmd.Start(); err != nil {
		return err
	}

	scanner := bufio.NewScanner(stdout)
	for scanner.Scan() {
		parts := strings.Split(strings.TrimSpace(scanner.Text()), ";")

		author := schema.NewPerson(parts[0], parts[1])
		creativeWork.Author = append([]schema.Person{*author}, creativeWork.Author...)

		creativeWork.DateCreated = parts[2]
		if "" == creativeWork.DateModified {
			creativeWork.DateModified = parts[2]
		}
	}

	if err := scanner.Err(); err != nil {
		return err
	}

	if err := cmd.Wait(); err != nil {
		log.Fatalln("You are not in a git repository.")
		return err
	}

	return nil
}