func (markdown Markdown) Extract(creativeWork *schema.CreativeWork, path string) error { markdownContent, err := ioutil.ReadFile(path) if nil != err { return err } html := blackfriday.MarkdownCommon(markdownContent) doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html)) if nil != err { return err } doc.Find("a[href]").Each(func(i int, s *goquery.Selection) { link, _ := s.Attr("href") url, _ := url.Parse(link) if !url.IsAbs() && strings.HasSuffix(link, ".md") { s.SetAttr("href", fmt.Sprint(link[:len(link)-3], ".jsonld")) } }) creativeWork.Name = doc.Find("h1").Text() creativeWork.Text, err = doc.Find("body").Html() if nil != err { return err } return nil }
func (markdown Markdown) Extract(creativeWork *schema.CreativeWork, path string) error { markdownContent, err := ioutil.ReadFile(path) if nil != err { return err } unsafe := blackfriday.MarkdownCommon(markdownContent) p := bluemonday.UGCPolicy() p.RequireNoFollowOnLinks(false) p.AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code") html := p.SanitizeBytes(unsafe) doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html)) if nil != err { return err } doc.Find("a[href]").Each(func(i int, s *goquery.Selection) { link, _ := s.Attr("href") url, _ := url.Parse(link) if !url.IsAbs() { s.SetAttr("href", strings.Replace(link, ".md", ".jsonld", 1)) } }) creativeWork.Name = doc.Find("h1").Text() creativeWork.Text, err = doc.Find("body").Html() if nil != err { return err } return nil }
func (git Git) Extract(creativeWork *schema.CreativeWork, path string) error { if "" == gitPath { return nil } cmd := exec.Command(gitPath, "log", "--format=%an;%ae;%aI", path) stdout, err := cmd.StdoutPipe() if nil != err { return err } if err := cmd.Start(); err != nil { return err } scanner := bufio.NewScanner(stdout) for scanner.Scan() { parts := strings.Split(strings.TrimSpace(scanner.Text()), ";") author := schema.NewPerson(parts[0], parts[1]) creativeWork.Author = append([]schema.Person{*author}, creativeWork.Author...) creativeWork.DateCreated = parts[2] if "" == creativeWork.DateModified { creativeWork.DateModified = parts[2] } } if err := scanner.Err(); err != nil { return err } if err := cmd.Wait(); err != nil { log.Fatalln("You are not in a git repository.") return err } return nil }