func TestLinks(filter string) {
	linkCount := 0
	for link, details := range data.AllLinks {
		linkCount++
		// checkExternalLinks is assumed to be a package-level option (see the sketch after testUrl).
		status := testUrl(link, filter, checkExternalLinks)
		details.Response = status
		statusCount[status]++
		if status == 200 || status == 900 || status == 888 || status == 299 || status == 2900 || status == 666 {
			// These statuses are not added to the error count.
			data.VerboseLog("\t\t(%d) %d links to %s\n", status, details.Count, link)
		} else {
			data.ErrorLog("(%d) %d links to (%s)\n", status, details.Count, link)
			for i, file := range details.LinksFrom {
				fmt.Printf("\t\t\t link %s on page %s\n", details.ActualLink[i], file)
			}
		}
	}
	for status, count := range statusCount {
		fmt.Printf("\t%d: %d times (%s)\n", status, count, data.ResponseCode[status])
	}
	fmt.Printf("\tTotal Links: %d\n", linkCount)
}
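// statusCount and data.ResponseCode are used above but not defined in this
// excerpt; a minimal sketch of the assumed declarations. The non-HTTP codes
// are the checker's own sentinels, inferred from the return values in testUrl;
// the descriptions here are a best guess, not authoritative:
var statusCount = make(map[int]int) // response code -> number of links that returned it

// And, in the data package, something like:
var ResponseCode = map[int]string{
	200:  "OK",
	290:  "internal file found after appending .md",
	299:  "skipped or filtered",
	666:  "direct link to docs.docker.com",
	777:  "internal link target not found",
	888:  "external crawl failed",
	900:  "mailto: or irc: link",
	999:  "unparsable URL",
	2900: "internal file found",
}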
func (renderer *TestRenderer) Link(out *bytes.Buffer, linkB []byte, title []byte, content []byte) {
	actualLink := string(linkB)
	data.VerboseLog("Link [%s](%s) in file %s\n", string(content), actualLink, renderer.LinkFrom)

	var link string
	base, err := url.Parse(actualLink)
	if err == nil && base.Scheme == "" {
		if strings.HasPrefix(actualLink, "#") {
			link = actualLink
		} else if strings.HasPrefix(actualLink, "/") {
			link = strings.TrimLeft(actualLink, "/")
		} else {
			// TODO: fix for relative paths.
			// TODO: need to check that the from links are all in the same dir too.
			link = filepath.Clean(filepath.FromSlash(actualLink))
			if strings.IndexRune(link, os.PathSeparator) == 0 { // filepath.IsAbs fails for me.
				link = link[1:]
			} else {
				// TODO: need to check all the LinksFrom
				link = filepath.Join(filepath.Dir(renderer.LinkFrom), link)
			}
			data.VerboseLog("---- converted %s (on page %s, in %s) into %s\n", actualLink, renderer.LinkFrom, filepath.Dir(renderer.LinkFrom), link)
		}
	} else {
		link = actualLink
	}

	if _, ok := data.AllLinks[link]; !ok {
		data.AllLinks[link] = new(data.LinkDetails)
		data.AllLinks[link].LinksFrom = make(map[int]string)
		data.AllLinks[link].ActualLink = make(map[int]string)
	}
	data.AllLinks[link].LinksFrom[data.AllLinks[link].Count] = renderer.LinkFrom
	data.AllLinks[link].ActualLink[data.AllLinks[link].Count] = actualLink
	data.AllLinks[link].Count++
}
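// data.LinkDetails is referenced above but not defined in this excerpt; a
// minimal sketch of the assumed shape, inferred from the fields used:
type LinkDetails struct {
	Response   int            // HTTP (or sentinel) status from testUrl
	Count      int            // number of occurrences of this link
	LinksFrom  map[int]string // occurrence index -> source file
	ActualLink map[int]string // occurrence index -> link text as written in the markdown
}

var AllLinks map[string]*LinkDetails // normalized link -> details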
func CheckMarkdownLinks(reader *linereader.LineReader, file string) (err error) {
	// mmark.HtmlRendererWithParameters(htmlFlags, "", "", renderParameters)
	htmlFlags := 0
	htmlFlags |= mmark.HTML_FOOTNOTE_RETURN_LINKS

	renderParameters := mmark.HtmlRendererParameters{
		// FootnoteAnchorPrefix:       viper.GetString("FootnoteAnchorPrefix"),
		// FootnoteReturnLinkContents: viper.GetString("FootnoteReturnLinkContents"),
	}

	renderer := &TestRenderer{
		LinkFrom: file,
		Renderer: mmark.HtmlRendererWithParameters(htmlFlags, "", "", renderParameters),
	}

	extensions := 0 |
		//mmark.EXTENSION_NO_INTRA_EMPHASIS |
		mmark.EXTENSION_TABLES |
		mmark.EXTENSION_FENCED_CODE |
		mmark.EXTENSION_AUTOLINK |
		//mmark.EXTENSION_STRIKETHROUGH |
		mmark.EXTENSION_SPACE_HEADERS |
		mmark.EXTENSION_FOOTNOTES |
		mmark.EXTENSION_HEADER_IDS |
		mmark.EXTENSION_AUTO_HEADER_IDS //|
	//	mmark.EXTENSION_DEFINITION_LISTS

	buf := make([]byte, 1024*1024)
	length, err := reader.Read(buf)
	if length == 0 || err != nil {
		return err
	}
	data.VerboseLog("RUNNING Markdown on %s length(%d) - not counting frontmatter\n", file, length)
	// Only parse the bytes actually read, not the whole 1MB buffer.
	_ = mmark.Parse(buf[:length], renderer, extensions)
	data.VerboseLog("FINISHED Markdown on %s\n", file)

	return nil
}
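// TestRenderer itself is not defined in this excerpt; a plausible sketch,
// assuming it embeds the mmark renderer so that only Link (above) is
// overridden and every other callback falls through to the HTML renderer:
type TestRenderer struct {
	LinkFrom       string // file currently being parsed
	mmark.Renderer        // embedded interface; set to the HTML renderer in CheckMarkdownLinks
}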
func main() {
	flag.Parse()
	args := flag.Args()
	if len(args) < 1 {
		printUsage()
		os.Exit(-1)
	}
	dir := args[0]
	filter := ""
	if len(args) >= 2 {
		filter = args[1]
	}

	data.AllFiles = make(map[string]*data.FileDetails)

	fmt.Println("Finding files")
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			data.ErrorLog("%s\n", err)
			return err
		}
		data.VerboseLog("FOUND: %s\n", path)
		if info.IsDir() {
			return nil
		}
		file, err := filepath.Rel(dir, path)
		if err != nil {
			data.ErrorLog("%s\n", err)
			return err
		}
		// verboseLog("\t walked to %s\n", file)
		data.AddFile(file, path)
		return nil
	})
	if err != nil {
		data.ErrorLog("%s\n", err)
		os.Exit(-1)
	}

	count := 0
	for file, details := range data.AllFiles {
		if !strings.HasPrefix(file, filter) {
			data.VerboseLog("FILTERED: %s\n", file)
			continue
		}
		if !strings.HasSuffix(file, ".md") {
			data.VerboseLog("SKIPPING: %s\n", file)
			continue
		}
		// fmt.Printf("opening: %s\n", file)
		count++
		if count%100 == 0 {
			fmt.Printf("\topened %d files so far\n", count)
		}
		reader, err := linereader.OpenReader(details.FullPath)
		if err != nil {
			data.ErrorLog("%s\n", err)
			data.AllFiles[file].FormatErrorCount++
			continue // can't check a file we failed to open
		}
		err = checkers.CheckHugoFrontmatter(reader, file)
		if err != nil {
			data.ErrorLog("(%s) frontmatter: %s\n", file, err)
		}
		// Only skip the link check when the file is explicitly marked draft = "true".
		if draft, ok := data.AllFiles[file].Meta["draft"]; ok && draft == "true" {
			data.VerboseLog("Draft=%s: SKIPPING %s link check.\n", draft, file)
		} else {
			//fmt.Printf("Draft=%s: %s link check.\n", draft, file)
			err = checkers.CheckMarkdownLinks(reader, file)
			if err != nil {
				// this only errors if there is a fatal issue
				data.ErrorLog("(%s) links: %s\n", file, err)
				data.AllFiles[file].FormatErrorCount++
			}
		}
		reader.Close()
	}

	fmt.Printf("Starting to test links (Filter = %s)\n", filter)
	checkers.TestLinks(filter)

	// TODO (JIRA: DOCS-181): Title, unique across products; if not, the file should include an {identifier}
	summaryFileName := "markdownlint.summary.txt"
	f, err := os.Create(summaryFileName)
	if err == nil {
		fmt.Printf("Also writing summary to %s :\n\n", summaryFileName)
		defer f.Close()
	}
	if filter != "" {
		Printf(f, "# Filtered (%s) Summary:\n\n", filter)
	} else {
		Printf(f, "# Summary:\n\n")
	}
	errorCount, errorString := checkers.FrontSummary(filter)
	Printf(f, "%s", errorString) // pass the summary as an argument so any '%' in it is not treated as a format verb
	count, errorString = checkers.LinkSummary(filter)
	errorCount += count
	//Printf(f, "%s", errorString)
	Printf(f, "\n\tFound: %d files\n", len(data.AllFiles))
	Printf(f, "\tFound: %d errors\n", errorCount)

	// Return the number of errors as the exit status so there is a signal that
	// things need fixing; note most systems truncate exit codes to 0-255.
	os.Exit(errorCount)
}
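// The Printf helper used above is not defined in this excerpt; a minimal
// sketch, assuming it tees output to stdout and, when the summary file was
// created successfully, to that file as well (f may be nil if os.Create failed):
func Printf(f *os.File, format string, a ...interface{}) {
	fmt.Printf(format, a...)
	if f != nil {
		fmt.Fprintf(f, format, a...)
	}
}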
func doCheckHugoFrontmatter(reader *linereader.LineReader, file string) (err error) {
	foundComment := false
	// Scan for the `+++` TOML start, allowing an optional leading HTML comment
	// and whitespace-only lines. The read errors are returned directly, so the
	// loop itself needs no condition.
	for {
		byteBuff, _, err := reader.ReadLine()
		if err != nil {
			return err
		}
		buff := string(byteBuff)
		if buff == "+++" {
			data.VerboseLog("Found TOML start\n")
			break
		}
		if strings.HasPrefix(buff, "<!--") && !strings.HasSuffix(buff, "-->") {
			data.VerboseLog("found comment start")
			foundComment = true
			continue
		}
		//data.VerboseLog("ReadLine: %s, %v, %s\n", string(byteBuff), isPrefix, err)
		// Anything else before the frontmatter must be whitespace only.
		for i := 0; i < len(buff); {
			runeValue, width := utf8.DecodeRuneInString(buff[i:])
			if unicode.IsSpace(runeValue) {
				i += width
			} else {
				data.VerboseLog("Unexpected non-whitespace char: %s", buff)
				return fmt.Errorf("Unexpected non-whitespace char: %s", buff)
			}
		}
	}

	data.AllFiles[file].Meta = make(map[string]string)
	// Read `key = value` lines until the closing `+++`.
	for {
		byteBuff, _, err := reader.ReadLine()
		if err != nil {
			return err
		}
		buff := string(byteBuff)
		if buff == "+++" {
			data.VerboseLog("Found TOML end\n")
			break
		}
		data.VerboseLog("\t%s\n", buff)
		meta := strings.SplitN(buff, "=", 2)
		data.VerboseLog("\t%d\t%v\n", len(meta), meta)
		if len(meta) == 2 {
			data.VerboseLog("\t\t%s: %s\n", meta[0], meta[1])
			data.AllFiles[file].Meta[strings.Trim(meta[0], " ")] = strings.Trim(meta[1], " ")
		}
	}

	// If the frontmatter was wrapped in an HTML comment, consume the trailing `-->`.
	if foundComment {
		byteBuff, _, err := reader.ReadLine()
		if err != nil {
			return err
		}
		buff := string(byteBuff)
		data.VerboseLog("is this a comment? (%s)\n", buff)
		if strings.HasSuffix(buff, "-->") && !strings.HasPrefix(buff, "<!--") {
			data.VerboseLog("found comment end\n")
			foundComment = false
		}
		if foundComment {
			reader.UnreadLine(buff)
			return fmt.Errorf("Did not find expected close metadata comment")
		}
	}

	// Ensure that the minimum metadata keys are set; draft files are exempt.
	if draft, ok := data.AllFiles[file].Meta["draft"]; !ok || draft != "true" {
		if _, ok := data.AllFiles[file].Meta["title"]; !ok {
			return fmt.Errorf("Did not find `title` metadata element")
		}
		if _, ok := data.AllFiles[file].Meta["description"]; !ok {
			return fmt.Errorf("Did not find `description` metadata element")
		}
		if _, ok := data.AllFiles[file].Meta["keywords"]; !ok {
			return fmt.Errorf("Did not find `keywords` metadata element")
		}
	}
	return nil
}
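// For reference, the frontmatter shape this parser accepts, reconstructed from
// the logic above (the surrounding HTML comment is optional, and the three
// required keys may be omitted only when draft = "true"):
//
//	<!--
//	+++
//	title = "Example page"
//	description = "What this page covers"
//	keywords = "example, docs"
//	draft = "false"
//	+++
//	-->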
func testUrl(link, filter string, checkExternalLinks bool) int {
	if _, ok := skipUrls[link]; ok {
		fmt.Printf("Skipping: %s\n", link)
		return 299
	}
	base, err := url.Parse(link)
	if err != nil {
		fmt.Println("ERROR: failed to Parse \"" + link + "\"")
		return 999
	}
	switch base.Scheme {
	case "":
		// Internal markdown link; otherwise, look in data.AllFiles.
		if strings.HasPrefix(link, "#") {
			// internal link to an anchor
			//TODO: need to look for the anchor
			return 200
		}
		path := strings.Split(link, "#")
		relUrl := path[0]
		if !strings.HasPrefix(relUrl, filter) {
			//fmt.Printf("Filtered(%s): %s\n", filter, link)
			return 299
		}
		// TODO: need to test for the path[1] anchor
		if _, ok := data.AllFiles[relUrl]; ok {
			return 2900
		}
		if _, ok := data.AllFiles[relUrl+".md"]; ok {
			return 290
		}
		fmt.Printf("\t\tERROR: failed to find %s or %s.md\n", relUrl, relUrl)
		return 777 // sentinel: internal link target not found
	case "mailto", "irc":
		// mailto: and irc: links are recorded but never crawled
		return 900
	}

	// http / https
	if base.Host == "docs.docker.com" {
		// avoid linking directly to docs.docker.com; use a relative link instead
		return 666
	}
	if !checkExternalLinks {
		data.VerboseLog("External link checking disabled: %s\n", link)
		return 299
	}
	httpClient := &http.Client{
		Timeout: 10 * time.Second,
	}
	resp, err := httpClient.Get(link)
	if err != nil {
		fmt.Println("Warning: Failed to crawl \"" + link + "\" " + err.Error())
		return 888
	}
	defer resp.Body.Close() // close Body when the function returns
	loc, err := resp.Location()
	if err == nil && link != loc.String() {
		fmt.Printf("\t crawled \"%s\"", link)
		fmt.Printf("\t\t to \"%s\"", loc)
	}
	return resp.StatusCode
}
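// skipUrls, and the checkExternalLinks flag passed in from TestLinks, are not
// defined in this excerpt; a minimal sketch of the assumed declarations:
var skipUrls = map[string]struct{}{
	// e.g. "https://example.com/known-flaky/": {}, // hypothetical entry
}
var checkExternalLinks bool // hypothetical package-level option, e.g. set from a command-line flag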