func fetchList(url, prefix string) ([]string, error) { res, err := http.Get(url) if err != nil { return nil, err } defer res.Body.Close() if res.StatusCode != 200 { return nil, fmt.Errorf("failed to fetch %s - %s", url, res.Status) } list := []string{} selector := cascadia.MustCompile("a") webdevdata.ProcessMatchingTagsReader(res.Body, "table tbody tr > td:first-of-type", func(node *html.Node) { pkg := "" link := selector.MatchFirst(node) if link != nil { pkg = webdevdata.GetAttr("href", link.Attr) } else if node.FirstChild != nil && node.FirstChild.Type == html.TextNode { pkg = node.FirstChild.Data } else if node.FirstChild != nil && node.FirstChild.Data == "b" { return } if pkg == "" { log.Fatal("markup from godoc.org changed") } p := strings.TrimLeft(pkg, "/") if !strings.HasPrefix(p, prefix) { return } list = append(list, p) }) return list, nil }
func process(file string, selector string, attrList []string, csv *csv.Writer) { webdevdata.ProcessMatchingTags(file, selector, func(node *html.Node) { content := []string{file, node.Data} for _, attr := range attrList { if attr != "" { content = append(content, webdevdata.GetAttr(attr, node.Attr)) } } csv.Write(content) }) }
func main() { flag.Parse() file := flag.Arg(0) csv := csv.NewWriter(os.Stdout) selector := "meta[name]" // All meta tags with name attribute webdevdata.ProcessMatchingTags(file, selector, func(node *html.Node) { name := webdevdata.GetAttr("name", node.Attr) csv.Write([]string{file, name}) }) csv.Flush() }
func main() { flag.Parse() file := flag.Arg(0) csv := csv.NewWriter(os.Stdout) selector := "html[manifest]" // all html tags with manifest attribute webdevdata.ProcessMatchingTags(file, selector, func(node *html.Node) { manifest := webdevdata.GetAttr("manifest", node.Attr) if manifest != "" { csv.Write([]string{file, manifest}) } }) csv.Flush() }