/
main.go
57 lines (50 loc) · 981 Bytes
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
package main
import (
"encoding/csv"
"log"
"net/http"
"os"
"regexp"
"golang.org/x/net/html"
)
var (
	// repoRegexp matches a hosted repository path of the form
	// "<host>/<owner>/<repo>" where host is github.com or bitbucket.org.
	// It has three capture groups (host, owner, repo), so a successful
	// FindStringSubmatch returns four elements.
	//
	// The dots in the host names are escaped so they only match a literal
	// ".", and owner/repo segments accept "-" and "." in addition to word
	// characters so that names such as "go-sql-driver" are not dropped.
	repoRegexp = regexp.MustCompile(`(github\.com|bitbucket\.org)/([\w.-]+)/([\w.-]+)`)
)
// process walks every descendant of node depth-first and, for each <a>
// element, inspects its first href attribute. When the href matches
// repoRegexp, the full match followed by the three capture groups is written
// to w as one CSV record.
//
// It returns the first error reported by w.Write, whether from this level or
// from a nested call; otherwise it returns nil. The caller remains
// responsible for flushing w.
func process(node *html.Node, w *csv.Writer) error {
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if child.Type == html.ElementNode && child.Data == "a" {
			for _, a := range child.Attr {
				if a.Key != "href" {
					continue
				}
				if data := repoRegexp.FindStringSubmatch(a.Val); len(data) == 4 {
					// Only bail out on failure: returning after a successful
					// write would stop the walk at the first matching link.
					if err := w.Write(data); err != nil {
						return err
					}
				}
				// Only the first href attribute of an element is considered.
				break
			}
		}
		// Descend into the child and surface any write error from below.
		if err := process(child, w); err != nil {
			return err
		}
	}
	return nil
}
// main downloads the godoc.org package index, parses it as HTML, and writes
// one CSV record per matching repository link to standard output. Any
// failure is logged and terminates the program with a non-zero exit status.
func main() {
	resp, err := http.Get("http://godoc.org/-/index")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Do not try to scrape an error page as if it were the index.
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("unexpected response status: %s", resp.Status)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	w := csv.NewWriter(os.Stdout)
	if err := process(doc, w); err != nil {
		log.Fatal(err)
	}

	// Flush first, then check Error: the writer is buffered, so a failure
	// while writing to stdout may only surface during the flush.
	w.Flush()
	if err := w.Error(); err != nil {
		log.Fatal(err)
	}
}