/
getit.go
95 lines (90 loc) · 1.99 KB
/
getit.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// Fetching PATH train schedules at web scale.
// I'm guessing a shell script doing the same job would have fewer lines of code.
package main
import (
"os"
"fmt"
"io"
"log"
"net/http"
"strings"
"path"
"code.google.com/p/go.net/html"
)
// fetch downloads url and saves the response body to a file in the current
// working directory. The file is named after the last element of the URL
// (path.Base(url)); an existing file with that name is truncated.
//
// Errors are fatal: a failed request or a non-200 response ends the program
// through log.Fatal, while a failure to create, write, or close the output
// file panics.
func fetch(url string) {
	res, err := http.Get(url)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	// Without this check an error page (404, 500, ...) would be written to
	// disk as if it were the requested schedule.
	if res.StatusCode != http.StatusOK {
		log.Fatalf("fetching %s: unexpected status %s", url, res.Status)
	}

	fo, err := os.Create(path.Base(url))
	if err != nil {
		panic(err)
	}

	// io.Copy streams the body until EOF and copes with short or empty reads,
	// which the previous manual read/write loop could mistake for the end of
	// the body.
	if _, err := io.Copy(fo, res.Body); err != nil {
		fo.Close() // best effort; the copy error is the one worth reporting
		panic(err)
	}

	// Close explicitly rather than in a defer so that a failed flush to disk
	// is reported instead of being lost.
	if err := fo.Close(); err != nil {
		panic(err)
	}
}
// main scrapes the PATH full-schedules page and downloads every schedule it
// links to.
//
// It tokenizes the page's HTML and, for each <a> start tag whose href begins
// with "schedules/", resolves the link against the page URL, prints the
// resulting URL on stdout, and hands it to fetch. A failed request, a non-200
// response, or a tokenizer error other than end of input terminates the
// program through log.Fatal.
func main() {
	start := "http://www.panynj.gov/path/full-schedules.html"
	res, err := http.Get(start)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		log.Fatalf("fetching %s: unexpected status %s", start, res.Status)
	}

	z := html.NewTokenizer(res.Body)
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			// The tokenizer signals the normal end of input with io.EOF;
			// anything else is a real read or parse failure.
			if err := z.Err(); err != io.EOF {
				log.Fatal(err)
			}
			return
		}
		if tt != html.StartTagToken {
			continue
		}

		tn, hasAttr := z.TagName()
		if string(tn) != "a" {
			continue
		}

		// Walk the anchor's attributes; TagAttr reports whether more follow.
		for hasAttr {
			var key, value []byte
			key, value, hasAttr = z.TagAttr()
			if string(key) != "href" {
				continue
			}
			v := string(value)
			if !strings.HasPrefix(v, "schedules/") {
				continue
			}

			// path.Join cleans the result and thereby collapses the "//"
			// after the scheme into a single "/", so restore it afterwards.
			joined := path.Join(path.Dir(start), v)
			target := strings.Replace(joined, ":/", "://", 1)
			fmt.Printf("%s\n", target)
			fetch(target)
		}
	}
}