forked from c9s/zh-stroke-data
/
fetch.go
93 lines (78 loc) · 1.68 KB
/
fetch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
package main
import "fmt"
import "net/http"
import "log"
import "io/ioutil"
import "os"
import "path"
import "strings"
import "runtime"
import "time"
// Configuration shared by the fetch routines in this file.
const (
	// baseDir is the directory the downloaded files are written into.
	baseDir = "data"

	// xmlBaseUrl is the stroke-info endpoint; the hex code of a character
	// is appended as the value of the big5 query parameter.
	xmlBaseUrl = "http://stroke-order.learningweb.moe.edu.tw/provideStrokeInfo.do?big5="

	// imageBaseUrl is the word-image endpoint, built the same way.
	// NOTE(review): nothing in the visible part of this file uses it yet.
	imageBaseUrl = "http://stroke-order.learningweb.moe.edu.tw/showWordImage.do?big5="
)
// fetchUrl performs an HTTP GET on url and returns a pointer to the complete
// response body.
//
// It returns a nil pointer and a non-nil error when the request cannot be
// made or the body cannot be read. The HTTP status code is not inspected, so
// the body of a non-2xx response is returned like any other.
func fetchUrl(url string) (*[]byte, error) {
	res, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	// The Close must be deferred only after the error check: on failure res
	// is nil and touching res.Body would panic.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}
	return &body, nil
}
// fetchStrokeXml downloads the stroke XML for the character whose code is
// given and stores it as <baseDir>/<hex code>.xml.
//
// Progress is reported on stdout with one character per call:
//
//	"-"  the target file already exists, nothing was fetched
//	"x"  the server answered with something that is not an XML document
//	"."  the document was fetched and written successfully
//
// Fetch and write errors are logged; the function never returns an error.
func fetchStrokeXml(code int) {
	hex := fmt.Sprintf("%x", code)
	url := xmlBaseUrl + hex
	filename := path.Join(baseDir, hex+".xml")

	// Skip codes that were already downloaded by a previous run.
	if _, err := os.Stat(filename); err == nil {
		fmt.Print("-")
		return
	}

	xmlContentP, err := fetchUrl(url)
	if err != nil {
		log.Println(err)
		return
	}
	xmlContent := *xmlContentP

	if !strings.HasPrefix(string(xmlContent), "<?xml") {
		fmt.Print("x")
		// log.Printf("ERROR: %s returns non-XML response",url)
		return
	}

	// Report success only once the file is actually on disk; a failed write
	// used to be silently ignored and still printed ".".
	if err := ioutil.WriteFile(filename, xmlContent, 0666); err != nil {
		log.Println(err)
	} else {
		fmt.Print(".")
	}

	// Pause after each request that reached the write step.
	time.Sleep(500 * time.Millisecond)
}
// main downloads the stroke XML for every code in the range 0xA440-0xC67E
// (inclusive) using one worker goroutine per CPU, then waits for all workers
// to finish.
func main() {
	workers := runtime.NumCPU()

	in := make(chan int, 10)
	done := make(chan bool)

	// Each worker drains the code channel until it is closed, then signals
	// completion on done.
	worker := func(in chan int, done chan bool) {
		for c := range in {
			fetchStrokeXml(c)
		}
		done <- true
	}

	// MkdirAll succeeds when the directory already exists, so re-runs work,
	// while a real failure (e.g. permissions) stops the program up front
	// instead of failing on every single file write.
	if err := os.MkdirAll(baseDir, 0777); err != nil {
		log.Fatal(err)
	}

	for i := 0; i < workers; i++ {
		go worker(in, done)
	}

	// 0xA440-0xC67E
	// 0xC940-0xF9D5
	// The upper bound is inclusive: the previous "<" skipped 0xC67E itself.
	for code := 0xa440; code <= 0xc67e; code++ {
		in <- code
	}

	// Closing the channel tells every worker that no more work is coming.
	close(in)
	for i := 0; i < workers; i++ {
		<-done
		fmt.Printf("goroutine %d finished\n", i)
	}
}