forked from ErosZy/singoriensis
/
downloader.go
119 lines (91 loc) · 2.63 KB
/
downloader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package singoriensis
import (
"singoriensis/common"
"singoriensis/interfaces"
"time"
)
// retryMaxCount is the package-level retry limit. It is written by
// Downloader.SetRetryMaxCount and read once by Downloader.Start when the
// worker goroutines are launched. Because it is a package variable rather
// than a Downloader field, every Downloader in the package shares it.
var retryMaxCount int
// Downloader pulls elements from a scheduler, fetches each one through a
// per-worker Request, runs the configured process over the resulting page and
// forwards the collected items to the pipeliner.
type Downloader struct {
	// sleepTime is how long a worker sleeps when the scheduler has nothing
	// to hand out.
	sleepTime time.Duration
	// requests holds one Request per worker, indexed by worker number; it is
	// allocated by Start.
	requests []*Request
	// scheduler supplies elements to download and receives retries and
	// newly discovered elements.
	scheduler interfaces.SchedulerInterface
	// pipeliner receives the items collected from each processed page.
	pipeliner interfaces.PipelinerInterface
	// process is invoked on every successfully fetched page.
	process interfaces.ProcessInterface
	// middlewares are the downloader middlewares added through
	// RegisterMiddleware.
	middlewares []interfaces.DownloaderMiddlewareInterface
}
// NewDownloader returns a Downloader with a one-second idle sleep and an
// empty, non-nil middleware list. The scheduler, pipeliner and process are
// left unset and must be supplied through their setters.
func NewDownloader() *Downloader {
	downloader := new(Downloader)
	downloader.sleepTime = time.Second
	downloader.middlewares = []interfaces.DownloaderMiddlewareInterface{}
	return downloader
}
// GetScheduler returns the scheduler currently attached to the Downloader,
// or nil if none has been set.
func (self *Downloader) GetScheduler() interfaces.SchedulerInterface {
	return self.scheduler
}
// SetScheduler attaches the scheduler that workers take elements from and
// push retries and newly discovered elements to.
func (self *Downloader) SetScheduler(scheduler interfaces.SchedulerInterface) {
	self.scheduler = scheduler
}
// SetPipeliner attaches the pipeliner that receives the items collected from
// each processed page.
func (self *Downloader) SetPipeliner(pipeliner interfaces.PipelinerInterface) {
	self.pipeliner = pipeliner
}
// SetProcess attaches the process whose Do method is run on every
// successfully fetched page.
func (self *Downloader) SetProcess(process interfaces.ProcessInterface) {
	self.process = process
}
// SetSleepTime sets how long a worker sleeps before polling the scheduler
// again after finding it empty.
//
// The parameter is deliberately not called "time": that name would shadow the
// imported time package for the whole function body.
func (self *Downloader) SetSleepTime(sleepTime time.Duration) {
	self.sleepTime = sleepTime
}
// SetRetryMaxCount sets the maximum number of times a failed element is put
// back on the scheduler. It panics if count is negative.
//
// The value is stored in the package-level retryMaxCount, not on the
// receiver, so it applies to every Downloader in the package. Start reads it
// when launching workers, so it must be set before Start to take effect.
func (self *Downloader) SetRetryMaxCount(count int) {
	if count < 0 {
		panic("thread retry max num can't be lt 0.")
	}

	retryMaxCount = count
}
// RegisterMiddleware appends mw to the Downloader's middleware list.
// Middlewares are kept in registration order.
func (self *Downloader) RegisterMiddleware(mw interfaces.DownloaderMiddlewareInterface) {
	self.middlewares = append(self.middlewares, mw)
}
// CallMiddlewareMethod hands the registered middlewares, the method name and
// params to common.CallObjMethod.
// NOTE(review): presumably CallObjMethod invokes the named method on each
// middleware with params as arguments; confirm against its definition.
func (self *Downloader) CallMiddlewareMethod(name string, params []interface{}) {
	common.CallObjMethod(self.middlewares, name, params)
}
// Start allocates threadNum Request objects, makes the Downloader the
// delegate of each one, and then launches threadNum worker goroutines, one
// per Request. It returns as soon as the workers are launched; the workers
// loop forever and this method provides no way to stop them.
//
// The retry limit is read from the package-level retryMaxCount once, here, and
// passed to each worker by value, so later calls to SetRetryMaxCount do not
// affect workers that are already running.
func (self *Downloader) Start(threadNum int) {
	// Build every Request before any worker starts so that a worker never
	// observes a nil slot in self.requests.
	self.requests = make([]*Request, threadNum)
	for i := 0; i < threadNum; i++ {
		self.requests[i] = NewRequest()
		self.requests[i].SetDelegate(self)
	}

	for i := 0; i < threadNum; i++ {
		go self.runWorker(i, retryMaxCount)
	}
}

// runWorker is the endless polling loop of worker number index: it takes the
// next element from the scheduler, processes it, and reports on Threads.
func (self *Downloader) runWorker(index int, maxRetries int) {
	for {
		elem := self.scheduler.ShiftElementItem()
		if elem == nil {
			// Nothing queued: back off before polling again.
			time.Sleep(self.sleepTime)
			continue
		}

		// NOTE(review): this assertion panics if the scheduler ever returns
		// a non-nil value that is not a common.ElementItem.
		self.processElement(index, elem.(common.ElementItem), maxRetries)

		// Report that this worker finished one element, whether the fetch
		// succeeded or failed. Threads is declared elsewhere in the package.
		Threads <- index
	}
}

// processElement fetches one element with the worker's own Request, then
// either re-queues it on failure or processes the page and forwards its items.
func (self *Downloader) processElement(index int, elemItem common.ElementItem, maxRetries int) {
	req, res, err := self.requests[index].Init(elemItem.UrlStr).Request()
	if err != nil {
		// Re-queue the element until it has failed maxRetries times; after
		// that it is dropped without further notice.
		if elemItem.FaildCount < maxRetries {
			elemItem.FaildCount++
			self.scheduler.AddElementItem(elemItem, true)
		}
		return
	}

	page := common.NewPage(req, res)
	self.process.Do(page)
	// The body is closed immediately after processing, before the collected
	// results are handed on.
	res.Body.Close()

	items, elems := page.GetAll()
	for _, v := range elems {
		self.scheduler.AddElementItem(v, false)
	}

	// Copy the items into a []interface{} because that is what
	// CallMiddlewareMethod accepts.
	params := make([]interface{}, 0, len(items))
	for _, v := range items {
		params = append(params, v)
	}
	self.pipeliner.CallMiddlewareMethod("GetItems", params)
}