func NewDatabase(t string, c string) *gorm.DB { if t == "" || c == "" { util.Error("db type and connect string can't be empty") return nil } if database, err := gorm.Open(t, c); err != nil { util.Error("can't connect to database %s\n", err.Error()) return nil } else { return &database } }
func (worker *Worker) Run() { worker.validate() util.Info("'%s' started\n", worker.project) for { task := worker.PopLocal() if task == nil { task = worker.Pop() } if task != nil { worker.retryCount = 0 if taskset, ok := worker.tasksets[task.TaskSet]; !ok { util.Error("can't find taskset %s\n", task.TaskSet) continue } else { data := taskset.Fetch(task) if data != nil { taskset.Parse(task, data) } } } else { worker.retryCount += 1 if worker.retryCount >= worker.retryMax { break } util.Info("sleep 1 second, ( %d | %d )\n", worker.retryCount, worker.retryMax) time.Sleep(time.Second) } } util.Info("'%s' exit\n", worker.project) }
func (worker *Worker) Pop() *common.Task { r, err := util.HttpPostJSON(worker.info.Server.AddrPop(), &common.Worker{worker.info.Server.Token, worker.project}) if err != nil { util.Error("failed to pop task, %s\n", err.Error()) return nil } resp := new(common.CommonResponse) err = util.HttpResponseUnmarshalJSON(resp, r, http.StatusOK) if err != nil { util.Error("failed to pop task, %s\n", err.Error()) return nil } else { if resp.Task != nil { util.Debug(resp.Task.PopLog()) } } return resp.Task }
func (worker *Worker) Push(task *common.Task) *common.Task { task.Token = worker.info.Server.Token task.Project = worker.project r, err := util.HttpPostJSON(worker.info.Server.AddrPush(), task) if err != nil { util.Error("failed to push task, %s\n", err.Error()) return nil } resp := new(common.CommonResponse) err = util.HttpResponseUnmarshalJSON(resp, r, http.StatusOK) if err != nil { util.Error("failed to push task, %s\n", err.Error()) return nil } else { if resp.Task != nil { util.Debug(resp.Task.PushLog()) } } return resp.Task }
func TumblrParser(taskset *nandu.TaskSet, task *common.Task, bytes []byte) { resp := new(TumblrResponse) err := json.Unmarshal(bytes, resp) if err != nil { util.Error("failed to parse response %s\n", err.Error()) return } d := TaskTumblrData{} task.GetData(&d) if d.Sleep != 0 { time.Sleep(time.Duration(d.Sleep) * time.Millisecond) } if d.Bid == 0 { blog := getTumblrBlog(d.Name, taskset.GetDB()) d.Bid = int64(blog.ID) } util.Info("fetching %s\n", task.Url) begin := int64(resp.Data.Blog.Posts) - d.Offset end := begin - int64(len(resp.Data.Posts)) + 1 ibegin, iend := d.Update(begin, end) for i := ibegin; i < iend; i++ { post := resp.Data.Posts[i] post.TumblrBlogID = uint(d.Bid) post.Offset = uint(begin - i) for j := range post.TumblrPhotos { post.TumblrPhotos[j].Fill() url := post.TumblrPhotos[j].Orig.Url if fn, err := getFileName(url); err == nil { util.Info("yield %s %s (%d | %d)\n", url, fn, resp.Data.Blog.Posts, begin-i) } } taskset.GetDB().Create(&post) } if d.HasMore() { new_task := new(common.Task) new_task.Project = task.Project new_task.TaskSet = task.TaskSet d.Offset = int64(resp.Data.Blog.Posts) - d.Current + 1 new_task.SetData(d) new_task.Url = genUrlFromInterval(&d) taskset.GetWorker().Push(new_task) } }
func CommonFetcher(taskset *TaskSet, task *common.Task) []byte { r, err := taskset.client.Get(task.Url) if err != nil { util.Error("can't fetch url %s, %s\n", task.Url, err.Error()) return nil } defer r.Body.Close() bytes, err := ioutil.ReadAll(r.Body) if err != nil { util.Error("can't get bytes from url %s, %s\n", task.Url, err.Error()) return nil } if r.StatusCode/100 != 2 { util.Error("get wrong status code url %s, %d\n", task.Url, r.StatusCode) return nil } return bytes }
func NewOauth(appKey string, appSecret string, token string, secret string) *http.Client { if token == "" || secret == "" || appKey == "" || appSecret == "" { util.Error("token can't be empty\n") return nil } consumer := oauth.NewConsumer( appKey, appSecret, oauth.ServiceProvider{ "", "", "", "", }) t := new(oauth.AccessToken) t.Token = token t.Secret = secret client, err := consumer.MakeHttpClient(t) if err != nil { util.Error("%s\n", err.Error()) return nil } return client }
func DownloadParser(taskset *nandu.TaskSet, task *common.Task, bs []byte) { if gDownloadInfo == nil { var err error gDownloadInfo, err = NewDownloadInfo(kDownloadInfoFile) if err != nil { util.Fatal("%s\n", err.Error()) } } h := md5.New() h.Write(bs) hash := fmt.Sprintf("%x", h.Sum(nil)) data := DownloadData{} task.GetData(&data) if dup, fid := duplicate(taskset.GetDB(), hash); dup { setFileId(taskset.GetDB(), data.Pid, fid) return } iv, bs := encrypt(bs, gDownloadInfo.EncryptKey) fileData := FileData{} fileData.FileName = fmt.Sprintf("%x%s", iv[:4], hash[:24]) fileData.Hash = hash err := gDownloadInfo.Bucket.PutObject(fileData.FileName, bytes.NewReader(bs)) if err != nil { util.Error("can't create file %s\n", err.Error()) } else { util.Info("downloading %s\n", task.Url) taskset.GetDB().Create(&fileData) setFileId(taskset.GetDB(), data.Pid, fileData.ID) } }