func TumblrParser(taskset *nandu.TaskSet, task *common.Task, bytes []byte) { resp := new(TumblrResponse) err := json.Unmarshal(bytes, resp) if err != nil { util.Error("failed to parse response %s\n", err.Error()) return } d := TaskTumblrData{} task.GetData(&d) if d.Sleep != 0 { time.Sleep(time.Duration(d.Sleep) * time.Millisecond) } if d.Bid == 0 { blog := getTumblrBlog(d.Name, taskset.GetDB()) d.Bid = int64(blog.ID) } util.Info("fetching %s\n", task.Url) begin := int64(resp.Data.Blog.Posts) - d.Offset end := begin - int64(len(resp.Data.Posts)) + 1 ibegin, iend := d.Update(begin, end) for i := ibegin; i < iend; i++ { post := resp.Data.Posts[i] post.TumblrBlogID = uint(d.Bid) post.Offset = uint(begin - i) for j := range post.TumblrPhotos { post.TumblrPhotos[j].Fill() url := post.TumblrPhotos[j].Orig.Url if fn, err := getFileName(url); err == nil { util.Info("yield %s %s (%d | %d)\n", url, fn, resp.Data.Blog.Posts, begin-i) } } taskset.GetDB().Create(&post) } if d.HasMore() { new_task := new(common.Task) new_task.Project = task.Project new_task.TaskSet = task.TaskSet d.Offset = int64(resp.Data.Blog.Posts) - d.Current + 1 new_task.SetData(d) new_task.Url = genUrlFromInterval(&d) taskset.GetWorker().Push(new_task) } }
func (worker *Worker) Push(task *common.Task) *common.Task { task.Token = worker.info.Server.Token task.Project = worker.project r, err := util.HttpPostJSON(worker.info.Server.AddrPush(), task) if err != nil { util.Error("failed to push task, %s\n", err.Error()) return nil } resp := new(common.CommonResponse) err = util.HttpResponseUnmarshalJSON(resp, r, http.StatusOK) if err != nil { util.Error("failed to push task, %s\n", err.Error()) return nil } else { if resp.Task != nil { util.Debug(resp.Task.PushLog()) } } return resp.Task }
func DownloadParser(taskset *nandu.TaskSet, task *common.Task, bs []byte) { if gDownloadInfo == nil { var err error gDownloadInfo, err = NewDownloadInfo(kDownloadInfoFile) if err != nil { util.Fatal("%s\n", err.Error()) } } h := md5.New() h.Write(bs) hash := fmt.Sprintf("%x", h.Sum(nil)) data := DownloadData{} task.GetData(&data) if dup, fid := duplicate(taskset.GetDB(), hash); dup { setFileId(taskset.GetDB(), data.Pid, fid) return } iv, bs := encrypt(bs, gDownloadInfo.EncryptKey) fileData := FileData{} fileData.FileName = fmt.Sprintf("%x%s", iv[:4], hash[:24]) fileData.Hash = hash err := gDownloadInfo.Bucket.PutObject(fileData.FileName, bytes.NewReader(bs)) if err != nil { util.Error("can't create file %s\n", err.Error()) } else { util.Info("downloading %s\n", task.Url) taskset.GetDB().Create(&fileData) setFileId(taskset.GetDB(), data.Pid, fileData.ID) } }
func main() { util.SetDebug(util.DebugInfo) worker := nandu.NewWorker() info, err := NewTaskPushInfo(kTaskPushInfoFile) if err != nil { util.Fatal("%s\n", err.Error()) } for i := range info.Blogs { task := &info.Blogs[i] d := TaskTumblrData{} task.GetData(&d) d.Min = getStop(d.Name, worker.GetDB(kDatabaseName)) task.SetData(d) util.Info("%s", task.PushLog()) worker.Push(task) } photos := getPhotos(info.FileDataRange.Start, info.FileDataRange.Stop, worker.GetDB(kDatabaseName)) if photos != nil { for i := range photos { task := common.Task{} if photos[i].FileDataID != 0 { continue } task.Url = photos[i].Url task.TaskSet = kDownloadTaskSetName task.SetData(DownloadData{photos[i].ID}) util.Info("push %s\n", task.Url) worker.Push(&task) } } }