func TumblrParser(taskset *nandu.TaskSet, task *common.Task, bytes []byte) { resp := new(TumblrResponse) err := json.Unmarshal(bytes, resp) if err != nil { util.Error("failed to parse response %s\n", err.Error()) return } d := TaskTumblrData{} task.GetData(&d) if d.Sleep != 0 { time.Sleep(time.Duration(d.Sleep) * time.Millisecond) } if d.Bid == 0 { blog := getTumblrBlog(d.Name, taskset.GetDB()) d.Bid = int64(blog.ID) } util.Info("fetching %s\n", task.Url) begin := int64(resp.Data.Blog.Posts) - d.Offset end := begin - int64(len(resp.Data.Posts)) + 1 ibegin, iend := d.Update(begin, end) for i := ibegin; i < iend; i++ { post := resp.Data.Posts[i] post.TumblrBlogID = uint(d.Bid) post.Offset = uint(begin - i) for j := range post.TumblrPhotos { post.TumblrPhotos[j].Fill() url := post.TumblrPhotos[j].Orig.Url if fn, err := getFileName(url); err == nil { util.Info("yield %s %s (%d | %d)\n", url, fn, resp.Data.Blog.Posts, begin-i) } } taskset.GetDB().Create(&post) } if d.HasMore() { new_task := new(common.Task) new_task.Project = task.Project new_task.TaskSet = task.TaskSet d.Offset = int64(resp.Data.Blog.Posts) - d.Current + 1 new_task.SetData(d) new_task.Url = genUrlFromInterval(&d) taskset.GetWorker().Push(new_task) } }
func main() { util.SetDebug(util.DebugInfo) worker := nandu.NewWorker() info, err := NewTaskPushInfo(kTaskPushInfoFile) if err != nil { util.Fatal("%s\n", err.Error()) } for i := range info.Blogs { task := &info.Blogs[i] d := TaskTumblrData{} task.GetData(&d) d.Min = getStop(d.Name, worker.GetDB(kDatabaseName)) task.SetData(d) util.Info("%s", task.PushLog()) worker.Push(task) } photos := getPhotos(info.FileDataRange.Start, info.FileDataRange.Stop, worker.GetDB(kDatabaseName)) if photos != nil { for i := range photos { task := common.Task{} if photos[i].FileDataID != 0 { continue } task.Url = photos[i].Url task.TaskSet = kDownloadTaskSetName task.SetData(DownloadData{photos[i].ID}) util.Info("push %s\n", task.Url) worker.Push(&task) } } }