func (worker *Worker) GetDB(name string) *gorm.DB { database, ok := worker.databases[name] if !ok { util.Fatal("can't find database") } return database.DB }
func (worker *Worker) GetClient(name string) *http.Client { client, ok := worker.clients[name] if !ok { util.Fatal("can't find client") } return client }
// checkParsers scans worker.tasksets and reports, through util.Fatal, any
// task set whose parser field is nil, naming the task set in the message.
func (worker *Worker) checkParsers() {
	for idx := range worker.tasksets {
		// Task sets that already have a parser need no further attention.
		if worker.tasksets[idx].parser != nil {
			continue
		}
		util.Fatal("missing parser of tasksets %s\n", worker.tasksets[idx].Name)
	}
}
func (worker *Worker) Model(name string, model interface{}) *Worker { database, ok := worker.databases[name] if !ok { util.Fatal("can't find database %s\n", name) } if database.Init { database.DB.CreateTable(model) } return worker }
func NewWorker() *Worker { worker := new(Worker) worker.retryCount = 0 worker.retryMax = 10 info, err := NewNanduInfo(kNanduConfigFile) if err != nil { util.Fatal("failed to load config file %s\n", err.Error()) } worker.info = info worker.project = info.Project worker.registerClients() worker.registerDatabase() return worker }
func DownloadParser(taskset *nandu.TaskSet, task *common.Task, bs []byte) { if gDownloadInfo == nil { var err error gDownloadInfo, err = NewDownloadInfo(kDownloadInfoFile) if err != nil { util.Fatal("%s\n", err.Error()) } } h := md5.New() h.Write(bs) hash := fmt.Sprintf("%x", h.Sum(nil)) data := DownloadData{} task.GetData(&data) if dup, fid := duplicate(taskset.GetDB(), hash); dup { setFileId(taskset.GetDB(), data.Pid, fid) return } iv, bs := encrypt(bs, gDownloadInfo.EncryptKey) fileData := FileData{} fileData.FileName = fmt.Sprintf("%x%s", iv[:4], hash[:24]) fileData.Hash = hash err := gDownloadInfo.Bucket.PutObject(fileData.FileName, bytes.NewReader(bs)) if err != nil { util.Error("can't create file %s\n", err.Error()) } else { util.Info("downloading %s\n", task.Url) taskset.GetDB().Create(&fileData) setFileId(taskset.GetDB(), data.Pid, fileData.ID) } }
func main() { util.SetDebug(util.DebugInfo) worker := nandu.NewWorker() info, err := NewTaskPushInfo(kTaskPushInfoFile) if err != nil { util.Fatal("%s\n", err.Error()) } for i := range info.Blogs { task := &info.Blogs[i] d := TaskTumblrData{} task.GetData(&d) d.Min = getStop(d.Name, worker.GetDB(kDatabaseName)) task.SetData(d) util.Info("%s", task.PushLog()) worker.Push(task) } photos := getPhotos(info.FileDataRange.Start, info.FileDataRange.Stop, worker.GetDB(kDatabaseName)) if photos != nil { for i := range photos { task := common.Task{} if photos[i].FileDataID != 0 { continue } task.Url = photos[i].Url task.TaskSet = kDownloadTaskSetName task.SetData(DownloadData{photos[i].ID}) util.Info("push %s\n", task.Url) worker.Push(&task) } } }