Пример #1
0
// NewDatabase opens a gorm handle for database type t using connect string c.
//
// It returns nil, after logging through util.Error, when either argument is
// empty or when gorm.Open reports an error. On success it returns a pointer
// to the handle that gorm.Open produced.
func NewDatabase(t string, c string) *gorm.DB {
	if len(t) == 0 || len(c) == 0 {
		util.Error("db type and connect string can't be empty")
		return nil
	}

	database, err := gorm.Open(t, c)
	if err != nil {
		util.Error("can't connect to database %s\n", err.Error())
		return nil
	}

	return &database
}
Пример #2
0
// Run validates the worker and then processes tasks until it has seen
// retryMax consecutive rounds without any task.
//
// Each round asks PopLocal for a task first and falls back to Pop when that
// yields nil. A task resets the idle counter and is handed to the matching
// entry of worker.tasksets: Fetch is called and, when it returns non-nil
// data, Parse is called with that data. A round without a task increments the
// idle counter and sleeps for one second before trying again.
func (worker *Worker) Run() {
	worker.validate()
	util.Info("'%s' started\n", worker.project)

	for {
		task := worker.PopLocal()
		if task == nil {
			task = worker.Pop()
		}

		if task == nil {
			// Idle round: stop once the retry budget is used up,
			// otherwise wait a second and poll again.
			worker.retryCount++
			if worker.retryCount >= worker.retryMax {
				break
			}
			util.Info("sleep 1 second, ( %d | %d )\n", worker.retryCount, worker.retryMax)
			time.Sleep(time.Second)
			continue
		}

		worker.retryCount = 0

		taskset, ok := worker.tasksets[task.TaskSet]
		if !ok {
			// The task is dropped when no taskset is registered under its name.
			util.Error("can't find taskset %s\n", task.TaskSet)
			continue
		}

		if data := taskset.Fetch(task); data != nil {
			taskset.Parse(task, data)
		}
	}

	util.Info("'%s' exit\n", worker.project)
}
Пример #3
0
// Pop posts a common.Worker built from the server token and the worker's
// project to the server's pop address and decodes the reply into a
// common.CommonResponse.
//
// It returns the Task field of that response, which may be nil. When the
// request or the decoding fails the error is logged and nil is returned.
func (worker *Worker) Pop() *common.Task {
	addr := worker.info.Server.AddrPop()
	payload := &common.Worker{worker.info.Server.Token, worker.project}

	r, err := util.HttpPostJSON(addr, payload)
	if err != nil {
		util.Error("failed to pop task, %s\n", err.Error())
		return nil
	}

	resp := &common.CommonResponse{}
	if err = util.HttpResponseUnmarshalJSON(resp, r, http.StatusOK); err != nil {
		util.Error("failed to pop task, %s\n", err.Error())
		return nil
	}

	if resp.Task != nil {
		util.Debug(resp.Task.PopLog())
	}
	return resp.Task
}
Пример #4
0
// Push stamps task with the server token and the worker's project, posts it
// to the server's push address, and decodes the reply into a
// common.CommonResponse.
//
// Note that task is modified in place (Token and Project are overwritten).
// It returns the Task field of the response, which may be nil. When the
// request or the decoding fails the error is logged and nil is returned.
func (worker *Worker) Push(task *common.Task) *common.Task {
	task.Token = worker.info.Server.Token
	task.Project = worker.project

	addr := worker.info.Server.AddrPush()
	r, err := util.HttpPostJSON(addr, task)
	if err != nil {
		util.Error("failed to push task, %s\n", err.Error())
		return nil
	}

	resp := &common.CommonResponse{}
	if err = util.HttpResponseUnmarshalJSON(resp, r, http.StatusOK); err != nil {
		util.Error("failed to push task, %s\n", err.Error())
		return nil
	}

	if resp.Task != nil {
		util.Debug(resp.Task.PushLog())
	}
	return resp.Task
}
Пример #5
0
// TumblrParser decodes bytes as a TumblrResponse, stores the posts selected
// by the task's TaskTumblrData through the taskset's DB, and pushes a
// follow-up task when the task data reports that more remains.
//
// Steps, in order:
//   - unmarshal the JSON payload; on failure log and return;
//   - load the task data, sleep d.Sleep milliseconds when it is non-zero, and
//     resolve d.Bid through getTumblrBlog when it is still zero;
//   - derive begin/end from the blog's post count, d.Offset and the number of
//     posts in the response, and let d.Update turn them into the index range
//     to process (NOTE(review): the exact meaning of that range is defined by
//     TaskTumblrData.Update, which is not visible here);
//   - for each selected post set its blog id and offset, call Fill on every
//     photo, log the photos whose file name can be derived, and Create the post;
//   - when d.HasMore() is true, build a new task for the same project and
//     taskset with an updated offset and push it via the taskset's worker.
func TumblrParser(taskset *nandu.TaskSet, task *common.Task, bytes []byte) {
	resp := &TumblrResponse{}
	if err := json.Unmarshal(bytes, resp); err != nil {
		util.Error("failed to parse response %s\n", err.Error())
		return
	}

	var d TaskTumblrData
	task.GetData(&d)

	if d.Sleep != 0 {
		time.Sleep(time.Duration(d.Sleep) * time.Millisecond)
	}

	if d.Bid == 0 {
		d.Bid = int64(getTumblrBlog(d.Name, taskset.GetDB()).ID)
	}

	util.Info("fetching %s\n", task.Url)

	total := int64(resp.Data.Blog.Posts)
	begin := total - d.Offset
	end := begin - int64(len(resp.Data.Posts)) + 1

	first, last := d.Update(begin, end)

	for i := first; i < last; i++ {
		post := resp.Data.Posts[i]
		post.TumblrBlogID = uint(d.Bid)
		post.Offset = uint(begin - i)

		for j := range post.TumblrPhotos {
			post.TumblrPhotos[j].Fill()
			url := post.TumblrPhotos[j].Orig.Url

			// Photos whose file name cannot be derived are silently skipped
			// in the log; the post itself is still stored below.
			fn, err := getFileName(url)
			if err != nil {
				continue
			}
			util.Info("yield %s %s (%d | %d)\n", url, fn, resp.Data.Blog.Posts, begin-i)
		}

		taskset.GetDB().Create(&post)
	}

	if !d.HasMore() {
		return
	}

	next := new(common.Task)
	next.Project = task.Project
	next.TaskSet = task.TaskSet
	// The offset must be updated before the data is attached and before the
	// URL is generated, since both read d.
	d.Offset = total - d.Current + 1
	next.SetData(d)
	next.Url = genUrlFromInterval(&d)
	taskset.GetWorker().Push(next)
}
Пример #6
0
// CommonFetcher issues a GET for task.Url with the taskset's client and
// returns the response body.
//
// It returns nil, after logging the reason, when the request fails, when the
// body cannot be read, or when the status code is outside the 2xx range. The
// body is read in full before the status code is examined, and is closed when
// the function returns.
func CommonFetcher(taskset *TaskSet, task *common.Task) []byte {
	resp, err := taskset.client.Get(task.Url)
	if err != nil {
		util.Error("can't fetch url %s, %s\n", task.Url, err.Error())
		return nil
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)

	switch {
	case err != nil:
		util.Error("can't get bytes from url %s, %s\n", task.Url, err.Error())
		return nil
	case resp.StatusCode < 200 || resp.StatusCode > 299:
		util.Error("get wrong status code url %s, %d\n", task.Url, resp.StatusCode)
		return nil
	}

	return body
}
Пример #7
0
// NewOauth builds an *http.Client from an oauth consumer created with
// appKey/appSecret and an access token made of token/secret.
//
// All four arguments are required: if any of them is empty an error is logged
// and nil is returned. nil is also returned, after logging, when
// MakeHttpClient fails. The consumer's service provider is created with four
// empty strings.
func NewOauth(appKey string, appSecret string, token string, secret string) *http.Client {
	for _, required := range []string{token, secret, appKey, appSecret} {
		if required == "" {
			util.Error("token can't be empty\n")
			return nil
		}
	}

	provider := oauth.ServiceProvider{
		"", "", "", "",
	}
	consumer := oauth.NewConsumer(appKey, appSecret, provider)

	accessToken := &oauth.AccessToken{Token: token, Secret: secret}

	client, err := consumer.MakeHttpClient(accessToken)
	if err != nil {
		util.Error("%s\n", err.Error())
		return nil
	}
	return client
}
Пример #8
0
// DownloadParser stores the downloaded payload bs in the configured bucket
// and records it in the taskset's DB, unless a file with the same MD5 hash is
// already known.
//
// On first use it initialises the package-level gDownloadInfo from
// kDownloadInfoFile and reports a failure through util.Fatal. It then:
//   - computes the hex MD5 of bs;
//   - if duplicate reports an existing file for that hash, only links it to
//     data.Pid via setFileId and returns;
//   - otherwise passes bs through encrypt with gDownloadInfo.EncryptKey, names
//     the object from the first four bytes of the returned iv plus the first
//     24 hex characters of the hash, and writes it with Bucket.PutObject;
//   - on success creates the FileData row and links it to data.Pid; on
//     failure logs the error and records nothing.
func DownloadParser(taskset *nandu.TaskSet, task *common.Task, bs []byte) {
	if gDownloadInfo == nil {
		var err error
		gDownloadInfo, err = NewDownloadInfo(kDownloadInfoFile)
		if err != nil {
			util.Fatal("%s\n", err.Error())
		}
	}

	// The hash is taken over the plaintext, before encryption.
	hash := fmt.Sprintf("%x", md5.Sum(bs))

	var data DownloadData
	task.GetData(&data)

	dup, fid := duplicate(taskset.GetDB(), hash)
	if dup {
		setFileId(taskset.GetDB(), data.Pid, fid)
		return
	}

	iv, sealed := encrypt(bs, gDownloadInfo.EncryptKey)

	var fileData FileData
	fileData.FileName = fmt.Sprintf("%x%s", iv[:4], hash[:24])
	fileData.Hash = hash

	if err := gDownloadInfo.Bucket.PutObject(fileData.FileName, bytes.NewReader(sealed)); err != nil {
		util.Error("can't create file %s\n", err.Error())
		return
	}

	util.Info("downloading %s\n", task.Url)
	taskset.GetDB().Create(&fileData)
	setFileId(taskset.GetDB(), data.Pid, fileData.ID)
}