Example #1
func TestReduceValues(t *testing.T) {
	/*
	 * The source has two parts with no records in either, but at each MapEnd a
	 * <"part", part-index> pair is collected. The reducer then verifies that
	 * the key "part" arrives with two distinct values.
	 */
	job := MrJob{
		Source: []Input{
			&InputStruct{
				PartCountF: func() (int, error) {
					return 2, nil
				},
			},
		},

		NewMapperF: func(src, part int) Mapper {
			return &MapperStruct{
				MapEndF: func(c PartCollector) error {
					return c.CollectTo(0, sophie.RawString("part"),
						sophie.VInt(part))
				},
			}
		},

		NewReducerF: func(part int) Reducer {
			st := make(map[sophie.VInt]bool)
			return &ReducerStruct{
				NewKeyF: sophie.NewRawString,
				NewValF: sophie.NewVInt,

				ReduceF: func(key sophie.SophieWriter,
					nextVal SophierIterator, c []sophie.Collector) error {

					keyStr := string(*key.(*sophie.RawString))
					if keyStr != "part" {
						return errors.New(`Key should be "part"`)
					}
					for {
						val, err := nextVal()
						if err == sophie.EOF {
							break
						}
						if err != nil {
							return err
						}

						part := *val.(*sophie.VInt)
						if st[part] {
							t.Errorf("Duplicated value: %v", part)
						}
						st[part] = true
					}
					return nil
				},
			}
		},
	}
	assert.NoErrorf(t, "job.Run failed: %v", job.Run())
}
Example #2
func (iter *linesIter) Next(key, val sophie.SophieReader) error {
	if iter.pos >= len(iter.lines) {
		return sophie.EOF
	}
	// Only the key is filled in; this iterator carries no values, so val is
	// left untouched.
	*(key.(*sophie.RawString)) = sophie.RawString(iter.lines[iter.pos])
	iter.pos++
	return nil
}
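The struct behind this method is not shown; below is a minimal sketch of a plausible definition and a driver loop, assuming the field layout implied above (lines []string, pos int) and the usual import path for sophie (github.com/daviddengcn/sophie). Treat both as assumptions, not the library's definitive API.

// Assumed layout, inferred from the Next method above.
type linesIter struct {
	lines []string
	pos   int
}

// collectLines drains the iterator using the Next/sophie.EOF convention
// seen throughout these examples.
func collectLines(iter *linesIter) ([]string, error) {
	var key sophie.RawString
	var out []string
	for {
		// This iterator never touches val, so nil is acceptable here.
		err := iter.Next(&key, nil)
		if err == sophie.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		out = append(out, string(key))
	}
	return out, nil
}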
Example #3
func generateCrawlEntries(db *gcse.MemDB, hostFromID func(id string) string,
	out kv.DirOutput) error {
	now := time.Now()
	groups := make(map[string]sophie.CollectCloser)
	count := 0
	if err := db.Iterate(func(id string, val interface{}) error {
		ent, ok := val.(gcse.CrawlingEntry)
		if !ok {
			log.Printf("Wrong entry: %+v", ent)
			return nil
		}

		if ent.Version == gcse.CrawlerVersion &&
			ent.ScheduleTime.After(now) {
			return nil
		}

		host := hostFromID(id)

		// check host black list
		if gcse.NonCrawlHosts.In(host) {
			return nil
		}

		c, ok := groups[host]
		if !ok {
			index := len(groups)
			var err error
			c, err = out.Collector(index)
			if err != nil {
				return err
			}
			groups[host] = c
		}

		if rand.Intn(10) == 0 {
			// randomly set Etag to empty to fetch stars
			ent.Etag = ""
		}

		count++
		return c.Collect(sophie.RawString(id), &ent)
	}); err != nil {
		return err
	}

	for _, c := range groups {
		c.Close()
	}

	log.Printf("%d entries to crawl for folder %v", count, out.Path)
	return nil
}
Example #4
func (wcm *WordCountMapper) Map(key, val sophie.SophieWriter, c PartCollector) error {
	//fmt.Printf("WordCountMapper (%v, %v) ...\n", key, val)
	line := *(key.(*sophie.RawString))
	words := strings.Split(string(line), " ")
	for _, word := range words {
		if len(word) == 0 {
			continue
		}
		word = strings.ToLower(word)
		//fmt.Printf("CollectTo %v\n", word)
		c.CollectTo(int(word[0]), sophie.RawString(word), sophie.RawVInt(1))
		//		c.CollectTo(0, RawString(word), RawVInt(1))
	}
	return nil
}
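A sketch of the matching reduce side, following the ReducerStruct/SophierIterator conventions from Example #1 and assuming, as there, that these types live in the same mr package as the tests. sophie.NewRawVInt is an assumed constructor (by analogy with sophie.NewVInt), and the Collect call at the end assumes collectors accept SophieWriter pairs as in the other examples.

// A word-count reducer in the style of Example #1: sums the 1-counts
// emitted by WordCountMapper for each word.
func newWordCountReducer() Reducer {
	return &ReducerStruct{
		NewKeyF: sophie.NewRawString,
		// NewRawVInt is assumed here, by analogy with sophie.NewVInt.
		NewValF: sophie.NewRawVInt,

		ReduceF: func(key sophie.SophieWriter,
			nextVal SophierIterator, c []sophie.Collector) error {

			sum := sophie.RawVInt(0)
			for {
				val, err := nextVal()
				if err == sophie.EOF {
					break
				}
				if err != nil {
					return err
				}
				sum += *val.(*sophie.RawVInt)
			}
			return c[0].Collect(key, sum)
		},
	}
}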
Example #5
func main() {
	fmt.Println("Data conversion tool")
	fpRoot := sophie.LocalFsPath("./data")
	/*
	 * Doc db
	 */
	if DocDBPath.Exists() {
		if DocDBPath.Join(gcse.KindDocDB+".gob").Exists() &&
			!gcse.DataRoot.Join(fnNewDocDB).Exists() {
			src := DocDBPath.Join(gcse.KindDocDB + ".gob")
			dst := fpRoot.Join(fnNewDocDB)
			fmt.Println("Convert", src, "to", dst, "...")

			srcDB := gcse.PackedDocDB{MemDB: gcse.NewMemDB(DocDBPath, gcse.KindDocDB)}
			if err := srcDB.Load(); err != nil {
				log.Fatalf("srcDB.Load: %v", err)
			}

			fpDocs := fpRoot.Join(fnNewDocDB)
			dstDB := kv.DirOutput(fpDocs)
			c, err := dstDB.Collector(0)
			if err != nil {
				log.Fatalf("dstDB.Collector: %v", err)
			}

			count := 0
			if err := srcDB.Iterate(func(key string, val interface{}) error {
				k := sophie.RawString(key)
				v := val.(gcse.DocInfo)

				if count < 10 {
					fmtp.Printfln("  key: %+v, value: %+v", k, v)
				}

				count++
				return c.Collect(k, &v)
			}); err != nil {
				fpDocs.Remove()
				log.Fatalf("srcDB.Iterate: %v", err)
			}
			c.Close()

			fmtp.Printfln("Conversion sucess, %d entries collected.", count)
		}
	}
}
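For reference, reading the converted folder back could follow the kv.DirInput iteration pattern used in Example #9 below. This is a sketch under two assumptions: that sophie.FsPath is the path type returned by LocalFsPath/Join, and that *gcse.DocInfo implements sophie.SophieReader (as the Next callbacks in Example #6 suggest).

// countConvertedDocs iterates every part of a kv folder and counts entries.
func countConvertedDocs(fpDocs sophie.FsPath) (int, error) {
	in := kv.DirInput(fpDocs)
	parts, err := in.PartCount()
	if err != nil {
		return 0, err
	}
	total := 0
	var key sophie.RawString
	var val gcse.DocInfo
	for i := 0; i < parts; i++ {
		it, err := in.Iterator(i)
		if err != nil {
			return 0, err
		}
		for {
			err := it.Next(&key, &val)
			if err == sophie.EOF {
				break
			}
			if err != nil {
				it.Close()
				return 0, err
			}
			total++
		}
		it.Close()
	}
	return total, nil
}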
Example #6
func TestIndex(t *testing.T) {
	docs := []DocInfo{
		{
			Package: "github.com/daviddengcn/gcse",
			Name:    "gcse",
			TestImports: []string{
				"github.com/daviddengcn/go-villa",
				"github.com/daviddengcn/gcse",
			},
		}, {
			Package: "github.com/daviddengcn/gcse/indexer",
			Name:    "main",
			Imports: []string{
				"github.com/daviddengcn/gcse",
				"github.com/daviddengcn/go-villa",
				"github.com/daviddengcn/gcse/indexer",
			},
		}, {
			Package: "github.com/daviddengcn/go-villa",
			Name:    "villa",
		},
	}
	ts, err := Index(&mr.InputStruct{
		PartCountF: func() (int, error) {
			return 1, nil
		},
		IteratorF: func(int) (sophie.IterateCloser, error) {
			index := 0
			return &sophie.IterateCloserStruct{
				NextF: func(key, val sophie.SophieReader) error {
					if index >= len(docs) {
						return sophie.EOF
					}
					*key.(*sophie.RawString) = sophie.RawString(
						docs[index].Package)
					*val.(*DocInfo) = docs[index]
					val.(*DocInfo).Imports = append([]string{}, docs[index].Imports...)
					val.(*DocInfo).TestImports = append([]string{}, docs[index].TestImports...)

					index++
					return nil
				},
			}, nil
		},
	})
	if err != nil {
		t.Error(err)
		return
	}

	numDocs := ts.DocCount()
	assert.Equal(t, "DocCount", numDocs, 3)

	var pkgs []string
	if err := ts.Search(map[string]villa.StrSet{IndexTextField: nil},
		func(docID int32, data interface{}) error {
			hit := data.(HitInfo)
			pkgs = append(pkgs, hit.Package)
			return nil
		},
	); err != nil {
		t.Error(err)
		return
	}
	assert.StringEqual(t, "all", pkgs,
		[]string{
			"github.com/daviddengcn/gcse",
			"github.com/daviddengcn/go-villa",
			"github.com/daviddengcn/gcse/indexer",
		})

	var gcseInfo HitInfo
	if err := ts.Search(map[string]villa.StrSet{
		IndexPkgField: villa.NewStrSet("github.com/daviddengcn/gcse"),
	}, func(docID int32, data interface{}) error {
		gcseInfo = data.(HitInfo)
		return nil
	}); err != nil {
		t.Errorf("ts.Search: %v", err)
		return
	}
	assert.StringEqual(t, "gcseInfo.Imported",
		gcseInfo.Imported,
		[]string{
			"github.com/daviddengcn/gcse/indexer",
		})
	assert.StringEqual(t, "gcseInfo.TestImports",
		gcseInfo.TestImports,
		[]string{
			"github.com/daviddengcn/go-villa",
		})

	var indexerInfo HitInfo
	if err := ts.Search(map[string]villa.StrSet{
		IndexPkgField: villa.NewStrSet("github.com/daviddengcn/gcse/indexer"),
	}, func(docID int32, data interface{}) error {
		indexerInfo = data.(HitInfo)
		return nil
	}); err != nil {
		t.Errorf("ts.Search: %v", err)
		return
	}
	assert.StringEqual(t, "indexerInfo.Imported",
		indexerInfo.Imported, []string{})
	assert.StringEqual(t, "indexerInfo.Imports",
		indexerInfo.Imports, []string{})

	if err := ts.Search(map[string]villa.StrSet{
		IndexPkgField: villa.NewStrSet("github.com/daviddengcn/go-villa"),
	}, func(docID int32, data interface{}) error {
		gcseInfo = data.(HitInfo)
		return nil
	}); err != nil {
		t.Errorf("ts.Search: %v", err)
		return
	}
	assert.StringEqual(t, "indexerInfo.Imported",
		fmt.Sprintf("%+v", indexerInfo.Imported),
		"[]")
	assert.StringEqual(t, "gcseInfo.TestImported",
		gcseInfo.TestImported,
		[]string{"github.com/daviddengcn/gcse"})
}
Example #7
func TestIndex(t *testing.T) {
	const (
		package0 = "github.com/daviddengcn/gcse"
		package1 = "github.com/daviddengcn/gcse/indexer"
		package2 = "github.com/daviddengcn/go-villa"
	)

	docs := []DocInfo{
		{
			Package: package0,
			Name:    "gcse",
			TestImports: []string{
				package2, package0,
			},
		}, {
			Package: package1,
			Name:    "main",
			Imports: []string{
				package0,
				package2,
				package1,
			},
		}, {
			Package: package2,
			Name:    "villa",
		},
	}
	ts, err := Index(&mr.InputStruct{
		PartCountF: func() (int, error) {
			return 1, nil
		},
		IteratorF: func(int) (sophie.IterateCloser, error) {
			index := 0
			return &sophie.IterateCloserStruct{
				NextF: func(key, val sophie.SophieReader) error {
					if index >= len(docs) {
						return io.EOF
					}
					*key.(*sophie.RawString) = sophie.RawString(
						docs[index].Package)
					*val.(*DocInfo) = docs[index]
					val.(*DocInfo).Imports = append([]string{}, docs[index].Imports...)
					val.(*DocInfo).TestImports = append([]string{}, docs[index].TestImports...)

					index++
					return nil
				},
			}, nil
		},
	}, "./tmp")
	assert.NoErrorOrDie(t, err)

	hitsArr, err := index.OpenConstArray(path.Join("./tmp", HitsArrFn))
	assert.NoErrorOrDie(t, err)
	for _, doc := range docs {
		idx := -1
		ts.Search(index.SingleFieldQuery(IndexPkgField, doc.Package), func(docID int32, data interface{}) error {
			idx = int(docID)
			return nil
		})
		d, err := hitsArr.GetGob(idx)
		assert.NoError(t, err)
		assert.Equal(t, "d.Package", d.(HitInfo).Package, doc.Package)
	}
	numDocs := ts.DocCount()
	assert.Equal(t, "DocCount", numDocs, 3)

	var pkgs []string
	if err := ts.Search(map[string]stringsp.Set{IndexTextField: nil},
		func(docID int32, data interface{}) error {
			hit := data.(HitInfo)
			pkgs = append(pkgs, hit.Package)
			return nil
		},
	); err != nil {
		t.Error(err)
		return
	}
	assert.StringEqual(t, "all", pkgs,
		[]string{
			"github.com/daviddengcn/gcse",
			"github.com/daviddengcn/go-villa",
			"github.com/daviddengcn/gcse/indexer",
		})

	var gcseInfo HitInfo
	if err := ts.Search(map[string]stringsp.Set{
		IndexPkgField: stringsp.NewSet("github.com/daviddengcn/gcse"),
	}, func(docID int32, data interface{}) error {
		gcseInfo = data.(HitInfo)
		return nil
	}); err != nil {
		t.Errorf("ts.Search: %v", err)
		return
	}
	assert.Equal(t, "gcseInfo.Imported", gcseInfo.Imported, []string(nil))
	assert.Equal(t, "gcseInfo.ImportedLen", gcseInfo.ImportedLen, 1)
	assert.Equal(t, "gcseInfo.TestImports", gcseInfo.TestImports, []string{"github.com/daviddengcn/go-villa"})

	var indexerInfo HitInfo
	if err := ts.Search(map[string]stringsp.Set{
		IndexPkgField: stringsp.NewSet("github.com/daviddengcn/gcse/indexer"),
	}, func(docID int32, data interface{}) error {
		indexerInfo = data.(HitInfo)
		return nil
	}); err != nil {
		t.Errorf("ts.Search: %v", err)
		return
	}
	assert.StringEqual(t, "indexerInfo.Imported",
		indexerInfo.Imported, []string{})
	assert.StringEqual(t, "indexerInfo.Imports",
		indexerInfo.Imports, []string{})

	if err := ts.Search(map[string]stringsp.Set{
		IndexPkgField: stringsp.NewSet("github.com/daviddengcn/go-villa"),
	}, func(docID int32, data interface{}) error {
		gcseInfo = data.(HitInfo)
		return nil
	}); err != nil {
		t.Errorf("ts.Search: %v", err)
		return
	}
	assert.Equal(t, "indexerInfo.Imported", indexerInfo.Imported, []string(nil))
	assert.Equal(t, "gcseInfo.TestImportedLen", gcseInfo.TestImportedLen, 1)
	assert.Equal(t, "gcseInfo.TestImported", gcseInfo.TestImported, []string(nil))
}
Example #8
func generateCrawlEntries(db *gcse.MemDB, hostFromID func(id string) string, out kv.DirOutput) error {
	now := time.Now()
	type idAndCrawlingEntry struct {
		id  string
		ent *gcse.CrawlingEntry
	}
	groups := make(map[string][]idAndCrawlingEntry)
	count := 0
	type nameAndAges struct {
		maxName string
		maxAge  time.Duration

		sumAgeHours float64
		cnt         int
	}
	ages := make(map[string]nameAndAges)
	if err := db.Iterate(func(id string, val interface{}) error {
		ent, ok := val.(gcse.CrawlingEntry)
		if !ok {
			log.Printf("Wrong entry: %+v", ent)
			return nil
		}
		if ent.Version == gcse.CrawlerVersion && ent.ScheduleTime.After(now) {
			return nil
		}
		host := hostFromID(id)

		// check host black list
		if configs.NonCrawlHosts.Contain(host) {
			return nil
		}
		if rand.Intn(10) == 0 {
			// randomly set Etag to empty to fetch stars
			ent.Etag = ""
		}
		groups[host] = append(groups[host], idAndCrawlingEntry{id, &ent})

		age := now.Sub(ent.ScheduleTime)
		na := ages[host]
		if age > na.maxAge {
			na.maxName, na.maxAge = id, age
		}
		na.sumAgeHours += age.Hours()
		na.cnt++
		ages[host] = na

		count++
		return nil
	}); err != nil {
		return errorsp.WithStacks(err)
	}
	index := 0
	for _, g := range groups {
		sortp.SortF(len(g), func(i, j int) bool {
			return g[i].ent.ScheduleTime.Before(g[j].ent.ScheduleTime)
		}, func(i, j int) {
			g[i], g[j] = g[j], g[i]
		})
		if err := func(index int, ies []idAndCrawlingEntry) error {
			c, err := out.Collector(index)
			if err != nil {
				return err
			}
			defer c.Close()

			for _, ie := range ies {
				if err := c.Collect(sophie.RawString(ie.id), ie.ent); err != nil {
					return err
				}
			}
			return nil
		}(index, g); err != nil {
			log.Printf("Saving ents failed: %v", err)
		}
		index++
	}
	for host, na := range ages {
		aveAge := time.Duration(na.sumAgeHours / float64(na.cnt) * float64(time.Hour))
		log.Printf("%s age: max -> %v(%s), ave -> %v", host, na.maxAge, na.maxName, aveAge)
		if host == "github.com" && strings.Contains(out.Path, configs.FnPackage) {
			gcse.AddBiValueAndProcess(bi.Average, "crawler.github_max_age.hours", int(na.maxAge.Hours()))
			gcse.AddBiValueAndProcess(bi.Average, "crawler.github_max_age.days", int(na.maxAge/timep.Day))
			gcse.AddBiValueAndProcess(bi.Average, "crawler.github_ave_age.hours", int(aveAge.Hours()))
			gcse.AddBiValueAndProcess(bi.Average, "crawler.github_ave_age.days", int(aveAge/timep.Day))
		}
	}
	log.Printf("%d entries to crawl for folder %v", count, out.Path)
	return nil
}
Example #9
func TestMRFromFile(t *testing.T) {
	fmt.Println("TestMRFromFile starts")
	fpRoot := sophie.LocalFsPath(".")

	mrin := fpRoot.Join("mrin")
	mrin.Mkdir(0755)

	mrtmp := fpRoot.Join("tmp")

	/*
	 * Prepare input
	 */
	var inF *kv.Writer
	index := 0
	lines := strings.Split(WORDS, "\n")
	for i, line := range lines {
		if i%3 == 0 {
			if inF != nil {
				assert.NoErrorf(t, "inF.Close: %v", inF.Close())
				index++
			}
			var err error
			inF, err = kv.NewWriter(mrin.Join(fmt.Sprintf("part-%05d", index)))
			assert.NoErrorf(t, "NewKVWriter: %v", err)
		}

		assert.NoErrorf(t, "inF.Collect",
			inF.Collect(sophie.RawString(line), sophie.Null{}))
	}
	if inF != nil {
		assert.NoErrorf(t, "inF.Close: %v", inF.Close())
	}

	mrout := fpRoot.Join("mrout")
	assert.NoErrorf(t, "Remove mrout: %v", mrout.Remove())

	/*
	 * MrJob
	 */
	var mapper WordCountMapper
	reducer := WordCountReducer{counts: make(map[string]int)}

	job := MrJob{
		Source: []Input{kv.DirInput(mrin)},
		NewMapperF: func(src, part int) Mapper {
			return &mapper
		},

		Sorter: NewFileSorter(mrtmp),

		NewReducerF: func(part int) Reducer {
			return &reducer
		},
		Dest: []Output{kv.DirOutput(mrout)},
	}

	assert.NoErrorf(t, "RunJob: %v", job.Run())

	/*
	 * Check result
	 */
	resIn := kv.DirInput(mrout)
	n, err := resIn.PartCount()
	assert.NoErrorf(t, "resIn.PartCount(): %v", err)
	var word sophie.RawString
	var cnt sophie.RawVInt
	actCnts := make(map[string]int)
	for i := 0; i < n; i++ {
		iter, err := resIn.Iterator(i)
		assert.NoErrorf(t, "resIn.Iterator: %v", err)
		for {
			err := iter.Next(&word, &cnt)
			if err == sophie.EOF {
				break
			}
			assert.NoErrorf(t, "iter.Next: %v", err)
			actCnts[string(word)] = int(cnt)
		}
	}

	expCnts := statWords(WORDS)
	// fmt.Println(expCnts)
	// fmt.Println(actCnts)

	assertMapEquals(t, actCnts, expCnts)
	fmt.Println("TestMRFromFile ends")
}
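The test refers to statWords and assertMapEquals without showing them. A hypothetical statWords consistent with WordCountMapper from Example #4 (split on single spaces, drop empty tokens, lowercase) might look like the following; the real helper may differ.

// Hypothetical helper: counts words with the same tokenization as
// WordCountMapper, so expected and actual counts line up.
func statWords(text string) map[string]int {
	counts := make(map[string]int)
	for _, line := range strings.Split(text, "\n") {
		for _, word := range strings.Split(line, " ") {
			if len(word) == 0 {
				continue
			}
			counts[strings.ToLower(word)]++
		}
	}
	return counts
}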
Example #10
// OnlyMapper.Map
func (pc *PackageCrawler) Map(key, val sophie.SophieWriter, c []sophie.Collector) error {
	if time.Now().After(AppStopTime) {
		log.Printf("[Part %d] Timeout(key = %v), PackageCrawler returns EOM",
			pc.part, key)
		return mr.EOM
	}
	pkg := string(*key.(*sophie.RawString))
	ent := val.(*gcse.CrawlingEntry)
	if ent.Version < gcse.CrawlerVersion {
		// if gcse.CrawlerVersion is larger than Version, Etag is ignored.
		ent.Etag = ""
	}
	log.Printf("[Part %d] Crawling package %v with etag %s\n", pc.part, pkg, ent.Etag)

	p, flds, err := gcse.CrawlPackage(pc.httpClient, pkg, ent.Etag)
	for _, fld := range flds {
		if spider.LikeGoSubFolder(fld.Name) {
			appendNewPackage(pkg+"/"+fld.Path, "parent")
		}
	}
	site, path := utils.SplitPackage(pkg)
	if err != nil && errorsp.Cause(err) != gcse.ErrPackageNotModifed {
		log.Printf("[Part %d] Crawling pkg %s failed: %v", pc.part, pkg, err)
		if gcse.IsBadPackage(err) {
			utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), sppb.HistoryEvent_Action_Invalid), "AppendPackageEvent %v %v failed", site, path)
			bi.AddValue(bi.Sum, "crawler.package.wrong-package", 1)
			// a wrong path
			nda := gcse.NewDocAction{
				Action: gcse.NDA_DEL,
			}
			c[0].Collect(sophie.RawString(pkg), &nda)
			cDB.PackageDB.Delete(pkg)
			log.Printf("[Part %d] Remove wrong package %s", pc.part, pkg)
		} else {
			utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), sppb.HistoryEvent_Action_Failed), "AppendPackageEvent %v %v failed", site, path)
			bi.Inc("crawler.package.failed")
			if strings.HasPrefix(pkg, "github.com/") {
				bi.Inc("crawler.package.failed.github")
			}
			pc.failCount++

			cDB.SchedulePackage(pkg, time.Now().Add(12*time.Hour), ent.Etag)

			if pc.failCount >= 10 || strings.Contains(err.Error(), "403") {
				durToSleep := 10 * time.Minute
				if time.Now().Add(durToSleep).After(AppStopTime) {
					log.Printf("[Part %d] Timeout(key = %v), PackageCrawler returns EOM",
						pc.part, key)
					return mr.EOM
				}

				log.Printf("[Part %d] Last ten crawling packages failed, sleep for a while...(current: %s)",
					pc.part, pkg)
				time.Sleep(durToSleep)
				pc.failCount = 0
			}
		}
		return nil
	}
	utils.LogError(store.AppendPackageEvent(site, path, "", time.Now(), sppb.HistoryEvent_Action_Success), "AppendPackageEvent %v %v failed", site, path)
	pc.failCount = 0
	if errorsp.Cause(err) == gcse.ErrPackageNotModifed {
		// TODO crawling stars for unchanged project
		log.Printf("[Part %d] Package %s unchanged!", pc.part, pkg)
		schedulePackageNextCrawl(pkg, ent.Etag)
		bi.AddValue(bi.Sum, "crawler.package.not-modified", 1)
		return nil
	}
	bi.AddValue(bi.Sum, "crawler.package.success", 1)
	if strings.HasPrefix(pkg, "github.com/") {
		bi.AddValue(bi.Sum, "crawler.package.success.github", 1)
	}
	log.Printf("[Part %d] Crawled package %s success!", pc.part, pkg)

	var pkgInfo *stpb.PackageInfo
	if err := store.UpdatePackage(site, path, func(pi *stpb.PackageInfo) error {
		fillPackageInfo(p, pi)
		pkgInfo = pi
		return nil
	}); err != nil {
		log.Printf("UpdatePackage %v %v failed: %v", site, path, err)
	}
	saveRelatedInfo(pkgInfo)

	nda := gcse.NewDocAction{
		Action:  gcse.NDA_UPDATE,
		DocInfo: packageToDoc(p),
	}
	c[0].Collect(sophie.RawString(pkg), &nda)
	log.Printf("[Part %d] Package %s saved!", pc.part, pkg)

	if !strings.HasPrefix(pkg, "github.com/") {
		// github.com throttling is done within the GithubSpider.
		time.Sleep(10 * time.Second)
	}
	return nil
}
Example #11
// OnlyMapper.Map
func (pc *PackageCrawler) Map(key, val sophie.SophieWriter,
	c []sophie.Collector) error {
	if time.Now().After(AppStopTime) {
		log.Printf("[Part %d] Timeout(key = %v), PackageCrawler returns EOM",
			pc.part, key)
		return mr.EOM
	}

	pkg := string(*key.(*sophie.RawString))
	ent := val.(*gcse.CrawlingEntry)
	if ent.Version < gcse.CrawlerVersion {
		// if gcse.CrawlerVersion is larger than Version, Etag is ignored.
		ent.Etag = ""
	}
	log.Printf("[Part %d] Crawling package %v with etag %s\n", pc.part, pkg, ent.Etag)

	p, err := gcse.CrawlPackage(pc.httpClient, pkg, ent.Etag)
	if err != nil && err != gcse.ErrPackageNotModifed {
		log.Printf("[Part %d] Crawling pkg %s failed: %v", pc.part, pkg, err)
		if gcse.IsBadPackage(err) {
			// a wrong path
			nda := gcse.NewDocAction{
				Action: gcse.NDA_DEL,
			}
			c[0].Collect(sophie.RawString(pkg), &nda)
			cDB.PackageDB.Delete(pkg)
			log.Printf("[Part %d] Remove wrong package %s", pc.part, pkg)
		} else {
			pc.failCount++

			cDB.SchedulePackage(pkg, time.Now().Add(12*time.Hour), ent.Etag)

			if pc.failCount >= 10 || strings.Contains(err.Error(), "403") {
				durToSleep := 10 * time.Minute
				if time.Now().Add(durToSleep).After(AppStopTime) {
					log.Printf("[Part %d] Timeout(key = %v), PackageCrawler returns EOM",
						pc.part, key)
					return mr.EOM
				}

				log.Printf("[Part %d] Last ten crawling packages failed, sleep for a while...(current: %s)",
					pc.part, pkg)
				time.Sleep(durToSleep)
				pc.failCount = 0
			}
		}
		return nil
	}

	pc.failCount = 0
	if err == gcse.ErrPackageNotModifed {
		// TODO crawling stars for unchanged project
		log.Printf("[Part %d] Package %s unchanged!", pc.part, pkg)
		schedulePackageNextCrawl(pkg, ent.Etag)
		return nil
	}

	log.Printf("[Part %d] Crawled package %s success!", pc.part, pkg)

	nda := gcse.NewDocAction{
		Action:  gcse.NDA_UPDATE,
		DocInfo: packageToDoc(p),
	}
	c[0].Collect(sophie.RawString(pkg), &nda)
	log.Printf("[Part %d] Package %s saved!", pc.part, pkg)

	time.Sleep(10 * time.Second)

	return nil
}
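Both crawler versions share the same early-exit contract: returning mr.EOM from Map tells the framework to stop feeding this mapper without failing the job. A sketch of that guard in isolation, using only the Map signature shown above (the rest of the OnlyMapper interface is omitted here):

// deadlineCheck factors out the timeout guard used at the top of Map above.
// It returns mr.EOM once the deadline passes, nil otherwise.
func deadlineCheck(stop time.Time, part int, key sophie.SophieWriter) error {
	if time.Now().After(stop) {
		log.Printf("[Part %d] Timeout(key = %v), returning EOM", part, key)
		return mr.EOM
	}
	return nil
}

Inside Map, the inline check could then read: if err := deadlineCheck(AppStopTime, pc.part, key); err != nil { return err }.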