Example #1
0
// fillPackageInfo copies the crawled package p into the message pi,
// overwriting the corresponding fields of pi.
//
// Most fields are copied one to one: p.Doc is stored as Description,
// p.StarCount is converted to int32, and Author is computed from the package
// path with gcse.AuthorOfPackage. Imports and TestImports are rebuilt from
// scratch and keep only the paths accepted by doc.IsValidRemotePath.
func fillPackageInfo(p *gcse.Package, pi *stpb.PackageInfo) {
	pi.Package, pi.Name = p.Package, p.Name
	pi.Synopsis, pi.Description = p.Synopsis, p.Doc
	pi.Author = gcse.AuthorOfPackage(p.Package)
	pi.ProjectUrl = p.ProjectURL
	pi.Stars = int32(p.StarCount)
	pi.ReadmeFn, pi.ReadmeData = p.ReadmeFn, p.ReadmeData
	pi.Exported = p.Exported
	pi.References = p.References

	// Collect into fresh slices so that nothing previously stored in pi
	// survives; an empty result leaves the field nil.
	var imports []string
	for i := range p.Imports {
		path := p.Imports[i]
		if !doc.IsValidRemotePath(path) {
			continue
		}
		imports = append(imports, path)
	}
	pi.Imports = imports

	var testImports []string
	for i := range p.TestImports {
		path := p.TestImports[i]
		if !doc.IsValidRemotePath(path) {
			continue
		}
		testImports = append(testImports, path)
	}
	pi.TestImports = testImports
}
Example #2
0
// packageToDoc converts the crawled package p into a gcse.DocInfo and, as a
// side effect, feeds what it learned back into the crawler:
//
//   - the author is appended to cDB for github.com/ and bitbucket.org/ packages,
//   - every kept import, kept test import and every reference of p is passed
//     to appendPackage,
//   - the package itself is handed to schedulePackageNextCrawl with p.Etag.
//
// Imports and TestImports of the result contain only the paths accepted by
// doc.IsValidRemotePath. LastUpdated is set to the current time.
func packageToDoc(p *gcse.Package) gcse.DocInfo {
	info := gcse.DocInfo{
		Package:     p.Package,
		Name:        p.Name,
		Synopsis:    p.Synopsis,
		Description: p.Doc,
		LastUpdated: time.Now(),
		Author:      gcse.AuthorOfPackage(p.Package),
		ProjectURL:  p.ProjectURL,
		StarCount:   p.StarCount,
		ReadmeFn:    p.ReadmeFn,
		ReadmeData:  p.ReadmeData,
		Exported:    p.Exported,
	}

	// Keep only the import paths that are valid remote paths.
	var imports []string
	for _, path := range p.Imports {
		if !doc.IsValidRemotePath(path) {
			continue
		}
		imports = append(imports, path)
	}
	info.Imports = imports

	var testImports []string
	for _, path := range p.TestImports {
		if !doc.IsValidRemotePath(path) {
			continue
		}
		testImports = append(testImports, path)
	}
	info.TestImports = testImports

	// Record the author under the hosting site of the package, if recognised.
	switch {
	case strings.HasPrefix(info.Package, "github.com/"):
		cDB.AppendPerson("github.com", info.Author)
	case strings.HasPrefix(info.Package, "bitbucket.org/"):
		cDB.AppendPerson("bitbucket.org", info.Author)
	}

	// Offer every package this one mentions to the crawler.
	for i := range info.Imports {
		appendPackage(info.Imports[i])
	}
	for i := range info.TestImports {
		appendPackage(info.TestImports[i])
	}
	log.Printf("[pushPackage] References: %v", p.References)
	for i := range p.References {
		appendPackage(p.References[i])
	}

	schedulePackageNextCrawl(info.Package, p.Etag)

	return info
}
Example #3
0
// filterPackages normalises every entry of pkgs with gcse.TrimPackageName and
// returns, in the original order, the normalised names that
// doc.IsValidRemotePath accepts. The result is nil when nothing is kept.
func filterPackages(pkgs []string) (res []string) {
	for i := range pkgs {
		if name := gcse.TrimPackageName(pkgs[i]); doc.IsValidRemotePath(name) {
			res = append(res, name)
		}
	}
	return res
}
Example #4
0
// pushPackage stores the crawled package p as a DocInfo in the fetched-doc
// database and updates the crawler's work lists accordingly.
//
// It returns false, after logging through c, when the DocInfo cannot be
// saved; in that case nothing else is done. Otherwise it
//
//   - appends the author for github.com/ and bitbucket.org/ packages,
//   - passes every kept import and every reference of p to appendPackage,
//   - schedules the package itself for its next crawl,
//
// and returns true. Only import paths accepted by doc.IsValidRemotePath are
// kept in the stored DocInfo.
func pushPackage(c appengine.Context, p *gcc.Package) (succ bool) {
	// copy Package as a DocInfo
	d := DocInfo{
		Name:        p.Name,
		Package:     p.ImportPath,
		Synopsis:    p.Synopsis,
		Description: p.Doc,
		LastUpdated: time.Now(),
		Author:      authorOfPackage(p.ImportPath),
		ProjectURL:  p.ProjectURL,
		StarCount:   p.StarCount,
		ReadmeFn:    p.ReadmeFn,
		ReadmeData:  p.ReadmeData,
	}

	d.Imports = nil
	for _, imp := range p.Imports {
		if doc.IsValidRemotePath(imp) {
			d.Imports = append(d.Imports, imp)
		}
	}

	// save DocInfo into fetchedDoc DB
	ddb := NewDocDB(c, kindFetchedDoc)
	if err := ddb.Put(d.Package, &d); err != nil {
		// The format has two verbs, so both the key and the error must be
		// supplied; otherwise the error lands in the %s slot and %v is
		// reported as missing.
		c.Errorf("ddb.Put(%s) failed: %v", d.Package, err)
		return false
	}

	// append new authors
	switch {
	case strings.HasPrefix(d.Package, "github.com/"):
		appendPerson(c, "github.com", d.Author)
	case strings.HasPrefix(d.Package, "bitbucket.org/"):
		appendPerson(c, "bitbucket.org", d.Author)
	}

	for _, imp := range d.Imports {
		appendPackage(c, imp)
	}
	log.Printf("[crawlPackage] References: %v", p.References)
	for _, ref := range p.References {
		appendPackage(c, ref)
	}

	// Next crawl is DefaultPackageAge from now plus a random offset. As
	// written the offset is rand.Int63n(age/10) - age/5, i.e. it lies in
	// [-age/5, -age/10).
	// NOTE(review): the offset is always negative — confirm this is the
	// intended jitter window.
	schedulePackage(c, d.Package, time.Now().Add(DefaultPackageAge).Add(
		time.Duration(rand.Int63n(int64(DefaultPackageAge)/10)-
			int64(DefaultPackageAge)/5)))

	return true
}
Example #5
0
// touchPackage pushes pkg onto the crawler's to-crawl list unless pkgUTs
// records a time for it that is later than crawledBefore.
//
// pkg is trimmed of surrounding whitespace first; paths rejected by
// doc.IsValidRemotePath are ignored. A package with no entry in pkgUTs is
// always pushed.
func touchPackage(pkg string, crawledBefore time.Time, pkgUTs map[string]time.Time) {
	path := strings.TrimSpace(pkg)
	if !doc.IsValidRemotePath(path) {
		return
	}

	if updated, known := pkgUTs[path]; known && updated.After(crawledBefore) {
		// Recorded time is later than the cut-off: nothing to do.
		return
	}

	cDB.PushToCrawlPackage(path)
}
Example #6
0
// AppendPackage offers pkg to the crawler and schedules it for the current
// time when needed.
//
// The name is normalised with TrimPackageName and dropped if
// doc.IsValidRemotePath rejects it. A package with no entry in PackageDB is
// scheduled. A package that already has an entry is scheduled again only
// when its ScheduleTime is not before now and inDocs reports that it has no
// docs; inDocs is not consulted otherwise.
func (cdb *CrawlerDB) AppendPackage(pkg string, inDocs func(pkg string) bool) {
	pkg = TrimPackageName(pkg)
	if !doc.IsValidRemotePath(pkg) {
		return
	}

	var entry CrawlingEntry
	known := cdb.PackageDB.Get(pkg, &entry)
	switch {
	case !known:
		log.Printf("Scheduling new package: %v", pkg)
	case entry.ScheduleTime.Before(time.Now()) || inDocs(pkg):
		// Either already due, or its docs are present: leave it alone.
		return
	default:
		// Known to the crawler but missing from Docs: pull it forward.
		log.Printf("Scheduling a package with missing docs: %v", pkg)
	}

	cdb.SchedulePackage(pkg, time.Now(), "")
}
Example #7
0
// touchPackage schedules pkg for the current time with an empty Etag unless
// docDB holds an entry for it whose LastUpdated is later than crawledBefore.
//
// pkg is trimmed of surrounding whitespace first. The result is true only
// when schedulePackage was called and returned nil; it is false for paths
// rejected by doc.IsValidRemotePath and for packages updated after
// crawledBefore.
func touchPackage(pkg string, crawledBefore time.Time) bool {
	path := strings.TrimSpace(pkg)
	if !doc.IsValidRemotePath(path) {
		return false
	}

	var info gcse.DocInfo
	if found := docDB.Get(path, &info); found && info.LastUpdated.After(crawledBefore) {
		// Updated after the cut-off: no need to schedule.
		return false
	}

	// An empty Etag is passed along with the new schedule time.
	err := schedulePackage(path, time.Now(), "")
	return err == nil
}
Example #8
0
// AppendPackage offers pkg to the crawler. The package is scheduled for the
// current time, with an empty Etag, unless it both has an entry in PackageDB
// and is reported present by inDocs.
//
// The name is normalised with TrimPackageName and silently dropped if
// doc.IsValidRemotePath rejects it. inDocs is only consulted for packages
// that already have a PackageDB entry.
func (cdb *CrawlerDB) AppendPackage(pkg string,
	inDocs func(pkg string) bool) {
	name := TrimPackageName(pkg)
	if !doc.IsValidRemotePath(name) {
		return
	}

	var entry CrawlingEntry
	if cdb.PackageDB.Get(name, &entry) && inDocs(name) {
		// Known to the crawler and already documented.
		return
	}

	// New package, or one whose docs are missing: schedule it now.
	cdb.SchedulePackage(name, time.Now(), "")
}
Example #9
0
// appendPackage schedules pkg for the current time if the crawler-package
// database has no entry for it yet, and reports whether that scheduling
// succeeded.
//
// It returns false without scheduling when pkg is rejected by
// doc.IsValidRemotePath, when an entry already exists, or when the lookup
// fails; the latter two cases are logged.
func appendPackage(c appengine.Context, pkg string) bool {
	if !doc.IsValidRemotePath(pkg) {
		return false
	}

	ddb := NewCachedDocDB(c, kindCrawlerPackage)

	var entry CrawlingEntry
	err, found := ddb.Get(pkg, &entry)
	switch {
	case found:
		// An existing entry takes precedence over any lookup error.
		log.Printf("  [appendPackage] Package %s was scheduled to %v", pkg, entry.ScheduleTime)
		return false
	case err != nil:
		log.Printf("  [appendPackage] Get(crawler, %s) failed: %v", pkg, err)
		return false
	}

	scheduleErr := schedulePackage(c, pkg, time.Now())
	return scheduleErr == nil
}
Example #10
0
// appendPackage schedules pkg for the current time with an empty Etag unless
// it is present in both cPackageDB and docDB, and reports whether the
// scheduling call returned nil.
//
// Before the lookup, pkg is stripped of surrounding whitespace and then of
// any leading or trailing runes greater than 128. Paths rejected by
// doc.IsValidRemotePath yield false.
func appendPackage(pkg string) bool {
	above128 := func(r rune) bool {
		return r > rune(128)
	}
	pkg = strings.TrimFunc(strings.TrimSpace(pkg), above128)
	if !doc.IsValidRemotePath(pkg) {
		return false
	}

	var (
		entry CrawlingEntry
		info  gcse.DocInfo
	)
	// docDB is consulted only when the crawler already knows the package.
	if cPackageDB.Get(pkg, &entry) && docDB.Get(pkg, &info) {
		return false
	}

	// Either new to the crawler or missing from docDB: schedule it now.
	err := schedulePackage(pkg, time.Now(), "")
	return err == nil
}