예제 #1
0
// CrawlPackage fetches the documentation of pkg through doc.Get (with an
// empty etag) and converts the result into a Package.
//
// One README is picked from pdoc.ReadmeFiles: among the files whose contents
// are valid UTF-8, the one with the lexicographically smallest file name is
// used, so crawling an unchanged package repeatedly always selects the same
// file. If no file qualifies, ReadmeFn and ReadmeData are left empty. When the
// fetched package has neither a Doc nor a Synopsis, the synopsis is derived
// from the selected README text with godoc.Synopsis.
//
// If doc.Get fails, the returned *Package is nil and the error is passed
// through villa.NestErrorf with the package path as context.
func CrawlPackage(httpClient *http.Client, pkg string) (p *Package, err error) {
	pdoc, err := doc.Get(httpClient, pkg, "")
	if err != nil {
		return nil, villa.NestErrorf(err, "CrawlPackage(%s)", pkg)
	}

	// Map iteration order is unspecified in Go, so stopping at the first valid
	// entry would make the selected README vary from run to run. Keep the
	// valid entry with the smallest file name instead.
	readmeFn, readmeData := "", ""
	haveReadme := false
	for fn, data := range pdoc.ReadmeFiles {
		if haveReadme && fn >= readmeFn {
			// A usable README that sorts earlier is already selected.
			continue
		}
		if text := string(data); utf8.ValidString(text) {
			readmeFn, readmeData, haveReadme = fn, text, true
		}
	}

	// Fall back to the README for a synopsis only when the package itself
	// provides no documentation at all.
	if pdoc.Doc == "" && pdoc.Synopsis == "" {
		pdoc.Synopsis = godoc.Synopsis(readmeData)
	}

	return &Package{
		Name:       pdoc.Name,
		ImportPath: pdoc.ImportPath,
		Synopsis:   pdoc.Synopsis,
		Doc:        pdoc.Doc,
		ProjectURL: pdoc.ProjectURL,
		StarCount:  pdoc.StarCount,

		ReadmeFn:   readmeFn,
		ReadmeData: readmeData,

		Imports:    pdoc.Imports,
		References: pdoc.References,
	}, nil
}
예제 #2
0
// CrawlPackage fetches the documentation of pkg and converts it into a
// Package, together with any folder information produced by getGithub.
//
// The fetch path depends on the import path:
//   - paths starting with "thezombie.net" are rejected with ErrInvalidPackage;
//   - paths starting with "github.com/" use getGithub when GithubSpider is
//     non-nil, and doc.Get otherwise;
//   - all other paths use newDocGet.
//
// etag is forwarded to doc.Get and newDocGet. A doc.ErrNotModified result is
// reported as ErrPackageNotModifed; any other error is returned through
// errorsp.WithStacks. A panic raised while crawling is recovered and turned
// into an error, with p and folders reset to nil.
//
// One README is picked from pdoc.ReadmeFiles: among the files whose trimmed
// contents are longer than one byte and valid UTF-8, the one with the
// lexicographically smallest file name is used, so the choice is stable across
// crawls. The stored README is limited to 100 KiB and is never cut in the
// middle of a UTF-8 sequence.
func CrawlPackage(httpClient doc.HttpClient, pkg string, etag string) (p *Package, folders []*sppb.FolderInfo, err error) {
	defer func() {
		if perr := recover(); perr != nil {
			p, folders, err = nil, nil, errorsp.NewWithStacks("Panic when crawling package %s: %v", pkg, perr)
		}
	}()

	var pdoc *doc.Package
	switch {
	case strings.HasPrefix(pkg, "thezombie.net"):
		return nil, folders, ErrInvalidPackage
	case strings.HasPrefix(pkg, "github.com/"):
		if GithubSpider != nil {
			pdoc, folders, err = getGithub(pkg)
		} else {
			pdoc, err = doc.Get(httpClient, pkg, etag)
		}
	default:
		pdoc, err = newDocGet(httpClient, pkg, etag)
	}
	if err == doc.ErrNotModified {
		return nil, folders, ErrPackageNotModifed
	}
	if err != nil {
		return nil, folders, errorsp.WithStacks(err)
	}

	if pdoc.StarCount < 0 {
		// The star count was not fetched: fuse the Plusone and LikeButton
		// counts instead. A lookup that fails contributes -1 (best effort).
		plus, like := -1, -1
		if starCount, err := Plusone(httpClient, pdoc.ProjectURL); err == nil {
			plus = starCount
		}
		if starCount, err := LikeButton(httpClient, pdoc.ProjectURL); err == nil {
			like = starCount
		}
		pdoc.StarCount = fuseStars(plus, like)
	}

	// Map iteration order is unspecified in Go, so stopping at the first
	// usable entry would make the selected README vary from run to run. Keep
	// the usable entry with the smallest (untrimmed, hence unique) file name.
	readmeKey, readmeData := "", ""
	haveReadme := false
	for fn, data := range pdoc.ReadmeFiles {
		if haveReadme && fn >= readmeKey {
			// A usable README that sorts earlier is already selected.
			continue
		}
		text := strings.TrimSpace(string(data))
		if len(text) > 1 && utf8.ValidString(text) {
			readmeKey, readmeData, haveReadme = fn, text, true
		}
	}
	readmeFn := strings.TrimSpace(readmeKey)

	// Try to find a synopsis in the README when the package has no
	// documentation of its own. This uses the full, untruncated README.
	if pdoc.Doc == "" && pdoc.Synopsis == "" {
		pdoc.Synopsis = godoc.Synopsis(ReadmeToText(readmeFn, readmeData))
	}

	const maxReadmeLen = 100 * 1024
	if len(readmeData) > maxReadmeLen {
		// Cutting at a fixed byte offset could split a multi-byte rune and
		// leave invalid UTF-8 behind; back up to the start of that rune.
		cut := maxReadmeLen
		for cut > 0 && !utf8.RuneStart(readmeData[cut]) {
			cut--
		}
		readmeData = readmeData[:cut]
	}

	// Regular imports, without the package itself.
	importsSet := stringsp.NewSet(pdoc.Imports...)
	importsSet.Delete(pdoc.ImportPath)
	imports := importsSet.Elements()

	// Test-only imports: test and external-test imports minus the regular
	// imports and the package itself.
	testImports := stringsp.NewSet(pdoc.TestImports...)
	testImports.Add(pdoc.XTestImports...)
	testImports.Delete(imports...)
	testImports.Delete(pdoc.ImportPath)

	// Names of the functions and types listed in the fetched documentation.
	var exported stringsp.Set
	for _, f := range pdoc.Funcs {
		exported.Add(f.Name)
	}
	for _, t := range pdoc.Types {
		exported.Add(t.Name)
	}

	return &Package{
		Package:    pdoc.ImportPath,
		Name:       pdoc.Name,
		Synopsis:   pdoc.Synopsis,
		Doc:        pdoc.Doc,
		ProjectURL: pdoc.ProjectURL,
		StarCount:  pdoc.StarCount,

		ReadmeFn:   readmeFn,
		ReadmeData: readmeData,

		Imports:     imports,
		TestImports: testImports.Elements(),
		Exported:    exported.Elements(),

		References: pdoc.References,
		Etag:       pdoc.Etag,
	}, folders, nil
}
예제 #3
0
파일: crawler.go 프로젝트: pombredanne/gcse
// CrawlPackage fetches the documentation of pkg through doc.Get, forwarding
// etag, and converts the result into a Package.
//
// A doc.ErrNotModified result is reported as ErrPackageNotModifed; any other
// error is passed through villa.NestErrorf with the package path as context.
// In both cases the returned *Package is nil.
//
// One README is picked from pdoc.ReadmeFiles: among the files whose trimmed
// contents are longer than one byte and valid UTF-8, the one with the
// lexicographically smallest file name is used, so the choice is stable across
// crawls. The stored README is limited to 100 KiB and is never cut in the
// middle of a UTF-8 sequence.
func CrawlPackage(httpClient *http.Client, pkg string, etag string) (p *Package, err error) {
	pdoc, err := doc.Get(httpClient, pkg, etag)
	if err == doc.ErrNotModified {
		return nil, ErrPackageNotModifed
	}
	if err != nil {
		return nil, villa.NestErrorf(err, "CrawlPackage(%s)", pkg)
	}

	if pdoc.StarCount < 0 {
		// The star count was not fetched: fuse the Plusone and LikeButton
		// counts instead. A lookup that fails contributes -1 (best effort).
		plus, like := -1, -1
		if starCount, err := Plusone(httpClient, pdoc.ProjectURL); err == nil {
			plus = starCount
		}
		if starCount, err := LikeButton(httpClient, pdoc.ProjectURL); err == nil {
			like = starCount
		}
		pdoc.StarCount = fuseStars(plus, like)
	}

	// Map iteration order is unspecified in Go, so stopping at the first
	// usable entry would make the selected README vary from run to run. Keep
	// the usable entry with the smallest (untrimmed, hence unique) file name.
	readmeKey, readmeData := "", ""
	haveReadme := false
	for fn, data := range pdoc.ReadmeFiles {
		if haveReadme && fn >= readmeKey {
			// A usable README that sorts earlier is already selected.
			continue
		}
		text := strings.TrimSpace(string(data))
		if len(text) > 1 && utf8.ValidString(text) {
			readmeKey, readmeData, haveReadme = fn, text, true
		}
	}
	readmeFn := strings.TrimSpace(readmeKey)

	// Try to find a synopsis in the README when the package has no
	// documentation of its own. This uses the full, untruncated README.
	if pdoc.Doc == "" && pdoc.Synopsis == "" {
		pdoc.Synopsis = godoc.Synopsis(ReadmeToText(readmeFn, readmeData))
	}

	const maxReadmeLen = 100 * 1024
	if len(readmeData) > maxReadmeLen {
		// Cutting at a fixed byte offset could split a multi-byte rune and
		// leave invalid UTF-8 behind; back up to the start of that rune.
		cut := maxReadmeLen
		for cut > 0 && !utf8.RuneStart(readmeData[cut]) {
			cut--
		}
		readmeData = readmeData[:cut]
	}

	// Regular, test and external-test imports are merged into one set.
	imports := villa.NewStrSet(pdoc.Imports...)
	imports.Put(pdoc.TestImports...)
	imports.Put(pdoc.XTestImports...)

	// Names of the functions and types listed in the fetched documentation.
	var exported villa.StrSet
	for _, f := range pdoc.Funcs {
		exported.Put(f.Name)
	}
	for _, t := range pdoc.Types {
		exported.Put(t.Name)
	}

	return &Package{
		Package:    pdoc.ImportPath,
		Name:       pdoc.Name,
		Synopsis:   pdoc.Synopsis,
		Doc:        pdoc.Doc,
		ProjectURL: pdoc.ProjectURL,
		StarCount:  pdoc.StarCount,
		ReadmeFn:   readmeFn,
		ReadmeData: readmeData,
		Imports:    imports.Elements(),
		Exported:   exported.Elements(),

		References: pdoc.References,
		Etag:       pdoc.Etag,
	}, nil
}