func CrawlPackage(httpClient *http.Client, pkg string) (p *Package, err error) { pdoc, err := doc.Get(httpClient, pkg, "") if err != nil { return nil, villa.NestErrorf(err, "CrawlPackage(%s)", pkg) } readmeFn, readmeData := "", "" for fn, data := range pdoc.ReadmeFiles { readmeFn, readmeData = fn, string(data) if utf8.ValidString(readmeData) { break } else { readmeFn, readmeData = "", "" } } if pdoc.Doc == "" && pdoc.Synopsis == "" { pdoc.Synopsis = godoc.Synopsis(readmeData) } return &Package{ Name: pdoc.Name, ImportPath: pdoc.ImportPath, Synopsis: pdoc.Synopsis, Doc: pdoc.Doc, ProjectURL: pdoc.ProjectURL, StarCount: pdoc.StarCount, ReadmeFn: readmeFn, ReadmeData: readmeData, Imports: pdoc.Imports, References: pdoc.References, }, nil }
func CrawlPackage(httpClient doc.HttpClient, pkg string, etag string) (p *Package, folders []*sppb.FolderInfo, err error) { defer func() { if perr := recover(); perr != nil { p, folders, err = nil, nil, errorsp.NewWithStacks("Panic when crawling package %s: %v", pkg, perr) } }() var pdoc *doc.Package if strings.HasPrefix(pkg, "thezombie.net") { return nil, folders, ErrInvalidPackage } else if strings.HasPrefix(pkg, "github.com/") { if GithubSpider != nil { pdoc, folders, err = getGithub(pkg) } else { pdoc, err = doc.Get(httpClient, pkg, etag) } } else { pdoc, err = newDocGet(httpClient, pkg, etag) } if err == doc.ErrNotModified { return nil, folders, ErrPackageNotModifed } if err != nil { return nil, folders, errorsp.WithStacks(err) } if pdoc.StarCount < 0 { // if starcount is not fetched, choose fusion of Plusone and // Like Button plus, like := -1, -1 if starCount, err := Plusone(httpClient, pdoc.ProjectURL); err == nil { plus = starCount } if starCount, err := LikeButton(httpClient, pdoc.ProjectURL); err == nil { like = starCount } pdoc.StarCount = fuseStars(plus, like) } readmeFn, readmeData := "", "" for fn, data := range pdoc.ReadmeFiles { readmeFn, readmeData = strings.TrimSpace(fn), strings.TrimSpace(string(data)) if len(readmeData) > 1 && utf8.ValidString(readmeData) { break } else { readmeFn, readmeData = "", "" } } // try find synopsis from readme if pdoc.Doc == "" && pdoc.Synopsis == "" { pdoc.Synopsis = godoc.Synopsis(ReadmeToText(readmeFn, readmeData)) } if len(readmeData) > 100*1024 { readmeData = readmeData[:100*1024] } importsSet := stringsp.NewSet(pdoc.Imports...) importsSet.Delete(pdoc.ImportPath) imports := importsSet.Elements() testImports := stringsp.NewSet(pdoc.TestImports...) testImports.Add(pdoc.XTestImports...) testImports.Delete(imports...) testImports.Delete(pdoc.ImportPath) var exported stringsp.Set for _, f := range pdoc.Funcs { exported.Add(f.Name) } for _, t := range pdoc.Types { exported.Add(t.Name) } return &Package{ Package: pdoc.ImportPath, Name: pdoc.Name, Synopsis: pdoc.Synopsis, Doc: pdoc.Doc, ProjectURL: pdoc.ProjectURL, StarCount: pdoc.StarCount, ReadmeFn: readmeFn, ReadmeData: readmeData, Imports: imports, TestImports: testImports.Elements(), Exported: exported.Elements(), References: pdoc.References, Etag: pdoc.Etag, }, folders, nil }
func CrawlPackage(httpClient *http.Client, pkg string, etag string) (p *Package, err error) { pdoc, err := doc.Get(httpClient, pkg, etag) if err == doc.ErrNotModified { return nil, ErrPackageNotModifed } if err != nil { return nil, villa.NestErrorf(err, "CrawlPackage(%s)", pkg) } if pdoc.StarCount < 0 { // if starcount is not fetched, choose fusion of Plusone and LikeButton plus, like := -1, -1 if starCount, err := Plusone(httpClient, pdoc.ProjectURL); err == nil { plus = starCount } if starCount, err := LikeButton(httpClient, pdoc.ProjectURL); err == nil { like = starCount } pdoc.StarCount = fuseStars(plus, like) } readmeFn, readmeData := "", "" for fn, data := range pdoc.ReadmeFiles { readmeFn, readmeData = strings.TrimSpace(fn), strings.TrimSpace(string(data)) if len(readmeData) > 1 && utf8.ValidString(readmeData) { break } else { readmeFn, readmeData = "", "" } } // try find synopsis from readme if pdoc.Doc == "" && pdoc.Synopsis == "" { pdoc.Synopsis = godoc.Synopsis(ReadmeToText(readmeFn, readmeData)) } if len(readmeData) > 100*1024 { readmeData = readmeData[:100*1024] } imports := villa.NewStrSet(pdoc.Imports...) imports.Put(pdoc.TestImports...) imports.Put(pdoc.XTestImports...) var exported villa.StrSet for _, f := range pdoc.Funcs { exported.Put(f.Name) } for _, t := range pdoc.Types { exported.Put(t.Name) } return &Package{ Package: pdoc.ImportPath, Name: pdoc.Name, Synopsis: pdoc.Synopsis, Doc: pdoc.Doc, ProjectURL: pdoc.ProjectURL, StarCount: pdoc.StarCount, ReadmeFn: readmeFn, ReadmeData: readmeData, Imports: imports.Elements(), Exported: exported.Elements(), References: pdoc.References, Etag: pdoc.Etag, }, nil }