Example #1
0
File: crawl.go Project: golang/gddo
// put writes pdoc to the database through db.Put, scheduling the next crawl
// at nextCrawl.
//
// Before the write, a package whose status is gosrc.NoRecentCommits is
// switched to gosrc.Active when isActivePkg reports true for its import path.
// A db.Put failure is returned as a new error whose text names the import
// path; on success the result is nil.
func put(pdoc *doc.Package, nextCrawl time.Time) error {
	if pdoc.Status == gosrc.NoRecentCommits {
		// Only consult isActivePkg for packages that are currently flagged.
		if isActivePkg(pdoc.ImportPath, gosrc.NoRecentCommits) {
			pdoc.Status = gosrc.Active
		}
	}

	// NOTE(review): the meaning of the trailing false argument is defined by
	// db.Put, which is not visible here.
	err := db.Put(pdoc, nextCrawl, false)
	if err == nil {
		return nil
	}
	return fmt.Errorf("ERROR db.Put(%q): %v", pdoc.ImportPath, err)
}
Example #2
0
File: crawl.go Project: golang/gddo
// crawlDoc obtains the documentation for importPath with doc.Get and brings
// the database in line with the outcome.
//
// Parameters:
//   - source: label placed first on the log line written when crawlDoc returns.
//   - importPath: the package to crawl.
//   - pdoc: the previously known document, or nil; its Etag is handed to doc.Get.
//   - hasSubdirs: when false, a fetched package with an empty Name is treated
//     as not found.
//   - nextCrawl: the previously scheduled crawl time, used only to log how
//     many hours late this crawl is; it is then overwritten with the new
//     schedule.
//
// Outcomes:
//   - fetch succeeded: the document is stored with put and returned.
//   - gosrc.NotModifiedError: the existing pdoc is either re-stored with an
//     updated status or merely rescheduled, and is returned with a nil error.
//   - gosrc.NotFoundError: the package is deleted from the database and the
//     error is returned.
//   - any other error: returned as is.
//
// Database failures along the way are logged, not returned.
func crawlDoc(source string, importPath string, pdoc *doc.Package, hasSubdirs bool, nextCrawl time.Time) (*doc.Package, error) {
	// The log line is assembled piece by piece and flushed on every exit path.
	logLine := []interface{}{source}
	defer func() {
		logLine = append(logLine, importPath)
		log.Println(logLine...)
	}()

	if !nextCrawl.IsZero() {
		// Whole hours elapsed since the crawl was due.
		if hoursLate := time.Since(nextCrawl) / time.Hour; hoursLate > 0 {
			logLine = append(logLine, "late:", int64(hoursLate))
		}
	}

	var etag string
	if pdoc != nil {
		etag = pdoc.Etag
		logLine = append(logLine, "etag:", etag)
	}

	const oldSubRepoPrefix = "code.google.com/p/go."

	// Evaluated lazily so the database is only queried when the earlier
	// case did not match. A lookup error counts as "not blocked".
	isBlocked := func() bool {
		blocked, lookupErr := db.IsBlocked(importPath)
		return blocked && lookupErr == nil
	}

	began := time.Now()
	var err error
	switch {
	case strings.HasPrefix(importPath, oldSubRepoPrefix):
		// Old import path for a Go sub-repository: redirect to golang.org/x/.
		pdoc = nil
		err = gosrc.NotFoundError{Message: "old Go sub-repo", Redirect: "golang.org/x/" + strings.TrimPrefix(importPath, oldSubRepoPrefix)}
	case isBlocked():
		pdoc = nil
		err = gosrc.NotFoundError{Message: "blocked."}
	case testdataPat.MatchString(importPath):
		pdoc = nil
		err = gosrc.NotFoundError{Message: "testdata."}
	default:
		var fetched *doc.Package
		fetched, err = doc.Get(httpClient, importPath, etag)
		logLine = append(logLine, "fetch:", int64(time.Since(began)/time.Millisecond))
		if err == nil && fetched.Name == "" && !hasSubdirs {
			// Nothing documentable and nothing beneath it: report as missing,
			// logging whatever errors the fetch collected.
			for _, docErr := range fetched.Errors {
				logLine = append(logLine, "err:", docErr)
			}
			pdoc = nil
			err = gosrc.NotFoundError{Message: "no Go files or subdirs"}
		} else if _, unchanged := err.(gosrc.NotModifiedError); !unchanged {
			// Keep the caller's pdoc only when the fetch said "not modified".
			pdoc = fetched
		}
	}

	// Choose the interval until the next crawl. The first matching rule wins.
	interval := *maxAge
	if strings.HasPrefix(importPath, "github.com/") || (pdoc != nil && len(pdoc.Errors) > 0) {
		interval = *maxAge * 7
	} else if strings.HasPrefix(importPath, "gist.github.com/") {
		// Gists are not worth frequent crawling.
		interval = *maxAge * 30
	}
	nextCrawl = began.Add(interval)

	switch e := err.(type) {
	case nil:
		logLine = append(logLine, "put:", pdoc.Etag)
		if putErr := put(pdoc, nextCrawl); putErr != nil {
			log.Println(putErr)
		}
		return pdoc, nil

	case gosrc.NotModifiedError:
		// NOTE(review): pdoc is dereferenced here, so this relies on doc.Get
		// not reporting NotModified when no previous document was supplied
		// - confirm.
		if pdoc.Status == gosrc.Active && !isActivePkg(importPath, e.Status) {
			if e.Status == gosrc.NoRecentCommits {
				e.Status = gosrc.Inactive
			}
			logLine = append(logLine, "archive", e)
			pdoc.Status = e.Status
			if putErr := db.Put(pdoc, nextCrawl, false); putErr != nil {
				log.Printf("ERROR db.Put(%q): %v", importPath, putErr)
			}
		} else {
			// Nothing to store; just reschedule the package.
			logLine = append(logLine, "touch")
			if touchErr := db.SetNextCrawl(importPath, nextCrawl); touchErr != nil {
				log.Printf("ERROR db.SetNextCrawl(%q): %v", importPath, touchErr)
			}
		}
		return pdoc, nil

	case gosrc.NotFoundError:
		logLine = append(logLine, "notfound:", e)
		if delErr := db.Delete(importPath); delErr != nil {
			log.Printf("ERROR db.Delete(%q): %v", importPath, delErr)
		}
		return nil, e

	default:
		logLine = append(logLine, "ERROR:", err)
		return nil, err
	}
}