// Reindex gets all the packages in the database and puts them into the search
// index. This updates the search index with the path, synopsis, score, and
// import count of every package in the database.
func (db *Database) Reindex(ctx context.Context) error {
	c := db.Pool.Get()
	defer c.Close()

	idx, err := search.Open("packages")
	if err != nil {
		return fmt.Errorf("database: failed to open packages: %v", err)
	}
	npkgs := 0
	for {
		// Get 200 packages from the nextCrawl set each time. Use npkgs as a
		// cursor to record the position indexed so far. Retry from the cursor
		// position if we receive a timeout error from App Engine.
		values, err := redis.Values(c.Do(
			"SORT", "nextCrawl",
			"LIMIT", strconv.Itoa(npkgs), "200",
			"GET", "pkg:*->path",
			"GET", "pkg:*->synopsis",
			"GET", "pkg:*->score",
		))
		if err != nil {
			return err
		}
		if len(values) == 0 {
			break // all done
		}

		// The Search API supports puts in batches of up to 200 documents, but
		// the Go version of this API does not support batching yet.
		// TODO(shantuo): Put packages in batch operations.
		for ; len(values) > 0; npkgs++ {
			var pdoc doc.Package
			var score float64
			values, err = redis.Scan(values, &pdoc.ImportPath, &pdoc.Synopsis, &score)
			if err != nil {
				return err
			}
			// Some corrupted data in the current database causes an error when
			// putting the package into the search index, which only supports
			// UTF-8 encoded strings.
			if !utf8.ValidString(pdoc.Synopsis) {
				pdoc.Synopsis = ""
			}
			id, n, err := pkgIDAndImportCount(c, pdoc.ImportPath)
			if err != nil {
				return err
			}
			if _, err := idx.Put(ctx, id, &Package{
				Path:        pdoc.ImportPath,
				Synopsis:    pdoc.Synopsis,
				Score:       score,
				ImportCount: n,
			}); err != nil {
				if appengine.IsTimeoutError(err) {
					log.Printf("App Engine timeout: %v. Continuing...", err)
					break
				}
				return fmt.Errorf("failed to put index %s: %v", id, err)
			}
		}
	}
	log.Printf("%d packages are reindexed", npkgs)
	return nil
}