// IndexGitRepo indexes the git repository as specified by the options and arguments. func IndexGitRepo(opts build.Options, branchPrefix string, branches []string, submodules bool) error { repo, err := git.OpenRepository(opts.RepoDir) if err != nil { return err } url, err := GuessRepoURL(opts.RepoDir) if err != nil { log.Printf("guessRepoURL(%s): %s", opts.RepoDir, err) } else { opts.RepoURL = url } builder, err := build.NewBuilder(opts) if err != nil { return err } // name => branch allfiles := map[string][]string{} var names []string // branch => name => sha1 data := map[string]map[string]git.Oid{} repos := map[git.Oid]*git.Repository{} for _, b := range branches { tree, err := getTree(repo, filepath.Join(branchPrefix, b)) if err != nil { return err } fs, subRepos, err := treeToFiles(repo, tree, submodules) if err != nil { return err } for k, v := range subRepos { repos[k] = v } for f := range fs { allfiles[f] = append(allfiles[f], b) } data[b] = fs } for n := range allfiles { names = append(names, n) } sort.Strings(names) for _, n := range names { shas := map[git.Oid][]string{} for _, b := range allfiles[n] { shas[data[b][n]] = append(shas[data[b][n]], b) } for sha, branches := range shas { r := repos[sha] if r == nil { return fmt.Errorf("no repo found for %s (%s)", n, branches) } blob, err := r.LookupBlob(&sha) if err != nil { return err } const maxSz = 128 << 10 if blob.Size() > maxSz { continue } builder.Add(zoekt.Document{ Name: n, Content: blob.Contents(), Branches: branches, }) } } builder.Finish() return nil }
func main() { var sizeMax = flag.Int("file_limit", 128*1024, "maximum file size") var shardLimit = flag.Int("shard_limit", 100<<20, "maximum corpus size for a shard") var parallelism = flag.Int("parallelism", 4, "maximum number of parallel indexing processes.") var recursive = flag.Bool("recursive", false, "recurse into directories to index all git repos") submodules := flag.Bool("submodules", true, "if set to false, do not recurse into submodules") branchesStr := flag.String("branches", "master", "git branches to index. If set, arguments should be bare git repositories.") branchPrefix := flag.String("prefix", "refs/heads/", "prefix for branch names") indexDir := flag.String("index", build.DefaultDir, "index directory for *.zoekt files.") incremental := flag.Bool("incremental", true, "only index changed repositories") flag.Parse() opts := build.Options{ Parallelism: *parallelism, SizeMax: *sizeMax, ShardMax: *shardLimit, IndexDir: *indexDir, } opts.SetDefaults() var branches []string if *branchesStr != "" { branches = strings.Split(*branchesStr, ",") } gitRepos := map[string]string{} if *recursive { for _, arg := range flag.Args() { repos, err := gitindex.FindGitRepos(arg) if err != nil { log.Fatal(err) } for k, v := range repos { gitRepos[k] = v } } } else { for _, repoDir := range flag.Args() { if _, err := os.Lstat(filepath.Join(repoDir, ".git")); err == nil { repoDir = filepath.Join(repoDir, ".git") } repoDir, err := filepath.Abs(repoDir) if err != nil { log.Fatal(err) } name := filepath.Base(repoDir) if name == ".git" { name = filepath.Base(filepath.Dir(repoDir)) } name = strings.TrimSuffix(name, ".git") gitRepos[repoDir] = name } } exitStatus := 0 for dir, name := range gitRepos { opts.RepoName = name opts.RepoDir = filepath.Clean(dir) if mod, err := gitindex.RepoModTime(opts.RepoDir); *incremental && err == nil && mod.Before(opts.Timestamp()) { continue } log.Printf("indexing %s (%s)", dir, name) if err := gitindex.IndexGitRepo(opts, *branchPrefix, branches, *submodules); err != nil { log.Printf("indexGitRepo(%s): %v", dir, err) exitStatus = 1 } } os.Exit(exitStatus) }