Ejemplo n.º 1
0
// IndexGitRepo indexes the git repository as specified by the options and arguments.
//
// The repository at opts.RepoDir is opened, and every entry of branches is
// joined onto branchPrefix to form the name that is resolved to a tree.
// submodules is forwarded to treeToFiles. A file whose blob ID is the same on
// several branches is added to the index once, tagged with all of those
// branches. Blobs larger than opts.SizeMax bytes (128 KiB when SizeMax is not
// positive) are skipped.
//
// The first error met while opening the repository, creating the builder,
// resolving a branch, listing its files or looking up a blob is returned.
// Failing to guess the repository URL is only logged.
func IndexGitRepo(opts build.Options, branchPrefix string, branches []string, submodules bool) error {
	repo, err := git.OpenRepository(opts.RepoDir)
	if err != nil {
		return err
	}

	// The URL is optional: log the failure and continue without it.
	if url, err := GuessRepoURL(opts.RepoDir); err != nil {
		log.Printf("guessRepoURL(%s): %s", opts.RepoDir, err)
	} else {
		opts.RepoURL = url
	}

	builder, err := build.NewBuilder(opts)
	if err != nil {
		return err
	}

	// Honor the configured size limit instead of a fixed constant; keep the
	// previous 128 KiB cap when the option was left unset.
	sizeLimit := int64(opts.SizeMax)
	if sizeLimit <= 0 {
		sizeLimit = 128 << 10
	}

	// name => branches containing that name, in the order of the branches argument.
	allfiles := map[string][]string{}

	// branch => name => blob ID
	data := map[string]map[string]git.Oid{}

	// blob ID => repository to read that blob from, as reported by treeToFiles.
	repos := map[git.Oid]*git.Repository{}

	for _, b := range branches {
		tree, err := getTree(repo, filepath.Join(branchPrefix, b))
		if err != nil {
			return err
		}

		fs, subRepos, err := treeToFiles(repo, tree, submodules)
		if err != nil {
			return err
		}
		for k, v := range subRepos {
			repos[k] = v
		}

		for f := range fs {
			allfiles[f] = append(allfiles[f], b)
		}
		data[b] = fs
	}

	// Visit the file names in sorted order.
	names := make([]string, 0, len(allfiles))
	for n := range allfiles {
		names = append(names, n)
	}
	sort.Strings(names)

	for _, n := range names {
		// Group the branches by blob ID. The IDs are remembered in first-seen
		// order so that documents are added in a reproducible order rather
		// than in map iteration order.
		byID := map[git.Oid][]string{}
		var ids []git.Oid
		for _, b := range allfiles[n] {
			id := data[b][n]
			if _, seen := byID[id]; !seen {
				ids = append(ids, id)
			}
			byID[id] = append(byID[id], b)
		}

		for _, id := range ids {
			docBranches := byID[id]

			r := repos[id]
			if r == nil {
				return fmt.Errorf("no repo found for %s (%s)", n, docBranches)
			}
			blob, err := r.LookupBlob(&id)
			if err != nil {
				return err
			}

			if blob.Size() > sizeLimit {
				continue
			}

			// NOTE(review): any result returned by Add is not checked here —
			// confirm against build.Builder.
			builder.Add(zoekt.Document{
				Name:     n,
				Content:  blob.Contents(),
				Branches: docBranches,
			})
		}
	}

	// NOTE(review): any result returned by Finish is not checked here —
	// confirm against build.Builder.
	builder.Finish()

	return nil
}
Ejemplo n.º 2
0
// main parses the command-line flags, gathers the git repositories named by
// the positional arguments, and indexes each of them. The process exits with
// status 1 if indexing any repository failed, and 0 otherwise.
func main() {
	fileLimit := flag.Int("file_limit", 128*1024, "maximum file size")
	shardMax := flag.Int("shard_limit", 100<<20, "maximum corpus size for a shard")
	workers := flag.Int("parallelism", 4, "maximum number of parallel indexing processes.")
	recurse := flag.Bool("recursive", false, "recurse into directories to index all git repos")
	withSubmodules := flag.Bool("submodules", true, "if set to false, do not recurse into submodules")
	branchList := flag.String("branches", "master", "git branches to index. If set, arguments should be bare git repositories.")
	refPrefix := flag.String("prefix", "refs/heads/", "prefix for branch names")

	outDir := flag.String("index", build.DefaultDir, "index directory for *.zoekt files.")
	onlyChanged := flag.Bool("incremental", true, "only index changed repositories")
	flag.Parse()

	opts := build.Options{
		IndexDir:    *outDir,
		ShardMax:    *shardMax,
		SizeMax:     *fileLimit,
		Parallelism: *workers,
	}
	opts.SetDefaults()

	// An empty -branches value leaves the branch list nil.
	var branches []string
	if list := *branchList; list != "" {
		branches = strings.Split(list, ",")
	}

	// repository directory => repository name
	repoNames := map[string]string{}
	for _, arg := range flag.Args() {
		if *recurse {
			found, err := gitindex.FindGitRepos(arg)
			if err != nil {
				log.Fatal(err)
			}
			for dir, name := range found {
				repoNames[dir] = name
			}
			continue
		}

		// Prefer the .git entry inside the argument when one exists.
		dotGit := filepath.Join(arg, ".git")
		if _, err := os.Lstat(dotGit); err == nil {
			arg = dotGit
		}
		abs, err := filepath.Abs(arg)
		if err != nil {
			log.Fatal(err)
		}

		// Name the repository after its directory: the parent of a ".git"
		// directory, or the directory itself minus any ".git" suffix.
		name := filepath.Base(abs)
		if name == ".git" {
			name = filepath.Base(filepath.Dir(abs))
		}
		repoNames[abs] = strings.TrimSuffix(name, ".git")
	}

	status := 0
	for dir, name := range repoNames {
		opts.RepoName = name
		opts.RepoDir = filepath.Clean(dir)

		// In incremental mode, skip a repository whose modification time is
		// before the timestamp reported by the options.
		mod, modErr := gitindex.RepoModTime(opts.RepoDir)
		if *onlyChanged && modErr == nil && mod.Before(opts.Timestamp()) {
			continue
		}

		log.Printf("indexing %s (%s)", dir, name)
		err := gitindex.IndexGitRepo(opts, *refPrefix, branches, *withSubmodules)
		if err != nil {
			// Keep going with the remaining repositories; report failure on exit.
			log.Printf("indexGitRepo(%s): %v", dir, err)
			status = 1
		}
	}
	os.Exit(status)
}