// RemoveAll removes a tree recursively.
func RemoveAll(path string, vfs rwvfs.WalkableFileSystem) error {
	w := fs.WalkFS(path, vfs)
	remove := func(par *parallel.Run, path string) {
		par.Do(func() error { return vfs.Remove(path) })
	}
	var dirs []string // remove dirs after removing all files
	filesPar := parallel.NewRun(20)
	for w.Step() {
		if err := w.Err(); err != nil {
			return err
		}
		if w.Stat().IsDir() {
			dirs = append(dirs, w.Path())
		} else {
			remove(filesPar, w.Path())
		}
	}
	if err := filesPar.Wait(); err != nil {
		return err
	}
	dirsPar := parallel.NewRun(20)
	sort.Sort(sort.Reverse(sort.StringSlice(dirs))) // reverse so we delete leaf dirs first
	for _, dir := range dirs {
		remove(dirsPar, dir)
	}
	return dirsPar.Wait()
}
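// A minimal usage sketch (not from the original code): removing a stale
// subtree of an OS-backed build-data filesystem. rwvfs.OS and rwvfs.Walkable
// are assumed to be available, as in the other snippets in this file; the
// helper name and paths are hypothetical.
func removeStaleCommitData(buildDataDir, commitID string) error {
	vfs := rwvfs.Walkable(rwvfs.OS(buildDataDir))
	// RemoveAll deletes files in parallel first, then directories leaf-first.
	return RemoveAll(commitID, vfs)
}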
func (s unitStores) Defs(fs ...DefFilter) ([]*graph.Def, error) {
	uss, err := openUnitStores(s.opener, fs)
	if err != nil {
		return nil, err
	}

	var (
		allDefs   []*graph.Def
		allDefsMu sync.Mutex
	)
	par := parallel.NewRun(storeFetchPar)
	for u_, us_ := range uss {
		u, us := u_, us_
		if us == nil {
			continue
		}
		par.Do(func() error {
			defs, err := us.Defs(filtersForUnit(u, fs).([]DefFilter)...)
			if err != nil && !isStoreNotExist(err) {
				return err
			}
			for _, def := range defs {
				def.UnitType = u.Type
				def.Unit = u.Name
			}
			allDefsMu.Lock()
			allDefs = append(allDefs, defs...)
			allDefsMu.Unlock()
			return nil
		})
	}
	err = par.Wait()
	return allDefs, err
}
func (s repoStores) Defs(f ...DefFilter) ([]*graph.Def, error) {
	rss, err := openRepoStores(s.opener, f)
	if err != nil {
		return nil, err
	}

	var (
		allDefs   []*graph.Def
		allDefsMu sync.Mutex
	)
	par := parallel.NewRun(storeFetchPar)
	for repo_, rs_ := range rss {
		repo, rs := repo_, rs_
		if rs == nil {
			continue
		}
		par.Do(func() error {
			defs, err := rs.Defs(filtersForRepo(repo, f).([]DefFilter)...)
			if err != nil && !isStoreNotExist(err) {
				return err
			}
			for _, def := range defs {
				def.Repo = repo
			}
			allDefsMu.Lock()
			allDefs = append(allDefs, defs...)
			allDefsMu.Unlock()
			return nil
		})
	}
	err = par.Wait()
	return allDefs, err
}
func OpenRepo(dir string) (*Repo, error) {
	if fi, err := os.Stat(dir); err != nil || !fi.Mode().IsDir() {
		return nil, fmt.Errorf("not a directory: %q", dir)
	}

	rc := new(Repo)

	// VCS and root directory
	var err error
	rc.RootDir, rc.VCSType, err = getRootDir(dir)
	if err != nil {
		return nil, fmt.Errorf("detecting git/hg repository in %s: %s", dir, err)
	}
	if rc.RootDir == "" {
		return nil, fmt.Errorf("no git/hg repository found in or above %s", dir)
	}

	par := parallel.NewRun(4)
	par.Do(func() error {
		// Current commit ID
		var err error
		rc.CommitID, err = resolveWorkingTreeRevision(rc.VCSType, rc.RootDir)
		return err
	})
	par.Do(func() error {
		// Get repo URI from clone URL.
		rc.CloneURL = getVCSCloneURL(rc.VCSType, rc.RootDir)
		return nil
	})
	return rc, par.Wait()
}
func (s repoStores) Units(f ...UnitFilter) ([]*unit.SourceUnit, error) {
	rss, err := openRepoStores(s.opener, f)
	if err != nil {
		return nil, err
	}

	var (
		allUnits   []*unit.SourceUnit
		allUnitsMu sync.Mutex
	)
	par := parallel.NewRun(storeFetchPar)
	for repo_, rs_ := range rss {
		repo, rs := repo_, rs_
		if rs == nil {
			continue
		}
		par.Do(func() error {
			units, err := rs.Units(filtersForRepo(repo, f).([]UnitFilter)...)
			if err != nil && !isStoreNotExist(err) {
				return err
			}
			for _, unit := range units {
				unit.Repo = repo
			}
			allUnitsMu.Lock()
			allUnits = append(allUnits, units...)
			allUnitsMu.Unlock()
			return nil
		})
	}
	err = par.Wait()
	return allUnits, err
}
// refsAtOffsets reads the refs at the given serialized byte offsets
// from the ref data file and returns them in arbitrary order.
func (s *fsUnitStore) refsAtOffsets(ofs byteOffsets, fs []RefFilter) (refs []*graph.Ref, err error) {
	vlog.Printf("%s: reading refs at %d offsets with filters %v...", s, len(ofs), fs)

	f, err := openFetcherOrOpen(s.fs, unitRefsFilename)
	if err != nil {
		return nil, err
	}
	defer func() {
		err2 := f.Close()
		if err == nil {
			err = err2
		}
	}()

	ffs := refFilters(fs)

	p := parFetches(s.fs, fs)
	if p == 0 {
		return nil, nil
	}

	var refsLock sync.Mutex
	par := parallel.NewRun(p)
	for _, ofs_ := range ofs {
		ofs := ofs_
		par.Do(func() error {
			if _, moreOK := LimitRemaining(fs); !moreOK {
				return nil
			}

			// Guess how many bytes this ref is. The s3vfs (if that's the
			// VFS impl in use) will autofetch beyond that if needed.
			const byteEstimate = decodeBufSize
			r, err := rangeReader(s.fs, unitRefsFilename, f, ofs, byteEstimate)
			if err != nil {
				return err
			}
			dec := Codec.NewDecoder(r)
			var ref graph.Ref
			if _, err := dec.Decode(&ref); err != nil {
				return err
			}
			if ffs.SelectRef(&ref) {
				refsLock.Lock()
				refs = append(refs, &ref)
				refsLock.Unlock()
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return refs, err
	}
	sort.Sort(refsByFileStartEnd(refs))

	vlog.Printf("%s: read %v refs at %d offsets with filters %v.", s, len(refs), len(ofs), fs)
	return refs, nil
}
func (c *BuildDataFetchCmd) Execute(args []string) error {
	localFS, localRepoLabel, err := c.getLocalFileSystem()
	if err != nil {
		return err
	}
	remoteFS, remoteRepoLabel, repoRevSpec, err := c.getRemoteFileSystem()
	if err != nil {
		return err
	}

	// Use uncached API client because the .srclib-cache already
	// caches it, and we want to be able to stream large files.
	//
	// TODO(sqs): this uncached client isn't authed because it doesn't
	// have the other API client's http.Client or http.RoundTripper
	cl := newAPIClientWithAuth(false)
	remoteFS, err = cl.BuildData.FileSystem(repoRevSpec)
	if err != nil {
		return err
	}

	if GlobalOpt.Verbose {
		log.Printf("Fetching remote build files for %s to %s...", remoteRepoLabel, localRepoLabel)
	}

	// TODO(sqs): check if file exists in local cache and don't fetch it if it does and if it is identical

	par := parallel.NewRun(8)
	w := fs.WalkFS(".", rwvfs.Walkable(remoteFS))
	for w.Step() {
		path := w.Path()
		if err := w.Err(); err != nil {
			if path == "." {
				log.Printf("# No build data to pull from %s", remoteRepoLabel)
				return nil
			}
			return fmt.Errorf("walking remote dir tree: %s", err)
		}
		fi := w.Stat()
		if fi == nil {
			continue
		}
		if !fi.Mode().IsRegular() {
			continue
		}
		par.Do(func() error {
			return fetchFile(remoteFS, localFS, path, fi, c.DryRun)
		})
	}
	if err := par.Wait(); err != nil {
		return fmt.Errorf("error fetching: %s", err)
	}
	return nil
}
// BuildIndexes builds all indexes on store and its lower-level stores
// that match the specified criteria. It returns the status of each
// index that was built (or rebuilt).
func BuildIndexes(store interface{}, c IndexCriteria, indexChan chan<- IndexStatus) ([]IndexStatus, error) {
	var built []IndexStatus
	var builtMu sync.Mutex

	indexChan2 := make(chan IndexStatus)
	done := make(chan struct{})
	go func() {
		var par *parallel.Run
		lastDependsOnChildren := false
		for sx := range indexChan2 {
			doBuild := func(sx IndexStatus) {
				start := time.Now()
				err := sx.store.BuildIndex(sx.Name, sx.index)
				sx.BuildDuration = time.Since(start)
				if err == nil {
					sx.Stale = false
				} else {
					sx.BuildError = err.Error()
				}

				builtMu.Lock()
				built = append(built, sx)
				builtMu.Unlock()
				if indexChan != nil {
					indexChan <- sx
				}
			}

			// Run indexes in parallel, but if we
			// encounter an index that depends on children, wait for
			// all previously seen indexes to finish before building
			// those indexes.
			if sx.DependsOnChildren != lastDependsOnChildren && par != nil {
				par.Wait()
				par = nil
			}
			if par == nil {
				par = parallel.NewRun(MaxIndexParallel)
			}

			sx_ := sx
			par.Do(func() error { doBuild(sx_); return nil })

			lastDependsOnChildren = sx.DependsOnChildren
		}
		if par != nil {
			par.Wait()
		}
		done <- struct{}{}
	}()

	err := listIndexes(store, c, indexChan2, nil)
	close(indexChan2)
	<-done
	return built, err
}
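// A hedged usage sketch (hypothetical helper, not part of the original code):
// drives BuildIndexes over only the stale indexes of a store and logs each
// IndexStatus as it arrives on the channel while the builds are running.
// The IndexCriteria.Stale pointer and the IndexStatus fields used here appear
// in the surrounding snippets; everything else is assumed.
func buildStaleIndexes(s interface{}) ([]IndexStatus, error) {
	stale := true
	ch := make(chan IndexStatus)
	done := make(chan struct{})
	go func() {
		for st := range ch {
			log.Printf("built index %s (%s) in %s", st.Name, st.Type, st.BuildDuration)
		}
		close(done)
	}()
	built, err := BuildIndexes(s, IndexCriteria{Stale: &stale}, ch)
	close(ch) // BuildIndexes has returned, so no more statuses will be sent.
	<-done
	return built, err
}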
func (s unitStores) Refs(f ...RefFilter) ([]*graph.Ref, error) {
	uss, err := openUnitStores(s.opener, f)
	if err != nil {
		return nil, err
	}

	c_unitStores_Refs_last_numUnitsQueried = 0
	var (
		allRefsMu sync.Mutex
		allRefs   []*graph.Ref
	)
	par := parallel.NewRun(storeFetchPar)
	for u, us := range uss {
		if us == nil {
			continue
		}
		u, us := u, us
		c_unitStores_Refs_last_numUnitsQueried++
		par.Do(func() error {
			if _, moreOK := LimitRemaining(f); !moreOK {
				return nil
			}
			fCopy := filtersForUnit(u, f).([]RefFilter)
			fCopy = withImpliedUnit(fCopy, u)
			refs, err := us.Refs(fCopy...)
			if err != nil && !isStoreNotExist(err) {
				return err
			}
			for _, ref := range refs {
				ref.UnitType = u.Type
				ref.Unit = u.Name
				if ref.DefUnitType == "" {
					ref.DefUnitType = u.Type
				}
				if ref.DefUnit == "" {
					ref.DefUnit = u.Name
				}
			}
			allRefsMu.Lock()
			allRefs = append(allRefs, refs...)
			allRefsMu.Unlock()
			return nil
		})
	}
	err = par.Wait()
	return allRefs, err
}
// ReadCached reads a Tree's configuration from all of its source unit
// definition files (which may either be in a local VFS rooted at a
// .srclib-cache/<COMMITID> dir, or a remote VFS). It does not read
// the Srcfile; the Srcfile's directives are already accounted for in
// the cached source unit definition files.
//
// bdfs should be a VFS obtained from a call to
// (buildstore.RepoBuildStore).Commit.
func ReadCached(bdfs vfs.FileSystem) (*Tree, error) {
	if _, err := bdfs.Lstat("."); os.IsNotExist(err) {
		return nil, fmt.Errorf("build cache dir does not exist (did you run `srclib config` to create it)?")
	} else if err != nil {
		return nil, err
	}

	// Collect all **/*.unit.json files.
	var unitFiles []string
	unitSuffix := buildstore.DataTypeSuffix(unit.SourceUnit{})
	w := fs.WalkFS(".", rwvfs.Walkable(rwvfs.ReadOnly(bdfs)))
	for w.Step() {
		if err := w.Err(); err != nil {
			return nil, err
		}
		if path := w.Path(); strings.HasSuffix(path, unitSuffix) {
			unitFiles = append(unitFiles, path)
		}
	}

	// Parse units
	sort.Strings(unitFiles)
	units := make([]*unit.SourceUnit, len(unitFiles))
	par := parallel.NewRun(runtime.GOMAXPROCS(0))
	for i_, unitFile_ := range unitFiles {
		i, unitFile := i_, unitFile_
		par.Do(func() error {
			f, err := bdfs.Open(unitFile)
			if err != nil {
				return err
			}
			if err := json.NewDecoder(f).Decode(&units[i]); err != nil {
				f.Close()
				return err
			}
			if err := f.Close(); err != nil {
				return err
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return nil, err
	}

	return &Tree{SourceUnits: units}, nil
}
func brokenRefsOnly(refs []*graph.Ref, s interface{}) ([]*graph.Ref, error) {
	uniqRefDefs := map[graph.DefKey][]*graph.Ref{}
	loggedDefRepos := map[string]struct{}{}
	for _, ref := range refs {
		if ref.Repo != ref.DefRepo {
			if _, logged := loggedDefRepos[ref.DefRepo]; !logged {
				// TODO(sqs): need to skip these because we don't know the
				// "DefCommitID" in the def's repo, and ByDefKey requires
				// the key to have a CommitID.
				log.Printf("WARNING: Can't check resolution of cross-repo ref (ref.Repo=%q != ref.DefRepo=%q) - cross-repo ref checking is not yet implemented. (This log message will not be repeated.)", ref.Repo, ref.DefRepo)
				loggedDefRepos[ref.DefRepo] = struct{}{}
			}
			continue
		}
		def := ref.DefKey()
		def.CommitID = ref.CommitID
		uniqRefDefs[def] = append(uniqRefDefs[def], ref)
	}

	var (
		brokenRefs  []*graph.Ref
		brokenRefMu sync.Mutex
		par         = parallel.NewRun(runtime.GOMAXPROCS(0))
	)
	for def_, refs_ := range uniqRefDefs {
		def, refs := def_, refs_
		par.Do(func() error {
			defs, err := s.(store.RepoStore).Defs(store.ByDefKey(def))
			if err != nil {
				return err
			}
			if len(defs) == 0 {
				brokenRefMu.Lock()
				brokenRefs = append(brokenRefs, refs...)
				brokenRefMu.Unlock()
			}
			return nil
		})
	}
	err := par.Wait()
	sort.Sort(graph.Refs(brokenRefs))
	return brokenRefs, err
}
func TestParallelMaxPar(t *testing.T) {
	const (
		totalDo = 10
		maxPar  = 3
	)
	var mu sync.Mutex
	max := 0
	n := 0
	tot := 0
	r := parallel.NewRun(maxPar)
	for i := 0; i < totalDo; i++ {
		r.Do(func() error {
			mu.Lock()
			tot++
			n++
			if n > max {
				max = n
			}
			mu.Unlock()
			time.Sleep(0.1e9)
			mu.Lock()
			n--
			mu.Unlock()
			return nil
		})
	}
	err := r.Wait()
	if n != 0 {
		t.Errorf("%d functions still running", n)
	}
	if tot != totalDo {
		t.Errorf("all functions not executed; want %d got %d", totalDo, tot)
	}
	if err != nil {
		t.Errorf("wrong error; want nil got %v", err)
	}
	if max != maxPar {
		t.Errorf("wrong number of do's ran at once; want %d got %d", maxPar, max)
	}
}
func OpenRepo(dir string) (*Repo, error) {
	if fi, err := os.Stat(dir); err != nil || !fi.Mode().IsDir() {
		return nil, fmt.Errorf("not a directory: %q", dir)
	}

	rc := new(Repo)

	// VCS and root directory
	var err error
	rc.RootDir, rc.VCSType, err = getRootDir(dir)
	if err != nil || rc.RootDir == "" {
		log.Printf("Failed to detect git/hg repository root dir for %q; continuing.", dir)

		// Be permissive and return a repo even if there is no git/hg repository.
		wd, err := os.Getwd()
		if err != nil {
			return nil, err
		}
		rc.RootDir = wd
		// TODO: Ensure that builds without commit ids are successful.
		rc.CommitID = "ffffffffffffffffffffffffffffffffffffffff"
		rc.VCSType = ""
		rc.CloneURL = ""
		return rc, nil
	}

	par := parallel.NewRun(4)
	par.Do(func() error {
		// Current commit ID
		var err error
		rc.CommitID, err = resolveWorkingTreeRevision(rc.VCSType, rc.RootDir)
		return err
	})
	par.Do(func() error {
		// Get repo URI from clone URL.
		rc.CloneURL = getVCSCloneURL(rc.VCSType, rc.RootDir)
		return nil
	})
	return rc, par.Wait()
}
func TestParallelError(t *testing.T) {
	const (
		totalDo = 10
		errDo   = 5
	)
	r := parallel.NewRun(6)
	for i := 0; i < totalDo; i++ {
		i := i
		if i >= errDo {
			r.Do(func() error { return intError(i) })
		} else {
			r.Do(func() error { return nil })
		}
	}
	err := r.Wait()
	if err == nil {
		t.Fatalf("expected error, got none")
	}
	errs := err.(parallel.Errors)
	if len(errs) != totalDo-errDo {
		t.Fatalf("wrong error count; want %d got %d", totalDo-errDo, len(errs))
	}
	ints := make([]int, len(errs))
	for i, err := range errs {
		ints[i] = int(err.(intError))
	}
	sort.Ints(ints)
	for i, n := range ints {
		if n != i+errDo {
			t.Errorf("unexpected error value; want %d got %d", i+errDo, n)
		}
	}
}
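// A minimal sketch (assumed, not from the original code) of how a caller can
// unpack the aggregated error that Wait returns: the parallel package used by
// these tests collects every non-nil task error into a parallel.Errors value.
// The helper name and the parallelism limit are hypothetical.
func runAll(tasks []func() error) error {
	run := parallel.NewRun(4)
	for _, task := range tasks {
		task := task // copy the loop variable before it is captured
		run.Do(task)
	}
	if err := run.Wait(); err != nil {
		if errs, ok := err.(parallel.Errors); ok {
			for _, e := range errs {
				log.Printf("task failed: %v", e)
			}
		}
		return err
	}
	return nil
}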
func (c *BuildDataUploadCmd) Execute(args []string) error {
	localFS, localRepoLabel, err := c.getLocalFileSystem()
	if err != nil {
		return err
	}
	remoteFS, remoteRepoLabel, _, err := c.getRemoteFileSystem()
	if err != nil {
		return err
	}

	if GlobalOpt.Verbose {
		log.Printf("Uploading build files from %s to %s...", localRepoLabel, remoteRepoLabel)
	}

	// TODO(sqs): check if file exists remotely and don't upload it if it does and if it is identical

	par := parallel.NewRun(8)
	w := fs.WalkFS(".", rwvfs.Walkable(localFS))
	for w.Step() {
		if err := w.Err(); err != nil {
			return err
		}
		fi := w.Stat()
		if fi == nil {
			continue
		}
		if !fi.Mode().IsRegular() {
			continue
		}
		path := w.Path()
		par.Do(func() error {
			return uploadFile(localFS, remoteFS, path, fi, c.DryRun)
		})
	}
	return par.Wait()
}
// ScanMulti runs multiple scanner tools in parallel. It passes command-line
// options from opt to each one, and it sends the JSON representation of
// treeConfig (the repo/tree's Config) to each tool's stdin.
func ScanMulti(scanners []toolchain.Tool, opt Options, treeConfig map[string]interface{}) ([]*unit.SourceUnit, error) {
	if treeConfig == nil {
		treeConfig = map[string]interface{}{}
	}

	var (
		units []*unit.SourceUnit
		mu    sync.Mutex
	)

	run := parallel.NewRun(runtime.GOMAXPROCS(0))
	for _, scanner_ := range scanners {
		scanner := scanner_
		run.Do(func() error {
			units2, err := Scan(scanner, opt, treeConfig)
			if err != nil {
				cmd, newErr := scanner.Command()
				if newErr != nil {
					return fmt.Errorf("cmd error: %s", newErr)
				}
				return fmt.Errorf("scanner %v: %s", cmd.Args, err)
			}

			mu.Lock()
			defer mu.Unlock()
			units = append(units, units2...)
			return nil
		})
	}
	err := run.Wait()
	// Return error only if none of the commands succeeded.
	if len(units) == 0 {
		return nil, err
	}
	return units, nil
}
// Import calls to the underlying fsUnitStore to write the def
// and ref data files. It also builds and writes the indexes.
func (s *indexedUnitStore) Import(data graph.Output) error {
	cleanForImport(&data, "", "", "")

	var defOfs, refOfs byteOffsets
	var refFBRs fileByteRanges
	par := parallel.NewRun(2)
	par.Do(func() (err error) {
		defOfs, err = s.fsUnitStore.writeDefs(data.Defs)
		return err
	})
	par.Do(func() (err error) {
		refFBRs, refOfs, err = s.fsUnitStore.writeRefs(data.Refs)
		return err
	})
	if err := par.Wait(); err != nil {
		return err
	}

	if err := s.buildIndexes(s.Indexes(), &data, defOfs, refFBRs, refOfs); err != nil {
		return err
	}
	return nil
}
// refsAtByteRanges reads the refs at the given serialized byte ranges
// from the ref data file and returns them in arbitrary order.
func (s *fsUnitStore) refsAtByteRanges(brs []byteRanges, fs []RefFilter) (refs []*graph.Ref, err error) {
	vlog.Printf("%s: reading refs at %d byte ranges with filters %v...", s, len(brs), fs)

	f, err := openFetcherOrOpen(s.fs, unitRefsFilename)
	if err != nil {
		return nil, err
	}
	defer func() {
		err2 := f.Close()
		if err == nil {
			err = err2
		}
	}()

	ffs := refFilters(fs)

	p := parFetches(s.fs, fs)
	if p == 0 {
		return nil, nil
	}

	// See how many bytes we need to read to get the refs in all
	// byteRanges.
	readLengths := make([]int64, len(brs))
	totalRefs := 0
	for i, br := range brs {
		var n int64
		for _, b := range br[1:] {
			n += b
			totalRefs++
		}
		readLengths[i] = n
	}

	var refsLock sync.Mutex
	par := parallel.NewRun(p)
	for i_, br_ := range brs {
		i, br := i_, br_
		par.Do(func() error {
			if _, moreOK := LimitRemaining(fs); !moreOK {
				return nil
			}

			r, err := rangeReader(s.fs, unitRefsFilename, f, br.start(), readLengths[i])
			if err != nil {
				return err
			}
			dec := Codec.NewDecoder(r)
			for range br[1:] {
				var ref graph.Ref
				if _, err := dec.Decode(&ref); err != nil {
					return err
				}
				if ffs.SelectRef(&ref) {
					refsLock.Lock()
					refs = append(refs, &ref)
					refsLock.Unlock()
				}
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return refs, err
	}
	sort.Sort(refsByFileStartEnd(refs))

	vlog.Printf("%s: read %d refs at %d byte ranges with filters %v.", s, len(refs), len(brs), fs)
	return refs, nil
}
// Import imports build data into a RepoStore or MultiRepoStore.
func Import(buildDataFS vfs.FileSystem, stor interface{}, opt ImportOpt) error {
	// Traverse the build data directory for this repo and commit to
	// create the makefile that lists the targets (which are the data
	// files we will import).
	treeConfig, err := config.ReadCached(buildDataFS)
	if err != nil {
		return fmt.Errorf("error calling config.ReadCached: %s", err)
	}
	mf, err := plan.CreateMakefile(".", nil, "", treeConfig, plan.Options{NoCache: true})
	if err != nil {
		return fmt.Errorf("error calling plan.Makefile: %s", err)
	}

	var (
		mu               sync.Mutex
		hasIndexableData bool
	)

	par := parallel.NewRun(10)
	for _, rule_ := range mf.Rules {
		rule := rule_

		if opt.Unit != "" || opt.UnitType != "" {
			type ruleForSourceUnit interface {
				SourceUnit() *unit.SourceUnit
			}
			if rule, ok := rule.(ruleForSourceUnit); ok {
				u := rule.SourceUnit()
				if (opt.Unit != "" && u.Name != opt.Unit) || (opt.UnitType != "" && u.Type != opt.UnitType) {
					continue
				}
			} else {
				// Skip all non-source-unit rules if --unit or
				// --unit-type are specified.
				continue
			}
		}

		par.Do(func() error {
			switch rule := rule.(type) {
			case *grapher.GraphUnitRule:
				var data graph.Output
				if err := readJSONFileFS(buildDataFS, rule.Target(), &data); err != nil {
					if os.IsNotExist(err) {
						log.Printf("Warning: no build data for unit %s %s.", rule.Unit.Type, rule.Unit.Name)
						return nil
					}
					return fmt.Errorf("error reading JSON file %s for unit %s %s: %s", rule.Target(), rule.Unit.Type, rule.Unit.Name, err)
				}
				if opt.DryRun || GlobalOpt.Verbose {
					log.Printf("# Importing graph data (%d defs, %d refs, %d docs, %d anns) for unit %s %s", len(data.Defs), len(data.Refs), len(data.Docs), len(data.Anns), rule.Unit.Type, rule.Unit.Name)
					if opt.DryRun {
						return nil
					}
				}

				// HACK: Transfer docs to [def].Docs.
				docsByPath := make(map[string]*graph.Doc, len(data.Docs))
				for _, doc := range data.Docs {
					docsByPath[doc.Path] = doc
				}
				for _, def := range data.Defs {
					if doc, present := docsByPath[def.Path]; present {
						def.Docs = append(def.Docs, &graph.DefDoc{Format: doc.Format, Data: doc.Data})
					}
				}

				switch imp := stor.(type) {
				case store.RepoImporter:
					if err := imp.Import(opt.CommitID, rule.Unit, data); err != nil {
						return fmt.Errorf("error running store.RepoImporter.Import: %s", err)
					}
				case store.MultiRepoImporter:
					if err := imp.Import(opt.Repo, opt.CommitID, rule.Unit, data); err != nil {
						return fmt.Errorf("error running store.MultiRepoImporter.Import: %s", err)
					}
				default:
					return fmt.Errorf("store (type %T) does not implement importing", stor)
				}

				mu.Lock()
				hasIndexableData = true
				mu.Unlock()
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return err
	}

	if hasIndexableData && !opt.NoIndex {
		if GlobalOpt.Verbose {
			log.Printf("# Building indexes")
		}
		switch s := stor.(type) {
		case store.RepoIndexer:
			if err := s.Index(opt.CommitID); err != nil {
				return fmt.Errorf("Error indexing commit %s: %s", opt.CommitID, err)
			}
		case store.MultiRepoIndexer:
			if err := s.Index(opt.Repo, opt.CommitID); err != nil {
				return fmt.Errorf("error indexing %s@%s: %s", opt.Repo, opt.CommitID, err)
			}
		}
	}
	return nil
}
func (c *SimpleRepoCmd) Execute(args []string) error {
	if err := c.validate(); err != nil {
		return err
	}
	if err := removeGlob(".srclib-*"); err != nil {
		return err
	}

	units := make([]*unit.SourceUnit, 0)
	unitNames := hierarchicalNames("u", "unit", "", c.NUnits)
	for _, unitName := range unitNames {
		units = append(units, &unit.SourceUnit{
			Name:     unitName,
			Type:     "GoPackage",
			Repo:     c.Repo,
			CommitID: c.CommitID,
			Files:    []string{},
			Dir:      unitName,
		})
	}

	if c.GenSource {
		if err := resetSource(); err != nil {
			return err
		}

		// generate source files
		par := parallel.NewRun(runtime.GOMAXPROCS(0))
		for _, ut_ := range units {
			ut := ut_
			par.Do(func() error { return c.genUnit(ut) })
		}
		if err := par.Wait(); err != nil {
			return err
		}

		// get commit ID
		commitID, err := getGitCommitID()
		if err != nil {
			return err
		}

		// update command to generate graph data
		c.CommitID = commitID
		c.GenSource = false
	}

	// generate graph data
	par := parallel.NewRun(runtime.GOMAXPROCS(0))
	for _, ut_ := range units {
		ut := ut_
		ut.CommitID = c.CommitID
		par.Do(func() error { return c.genUnit(ut) })
	}
	if err := par.Wait(); err != nil {
		return err
	}
	return nil
}
func TestIntegration(t *testing.T) {
	if testing.Short() {
		t.Skip("long-running integration test")
	}

	repoInfos := getUserRepos(t, *repoOwner)

	type nodeInfo struct {
		cmd     *exec.Cmd
		baseURL string
	}
	nodes := map[string]*nodeInfo{}

	repos := map[repoInfo]vcs.Repository{}
	var reposMu sync.Mutex

	if *etcdDebugLog {
		etcd_client.SetLogger(log.New(os.Stderr, "etcd: ", 0))
	}

	withEtcd(t, func(etcdConfig *config.Config, ec *etcd_client.Client) {
		defer func() {
			if *waitBeforeExit {
				log.Printf("\n\nTest run ended. Ctrl-C to exit.")
				select {}
			}
		}()

		b := datad.NewEtcdBackend("/datad/vcs", ec)
		cc := NewClient(datad.NewClient(b), nil)

		if err := exec.Command("go", "install", "sourcegraph.com/sourcegraph/vcsstore/cmd/vcsstore").Run(); err != nil {
			t.Fatal(err)
		}

		killNode := func(name string, ni *nodeInfo) {
			if ni != nil && ni.cmd != nil {
				ni.cmd.Process.Kill()
				ni.cmd = nil
				delete(nodes, name)
			}
		}

		// Start the nodes and vcsstore servers.
		for i := 0; i < *numNodes; i++ {
			n := 6000 + i
			nodeName := fmt.Sprintf("127.0.0.1:%d", n)
			storageDir := fmt.Sprintf("/tmp/test-vcsstore%d", n)
			cmd := exec.Command("vcsstore", "-v", "-etcd="+etcdConfig.Addr, "-s="+storageDir, "serve", "-datad", "-d", fmt.Sprintf("-http=:%d", n), "-datad-node-name="+nodeName)
			nodes[nodeName] = &nodeInfo{cmd: cmd, baseURL: "http://" + nodeName}
			cmd.Stdout, cmd.Stderr = os.Stderr, os.Stderr
			err := cmd.Start()
			if err != nil {
				t.Fatalf("error starting %v: %s", cmd.Args, err)
			}
			log.Printf("Launched node %s with storage dir %s (%v).", nodeName, storageDir, cmd.Args)
			defer func() {
				killNode(nodeName, nodes[nodeName])
			}()
		}

		// Wait for servers.
		time.Sleep(400 * time.Millisecond)

		// Clone the repositories.
		cloneStart := time.Now()
		var wg sync.WaitGroup
		for _, ri_ := range repoInfos {
			ri := ri_
			wg.Add(1)
			go func() {
				defer wg.Done()
				log.Printf("cloning %v...", ri)
				repo, err := cc.Repository(ri.vcsType, mustParseURL(ri.cloneURL))
				if err != nil {
					t.Errorf("clone %v failed: %s", ri, err)
					return
				}
				err = repo.(vcsclient.RepositoryCloneUpdater).CloneOrUpdate(vcs.RemoteOpts{})
				if err != nil {
					t.Errorf("remote clone %v failed: %s", ri, err)
					return
				}
				reposMu.Lock()
				defer reposMu.Unlock()
				repos[ri] = repo
			}()
		}
		wg.Wait()
		t.Logf("Cloned %d repositories in %s.", len(repoInfos), time.Since(cloneStart))

		performRepoOps := func() error {
			par := parallel.NewRun(1) // keep at 1, libgit2 has concurrency segfaults :(
			// Perform some operations on the repos.
			for ri_, repo_ := range repos {
				ri, repo := ri_, repo_ // copy loop variables before the closure is scheduled
				par.Do(func() error {
					commitID, err := repo.ResolveBranch("master")
					if err != nil {
						return fmt.Errorf("repo %v: resolve branch 'master' failed: %s", ri, err)
					}
					commits, _, err := repo.Commits(vcs.CommitsOptions{Head: commitID})
					if err != nil {
						return fmt.Errorf("repo %v: commit log of 'master' failed: %s", ri, err)
					}
					if len(commits) == 0 {
						return fmt.Errorf("repo %v: commit log has 0 entries", ri)
					}
					fs, err := repo.FileSystem(commitID)
					if err != nil {
						return fmt.Errorf("repo %v: filesystem at 'master' failed: %s", ri, err)
					}
					entries, err := fs.ReadDir("/")
					if err != nil {
						return fmt.Errorf("repo %v: readdir '/' at 'master' failed: %s", ri, err)
					}
					_ = entries
					return nil
				})
			}
			return par.Wait()
		}

		opsStart := time.Now()
		err := performRepoOps()
		if err != nil {
			t.Fatalf("before killing any nodes, got error in repo ops: %s", err)
		}
		log.Printf("\n\n\nPerformed various operations on %d repositories in %s.\n\n\n", len(repos), time.Since(opsStart))

		if *numNodes <= 1 {
			t.Fatal("can't test cluster resilience with only 1 node")
		}

		// Kill half of all nodes to test resilience.
		for i := 0; i < *numNodes/2; i++ {
			for name, ni := range nodes {
				reg := datad.NewRegistry(b)
				regKeys, err := reg.KeysForNode(name)
				if err != nil {
					t.Fatal(err)
				}

				killNode(name, ni)
				log.Printf("\n\n\nKilled node %s. Before killing, it had registered keys: %v. Expect to see failures related to these keys in the next set of operations we perform.\n\n\n", name, regKeys)
				break
			}
		}

		time.Sleep(time.Millisecond * 300)
		killedTime := time.Now()

		// After killing nodes, run the same set of operations. We expect some
		// KeyTransportErrors here because the cluster detects that some nodes
		// are down but hasn't had time yet to fetch the data to other nodes.
		//
		// Keep running the operations until we get no more errors.
		log.Printf("\n\n\nAfter killing some nodes, we're going to keep performing VCS ops on the cluster until it fully heals itself and we see no more errors. (We expect some errors until it heals itself.)\n\n\n")
		try := 0
		for {
			err := performRepoOps()
			if err != nil {
				log.Printf("\n\n\nTry #%d (%s): got error %+v\n\n\n", try, time.Since(killedTime), err)
				try++
				time.Sleep(2000 * time.Millisecond)
				continue
			}
			log.Printf("\n\n\nTry #%d: SUCCESS. The cluster healed itself after %s.\n\n\n", try, time.Since(killedTime))
			break
		}
	})
}
func (s *indexedTreeStore) buildIndexes(xs map[string]Index, units []*unit.SourceUnit, unitRefIndexes map[unit.ID2]*defRefsIndex, unitDefQueryIndexes map[unit.ID2]*defQueryIndex) error {
	// TODO(sqs): there's a race condition here if multiple imports
	// are running concurrently, they could clobber each other's
	// indexes. (S3 is eventually consistent.)

	var getUnitsErr error
	var getUnitsOnce sync.Once
	getUnits := func() ([]*unit.SourceUnit, error) {
		getUnitsOnce.Do(func() {
			if getUnitsErr == nil && units == nil {
				units, getUnitsErr = s.fsTreeStore.Units()
			}
			if units == nil {
				units = []*unit.SourceUnit{}
			}
		})
		return units, getUnitsErr
	}

	var getUnitRefIndexesErr error
	var getUnitRefIndexesOnce sync.Once
	var unitRefIndexesLock sync.Mutex
	getUnitRefIndexes := func() (map[unit.ID2]*defRefsIndex, error) {
		getUnitRefIndexesOnce.Do(func() {
			if getUnitRefIndexesErr == nil && unitRefIndexes == nil {
				// Read in the defRefsIndex for all source units.
				units, err := getUnits()
				if err != nil {
					getUnitRefIndexesErr = err
					return
				}

				// Use openUnitStore on the list from getUnits so we
				// don't need to traverse the FS tree to enumerate all
				// the source units again (which is slow).
				uss := make(map[unit.ID2]UnitStore, len(units))
				for _, u := range units {
					uss[u.ID2()] = s.fsTreeStore.openUnitStore(u.ID2())
				}

				unitRefIndexes = make(map[unit.ID2]*defRefsIndex, len(units))
				par := parallel.NewRun(runtime.GOMAXPROCS(0))
				for u_, us_ := range uss {
					u := u_
					us, ok := us_.(*indexedUnitStore)
					if !ok {
						continue
					}
					par.Do(func() error {
						x := us.indexes[defToRefsIndexName]
						if err := prepareIndex(us.fs, defToRefsIndexName, x); err != nil {
							return err
						}
						unitRefIndexesLock.Lock()
						defer unitRefIndexesLock.Unlock()
						unitRefIndexes[u] = x.(*defRefsIndex)
						return nil
					})
				}
				getUnitRefIndexesErr = par.Wait()
			}
			if unitRefIndexes == nil {
				unitRefIndexes = map[unit.ID2]*defRefsIndex{}
			}
		})
		return unitRefIndexes, getUnitRefIndexesErr
	}

	var getUnitDefQueryIndexesErr error
	var getUnitDefQueryIndexesOnce sync.Once
	var unitDefQueryIndexesLock sync.Mutex
	getUnitDefQueryIndexes := func() (map[unit.ID2]*defQueryIndex, error) {
		getUnitDefQueryIndexesOnce.Do(func() {
			if getUnitDefQueryIndexesErr == nil && unitDefQueryIndexes == nil {
				// Read in the defQueryIndex for all source units.
				units, err := getUnits()
				if err != nil {
					getUnitDefQueryIndexesErr = err
					return
				}

				// Use openUnitStore on the list from getUnits so we
				// don't need to traverse the FS tree to enumerate all
				// the source units again (which is slow).
				uss := make(map[unit.ID2]UnitStore, len(units))
				for _, u := range units {
					uss[u.ID2()] = s.fsTreeStore.openUnitStore(u.ID2())
				}

				unitDefQueryIndexes = make(map[unit.ID2]*defQueryIndex, len(units))
				par := parallel.NewRun(len(units))
				for u_, us_ := range uss {
					u := u_
					us, ok := us_.(*indexedUnitStore)
					if !ok {
						continue
					}
					par.Do(func() error {
						x := us.indexes[defQueryIndexName]
						if err := prepareIndex(us.fs, defQueryIndexName, x); err != nil {
							return err
						}
						unitDefQueryIndexesLock.Lock()
						defer unitDefQueryIndexesLock.Unlock()
						unitDefQueryIndexes[u] = x.(*defQueryIndex)
						return nil
					})
				}
				getUnitDefQueryIndexesErr = par.Wait()
			}
			if unitDefQueryIndexes == nil {
				unitDefQueryIndexes = map[unit.ID2]*defQueryIndex{}
			}
		})
		return unitDefQueryIndexes, getUnitDefQueryIndexesErr
	}

	par := parallel.NewRun(len(xs))
	for name_, x_ := range xs {
		name, x := name_, x_
		par.Do(func() error {
			switch x := x.(type) {
			case unitIndexBuilder:
				units, err := getUnits()
				if err != nil {
					return err
				}
				if err := x.Build(units); err != nil {
					return err
				}
			case unitRefIndexBuilder:
				unitRefIndexes, err := getUnitRefIndexes()
				if err != nil {
					return err
				}
				if err := x.Build(unitRefIndexes); err != nil {
					return err
				}
			case defQueryTreeIndexBuilder:
				unitDefQueryIndexes, err := getUnitDefQueryIndexes()
				if err != nil {
					return err
				}
				if err := x.Build(unitDefQueryIndexes); err != nil {
					return err
				}
			default:
				return fmt.Errorf("don't know how to build index %q of type %T", name, x)
			}
			if x, ok := x.(persistedIndex); ok {
				if err := writeIndex(s.fs, name, x); err != nil {
					return err
				}
			}
			return nil
		})
	}
	return par.Wait()
}
// listIndexes lists indexes in s (a store) asynchronously, sending
// status objects to ch. If f != nil, it is called to set/modify
// fields on each status object before the IndexStatus object is sent to
// the channel.
func listIndexes(s interface{}, c IndexCriteria, ch chan<- IndexStatus, f func(*IndexStatus)) error {
	switch s := s.(type) {
	case indexedStore:
		xx := s.Indexes()
		var waitingOnChildren []IndexStatus
		for name, x := range xx {
			st := IndexStatus{
				Name:  name,
				Type:  strings.TrimPrefix(reflect.TypeOf(x).String(), "*store."),
				index: x,
				store: s,
			}
			if !strings.Contains(st.Name, c.Name) {
				continue
			}
			if !strings.Contains(st.Type, c.Type) {
				continue
			}
			fi, err := s.statIndex(name)
			if os.IsNotExist(err) {
				st.Stale = true
			} else if err != nil {
				st.Error = err.Error()
			} else {
				st.Size = fi.Size()
			}
			switch x.(type) {
			case unitRefIndexBuilder, defQueryTreeIndexBuilder:
				st.DependsOnChildren = true
			}
			if c.Stale != nil && st.Stale != *c.Stale {
				continue
			}
			if f != nil {
				f(&st)
			}
			if c.Unit != nil && c.Unit != NoSourceUnit {
				if st.Unit == nil || *c.Unit != *st.Unit {
					continue
				}
			}
			if st.DependsOnChildren {
				waitingOnChildren = append(waitingOnChildren, st)
			} else {
				ch <- st
			}
		}

		switch s := s.(type) {
		case *indexedTreeStore:
			if err := listIndexes(s.fsTreeStore, c, ch, f); err != nil {
				return err
			}
		case *indexedUnitStore:
			if err := listIndexes(s.fsUnitStore, c, ch, f); err != nil {
				return err
			}
		}

		for _, si := range waitingOnChildren {
			ch <- si
		}

	case repoStoreOpener:
		var rss map[string]RepoStore
		if c.Repo == "" {
			var err error
			rss, err = s.openAllRepoStores()
			if err != nil && !isStoreNotExist(err) {
				return err
			}
		} else {
			rss = map[string]RepoStore{c.Repo: s.openRepoStore(c.Repo)}
		}

		// Sort repos for determinism.
		repos := make([]string, 0, len(rss))
		for repo := range rss {
			repos = append(repos, repo)
		}
		sort.Strings(repos)

		if c.ReposOffset != 0 {
			if c.ReposOffset < len(repos) {
				repos = repos[c.ReposOffset:]
			} else {
				log.Printf("Warning: A ReposOffset (%d) was specified that equals or exceeds the total number of repos (%d).", c.ReposOffset, len(repos))
			}
		}
		if c.ReposLimit != 0 && c.ReposLimit < len(repos) {
			repos = repos[:c.ReposLimit]
		}

		for _, repo := range repos {
			rs := rss[repo]
			err := listIndexes(rs, c, ch, func(x *IndexStatus) {
				x.Repo = repo
				if f != nil {
					f(x)
				}
			})
			if err != nil {
				return err
			}
		}

	case treeStoreOpener:
		var tss map[string]TreeStore
		if c.CommitID == "" {
			var err error
			tss, err = s.openAllTreeStores()
			if err != nil && !isStoreNotExist(err) {
				return err
			}
		} else {
			tss = map[string]TreeStore{c.CommitID: s.openTreeStore(c.CommitID)}
		}
		for commitID, ts := range tss {
			err := listIndexes(ts, c, ch, func(x *IndexStatus) {
				x.CommitID = commitID
				if f != nil {
					f(x)
				}
			})
			if err != nil {
				return err
			}
		}

	case unitStoreOpener:
		if c.Unit == NoSourceUnit {
			return nil
		}
		var uss map[unit.ID2]UnitStore
		if c.Unit == nil {
			var err error
			uss, err = s.openAllUnitStores()
			if err != nil && !isStoreNotExist(err) {
				return err
			}
		} else {
			uss = map[unit.ID2]UnitStore{*c.Unit: s.openUnitStore(*c.Unit)}
		}
		if len(uss) > 0 {
			par := parallel.NewRun(MaxIndexParallel)
			for unit_, us_ := range uss {
				unit, us := unit_, us_
				par.Do(func() error {
					unitCopy := unit
					return listIndexes(us, c, ch, func(x *IndexStatus) {
						x.Unit = &unitCopy
						if f != nil {
							f(x)
						}
					})
				})
			}
			if err := par.Wait(); err != nil {
				return err
			}
		}
	}
	return nil
}
func (c *URefsRepoCmd) Execute(args []string) error {
	if err := c.validate(); err != nil {
		return err
	}
	if err := removeGlob(".srclib-*"); err != nil {
		return err
	}

	units := make([]*unit.SourceUnit, 0)
	unitNames := hierarchicalNames("u", "unit", "", c.NUnits)
	for _, unitName := range unitNames {
		ut := &unit.SourceUnit{
			Name:     unitName,
			Type:     "GoPackage",
			Repo:     c.Repo,
			CommitID: c.CommitID,
			Files:    []string{},
			Dir:      unitName,
		}
		units = append(units, ut)
	}

	if c.GenSource {
		if err := resetSource(); err != nil {
			return err
		}

		// generate source files
		par := parallel.NewRun(runtime.GOMAXPROCS(0))
		for _, ut := range units {
			ut := ut
			par.Do(func() error {
				_, _, _, err := c.genUnit(ut, units)
				return err
			})
		}
		if err := par.Wait(); err != nil {
			return err
		}

		// get commit ID
		commitID, err := getGitCommitID()
		if err != nil {
			return err
		}

		// update command to generate graph data
		c.CommitID = commitID
		c.GenSource = false
	}

	// generate graph data
	gr := make(map[string]*graph.Output)
	for _, ut := range units {
		gr[ut.Name] = &graph.Output{}
	}
	var grmu sync.Mutex

	par := parallel.NewRun(runtime.GOMAXPROCS(0))
	for _, ut := range units {
		ut := ut
		ut.CommitID = c.CommitID
		par.Do(func() error {
			defs, refs, reffiles, err := c.genUnit(ut, units)
			if err != nil {
				return err
			}
			grmu.Lock()
			defer grmu.Unlock()
			gr[ut.Name].Defs = append(gr[ut.Name].Defs, defs...)
			for utName, utRefs := range refs {
				gr[utName].Refs = append(gr[utName].Refs, utRefs...)
			}
			for _, ut2 := range units {
				ut2.Files = append(ut2.Files, reffiles[ut2.Name]...)
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return err
	}

	for _, ut := range units {
		utgraph := gr[ut.Name]
		utgraph.Docs = make([]*graph.Doc, 0)
		if err := writeSrclibCache(ut, utgraph, make([]*dep.Resolution, 0)); err != nil {
			return err
		}
	}
	return nil
}
func (s *indexedUnitStore) buildIndexes(xs map[string]Index, data *graph.Output, defOfs byteOffsets, refFBRs fileByteRanges, refOfs byteOffsets) error {
	var defs []*graph.Def
	var refs []*graph.Ref
	if data != nil {
		// Allow us to distinguish between empty (empty slice) and not-yet-fetched (nil).
		defs = data.Defs
		if defs == nil {
			defs = []*graph.Def{}
		}
		refs = data.Refs
		if refs == nil {
			refs = []*graph.Ref{}
		}
	}

	var getDefsErr error
	var getDefsOnce sync.Once
	getDefs := func() ([]*graph.Def, byteOffsets, error) {
		getDefsOnce.Do(func() {
			// Don't refetch if passed in as arg or if getData was
			// already called.
			if defs == nil {
				defs, defOfs, getDefsErr = s.fsUnitStore.readDefs()
			}
			if defs == nil {
				defs = []*graph.Def{}
			}
		})
		return defs, defOfs, getDefsErr
	}

	var getRefsErr error
	var getRefsOnce sync.Once
	getRefs := func() ([]*graph.Ref, fileByteRanges, byteOffsets, error) {
		getRefsOnce.Do(func() {
			// Don't refetch if passed in as arg or if getData was
			// already called.
			if refs == nil {
				refs, refFBRs, refOfs, getRefsErr = s.fsUnitStore.readRefs()
			}
			if refs == nil {
				refs = []*graph.Ref{}
			}
		})
		return refs, refFBRs, refOfs, getRefsErr
	}

	par := parallel.NewRun(len(xs))
	for name_, x_ := range xs {
		name, x := name_, x_
		par.Do(func() error {
			switch x := x.(type) {
			case defIndexBuilder:
				defs, defOfs, err := getDefs()
				if err != nil {
					return err
				}
				if err := x.Build(defs, defOfs); err != nil {
					return err
				}
			case refIndexBuilder:
				refs, refFBRs, refOfs, err := getRefs()
				if err != nil {
					return err
				}
				if err := x.Build(refs, refFBRs, refOfs); err != nil {
					return err
				}
			default:
				return fmt.Errorf("don't know how to build index %q of type %T", name, x)
			}
			if x, ok := x.(persistedIndex); ok {
				if err := writeIndex(s.fs, name, x); err != nil {
					return err
				}
			}
			return nil
		})
	}
	return par.Wait()
}