Example #1
// RemoveAll removes a tree recursively.
func RemoveAll(path string, vfs rwvfs.WalkableFileSystem) error {
	w := fs.WalkFS(path, vfs)

	remove := func(par *parallel.Run, path string) {
		par.Do(func() error { return vfs.Remove(path) })
	}

	var dirs []string // remove dirs after removing all files
	filesPar := parallel.NewRun(20)
	for w.Step() {
		if err := w.Err(); err != nil {
			return err
		}
		if w.Stat().IsDir() {
			dirs = append(dirs, w.Path())
		} else {
			remove(filesPar, w.Path())
		}
	}

	if err := filesPar.Wait(); err != nil {
		return err
	}

	dirsPar := parallel.NewRun(20)
	sort.Sort(sort.Reverse(sort.StringSlice(dirs))) // reverse so we delete leaf dirs first
	for _, dir := range dirs {
		remove(dirsPar, dir)
	}
	return dirsPar.Wait()
}
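A recurring pattern in these examples: create a bounded Run, queue work with Do, and collect errors with Wait, copying loop variables before the closure captures them and guarding any shared slice or map with a mutex. The following is a minimal, self-contained sketch of that pattern; only the NewRun/Do/Wait API is taken from the examples, while the import path, the function names, and the parallelism limit of 8 are illustrative assumptions.

// Sketch (assumed rog-go-style parallel package; the import path is a guess).
package main

import (
	"fmt"
	"sync"

	"github.com/rogpeppe/rog-go/parallel"
)

func fetchAll(keys []string, fetch func(string) (string, error)) (map[string]string, error) {
	var (
		mu      sync.Mutex
		results = make(map[string]string, len(keys))
	)
	par := parallel.NewRun(8) // at most 8 fetches run concurrently
	for _, key_ := range keys {
		key := key_ // copy the loop variable before the closure captures it
		par.Do(func() error {
			v, err := fetch(key)
			if err != nil {
				return err
			}
			mu.Lock()
			results[key] = v
			mu.Unlock()
			return nil
		})
	}
	// Wait blocks until every queued func has returned and reports any errors.
	return results, par.Wait()
}

func main() {
	res, err := fetchAll([]string{"a", "b"}, func(k string) (string, error) { return "value-" + k, nil })
	fmt.Println(res, err)
}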
Example #2
func (s unitStores) Defs(fs ...DefFilter) ([]*graph.Def, error) {
	uss, err := openUnitStores(s.opener, fs)
	if err != nil {
		return nil, err
	}

	var (
		allDefs   []*graph.Def
		allDefsMu sync.Mutex
	)
	par := parallel.NewRun(storeFetchPar)
	for u_, us_ := range uss {
		u, us := u_, us_
		if us == nil {
			continue
		}

		par.Do(func() error {
			defs, err := us.Defs(filtersForUnit(u, fs).([]DefFilter)...)
			if err != nil && !isStoreNotExist(err) {
				return err
			}
			for _, def := range defs {
				def.UnitType = u.Type
				def.Unit = u.Name
			}
			allDefsMu.Lock()
			allDefs = append(allDefs, defs...)
			allDefsMu.Unlock()
			return nil
		})
	}
	err = par.Wait()
	return allDefs, err
}
Example #3
func (s repoStores) Defs(f ...DefFilter) ([]*graph.Def, error) {
	rss, err := openRepoStores(s.opener, f)
	if err != nil {
		return nil, err
	}

	var (
		allDefs   []*graph.Def
		allDefsMu sync.Mutex
	)
	par := parallel.NewRun(storeFetchPar)
	for repo_, rs_ := range rss {
		repo, rs := repo_, rs_
		if rs == nil {
			continue
		}

		par.Do(func() error {
			defs, err := rs.Defs(filtersForRepo(repo, f).([]DefFilter)...)
			if err != nil && !isStoreNotExist(err) {
				return err
			}
			for _, def := range defs {
				def.Repo = repo
			}
			allDefsMu.Lock()
			allDefs = append(allDefs, defs...)
			allDefsMu.Unlock()
			return nil
		})
	}
	err = par.Wait()
	return allDefs, err
}
Example #4
func OpenRepo(dir string) (*Repo, error) {
	if fi, err := os.Stat(dir); err != nil || !fi.Mode().IsDir() {
		return nil, fmt.Errorf("not a directory: %q", dir)
	}

	rc := new(Repo)

	// VCS and root directory
	var err error
	rc.RootDir, rc.VCSType, err = getRootDir(dir)
	if err != nil {
		return nil, fmt.Errorf("detecting git/hg repository in %s: %s", dir, err)
	}
	if rc.RootDir == "" {
		return nil, fmt.Errorf("no git/hg repository found in or above %s", dir)
	}

	par := parallel.NewRun(4)
	par.Do(func() error {
		// Current commit ID
		var err error
		rc.CommitID, err = resolveWorkingTreeRevision(rc.VCSType, rc.RootDir)
		return err
	})
	par.Do(func() error {
		// Get repo URI from clone URL.
		rc.CloneURL = getVCSCloneURL(rc.VCSType, rc.RootDir)
		return nil
	})
	return rc, par.Wait()
}
Example #5
func (s repoStores) Units(f ...UnitFilter) ([]*unit.SourceUnit, error) {
	rss, err := openRepoStores(s.opener, f)
	if err != nil {
		return nil, err
	}

	var (
		allUnits   []*unit.SourceUnit
		allUnitsMu sync.Mutex
	)
	par := parallel.NewRun(storeFetchPar)
	for repo_, rs_ := range rss {
		repo, rs := repo_, rs_
		if rs == nil {
			continue
		}

		par.Do(func() error {
			units, err := rs.Units(filtersForRepo(repo, f).([]UnitFilter)...)
			if err != nil && !isStoreNotExist(err) {
				return err
			}
			for _, unit := range units {
				unit.Repo = repo
			}
			allUnitsMu.Lock()
			allUnits = append(allUnits, units...)
			allUnitsMu.Unlock()
			return nil
		})
	}
	err = par.Wait()
	return allUnits, err
}
Example #6
// refsAtOffsets reads the refs at the given serialized byte offsets
// from the ref data file and returns them in arbitrary order.
func (s *fsUnitStore) refsAtOffsets(ofs byteOffsets, fs []RefFilter) (refs []*graph.Ref, err error) {
	vlog.Printf("%s: reading refs at %d offsets with filters %v...", s, len(ofs), fs)
	f, err := openFetcherOrOpen(s.fs, unitRefsFilename)
	if err != nil {
		return nil, err
	}
	defer func() {
		err2 := f.Close()
		if err == nil {
			err = err2
		}
	}()

	ffs := refFilters(fs)

	p := parFetches(s.fs, fs)
	if p == 0 {
		return nil, nil
	}

	var refsLock sync.Mutex
	par := parallel.NewRun(p)
	for _, ofs_ := range ofs {
		ofs := ofs_
		par.Do(func() error {
			if _, moreOK := LimitRemaining(fs); !moreOK {
				return nil
			}

			// Guess how many bytes this ref is. The s3vfs (if that's the
			// VFS impl in use) will autofetch beyond that if needed.
			const byteEstimate = decodeBufSize
			r, err := rangeReader(s.fs, unitRefsFilename, f, ofs, byteEstimate)
			if err != nil {
				return err
			}
			dec := Codec.NewDecoder(r)
			var ref graph.Ref
			if _, err := dec.Decode(&ref); err != nil {
				return err
			}
			if ffs.SelectRef(&ref) {
				refsLock.Lock()
				refs = append(refs, &ref)
				refsLock.Unlock()
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return refs, err
	}
	sort.Sort(refsByFileStartEnd(refs))
	vlog.Printf("%s: read %v refs at %d offsets with filters %v.", s, len(refs), len(ofs), fs)
	return refs, nil
}
Example #7
func (c *BuildDataFetchCmd) Execute(args []string) error {
	localFS, localRepoLabel, err := c.getLocalFileSystem()
	if err != nil {
		return err
	}

	remoteFS, remoteRepoLabel, repoRevSpec, err := c.getRemoteFileSystem()
	if err != nil {
		return err
	}

	// Use uncached API client because the .srclib-cache already
	// caches it, and we want to be able to stream large files.
	//
	// TODO(sqs): this uncached client isn't authed because it doesn't
	// have the other API client's http.Client or http.RoundTripper
	cl := newAPIClientWithAuth(false)
	remoteFS, err = cl.BuildData.FileSystem(repoRevSpec)
	if err != nil {
		return err
	}

	if GlobalOpt.Verbose {
		log.Printf("Fetching remote build files for %s to %s...", remoteRepoLabel, localRepoLabel)
	}

	// TODO(sqs): check if file exists in local cache and don't fetch it if it does and if it is identical

	par := parallel.NewRun(8)
	w := fs.WalkFS(".", rwvfs.Walkable(remoteFS))
	for w.Step() {
		path := w.Path()
		if err := w.Err(); err != nil {
			if path == "." {
				log.Printf("# No build data to pull from %s", remoteRepoLabel)
				return nil
			}
			return fmt.Errorf("walking remote dir tree: %s", err)
		}
		fi := w.Stat()
		if fi == nil {
			continue
		}
		if !fi.Mode().IsRegular() {
			continue
		}
		par.Do(func() error {
			return fetchFile(remoteFS, localFS, path, fi, c.DryRun)
		})
	}
	if err := par.Wait(); err != nil {
		return fmt.Errorf("error fetching: %s", err)
	}
	return nil
}
Example #8
// BuildIndexes builds all indexes on store and its lower-level stores
// that match the specified criteria. It returns the status of each
// index that was built (or rebuilt).
func BuildIndexes(store interface{}, c IndexCriteria, indexChan chan<- IndexStatus) ([]IndexStatus, error) {
	var built []IndexStatus
	var builtMu sync.Mutex
	indexChan2 := make(chan IndexStatus)
	done := make(chan struct{})
	go func() {
		var par *parallel.Run
		lastDependsOnChildren := false
		for sx := range indexChan2 {
			doBuild := func(sx IndexStatus) {
				start := time.Now()
				err := sx.store.BuildIndex(sx.Name, sx.index)
				sx.BuildDuration = time.Since(start)
				if err == nil {
					sx.Stale = false
				} else {
					sx.BuildError = err.Error()
				}
				builtMu.Lock()
				built = append(built, sx)
				builtMu.Unlock()
				if indexChan != nil {
					indexChan <- sx
				}
			}

			// Run indexes in parallel, but if we
			// encounter an index that depends on children, wait for
			// all previously seen indexes to finish before building
			// those indexes.
			if sx.DependsOnChildren != lastDependsOnChildren && par != nil {
				par.Wait()
				par = nil
			}
			if par == nil {
				par = parallel.NewRun(MaxIndexParallel)
			}
			sx_ := sx
			par.Do(func() error { doBuild(sx_); return nil })

			lastDependsOnChildren = sx.DependsOnChildren
		}
		if par != nil {
			par.Wait()
		}
		done <- struct{}{}
	}()
	err := listIndexes(store, c, indexChan2, nil)
	close(indexChan2)
	<-done
	return built, err
}
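Example #8 adds one more idea: a Run can double as a barrier by calling Wait and starting a fresh Run whenever a task that depends on earlier work appears. Below is a hedged, generalized sketch of that idea; the task struct, names, and import path are illustrative assumptions, and only NewRun/Do/Wait come from the example.

// Sketch: re-using a Run as a barrier - when the "depends on earlier work"
// flag flips, wait for everything queued so far before queuing more.
package main

import (
	"fmt"

	"github.com/rogpeppe/rog-go/parallel"
)

type task struct {
	name             string
	dependsOnEarlier bool
}

func runWithBarrier(tasks []task, run func(task) error) error {
	var par *parallel.Run
	lastDepends := false
	var firstErr error
	for _, t_ := range tasks {
		t := t_
		// Flush the current batch whenever the dependency flag changes.
		if t.dependsOnEarlier != lastDepends && par != nil {
			if err := par.Wait(); err != nil && firstErr == nil {
				firstErr = err
			}
			par = nil
		}
		if par == nil {
			par = parallel.NewRun(4)
		}
		par.Do(func() error { return run(t) })
		lastDepends = t.dependsOnEarlier
	}
	if par != nil {
		if err := par.Wait(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}

func main() {
	err := runWithBarrier([]task{{"a", false}, {"b", false}, {"c", true}}, func(t task) error {
		fmt.Println("running", t.name)
		return nil
	})
	fmt.Println("done:", err)
}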
Example #9
func (s unitStores) Refs(f ...RefFilter) ([]*graph.Ref, error) {
	uss, err := openUnitStores(s.opener, f)
	if err != nil {
		return nil, err
	}

	c_unitStores_Refs_last_numUnitsQueried = 0
	var (
		allRefsMu sync.Mutex
		allRefs   []*graph.Ref
	)
	par := parallel.NewRun(storeFetchPar)
	for u, us := range uss {
		if us == nil {
			continue
		}
		u, us := u, us

		c_unitStores_Refs_last_numUnitsQueried++

		par.Do(func() error {
			if _, moreOK := LimitRemaining(f); !moreOK {
				return nil
			}
			fCopy := filtersForUnit(u, f).([]RefFilter)
			fCopy = withImpliedUnit(fCopy, u)

			refs, err := us.Refs(fCopy...)
			if err != nil && !isStoreNotExist(err) {
				return err
			}
			for _, ref := range refs {
				ref.UnitType = u.Type
				ref.Unit = u.Name
				if ref.DefUnitType == "" {
					ref.DefUnitType = u.Type
				}
				if ref.DefUnit == "" {
					ref.DefUnit = u.Name
				}
			}

			allRefsMu.Lock()
			allRefs = append(allRefs, refs...)
			allRefsMu.Unlock()
			return nil
		})
	}
	err = par.Wait()
	return allRefs, err
}
Example #10
// ReadCached reads a Tree's configuration from all of its source unit
// definition files (which may either be in a local VFS rooted at a
// .srclib-cache/<COMMITID> dir, or a remote VFS). It does not read
// the Srcfile; the Srcfile's directives are already accounted for in
// the cached source unit definition files.
//
// bdfs should be a VFS obtained from a call to
// (buildstore.RepoBuildStore).Commit.
func ReadCached(bdfs vfs.FileSystem) (*Tree, error) {
	if _, err := bdfs.Lstat("."); os.IsNotExist(err) {
		return nil, fmt.Errorf("build cache dir does not exist (did you run `srclib config` to create it)?")
	} else if err != nil {
		return nil, err
	}

	// Collect all **/*.unit.json files.
	var unitFiles []string
	unitSuffix := buildstore.DataTypeSuffix(unit.SourceUnit{})
	w := fs.WalkFS(".", rwvfs.Walkable(rwvfs.ReadOnly(bdfs)))
	for w.Step() {
		if err := w.Err(); err != nil {
			return nil, err
		}
		if path := w.Path(); strings.HasSuffix(path, unitSuffix) {
			unitFiles = append(unitFiles, path)
		}
	}

	// Parse units
	sort.Strings(unitFiles)
	units := make([]*unit.SourceUnit, len(unitFiles))
	par := parallel.NewRun(runtime.GOMAXPROCS(0))
	for i_, unitFile_ := range unitFiles {
		i, unitFile := i_, unitFile_
		par.Do(func() error {
			f, err := bdfs.Open(unitFile)
			if err != nil {
				return err
			}
			if err := json.NewDecoder(f).Decode(&units[i]); err != nil {
				f.Close()
				return err
			}
			if err := f.Close(); err != nil {
				return err
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return nil, err
	}
	return &Tree{SourceUnits: units}, nil
}
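Example #10 avoids a mutex entirely by pre-sizing the units slice and letting each goroutine write only its own index. A minimal sketch of that design choice follows; the import path and all names are assumptions for illustration.

// Sketch: parallel fill of a pre-sized slice, one index per goroutine, so
// no lock is needed because each Do writes a distinct element.
package main

import (
	"fmt"
	"strings"

	"github.com/rogpeppe/rog-go/parallel"
)

func main() {
	inputs := []string{"a", "b", "c"}
	outputs := make([]string, len(inputs))
	par := parallel.NewRun(4)
	for i_, in_ := range inputs {
		i, in := i_, in_
		par.Do(func() error {
			outputs[i] = strings.ToUpper(in) // distinct index per goroutine
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(outputs)
}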
Example #11
func brokenRefsOnly(refs []*graph.Ref, s interface{}) ([]*graph.Ref, error) {
	uniqRefDefs := map[graph.DefKey][]*graph.Ref{}
	loggedDefRepos := map[string]struct{}{}
	for _, ref := range refs {
		if ref.Repo != ref.DefRepo {
			if _, logged := loggedDefRepos[ref.DefRepo]; !logged {
				// TODO(sqs): need to skip these because we don't know the
				// "DefCommitID" in the def's repo, and ByDefKey requires
				// the key to have a CommitID.
				log.Printf("WARNING: Can't check resolution of cross-repo ref (ref.Repo=%q != ref.DefRepo=%q) - cross-repo ref checking is not yet implemented. (This log message will not be repeated.)", ref.Repo, ref.DefRepo)
				loggedDefRepos[ref.DefRepo] = struct{}{}
			}
			continue
		}
		def := ref.DefKey()
		def.CommitID = ref.CommitID
		uniqRefDefs[def] = append(uniqRefDefs[def], ref)
	}

	var (
		brokenRefs  []*graph.Ref
		brokenRefMu sync.Mutex
		par         = parallel.NewRun(runtime.GOMAXPROCS(0))
	)
	for def_, refs_ := range uniqRefDefs {
		def, refs := def_, refs_
		par.Do(func() error {
			defs, err := s.(store.RepoStore).Defs(store.ByDefKey(def))
			if err != nil {
				return err
			}
			if len(defs) == 0 {
				brokenRefMu.Lock()
				brokenRefs = append(brokenRefs, refs...)
				brokenRefMu.Unlock()
			}
			return nil
		})
	}
	err := par.Wait()
	sort.Sort(graph.Refs(brokenRefs))
	return brokenRefs, err
}
Example #12
func TestParallelMaxPar(t *testing.T) {
	const (
		totalDo = 10
		maxPar  = 3
	)
	var mu sync.Mutex
	max := 0
	n := 0
	tot := 0
	r := parallel.NewRun(maxPar)
	for i := 0; i < totalDo; i++ {
		r.Do(func() error {
			mu.Lock()
			tot++
			n++
			if n > max {
				max = n
			}
			mu.Unlock()
			time.Sleep(100 * time.Millisecond)
			mu.Lock()
			n--
			mu.Unlock()
			return nil
		})
	}
	err := r.Wait()
	if n != 0 {
		t.Errorf("%d functions still running", n)
	}
	if tot != totalDo {
		t.Errorf("all functions not executed; want %d got %d", totalDo, tot)
	}
	if err != nil {
		t.Errorf("wrong error; want nil got %v", err)
	}
	if max != maxPar {
		t.Errorf("wrong number of do's ran at once; want %d got %d", maxPar, max)
	}
}
Example #13
func OpenRepo(dir string) (*Repo, error) {
	if fi, err := os.Stat(dir); err != nil || !fi.Mode().IsDir() {
		return nil, fmt.Errorf("not a directory: %q", dir)
	}

	rc := new(Repo)

	// VCS and root directory
	var err error
	rc.RootDir, rc.VCSType, err = getRootDir(dir)
	if err != nil || rc.RootDir == "" {
		log.Printf("Failed to detect git/hg repository root dir for %q; continuing.", dir)
		// Be permissive and return a repo even if there is no git/hg repository.
		wd, err := os.Getwd()
		if err != nil {
			return nil, err
		}
		rc.RootDir = wd
		// TODO: Ensure that builds without commit ids are successful.
		rc.CommitID = "ffffffffffffffffffffffffffffffffffffffff"
		rc.VCSType = ""
		rc.CloneURL = ""
		return rc, nil
	}

	par := parallel.NewRun(4)
	par.Do(func() error {
		// Current commit ID
		var err error
		rc.CommitID, err = resolveWorkingTreeRevision(rc.VCSType, rc.RootDir)
		return err
	})
	par.Do(func() error {
		// Get repo URI from clone URL.
		rc.CloneURL = getVCSCloneURL(rc.VCSType, rc.RootDir)
		return nil
	})
	return rc, par.Wait()
}
Example #14
func TestParallelError(t *testing.T) {
	const (
		totalDo = 10
		errDo   = 5
	)
	r := parallel.NewRun(6)
	for i := 0; i < totalDo; i++ {
		i := i
		if i >= errDo {
			r.Do(func() error {
				return intError(i)
			})
		} else {
			r.Do(func() error {
				return nil
			})
		}
	}
	err := r.Wait()
	if err == nil {
		t.Fatalf("expected error, got none")
	}
	errs := err.(parallel.Errors)
	if len(errs) != totalDo-errDo {
		t.Fatalf("wrong error count; want %d got %d", len(errs), totalDo-errDo)
	}
	ints := make([]int, len(errs))
	for i, err := range errs {
		ints[i] = int(err.(intError))
	}
	sort.Ints(ints)
	for i, n := range ints {
		if n != i+errDo {
			t.Errorf("unexpected error value; want %d got %d", i+errDo, n)
		}
	}
}
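Example #14 also shows that the error returned by Wait aggregates every failing Do call and can be type-asserted to parallel.Errors. Here is a short sketch of how a caller might inspect it; the import path is an assumption, and a defensive ok-assertion is used in case Wait returns some other error type.

// Sketch: inspecting the aggregate error returned by Wait.
package main

import (
	"fmt"

	"github.com/rogpeppe/rog-go/parallel"
)

func main() {
	par := parallel.NewRun(2)
	for i := 0; i < 4; i++ {
		i := i
		par.Do(func() error {
			if i%2 == 1 {
				return fmt.Errorf("task %d failed", i)
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		if errs, ok := err.(parallel.Errors); ok {
			for _, e := range errs {
				fmt.Println("failure:", e) // each failing Do contributes one entry
			}
		} else {
			fmt.Println("error:", err)
		}
	}
}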
Example #15
func (c *BuildDataUploadCmd) Execute(args []string) error {
	localFS, localRepoLabel, err := c.getLocalFileSystem()
	if err != nil {
		return err
	}

	remoteFS, remoteRepoLabel, _, err := c.getRemoteFileSystem()
	if err != nil {
		return err
	}

	if GlobalOpt.Verbose {
		log.Printf("Uploading build files from %s to %s...", localRepoLabel, remoteRepoLabel)
	}

	// TODO(sqs): check if file exists remotely and don't upload it if it does and if it is identical

	par := parallel.NewRun(8)
	w := fs.WalkFS(".", rwvfs.Walkable(localFS))
	for w.Step() {
		if err := w.Err(); err != nil {
			return err
		}
		fi := w.Stat()
		if fi == nil {
			continue
		}
		if !fi.Mode().IsRegular() {
			continue
		}
		path := w.Path()
		par.Do(func() error {
			return uploadFile(localFS, remoteFS, path, fi, c.DryRun)
		})
	}
	return par.Wait()
}
Example #16
// ScanMulti runs multiple scanner tools in parallel. It passes command-line
// options from opt to each one, and it sends the JSON representation of
// treeConfig (the repo/tree's Config) to each tool's stdin.
func ScanMulti(scanners []toolchain.Tool, opt Options, treeConfig map[string]interface{}) ([]*unit.SourceUnit, error) {
	if treeConfig == nil {
		treeConfig = map[string]interface{}{}
	}

	var (
		units []*unit.SourceUnit
		mu    sync.Mutex
	)

	run := parallel.NewRun(runtime.GOMAXPROCS(0))
	for _, scanner_ := range scanners {
		scanner := scanner_
		run.Do(func() error {
			units2, err := Scan(scanner, opt, treeConfig)
			if err != nil {
				cmd, newErr := scanner.Command()
				if newErr != nil {
					return fmt.Errorf("cmd error: %s", newErr)
				}
				return fmt.Errorf("scanner %v: %s", cmd.Args, err)
			}

			mu.Lock()
			defer mu.Unlock()
			units = append(units, units2...)
			return nil
		})
	}
	err := run.Wait()
	// Return error only if none of the commands succeeded.
	if len(units) == 0 {
		return nil, err
	}
	return units, nil
}
Example #17
// Import calls to the underlying fsUnitStore to write the def
// and ref data files. It also builds and writes the indexes.
func (s *indexedUnitStore) Import(data graph.Output) error {
	cleanForImport(&data, "", "", "")

	var defOfs, refOfs byteOffsets
	var refFBRs fileByteRanges

	par := parallel.NewRun(2)
	par.Do(func() (err error) {
		defOfs, err = s.fsUnitStore.writeDefs(data.Defs)
		return err
	})
	par.Do(func() (err error) {
		refFBRs, refOfs, err = s.fsUnitStore.writeRefs(data.Refs)
		return err
	})
	if err := par.Wait(); err != nil {
		return err
	}

	if err := s.buildIndexes(s.Indexes(), &data, defOfs, refFBRs, refOfs); err != nil {
		return err
	}
	return nil
}
Example #18
// refsAtByteRanges reads the refs at the given serialized byte ranges
// from the ref data file and returns them in arbitrary order.
func (s *fsUnitStore) refsAtByteRanges(brs []byteRanges, fs []RefFilter) (refs []*graph.Ref, err error) {
	vlog.Printf("%s: reading refs at %d byte ranges with filters %v...", s, len(brs), fs)
	f, err := openFetcherOrOpen(s.fs, unitRefsFilename)
	if err != nil {
		return nil, err
	}
	defer func() {
		err2 := f.Close()
		if err == nil {
			err = err2
		}
	}()

	ffs := refFilters(fs)

	p := parFetches(s.fs, fs)
	if p == 0 {
		return nil, nil
	}

	// See how many bytes we need to read to get the refs in all
	// byteRanges.
	readLengths := make([]int64, len(brs))
	totalRefs := 0
	for i, br := range brs {
		var n int64
		for _, b := range br[1:] {
			n += b
			totalRefs++
		}
		readLengths[i] = n
	}

	var refsLock sync.Mutex
	par := parallel.NewRun(p)
	for i_, br_ := range brs {
		i, br := i_, br_
		par.Do(func() error {
			if _, moreOK := LimitRemaining(fs); !moreOK {
				return nil
			}

			r, err := rangeReader(s.fs, unitRefsFilename, f, br.start(), readLengths[i])
			if err != nil {
				return err
			}
			dec := Codec.NewDecoder(r)
			for range br[1:] {
				var ref graph.Ref
				if _, err := dec.Decode(&ref); err != nil {
					return err
				}
				if ffs.SelectRef(&ref) {
					refsLock.Lock()
					refs = append(refs, &ref)
					refsLock.Unlock()
				}
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return refs, err
	}
	sort.Sort(refsByFileStartEnd(refs))
	vlog.Printf("%s: read %d refs at %d byte ranges with filters %v.", s, len(refs), len(brs), fs)
	return refs, nil
}
Example #19
// Import imports build data into a RepoStore or MultiRepoStore.
func Import(buildDataFS vfs.FileSystem, stor interface{}, opt ImportOpt) error {
	// Traverse the build data directory for this repo and commit to
	// create the makefile that lists the targets (which are the data
	// files we will import).
	treeConfig, err := config.ReadCached(buildDataFS)
	if err != nil {
		return fmt.Errorf("error calling config.ReadCached: %s", err)
	}
	mf, err := plan.CreateMakefile(".", nil, "", treeConfig, plan.Options{NoCache: true})
	if err != nil {
		return fmt.Errorf("error calling plan.Makefile: %s", err)
	}

	var (
		mu               sync.Mutex
		hasIndexableData bool
	)

	par := parallel.NewRun(10)
	for _, rule_ := range mf.Rules {
		rule := rule_

		if opt.Unit != "" || opt.UnitType != "" {
			type ruleForSourceUnit interface {
				SourceUnit() *unit.SourceUnit
			}
			if rule, ok := rule.(ruleForSourceUnit); ok {
				u := rule.SourceUnit()
				if (opt.Unit != "" && u.Name != opt.Unit) || (opt.UnitType != "" && u.Type != opt.UnitType) {
					continue
				}
			} else {
				// Skip all non-source-unit rules if --unit or
				// --unit-type are specified.
				continue
			}
		}

		par.Do(func() error {
			switch rule := rule.(type) {
			case *grapher.GraphUnitRule:
				var data graph.Output
				if err := readJSONFileFS(buildDataFS, rule.Target(), &data); err != nil {
					if os.IsNotExist(err) {
						log.Printf("Warning: no build data for unit %s %s.", rule.Unit.Type, rule.Unit.Name)
						return nil
					}
					return fmt.Errorf("error reading JSON file %s for unit %s %s: %s", rule.Target(), rule.Unit.Type, rule.Unit.Name, err)
				}
				if opt.DryRun || GlobalOpt.Verbose {
					log.Printf("# Importing graph data (%d defs, %d refs, %d docs, %d anns) for unit %s %s", len(data.Defs), len(data.Refs), len(data.Docs), len(data.Anns), rule.Unit.Type, rule.Unit.Name)
					if opt.DryRun {
						return nil
					}
				}

				// HACK: Transfer docs to [def].Docs.
				docsByPath := make(map[string]*graph.Doc, len(data.Docs))
				for _, doc := range data.Docs {
					docsByPath[doc.Path] = doc
				}
				for _, def := range data.Defs {
					if doc, present := docsByPath[def.Path]; present {
						def.Docs = append(def.Docs, &graph.DefDoc{Format: doc.Format, Data: doc.Data})
					}
				}

				switch imp := stor.(type) {
				case store.RepoImporter:
					if err := imp.Import(opt.CommitID, rule.Unit, data); err != nil {
						return fmt.Errorf("error running store.RepoImporter.Import: %s", err)
					}
				case store.MultiRepoImporter:
					if err := imp.Import(opt.Repo, opt.CommitID, rule.Unit, data); err != nil {
						return fmt.Errorf("error running store.MultiRepoImporter.Import: %s", err)
					}
				default:
					return fmt.Errorf("store (type %T) does not implement importing", stor)
				}

				mu.Lock()
				hasIndexableData = true
				mu.Unlock()
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return err
	}

	if hasIndexableData && !opt.NoIndex {
		if GlobalOpt.Verbose {
			log.Printf("# Building indexes")
		}
		switch s := stor.(type) {
		case store.RepoIndexer:
			if err := s.Index(opt.CommitID); err != nil {
				return fmt.Errorf("Error indexing commit %s: %s", opt.CommitID, err)
			}
		case store.MultiRepoIndexer:
			if err := s.Index(opt.Repo, opt.CommitID); err != nil {
				return fmt.Errorf("error indexing %s@%s: %s", opt.Repo, opt.CommitID, err)
			}
		}
	}

	return nil
}
Example #20
func (c *SimpleRepoCmd) Execute(args []string) error {
	if err := c.validate(); err != nil {
		return err
	}

	if err := removeGlob(".srclib-*"); err != nil {
		return err
	}

	units := make([]*unit.SourceUnit, 0)
	unitNames := hierarchicalNames("u", "unit", "", c.NUnits)
	for _, unitName := range unitNames {
		units = append(units, &unit.SourceUnit{
			Name:     unitName,
			Type:     "GoPackage",
			Repo:     c.Repo,
			CommitID: c.CommitID,
			Files:    []string{},
			Dir:      unitName,
		})
	}

	if c.GenSource {
		if err := resetSource(); err != nil {
			return err
		}

		// generate source files
		par := parallel.NewRun(runtime.GOMAXPROCS(0))
		for _, ut_ := range units {
			ut := ut_
			par.Do(func() error { return c.genUnit(ut) })
		}
		if err := par.Wait(); err != nil {
			return err
		}

		// get commit ID
		commitID, err := getGitCommitID()
		if err != nil {
			return err
		}

		// update command to generate graph data
		c.CommitID = commitID
		c.GenSource = false
	}

	// generate graph data
	par := parallel.NewRun(runtime.GOMAXPROCS(0))
	for _, ut_ := range units {
		ut := ut_
		ut.CommitID = c.CommitID
		par.Do(func() error { return c.genUnit(ut) })
	}
	if err := par.Wait(); err != nil {
		return err
	}

	return nil
}
Example #21
func TestIntegration(t *testing.T) {
	if testing.Short() {
		t.Skip("long-running integration test")
	}

	repoInfos := getUserRepos(t, *repoOwner)

	type nodeInfo struct {
		cmd     *exec.Cmd
		baseURL string
	}
	nodes := map[string]*nodeInfo{}

	repos := map[repoInfo]vcs.Repository{}
	var reposMu sync.Mutex

	if *etcdDebugLog {
		etcd_client.SetLogger(log.New(os.Stderr, "etcd: ", 0))
	}

	withEtcd(t, func(etcdConfig *config.Config, ec *etcd_client.Client) {
		defer func() {
			if *waitBeforeExit {
				log.Printf("\n\nTest run ended. Ctrl-C to exit.")
				select {}
			}
		}()

		b := datad.NewEtcdBackend("/datad/vcs", ec)
		cc := NewClient(datad.NewClient(b), nil)

		if err := exec.Command("go", "install", "sourcegraph.com/sourcegraph/vcsstore/cmd/vcsstore").Run(); err != nil {
			t.Fatal(err)
		}

		killNode := func(name string, ni *nodeInfo) {
			if ni != nil && ni.cmd != nil {
				ni.cmd.Process.Kill()
				ni.cmd = nil
				delete(nodes, name)
			}
		}

		// Start the nodes and vcsstore servers.
		for i := 0; i < *numNodes; i++ {
			n := 6000 + i
			nodeName := fmt.Sprintf("127.0.0.1:%d", n)
			storageDir := fmt.Sprintf("/tmp/test-vcsstore%d", n)
			cmd := exec.Command("vcsstore", "-v", "-etcd="+etcdConfig.Addr, "-s="+storageDir, "serve", "-datad", "-d", fmt.Sprintf("-http=:%d", n), "-datad-node-name="+nodeName)
			nodes[nodeName] = &nodeInfo{cmd: cmd, baseURL: "http://" + nodeName}
			cmd.Stdout, cmd.Stderr = os.Stderr, os.Stderr
			err := cmd.Start()
			if err != nil {
				t.Fatalf("error starting %v: %s", cmd.Args, err)
			}
			log.Printf("Launched node %s with storage dir %s (%v).", nodeName, storageDir, cmd.Args)
			defer func() {
				killNode(nodeName, nodes[nodeName])
			}()
		}

		// Wait for servers.
		time.Sleep(400 * time.Millisecond)

		// Clone the repositories.
		cloneStart := time.Now()
		var wg sync.WaitGroup
		for _, ri_ := range repoInfos {
			ri := ri_
			wg.Add(1)
			go func() {
				defer wg.Done()
				log.Printf("cloning %v...", ri)
				repo, err := cc.Repository(ri.vcsType, mustParseURL(ri.cloneURL))
				if err != nil {
					t.Errorf("clone %v failed: %s", ri, err)
					return
				}
				err = repo.(vcsclient.RepositoryCloneUpdater).CloneOrUpdate(vcs.RemoteOpts{})
				if err != nil {
					t.Errorf("remote clone %v failed: %s", ri, err)
					return
				}

				reposMu.Lock()
				defer reposMu.Unlock()
				repos[ri] = repo
			}()
		}
		wg.Wait()
		t.Logf("Cloned %d repositories in %s.", len(repoInfos), time.Since(cloneStart))

		performRepoOps := func() error {
			par := parallel.NewRun(1) // keep at 1, libgit2 has concurrency segfaults :(
			// Perform some operations on the repos.
			for ri_, repo_ := range repos {
				ri, repo := ri_, repo_ // copy loop variables before the closure captures them
				par.Do(func() error {
					commitID, err := repo.ResolveBranch("master")
					if err != nil {
						return fmt.Errorf("repo %v: resolve branch 'master' failed: %s", ri, err)
					}
					commits, _, err := repo.Commits(vcs.CommitsOptions{Head: commitID})
					if err != nil {
						return fmt.Errorf("repo %v: commit log of 'master' failed: %s", ri, err)
					}
					if len(commits) == 0 {
						return fmt.Errorf("repo %v: commit log has 0 entries", ri)
					}
					fs, err := repo.FileSystem(commitID)
					if err != nil {
						return fmt.Errorf("repo %v: filesystem at 'master' failed: %s", ri, err)
					}
					entries, err := fs.ReadDir("/")
					if err != nil {
						return fmt.Errorf("repo %v: readdir '/' at 'master' failed: %s", ri, err)
					}
					_ = entries
					return nil
				})
			}
			return par.Wait()
		}

		opsStart := time.Now()
		err := performRepoOps()
		if err != nil {
			t.Fatalf("before killing any nodes, got error in repo ops: %s", err)
		}
		log.Printf("\n\n\nPerformed various operations on %d repositories in %s.\n\n\n", len(repos), time.Since(opsStart))

		if *numNodes <= 1 {
			t.Fatal("can't test cluster resilience with only 1 node")
		}

		// Kill half of all nodes to test resilience.
		for i := 0; i < *numNodes/2; i++ {
			for name, ni := range nodes {
				reg := datad.NewRegistry(b)
				regKeys, err := reg.KeysForNode(name)
				if err != nil {
					t.Fatal(err)
				}
				killNode(name, ni)
				log.Printf("\n\n\nKilled node %s. Before killing, it had registered keys: %v. Expect to see failures related to these keys in the next set of operations we perform.\n\n\n", name, regKeys)
				break
			}
		}
		time.Sleep(time.Millisecond * 300)
		killedTime := time.Now()

		// After killing nodes, run the same set of operations. We expect some
		// KeyTransportErrors here because the cluster detects that some nodes
		// are down but hasn't had time yet to fetch the data to other nodes.
		//
		// Keep running the operations until we get no more errors.

		log.Printf("\n\n\nAfter killing some nodes, we're going to keep performing VCS ops on the cluster until it fully heals itself and we see no more errors. (We expect some errors until it heals itself.)\n\n\n")

		try := 0
		for {
			err := performRepoOps()
			if err != nil {
				log.Printf("\n\n\nTry #%d (%s): got error %+v\n\n\n", try, time.Since(killedTime), err)
				try++
				time.Sleep(2000 * time.Millisecond)
				continue
			}

			log.Printf("\n\n\nTry #%d: SUCCESS. The cluster healed itself after %s.\n\n\n", try, time.Since(killedTime))
			break
		}
	})
}
Example #22
func (s *indexedTreeStore) buildIndexes(xs map[string]Index, units []*unit.SourceUnit, unitRefIndexes map[unit.ID2]*defRefsIndex, unitDefQueryIndexes map[unit.ID2]*defQueryIndex) error {
	// TODO(sqs): there's a race condition here if multiple imports
	// are running concurrently, they could clobber each other's
	// indexes. (S3 is eventually consistent.)

	var getUnitsErr error
	var getUnitsOnce sync.Once
	getUnits := func() ([]*unit.SourceUnit, error) {
		getUnitsOnce.Do(func() {
			if getUnitsErr == nil && units == nil {
				units, getUnitsErr = s.fsTreeStore.Units()
			}
			if units == nil {
				units = []*unit.SourceUnit{}
			}
		})
		return units, getUnitsErr
	}

	var getUnitRefIndexesErr error
	var getUnitRefIndexesOnce sync.Once
	var unitRefIndexesLock sync.Mutex
	getUnitRefIndexes := func() (map[unit.ID2]*defRefsIndex, error) {
		getUnitRefIndexesOnce.Do(func() {
			if getUnitRefIndexesErr == nil && unitRefIndexes == nil {
				// Read in the defRefsIndex for all source units.
				units, err := getUnits()
				if err != nil {
					getUnitRefIndexesErr = err
					return
				}

				// Use openUnitStore on the list from getUnits so we
				// don't need to traverse the FS tree to enumerate all
				// the source units again (which is slow).
				uss := make(map[unit.ID2]UnitStore, len(units))
				for _, u := range units {
					uss[u.ID2()] = s.fsTreeStore.openUnitStore(u.ID2())
				}

				unitRefIndexes = make(map[unit.ID2]*defRefsIndex, len(units))
				par := parallel.NewRun(runtime.GOMAXPROCS(0))
				for u_, us_ := range uss {
					u := u_
					us, ok := us_.(*indexedUnitStore)
					if !ok {
						continue
					}

					par.Do(func() error {
						x := us.indexes[defToRefsIndexName]
						if err := prepareIndex(us.fs, defToRefsIndexName, x); err != nil {
							return err
						}
						unitRefIndexesLock.Lock()
						defer unitRefIndexesLock.Unlock()
						unitRefIndexes[u] = x.(*defRefsIndex)
						return nil
					})
				}
				getUnitRefIndexesErr = par.Wait()
			}
			if unitRefIndexes == nil {
				unitRefIndexes = map[unit.ID2]*defRefsIndex{}
			}
		})
		return unitRefIndexes, getUnitRefIndexesErr
	}

	var getUnitDefQueryIndexesErr error
	var getUnitDefQueryIndexesOnce sync.Once
	var unitDefQueryIndexesLock sync.Mutex
	getUnitDefQueryIndexes := func() (map[unit.ID2]*defQueryIndex, error) {
		getUnitDefQueryIndexesOnce.Do(func() {
			if getUnitDefQueryIndexesErr == nil && unitDefQueryIndexes == nil {
				// Read in the defQueryIndex for all source units.
				units, err := getUnits()
				if err != nil {
					getUnitDefQueryIndexesErr = err
					return
				}

				// Use openUnitStore on the list from getUnits so we
				// don't need to traverse the FS tree to enumerate all
				// the source units again (which is slow).
				uss := make(map[unit.ID2]UnitStore, len(units))
				for _, u := range units {
					uss[u.ID2()] = s.fsTreeStore.openUnitStore(u.ID2())
				}

				unitDefQueryIndexes = make(map[unit.ID2]*defQueryIndex, len(units))
				par := parallel.NewRun(len(units))
				for u_, us_ := range uss {
					u := u_
					us, ok := us_.(*indexedUnitStore)
					if !ok {
						continue
					}

					par.Do(func() error {
						x := us.indexes[defQueryIndexName]
						if err := prepareIndex(us.fs, defQueryIndexName, x); err != nil {
							return err
						}
						unitDefQueryIndexesLock.Lock()
						defer unitDefQueryIndexesLock.Unlock()
						unitDefQueryIndexes[u] = x.(*defQueryIndex)
						return nil
					})
				}
				getUnitDefQueryIndexesErr = par.Wait()
			}
			if unitDefQueryIndexes == nil {
				unitDefQueryIndexes = map[unit.ID2]*defQueryIndex{}
			}
		})
		return unitDefQueryIndexes, getUnitDefQueryIndexesErr
	}

	par := parallel.NewRun(len(xs))
	for name_, x_ := range xs {
		name, x := name_, x_
		par.Do(func() error {
			switch x := x.(type) {
			case unitIndexBuilder:
				units, err := getUnits()
				if err != nil {
					return err
				}
				if err := x.Build(units); err != nil {
					return err
				}
			case unitRefIndexBuilder:
				unitRefIndexes, err := getUnitRefIndexes()
				if err != nil {
					return err
				}
				if err := x.Build(unitRefIndexes); err != nil {
					return err
				}
			case defQueryTreeIndexBuilder:
				unitDefQueryIndexes, err := getUnitDefQueryIndexes()
				if err != nil {
					return err
				}
				if err := x.Build(unitDefQueryIndexes); err != nil {
					return err
				}
			default:
				return fmt.Errorf("don't know how to build index %q of type %T", name, x)
			}
			if x, ok := x.(persistedIndex); ok {
				if err := writeIndex(s.fs, name, x); err != nil {
					return err
				}
			}
			return nil
		})
	}
	return par.Wait()
}
Example #23
// listIndexes lists indexes in s (a store) asynchronously, sending
// status objects to ch. If f != nil, it is called to set/modify
// fields on each status object before the IndexStatus object is sent to
// the channel.
func listIndexes(s interface{}, c IndexCriteria, ch chan<- IndexStatus, f func(*IndexStatus)) error {
	switch s := s.(type) {
	case indexedStore:
		xx := s.Indexes()
		var waitingOnChildren []IndexStatus
		for name, x := range xx {
			st := IndexStatus{
				Name:  name,
				Type:  strings.TrimPrefix(reflect.TypeOf(x).String(), "*store."),
				index: x,
				store: s,
			}

			if !strings.Contains(st.Name, c.Name) {
				continue
			}
			if !strings.Contains(st.Type, c.Type) {
				continue
			}

			fi, err := s.statIndex(name)
			if os.IsNotExist(err) {
				st.Stale = true
			} else if err != nil {
				st.Error = err.Error()
			} else {
				st.Size = fi.Size()
			}

			switch x.(type) {
			case unitRefIndexBuilder, defQueryTreeIndexBuilder:
				st.DependsOnChildren = true
			}

			if c.Stale != nil && st.Stale != *c.Stale {
				continue
			}

			if f != nil {
				f(&st)
			}

			if c.Unit != nil && c.Unit != NoSourceUnit {
				if st.Unit == nil || *c.Unit != *st.Unit {
					continue
				}
			}

			if st.DependsOnChildren {
				waitingOnChildren = append(waitingOnChildren, st)
			} else {
				ch <- st
			}
		}

		switch s := s.(type) {
		case *indexedTreeStore:
			if err := listIndexes(s.fsTreeStore, c, ch, f); err != nil {
				return err
			}
		case *indexedUnitStore:
			if err := listIndexes(s.fsUnitStore, c, ch, f); err != nil {
				return err
			}
		}

		for _, si := range waitingOnChildren {
			ch <- si
		}

	case repoStoreOpener:
		var rss map[string]RepoStore
		if c.Repo == "" {
			var err error
			rss, err = s.openAllRepoStores()
			if err != nil && !isStoreNotExist(err) {
				return err
			}
		} else {
			rss = map[string]RepoStore{c.Repo: s.openRepoStore(c.Repo)}
		}

		// Sort repos for determinism.
		repos := make([]string, 0, len(rss))
		for repo := range rss {
			repos = append(repos, repo)
		}
		sort.Strings(repos)

		if c.ReposOffset != 0 {
			if c.ReposOffset < len(repos) {
				repos = repos[c.ReposOffset:]
			} else {
				log.Printf("Warning: A ReposOffset (%d) was specified that equals or exceeds the total number of repos (%d).", c.ReposOffset, len(repos))
			}
		}
		if c.ReposLimit != 0 && c.ReposLimit < len(repos) {
			repos = repos[:c.ReposLimit]
		}

		for _, repo := range repos {
			rs := rss[repo]
			err := listIndexes(rs, c, ch, func(x *IndexStatus) {
				x.Repo = repo
				if f != nil {
					f(x)
				}
			})
			if err != nil {
				return err
			}
		}

	case treeStoreOpener:
		var tss map[string]TreeStore
		if c.CommitID == "" {
			var err error
			tss, err = s.openAllTreeStores()
			if err != nil && !isStoreNotExist(err) {
				return err
			}
		} else {
			tss = map[string]TreeStore{c.CommitID: s.openTreeStore(c.CommitID)}
		}
		for commitID, ts := range tss {
			err := listIndexes(ts, c, ch, func(x *IndexStatus) {
				x.CommitID = commitID
				if f != nil {
					f(x)
				}
			})
			if err != nil {
				return err
			}
		}

	case unitStoreOpener:
		if c.Unit == NoSourceUnit {
			return nil
		}
		var uss map[unit.ID2]UnitStore
		if c.Unit == nil {
			var err error
			uss, err = s.openAllUnitStores()
			if err != nil && !isStoreNotExist(err) {
				return err
			}
		} else {
			uss = map[unit.ID2]UnitStore{*c.Unit: s.openUnitStore(*c.Unit)}
		}
		if len(uss) > 0 {
			par := parallel.NewRun(MaxIndexParallel)
			for unit_, us_ := range uss {
				unit, us := unit_, us_
				par.Do(func() error {
					unitCopy := unit
					return listIndexes(us, c, ch, func(x *IndexStatus) {
						x.Unit = &unitCopy
						if f != nil {
							f(x)
						}
					})
				})
			}
			if err := par.Wait(); err != nil {
				return err
			}
		}

	}
	return nil
}
Example #24
func (c *URefsRepoCmd) Execute(args []string) error {
	if err := c.validate(); err != nil {
		return err
	}

	if err := removeGlob(".srclib-*"); err != nil {
		return err
	}

	units := make([]*unit.SourceUnit, 0)
	unitNames := hierarchicalNames("u", "unit", "", c.NUnits)
	for _, unitName := range unitNames {
		ut := &unit.SourceUnit{
			Name:     unitName,
			Type:     "GoPackage",
			Repo:     c.Repo,
			CommitID: c.CommitID,
			Files:    []string{},
			Dir:      unitName,
		}
		units = append(units, ut)
	}

	if c.GenSource {
		if err := resetSource(); err != nil {
			return err
		}

		// generate source files
		par := parallel.NewRun(runtime.GOMAXPROCS(0))
		for _, ut := range units {
			ut := ut
			par.Do(func() error {
				_, _, _, err := c.genUnit(ut, units)
				return err
			})
		}
		if err := par.Wait(); err != nil {
			return err
		}

		// get commit ID
		commitID, err := getGitCommitID()
		if err != nil {
			return err
		}

		// update command to generate graph data
		c.CommitID = commitID
		c.GenSource = false
	}

	// generate graph data
	gr := make(map[string]*graph.Output)
	for _, ut := range units {
		gr[ut.Name] = &graph.Output{}
	}
	var grmu sync.Mutex
	par := parallel.NewRun(runtime.GOMAXPROCS(0))
	for _, ut := range units {
		ut := ut
		ut.CommitID = c.CommitID
		par.Do(func() error {
			defs, refs, reffiles, err := c.genUnit(ut, units)
			if err != nil {
				return err
			}

			grmu.Lock()
			defer grmu.Unlock()

			gr[ut.Name].Defs = append(gr[ut.Name].Defs, defs...)
			for utName, utRefs := range refs {
				gr[utName].Refs = append(gr[utName].Refs, utRefs...)
			}
			for _, ut2 := range units {
				ut2.Files = append(ut2.Files, reffiles[ut2.Name]...)
			}
			return nil
		})
	}
	if err := par.Wait(); err != nil {
		return err
	}
	for _, ut := range units {
		utgraph := gr[ut.Name]
		utgraph.Docs = make([]*graph.Doc, 0)
		if err := writeSrclibCache(ut, utgraph, make([]*dep.Resolution, 0)); err != nil {
			return err
		}
	}

	return nil
}
Example #25
func (s *indexedUnitStore) buildIndexes(xs map[string]Index, data *graph.Output, defOfs byteOffsets, refFBRs fileByteRanges, refOfs byteOffsets) error {
	var defs []*graph.Def
	var refs []*graph.Ref
	if data != nil {
		// Allow us to distinguish between empty (empty slice) and not-yet-fetched (nil).
		defs = data.Defs
		if defs == nil {
			defs = []*graph.Def{}
		}
		refs = data.Refs
		if refs == nil {
			refs = []*graph.Ref{}
		}
	}

	var getDefsErr error
	var getDefsOnce sync.Once
	getDefs := func() ([]*graph.Def, byteOffsets, error) {
		getDefsOnce.Do(func() {
			// Don't refetch if passed in as arg or if getData was
			// already called.
			if defs == nil {
				defs, defOfs, getDefsErr = s.fsUnitStore.readDefs()
			}
			if defs == nil {
				defs = []*graph.Def{}
			}
		})
		return defs, defOfs, getDefsErr
	}

	var getRefsErr error
	var getRefsOnce sync.Once
	getRefs := func() ([]*graph.Ref, fileByteRanges, byteOffsets, error) {
		getRefsOnce.Do(func() {
			// Don't refetch if passed in as arg or if getData was
			// already called.
			if refs == nil {
				refs, refFBRs, refOfs, getRefsErr = s.fsUnitStore.readRefs()
			}
			if refs == nil {
				refs = []*graph.Ref{}
			}
		})
		return refs, refFBRs, refOfs, getRefsErr
	}

	par := parallel.NewRun(len(xs))
	for name_, x_ := range xs {
		name, x := name_, x_
		par.Do(func() error {
			switch x := x.(type) {
			case defIndexBuilder:
				defs, defOfs, err := getDefs()
				if err != nil {
					return err
				}
				if err := x.Build(defs, defOfs); err != nil {
					return err
				}
			case refIndexBuilder:
				refs, refFBRs, refOfs, err := getRefs()
				if err != nil {
					return err
				}
				if err := x.Build(refs, refFBRs, refOfs); err != nil {
					return err
				}
			default:
				return fmt.Errorf("don't know how to build index %q of type %T", name, x)
			}
			if x, ok := x.(persistedIndex); ok {
				if err := writeIndex(s.fs, name, x); err != nil {
					return err
				}
			}
			return nil
		})
	}
	return par.Wait()
}