func (d *driver) StartCommit(repo *pfs.Repo, commitID string, parentID string, branch string, started *google_protobuf.Timestamp, provenance []*pfs.Commit, shards map[uint64]bool) error { d.lock.Lock() defer d.lock.Unlock() // make sure that the parent commit exists if parentID != "" { _, err := d.inspectCommit(client.NewCommit(repo.Name, parentID), shards) if err != nil { return err } } for shard := range shards { if len(provenance) != 0 { diffInfo, ok := d.diffs.get(client.NewDiff(repo.Name, "", shard)) if !ok { return pfsserver.NewErrRepoNotFound(repo.Name) } provRepos := repoSetFromCommits(diffInfo.Provenance) for _, provCommit := range provenance { if !provRepos[provCommit.Repo.Name] { return fmt.Errorf("cannot use %s/%s as provenance, %s is not provenance of %s", provCommit.Repo.Name, provCommit.ID, provCommit.Repo.Name, repo.Name) } } } diffInfo := &pfs.DiffInfo{ Diff: client.NewDiff(repo.Name, commitID, shard), Started: started, Appends: make(map[string]*pfs.Append), Branch: branch, Provenance: provenance, } if branch != "" { parentCommit, err := d.branchParent(client.NewCommit(repo.Name, commitID), branch) if err != nil { return err } if parentCommit != nil && parentID != "" { return fmt.Errorf("branch %s already exists as %s, can't create with %s as parent", branch, parentCommit.ID, parentID) } diffInfo.ParentCommit = parentCommit } if diffInfo.ParentCommit == nil && parentID != "" { diffInfo.ParentCommit = client.NewCommit(repo.Name, parentID) } if err := d.insertDiffInfo(diffInfo); err != nil { return err } } d.commitConds[commitID] = sync.NewCond(&d.lock) return nil }
// branchParent finds the parent that should be used for a new commit being started on a branch func (d *driver) branchParent(commit *pfs.Commit, branch string) (*pfs.Commit, error) { // canonicalCommit is the head of branch canonicalCommit, err := d.canonicalCommit(client.NewCommit(commit.Repo.Name, branch)) if err != nil { return nil, err } if canonicalCommit.ID == branch { // first commit on this branch, return nil return nil, nil } if canonicalCommit.ID == commit.ID { // this commit is the head of branch // that's because this isn't the first shard of this commit we've seen for _, commitToDiffInfo := range d.diffs[commit.Repo.Name] { if diffInfo, ok := commitToDiffInfo[commit.ID]; ok { return diffInfo.ParentCommit, nil } } // reaching this code means that canonicalCommit resolved the branch to // a commit we've never seen (on any shard) which indicates a bug // elsewhere return nil, fmt.Errorf("unreachable") } return canonicalCommit, nil }
func testBlock(t *testing.T, apiServer persist.APIServer) { jobInfo, err := apiServer.CreateJobInfo(context.Background(), &persist.JobInfo{ JobID: uuid.NewWithoutDashes(), }) require.NoError(t, err) jobID := jobInfo.JobID go func() { _, err := apiServer.CreateJobOutput( context.Background(), &persist.JobOutput{ JobID: jobID, OutputCommit: client.NewCommit("foo", "bar"), }) require.NoError(t, err) _, err = apiServer.CreateJobState( context.Background(), &persist.JobState{ JobID: jobID, State: ppsclient.JobState_JOB_SUCCESS, }) require.NoError(t, err) }() _, err = apiServer.InspectJob( context.Background(), &ppsclient.InspectJobRequest{ Job: &ppsclient.Job{ID: jobID}, BlockState: true, }, ) require.NoError(t, err) }
// canonicalCommit finds the canonical way of referring to a commit func (d *driver) canonicalCommit(commit *pfs.Commit) (*pfs.Commit, error) { if _, ok := d.branches[commit.Repo.Name]; !ok { return nil, pfsserver.NewErrRepoNotFound(commit.Repo.Name) } if commitID, ok := d.branches[commit.Repo.Name][commit.ID]; ok { return client.NewCommit(commit.Repo.Name, commitID), nil } return commit, nil }
func (d *driver) CreateRepo(repo *pfs.Repo, created *google_protobuf.Timestamp, provenance []*pfs.Repo, shards map[uint64]bool) error { d.lock.Lock() defer d.lock.Unlock() if _, ok := d.diffs[repo.Name]; ok { return fmt.Errorf("repo %s exists", repo.Name) } if err := validateRepoName(repo.Name); err != nil { return err } for _, provRepo := range provenance { if _, err := d.inspectRepo(provRepo, shards); err != nil { return nil } } d.createRepoState(repo) blockClient, err := d.getBlockClient() if err != nil { return err } var wg sync.WaitGroup errCh := make(chan error, 1) for shard := range shards { wg.Add(1) diffInfo := &pfs.DiffInfo{ Diff: client.NewDiff(repo.Name, "", shard), Finished: created, } for _, provRepo := range provenance { diffInfo.Provenance = append(diffInfo.Provenance, client.NewCommit(provRepo.Name, "")) } if err := d.diffs.insert(diffInfo); err != nil { return err } go func() { defer wg.Done() if _, err := blockClient.CreateDiff(context.Background(), diffInfo); err != nil { select { case errCh <- err: default: } return } }() } wg.Wait() select { case err := <-errCh: return err default: } return nil }
// fullRepoProvenance recursively computes the provenance of a repo func (d *driver) fullRepoProvenance(repo *pfs.Repo, shards map[uint64]bool) ([]*pfs.Repo, error) { provCommits, err := d.fullCommitProvenance(client.NewCommit(repo.Name, ""), make(map[string]bool), shards) if err != nil { return nil, err } var result []*pfs.Repo for _, provCommit := range provCommits { result = append(result, provCommit.Repo) } return result, nil }
func (d *driver) ListBranch(repo *pfs.Repo, shards map[uint64]bool) ([]*pfs.CommitInfo, error) { var result []*pfs.CommitInfo _, ok := d.branches[repo.Name] if !ok { return nil, pfsserver.NewErrRepoNotFound(repo.Name) } for commitID := range d.branches[repo.Name] { commitInfo, err := d.inspectCommit(client.NewCommit(repo.Name, commitID), shards) if err != nil { return nil, err } result = append(result, commitInfo) } return result, nil }
func parseCommitMounts(args []string) []*fuse.CommitMount { var result []*fuse.CommitMount for _, arg := range args { commitMount := &fuse.CommitMount{Commit: client.NewCommit("", "")} repo, commitAlias := path.Split(arg) commitMount.Commit.Repo.Name = path.Clean(repo) split := strings.Split(commitAlias, ":") if len(split) > 0 { commitMount.Commit.ID = split[0] } if len(split) > 1 { commitMount.Alias = split[1] } result = append(result, commitMount) } return result }
func (a *internalAPIServer) StartCommit(ctx context.Context, request *pfs.StartCommitRequest) (response *google_protobuf.Empty, retErr error) { defer func(start time.Time) { a.Log(request, response, retErr, time.Since(start)) }(time.Now()) version, err := a.getVersion(ctx) if err != nil { return nil, err } shards, err := a.router.GetShards(version) if err != nil { return nil, err } if err := a.driver.StartCommit(request.Repo, request.ID, request.ParentID, request.Branch, request.Started, request.Provenance, shards); err != nil { return nil, err } if err := a.pulseCommitWaiters(client.NewCommit(request.Repo.Name, request.ID), pfs.CommitType_COMMIT_TYPE_WRITE, shards); err != nil { return nil, err } return google_protobuf.EmptyInstance, nil }
func (f *filesystem) getCommitMount(nameOrAlias string) *CommitMount { if len(f.CommitMounts) == 0 { return &CommitMount{ Commit: client.NewCommit(nameOrAlias, ""), Shard: f.Shard, } } // We prefer alias matching over repo name matching, since there can be // two commit mounts with the same repo but different aliases, such as // "out" and "prev" for _, commitMount := range f.CommitMounts { if commitMount.Alias == nameOrAlias { return commitMount } } for _, commitMount := range f.CommitMounts { if commitMount.Commit.Repo.Name == nameOrAlias { return commitMount } } return nil }
func (a *apiServer) runPipeline(pipelineInfo *ppsclient.PipelineInfo) error { ctx, cancel := context.WithCancel(context.Background()) returnNil := func() bool { a.cancelFuncsLock.Lock() defer a.cancelFuncsLock.Unlock() if _, ok := a.cancelFuncs[pipelineInfo.Pipeline.Name]; ok { // The pipeline is already being run return true } if len(pipelineInfo.Inputs) == 0 { // this pipeline does not have inputs; there is nothing to be done return true } a.cancelFuncs[pipelineInfo.Pipeline.Name] = cancel return false }() if returnNil { return nil } persistClient, err := a.getPersistClient() if err != nil { return err } _, err = persistClient.UpdatePipelineState(ctx, &persist.UpdatePipelineStateRequest{ PipelineName: pipelineInfo.Pipeline.Name, State: ppsclient.PipelineState_PIPELINE_RUNNING, }) if err != nil { return err } repoToLeaves := make(map[string]map[string]bool) rawInputRepos, err := a.rawInputs(ctx, pipelineInfo) if err != nil { return err } for _, repo := range rawInputRepos { repoToLeaves[repo.Name] = make(map[string]bool) } pfsAPIClient, err := a.getPfsClient() if err != nil { return err } for { var fromCommits []*pfsclient.Commit for repo, leaves := range repoToLeaves { for leaf := range leaves { fromCommits = append( fromCommits, &pfsclient.Commit{ Repo: &pfsclient.Repo{Name: repo}, ID: leaf, }) } } listCommitRequest := &pfsclient.ListCommitRequest{ Repo: rawInputRepos, CommitType: pfsclient.CommitType_COMMIT_TYPE_READ, FromCommit: fromCommits, Block: true, } commitInfos, err := pfsAPIClient.ListCommit(ctx, listCommitRequest) if err != nil { return err } for _, commitInfo := range commitInfos.CommitInfo { repoToLeaves[commitInfo.Commit.Repo.Name][commitInfo.Commit.ID] = true if commitInfo.ParentCommit != nil { delete(repoToLeaves[commitInfo.ParentCommit.Repo.Name], commitInfo.ParentCommit.ID) } // generate all the permutations of leaves we could use this commit with commitSets := [][]*pfsclient.Commit{[]*pfsclient.Commit{}} for repoName, leaves := range repoToLeaves { if repoName == commitInfo.Commit.Repo.Name { continue } var newCommitSets [][]*pfsclient.Commit for _, commitSet := range commitSets { for leaf := range leaves { newCommitSet := make([]*pfsclient.Commit, len(commitSet)+1) copy(newCommitSet, commitSet) newCommitSet[len(commitSet)] = client.NewCommit(repoName, leaf) newCommitSets = append(newCommitSets, newCommitSet) } } commitSets = newCommitSets } for _, commitSet := range commitSets { // + 1 as the commitSet doesn't contain the commit we just got if len(commitSet)+1 < len(rawInputRepos) { continue } trueInputs, err := a.trueInputs(ctx, append(commitSet, commitInfo.Commit), pipelineInfo) if err != nil { return err } var parentJob *ppsclient.Job if commitInfo.ParentCommit != nil { parentJob, err = a.parentJob(ctx, trueInputs, commitSet, commitInfo.ParentCommit, pipelineInfo) if err != nil { return err } } _, err = a.CreateJob( ctx, &ppsclient.CreateJobRequest{ Transform: pipelineInfo.Transform, Pipeline: pipelineInfo.Pipeline, Parallelism: pipelineInfo.Parallelism, Inputs: trueInputs, ParentJob: parentJob, }, ) _, ok := err.(ErrEmptyInput) if err != nil && !ok { return err } } } } }
func testBasicRethink(t *testing.T, apiServer persist.APIServer) { _, err := apiServer.CreatePipelineInfo( context.Background(), &persist.PipelineInfo{ PipelineName: "foo", }, ) require.NoError(t, err) pipelineInfo, err := apiServer.GetPipelineInfo( context.Background(), &ppsclient.Pipeline{Name: "foo"}, ) require.NoError(t, err) require.Equal(t, pipelineInfo.PipelineName, "foo") input := &ppsclient.JobInput{Commit: client.NewCommit("bar", uuid.NewWithoutDashes())} jobInfo, err := apiServer.CreateJobInfo( context.Background(), &persist.JobInfo{ JobID: uuid.NewWithoutDashes(), PipelineName: "foo", Inputs: []*ppsclient.JobInput{input}, }, ) jobID := jobInfo.JobID input2 := &ppsclient.JobInput{Commit: client.NewCommit("fizz", uuid.NewWithoutDashes())} _, err = apiServer.CreateJobInfo( context.Background(), &persist.JobInfo{ JobID: uuid.NewWithoutDashes(), PipelineName: "buzz", Inputs: []*ppsclient.JobInput{input2}, }, ) require.NoError(t, err) jobInfo, err = apiServer.InspectJob( context.Background(), &ppsclient.InspectJobRequest{ Job: &ppsclient.Job{ ID: jobInfo.JobID, }, }, ) require.NoError(t, err) require.Equal(t, jobInfo.JobID, jobID) require.Equal(t, "foo", jobInfo.PipelineName) jobInfos, err := apiServer.ListJobInfos( context.Background(), &ppsclient.ListJobRequest{ Pipeline: &ppsclient.Pipeline{Name: "foo"}, }, ) require.NoError(t, err) require.Equal(t, len(jobInfos.JobInfo), 1) require.Equal(t, jobInfos.JobInfo[0].JobID, jobID) jobInfos, err = apiServer.ListJobInfos( context.Background(), &ppsclient.ListJobRequest{ InputCommit: []*pfsclient.Commit{input.Commit}, }, ) require.NoError(t, err) require.Equal(t, len(jobInfos.JobInfo), 1) require.Equal(t, jobInfos.JobInfo[0].JobID, jobID) jobInfos, err = apiServer.ListJobInfos( context.Background(), &ppsclient.ListJobRequest{ Pipeline: &ppsclient.Pipeline{Name: "foo"}, InputCommit: []*pfsclient.Commit{input.Commit}, }, ) require.NoError(t, err) require.Equal(t, len(jobInfos.JobInfo), 1) require.Equal(t, jobInfos.JobInfo[0].JobID, jobID) }