Exemple #1
0
// StartCommit records the start of commit commitID in repo by inserting one
// DiffInfo per shard in shards. The whole operation runs under d.lock.
//
// If parentID is non-empty the parent commit must already exist. If provenance
// is non-empty, every provenance commit must belong to a repo that appears in
// the provenance of repo's own "" diff on that shard. If branch is non-empty
// the parent is taken from the branch (see branchParent); supplying parentID
// for a branch that already has a parent is an error.
//
// On success a condition variable bound to d.lock is registered for commitID
// in d.commitConds.
func (d *driver) StartCommit(repo *pfs.Repo, commitID string, parentID string, branch string,
	started *google_protobuf.Timestamp, provenance []*pfs.Commit, shards map[uint64]bool) error {
	d.lock.Lock()
	defer d.lock.Unlock()

	hasParent := parentID != ""
	if hasParent {
		// An explicitly requested parent has to resolve to a known commit.
		if _, err := d.inspectCommit(client.NewCommit(repo.Name, parentID), shards); err != nil {
			return err
		}
	}

	for shard := range shards {
		if len(provenance) > 0 {
			// The repo-level diff (empty commit ID) carries the repo's provenance.
			repoDiff, found := d.diffs.get(client.NewDiff(repo.Name, "", shard))
			if !found {
				return pfsserver.NewErrRepoNotFound(repo.Name)
			}
			allowed := repoSetFromCommits(repoDiff.Provenance)
			for _, prov := range provenance {
				if allowed[prov.Repo.Name] {
					continue
				}
				return fmt.Errorf("cannot use %s/%s as provenance, %s is not provenance of %s",
					prov.Repo.Name, prov.ID, prov.Repo.Name, repo.Name)
			}
		}

		// Work out the parent: the branch decides first, parentID is the fallback.
		var parent *pfs.Commit
		if branch != "" {
			branchHead, err := d.branchParent(client.NewCommit(repo.Name, commitID), branch)
			if err != nil {
				return err
			}
			if branchHead != nil && hasParent {
				return fmt.Errorf("branch %s already exists as %s, can't create with %s as parent",
					branch, branchHead.ID, parentID)
			}
			parent = branchHead
		}
		if parent == nil && hasParent {
			parent = client.NewCommit(repo.Name, parentID)
		}

		newDiff := &pfs.DiffInfo{
			Diff:         client.NewDiff(repo.Name, commitID, shard),
			Started:      started,
			Appends:      make(map[string]*pfs.Append),
			Branch:       branch,
			Provenance:   provenance,
			ParentCommit: parent,
		}
		if err := d.insertDiffInfo(newDiff); err != nil {
			return err
		}
	}

	d.commitConds[commitID] = sync.NewCond(&d.lock)
	return nil
}
Exemple #2
0
// branchParent picks the parent for a commit that is being started on branch.
//
// It returns (nil, nil) when branch does not yet resolve to a commit, the
// already-recorded parent when commit itself is the branch head, and the
// current branch head otherwise.
func (d *driver) branchParent(commit *pfs.Commit, branch string) (*pfs.Commit, error) {
	// Resolve the branch name to whatever commit currently heads it.
	head, err := d.canonicalCommit(client.NewCommit(commit.Repo.Name, branch))
	if err != nil {
		return nil, err
	}

	switch head.ID {
	case branch:
		// The name resolved to itself: nothing has been committed on this
		// branch yet, so there is no parent.
		return nil, nil
	case commit.ID:
		// The branch already points at this commit, which happens when an
		// earlier shard of the same commit has been processed. Reuse the
		// parent stored for it.
		for _, byCommit := range d.diffs[commit.Repo.Name] {
			if info, found := byCommit[commit.ID]; found {
				return info.ParentCommit, nil
			}
		}
		// The branch resolved to a commit that no shard knows about, which
		// points at a bug somewhere else.
		return nil, fmt.Errorf("unreachable")
	}
	return head, nil
}
// testBlock creates a job, then concurrently records its output commit and a
// JOB_SUCCESS state while the test goroutine calls InspectJob with BlockState
// set (presumably blocking until the state is written — confirm against the
// APIServer implementation).
//
// The writer goroutine never calls require/t.FailNow itself: the testing
// package only allows FailNow from the goroutine running the test. Its error
// is handed back over a channel and asserted here instead. If the writer
// fails, the InspectJob context is cancelled so a context-aware server does
// not leave the test blocked.
func testBlock(t *testing.T, apiServer persist.APIServer) {
	jobInfo, err := apiServer.CreateJobInfo(context.Background(), &persist.JobInfo{
		JobID: uuid.NewWithoutDashes(),
	})
	require.NoError(t, err)
	jobID := jobInfo.JobID

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Buffered so the writer can always deliver its result and exit.
	writeErr := make(chan error, 1)
	go func() {
		err := func() error {
			if _, err := apiServer.CreateJobOutput(
				context.Background(),
				&persist.JobOutput{
					JobID:        jobID,
					OutputCommit: client.NewCommit("foo", "bar"),
				}); err != nil {
				return err
			}
			_, err := apiServer.CreateJobState(
				context.Background(),
				&persist.JobState{
					JobID: jobID,
					State: ppsclient.JobState_JOB_SUCCESS,
				})
			return err
		}()
		if err != nil {
			// The state will never be written; unblock InspectJob.
			cancel()
		}
		writeErr <- err
	}()

	_, inspectErr := apiServer.InspectJob(
		ctx,
		&ppsclient.InspectJobRequest{
			Job:        &ppsclient.Job{ID: jobID},
			BlockState: true,
		},
	)
	// Report a writer failure first: it is the root cause of any
	// cancellation error InspectJob may have returned.
	require.NoError(t, <-writeErr)
	require.NoError(t, inspectErr)
}
Exemple #4
0
// canonicalCommit resolves commit to its canonical form. When commit.ID is a
// key in the repo's entry of d.branches, a new commit carrying the mapped ID
// is returned; otherwise commit is returned unchanged. A repo with no entry
// in d.branches yields a repo-not-found error.
func (d *driver) canonicalCommit(commit *pfs.Commit) (*pfs.Commit, error) {
	repoName := commit.Repo.Name
	branchHeads, known := d.branches[repoName]
	if !known {
		return nil, pfsserver.NewErrRepoNotFound(repoName)
	}
	commitID, isBranch := branchHeads[commit.ID]
	if !isBranch {
		// Not a branch name, so the ID is already canonical.
		return commit, nil
	}
	return client.NewCommit(repoName, commitID), nil
}
Exemple #5
0
// CreateRepo registers a new repo and creates its initial (empty commit ID)
// diff on every shard in shards, both in the in-memory diff index and through
// the block client. The whole operation runs under d.lock.
//
// It returns an error if the repo already exists, if the name fails
// validateRepoName, if any repo listed in provenance cannot be inspected, if
// the block client cannot be obtained, if a diff cannot be inserted, or if
// any CreateDiff call fails (the first such error that was recorded is
// returned after all calls have finished).
func (d *driver) CreateRepo(repo *pfs.Repo, created *google_protobuf.Timestamp,
	provenance []*pfs.Repo, shards map[uint64]bool) error {
	d.lock.Lock()
	defer d.lock.Unlock()
	if _, ok := d.diffs[repo.Name]; ok {
		return fmt.Errorf("repo %s exists", repo.Name)
	}
	if err := validateRepoName(repo.Name); err != nil {
		return err
	}
	for _, provRepo := range provenance {
		// A provenance repo that cannot be inspected is a hard failure; the
		// error must reach the caller rather than being reported as success.
		if _, err := d.inspectRepo(provRepo, shards); err != nil {
			return err
		}
	}

	// Obtain the block client before touching any state so that a client
	// failure cannot leave a half-created repo behind.
	blockClient, err := d.getBlockClient()
	if err != nil {
		return err
	}

	d.createRepoState(repo)

	var wg sync.WaitGroup
	// Capacity 1 plus non-blocking sends: only the first error is kept.
	errCh := make(chan error, 1)
	for shard := range shards {
		diffInfo := &pfs.DiffInfo{
			Diff:     client.NewDiff(repo.Name, "", shard),
			Finished: created,
		}
		for _, provRepo := range provenance {
			diffInfo.Provenance = append(diffInfo.Provenance, client.NewCommit(provRepo.Name, ""))
		}
		if err := d.diffs.insert(diffInfo); err != nil {
			return err
		}
		// Count the goroutine only once it is certain to be started.
		wg.Add(1)
		go func(diffInfo *pfs.DiffInfo) {
			defer wg.Done()
			if _, err := blockClient.CreateDiff(context.Background(), diffInfo); err != nil {
				select {
				case errCh <- err:
				default:
				}
			}
		}(diffInfo)
	}
	wg.Wait()
	select {
	case err := <-errCh:
		return err
	default:
	}
	return nil
}
Exemple #6
0
// fullRepoProvenance returns the repos of every commit reported by
// fullCommitProvenance for the repo's "" commit, in the order they were
// reported. The result is nil when there is no provenance.
func (d *driver) fullRepoProvenance(repo *pfs.Repo, shards map[uint64]bool) ([]*pfs.Repo, error) {
	root := client.NewCommit(repo.Name, "")
	// Fresh, empty set handed to the recursion (presumably its visited
	// set — confirm against fullCommitProvenance).
	seen := make(map[string]bool)
	commits, err := d.fullCommitProvenance(root, seen, shards)
	if err != nil {
		return nil, err
	}

	var repos []*pfs.Repo
	for i := range commits {
		repos = append(repos, commits[i].Repo)
	}
	return repos, nil
}
Exemple #7
0
// ListBranch returns the CommitInfo for every key registered under repo in
// d.branches, in map-iteration (i.e. unspecified) order. It returns a
// repo-not-found error when repo has no entry in d.branches, and nil when the
// repo has no branches.
//
// NOTE(review): unlike StartCommit and CreateRepo this method does not take
// d.lock itself — confirm whether callers are expected to hold it.
func (d *driver) ListBranch(repo *pfs.Repo, shards map[uint64]bool) ([]*pfs.CommitInfo, error) {
	branches, found := d.branches[repo.Name]
	if !found {
		return nil, pfsserver.NewErrRepoNotFound(repo.Name)
	}

	var infos []*pfs.CommitInfo
	for branch := range branches {
		// The branch key is passed as the commit ID and inspected as such.
		info, err := d.inspectCommit(client.NewCommit(repo.Name, branch), shards)
		if err != nil {
			return nil, err
		}
		infos = append(infos, info)
	}
	return infos, nil
}
Exemple #8
0
// parseCommitMounts turns each argument of the form "repo/commit[:alias]"
// into a CommitMount. Everything up to the last slash is the repo name
// (cleaned with path.Clean); the remainder is split on ":" into the commit ID
// and an optional alias. It returns nil when args is empty.
func parseCommitMounts(args []string) []*fuse.CommitMount {
	var mounts []*fuse.CommitMount
	for i := range args {
		repoPart, commitPart := path.Split(args[i])
		mount := &fuse.CommitMount{Commit: client.NewCommit("", "")}
		mount.Commit.Repo.Name = path.Clean(repoPart)

		// strings.Split with a non-empty separator always yields at least
		// one element, so fields[0] is safe to read.
		fields := strings.Split(commitPart, ":")
		mount.Commit.ID = fields[0]
		if len(fields) >= 2 {
			mount.Alias = fields[1]
		}
		mounts = append(mounts, mount)
	}
	return mounts
}
// StartCommit handles a StartCommitRequest: it looks up the version from ctx,
// asks the router for that version's shards, starts the commit in the driver
// and then pulses commit waiters for the new commit with COMMIT_TYPE_WRITE.
// The request, response, error and elapsed time are logged on return.
func (a *internalAPIServer) StartCommit(ctx context.Context, request *pfs.StartCommitRequest) (response *google_protobuf.Empty, retErr error) {
	// Deferred first so every return path, including early errors, is logged.
	defer func(start time.Time) { a.Log(request, response, retErr, time.Since(start)) }(time.Now())

	version, err := a.getVersion(ctx)
	if err != nil {
		return nil, err
	}
	shards, err := a.router.GetShards(version)
	if err != nil {
		return nil, err
	}

	err = a.driver.StartCommit(request.Repo, request.ID, request.ParentID,
		request.Branch, request.Started, request.Provenance, shards)
	if err != nil {
		return nil, err
	}

	started := client.NewCommit(request.Repo.Name, request.ID)
	err = a.pulseCommitWaiters(started, pfs.CommitType_COMMIT_TYPE_WRITE, shards)
	if err != nil {
		return nil, err
	}
	return google_protobuf.EmptyInstance, nil
}
Exemple #10
0
// getCommitMount looks up the commit mount for nameOrAlias.
//
// With no mounts configured, a fresh mount for repo nameOrAlias (empty commit
// ID) on the filesystem's shard is returned. Otherwise the first mount whose
// alias matches wins, then the first mount whose repo name matches; nil is
// returned when neither matches.
func (f *filesystem) getCommitMount(nameOrAlias string) *CommitMount {
	mounts := f.CommitMounts
	if len(mounts) == 0 {
		return &CommitMount{
			Commit: client.NewCommit(nameOrAlias, ""),
			Shard:  f.Shard,
		}
	}

	// Aliases take priority over repo names: the same repo may be mounted
	// more than once under different aliases (e.g. "out" and "prev"), so a
	// full alias pass must finish before any repo name is considered.
	for i := range mounts {
		if mounts[i].Alias == nameOrAlias {
			return mounts[i]
		}
	}
	for i := range mounts {
		if mounts[i].Commit.Repo.Name == nameOrAlias {
			return mounts[i]
		}
	}
	return nil
}
Exemple #11
0
// runPipeline drives a pipeline: it registers a cancel func for the pipeline,
// marks the pipeline as running, and then loops forever listing new input
// commits (blocking) and creating one job per combination of the new commit
// with the current leaf commits of the other input repos.
//
// It returns nil immediately if the pipeline is already registered in
// a.cancelFuncs or has no inputs. Otherwise the loop only ends by returning an
// error (including whatever ListCommit returns once ctx is cancelled).
//
// NOTE(review): on the error return paths below, cancel is never called and
// the pipeline's entry in a.cancelFuncs is not removed here — confirm that a
// caller cleans this up, otherwise the pipeline can never be restarted.
func (a *apiServer) runPipeline(pipelineInfo *ppsclient.PipelineInfo) error {
	ctx, cancel := context.WithCancel(context.Background())
	// Decide, under cancelFuncsLock, whether there is anything to run; if so,
	// publish cancel so the pipeline can be stopped from elsewhere.
	returnNil := func() bool {
		a.cancelFuncsLock.Lock()
		defer a.cancelFuncsLock.Unlock()
		if _, ok := a.cancelFuncs[pipelineInfo.Pipeline.Name]; ok {
			// The pipeline is already being run
			return true
		}
		if len(pipelineInfo.Inputs) == 0 {
			// this pipeline does not have inputs; there is nothing to be done
			return true
		}

		a.cancelFuncs[pipelineInfo.Pipeline.Name] = cancel
		return false
	}()
	if returnNil {
		return nil
	}

	persistClient, err := a.getPersistClient()
	if err != nil {
		return err
	}
	_, err = persistClient.UpdatePipelineState(ctx, &persist.UpdatePipelineStateRequest{
		PipelineName: pipelineInfo.Pipeline.Name,
		State:        ppsclient.PipelineState_PIPELINE_RUNNING,
	})
	if err != nil {
		return err
	}

	// repoToLeaves maps each raw input repo name to the set of commit IDs that
	// have been seen and have not (yet) been superseded by a child commit.
	repoToLeaves := make(map[string]map[string]bool)
	rawInputRepos, err := a.rawInputs(ctx, pipelineInfo)
	if err != nil {
		return err
	}
	for _, repo := range rawInputRepos {
		repoToLeaves[repo.Name] = make(map[string]bool)
	}
	pfsAPIClient, err := a.getPfsClient()
	if err != nil {
		return err
	}
	for {
		// Pass every current leaf as a FromCommit of the next listing.
		var fromCommits []*pfsclient.Commit
		for repo, leaves := range repoToLeaves {
			for leaf := range leaves {
				fromCommits = append(
					fromCommits,
					&pfsclient.Commit{
						Repo: &pfsclient.Repo{Name: repo},
						ID:   leaf,
					})
			}
		}
		listCommitRequest := &pfsclient.ListCommitRequest{
			Repo:       rawInputRepos,
			CommitType: pfsclient.CommitType_COMMIT_TYPE_READ,
			FromCommit: fromCommits,
			Block:      true,
		}
		commitInfos, err := pfsAPIClient.ListCommit(ctx, listCommitRequest)
		if err != nil {
			return err
		}
		for _, commitInfo := range commitInfos.CommitInfo {
			// The new commit becomes a leaf and its parent stops being one.
			// NOTE(review): this write panics if the commit's repo is not one
			// of rawInputRepos (nil inner map) — presumably ListCommit only
			// returns commits from the requested repos; confirm.
			repoToLeaves[commitInfo.Commit.Repo.Name][commitInfo.Commit.ID] = true
			if commitInfo.ParentCommit != nil {
				delete(repoToLeaves[commitInfo.ParentCommit.Repo.Name], commitInfo.ParentCommit.ID)
			}
			// generate all the permutations of leaves we could use this commit with
			// (one leaf from every repo other than the new commit's own repo;
			// a repo with no leaves empties the whole set of combinations)
			commitSets := [][]*pfsclient.Commit{[]*pfsclient.Commit{}}
			for repoName, leaves := range repoToLeaves {
				if repoName == commitInfo.Commit.Repo.Name {
					continue
				}
				var newCommitSets [][]*pfsclient.Commit
				for _, commitSet := range commitSets {
					for leaf := range leaves {
						// Copy into a fresh slice so combinations never share
						// a backing array.
						newCommitSet := make([]*pfsclient.Commit, len(commitSet)+1)
						copy(newCommitSet, commitSet)
						newCommitSet[len(commitSet)] = client.NewCommit(repoName, leaf)
						newCommitSets = append(newCommitSets, newCommitSet)
					}
				}
				commitSets = newCommitSets
			}
			for _, commitSet := range commitSets {
				// + 1 as the commitSet doesn't contain the commit we just got
				if len(commitSet)+1 < len(rawInputRepos) {
					continue
				}
				trueInputs, err := a.trueInputs(ctx, append(commitSet, commitInfo.Commit), pipelineInfo)
				if err != nil {
					return err
				}
				// A parent job is only looked up when the new commit has a
				// parent commit; otherwise ParentJob stays nil.
				var parentJob *ppsclient.Job
				if commitInfo.ParentCommit != nil {
					parentJob, err = a.parentJob(ctx, trueInputs, commitSet, commitInfo.ParentCommit, pipelineInfo)
					if err != nil {
						return err
					}
				}
				_, err = a.CreateJob(
					ctx,
					&ppsclient.CreateJobRequest{
						Transform:   pipelineInfo.Transform,
						Pipeline:    pipelineInfo.Pipeline,
						Parallelism: pipelineInfo.Parallelism,
						Inputs:      trueInputs,
						ParentJob:   parentJob,
					},
				)
				// An ErrEmptyInput from CreateJob is tolerated; any other
				// error aborts the pipeline loop.
				_, ok := err.(ErrEmptyInput)
				if err != nil && !ok {
					return err
				}
			}
		}
	}
}
// testBasicRethink exercises the basic persist API round trip: it creates a
// pipeline "foo" and reads it back, creates one job for "foo" and one for an
// unrelated pipeline "buzz", then checks that InspectJob returns the "foo"
// job and that ListJobInfos returns exactly that job when filtered by
// pipeline, by input commit, and by both.
func testBasicRethink(t *testing.T, apiServer persist.APIServer) {
	_, err := apiServer.CreatePipelineInfo(
		context.Background(),
		&persist.PipelineInfo{
			PipelineName: "foo",
		},
	)
	require.NoError(t, err)
	pipelineInfo, err := apiServer.GetPipelineInfo(
		context.Background(),
		&ppsclient.Pipeline{Name: "foo"},
	)
	require.NoError(t, err)
	require.Equal(t, "foo", pipelineInfo.PipelineName)
	input := &ppsclient.JobInput{Commit: client.NewCommit("bar", uuid.NewWithoutDashes())}
	jobInfo, err := apiServer.CreateJobInfo(
		context.Background(),
		&persist.JobInfo{
			JobID:        uuid.NewWithoutDashes(),
			PipelineName: "foo",
			Inputs:       []*ppsclient.JobInput{input},
		},
	)
	// Check the error before touching jobInfo: on failure it may be nil and
	// the dereference below would panic instead of failing the test cleanly.
	require.NoError(t, err)
	jobID := jobInfo.JobID
	input2 := &ppsclient.JobInput{Commit: client.NewCommit("fizz", uuid.NewWithoutDashes())}

	// A second job on a different pipeline and input; it must not show up in
	// any of the filtered listings below.
	_, err = apiServer.CreateJobInfo(
		context.Background(),
		&persist.JobInfo{
			JobID:        uuid.NewWithoutDashes(),
			PipelineName: "buzz",
			Inputs:       []*ppsclient.JobInput{input2},
		},
	)
	require.NoError(t, err)
	jobInfo, err = apiServer.InspectJob(
		context.Background(),
		&ppsclient.InspectJobRequest{
			Job: &ppsclient.Job{
				ID: jobInfo.JobID,
			},
		},
	)
	require.NoError(t, err)
	require.Equal(t, jobID, jobInfo.JobID)
	require.Equal(t, "foo", jobInfo.PipelineName)

	// Filter by pipeline only.
	jobInfos, err := apiServer.ListJobInfos(
		context.Background(),
		&ppsclient.ListJobRequest{
			Pipeline: &ppsclient.Pipeline{Name: "foo"},
		},
	)
	require.NoError(t, err)
	require.Equal(t, 1, len(jobInfos.JobInfo))
	require.Equal(t, jobID, jobInfos.JobInfo[0].JobID)

	// Filter by input commit only.
	jobInfos, err = apiServer.ListJobInfos(
		context.Background(),
		&ppsclient.ListJobRequest{
			InputCommit: []*pfsclient.Commit{input.Commit},
		},
	)
	require.NoError(t, err)
	require.Equal(t, 1, len(jobInfos.JobInfo))
	require.Equal(t, jobID, jobInfos.JobInfo[0].JobID)

	// Filter by pipeline and input commit together.
	jobInfos, err = apiServer.ListJobInfos(
		context.Background(),
		&ppsclient.ListJobRequest{
			Pipeline:    &ppsclient.Pipeline{Name: "foo"},
			InputCommit: []*pfsclient.Commit{input.Commit},
		},
	)
	require.NoError(t, err)
	require.Equal(t, 1, len(jobInfos.JobInfo))
	require.Equal(t, jobID, jobInfos.JobInfo[0].JobID)
}