Example 1: apiServer.CreatePipeline (single input repo)
func (a *apiServer) CreatePipeline(ctx context.Context, request *pps.CreatePipelineRequest) (response *google_protobuf.Empty, err error) {
	defer func(start time.Time) { a.Log(request, response, err, time.Since(start)) }(time.Now())
	if request.Pipeline == nil {
		return nil, fmt.Errorf("pachyderm.pps.pipelineserver: request.Pipeline cannot be nil")
	}
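	// Record the pipeline metadata in the persist layer.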
	persistPipelineInfo := &persist.PipelineInfo{
		PipelineName: request.Pipeline.Name,
		Transform:    request.Transform,
		Shards:       request.Shards,
		InputRepo:    request.InputRepo,
	}
	if _, err := a.persistAPIServer.CreatePipelineInfo(ctx, persistPipelineInfo); err != nil {
		return nil, err
	}
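	// Create the pipeline's output repo in PFS.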
	repo := pps.PipelineRepo(request.Pipeline)
	if _, err := a.pfsAPIClient.CreateRepo(ctx, &pfs.CreateRepoRequest{Repo: repo}); err != nil {
		return nil, err
	}
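	// Run the pipeline asynchronously; failures are only logged.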
	go func() {
		if err := a.runPipeline(persistPipelineInfoToPipelineInfo(persistPipelineInfo)); err != nil {
			protolog.Printf("pipeline errored: %s", err.Error())
		}
	}()
	return google_protobuf.EmptyInstance, nil
}
Example 2: apiServer.CreatePipeline (multiple inputs, duplicate-input check)
func (a *apiServer) CreatePipeline(ctx context.Context, request *pps.CreatePipelineRequest) (response *google_protobuf.Empty, err error) {
	defer func(start time.Time) { a.Log(request, response, err, time.Since(start)) }(time.Now())
	if request.Pipeline == nil {
		return nil, fmt.Errorf("pachyderm.pps.pipelineserver: request.Pipeline cannot be nil")
	}
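	// Reject requests that name the same input repo more than once.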
	repoSet := make(map[string]bool)
	for _, input := range request.Inputs {
		repoSet[input.Repo.Name] = true
	}
	if len(repoSet) < len(request.Inputs) {
		return nil, fmt.Errorf("pachyderm.pps.pipelineserver: duplicate input repos")
	}
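	// The output repo is derived from the pipeline name.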
	repo := pps.PipelineRepo(request.Pipeline)
	persistPipelineInfo := &persist.PipelineInfo{
		PipelineName: request.Pipeline.Name,
		Transform:    request.Transform,
		Shards:       request.Shards,
		Inputs:       request.Inputs,
		OutputRepo:   repo,
	}
	if _, err := a.persistAPIServer.CreatePipelineInfo(ctx, persistPipelineInfo); err != nil {
		return nil, err
	}
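	// Create the pipeline's output repo in PFS.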
	if _, err := a.pfsAPIClient.CreateRepo(ctx, &pfs.CreateRepoRequest{Repo: repo}); err != nil {
		return nil, err
	}
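	// Run the pipeline asynchronously; failures are only logged.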
	go func() {
		if err := a.runPipeline(newPipelineInfo(persistPipelineInfo)); err != nil {
			protolion.Printf("pipeline errored: %s", err.Error())
		}
	}()
	return google_protobuf.EmptyInstance, nil
}
Example 3: TestPipeline (end-to-end pipeline test)
func TestPipeline(t *testing.T) {
	t.Parallel()
	pachClient := getPachClient(t)
	// create the input data repo
	dataRepo := uniqueString("TestPipeline.data")
	require.NoError(t, pfsutil.CreateRepo(pachClient, dataRepo))
	// create pipeline
	pipelineName := uniqueString("pipeline")
	outRepo := pps.PipelineRepo(ppsutil.NewPipeline(pipelineName))
	require.NoError(t, ppsutil.CreatePipeline(
		pachClient,
		pipelineName,
		"",
		[]string{"cp", path.Join("/pfs", dataRepo, "file"), "/pfs/out/file"},
		nil,
		1,
		[]*pps.PipelineInput{{Repo: &pfs.Repo{Name: dataRepo}}},
	))
	// Do first commit to repo
	commit1, err := pfsutil.StartCommit(pachClient, dataRepo, "")
	require.NoError(t, err)
	_, err = pfsutil.PutFile(pachClient, dataRepo, commit1.Id, "file", 0, strings.NewReader("foo\n"))
	require.NoError(t, err)
	require.NoError(t, pfsutil.FinishCommit(pachClient, dataRepo, commit1.Id))
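	// Block until the pipeline writes an output commit.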
	listCommitRequest := &pfs.ListCommitRequest{
		Repo:       []*pfs.Repo{outRepo},
		CommitType: pfs.CommitType_COMMIT_TYPE_READ,
		Block:      true,
	}
	listCommitResponse, err := pachClient.ListCommit(
		context.Background(),
		listCommitRequest,
	)
	require.NoError(t, err)
	outCommits := listCommitResponse.CommitInfo
	require.Equal(t, 1, len(outCommits))
	var buffer bytes.Buffer
	require.NoError(t, pfsutil.GetFile(pachClient, outRepo.Name, outCommits[0].Commit.Id, "file", 0, 0, "", nil, &buffer))
	require.Equal(t, "foo\n", buffer.String())
	// Do second commit to repo
	commit2, err := pfsutil.StartCommit(pachClient, dataRepo, commit1.Id)
	require.NoError(t, err)
	_, err = pfsutil.PutFile(pachClient, dataRepo, commit2.Id, "file", 0, strings.NewReader("bar\n"))
	require.NoError(t, err)
	require.NoError(t, pfsutil.FinishCommit(pachClient, dataRepo, commit2.Id))
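	// Block for a new output commit after the first one.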
	listCommitRequest = &pfs.ListCommitRequest{
		Repo:       []*pfs.Repo{outRepo},
		FromCommit: []*pfs.Commit{outCommits[0].Commit},
		CommitType: pfs.CommitType_COMMIT_TYPE_READ,
		Block:      true,
	}
	listCommitResponse, err = pachClient.ListCommit(
		context.Background(),
		listCommitRequest,
	)
	require.NoError(t, err)
	require.Equal(t, 1, len(listCommitResponse.CommitInfo))
	require.NotNil(t, listCommitResponse.CommitInfo[0].ParentCommit)
	require.Equal(t, outCommits[0].Commit.Id, listCommitResponse.CommitInfo[0].ParentCommit.Id)
	outCommits = listCommitResponse.CommitInfo
	buffer = bytes.Buffer{}
	require.NoError(t, pfsutil.GetFile(pachClient, outRepo.Name, outCommits[0].Commit.Id, "file", 0, 0, "", nil, &buffer))
	require.Equal(t, "foo\nbar\n", buffer.String())
}
Example 4: apiServer.StartJob (shard claiming and output commit setup)
func (a *apiServer) StartJob(ctx context.Context, request *pps.StartJobRequest) (response *pps.StartJobResponse, retErr error) {
	defer func(start time.Time) { a.Log(request, response, retErr, time.Since(start)) }(time.Now())
	inspectJobRequest := &pps.InspectJobRequest{Job: request.Job}
	jobInfo, err := a.persistAPIServer.InspectJob(ctx, inspectJobRequest)
	if err != nil {
		return nil, err
	}
	if jobInfo.Transform == nil {
		return nil, fmt.Errorf("jobInfo.Transform should not be nil (this is likely a bug)")
	}
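	// Claim the next shard index for this job under the lock.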
	a.lock.Lock()
	jobState, ok := a.jobStates[request.Job.Id]
	if !ok {
		jobState = newJobState()
		a.jobStates[request.Job.Id] = jobState
	}
	shard := jobState.start
	if jobState.start < jobInfo.Shards {
		jobState.start++
	}
	a.lock.Unlock()
	if shard == jobInfo.Shards {
		return nil, fmt.Errorf("job %s already has %d shards", request.Job.Id, jobInfo.Shards)
	}
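	// The first shard to start creates the job's output commit.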
	if shard == 0 {
		var parentCommit *pfs.Commit
		if jobInfo.ParentJob == nil {
			var repo *pfs.Repo
			if jobInfo.PipelineName == "" {
				repo = pps.JobRepo(request.Job)
				if _, err := a.pfsAPIClient.CreateRepo(ctx, &pfs.CreateRepoRequest{Repo: repo}); err != nil {
					return nil, err
				}
			} else {
				repo = pps.PipelineRepo(&pps.Pipeline{Name: jobInfo.PipelineName})
			}
			parentCommit = &pfs.Commit{Repo: repo}
		} else {
			inspectJobRequest := &pps.InspectJobRequest{Job: jobInfo.ParentJob}
			parentJobInfo, err := a.persistAPIServer.InspectJob(ctx, inspectJobRequest)
			if err != nil {
				return nil, err
			}
			parentCommit = parentJobInfo.OutputCommit
		}
		commit, err := a.pfsAPIClient.StartCommit(ctx, &pfs.StartCommitRequest{
			Parent: parentCommit,
		})
		if err != nil {
			return nil, err
		}
		if _, err := a.persistAPIServer.CreateJobOutput(
			ctx,
			&persist.JobOutput{
				JobId:        request.Job.Id,
				OutputCommit: commit,
			}); err != nil {
			return nil, err
		}
		jobState.outputCommit = commit
		close(jobState.commitReady)
	}
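	// Every shard waits until the output commit has been created.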
	<-jobState.commitReady
	if jobState.outputCommit == nil {
		return nil, fmt.Errorf("jobState.outputCommit should not be nil (this is likely a bug)")
	}
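	// Build one CommitMount per input, sharded by file for reduce inputs and by block otherwise.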
	var commitMounts []*fuse.CommitMount
	for _, jobInput := range jobInfo.Inputs {
		commitMount := &fuse.CommitMount{
			Commit: jobInput.Commit,
			Shard: &pfs.Shard{
				FileModulus:  1,
				BlockModulus: 1,
			},
		}
		if jobInput.Reduce {
			commitMount.Shard.FileNumber = shard
			commitMount.Shard.FileModulus = jobInfo.Shards
		} else {
			commitMount.Shard.BlockNumber = shard
			commitMount.Shard.BlockModulus = jobInfo.Shards
		}
		commitMounts = append(commitMounts, commitMount)
	}
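	// Mount the output commit under the "out" alias.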
	outputCommitMount := &fuse.CommitMount{
		Commit: jobState.outputCommit,
		Alias:  "out",
	}
	commitMounts = append(commitMounts, outputCommitMount)
	return &pps.StartJobResponse{
		Transform:    jobInfo.Transform,
		CommitMounts: commitMounts,
		OutputCommit: jobState.outputCommit,
		Index:        shard,
	}, nil
}