func (a *apiServer) CreatePipeline(ctx context.Context, request *pps.CreatePipelineRequest) (response *google_protobuf.Empty, err error) { defer func(start time.Time) { a.Log(request, response, err, time.Since(start)) }(time.Now()) if request.Pipeline == nil { return nil, fmt.Errorf("pachyderm.pps.pipelineserver: request.Pipeline cannot be nil") } persistPipelineInfo := &persist.PipelineInfo{ PipelineName: request.Pipeline.Name, Transform: request.Transform, Shards: request.Shards, InputRepo: request.InputRepo, } if _, err := a.persistAPIServer.CreatePipelineInfo(ctx, persistPipelineInfo); err != nil { return nil, err } repo := pps.PipelineRepo(request.Pipeline) if _, err := a.pfsAPIClient.CreateRepo(ctx, &pfs.CreateRepoRequest{Repo: repo}); err != nil { return nil, err } go func() { if err := a.runPipeline(persistPipelineInfoToPipelineInfo(persistPipelineInfo)); err != nil { protolog.Printf("pipeline errored: %s", err.Error()) } }() return google_protobuf.EmptyInstance, nil }
func (a *apiServer) CreatePipeline(ctx context.Context, request *pps.CreatePipelineRequest) (response *google_protobuf.Empty, err error) { defer func(start time.Time) { a.Log(request, response, err, time.Since(start)) }(time.Now()) if request.Pipeline == nil { return nil, fmt.Errorf("pachyderm.pps.pipelineserver: request.Pipeline cannot be nil") } repoSet := make(map[string]bool) for _, input := range request.Inputs { repoSet[input.Repo.Name] = true } if len(repoSet) < len(request.Inputs) { return nil, fmt.Errorf("pachyderm.pps.pipelineserver: duplicate input repos") } repo := pps.PipelineRepo(request.Pipeline) persistPipelineInfo := &persist.PipelineInfo{ PipelineName: request.Pipeline.Name, Transform: request.Transform, Shards: request.Shards, Inputs: request.Inputs, OutputRepo: repo, } if _, err := a.persistAPIServer.CreatePipelineInfo(ctx, persistPipelineInfo); err != nil { return nil, err } if _, err := a.pfsAPIClient.CreateRepo(ctx, &pfs.CreateRepoRequest{Repo: repo}); err != nil { return nil, err } go func() { if err := a.runPipeline(newPipelineInfo(persistPipelineInfo)); err != nil { protolion.Printf("pipeline errored: %s", err.Error()) } }() return google_protobuf.EmptyInstance, nil }
// TestPipeline is an end-to-end test: it creates an input repo and a
// pipeline that copies /pfs/<dataRepo>/file to /pfs/out/file, then commits
// twice to the input repo and verifies that each commit produces an output
// commit whose file contains the accumulated data.
func TestPipeline(t *testing.T) {
	t.Parallel()
	pachClient := getPachClient(t)
	// create repos
	dataRepo := uniqueString("TestPipeline.data")
	require.NoError(t, pfsutil.CreateRepo(pachClient, dataRepo))
	// create pipeline
	pipelineName := uniqueString("pipeline")
	// The pipeline's output repo is derived from the pipeline name.
	outRepo := pps.PipelineRepo(ppsutil.NewPipeline(pipelineName))
	require.NoError(t, ppsutil.CreatePipeline(
		pachClient,
		pipelineName,
		"", // default image
		[]string{"cp", path.Join("/pfs", dataRepo, "file"), "/pfs/out/file"},
		nil, // no stdin
		1,   // one shard
		[]*pps.PipelineInput{{Repo: &pfs.Repo{Name: dataRepo}}},
	))
	// Do first commit to repo
	commit1, err := pfsutil.StartCommit(pachClient, dataRepo, "")
	require.NoError(t, err)
	_, err = pfsutil.PutFile(pachClient, dataRepo, commit1.Id, "file", 0, strings.NewReader("foo\n"))
	require.NoError(t, err)
	require.NoError(t, pfsutil.FinishCommit(pachClient, dataRepo, commit1.Id))
	// Block until the pipeline has produced a finished (READ) commit in the
	// output repo.
	listCommitRequest := &pfs.ListCommitRequest{
		Repo:       []*pfs.Repo{outRepo},
		CommitType: pfs.CommitType_COMMIT_TYPE_READ,
		Block:      true,
	}
	listCommitResponse, err := pachClient.ListCommit(
		context.Background(),
		listCommitRequest,
	)
	require.NoError(t, err)
	outCommits := listCommitResponse.CommitInfo
	require.Equal(t, 1, len(outCommits))
	// The output file should mirror the first input commit.
	var buffer bytes.Buffer
	require.NoError(t, pfsutil.GetFile(pachClient, outRepo.Name, outCommits[0].Commit.Id, "file", 0, 0, "", nil, &buffer))
	require.Equal(t, "foo\n", buffer.String())
	// Do second commit to repo
	commit2, err := pfsutil.StartCommit(pachClient, dataRepo, commit1.Id)
	require.NoError(t, err)
	_, err = pfsutil.PutFile(pachClient, dataRepo, commit2.Id, "file", 0, strings.NewReader("bar\n"))
	require.NoError(t, err)
	require.NoError(t, pfsutil.FinishCommit(pachClient, dataRepo, commit2.Id))
	// Block for the second output commit, starting from the first one.
	listCommitRequest = &pfs.ListCommitRequest{
		Repo:       []*pfs.Repo{outRepo},
		FromCommit: []*pfs.Commit{outCommits[0].Commit},
		CommitType: pfs.CommitType_COMMIT_TYPE_READ,
		Block:      true,
	}
	listCommitResponse, err = pachClient.ListCommit(
		context.Background(),
		listCommitRequest,
	)
	require.NoError(t, err)
	// The second output commit must descend from the first.
	require.NotNil(t, listCommitResponse.CommitInfo[0].ParentCommit)
	require.Equal(t, outCommits[0].Commit.Id, listCommitResponse.CommitInfo[0].ParentCommit.Id)
	outCommits = listCommitResponse.CommitInfo
	require.Equal(t, 1, len(outCommits))
	// Data accumulates across commits: the file now holds both writes.
	buffer = bytes.Buffer{}
	require.NoError(t, pfsutil.GetFile(pachClient, outRepo.Name, outCommits[0].Commit.Id, "file", 0, 0, "", nil, &buffer))
	require.Equal(t, "foo\nbar\n", buffer.String())
}
// StartJob claims one shard of the given job for the caller and returns the
// transform plus the commit mounts the shard should run against. The first
// caller (shard 0) also creates the job's output commit; all other shards
// wait for it via the commitReady channel.
func (a *apiServer) StartJob(ctx context.Context, request *pps.StartJobRequest) (response *pps.StartJobResponse, retErr error) {
	defer func(start time.Time) { a.Log(request, response, retErr, time.Since(start)) }(time.Now())
	inspectJobRequest := &pps.InspectJobRequest{Job: request.Job}
	jobInfo, err := a.persistAPIServer.InspectJob(ctx, inspectJobRequest)
	if err != nil {
		return nil, err
	}
	if jobInfo.Transform == nil {
		return nil, fmt.Errorf("jobInfo.Transform should not be nil (this is likely a bug)")
	}
	// Atomically claim the next shard index for this job. jobState.start is
	// the count of shards handed out so far; it is capped at jobInfo.Shards.
	a.lock.Lock()
	jobState, ok := a.jobStates[request.Job.Id]
	if !ok {
		jobState = newJobState()
		a.jobStates[request.Job.Id] = jobState
	}
	shard := jobState.start
	if jobState.start < jobInfo.Shards {
		jobState.start++
	}
	a.lock.Unlock()
	// shard == jobInfo.Shards means every shard was already claimed.
	if shard == jobInfo.Shards {
		return nil, fmt.Errorf("job %s already has %d shards", request.Job.Id, jobInfo.Shards)
	}
	// Shard 0 is responsible for creating the job's output commit exactly
	// once; closing commitReady broadcasts readiness to the other shards.
	//
	// NOTE(review): if any call on this path fails after the shard was
	// claimed but before close(jobState.commitReady), commitReady is never
	// closed and the other shards block forever on the receive below —
	// confirm whether a cleanup/retry path exists elsewhere.
	if shard == 0 {
		var parentCommit *pfs.Commit
		if jobInfo.ParentJob == nil {
			// No parent job: the output commit's parent is the head of the
			// job's repo (created here for standalone jobs) or the
			// pipeline's output repo.
			var repo *pfs.Repo
			if jobInfo.PipelineName == "" {
				repo = pps.JobRepo(request.Job)
				if _, err := a.pfsAPIClient.CreateRepo(ctx, &pfs.CreateRepoRequest{Repo: repo}); err != nil {
					return nil, err
				}
			} else {
				repo = pps.PipelineRepo(&pps.Pipeline{Name: jobInfo.PipelineName})
			}
			parentCommit = &pfs.Commit{Repo: repo}
		} else {
			// Chain onto the parent job's output commit.
			inspectJobRequest := &pps.InspectJobRequest{Job: jobInfo.ParentJob}
			parentJobInfo, err := a.persistAPIServer.InspectJob(ctx, inspectJobRequest)
			if err != nil {
				return nil, err
			}
			parentCommit = parentJobInfo.OutputCommit
		}
		commit, err := a.pfsAPIClient.StartCommit(ctx, &pfs.StartCommitRequest{
			Parent: parentCommit,
		})
		if err != nil {
			return nil, err
		}
		// Persist the job -> output-commit association before publishing it
		// to the other shards.
		if _, err := a.persistAPIServer.CreateJobOutput(
			ctx,
			&persist.JobOutput{
				JobId:        request.Job.Id,
				OutputCommit: commit,
			}); err != nil {
			return nil, err
		}
		jobState.outputCommit = commit
		close(jobState.commitReady)
	}
	// Wait until shard 0 has published the output commit.
	<-jobState.commitReady
	if jobState.outputCommit == nil {
		return nil, fmt.Errorf("jobState.outputCommit should not be nil (this is likely a bug)")
	}
	// Build one mount per input, sharded so each of the jobInfo.Shards
	// workers sees a disjoint slice: reduce inputs are partitioned by file,
	// map inputs by block.
	var commitMounts []*fuse.CommitMount
	for _, jobInput := range jobInfo.Inputs {
		commitMount := &fuse.CommitMount{
			Commit: jobInput.Commit,
			Shard: &pfs.Shard{
				FileModulus:  1,
				BlockModulus: 1,
			},
		}
		if jobInput.Reduce {
			commitMount.Shard.FileNumber = shard
			commitMount.Shard.FileModulus = jobInfo.Shards
		} else {
			commitMount.Shard.BlockNumber = shard
			commitMount.Shard.BlockModulus = jobInfo.Shards
		}
		commitMounts = append(commitMounts, commitMount)
	}
	// The output commit is mounted unsharded under the alias "out".
	outputCommitMount := &fuse.CommitMount{
		Commit: jobState.outputCommit,
		Alias:  "out",
	}
	commitMounts = append(commitMounts, outputCommitMount)
	return &pps.StartJobResponse{
		Transform:    jobInfo.Transform,
		CommitMounts: commitMounts,
		OutputCommit: jobState.outputCommit,
		Index:        shard,
	}, nil
}