func (a *apiServer) CreatePipeline(ctx context.Context, request *ppsclient.CreatePipelineRequest) (response *google_protobuf.Empty, retErr error) { defer func(start time.Time) { a.Log(request, response, retErr, time.Since(start)) }(time.Now()) defer func() { if retErr == nil { metrics.AddPipelines(1) } }() pfsAPIClient, err := a.getPfsClient() if err != nil { return nil, err } persistClient, err := a.getPersistClient() if err != nil { return nil, err } setDefaultPipelineInputMethod(request.Inputs) if request.Pipeline == nil { return nil, fmt.Errorf("pachyderm.ppsclient.pipelineserver: request.Pipeline cannot be nil") } repoSet := make(map[string]bool) for _, input := range request.Inputs { if _, err := pfsAPIClient.InspectRepo(ctx, &pfsclient.InspectRepoRequest{Repo: input.Repo}); err != nil { return nil, err } repoSet[input.Repo.Name] = true } if len(repoSet) < len(request.Inputs) { return nil, fmt.Errorf("pachyderm.ppsclient.pipelineserver: duplicate input repos") } repo := ppsserver.PipelineRepo(request.Pipeline) var provenance []*pfsclient.Repo for _, input := range request.Inputs { provenance = append(provenance, input.Repo) } if _, err := pfsAPIClient.CreateRepo( ctx, &pfsclient.CreateRepoRequest{ Repo: repo, Provenance: provenance, }); err != nil { return nil, err } persistPipelineInfo := &persist.PipelineInfo{ PipelineName: request.Pipeline.Name, Transform: request.Transform, Parallelism: request.Parallelism, Inputs: request.Inputs, OutputRepo: repo, Shard: a.hasher.HashPipeline(request.Pipeline), State: ppsclient.PipelineState_PIPELINE_IDLE, } if _, err := persistClient.CreatePipelineInfo(ctx, persistPipelineInfo); err != nil { return nil, err } return google_protobuf.EmptyInstance, nil }
// rawInputs tracks provenance for a pipeline back to its raw sources of // data // rawInputs is much efficient less than it could be because it does a lot of // duplicate work computing provenance. It could be made more efficient by // adding a special purpose rpc to the pfs api but that call wouldn't be useful // for much other than this. func (a *apiServer) rawInputs( ctx context.Context, pipelineInfo *ppsclient.PipelineInfo, ) ([]*pfsclient.Repo, error) { pfsClient, err := a.getPfsClient() if err != nil { return nil, err } repoInfo, err := pfsClient.InspectRepo( ctx, &pfsclient.InspectRepoRequest{Repo: ppsserver.PipelineRepo(pipelineInfo.Pipeline)}, ) if err != nil { return nil, err } var result []*pfsclient.Repo for _, repo := range repoInfo.Provenance { repoInfo, err := pfsClient.InspectRepo( ctx, &pfsclient.InspectRepoRequest{Repo: repo}, ) if err != nil { return nil, err } if len(repoInfo.Provenance) == 0 { result = append(result, repoInfo.Repo) } } return result, nil }
func (a *apiServer) CreateJob(ctx context.Context, request *ppsclient.CreateJobRequest) (response *ppsclient.Job, retErr error) { defer func(start time.Time) { a.Log(request, response, retErr, time.Since(start)) }(time.Now()) defer func() { if retErr == nil { metrics.AddJobs(1) } }() persistClient, err := a.getPersistClient() if err != nil { return nil, err } // We need to sort job inputs because the following code depends on // the invariant that inputs[i] matches parentInputs[i] sort.Sort(JobInputs(request.Inputs)) // In case some inputs have not provided a method, we set the default // method for them setDefaultJobInputMethod(request.Inputs) // Currently this happens when someone attempts to run a pipeline once if request.Pipeline != nil && request.Transform == nil { pipelineInfo, err := a.InspectPipeline(ctx, &ppsclient.InspectPipelineRequest{ Pipeline: request.Pipeline, }) if err != nil { return nil, err } request.Transform = pipelineInfo.Transform request.Parallelism = pipelineInfo.Parallelism } if request.Parallelism == 0 { nodeList, err := a.kubeClient.Nodes().List(api.ListOptions{}) if err != nil { return nil, fmt.Errorf("pachyderm.ppsclient.jobserver: parallelism set to zero and unable to retrieve node list from k8s") } if len(nodeList.Items) == 0 { return nil, fmt.Errorf("pachyderm.ppsclient.jobserver: no k8s nodes found") } request.Parallelism = uint64(len(nodeList.Items)) } repoSet := make(map[string]bool) for _, input := range request.Inputs { repoSet[input.Commit.Repo.Name] = true } if len(repoSet) < len(request.Inputs) { return nil, fmt.Errorf("pachyderm.ppsclient.jobserver: duplicate repo in job") } var parentJobInfo *persist.JobInfo if request.ParentJob != nil { inspectJobRequest := &ppsclient.InspectJobRequest{Job: request.ParentJob} parentJobInfo, err = persistClient.InspectJob(ctx, inspectJobRequest) if err != nil { return nil, err } // Check that the parent job has the same set of inputs as the current job if len(parentJobInfo.Inputs) != len(request.Inputs) { return nil, NewErrParentInputsMismatch(parentJobInfo.JobID) } for i, input := range request.Inputs { if parentJobInfo.Inputs[i].Commit.Repo.Name != input.Commit.Repo.Name { return nil, NewErrParentInputsMismatch(parentJobInfo.JobID) } } } pfsAPIClient, err := a.getPfsClient() if err != nil { return nil, err } jobID := getJobID(request) if !request.Force { _, err = persistClient.InspectJob(ctx, &ppsclient.InspectJobRequest{ Job: &ppsclient.Job{jobID}, }) if err == nil { // the job already exists. we simply return return &ppsclient.Job{jobID}, nil } } startCommitRequest := &pfsclient.StartCommitRequest{} for _, input := range request.Inputs { startCommitRequest.Provenance = append(startCommitRequest.Provenance, input.Commit) } // If JobInfo.Pipeline is set, use the pipeline repo if request.Pipeline != nil { startCommitRequest.Repo = ppsserver.PipelineRepo(&ppsclient.Pipeline{Name: request.Pipeline.Name}) if parentJobInfo != nil && parentJobInfo.OutputCommit.Repo.Name != startCommitRequest.Repo.Name { return nil, fmt.Errorf("Parent job was not part of the same pipeline; this is likely a bug") } } else { // If parent is set, use the parent's repo if parentJobInfo != nil { startCommitRequest.Repo = parentJobInfo.OutputCommit.Repo } else { // Otherwise, create a repo for this job startCommitRequest.Repo = ppsserver.JobRepo(&ppsclient.Job{ ID: jobID, }) var provenance []*pfsclient.Repo for _, input := range request.Inputs { provenance = append(provenance, input.Commit.Repo) } if _, err := pfsAPIClient.CreateRepo(ctx, &pfsclient.CreateRepoRequest{ Repo: startCommitRequest.Repo, Provenance: provenance, }); err != nil { return nil, err } } } repoToFromCommit := make(map[string]*pfsclient.Commit) if parentJobInfo != nil { if len(request.Inputs) != len(parentJobInfo.Inputs) { return nil, fmt.Errorf("parent job does not have the same number of inputs as this job does; this is likely a bug") } startCommitRequest.ParentID = parentJobInfo.OutputCommit.ID for i, jobInput := range request.Inputs { if jobInput.Method.Incremental { // input isn't being reduced, do it incrementally repoToFromCommit[jobInput.Commit.Repo.Name] = parentJobInfo.Inputs[i].Commit } } } commit, err := pfsAPIClient.StartCommit(ctx, startCommitRequest) if err != nil { return nil, err } // TODO validate job to make sure input commits and output repo exist persistJobInfo := &persist.JobInfo{ JobID: jobID, Transform: request.Transform, Inputs: request.Inputs, ParentJob: request.ParentJob, OutputCommit: commit, } if request.Pipeline != nil { persistJobInfo.PipelineName = request.Pipeline.Name } // If the job has no input, we respect the specified degree of parallelism // Otherwise, we run as many pods as possible given that each pod has some // input. if len(request.Inputs) == 0 { persistJobInfo.Parallelism = request.Parallelism } else { shardModuli, err := a.shardModuli(ctx, request.Inputs, request.Parallelism, repoToFromCommit) if err != nil { return nil, err } persistJobInfo.Parallelism = product(shardModuli) persistJobInfo.ShardModuli = shardModuli } if a.kubeClient == nil { return nil, fmt.Errorf("pachyderm.ppsclient.jobserver: no job backend") } _, err = persistClient.CreateJobInfo(ctx, persistJobInfo) if err != nil { return nil, err } defer func() { if retErr != nil { if _, err := persistClient.CreateJobState(ctx, &persist.JobState{ JobID: persistJobInfo.JobID, State: ppsclient.JobState_JOB_FAILURE, }); err != nil { protolion.Errorf("error from CreateJobState %s", err.Error()) } } }() if _, err := a.kubeClient.Extensions().Jobs(a.namespace).Create(job(persistJobInfo)); err != nil { return nil, err } return &ppsclient.Job{ ID: jobID, }, nil }