func TestGrep(t *testing.T) { t.Skip() t.Parallel() dataRepo := uniqueString("TestGrep.data") pfsClient := getPfsClient(t) require.NoError(t, pfsutil.CreateRepo(pfsClient, dataRepo)) commit, err := pfsutil.StartCommit(pfsClient, dataRepo, "") require.NoError(t, err) for i := 0; i < 100; i++ { _, err = pfsutil.PutFile(pfsClient, dataRepo, commit.Id, fmt.Sprintf("file%d", i), 0, strings.NewReader("foo\nbar\nfizz\nbuzz\n")) require.NoError(t, err) } require.NoError(t, pfsutil.FinishCommit(pfsClient, dataRepo, commit.Id)) ppsClient := getPpsClient(t) job1, err := ppsutil.CreateJob( ppsClient, "", []string{"bash"}, fmt.Sprintf("grep foo /pfs/%s/* >/pfs/out/foo", dataRepo), 1, []*pps.JobInput{{Commit: commit}}, "", ) require.NoError(t, err) job2, err := ppsutil.CreateJob( ppsClient, "", []string{"bash"}, fmt.Sprintf("grep foo /pfs/%s/* >/pfs/out/foo", dataRepo), 4, []*pps.JobInput{{Commit: commit}}, "", ) require.NoError(t, err) inspectJobRequest := &pps.InspectJobRequest{ Job: job1, BlockOutput: true, BlockState: true, } job1Info, err := ppsClient.InspectJob(context.Background(), inspectJobRequest) require.NoError(t, err) inspectJobRequest.Job = job2 job2Info, err := ppsClient.InspectJob(context.Background(), inspectJobRequest) require.NoError(t, err) repo1Info, err := pfsutil.InspectRepo(pfsClient, job1Info.OutputCommit.Repo.Name) require.NoError(t, err) repo2Info, err := pfsutil.InspectRepo(pfsClient, job2Info.OutputCommit.Repo.Name) require.NoError(t, err) require.Equal(t, repo1Info.SizeBytes, repo2Info.SizeBytes) }
func TestJob(t *testing.T) { t.Parallel() dataRepo := uniqueString("TestJob.data") pachClient := getPachClient(t) require.NoError(t, pfsutil.CreateRepo(pachClient, dataRepo)) commit, err := pfsutil.StartCommit(pachClient, dataRepo, "") require.NoError(t, err) _, err = pfsutil.PutFile(pachClient, dataRepo, commit.Id, "file", 0, strings.NewReader("foo\n")) require.NoError(t, err) require.NoError(t, pfsutil.FinishCommit(pachClient, dataRepo, commit.Id)) job, err := ppsutil.CreateJob( pachClient, "", []string{"cp", path.Join("/pfs", dataRepo, "file"), "/pfs/out/file"}, nil, 1, []*pps.JobInput{{Commit: commit}}, "", ) require.NoError(t, err) inspectJobRequest := &pps.InspectJobRequest{ Job: job, BlockOutput: true, BlockState: true, } jobInfo, err := pachClient.InspectJob(context.Background(), inspectJobRequest) require.NoError(t, err) require.Equal(t, pps.JobState_JOB_STATE_SUCCESS.String(), jobInfo.State.String()) commitInfo, err := pfsutil.InspectCommit(pachClient, jobInfo.OutputCommit.Repo.Name, jobInfo.OutputCommit.Id) require.NoError(t, err) require.Equal(t, pfs.CommitType_COMMIT_TYPE_READ, commitInfo.CommitType) var buffer bytes.Buffer require.NoError(t, pfsutil.GetFile(pachClient, jobInfo.OutputCommit.Repo.Name, jobInfo.OutputCommit.Id, "file", 0, 0, "", nil, &buffer)) require.Equal(t, "foo\n", buffer.String()) }
func TestJob(t *testing.T) { dataRepo := uniqueString("TestJob.data") pfsClient := getPfsClient(t) require.NoError(t, pfsutil.CreateRepo(pfsClient, dataRepo)) commit, err := pfsutil.StartCommit(pfsClient, dataRepo, "") require.NoError(t, err) _, err = pfsutil.PutFile(pfsClient, dataRepo, commit.Id, "file", 0, strings.NewReader("foo")) require.NoError(t, err) require.NoError(t, pfsutil.FinishCommit(pfsClient, dataRepo, commit.Id)) ppsClient := getPpsClient(t) job, err := ppsutil.CreateJob( ppsClient, "", []string{"cp", path.Join("/pfs", dataRepo, "file"), "/pfs/out/file"}, "", 1, []*pfs.Commit{commit}, "", ) require.NoError(t, err) inspectJobRequest := &pps.InspectJobRequest{ Job: job, BlockOutput: true, BlockState: true, } jobInfo, err := ppsClient.InspectJob(context.Background(), inspectJobRequest) require.NoError(t, err) var buffer bytes.Buffer require.NoError(t, pfsutil.GetFile(pfsClient, jobInfo.OutputCommit.Repo.Name, jobInfo.OutputCommit.Id, "file", 0, 0, nil, &buffer)) require.Equal(t, "foo", buffer.String()) }
func TestGrep(t *testing.T) { t.Parallel() dataRepo := uniqueString("pachyderm.TestGrep.data") pfsClient := getPfsClient(t) require.NoError(t, pfsutil.CreateRepo(pfsClient, dataRepo)) commit, err := pfsutil.StartCommit(pfsClient, dataRepo, "") require.NoError(t, err) for i := 0; i < 100; i++ { _, err = pfsutil.PutFile(pfsClient, dataRepo, commit.Id, fmt.Sprintf("file%d", i), 0, strings.NewReader("foo\nbar\nfizz\nbuzz\n")) require.NoError(t, err) } require.NoError(t, pfsutil.FinishCommit(pfsClient, dataRepo, commit.Id)) ppsClient := getPpsClient(t) _, err = ppsutil.CreateJob( ppsClient, "", []string{"bash"}, fmt.Sprintf("grep foo /pfs/%s/* >/pfs/out/foo", dataRepo), 1, []*pps.JobInput{{Commit: commit}}, "", ) require.NoError(t, err) }
func (w *worker) work(pfsClient pfs.APIClient, ppsClient pps.APIClient) error { opt := w.rand.Float64() switch { case opt < repo: repoName := w.name() if err := pfsutil.CreateRepo(pfsClient, repoName); err != nil { return err } w.repos = append(w.repos, &pfs.Repo{Name: repoName}) commit, err := pfsutil.StartCommit(pfsClient, repoName, "") if err != nil { return err } w.started = append(w.started, commit) case opt < commit: if len(w.started) >= maxStartedCommits { i := w.rand.Intn(len(w.started)) commit := w.started[i] if err := pfsutil.FinishCommit(pfsClient, commit.Repo.Name, commit.Id); err != nil { return err } w.started = append(w.started[:i], w.started[i+1:]...) w.finished = append(w.finished, commit) } else { commit := w.finished[w.rand.Intn(len(w.finished))] commit, err := pfsutil.StartCommit(pfsClient, commit.Repo.Name, commit.Id) if err != nil { return err } w.started = append(w.started, commit) } case opt < file: commit := w.started[w.rand.Intn(len(w.started))] if _, err := pfsutil.PutFile(pfsClient, commit.Repo.Name, commit.Id, w.name(), 0, w.reader()); err != nil { return err } case opt < job: inputs := [5]string{} var inputCommits []*pfs.Commit for i := range inputs { randI := w.rand.Intn(len(w.finished)) inputs[i] = w.finished[randI].Repo.Name inputCommits = append(inputCommits, w.finished[randI]) } var parentJobID string if len(w.jobs) > 0 { parentJobID = w.jobs[w.rand.Intn(len(w.jobs))].Id } outFilename := w.name() job, err := ppsutil.CreateJob( ppsClient, "", []string{"sh"}, w.grepCmd(inputs, outFilename), 1, inputCommits, parentJobID, ) if err != nil { return err } w.jobs = append(w.jobs, job) case opt < pipeline: inputs := [5]string{} var inputRepos []*pfs.Repo for i := range inputs { randI := w.rand.Intn(len(w.repos)) inputs[i] = w.repos[randI].Name inputRepos = append(inputRepos, w.repos[randI]) } pipelineName := w.name() outFilename := w.name() if err := ppsutil.CreatePipeline( ppsClient, pipelineName, "", []string{"sh"}, w.grepCmd(inputs, 
outFilename), 1, inputRepos, ); err != nil { return err } w.pipelines = append(w.pipelines, ppsutil.NewPipeline(pipelineName)) } return nil }
// work performs one randomly chosen action against the pfs/pps APIs:
// create a repo (plus an initial commit), finish or branch a commit,
// write a file, create or reap a job, or create a pipeline. The action
// is picked by comparing one uniform draw against the cumulative
// thresholds repo < commit < file < job < pipeline. Actions whose
// prerequisites are missing (e.g. no started commits to write into)
// return nil without doing anything.
func (w *worker) work(pfsClient pfs.APIClient, ppsClient pps.APIClient) error {
	opt := w.rand.Float64()
	switch {
	case opt < repo:
		// Create a fresh repo and immediately open a first commit in it.
		repoName := w.randString(10)
		if err := pfsutil.CreateRepo(pfsClient, repoName); err != nil {
			return err
		}
		w.repos = append(w.repos, &pfs.Repo{Name: repoName})
		commit, err := pfsutil.StartCommit(pfsClient, repoName, "")
		if err != nil {
			return err
		}
		w.started = append(w.started, commit)
	case opt < commit:
		// Finish a started commit when we have too many of them (or no
		// finished commit to branch from); otherwise branch a new commit
		// off a random finished one.
		if len(w.started) >= maxStartedCommits || len(w.finished) == 0 {
			if len(w.started) == 0 {
				return nil // nothing to finish yet
			}
			i := w.rand.Intn(len(w.started))
			commit := w.started[i]
			if err := pfsutil.FinishCommit(pfsClient, commit.Repo.Name, commit.Id); err != nil {
				return err
			}
			// Move the commit from the started list to the finished list.
			w.started = append(w.started[:i], w.started[i+1:]...)
			w.finished = append(w.finished, commit)
		} else {
			if len(w.finished) == 0 {
				return nil // unreachable given the branch condition; kept as a belt-and-braces guard
			}
			commit := w.finished[w.rand.Intn(len(w.finished))]
			// Shadowing is deliberate: the new commit replaces its parent
			// in this scope.
			commit, err := pfsutil.StartCommit(pfsClient, commit.Repo.Name, commit.Id)
			if err != nil {
				return err
			}
			w.started = append(w.started, commit)
		}
	case opt < file:
		// Write a randomly named file into a random open commit.
		if len(w.started) == 0 {
			return nil
		}
		commit := w.started[w.rand.Intn(len(w.started))]
		if _, err := pfsutil.PutFile(pfsClient, commit.Repo.Name, commit.Id, w.randString(10), 0, w.reader()); err != nil {
			return err
		}
	case opt < job:
		if len(w.startedJobs) >= maxStartedJobs {
			// Back-pressure: before starting another job, reap the oldest
			// in-flight one (FIFO) and require it to have succeeded.
			job := w.startedJobs[0]
			w.startedJobs = w.startedJobs[1:]
			jobInfo, err := ppsClient.InspectJob(
				context.Background(),
				&pps.InspectJobRequest{
					Job:        job,
					BlockState: true, // block until the job reaches a terminal state
				},
			)
			if err != nil {
				return err
			}
			if jobInfo.State != pps.JobState_JOB_STATE_SUCCESS {
				return fmt.Errorf("job %s failed", job.Id)
			}
			w.jobs = append(w.jobs, job)
		} else {
			if len(w.finished) == 0 {
				return nil // jobs need finished commits as inputs
			}
			// Pick up to 5 input commits, skipping repeats of the same
			// repo. NOTE(review): a skipped slot leaves inputs[i] as ""
			// — presumably grepCmd tolerates empty names; verify.
			inputs := [5]string{}
			var jobInputs []*pps.JobInput
			repoSet := make(map[string]bool)
			for i := range inputs {
				commit := w.finished[w.rand.Intn(len(w.finished))]
				if _, ok := repoSet[commit.Repo.Name]; ok {
					continue
				}
				repoSet[commit.Repo.Name] = true
				inputs[i] = commit.Repo.Name
				jobInputs = append(jobInputs, &pps.JobInput{Commit: commit})
			}
			// Optionally chain off a random previously successful job.
			var parentJobID string
			if len(w.jobs) > 0 {
				parentJobID = w.jobs[w.rand.Intn(len(w.jobs))].Id
			}
			outFilename := w.randString(10)
			job, err := ppsutil.CreateJob(
				ppsClient,
				"",
				[]string{"bash"},
				w.grepCmd(inputs, outFilename),
				1,
				jobInputs,
				parentJobID,
			)
			if err != nil {
				return err
			}
			w.startedJobs = append(w.startedJobs, job)
		}
	case opt < pipeline:
		if len(w.repos) == 0 {
			return nil // pipelines need at least one input repo
		}
		// Pick up to 5 distinct input repos (same empty-slot caveat as
		// the job case above).
		inputs := [5]string{}
		var pipelineInputs []*pps.PipelineInput
		repoSet := make(map[string]bool)
		for i := range inputs {
			repo := w.repos[w.rand.Intn(len(w.repos))]
			if _, ok := repoSet[repo.Name]; ok {
				continue
			}
			repoSet[repo.Name] = true
			inputs[i] = repo.Name
			pipelineInputs = append(pipelineInputs, &pps.PipelineInput{Repo: repo})
		}
		pipelineName := w.randString(10)
		outFilename := w.randString(10)
		if err := ppsutil.CreatePipeline(
			ppsClient,
			pipelineName,
			"",
			[]string{"bash"},
			w.grepCmd(inputs, outFilename),
			1,
			pipelineInputs,
		); err != nil {
			return err
		}
		w.pipelines = append(w.pipelines, ppsutil.NewPipeline(pipelineName))
	}
	return nil
}