Ejemplo n.º 1
0
func TestGrep(t *testing.T) {
	t.Skip()
	t.Parallel()
	dataRepo := uniqueString("TestGrep.data")
	pfsClient := getPfsClient(t)
	require.NoError(t, pfsutil.CreateRepo(pfsClient, dataRepo))
	commit, err := pfsutil.StartCommit(pfsClient, dataRepo, "")
	require.NoError(t, err)
	for i := 0; i < 100; i++ {
		_, err = pfsutil.PutFile(pfsClient, dataRepo, commit.Id, fmt.Sprintf("file%d", i), 0, strings.NewReader("foo\nbar\nfizz\nbuzz\n"))
		require.NoError(t, err)
	}
	require.NoError(t, pfsutil.FinishCommit(pfsClient, dataRepo, commit.Id))
	ppsClient := getPpsClient(t)
	job1, err := ppsutil.CreateJob(
		ppsClient,
		"",
		[]string{"bash"},
		fmt.Sprintf("grep foo /pfs/%s/* >/pfs/out/foo", dataRepo),
		1,
		[]*pps.JobInput{{Commit: commit}},
		"",
	)
	require.NoError(t, err)
	job2, err := ppsutil.CreateJob(
		ppsClient,
		"",
		[]string{"bash"},
		fmt.Sprintf("grep foo /pfs/%s/* >/pfs/out/foo", dataRepo),
		4,
		[]*pps.JobInput{{Commit: commit}},
		"",
	)
	require.NoError(t, err)
	inspectJobRequest := &pps.InspectJobRequest{
		Job:         job1,
		BlockOutput: true,
		BlockState:  true,
	}
	job1Info, err := ppsClient.InspectJob(context.Background(), inspectJobRequest)
	require.NoError(t, err)
	inspectJobRequest.Job = job2
	job2Info, err := ppsClient.InspectJob(context.Background(), inspectJobRequest)
	require.NoError(t, err)
	repo1Info, err := pfsutil.InspectRepo(pfsClient, job1Info.OutputCommit.Repo.Name)
	require.NoError(t, err)
	repo2Info, err := pfsutil.InspectRepo(pfsClient, job2Info.OutputCommit.Repo.Name)
	require.NoError(t, err)
	require.Equal(t, repo1Info.SizeBytes, repo2Info.SizeBytes)
}
Ejemplo n.º 2
0
func TestJob(t *testing.T) {
	t.Parallel()
	dataRepo := uniqueString("TestJob.data")
	pachClient := getPachClient(t)
	require.NoError(t, pfsutil.CreateRepo(pachClient, dataRepo))
	commit, err := pfsutil.StartCommit(pachClient, dataRepo, "")
	require.NoError(t, err)
	_, err = pfsutil.PutFile(pachClient, dataRepo, commit.Id, "file", 0, strings.NewReader("foo\n"))
	require.NoError(t, err)
	require.NoError(t, pfsutil.FinishCommit(pachClient, dataRepo, commit.Id))
	job, err := ppsutil.CreateJob(
		pachClient,
		"",
		[]string{"cp", path.Join("/pfs", dataRepo, "file"), "/pfs/out/file"},
		nil,
		1,
		[]*pps.JobInput{{Commit: commit}},
		"",
	)
	require.NoError(t, err)
	inspectJobRequest := &pps.InspectJobRequest{
		Job:         job,
		BlockOutput: true,
		BlockState:  true,
	}
	jobInfo, err := pachClient.InspectJob(context.Background(), inspectJobRequest)
	require.NoError(t, err)
	require.Equal(t, pps.JobState_JOB_STATE_SUCCESS.String(), jobInfo.State.String())
	commitInfo, err := pfsutil.InspectCommit(pachClient, jobInfo.OutputCommit.Repo.Name, jobInfo.OutputCommit.Id)
	require.NoError(t, err)
	require.Equal(t, pfs.CommitType_COMMIT_TYPE_READ, commitInfo.CommitType)
	var buffer bytes.Buffer
	require.NoError(t, pfsutil.GetFile(pachClient, jobInfo.OutputCommit.Repo.Name, jobInfo.OutputCommit.Id, "file", 0, 0, "", nil, &buffer))
	require.Equal(t, "foo\n", buffer.String())
}
Ejemplo n.º 3
0
func TestJob(t *testing.T) {
	dataRepo := uniqueString("TestJob.data")
	pfsClient := getPfsClient(t)
	require.NoError(t, pfsutil.CreateRepo(pfsClient, dataRepo))
	commit, err := pfsutil.StartCommit(pfsClient, dataRepo, "")
	require.NoError(t, err)
	_, err = pfsutil.PutFile(pfsClient, dataRepo, commit.Id, "file", 0, strings.NewReader("foo"))
	require.NoError(t, err)
	require.NoError(t, pfsutil.FinishCommit(pfsClient, dataRepo, commit.Id))
	ppsClient := getPpsClient(t)
	job, err := ppsutil.CreateJob(
		ppsClient,
		"",
		[]string{"cp", path.Join("/pfs", dataRepo, "file"), "/pfs/out/file"},
		"",
		1,
		[]*pfs.Commit{commit},
		"",
	)
	require.NoError(t, err)
	inspectJobRequest := &pps.InspectJobRequest{
		Job:         job,
		BlockOutput: true,
		BlockState:  true,
	}
	jobInfo, err := ppsClient.InspectJob(context.Background(), inspectJobRequest)
	require.NoError(t, err)
	var buffer bytes.Buffer
	require.NoError(t, pfsutil.GetFile(pfsClient, jobInfo.OutputCommit.Repo.Name, jobInfo.OutputCommit.Id, "file", 0, 0, nil, &buffer))
	require.Equal(t, "foo", buffer.String())
}
Ejemplo n.º 4
0
func TestGrep(t *testing.T) {
	t.Parallel()
	dataRepo := uniqueString("pachyderm.TestGrep.data")
	pfsClient := getPfsClient(t)
	require.NoError(t, pfsutil.CreateRepo(pfsClient, dataRepo))
	commit, err := pfsutil.StartCommit(pfsClient, dataRepo, "")
	require.NoError(t, err)
	for i := 0; i < 100; i++ {
		_, err = pfsutil.PutFile(pfsClient, dataRepo, commit.Id, fmt.Sprintf("file%d", i), 0, strings.NewReader("foo\nbar\nfizz\nbuzz\n"))
		require.NoError(t, err)
	}
	require.NoError(t, pfsutil.FinishCommit(pfsClient, dataRepo, commit.Id))
	ppsClient := getPpsClient(t)
	_, err = ppsutil.CreateJob(
		ppsClient,
		"",
		[]string{"bash"},
		fmt.Sprintf("grep foo /pfs/%s/* >/pfs/out/foo", dataRepo),
		1,
		[]*pps.JobInput{{Commit: commit}},
		"",
	)
	require.NoError(t, err)
}
Ejemplo n.º 5
0
func (w *worker) work(pfsClient pfs.APIClient, ppsClient pps.APIClient) error {
	opt := w.rand.Float64()
	switch {
	case opt < repo:
		repoName := w.name()
		if err := pfsutil.CreateRepo(pfsClient, repoName); err != nil {
			return err
		}
		w.repos = append(w.repos, &pfs.Repo{Name: repoName})
		commit, err := pfsutil.StartCommit(pfsClient, repoName, "")
		if err != nil {
			return err
		}
		w.started = append(w.started, commit)
	case opt < commit:
		if len(w.started) >= maxStartedCommits {
			i := w.rand.Intn(len(w.started))
			commit := w.started[i]
			if err := pfsutil.FinishCommit(pfsClient, commit.Repo.Name, commit.Id); err != nil {
				return err
			}
			w.started = append(w.started[:i], w.started[i+1:]...)
			w.finished = append(w.finished, commit)
		} else {
			commit := w.finished[w.rand.Intn(len(w.finished))]
			commit, err := pfsutil.StartCommit(pfsClient, commit.Repo.Name, commit.Id)
			if err != nil {
				return err
			}
			w.started = append(w.started, commit)
		}
	case opt < file:
		commit := w.started[w.rand.Intn(len(w.started))]
		if _, err := pfsutil.PutFile(pfsClient, commit.Repo.Name, commit.Id, w.name(), 0, w.reader()); err != nil {
			return err
		}
	case opt < job:
		inputs := [5]string{}
		var inputCommits []*pfs.Commit
		for i := range inputs {
			randI := w.rand.Intn(len(w.finished))
			inputs[i] = w.finished[randI].Repo.Name
			inputCommits = append(inputCommits, w.finished[randI])
		}
		var parentJobID string
		if len(w.jobs) > 0 {
			parentJobID = w.jobs[w.rand.Intn(len(w.jobs))].Id
		}
		outFilename := w.name()
		job, err := ppsutil.CreateJob(
			ppsClient,
			"",
			[]string{"sh"},
			w.grepCmd(inputs, outFilename),
			1,
			inputCommits,
			parentJobID,
		)
		if err != nil {
			return err
		}
		w.jobs = append(w.jobs, job)
	case opt < pipeline:
		inputs := [5]string{}
		var inputRepos []*pfs.Repo
		for i := range inputs {
			randI := w.rand.Intn(len(w.repos))
			inputs[i] = w.repos[randI].Name
			inputRepos = append(inputRepos, w.repos[randI])
		}
		pipelineName := w.name()
		outFilename := w.name()
		if err := ppsutil.CreatePipeline(
			ppsClient,
			pipelineName,
			"",
			[]string{"sh"},
			w.grepCmd(inputs, outFilename),
			1,
			inputRepos,
		); err != nil {
			return err
		}
		w.pipelines = append(w.pipelines, ppsutil.NewPipeline(pipelineName))
	}
	return nil
}
Ejemplo n.º 6
0
func (w *worker) work(pfsClient pfs.APIClient, ppsClient pps.APIClient) error {
	opt := w.rand.Float64()
	switch {
	case opt < repo:
		repoName := w.randString(10)
		if err := pfsutil.CreateRepo(pfsClient, repoName); err != nil {
			return err
		}
		w.repos = append(w.repos, &pfs.Repo{Name: repoName})
		commit, err := pfsutil.StartCommit(pfsClient, repoName, "")
		if err != nil {
			return err
		}
		w.started = append(w.started, commit)
	case opt < commit:
		if len(w.started) >= maxStartedCommits || len(w.finished) == 0 {
			if len(w.started) == 0 {
				return nil
			}
			i := w.rand.Intn(len(w.started))
			commit := w.started[i]
			if err := pfsutil.FinishCommit(pfsClient, commit.Repo.Name, commit.Id); err != nil {
				return err
			}
			w.started = append(w.started[:i], w.started[i+1:]...)
			w.finished = append(w.finished, commit)
		} else {
			if len(w.finished) == 0 {
				return nil
			}
			commit := w.finished[w.rand.Intn(len(w.finished))]
			commit, err := pfsutil.StartCommit(pfsClient, commit.Repo.Name, commit.Id)
			if err != nil {
				return err
			}
			w.started = append(w.started, commit)
		}
	case opt < file:
		if len(w.started) == 0 {
			return nil
		}
		commit := w.started[w.rand.Intn(len(w.started))]
		if _, err := pfsutil.PutFile(pfsClient, commit.Repo.Name, commit.Id, w.randString(10), 0, w.reader()); err != nil {
			return err
		}
	case opt < job:
		if len(w.startedJobs) >= maxStartedJobs {
			job := w.startedJobs[0]
			w.startedJobs = w.startedJobs[1:]
			jobInfo, err := ppsClient.InspectJob(
				context.Background(),
				&pps.InspectJobRequest{
					Job:        job,
					BlockState: true,
				},
			)
			if err != nil {
				return err
			}
			if jobInfo.State != pps.JobState_JOB_STATE_SUCCESS {
				return fmt.Errorf("job %s failed", job.Id)
			}
			w.jobs = append(w.jobs, job)
		} else {
			if len(w.finished) == 0 {
				return nil
			}
			inputs := [5]string{}
			var jobInputs []*pps.JobInput
			repoSet := make(map[string]bool)
			for i := range inputs {
				commit := w.finished[w.rand.Intn(len(w.finished))]
				if _, ok := repoSet[commit.Repo.Name]; ok {
					continue
				}
				repoSet[commit.Repo.Name] = true
				inputs[i] = commit.Repo.Name
				jobInputs = append(jobInputs, &pps.JobInput{Commit: commit})
			}
			var parentJobID string
			if len(w.jobs) > 0 {
				parentJobID = w.jobs[w.rand.Intn(len(w.jobs))].Id
			}
			outFilename := w.randString(10)
			job, err := ppsutil.CreateJob(
				ppsClient,
				"",
				[]string{"bash"},
				w.grepCmd(inputs, outFilename),
				1,
				jobInputs,
				parentJobID,
			)
			if err != nil {
				return err
			}
			w.startedJobs = append(w.startedJobs, job)
		}
	case opt < pipeline:
		if len(w.repos) == 0 {
			return nil
		}
		inputs := [5]string{}
		var pipelineInputs []*pps.PipelineInput
		repoSet := make(map[string]bool)
		for i := range inputs {
			repo := w.repos[w.rand.Intn(len(w.repos))]
			if _, ok := repoSet[repo.Name]; ok {
				continue
			}
			repoSet[repo.Name] = true
			inputs[i] = repo.Name
			pipelineInputs = append(pipelineInputs, &pps.PipelineInput{Repo: repo})
		}
		pipelineName := w.randString(10)
		outFilename := w.randString(10)
		if err := ppsutil.CreatePipeline(
			ppsClient,
			pipelineName,
			"",
			[]string{"bash"},
			w.grepCmd(inputs, outFilename),
			1,
			pipelineInputs,
		); err != nil {
			return err
		}
		w.pipelines = append(w.pipelines, ppsutil.NewPipeline(pipelineName))
	}
	return nil
}