// TestError makes sure that we handle commands that error correctly. func TestError(t *testing.T) { t.Parallel() inRepo := "TestError_in" require.NoError(t, btrfs.Init(inRepo)) outPrefix := "TestError_out" // Create the Pachfile require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "error"), []byte(` image ubuntu run touch /out/foo run cp /in/foo /out/bar `))) // Last line should fail here. // Commit to the inRepo require.NoError(t, btrfs.Commit(inRepo, "commit", "master")) err := RunPipelines("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache()) require.Error(t, err, "Running pipeline should error.") // Check that foo exists exists, err := btrfs.FileExists(path.Join(outPrefix, "error", "commit-0", "foo")) require.NoError(t, err) require.True(t, exists, "File foo should exist.") // Check that commit doesn't exist exists, err = btrfs.FileExists(path.Join(outPrefix, "error", "commit")) require.NoError(t, err) require.False(t, exists, "Commit \"commit\" should not get created when a command fails.") }
func (r *Runner) makeOutRepo(pipeline string) error { if err := btrfs.Ensure(path.Join(r.outPrefix, pipeline)); err != nil { return err } exists, err := btrfs.FileExists(path.Join(r.outPrefix, pipeline, r.branch)) if err != nil { return err } if !exists { // The branch doesn't exist, we need to create it We'll make our branch // have the same parent as the commit we're running off of if that // parent exists in the pipelines outRepo. This lets us carry over past // computation results when a new branch is created rather than having // to start from scratch. parent := btrfs.GetMeta(path.Join(r.inRepo, r.commit), "parent") if parent != "" { exists, err := btrfs.FileExists(path.Join(r.outPrefix, pipeline, parent)) if err != nil { return err } if !exists { parent = "" } } if err := btrfs.Branch(path.Join(r.outPrefix, pipeline), parent, r.branch); err != nil { return err } } // The branch exists, so we're ready to return return nil }
func TestInputOutput(t *testing.T) { t.Parallel() // create the in repo pipeline := newTestPipeline(t, "inputOutput", "commit", "master", "0-1", true) // add data to it err := btrfs.WriteFile(path.Join(pipeline.inRepo, "master", "data", "foo"), []byte("foo")) require.NoError(t, err) // commit data err = btrfs.Commit(pipeline.inRepo, "commit", "master") require.NoError(t, err) pachfile := ` image ubuntu input data run cp /in/data/foo /out/foo ` err = pipeline.runPachFile(strings.NewReader(pachfile)) require.NoError(t, err) exists, err := btrfs.FileExists(path.Join(pipeline.outRepo, "commit-0", "foo")) require.NoError(t, err) require.True(t, exists, "File `foo` doesn't exist when it should.") }
// TestScrape tests a the scraper pipeline func TestScrape(t *testing.T) { // TODO(any): what?? wget is not found in the container if parallel is set //t.Parallel() pipeline := newTestPipeline(t, "scrape", "commit", "master", "0-1", true) // Create a url to scrape require.NoError(t, btrfs.WriteFile(path.Join(pipeline.inRepo, "master", "urls", "1"), []byte("pachyderm.io"))) // Commit the data require.NoError(t, btrfs.Commit(pipeline.inRepo, "commit", "master")) // Create a pipeline to run pachfile := ` image pachyderm/scraper input urls run cat /in/urls/* | xargs wget -P /out ` err := pipeline.runPachFile(strings.NewReader(pachfile)) exists, err := btrfs.FileExists(path.Join(pipeline.outRepo, "commit", "index.html")) require.NoError(t, err) require.True(t, exists, "pachyderm.io should exist") }
func (p *pipeline) fail() error { exists, err := btrfs.FileExists(path.Join(p.outRepo, p.commit+"-fail")) if err != nil { return err } if exists { return nil } return btrfs.DanglingCommit(p.outRepo, p.commit+"-fail", p.branch) }
// finish makes the final commit for the pipeline func (p *pipeline) finish() error { exists, err := btrfs.FileExists(path.Join(p.outRepo, p.commit)) if err != nil { return err } if exists { return nil } return btrfs.Commit(p.outRepo, p.commit, p.branch) }
func TestEcho(t *testing.T) { t.Parallel() pipeline := newTestPipeline(t, "echo", "commit", "master", "0-1", true) pachfile := ` image ubuntu run echo foo >/out/foo run echo foo >/out/bar ` err := pipeline.runPachFile(strings.NewReader(pachfile)) require.NoError(t, err) exists, err := btrfs.FileExists(path.Join(pipeline.outRepo, "commit-0", "foo")) require.NoError(t, err) require.True(t, exists, "File `foo` doesn't exist when it should.") exists, err = btrfs.FileExists(path.Join(pipeline.outRepo, "commit-1", "bar")) require.NoError(t, err) require.True(t, exists, "File `bar` doesn't exist when it should.") }
// Run runs a command in the container, it assumes that `branch` has already // been created. // Notice that any failure in this function leads to the branch having // uncommitted dirty changes. This state needs to be cleaned up before the // pipeline is rerun. The reason we don't do it here is that even if we try our // best the process crashing at the wrong time could still leave it in an // inconsistent state. func (p *pipeline) run(cmd []string) error { // this function always increments counter defer func() { p.counter++ }() // Check if the commit already exists exists, err := btrfs.FileExists(path.Join(p.outRepo, p.runCommit())) if err != nil { return err } // if the commit exists there's no work to be done if exists { return nil } // Set the command p.config.Config.Cmd = []string{"sh"} //p.config.Config.Volumes["/out"] = emptyStruct() // Map the out directory in as a bind hostPath := btrfs.HostPath(path.Join(p.outRepo, p.branch)) bind := fmt.Sprintf("%s:/out", hostPath) p.config.HostConfig.Binds = append(p.config.HostConfig.Binds, bind) log.Print(p.config.HostConfig.Binds) // Make sure this bind is only visible for the duration of run defer func() { p.config.HostConfig.Binds = p.config.HostConfig.Binds[:len(p.config.HostConfig.Binds)-1] }() // Start the container p.container, err = startContainer(p.config) if err != nil { return err } if err := pipeToStdin(p.container, strings.NewReader(strings.Join(cmd, " ")+"\n")); err != nil { return err } // Create a place to put the logs f, err := btrfs.CreateAll(path.Join(p.outRepo, p.branch, ".log")) if err != nil { return err } defer f.Close() // Copy the logs from the container in to the file. 
if err = containerLogs(p.container, f); err != nil { return err } // Wait for the command to finish: exit, err := waitContainer(p.container) if err != nil { return err } if exit != 0 { // The command errored return fmt.Errorf("Command:\n\t%s\nhad exit code: %d.\n", strings.Join(cmd, " "), exit) } return btrfs.Commit(p.outRepo, p.runCommit(), p.branch) }
// TestWrap tests a simple pipeline that uses line wrapping in it's Pachfile func TestWrap(t *testing.T) { t.Parallel() outRepo := "TestWrap_out" require.NoError(t, btrfs.Init(outRepo)) pipeline := newPipeline("output", "", outRepo, "commit", "master", "0-1", "", etcache.NewCache()) pachfile := ` image ubuntu # touch foo and bar run touch /out/foo \ /out/bar ` err := pipeline.runPachFile(strings.NewReader(pachfile)) require.NoError(t, err) exists, err := btrfs.FileExists(path.Join(outRepo, "commit", "foo")) require.NoError(t, err) require.True(t, exists, "File `foo` doesn't exist when it should.") exists, err = btrfs.FileExists(path.Join(outRepo, "commit", "bar")) require.NoError(t, err) require.True(t, exists, "File `bar` doesn't exist when it should.") }
// finished reports whether the pipeline's final commit (p.commit) already
// exists in the output repo, along with any error from the existence check.
func (p *pipeline) finished() (bool, error) {
	finalCommit := path.Join(p.outRepo, p.commit)
	return btrfs.FileExists(finalCommit)
}
func checkExists(t *testing.T, name string) { exists, err := btrfs.FileExists(name) require.NoError(t, err) require.True(t, exists) }