// TestRecover runs a pipeline with an error. Then fixes the pipeline to not // include an error and reruns it. func TestRecover(t *testing.T) { t.Parallel() inRepo := "TestRecover_in" require.NoError(t, btrfs.Init(inRepo)) outPrefix := "TestRecover_out" // Create the Pachfile require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "recover"), []byte(` image ubuntu run touch /out/foo run touch /out/bar && cp /in/foo /out/bar `))) // Last line should fail here. // Commit to the inRepo require.NoError(t, btrfs.Commit(inRepo, "commit1", "master")) // Run the pipelines err := RunPipelines("pipeline", inRepo, outPrefix, "commit1", "master", "0-1", etcache.NewCache()) require.Error(t, err, "Running pipeline should error.") // Fix the Pachfile require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "recover"), []byte(` image ubuntu run touch /out/foo run touch /out/bar `))) // Commit to the inRepo require.NoError(t, btrfs.Commit(inRepo, "commit2", "master")) // Run the pipelines err = RunPipelines("pipeline", inRepo, outPrefix, "commit2", "master", "0-1", etcache.NewCache()) // this time the pipelines should not err require.NoError(t, err) // These are the most important 2 checks: // If this one fails it means that dirty state isn't properly saved checkExists(t, path.Join(outPrefix, "recover", "commit1-fail/bar")) // If this one fails it means that dirty state isn't properly cleared checkNoExists(t, path.Join(outPrefix, "recover", "commit2-0/bar")) // These commits are mostly covered by other tests checkExists(t, path.Join(outPrefix, "recover", "commit1-fail/foo")) checkExists(t, path.Join(outPrefix, "recover", "commit1-0/foo")) checkNoExists(t, path.Join(outPrefix, "recover", "commit1-1")) checkNoExists(t, path.Join(outPrefix, "recover", "commit1")) checkExists(t, path.Join(outPrefix, "recover", "commit2-0/foo")) checkExists(t, path.Join(outPrefix, "recover", "commit2-1/foo")) checkExists(t, path.Join(outPrefix, "recover", "commit2-1/bar")) checkExists(t, path.Join(outPrefix, "recover", "commit2/foo")) checkExists(t, path.Join(outPrefix, "recover", "commit2/bar")) }
// TestChess uses our chess data set to test s3 integration. func TestChess(t *testing.T) { t.Parallel() if testing.Short() { t.Skip() } // Notice this shard is behaving like 1 node of a 5000 node cluster to downsample to data. shard := NewShard("", "TestChessData", "TestChessPipelines", 0, 5000, etcache.NewCache()) require.NoError(t, shard.EnsureRepos()) s := httptest.NewServer(NewShardHTTPHandler(shard)) defer s.Close() res, err := http.Post(s.URL+"/pipeline/count", "application/text", strings.NewReader(` image ubuntu input s3://pachyderm-data/chess run cat /in/pachyderm-data/chess/* | wc -l > /out/count `)) require.NoError(t, err) res.Body.Close() res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil) require.NoError(t, err) res, err = http.Get(s.URL + "/pipeline/count/file/count?commit=commit1") require.NoError(t, err) require.Equal(t, http.StatusOK, res.StatusCode) }
func TestCancel(t *testing.T) { t.Parallel() inRepo := "TestCancel_in" require.NoError(t, btrfs.Init(inRepo)) outPrefix := "TestCancel_out" // Create the Pachfile require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "cancel"), []byte(` image ubuntu run sleep 100 `))) require.NoError(t, btrfs.Commit(inRepo, "commit", "master")) r := NewRunner("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache()) go func() { err := r.Run() require.Equal(t, ErrCancelled, err) }() // This is just to make sure we don't trigger the early exit case in Run // and actually exercise the code. time.Sleep(time.Second * 2) require.NoError(t, r.Cancel()) }
func TestDependency(t *testing.T) { t.Parallel() inRepo := "TestDependency_in" require.NoError(t, btrfs.Init(inRepo)) p1 := ` image ubuntu run echo foo >/out/foo ` require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "p1"), []byte(p1))) p2 := ` image ubuntu input pps://p1 run cp /in/p1/foo /out/foo ` require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "p2"), []byte(p2))) require.NoError(t, btrfs.Commit(inRepo, "commit", "master")) outPrefix := "TestDependency" runner := NewRunner("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache()) require.NoError(t, runner.Run()) res, err := btrfs.ReadFile(path.Join(outPrefix, "p2", "commit", "foo")) require.NoError(t, err) require.Equal(t, "foo\n", string(res)) }
func TestRunnerInputs(t *testing.T) { t.Parallel() inRepo := "TestRunnerInputs_in" require.NoError(t, btrfs.Init(inRepo)) p1 := ` image ubuntu input foo input bar ` require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "p1"), []byte(p1))) p2 := ` image ubuntu input fizz input buzz ` require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "p2"), []byte(p2))) require.NoError(t, btrfs.Commit(inRepo, "commit", "master")) outPrefix := "TestRunnerInputs" runner := NewRunner("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache()) inputs, err := runner.Inputs() require.NoError(t, err) require.Equal(t, []string{"foo", "bar", "fizz", "buzz"}, inputs) }
// TestError makes sure that we handle commands that error correctly. func TestError(t *testing.T) { t.Parallel() inRepo := "TestError_in" require.NoError(t, btrfs.Init(inRepo)) outPrefix := "TestError_out" // Create the Pachfile require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "error"), []byte(` image ubuntu run touch /out/foo run cp /in/foo /out/bar `))) // Last line should fail here. // Commit to the inRepo require.NoError(t, btrfs.Commit(inRepo, "commit", "master")) err := RunPipelines("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache()) require.Error(t, err, "Running pipeline should error.") // Check that foo exists exists, err := btrfs.FileExists(path.Join(outPrefix, "error", "commit-0", "foo")) require.NoError(t, err) require.True(t, exists, "File foo should exist.") // Check that commit doesn't exist exists, err = btrfs.FileExists(path.Join(outPrefix, "error", "commit")) require.NoError(t, err) require.False(t, exists, "Commit \"commit\" should not get created when a command fails.") }
func RouterFromArgs() (*Router, error) { modulos, err := strconv.ParseUint(os.Args[1], 10, 32) if err != nil { return nil, err } return NewRouter(modulos, etcache.NewCache()), nil }
func TestVersion(t *testing.T) { t.Parallel() shard := NewShard("", "TestVersionData", "TestVersionPipelines", 0, 1, etcache.NewCache()) require.NoError(t, shard.EnsureRepos()) s := httptest.NewServer(NewShardHTTPHandler(shard)) defer s.Close() res, err := http.Get(s.URL + "/version") require.NoError(t, err) checkAndCloseHTTPResponseBody(t, res, fmt.Sprintf("%s\n", common.VersionString())) }
func TestPing(t *testing.T) { t.Parallel() shard := NewShard("", "TestPingData", "TestPingPipelines", 0, 1, etcache.NewCache()) require.NoError(t, shard.EnsureRepos()) s := httptest.NewServer(NewShardHTTPHandler(shard)) defer s.Close() res, err := http.Get(s.URL + "/ping") require.NoError(t, err) checkAndCloseHTTPResponseBody(t, res, "pong\n") }
// TestInject tests that s3 injections works func TestInject(t *testing.T) { t.Parallel() outRepo := "TestInject_out" require.NoError(t, btrfs.Init(outRepo)) pipeline := newPipeline("output", "", outRepo, "commit", "master", "0-1", "", etcache.NewCache()) require.NoError(t, pipeline.inject("s3://pachyderm-test/pipeline")) require.NoError(t, pipeline.finish()) res, err := btrfs.ReadFile(path.Join(outRepo, "commit", "file")) require.NoError(t, err) require.Equal(t, "foo\n", string(res)) }
func TestShardFilter(t *testing.T) { t.Parallel() shard := NewShard("", "TestShardFilterData", "TestShardFilterPipelines", 0, 1, etcache.NewCache()) require.NoError(t, shard.EnsureRepos()) s := httptest.NewServer(NewShardHTTPHandler(shard)) defer s.Close() res, err := http.Post(s.URL+"/pipeline/files", "application/text", strings.NewReader(` image ubuntu run touch /out/foo run touch /out/bar run touch /out/buzz run touch /out/bizz `)) require.NoError(t, err) res.Body.Close() res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil) require.NoError(t, err) // Map to store files we receive files := make(map[string]struct{}) res, err = http.Get(s.URL + path.Join("/pipeline", "files", "file", "*") + "?commit=commit1&shard=0-2") require.NoError(t, err) require.Equal(t, http.StatusOK, res.StatusCode) reader := multipart.NewReader(res.Body, res.Header.Get("Boundary")) for p, err := reader.NextPart(); err != io.EOF; p, err = reader.NextPart() { require.NoError(t, err) match, err := route.Match(p.FileName(), "0-2") require.NoError(t, err) require.True(t, match, fmt.Sprintf("%s should match", p.FileName())) if _, ok := files[p.FileName()]; ok == true { t.Fatalf("File: %s received twice.", p.FileName()) } files[p.FileName()] = struct{}{} } res, err = http.Get(s.URL + path.Join("/pipeline", "files", "file", "*") + "?commit=commit1&shard=1-2") require.NoError(t, err) require.Equal(t, http.StatusOK, res.StatusCode) reader = multipart.NewReader(res.Body, res.Header.Get("Boundary")) for p, err := reader.NextPart(); err != io.EOF; p, err = reader.NextPart() { require.NoError(t, err) match, err := route.Match(p.FileName(), "1-2") require.NoError(t, err) require.True(t, match, fmt.Sprintf("%s should match", p.FileName())) if _, ok := files[p.FileName()]; ok == true { t.Fatalf("File: %s received twice.", p.FileName()) } files[p.FileName()] = struct{}{} } }
func do() error { if err := btrfs.CheckVersion(); err != nil { return err } shardNum := flag.Int("shard", -1, "Optional. The shard to service.") modulos := flag.Int("modulos", 4, "The total number of shards.") address := flag.String("address", "", "Optional. The address to advertise for this node.") flag.Parse() addrs, err := net.InterfaceAddrs() if err != nil { return err } if *address == "" { // No address, we'll try to use our ip addr instead for _, addr := range addrs { if ipnet, ok := addr.(*net.IPNet); ok && !ipnet.IP.IsLoopback() { if ipnet.IP.To4() != nil { *address = ipnet.IP.String() break } } } } if *address == "" { return fmt.Errorf("pfs: Couldn't find machine ip.") } shard := storage.NewShard( "http://"+*address, fmt.Sprintf("data-%d-%d", *shardNum, *modulos), fmt.Sprintf("pipe-%d-%d", *shardNum, *modulos), uint64(*shardNum), uint64(*modulos), etcache.NewCache(), ) if *shardNum == -1 { go shard.FindRole() } else { if err := shard.EnsureRepos(); err != nil { return err } go shard.FillRole() } log.Print("Listening on port 80...") return http.ListenAndServe(":80", storage.NewShardHTTPHandler(shard)) }
func TestExternalOutput(t *testing.T) { t.Parallel() if testing.Short() { t.Skip() } outRepo := "TestExternalOutput_out" require.NoError(t, btrfs.Init(outRepo)) pipeline := newPipeline("output", "", outRepo, "commit", "master", "0-1", "", etcache.NewCache()) require.NoError(t, pipeline.output("s3://pachyderm-test/pipeline-out")) pachfile := ` image ubuntu output s3://pachyderm-test/pipeline-output run echo foo >/out/foo ` require.NoError(t, pipeline.runPachFile(strings.NewReader(pachfile))) }
// TestPipeline creates a basic pipeline on a shard. func TestPipeline(t *testing.T) { t.Parallel() shard := NewShard("", "TestPipelineData", "TestPipelinePipelines", 0, 1, etcache.NewCache()) require.NoError(t, shard.EnsureRepos()) s := httptest.NewServer(NewShardHTTPHandler(shard)) defer s.Close() res, err := http.Post(s.URL+"/pipeline/touch_foo", "application/text", strings.NewReader(` image ubuntu run touch /out/foo `)) require.NoError(t, err) res.Body.Close() res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil) require.NoError(t, err) checkFile(t, s.URL+"/pipeline/touch_foo", "foo", "commit1", "") }
func TestFail(t *testing.T) { t.Parallel() shard := NewShard("", "TestFailData", "TestFailPipelines", 0, 1, etcache.NewCache()) require.NoError(t, shard.EnsureRepos()) s := httptest.NewServer(NewShardHTTPHandler(shard)) defer s.Close() res, err := http.Post(s.URL+"/pipeline/fail", "application/text", strings.NewReader(` image ubuntu run touch /out/foo run exit 1 `)) require.NoError(t, err) res.Body.Close() res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil) require.NoError(t, err) res, err = http.Get(s.URL + "/pipeline/fail/file/foo?commit=commit1") require.NoError(t, err) require.Equal(t, http.StatusInternalServerError, res.StatusCode) }
func newTestPipeline( t *testing.T, repoPrefix string, commit string, branch string, shard string, init bool, ) *pipeline { if init { require.NoError(t, btrfs.Init(repoPrefix+"-in")) require.NoError(t, btrfs.Init(repoPrefix+"-out")) } return newPipeline( "pipeline", repoPrefix+"-in", repoPrefix+"-out", commit, branch, shard, "pipelineDir", etcache.NewCache(), ) }
// TestWrap tests a simple pipeline that uses line wrapping in it's Pachfile func TestWrap(t *testing.T) { t.Parallel() outRepo := "TestWrap_out" require.NoError(t, btrfs.Init(outRepo)) pipeline := newPipeline("output", "", outRepo, "commit", "master", "0-1", "", etcache.NewCache()) pachfile := ` image ubuntu # touch foo and bar run touch /out/foo \ /out/bar ` err := pipeline.runPachFile(strings.NewReader(pachfile)) require.NoError(t, err) exists, err := btrfs.FileExists(path.Join(outRepo, "commit", "foo")) require.NoError(t, err) require.True(t, exists, "File `foo` doesn't exist when it should.") exists, err = btrfs.FileExists(path.Join(outRepo, "commit", "bar")) require.NoError(t, err) require.True(t, exists, "File `bar` doesn't exist when it should.") }
func TestBasic(t *testing.T) { t.Parallel() c := 0 f := func(w traffic.Workload) bool { shard := NewShard("", fmt.Sprintf("TestBasic%d", c), fmt.Sprintf("TestBasicPipelines%d", c), 0, 1, etcache.NewCache()) c++ require.NoError(t, shard.EnsureRepos()) s := httptest.NewServer(NewShardHTTPHandler(shard)) defer s.Close() RunWorkload(t, s.URL, w) facts := w.Facts() RunWorkload(t, s.URL, facts) return true } if err := quick.Check(f, &quick.Config{MaxCount: getMaxCount()}); err != nil { t.Error(err) } }
func TestPull(t *testing.T) { t.Parallel() c := 0 f := func(w traffic.Workload) bool { _src := NewShard("", fmt.Sprintf("TestPullSrc%d", c), fmt.Sprintf("TestPullSrcPipelines%d", c), 0, 1, etcache.NewCache()) _dst := NewShard("", fmt.Sprintf("TestPullDst%d", c), fmt.Sprintf("TestPullDstPipelines%d", c), 0, 1, etcache.NewCache()) c++ require.NoError(t, _src.EnsureRepos()) require.NoError(t, _dst.EnsureRepos()) src := httptest.NewServer(NewShardHTTPHandler(_src)) dst := httptest.NewServer(NewShardHTTPHandler(_dst)) defer src.Close() defer dst.Close() RunWorkload(t, src.URL, w) // Replicate the data srcReplica := newShardReplica(src.URL) dstReplica := newShardReplica(dst.URL) err := srcReplica.Pull("", dstReplica) require.NoError(t, err) facts := w.Facts() RunWorkload(t, dst.URL, facts) return true } if err := quick.Check(f, &quick.Config{MaxCount: getMaxCount()}); err != nil { t.Error(err) } }
// TestPipelines runs a 2 step pipeline. func TestPipelines(t *testing.T) { t.Parallel() inRepo := "TestPipelines_in" require.NoError(t, btrfs.Init(inRepo)) outPrefix := "TestPipelines_out" // Create a data file: require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "data", "foo"), []byte("foo"))) // Create the Pachfile require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "cp"), []byte(` image ubuntu input data run cp /in/data/foo /out/foo run echo "foo" `))) require.NoError(t, btrfs.Commit(inRepo, "commit", "master")) require.NoError(t, RunPipelines("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache())) data, err := btrfs.ReadFile(path.Join(outPrefix, "cp", "commit", "foo")) require.NoError(t, err) require.Equal(t, "foo", string(data)) }
// TestSyncFrom func TestSyncFrom(t *testing.T) { t.Parallel() c := 0 f := func(w traffic.Workload) bool { _src := NewShard("", fmt.Sprintf("TestSyncFromSrc%d", c), fmt.Sprintf("TestSyncFromSrcPipelines%d", c), 0, 1, etcache.NewCache()) _dst := NewShard("", fmt.Sprintf("TestSyncFromDst%d", c), fmt.Sprintf("TestSyncFromDstPipelines%d", c), 0, 1, etcache.NewCache()) require.NoError(t, _src.EnsureRepos()) require.NoError(t, _dst.EnsureRepos()) src := httptest.NewServer(NewShardHTTPHandler(_src)) dst := httptest.NewServer(NewShardHTTPHandler(_dst)) defer src.Close() defer dst.Close() for _, o := range w { runOp(t, src.URL, o) if o.Object == traffic.Commit { // Replicate the data err := syncFrom(fmt.Sprintf("TestSyncFromDst%d", c), []string{src.URL}) require.NoError(t, err) } } facts := w.Facts() RunWorkload(t, dst.URL, facts) c++ return true } if err := quick.Check(f, &quick.Config{MaxCount: getMaxCount()}); err != nil { t.Error(err) } }