Beispiel #1
0
// TestRecover exercises recovery from a failed pipeline run. It commits a
// Pachfile whose last command fails, runs the pipelines (expecting an error),
// then commits a corrected Pachfile and reruns (expecting success). Finally it
// checks which output commits and files exist under the output prefix.
func TestRecover(t *testing.T) {
	t.Parallel()
	const (
		inRepo    = "TestRecover_in"
		outPrefix = "TestRecover_out"
	)
	require.NoError(t, btrfs.Init(inRepo))

	pachfilePath := path.Join(inRepo, "master", "pipeline", "recover")
	// out resolves a path inside the "recover" pipeline's output.
	out := func(elem string) string {
		return path.Join(outPrefix, "recover", elem)
	}

	// The last line of this Pachfile should fail.
	brokenPachfile := `
image ubuntu

run touch /out/foo
run touch /out/bar && cp /in/foo /out/bar
`
	// Same pipeline with the failing copy removed.
	fixedPachfile := `
image ubuntu

run touch /out/foo
run touch /out/bar
`

	// First round: write and commit the broken Pachfile, then run it.
	require.NoError(t, btrfs.WriteFile(pachfilePath, []byte(brokenPachfile)))
	require.NoError(t, btrfs.Commit(inRepo, "commit1", "master"))
	require.Error(
		t,
		RunPipelines("pipeline", inRepo, outPrefix, "commit1", "master", "0-1", etcache.NewCache()),
		"Running pipeline should error.",
	)

	// Second round: write and commit the fixed Pachfile; this run must succeed.
	require.NoError(t, btrfs.WriteFile(pachfilePath, []byte(fixedPachfile)))
	require.NoError(t, btrfs.Commit(inRepo, "commit2", "master"))
	require.NoError(
		t,
		RunPipelines("pipeline", inRepo, outPrefix, "commit2", "master", "0-1", etcache.NewCache()),
	)

	// The two most important checks come first.

	// A failure here means that dirty state isn't properly saved.
	checkExists(t, out("commit1-fail/bar"))
	// A failure here means that dirty state isn't properly cleared.
	checkNoExists(t, out("commit2-0/bar"))

	// The remaining commits are mostly covered by other tests.
	checkExists(t, out("commit1-fail/foo"))
	checkExists(t, out("commit1-0/foo"))
	checkNoExists(t, out("commit1-1"))
	checkNoExists(t, out("commit1"))
	checkExists(t, out("commit2-0/foo"))
	checkExists(t, out("commit2-1/foo"))
	checkExists(t, out("commit2-1/bar"))
	checkExists(t, out("commit2/foo"))
	checkExists(t, out("commit2/bar"))
}
Beispiel #2
0
// TestChess uses our chess data set to test s3 integration. It posts a
// pipeline that counts lines of the s3 input, creates a commit, and expects
// the resulting count file to be served with 200 OK. The test is skipped in
// short mode.
func TestChess(t *testing.T) {
	t.Parallel()
	if testing.Short() {
		t.Skip()
	}
	// Notice this shard is behaving like 1 node of a 5000 node cluster to downsample the data.
	shard := NewShard("", "TestChessData", "TestChessPipelines", 0, 5000, etcache.NewCache())
	require.NoError(t, shard.EnsureRepos())
	s := httptest.NewServer(NewShardHTTPHandler(shard))
	defer s.Close()

	res, err := http.Post(s.URL+"/pipeline/count", "application/text", strings.NewReader(`
image ubuntu

input s3://pachyderm-data/chess

run cat /in/pachyderm-data/chess/* | wc -l > /out/count
`))
	require.NoError(t, err)
	res.Body.Close()

	// Every response body must be closed, otherwise its connection leaks.
	res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()

	res, err = http.Get(s.URL + "/pipeline/count/file/count?commit=commit1")
	require.NoError(t, err)
	defer res.Body.Close()
	require.Equal(t, http.StatusOK, res.StatusCode)
}
Beispiel #3
0
// TestCancel starts a long-running pipeline (`sleep 100`), cancels it after a
// short delay, and checks that Run returns ErrCancelled.
func TestCancel(t *testing.T) {
	t.Parallel()
	inRepo := "TestCancel_in"
	require.NoError(t, btrfs.Init(inRepo))
	outPrefix := "TestCancel_out"

	// Create the Pachfile
	require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "cancel"), []byte(`
image ubuntu

run sleep 100
`)))
	require.NoError(t, btrfs.Commit(inRepo, "commit", "master"))

	r := NewRunner("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache())

	// Run in the background and hand the result back over a channel: require's
	// FailNow may only be called from the goroutine running the test, and the
	// test must not return before the result has been checked. The channel is
	// buffered so the goroutine can always deliver its result and exit, even
	// if the test aborts early.
	runErr := make(chan error, 1)
	go func() {
		runErr <- r.Run()
	}()

	// This is just to make sure we don't trigger the early exit case in Run
	// and actually exercise the code.
	time.Sleep(time.Second * 2)
	require.NoError(t, r.Cancel())

	// Wait for Run to return and verify it reports the cancellation.
	require.Equal(t, ErrCancelled, <-runErr)
}
Beispiel #4
0
// TestDependency commits two Pachfiles, p1 and p2, where p2 declares
// `input pps://p1` and copies p1's foo into its own output. After running
// them it checks that p2's committed foo contains "foo\n".
func TestDependency(t *testing.T) {
	t.Parallel()
	const inRepo = "TestDependency_in"
	require.NoError(t, btrfs.Init(inRepo))

	// Pachfiles are written in declaration order: p1 first, then p2.
	pachfiles := []struct{ name, body string }{
		{"p1", `
image ubuntu

run echo foo >/out/foo
`},
		{"p2", `
image ubuntu

input pps://p1

run cp /in/p1/foo /out/foo
`},
	}
	for _, pf := range pachfiles {
		target := path.Join(inRepo, "master", "pipeline", pf.name)
		require.NoError(t, btrfs.WriteFile(target, []byte(pf.body)))
	}
	require.NoError(t, btrfs.Commit(inRepo, "commit", "master"))

	const outPrefix = "TestDependency"
	r := NewRunner("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache())
	require.NoError(t, r.Run())

	got, err := btrfs.ReadFile(path.Join(outPrefix, "p2", "commit", "foo"))
	require.NoError(t, err)
	require.Equal(t, "foo\n", string(got))
}
Beispiel #5
0
// TestRunnerInputs commits two Pachfiles that each declare two inputs and
// checks that the runner's Inputs method returns foo, bar, fizz, buzz.
func TestRunnerInputs(t *testing.T) {
	t.Parallel()
	const inRepo = "TestRunnerInputs_in"
	require.NoError(t, btrfs.Init(inRepo))

	pipelineDir := path.Join(inRepo, "master", "pipeline")

	firstPachfile := []byte(`
image ubuntu

input foo
input bar
`)
	require.NoError(t, btrfs.WriteFile(path.Join(pipelineDir, "p1"), firstPachfile))

	secondPachfile := []byte(`
image ubuntu

input fizz
input buzz
`)
	require.NoError(t, btrfs.WriteFile(path.Join(pipelineDir, "p2"), secondPachfile))

	require.NoError(t, btrfs.Commit(inRepo, "commit", "master"))

	const outPrefix = "TestRunnerInputs"
	r := NewRunner("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache())
	got, err := r.Inputs()
	require.NoError(t, err)

	want := []string{"foo", "bar", "fizz", "buzz"}
	require.Equal(t, want, got)
}
Beispiel #6
0
// TestError makes sure that we handle commands that error correctly.
func TestError(t *testing.T) {
	t.Parallel()
	inRepo := "TestError_in"
	require.NoError(t, btrfs.Init(inRepo))
	outPrefix := "TestError_out"

	// Create the Pachfile
	require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "error"), []byte(`
image ubuntu

run touch /out/foo
run cp /in/foo /out/bar
`)))
	// Last line should fail here.

	// Commit to the inRepo
	require.NoError(t, btrfs.Commit(inRepo, "commit", "master"))

	err := RunPipelines("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache())
	require.Error(t, err, "Running pipeline should error.")

	// Check that foo exists
	exists, err := btrfs.FileExists(path.Join(outPrefix, "error", "commit-0", "foo"))
	require.NoError(t, err)
	require.True(t, exists, "File foo should exist.")

	// Check that commit doesn't exist
	exists, err = btrfs.FileExists(path.Join(outPrefix, "error", "commit"))
	require.NoError(t, err)
	require.False(t, exists, "Commit \"commit\" should not get created when a command fails.")
}
Beispiel #7
0
// RouterFromArgs builds a Router whose modulos is taken from the first
// command-line argument (os.Args[1]), parsed as a base-10 unsigned integer
// that fits in 32 bits.
//
// It returns the strconv parse error when the argument is not a valid number.
// A missing argument is treated as an empty string, so it also yields a parse
// error instead of an index-out-of-range panic.
func RouterFromArgs() (*Router, error) {
	var arg string
	if len(os.Args) > 1 {
		arg = os.Args[1]
	}

	modulos, err := strconv.ParseUint(arg, 10, 32)
	if err != nil {
		return nil, err
	}
	return NewRouter(modulos, etcache.NewCache()), nil
}
Beispiel #8
0
// TestVersion checks that GET /version on a shard's HTTP handler responds
// with common.VersionString() followed by a newline.
func TestVersion(t *testing.T) {
	t.Parallel()

	testShard := NewShard("", "TestVersionData", "TestVersionPipelines", 0, 1, etcache.NewCache())
	require.NoError(t, testShard.EnsureRepos())

	server := httptest.NewServer(NewShardHTTPHandler(testShard))
	defer server.Close()

	resp, err := http.Get(server.URL + "/version")
	require.NoError(t, err)

	want := fmt.Sprintf("%s\n", common.VersionString())
	checkAndCloseHTTPResponseBody(t, resp, want)
}
Beispiel #9
0
// TestPing checks that GET /ping on a shard's HTTP handler responds with
// "pong\n".
func TestPing(t *testing.T) {
	t.Parallel()

	testShard := NewShard("", "TestPingData", "TestPingPipelines", 0, 1, etcache.NewCache())
	require.NoError(t, testShard.EnsureRepos())

	server := httptest.NewServer(NewShardHTTPHandler(testShard))
	defer server.Close()

	resp, err := http.Get(server.URL + "/ping")
	require.NoError(t, err)

	const want = "pong\n"
	checkAndCloseHTTPResponseBody(t, resp, want)
}
Beispiel #10
0
// TestInject tests that s3 injections works
func TestInject(t *testing.T) {
	t.Parallel()
	outRepo := "TestInject_out"
	require.NoError(t, btrfs.Init(outRepo))
	pipeline := newPipeline("output", "", outRepo, "commit", "master", "0-1", "", etcache.NewCache())
	require.NoError(t, pipeline.inject("s3://pachyderm-test/pipeline"))
	require.NoError(t, pipeline.finish())
	res, err := btrfs.ReadFile(path.Join(outRepo, "commit", "file"))
	require.NoError(t, err)
	require.Equal(t, "foo\n", string(res))
}
Beispiel #11
0
// TestShardFilter checks the `shard` query parameter on file requests. It
// creates a pipeline that touches four files, commits, then fetches all
// pipeline files once with shard=0-2 and once with shard=1-2. Every file
// received must match the requested shard according to route.Match, and no
// file may be received more than once across the two requests.
func TestShardFilter(t *testing.T) {
	t.Parallel()
	shard := NewShard("", "TestShardFilterData", "TestShardFilterPipelines", 0, 1, etcache.NewCache())
	require.NoError(t, shard.EnsureRepos())
	s := httptest.NewServer(NewShardHTTPHandler(shard))
	defer s.Close()

	res, err := http.Post(s.URL+"/pipeline/files", "application/text", strings.NewReader(`
image ubuntu

run touch /out/foo
run touch /out/bar
run touch /out/buzz
run touch /out/bizz
`))
	require.NoError(t, err)
	res.Body.Close()

	res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	// Close the body so the connection is not leaked.
	res.Body.Close()

	// Map to store files we receive, shared across both shard requests.
	files := make(map[string]struct{})

	// fetchShard requests every pipeline file for the given shard spec and
	// records each received file, failing on mismatches and duplicates.
	fetchShard := func(shardSpec string) {
		res, err := http.Get(s.URL + path.Join("/pipeline", "files", "file", "*") + "?commit=commit1&shard=" + shardSpec)
		require.NoError(t, err)
		// Deferred so the body is closed even when an assertion aborts the test.
		defer res.Body.Close()
		require.Equal(t, http.StatusOK, res.StatusCode)

		reader := multipart.NewReader(res.Body, res.Header.Get("Boundary"))
		for p, err := reader.NextPart(); err != io.EOF; p, err = reader.NextPart() {
			require.NoError(t, err)
			match, err := route.Match(p.FileName(), shardSpec)
			require.NoError(t, err)
			require.True(t, match, fmt.Sprintf("%s should match", p.FileName()))
			if _, ok := files[p.FileName()]; ok {
				t.Fatalf("File: %s received twice.", p.FileName())
			}
			files[p.FileName()] = struct{}{}
		}
	}

	fetchShard("0-2")
	fetchShard("1-2")
}
Beispiel #12
0
// do is the body of the shard server: it verifies the btrfs version, parses
// the command-line flags, builds a storage shard and serves its HTTP handler
// on port 80.
//
// Flags:
//   - shard:   the shard to service; -1 (the default) starts shard.FindRole,
//     any other value ensures the repos exist and starts shard.FillRole.
//   - modulos: the total number of shards.
//   - address: the address to advertise; when empty, the first non-loopback
//     IPv4 interface address is used.
//
// It returns an error if the btrfs check fails, no address can be determined,
// the repos cannot be ensured, or the HTTP server stops.
func do() error {
	if err := btrfs.CheckVersion(); err != nil {
		return err
	}

	shardNum := flag.Int("shard", -1, "Optional. The shard to service.")
	modulos := flag.Int("modulos", 4, "The total number of shards.")
	address := flag.String("address", "", "Optional. The address to advertise for this node.")
	flag.Parse()

	if *address == "" {
		// No address, we'll try to use our ip addr instead. The interfaces
		// are only enumerated here so that a lookup failure cannot abort
		// startup when an explicit address was supplied.
		addrs, err := net.InterfaceAddrs()
		if err != nil {
			return err
		}
		for _, addr := range addrs {
			ipnet, ok := addr.(*net.IPNet)
			if !ok || ipnet.IP.IsLoopback() || ipnet.IP.To4() == nil {
				continue
			}
			*address = ipnet.IP.String()
			break
		}
	}
	if *address == "" {
		return fmt.Errorf("pfs: Couldn't find machine ip.")
	}

	shard := storage.NewShard(
		"http://"+*address,
		fmt.Sprintf("data-%d-%d", *shardNum, *modulos),
		fmt.Sprintf("pipe-%d-%d", *shardNum, *modulos),
		uint64(*shardNum),
		uint64(*modulos),
		etcache.NewCache(),
	)
	if *shardNum == -1 {
		// No shard was requested on the command line.
		go shard.FindRole()
	} else {
		if err := shard.EnsureRepos(); err != nil {
			return err
		}
		go shard.FillRole()
	}
	log.Print("Listening on port 80...")
	return http.ListenAndServe(":80", storage.NewShardHTTPHandler(shard))
}
Beispiel #13
0
// TestExternalOutput configures an s3 output on a pipeline and then runs a
// Pachfile that itself declares an s3 output, expecting both steps to
// succeed. The test is skipped in short mode.
func TestExternalOutput(t *testing.T) {
	t.Parallel()
	if testing.Short() {
		t.Skip()
	}

	const outRepo = "TestExternalOutput_out"
	require.NoError(t, btrfs.Init(outRepo))

	p := newPipeline("output", "", outRepo, "commit", "master", "0-1", "", etcache.NewCache())
	require.NoError(t, p.output("s3://pachyderm-test/pipeline-out"))

	script := strings.NewReader(`
image ubuntu
output s3://pachyderm-test/pipeline-output

run echo foo >/out/foo
`)
	require.NoError(t, p.runPachFile(script))
}
Beispiel #14
0
// TestPipeline creates a basic pipeline on a shard.
func TestPipeline(t *testing.T) {
	t.Parallel()
	shard := NewShard("", "TestPipelineData", "TestPipelinePipelines", 0, 1, etcache.NewCache())
	require.NoError(t, shard.EnsureRepos())
	s := httptest.NewServer(NewShardHTTPHandler(shard))
	defer s.Close()

	res, err := http.Post(s.URL+"/pipeline/touch_foo", "application/text", strings.NewReader(`
image ubuntu

run touch /out/foo
`))
	require.NoError(t, err)
	res.Body.Close()

	res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	checkFile(t, s.URL+"/pipeline/touch_foo", "foo", "commit1", "")
}
Beispiel #15
0
// TestFail posts a pipeline whose Pachfile ends with `run exit 1`, creates a
// commit, and checks that requesting a file from that pipeline's output
// returns 500 Internal Server Error.
func TestFail(t *testing.T) {
	t.Parallel()
	shard := NewShard("", "TestFailData", "TestFailPipelines", 0, 1, etcache.NewCache())
	require.NoError(t, shard.EnsureRepos())
	s := httptest.NewServer(NewShardHTTPHandler(shard))
	defer s.Close()

	res, err := http.Post(s.URL+"/pipeline/fail", "application/text", strings.NewReader(`
image ubuntu

run touch /out/foo
run exit 1
`))
	require.NoError(t, err)
	res.Body.Close()

	// Every response body must be closed, otherwise its connection leaks.
	res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()

	res, err = http.Get(s.URL + "/pipeline/fail/file/foo?commit=commit1")
	require.NoError(t, err)
	defer res.Body.Close()
	require.Equal(t, http.StatusInternalServerError, res.StatusCode)
}
Beispiel #16
0
// newTestPipeline returns a pipeline named "pipeline" for use in tests. Its
// two repositories are named repoPrefix+"-in" and repoPrefix+"-out"; when
// init is true both are first initialised with btrfs.Init, and any failure
// aborts the test. commit, branch and shard are passed through to
// newPipeline unchanged.
func newTestPipeline(
	t *testing.T,
	repoPrefix string,
	commit string,
	branch string,
	shard string,
	init bool,
) *pipeline {
	inRepo, outRepo := repoPrefix+"-in", repoPrefix+"-out"
	if init {
		for _, repo := range []string{inRepo, outRepo} {
			require.NoError(t, btrfs.Init(repo))
		}
	}
	cache := etcache.NewCache()
	return newPipeline("pipeline", inRepo, outRepo, commit, branch, shard, "pipelineDir", cache)
}
Beispiel #17
0
// TestWrap tests a simple pipeline that uses line wrapping in it's Pachfile
func TestWrap(t *testing.T) {
	t.Parallel()
	outRepo := "TestWrap_out"
	require.NoError(t, btrfs.Init(outRepo))
	pipeline := newPipeline("output", "", outRepo, "commit", "master", "0-1", "", etcache.NewCache())
	pachfile := `
image ubuntu

# touch foo and bar
run touch /out/foo \
          /out/bar
`
	err := pipeline.runPachFile(strings.NewReader(pachfile))
	require.NoError(t, err)

	exists, err := btrfs.FileExists(path.Join(outRepo, "commit", "foo"))
	require.NoError(t, err)
	require.True(t, exists, "File `foo` doesn't exist when it should.")

	exists, err = btrfs.FileExists(path.Join(outRepo, "commit", "bar"))
	require.NoError(t, err)
	require.True(t, exists, "File `bar` doesn't exist when it should.")
}
Beispiel #18
0
// TestBasic is a property test: for each generated workload it starts a fresh
// shard server (with repo names numbered per iteration), runs the workload
// against it, and then runs the workload's facts against the same server.
func TestBasic(t *testing.T) {
	t.Parallel()
	iteration := 0
	property := func(w traffic.Workload) bool {
		// Number the repos so every iteration gets its own.
		dataName := fmt.Sprintf("TestBasic%d", iteration)
		pipelinesName := fmt.Sprintf("TestBasicPipelines%d", iteration)
		iteration++

		shard := NewShard("", dataName, pipelinesName, 0, 1, etcache.NewCache())
		require.NoError(t, shard.EnsureRepos())
		server := httptest.NewServer(NewShardHTTPHandler(shard))
		defer server.Close()

		RunWorkload(t, server.URL, w)
		RunWorkload(t, server.URL, w.Facts())
		return true
	}

	cfg := &quick.Config{MaxCount: getMaxCount()}
	if err := quick.Check(property, cfg); err != nil {
		t.Error(err)
	}
}
Beispiel #19
0
// TestPull is a property test for replication via Pull: for each generated
// workload it starts a source and a destination shard server, runs the
// workload against the source, calls Pull on the source replica with the
// destination replica as its target, and then runs the workload's facts
// against the destination.
func TestPull(t *testing.T) {
	t.Parallel()
	iteration := 0
	property := func(w traffic.Workload) bool {
		// Number the repos so every iteration gets its own.
		srcShard := NewShard("", fmt.Sprintf("TestPullSrc%d", iteration), fmt.Sprintf("TestPullSrcPipelines%d", iteration), 0, 1, etcache.NewCache())
		dstShard := NewShard("", fmt.Sprintf("TestPullDst%d", iteration), fmt.Sprintf("TestPullDstPipelines%d", iteration), 0, 1, etcache.NewCache())
		iteration++

		require.NoError(t, srcShard.EnsureRepos())
		require.NoError(t, dstShard.EnsureRepos())

		srcServer := httptest.NewServer(NewShardHTTPHandler(srcShard))
		dstServer := httptest.NewServer(NewShardHTTPHandler(dstShard))
		defer srcServer.Close()
		defer dstServer.Close()

		RunWorkload(t, srcServer.URL, w)

		// Replicate the data
		from := newShardReplica(srcServer.URL)
		to := newShardReplica(dstServer.URL)
		require.NoError(t, from.Pull("", to))

		RunWorkload(t, dstServer.URL, w.Facts())
		return true
	}

	cfg := &quick.Config{MaxCount: getMaxCount()}
	if err := quick.Check(property, cfg); err != nil {
		t.Error(err)
	}
}
Beispiel #20
0
// TestPipelines runs a 2 step pipeline.
func TestPipelines(t *testing.T) {
	t.Parallel()
	inRepo := "TestPipelines_in"
	require.NoError(t, btrfs.Init(inRepo))
	outPrefix := "TestPipelines_out"

	// Create a data file:
	require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "data", "foo"), []byte("foo")))

	// Create the Pachfile
	require.NoError(t, btrfs.WriteFile(path.Join(inRepo, "master", "pipeline", "cp"), []byte(`
image ubuntu

input data

run cp /in/data/foo /out/foo
run echo "foo"
`)))
	require.NoError(t, btrfs.Commit(inRepo, "commit", "master"))

	require.NoError(t, RunPipelines("pipeline", inRepo, outPrefix, "commit", "master", "0-1", etcache.NewCache()))

	data, err := btrfs.ReadFile(path.Join(outPrefix, "cp", "commit", "foo"))
	require.NoError(t, err)
	require.Equal(t, "foo", string(data))
}
Beispiel #21
0
// TestSyncFrom is a property test for syncFrom: for each generated workload
// it starts a source and a destination shard server, replays the workload's
// operations against the source one by one, and after every commit operation
// calls syncFrom for the destination repo with the source server's URL.
// Finally it runs the workload's facts against the destination.
func TestSyncFrom(t *testing.T) {
	t.Parallel()
	iteration := 0
	property := func(w traffic.Workload) bool {
		// Number the repos so every iteration gets its own. The destination
		// name is reused below as the target of syncFrom.
		dstName := fmt.Sprintf("TestSyncFromDst%d", iteration)
		srcShard := NewShard("", fmt.Sprintf("TestSyncFromSrc%d", iteration), fmt.Sprintf("TestSyncFromSrcPipelines%d", iteration), 0, 1, etcache.NewCache())
		dstShard := NewShard("", dstName, fmt.Sprintf("TestSyncFromDstPipelines%d", iteration), 0, 1, etcache.NewCache())
		iteration++

		require.NoError(t, srcShard.EnsureRepos())
		require.NoError(t, dstShard.EnsureRepos())

		srcServer := httptest.NewServer(NewShardHTTPHandler(srcShard))
		dstServer := httptest.NewServer(NewShardHTTPHandler(dstShard))
		defer srcServer.Close()
		defer dstServer.Close()

		for _, op := range w {
			runOp(t, srcServer.URL, op)
			if op.Object != traffic.Commit {
				continue
			}
			// Replicate the data
			require.NoError(t, syncFrom(dstName, []string{srcServer.URL}))
		}

		RunWorkload(t, dstServer.URL, w.Facts())
		return true
	}

	cfg := &quick.Config{MaxCount: getMaxCount()}
	if err := quick.Check(property, cfg); err != nil {
		t.Error(err)
	}
}