// TestWordCount exercises a two-shard word-count pipeline end to end: it
// writes input text to both shards, installs the same pipeline on each,
// commits, and then checks the shuffled/reduced count for a single word.
func TestWordCount(t *testing.T) {
	t.Parallel()
	cache := etcache.NewTestCache()
	// Setup 2 shards
	shard1 := NewShard("", "TestWordCountData-0-2", "TestWordCountPipelines-0-2", 0, 2, cache)
	require.NoError(t, shard1.EnsureRepos())
	s1 := httptest.NewServer(NewShardHTTPHandler(shard1))
	defer s1.Close()
	shard2 := NewShard("", "TestWordCountData-1-2", "TestWordCountPipelines-1-2", 1, 2, cache)
	require.NoError(t, shard2.EnsureRepos())
	s2 := httptest.NewServer(NewShardHTTPHandler(shard2))
	defer s2.Close()
	// Seed each shard with a chunk of input text on branch "master".
	checkWriteFile(t, s1.URL, path.Join("data", "1"), "master", `Mr and Mrs Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense.`)
	checkWriteFile(t, s2.URL, path.Join("data", "2"), "master", `Mr Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large moustache. Mrs Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbours. 
The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.`)
	// Spoof the shards in etcache
	cache.SpoofMany("/pfs/master", []string{s1.URL, s2.URL}, false)
	// Line-based pipeline spec: count word occurrences per shard, then
	// shuffle the per-word files and sum the partial counts.
	pipeline := `
image ubuntu
input data
run mkdir /out/counts
run cat /in/data/* | tr -cs "A-Za-z'" "\n" | sort | uniq -c | sort -n -r | while read count; do echo ${count% *} >/out/counts/${count#* }; done
shuffle counts
run find /out/counts | while read count; do cat $count | awk '{ sum+=$1} END {print sum}' >/tmp/count; mv /tmp/count $count; done
`
	// Install the pipeline on both shards.
	res, err := http.Post(s1.URL+"/pipeline/wc", "application/text", strings.NewReader(pipeline))
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s2.URL+"/pipeline/wc", "application/text", strings.NewReader(pipeline))
	require.NoError(t, err)
	res.Body.Close()
	// Commit on both shards, which triggers the pipeline run.
	res, err = http.Post(s1.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s2.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()
	// There should be 3 occurances of Dursley
	checkFile(t, s1.URL+"/pipeline/wc", path.Join("counts", "Dursley"), "commit1", "3\n")
}
func TestShuffle(t *testing.T) { t.Parallel() cache := etcache.NewTestCache() // Setup 2 shards shard1 := NewShard("", "TestShuffleData-0-2", "TestShufflePipelines-0-2", 0, 2, cache) require.NoError(t, shard1.EnsureRepos()) s1 := httptest.NewServer(NewShardHTTPHandler(shard1)) defer s1.Close() shard2 := NewShard("", "TestShuffleData-1-2", "TestShufflePipelines-1-2", 1, 2, cache) require.NoError(t, shard2.EnsureRepos()) s2 := httptest.NewServer(NewShardHTTPHandler(shard2)) defer s2.Close() files := []string{"foo", "bar", "fizz", "buzz"} for _, file := range files { checkWriteFile(t, s1.URL, path.Join("data", file), "master", file) checkWriteFile(t, s2.URL, path.Join("data", file), "master", file) } // Spoof the shards in etcache cache.SpoofMany("/pfs/master", []string{s1.URL, s2.URL}, false) pipeline := ` image ubuntu input data run cp -r /in/data /out shuffle data ` res, err := http.Post(s1.URL+"/pipeline/shuffle", "application/text", strings.NewReader(pipeline)) require.NoError(t, err) res.Body.Close() res, err = http.Post(s2.URL+"/pipeline/shuffle", "application/text", strings.NewReader(pipeline)) require.NoError(t, err) res.Body.Close() res, err = http.Post(s1.URL+"/commit?commit=commit1", "", nil) require.NoError(t, err) res, err = http.Post(s2.URL+"/commit?commit=commit1", "", nil) require.NoError(t, err) for _, file := range files { match, err := route.Match(path.Join("data", file), "0-2") require.NoError(t, err) if match { log.Print("shard: s1 file: ", file) checkFile(t, s1.URL+"/pipeline/shuffle", path.Join("data", file), "commit1", file+file) } else { log.Print("shard: s2 file: ", file) checkFile(t, s2.URL+"/pipeline/shuffle", path.Join("data", file), "commit1", file+file) } } }
func TestWordCount(t *testing.T) { t.Parallel() maxCount := 2 if testing.Short() { maxCount = 1 } // First setup the WordCount pipeline pipeline := ` image ubuntu input data run mkdir -p /out/counts run cat /in/data/* | tr -cs "A-Za-z'" "\n" | sort | uniq -c | sort -n -r | while read count; do echo ${count% *} >/out/counts/${count#* }; done shuffle counts run find /out/counts | while read count; do cat $count | awk '{ sum+=$1} END {print sum}' >/tmp/count; mv /tmp/count $count; done ` // used to prevent collisions counter := 0 f := func(w traffic.Workload) bool { defer func() { counter++ }() cluster := newCluster(t, fmt.Sprintf("TestWordCount-%d", counter), 4, etcache.NewTestCache()) defer cluster.Close() // Run the workload storage.RunWorkload(t, cluster.router.URL, w) // Install the pipeline response, err := http.Post(cluster.router.URL+"/pipeline/wc", "application/text", strings.NewReader(pipeline)) defer response.Body.Close() assert.NoError(t, err) // Make a commit response, err = http.Post(cluster.router.URL+"/commit?commit=commit1", "", nil) defer response.Body.Close() assert.NoError(t, err) // TODO(jd) make this check for correctness, not just that the request // completes. It's a bit hard because the input is random. Probably the // right idea is to modify the traffic package so that it keeps track of // this. response, err = http.Get(cluster.router.URL + "/pipeline/wc/file/counts/*?commit=commit1") defer response.Body.Close() assert.NoError(t, err) require.Equal(t, http.StatusOK, response.StatusCode) return true } if err := quick.Check(f, &quick.Config{MaxCount: maxCount}); err != nil { t.Error(err) } }
func TestTwoShards(t *testing.T) { t.Parallel() maxCount := 5 if testing.Short() { maxCount = 1 } // used to prevent collisions counter := 0 f := func(w traffic.Workload) bool { defer func() { counter++ }() cluster := newCluster(t, fmt.Sprintf("TestTwoShards-%d", counter), 2, etcache.NewTestCache()) defer cluster.Close() // Run the workload storage.RunWorkload(t, cluster.router.URL, w) // Make sure we see the changes we should facts := w.Facts() storage.RunWorkload(t, cluster.router.URL, facts) //increment the counter return true } if err := quick.Check(f, &quick.Config{MaxCount: maxCount}); err != nil { t.Error(err) } }