Beispiel #1
0
// TestShardFilter posts a pipeline that touches four files, commits, and then
// requests the pipeline's files once per shard spec ("0-2" and "1-2"). Every
// part returned for a spec must satisfy route.Match for that spec, and no file
// name may be returned more than once across the two requests.
func TestShardFilter(t *testing.T) {
	t.Parallel()
	shard := NewShard("", "TestShardFilterData", "TestShardFilterPipelines", 0, 1, etcache.NewCache())
	require.NoError(t, shard.EnsureRepos())
	s := httptest.NewServer(NewShardHTTPHandler(shard))
	defer s.Close()

	res, err := http.Post(s.URL+"/pipeline/files", "application/text", strings.NewReader(`
image ubuntu

run touch /out/foo
run touch /out/bar
run touch /out/buzz
run touch /out/bizz
`))
	require.NoError(t, err)
	res.Body.Close()

	res, err = http.Post(s.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	// The body is not needed, but it must be closed so the underlying
	// connection is released.
	res.Body.Close()

	// Names of the files received so far, shared by both shard requests so
	// that a file served under both specs is detected.
	files := make(map[string]struct{})

	// fetchShard requests all pipeline files for the given shard spec and
	// verifies each returned part belongs to that spec and is new.
	fetchShard := func(spec string) {
		res, err := http.Get(s.URL + path.Join("/pipeline", "files", "file", "*") + "?commit=commit1&shard=" + spec)
		require.NoError(t, err)
		// Deferred calls still run when require aborts the test.
		defer res.Body.Close()
		require.Equal(t, http.StatusOK, res.StatusCode)
		reader := multipart.NewReader(res.Body, res.Header.Get("Boundary"))
		for p, err := reader.NextPart(); err != io.EOF; p, err = reader.NextPart() {
			require.NoError(t, err)
			match, err := route.Match(p.FileName(), spec)
			require.NoError(t, err)
			require.True(t, match, fmt.Sprintf("%s should match", p.FileName()))
			if _, ok := files[p.FileName()]; ok {
				t.Fatalf("File: %s received twice.", p.FileName())
			}
			files[p.FileName()] = struct{}{}
		}
	}

	fetchShard("0-2")
	fetchShard("1-2")
}
Beispiel #2
0
// PipelineFileGetAll returns every file of pipelineName at commit whose path
// matches the glob fileName.
//
// Each glob match has the "/<pipelinePrefix>/<pipelineName>/<commit>/" prefix
// trimmed to obtain its name. When shard is non-empty, names for which
// route.Match(name, shard) reports false are skipped. Matches for which
// PipelineFileGet returns ErrIsDirectory are skipped as well; any other error
// aborts the call and is returned with a nil slice.
func (s *shard) PipelineFileGetAll(pipelineName string, fileName string, commit string, shard string) ([]File, error) {
	paths, err := btrfs.Glob(path.Join(s.pipelinePrefix, pipelineName, commit, fileName))
	if err != nil {
		return nil, err
	}

	// The prefix to strip is the same for every match, so build it once.
	root := path.Join("/", s.pipelinePrefix, pipelineName, commit)
	if !strings.HasSuffix(root, "/") {
		root += "/"
	}
	filterByShard := shard != ""

	var files []File
	for _, p := range paths {
		rel := strings.TrimPrefix(p, root)
		if filterByShard {
			belongs, err := route.Match(rel, shard)
			if err != nil {
				return nil, err
			}
			if !belongs {
				continue
			}
		}
		f, err := s.PipelineFileGet(pipelineName, rel, commit)
		switch {
		case err == ErrIsDirectory:
			// Directories are not files; leave them out of the result.
			continue
		case err != nil:
			return nil, err
		}
		files = append(files, f)
	}
	return files, nil
}
Beispiel #3
0
// TestShuffle starts two shards (0 of 2 and 1 of 2) behind test HTTP servers,
// writes the same four files to both through checkWriteFile, registers both
// servers in the test cache under "/pfs/master", posts an identical "shuffle"
// pipeline to each shard and commits on each. It then checks, for every file,
// the shuffle output on whichever shard route.Match assigns the file to.
//
// NOTE(review): the expected content is file+file, presumably because each
// shard contributes one copy of the file to the shuffled output; confirm
// against the shuffle implementation.
func TestShuffle(t *testing.T) {
	t.Parallel()
	cache := etcache.NewTestCache()
	// Setup 2 shards
	shard1 := NewShard("", "TestShuffleData-0-2", "TestShufflePipelines-0-2", 0, 2, cache)
	require.NoError(t, shard1.EnsureRepos())
	s1 := httptest.NewServer(NewShardHTTPHandler(shard1))
	defer s1.Close()
	shard2 := NewShard("", "TestShuffleData-1-2", "TestShufflePipelines-1-2", 1, 2, cache)
	require.NoError(t, shard2.EnsureRepos())
	s2 := httptest.NewServer(NewShardHTTPHandler(shard2))
	defer s2.Close()

	files := []string{"foo", "bar", "fizz", "buzz"}

	for _, file := range files {
		checkWriteFile(t, s1.URL, path.Join("data", file), "master", file)
		checkWriteFile(t, s2.URL, path.Join("data", file), "master", file)
	}

	// Spoof the shards in etcache
	cache.SpoofMany("/pfs/master", []string{s1.URL, s2.URL}, false)

	pipeline := `
image ubuntu

input data

run cp -r /in/data /out

shuffle data
`
	res, err := http.Post(s1.URL+"/pipeline/shuffle", "application/text", strings.NewReader(pipeline))
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s2.URL+"/pipeline/shuffle", "application/text", strings.NewReader(pipeline))
	require.NoError(t, err)
	res.Body.Close()

	// The commit responses are not inspected, but their bodies must still be
	// closed so the underlying connections are released.
	res, err = http.Post(s1.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s2.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()

	for _, file := range files {
		// A file that matches shard spec "0-2" is checked on s1; everything
		// else is checked on s2.
		match, err := route.Match(path.Join("data", file), "0-2")
		require.NoError(t, err)
		if match {
			log.Print("shard: s1 file: ", file)
			checkFile(t, s1.URL+"/pipeline/shuffle", path.Join("data", file), "commit1", file+file)
		} else {
			log.Print("shard: s2 file: ", file)
			checkFile(t, s2.URL+"/pipeline/shuffle", path.Join("data", file), "commit1", file+file)
		}
	}
}
Beispiel #4
0
// inject injects data from an external source into the output directory.
//
// Only names starting with "s3://" are handled; any other name is logged and
// ErrUnknownProtocol is returned. For an S3 source, each file reported by
// s3utils.ForEachFile is skipped unless route.Match assigns it to p.shard and
// btrfs.Changed reports it as changed relative to its S3 modification time.
// Remaining files are downloaded concurrently, one goroutine per file, into
// p.outRepo/p.branch and stamped with the S3 modification time.
//
// inject blocks until every started download has finished and returns the
// first download error encountered, or nil when all downloads succeeded.
func (p *pipeline) inject(name string, public bool) error {
	switch {
	case strings.HasPrefix(name, "s3://"):
		bucket, err := s3utils.GetBucket(name)
		if err != nil {
			return err
		}
		client := s3utils.NewClient(public)
		var (
			wg       sync.WaitGroup
			mu       sync.Mutex // guards firstErr
			firstErr error
		)
		// fail remembers the first error reported by any download goroutine.
		fail := func(err error) {
			mu.Lock()
			defer mu.Unlock()
			if firstErr == nil {
				firstErr = err
			}
		}
		// NOTE(review): whatever ForEachFile itself reports (including errors
		// returned by the callback below) is not inspected here; confirm
		// whether it returns an error that should be propagated.
		s3utils.ForEachFile(name, public, "", func(file string, modtime time.Time) error {
			// Grab the path, it's handy later
			prefix, err := s3utils.GetPath(name)
			if err != nil {
				return err
			}
			// Check if the file belongs on this shard
			match, err := route.Match(file, p.shard)
			if err != nil {
				return err
			}
			if !match {
				return nil
			}
			// Where the file lands in the output repo; used for the change
			// check, the download and the timestamp update.
			dstPath := path.Join(p.outRepo, p.branch, strings.TrimPrefix(file, prefix))
			// Check if the file has changed
			changed, err := btrfs.Changed(dstPath, modtime)
			if err != nil {
				return err
			}
			if !changed {
				return nil
			}
			// TODO match the on disk timestamps to s3's timestamps and make
			// sure we only pull data that has changed
			wg.Add(1)
			go func() {
				defer wg.Done()
				response, err := client.GetObject(&s3.GetObjectInput{
					Bucket: &bucket,
					Key:    &file,
				})
				if err != nil {
					fail(err)
					return
				}
				// The object body must be closed once it has been consumed.
				defer response.Body.Close()

				dst, err := btrfs.CreateAll(dstPath)
				if err != nil {
					fail(err)
					return
				}
				defer dst.Close()
				if _, err := io.Copy(dst, response.Body); err != nil {
					fail(err)
					return
				}
				if err := btrfs.Chtimes(dstPath, modtime, modtime); err != nil {
					fail(err)
					return
				}
			}()
			return nil
		})
		// Wait also orders the goroutines' writes to firstErr before this
		// read, so no lock is needed here.
		wg.Wait()
		if firstErr != nil {
			return firstErr
		}
	default:
		log.Print("Unknown protocol: ", name)
		return ErrUnknownProtocol
	}
	return nil
}