Example #1
func Show(repo string, commit string, out string) error {
	if err := subvolumeCreate(path.Join(repo, out)); err != nil {
		return err
	}
	files, err := NewIn(repo, commit)
	if err != nil {
		return err
	}
	log.Print("Files: ", files)
	var wg sync.WaitGroup
	for _, file := range files {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			in, err := Open(path.Join(repo, commit, file))
			if err != nil {
				log.Print(err)
				return
			}
			if _, err := CreateFromReader(path.Join(repo, out, file), in); err != nil {
				log.Print(err)
				return
			}
		}(file)
	}
	wg.Wait()
	return nil
}
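A hedged usage sketch of Show, with hypothetical repo, commit, and output names; it materializes every file in the commit into a fresh subvolume:

func exampleShow() {
	// "repo_x", "commit1", and "out1" are hypothetical names.
	if err := Show("repo_x", "commit1", "out1"); err != nil {
		log.Fatal(err)
	}
}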
Example #2
func (s *shard) CommitCreate(name string, branch string) (Commit, error) {
	if err := btrfs.Commit(s.dataRepo, name, branch); err != nil {
		return Commit{}, err
	}
	// We lock the guard so that we can remove the oldRunner from the map
	// and add the newRunner in.
	s.guard.Lock()
	oldRunner, ok := s.runners[branch]
	newRunner := pipeline.NewRunner("pipeline", s.dataRepo, s.pipelinePrefix, name, branch, s.shardStr, s.cache)
	s.runners[branch] = newRunner
	s.guard.Unlock()
	go func() {
		// cancel oldRunner if it exists
		if ok {
			err := oldRunner.Cancel()
			if err != nil {
				log.Print(err)
			}
		}
		err := newRunner.Run()
		if err != nil {
			log.Print(err)
		}
	}()
	go s.syncToPeers()
	return s.CommitGet(name)
}
Example #3
// FillRole attempts to find a role in the cluster. Once one is found it
// prepares the local storage for the role and announces the shard to the rest
// of the cluster. This function loops forever, renewing its role as it goes.
func (s *shard) FillRole() {
	shard := fmt.Sprintf("%d-%d", s.shard, s.modulos)
	masterKey := path.Join("/pfs/master", shard)
	replicaDir := path.Join("/pfs/replica", shard)

	amMaster := false //true if we're master
	replicaKey := ""
	// Create the client once, outside the loop; a deferred Close inside an
	// infinite loop would never run and would leak connections.
	client := etcd.NewClient([]string{"http://172.17.42.1:4001", "http://10.1.42.1:4001"})
	defer client.Close()
	for {
		// First we attempt to become the master for this shard
		if !amMaster {
			// We're not master, so we attempt to claim it, this will error if
			// another shard is already master
			backfillingKey := "[backfilling]" + s.url
			if _, err := client.Create(masterKey, backfillingKey, 5*60); err == nil {
				// no error means we successfully claimed master
				_ = s.syncFromPeers()
				// Attempt to finalize ourselves as master
				if _, err := client.CompareAndSwap(masterKey, s.url, 60, backfillingKey, 0); err == nil {
					// no error means we successfully announced ourselves as master
					// Sync the new data we pulled to peers
					go s.syncToPeers()
					// Record that we're master
					amMaster = true
				}
			} else {
				log.Print(err)
			}
		} else {
			// We're already master, renew our lease
			_, err := client.CompareAndSwap(masterKey, s.url, 60, s.url, 0)
			if err != nil { // error means we failed to reclaim master
				log.Print(err)
				amMaster = false
			}
		}

		// Whether or not we're master, we also maintain a replica
		// registration: create a key if we don't have one, renew it if we do.
		if replicaKey == "" {
			if resp, err := client.CreateInOrder(replicaDir, s.url, 60); err == nil {
				replicaKey = resp.Node.Key
				// Get ourselves up to date
				go s.syncFromPeers()
			}
		} else {
			_, err := client.CompareAndSwap(replicaKey, s.url, 60, s.url, 0)
			if err != nil {
				replicaKey = ""
			}
		}

		time.Sleep(time.Second * 45)
	}
}
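The master claim above is a TTL-lease pattern: Create claims the key, and periodic CompareAndSwap calls renew it before the TTL expires. A minimal standalone sketch under that assumption, with hypothetical key and URL values (it uses the same go-etcd client calls as FillRole):

func holdLease() {
	client := etcd.NewClient([]string{"http://172.17.42.1:4001"})
	defer client.Close()
	key, url := "/pfs/master/0-2", "http://host:80" // hypothetical values
	if _, err := client.Create(key, url, 60); err != nil {
		return // someone else holds the lease
	}
	// Renew well inside the 60 second TTL; a failed renewal loses the lease.
	for range time.Tick(time.Second * 45) {
		if _, err := client.CompareAndSwap(key, url, 60, url, 0); err != nil {
			return
		}
	}
}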
Example #4
func TestShuffle(t *testing.T) {
	t.Parallel()
	cache := etcache.NewTestCache()
	// Setup 2 shards
	shard1 := NewShard("", "TestShuffleData-0-2", "TestShufflePipelines-0-2", 0, 2, cache)
	require.NoError(t, shard1.EnsureRepos())
	s1 := httptest.NewServer(NewShardHTTPHandler(shard1))
	defer s1.Close()
	shard2 := NewShard("", "TestShuffleData-1-2", "TestShufflePipelines-1-2", 1, 2, cache)
	require.NoError(t, shard2.EnsureRepos())
	s2 := httptest.NewServer(NewShardHTTPHandler(shard2))
	defer s2.Close()

	files := []string{"foo", "bar", "fizz", "buzz"}

	for _, file := range files {
		checkWriteFile(t, s1.URL, path.Join("data", file), "master", file)
		checkWriteFile(t, s2.URL, path.Join("data", file), "master", file)
	}

	// Spoof the shards in etcache
	cache.SpoofMany("/pfs/master", []string{s1.URL, s2.URL}, false)

	pipeline := `
image ubuntu

input data

run cp -r /in/data /out

shuffle data
`
	res, err := http.Post(s1.URL+"/pipeline/shuffle", "application/text", strings.NewReader(pipeline))
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s2.URL+"/pipeline/shuffle", "application/text", strings.NewReader(pipeline))
	require.NoError(t, err)
	res.Body.Close()

	res, err = http.Post(s1.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s2.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()

	for _, file := range files {
		match, err := route.Match(path.Join("data", file), "0-2")
		require.NoError(t, err)
		if match {
			log.Print("shard: s1 file: ", file)
			checkFile(t, s1.URL+"/pipeline/shuffle", path.Join("data", file), "commit1", file+file)
		} else {
			log.Print("shard: s2 file: ", file)
			checkFile(t, s2.URL+"/pipeline/shuffle", path.Join("data", file), "commit1", file+file)
		}
	}
}
Example #5
// FindNew returns an array of filenames that were created or modified between `from` and `to`
func FindNew(repo, from, to string) ([]string, error) {
	var files []string
	t, err := transid(repo, from)
	if err != nil {
		return files, err
	}
	c := exec.Command("btrfs", "subvolume", "find-new", FilePath(path.Join(repo, to)), t)
	err = util.CallCont(c, func(r io.Reader) error {
		scanner := bufio.NewScanner(r)
		for scanner.Scan() {
			log.Print(scanner.Text())
			// scanner.Text() looks like this:
			// inode 6683 file offset 0 len 107 disk start 0 offset 0 gen 909 flags INLINE jobs/rPqZxsaspy
			// 0     1    2    3      4 5   6   7    8     9 10     11 12 13 14     15     16
			tokens := strings.Split(scanner.Text(), " ")
			// Make sure the line is parseable as a file and the path isn't hidden.
			if len(tokens) == 17 {
				if !strings.HasPrefix(tokens[16], ".") { // check if it's a hidden file
					files = append(files, tokens[16])
				}
			} else if len(tokens) == 4 {
				continue //skip transid messages
			} else {
				return fmt.Errorf("Failed to parse find-new output.")
			}
		}
		return scanner.Err()
	})
	return files, err
}
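A hedged usage sketch of FindNew, with hypothetical repo and commit names; it lists files created or modified between the two commits:

func exampleFindNew() {
	// "repo_x", "commit1", and "commit2" are hypothetical names.
	files, err := FindNew("repo_x", "commit1", "commit2")
	if err != nil {
		log.Fatal(err)
	}
	for _, file := range files {
		log.Print("new or modified: ", file)
	}
}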
Example #6
func callCont(c *exec.Cmd, cont func(io.Reader) error) error {
	_, callerFile, callerLine, _ := runtime.Caller(1)
	log.Printf("%15s:%.3d -> %s", path.Base(callerFile), callerLine, strings.Join(c.Args, " "))
	var reader io.Reader
	var err error
	if cont != nil {
		reader, err = c.StdoutPipe()
		if err != nil {
			return err
		}
	}
	stderr, err := c.StderrPipe()
	if err != nil {
		return err
	}
	if err = c.Start(); err != nil {
		return err
	}
	if cont != nil {
		if err = cont(reader); err != nil {
			// Reap the process so we don't leave a zombie behind.
			_ = c.Wait()
			return err
		}
	}
	buffer := bytes.NewBuffer(nil)
	buffer.ReadFrom(stderr)
	if buffer.Len() != 0 {
		log.Print("Command had output on stderr.\n Cmd: ", strings.Join(c.Args, " "), "\nstderr: ", buffer)
	}
	return c.Wait()
}
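A minimal sketch of driving callCont: run an illustrative command and scan its stdout line by line (passing nil for cont skips the stdout pipe entirely):

func exampleCallCont() error {
	c := exec.Command("ls", "-l")
	return callCont(c, func(r io.Reader) error {
		scanner := bufio.NewScanner(r)
		for scanner.Scan() {
			log.Print(scanner.Text())
		}
		return scanner.Err()
	})
}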
Example #7
func main() {
	if err := do(); err != nil {
		log.Print(err)
		os.Exit(1)
	}
	os.Exit(0)
}
Example #8
func (s *shard) FindRole() {
	client := etcd.NewClient([]string{"http://172.17.42.1:4001", "http://10.1.42.1:4001"})
	defer client.Close()
	gotRole := false
	var err error
	for ; true; <-time.After(time.Second * 45) {
		// renew our role if we have one
		if gotRole {
			_, err := client.CompareAndSwap(s.key(), s.url, 60, s.url, 0)
			if err != nil {
				log.Print(err)
			}
			continue
		}
		// figure out if we should take on a new role
		s.shard, err = s.freeRole()
		if err != nil {
			continue
		}
		_, err = client.Create(s.key(), s.url, 60)
		if err != nil {
			continue
		}
		s.dataRepo = fmt.Sprintf("data-%d-%d", s.shard, s.modulos)
		s.pipelinePrefix = fmt.Sprintf("pipe-%d-%d", s.shard, s.modulos)
		err = s.EnsureRepos()
		if err != nil {
			continue
		}
		gotRole = true
	}
}
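The loop header `for ; true; <-time.After(time.Second * 45)` is a compact idiom: the first iteration runs immediately, and every later iteration waits 45 seconds first. A standalone sketch of just the idiom:

func tickImmediately() {
	for ; true; <-time.After(time.Second * 45) {
		// Runs once right away, then once every 45 seconds.
		log.Print("tick")
	}
}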
Example #9
// WaitAnyFile returns as soon as ANY of the files exists.
// It returns an error if waiting for ANY of the files errors.
func WaitAnyFile(files ...string) (string, error) {
	// Channel for files that appear
	done := make(chan string, len(files))
	// Channel for errors that occur
	errors := make(chan error, len(files))

	cancellers := make([]chan struct{}, len(files))
	// Make sure that everyone gets cancelled after this function exits.
	defer func() {
		for _, c := range cancellers {
			c <- struct{}{}
		}
	}()
	for i := range files {
		file := files[i]
		cancellers[i] = make(chan struct{}, 1)
		go func(i int) {
			err := WaitFile(file, cancellers[i])
			if err != nil {
				// Never blocks due to size of channel's buffer.
				errors <- err
			}
			// Never blocks due to size of channel's buffer.
			done <- file
		}(i)
	}

	select {
	case file := <-done:
		log.Print("Done: ", file)
		return file, nil
	case err := <-errors:
		return "", err
	}
}
Example #10
func do() error {
	log.Print("Starting up...")
	r, err := router.RouterFromArgs()
	if err != nil {
		return err
	}
	return r.RunServer()
}
Example #11
func (p *pipeline) clean() error {
	log.Print("p.createdCommits: ", p.createdCommits)
	for _, commit := range p.createdCommits {
		if err := btrfs.DeleteCommit(commit); err != nil {
			return err
		}
	}
	return nil
}
Example #12
// image sets the image that is used for computations.
func (p *pipeline) image(image string) error {
	p.config.Config.Image = image
	// TODO(pedge): ensure images are on machine
	if err := pullImage(image); err != nil {
		log.Print("pull failed (", err, "); assuming image is local and continuing")
	}
	return nil
}
Example #13
// pushToOutputs should be called after `finish` to push the output data to s3
func (p *pipeline) pushToOutputs() error {
	if p.externalOutput == "" {
		return nil
	}
	client := s3utils.NewClient(false)
	bucket, err := s3utils.GetBucket(p.externalOutput)
	if err != nil {
		return err
	}
	pathPrefix, err := s3utils.GetPath(p.externalOutput)
	if err != nil {
		return err
	}
	files, err := btrfs.NewIn(p.outRepo, p.commit)
	if err != nil {
		return err
	}
	var wg sync.WaitGroup
	for _, file := range files {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			key := path.Join(pathPrefix, file)
			f, err := btrfs.Open(path.Join(p.outRepo, p.commit, file))
			if err != nil {
				log.Print(err)
				return
			}
			defer f.Close()
			acl := "private"
			if _, err = client.PutObject(&s3.PutObjectInput{
				Bucket: &bucket,
				Key:    &key,
				Body:   f,
				ACL:    &acl,
			}); err != nil {
				log.Print(err)
				return
			}
		}(file)
	}
	wg.Wait()
	return nil
}
Example #14
// WaitFile waits for a file to exist in the filesystem
// NOTE: You NEVER want to pass an unbuffered channel as cancel because
// WaitFile provides no guarantee that it will ever read from cancel.  Thus if
// you passed an unbuffered channel as cancel sending to the channel may block
// forever.
func WaitFile(name string, cancel chan struct{}) error {
	log.Print("WaitFile(", name, ")")
	dir, err := largestExistingPath(name)
	if err != nil {
		return err
	}
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return err
	}
	defer watcher.Close()

	if err := watcher.Add(FilePath(dir)); err != nil {
		return err
	}

	// Notice that we check to see if the file exists AFTER we create the watcher.
	// That means if we don't see the file with this check we're guaranteed to
	// get a notification for it.
	exists, err := FileExists(name)
	if err != nil {
		return err
	}
	if exists {
		log.Print("Found: ", name)
		return nil
	}

	for {
		select {
		case event := <-watcher.Events:
			if event.Op == fsnotify.Create && event.Name == FilePath(name) {
				return nil
			} else if event.Op == fsnotify.Create && strings.HasPrefix(FilePath(name), event.Name) {
				//fsnotify isn't recursive so we need to recurse for it.
				return WaitFile(name, cancel)
			}
		case err := <-watcher.Errors:
			return err
		case <-cancel:
			return ErrCancelled
		}
	}
}
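Per the warning above, cancel must be buffered so the sender never blocks. A hedged usage sketch with a hypothetical file path:

func exampleWaitFile() error {
	// Buffered with capacity 1 so this send never blocks, even if
	// WaitFile has already returned without reading from cancel.
	cancel := make(chan struct{}, 1)
	go func() {
		time.Sleep(time.Second * 30)
		cancel <- struct{}{}
	}()
	return WaitFile("repo/branch/file", cancel) // hypothetical path
}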
Example #15
// run runs a command in the container; it assumes that `branch` has already
// been created.
// Notice that any failure in this function leads to the branch having
// uncommitted dirty changes. This state needs to be cleaned up before the
// pipeline is rerun. The reason we don't do it here is that even if we try our
// best the process crashing at the wrong time could still leave it in an
// inconsistent state.
func (p *pipeline) run(cmd []string) error {
	// this function always increments counter
	defer func() { p.counter++ }()
	// Check if the commit already exists
	exists, err := btrfs.FileExists(path.Join(p.outRepo, p.runCommit()))
	if err != nil {
		return err
	}
	// if the commit exists there's no work to be done
	if exists {
		return nil
	}
	// Set the command
	p.config.Config.Cmd = []string{"sh"}
	//p.config.Config.Volumes["/out"] = emptyStruct()
	// Map the out directory in as a bind
	hostPath := btrfs.HostPath(path.Join(p.outRepo, p.branch))
	bind := fmt.Sprintf("%s:/out", hostPath)
	p.config.HostConfig.Binds = append(p.config.HostConfig.Binds, bind)
	log.Print(p.config.HostConfig.Binds)
	// Make sure this bind is only visible for the duration of run
	defer func() { p.config.HostConfig.Binds = p.config.HostConfig.Binds[:len(p.config.HostConfig.Binds)-1] }()
	// Start the container
	p.container, err = startContainer(p.config)
	if err != nil {
		return err
	}
	if err := pipeToStdin(p.container, strings.NewReader(strings.Join(cmd, " ")+"\n")); err != nil {
		return err
	}
	// Create a place to put the logs
	f, err := btrfs.CreateAll(path.Join(p.outRepo, p.branch, ".log"))
	if err != nil {
		return err
	}
	defer f.Close()
	// Copy the logs from the container in to the file.
	if err = containerLogs(p.container, f); err != nil {
		return err
	}
	// Wait for the command to finish:
	exit, err := waitContainer(p.container)
	if err != nil {
		return err
	}
	if exit != 0 {
		// The command errored
		return fmt.Errorf("Command:\n\t%s\nhad exit code: %d.\n",
			strings.Join(cmd, " "), exit)
	}
	return btrfs.Commit(p.outRepo, p.runCommit(), p.branch)
}
Example #16
func do() error {
	if err := btrfs.CheckVersion(); err != nil {
		return err
	}

	shardNum := flag.Int("shard", -1, "Optional. The shard to service.")
	modulos := flag.Int("modulos", 4, "The total number of shards.")
	address := flag.String("address", "", "Optional. The address to advertise for this node.")
	flag.Parse()
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return err
	}
	if *address == "" {
		// No address, we'll try to use our ip addr instead
		for _, addr := range addrs {
			if ipnet, ok := addr.(*net.IPNet); ok && !ipnet.IP.IsLoopback() {
				if ipnet.IP.To4() != nil {
					*address = ipnet.IP.String()
					break
				}
			}
		}
	}
	if *address == "" {
		return fmt.Errorf("pfs: Couldn't find machine ip.")
	}

	shard := storage.NewShard(
		"http://"+*address,
		fmt.Sprintf("data-%d-%d", *shardNum, *modulos),
		fmt.Sprintf("pipe-%d-%d", *shardNum, *modulos),
		uint64(*shardNum),
		uint64(*modulos),
		etcache.NewCache(),
	)
	if *shardNum == -1 {
		go shard.FindRole()
	} else {
		if err := shard.EnsureRepos(); err != nil {
			return err
		}
		go shard.FillRole()
	}
	log.Print("Listening on port 80...")
	return http.ListenAndServe(":80", storage.NewShardHTTPHandler(shard))
}
Example #17
func (s *shard) masters() ([]string, error) {
	result := make([]string, s.modulos)
	client := etcd.NewClient([]string{"http://172.17.42.1:4001", "http://10.1.42.1:4001"})
	defer client.Close()

	response, err := client.Get("/pfs/master", false, true)
	if err == nil {
		log.Print("len(response.Node.Nodes): ", len(response.Node.Nodes))
		for _, node := range response.Node.Nodes {
			// node.Key looks like "/pfs/master/0-5"
			//                     0  1   2      3
			key := strings.Split(node.Key, "/")
			shard, _, err := route.ParseShard(key[3])
			if err != nil {
				return nil, err
			}
			result[shard] = node.Value
		}

	}
	return result, nil
}
Example #18
func TestWaitAnyFile(t *testing.T) {
	t.Parallel()
	src := "repo_TestWaitAnyFile"
	require.NoError(t, Init(src))
	complete := make(chan struct{})
	go func() {
		file, err := WaitAnyFile(src+"/file1", src+"/file2")
		log.Print("WaitedOn: ", file)
		// Use t.Error, not require/t.Fatal: FailNow must not be called from
		// a goroutine other than the one running the test.
		if err != nil {
			t.Error(err)
			return
		}
		if file != src+"/file2" {
			t.Error("Got the wrong file.")
			return
		}
		complete <- struct{}{}
	}()
	WriteFile(src+"/file2", nil)
	select {
	case <-complete:
		// we passed the test
		return
	case <-time.After(time.Second * 10):
		t.Fatal("Timeout waiting for file.")
	}
}
Example #19
func (r *Runner) Cancel() error {
	log.Print("Cancel: ", r)
	// A channel for the errors; notice that its capacity is the same as the
	// number of pipelines. The code below makes sure that each pipeline sends
	// at most 1 error, otherwise deadlock could occur.
	errors := make(chan error, len(r.pipelines))

	// Make sure we don't race with Run
	r.lock.Lock()
	// Indicate that we're cancelling the pipelines
	r.cancelled = true
	// A waitgroup for the goroutines that cancel the containers
	var wg sync.WaitGroup
	// We'll have one goroutine per pipeline
	wg.Add(len(r.pipelines))
	for _, p := range r.pipelines {
		go func(p *pipeline) {
			defer wg.Done()
			err := p.cancel()
			if err != nil {
				errors <- err
			}
		}(p)
	}
	// Wait for the cancellations to finish.
	wg.Wait()
	r.lock.Unlock()
	close(errors)
	for err := range errors {
		return err
	}
	// At the end we wait for the pipelines to actually finish, this means that
	// once Cancel is done you can safely fire off a new batch of pipelines.
	r.wait.Wait()
	return nil
}
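The sized error channel in Cancel is a general pattern: give the channel capacity equal to the number of senders, and guarantee each sender sends at most once, so no send can ever block. A minimal sketch of the pattern on its own:

func firstError(tasks []func() error) error {
	// One slot per task: each task sends at most one error, so sends never block.
	errs := make(chan error, len(tasks))
	var wg sync.WaitGroup
	wg.Add(len(tasks))
	for _, task := range tasks {
		go func(task func() error) {
			defer wg.Done()
			if err := task(); err != nil {
				errs <- err
			}
		}(task)
	}
	wg.Wait()
	close(errs)
	// Report the first error, if any.
	for err := range errs {
		return err
	}
	return nil
}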
Example #20
// runPachFile parses r as a PachFile and executes it. runPachFile is
// GUARANTEED to call either `finish` or `fail`.
func (p *pipeline) runPachFile(r io.Reader) (retErr error) {
	defer func() {
		// This function GUARANTEES that if `p.finish()` didn't happen,
		// `p.fail()` does.
		finished, err := p.finished()
		if err != nil {
			if retErr == nil {
				retErr = err
			}
			return
		}
		if finished {
			return
		}
		if err := p.fail(); err != nil {
			if retErr == nil {
				retErr = err
			}
			return
		}
	}()
	defer func() {
		if err := p.clean(); err != nil {
			if retErr == nil {
				retErr = err
			}
		}
	}()
	lines := bufio.NewScanner(r)

	if err := p.start(); err != nil {
		return err
	}
	var tokens []string
	for lines.Scan() {
		if p.cancelled {
			return ErrCancelled
		}
		if len(tokens) > 0 && tokens[len(tokens)-1] == "\\" {
			// We have tokens from last loop, remove the \ token which designates the line wrap
			tokens = tokens[:len(tokens)-1]
		} else {
			// No line wrap, clear the tokens; they were already executed
			tokens = []string{}
		}
		tokens = append(tokens, strings.Fields(lines.Text())...)
		// These conditions are: empty line, comment line, and wrapped line.
		// All 3 cause us to continue; the first 2 because we're skipping them,
		// the last because we need more input.
		if len(tokens) == 0 || tokens[0][0] == '#' || tokens[len(tokens)-1] == "\\" {
			continue
		}
		}

		var err error
		switch strings.ToLower(tokens[0]) {
		case "input":
			if len(tokens) != 2 {
				return ErrArgCount
			}
			err = p.input(tokens[1])
		case "output":
			if len(tokens) != 2 {
				return ErrArgCount
			}
			err = p.output(tokens[1])
		case "image":
			if len(tokens) != 2 {
				return ErrArgCount
			}
			err = p.image(tokens[1])
		case "run":
			if len(tokens) < 2 {
				return ErrArgCount
			}
			err = p.run(tokens[1:])
		case "shuffle":
			if len(tokens) != 2 {
				return ErrArgCount
			}
			err = p.shuffle(tokens[1])
		default:
			return ErrUnkownKeyword
		}
		if err != nil {
			log.Print(err)
			return err
		}
	}
	if err := p.finish(); err != nil {
		return err
	}

	if err := p.pushToOutputs(); err != nil {
		return err
	}

	return nil
}
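To make the tokenizer concrete, here is an illustrative PachFile (contents hypothetical): blank lines and `#` comments are skipped, a trailing `\` wraps a line into the next, and each remaining line starts with one of the keywords handled above:

# copy the input through, wrapping the run command across two lines
image ubuntu

input data

run cp -r /in/data /tmp/work && \
    cp -r /tmp/work /out

shuffle data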
Example #21
// inject injects data from an external source into the output directory
func (p *pipeline) inject(name string, public bool) error {
	switch {
	case strings.HasPrefix(name, "s3://"):
		bucket, err := s3utils.GetBucket(name)
		if err != nil {
			return err
		}
		client := s3utils.NewClient(public)
		var wg sync.WaitGroup
		err = s3utils.ForEachFile(name, public, "", func(file string, modtime time.Time) error {
			// Grab the path, it's handy later
			_path, err := s3utils.GetPath(name)
			if err != nil {
				return err
			}
			// Check if the file belongs on this shard
			match, err := route.Match(file, p.shard)
			if err != nil {
				return err
			}
			if !match {
				return nil
			}
			// Check if the file has changed
			changed, err := btrfs.Changed(path.Join(p.outRepo, p.branch,
				strings.TrimPrefix(file, _path)), modtime)
			if err != nil {
				return err
			}
			if !changed {
				return nil
			}
			// TODO match the on disk timestamps to s3's timestamps and make
			// sure we only pull data that has changed
			wg.Add(1)
			go func() {
				defer wg.Done()
				response, err := client.GetObject(&s3.GetObjectInput{
					Bucket: &bucket,
					Key:    &file,
				})
				if err != nil {
					log.Print(err)
					return
				}

				src := response.Body
				dst, err := btrfs.CreateAll(path.Join(p.outRepo, p.branch, strings.TrimPrefix(file, _path)))
				if err != nil {
					log.Print(err)
					return
				}
				defer dst.Close()
				if _, err := io.Copy(dst, src); err != nil {
					log.Print(err)
					return
				}
				if err := btrfs.Chtimes(path.Join(p.outRepo, p.branch, strings.TrimPrefix(file, _path)), modtime, modtime); err != nil {
					log.Print(err)
					return
				}
			}()
			return nil
		})
		wg.Wait()
		if err != nil {
			return err
		}
	default:
		log.Print("Unknown protocol: ", name)
		return ErrUnknownProtocol
	}
	return nil
}