func Show(repo string, commit string, out string) error {
	if err := subvolumeCreate(path.Join(repo, out)); err != nil {
		return err
	}
	files, err := NewIn(repo, commit)
	if err != nil {
		return err
	}
	log.Print("Files: ", files)
	var wg sync.WaitGroup
	for _, file := range files {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			in, err := Open(path.Join(repo, commit, file))
			if err != nil {
				log.Print(err)
				return
			}
			if _, err := CreateFromReader(path.Join(repo, out, file), in); err != nil {
				log.Print(err)
				return
			}
		}(file)
	}
	wg.Wait()
	return nil
}
func (s *shard) CommitCreate(name string, branch string) (Commit, error) {
	if err := btrfs.Commit(s.dataRepo, name, branch); err != nil {
		return Commit{}, err
	}
	// We lock the guard so that we can remove the oldRunner from the map
	// and add the newRunner in.
	s.guard.Lock()
	oldRunner, ok := s.runners[branch]
	newRunner := pipeline.NewRunner("pipeline", s.dataRepo, s.pipelinePrefix, name, branch, s.shardStr, s.cache)
	s.runners[branch] = newRunner
	s.guard.Unlock()
	go func() {
		// Cancel oldRunner if it exists.
		if ok {
			if err := oldRunner.Cancel(); err != nil {
				log.Print(err)
			}
		}
		if err := newRunner.Run(); err != nil {
			log.Print(err)
		}
	}()
	go s.syncToPeers()
	return s.CommitGet(name)
}
// FillRole attempts to find a role in the cluster. Once one is found it
// prepares the local storage for the role and announces the shard to the rest
// of the cluster. This function will loop until `cancel` is closed.
func (s *shard) FillRole() {
	shard := fmt.Sprintf("%d-%d", s.shard, s.modulos)
	masterKey := path.Join("/pfs/master", shard)
	replicaDir := path.Join("/pfs/replica", shard)
	amMaster := false // true if we're master
	replicaKey := ""
	client := etcd.NewClient([]string{"http://172.17.42.1:4001", "http://10.1.42.1:4001"})
	defer client.Close()
	for {
		// First we attempt to become the master for this shard.
		if !amMaster {
			// We're not master, so we attempt to claim it. This will error if
			// another shard is already master.
			backfillingKey := "[backfilling]" + s.url
			if _, err := client.Create(masterKey, backfillingKey, 5*60); err == nil {
				// No error means we successfully claimed master.
				_ = s.syncFromPeers()
				// Attempt to finalize ourselves as master.
				_, err = client.CompareAndSwap(masterKey, s.url, 60, backfillingKey, 0)
				if err == nil {
					// No error means that we successfully announced ourselves as master.
					// Sync the new data we pulled to peers.
					go s.syncToPeers()
					// Record that we're master.
					amMaster = true
				}
			} else {
				log.Print(err)
			}
		} else {
			// We're already master, renew our lease.
			_, err := client.CompareAndSwap(masterKey, s.url, 60, s.url, 0)
			if err != nil {
				// An error means we failed to reclaim master.
				log.Print(err)
				amMaster = false
			}
		}
		// We didn't claim master, so we add ourselves as replica instead.
		if replicaKey == "" {
			if resp, err := client.CreateInOrder(replicaDir, s.url, 60); err == nil {
				replicaKey = resp.Node.Key
				// Get ourselves up to date.
				go s.syncFromPeers()
			}
		} else {
			_, err := client.CompareAndSwap(replicaKey, s.url, 60, s.url, 0)
			if err != nil {
				replicaKey = ""
			}
		}
		select {
		case <-time.After(time.Second * 45):
			continue
		}
	}
}
func TestShuffle(t *testing.T) {
	t.Parallel()
	cache := etcache.NewTestCache()
	// Setup 2 shards
	shard1 := NewShard("", "TestShuffleData-0-2", "TestShufflePipelines-0-2", 0, 2, cache)
	require.NoError(t, shard1.EnsureRepos())
	s1 := httptest.NewServer(NewShardHTTPHandler(shard1))
	defer s1.Close()
	shard2 := NewShard("", "TestShuffleData-1-2", "TestShufflePipelines-1-2", 1, 2, cache)
	require.NoError(t, shard2.EnsureRepos())
	s2 := httptest.NewServer(NewShardHTTPHandler(shard2))
	defer s2.Close()
	files := []string{"foo", "bar", "fizz", "buzz"}
	for _, file := range files {
		checkWriteFile(t, s1.URL, path.Join("data", file), "master", file)
		checkWriteFile(t, s2.URL, path.Join("data", file), "master", file)
	}
	// Spoof the shards in etcache
	cache.SpoofMany("/pfs/master", []string{s1.URL, s2.URL}, false)
	pipeline := `
image ubuntu
input data
run cp -r /in/data /out
shuffle data
`
	res, err := http.Post(s1.URL+"/pipeline/shuffle", "application/text", strings.NewReader(pipeline))
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s2.URL+"/pipeline/shuffle", "application/text", strings.NewReader(pipeline))
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s1.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()
	res, err = http.Post(s2.URL+"/commit?commit=commit1", "", nil)
	require.NoError(t, err)
	res.Body.Close()
	for _, file := range files {
		match, err := route.Match(path.Join("data", file), "0-2")
		require.NoError(t, err)
		if match {
			log.Print("shard: s1 file: ", file)
			checkFile(t, s1.URL+"/pipeline/shuffle", path.Join("data", file), "commit1", file+file)
		} else {
			log.Print("shard: s2 file: ", file)
			checkFile(t, s2.URL+"/pipeline/shuffle", path.Join("data", file), "commit1", file+file)
		}
	}
}
// FindNew returns an array of filenames that were created or modified between `from` and `to`.
func FindNew(repo, from, to string) ([]string, error) {
	var files []string
	t, err := transid(repo, from)
	if err != nil {
		return files, err
	}
	c := exec.Command("btrfs", "subvolume", "find-new", FilePath(path.Join(repo, to)), t)
	err = util.CallCont(c, func(r io.Reader) error {
		scanner := bufio.NewScanner(r)
		for scanner.Scan() {
			log.Print(scanner.Text())
			// scanner.Text() looks like this:
			// inode 6683 file offset 0 len 107 disk start 0 offset 0 gen 909 flags INLINE jobs/rPqZxsaspy
			// 0     1    2    3      4 5   6   7    8     9 10     11 12 13  14    15     16
			tokens := strings.Split(scanner.Text(), " ")
			// Make sure the line is parseable as a file and the path isn't hidden.
			if len(tokens) == 17 {
				if !strings.HasPrefix(tokens[16], ".") { // check if it's a hidden file
					files = append(files, tokens[16])
				}
			} else if len(tokens) == 4 {
				continue // skip transid messages
			} else {
				return fmt.Errorf("Failed to parse find-new output.")
			}
		}
		return scanner.Err()
	})
	return files, err
}
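
// Usage sketch (not part of the original code; the repo and commit names are
// hypothetical): FindNew can be used to log everything that changed between
// two commits of a repo.
func logChangedFiles() error {
	files, err := FindNew("myRepo", "commit1", "commit2")
	if err != nil {
		return err
	}
	for _, file := range files {
		log.Print("changed: ", file)
	}
	return nil
}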
func callCont(c *exec.Cmd, cont func(io.Reader) error) error {
	_, callerFile, callerLine, _ := runtime.Caller(1)
	log.Printf("%15s:%.3d -> %s", path.Base(callerFile), callerLine, strings.Join(c.Args, " "))
	var reader io.Reader
	var err error
	if cont != nil {
		reader, err = c.StdoutPipe()
		if err != nil {
			return err
		}
	}
	stderr, err := c.StderrPipe()
	if err != nil {
		return err
	}
	if err = c.Start(); err != nil {
		return err
	}
	if cont != nil {
		if err = cont(reader); err != nil {
			return err
		}
	}
	buffer := bytes.NewBuffer(nil)
	buffer.ReadFrom(stderr)
	if buffer.Len() != 0 {
		log.Print("Command had output on stderr.\n Cmd: ", strings.Join(c.Args, " "), "\nstderr: ", buffer)
	}
	return c.Wait()
}
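
// Usage sketch (not part of the original code; the helper name and command are
// hypothetical): callCont runs a command, hands its stdout to the callback,
// logs anything written to stderr, and waits for the command to exit. Passing
// a nil callback discards stdout.
func listRoot() error {
	return callCont(exec.Command("ls", "/"), func(r io.Reader) error {
		_, err := io.Copy(os.Stdout, r)
		return err
	})
}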
func main() {
	if err := do(); err != nil {
		log.Print(err)
		os.Exit(1)
	}
	os.Exit(0)
}
func (s *shard) FindRole() {
	client := etcd.NewClient([]string{"http://172.17.42.1:4001", "http://10.1.42.1:4001"})
	defer client.Close()
	gotRole := false
	var err error
	for ; true; <-time.After(time.Second * 45) {
		// Renew our role if we have one.
		if gotRole {
			_, err := client.CompareAndSwap(s.key(), s.url, 60, s.url, 0)
			if err != nil {
				log.Print(err)
			}
			continue
		}
		// Figure out if we should take on a new role.
		s.shard, err = s.freeRole()
		if err != nil {
			continue
		}
		_, err = client.Create(s.key(), s.url, 60)
		if err != nil {
			continue
		}
		s.dataRepo = fmt.Sprintf("data-%d-%d", s.shard, s.modulos)
		s.pipelinePrefix = fmt.Sprintf("pipe-%d-%d", s.shard, s.modulos)
		err = s.EnsureRepos()
		if err != nil {
			continue
		}
		gotRole = true
	}
}
// WaitAnyFile returns as soon as ANY of the files exists.
// It returns an error if waiting for ANY of the files errors.
func WaitAnyFile(files ...string) (string, error) {
	// Channel for files that appear.
	done := make(chan string, len(files))
	// Channel for errors that occur.
	errors := make(chan error, len(files))
	cancellers := make([]chan struct{}, len(files))
	// Make sure that everyone gets cancelled after this function exits.
	defer func() {
		for _, c := range cancellers {
			c <- struct{}{}
		}
	}()
	for i := range files {
		file := files[i]
		cancellers[i] = make(chan struct{}, 1)
		go func(i int) {
			err := WaitFile(file, cancellers[i])
			if err != nil {
				// Never blocks due to size of channel's buffer.
				errors <- err
			}
			// Never blocks due to size of channel's buffer.
			done <- file
		}(i)
	}
	select {
	case file := <-done:
		log.Print("Done: ", file)
		return file, nil
	case err := <-errors:
		return "", err
	}
}
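
// Usage sketch (not part of the original code; the file names are
// hypothetical): block until whichever marker file shows up first.
func waitForResult() error {
	file, err := WaitAnyFile("job/success", "job/failure")
	if err != nil {
		return err
	}
	log.Print("first file to appear: ", file)
	return nil
}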
func do() error { log.Print("Starting up...") r, err := router.RouterFromArgs() if err != nil { return err } return r.RunServer() }
func (p *pipeline) clean() error {
	log.Print("p.createdCommits: ", p.createdCommits)
	for _, commit := range p.createdCommits {
		if err := btrfs.DeleteCommit(commit); err != nil {
			return err
		}
	}
	return nil
}
// Image sets the image that is being used for computations.
func (p *pipeline) image(image string) error {
	p.config.Config.Image = image
	// TODO(pedge): ensure images are on machine
	err := pullImage(image)
	if err != nil {
		log.Print("assuming image is local and continuing")
	}
	return nil
}
// pushToOutputs should be called after `finish` to push the outputted data to s3.
func (p *pipeline) pushToOutputs() error {
	if p.externalOutput == "" {
		return nil
	}
	client := s3utils.NewClient(false)
	bucket, err := s3utils.GetBucket(p.externalOutput)
	if err != nil {
		return err
	}
	pathPrefix, err := s3utils.GetPath(p.externalOutput)
	if err != nil {
		return err
	}
	files, err := btrfs.NewIn(p.outRepo, p.commit)
	if err != nil {
		return err
	}
	var wg sync.WaitGroup
	for _, file := range files {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			key := path.Join(pathPrefix, file)
			f, err := btrfs.Open(path.Join(p.outRepo, p.commit, file))
			if err != nil {
				log.Print(err)
				return
			}
			defer f.Close()
			acl := "private"
			if _, err = client.PutObject(&s3.PutObjectInput{
				Bucket: &bucket,
				Key:    &key,
				Body:   f,
				ACL:    &acl,
			}); err != nil {
				log.Print(err)
				return
			}
		}(file)
	}
	wg.Wait()
	return nil
}
// WaitFile waits for a file to exist in the filesystem.
// NOTE: You NEVER want to pass an unbuffered channel as cancel because
// WaitFile provides no guarantee that it will ever read from cancel. Thus if
// you passed an unbuffered channel as cancel sending to the channel may block
// forever.
func WaitFile(name string, cancel chan struct{}) error {
	log.Print("WaitFile(", name, ")")
	dir, err := largestExistingPath(name)
	if err != nil {
		return err
	}
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return err
	}
	defer watcher.Close()
	if err := watcher.Add(FilePath(dir)); err != nil {
		return err
	}
	// Notice that we check to see if the file exists AFTER we create the watcher.
	// That means if we don't see the file with this check we're guaranteed to
	// get a notification for it.
	exists, err := FileExists(name)
	if err != nil {
		return err
	}
	if exists {
		log.Print("Found: ", name)
		return nil
	}
	for {
		select {
		case event := <-watcher.Events:
			if event.Op == fsnotify.Create && event.Name == FilePath(name) {
				return nil
			} else if event.Op == fsnotify.Create && strings.HasPrefix(FilePath(name), event.Name) {
				// fsnotify isn't recursive so we need to recurse for it.
				return WaitFile(name, cancel)
			}
		case err := <-watcher.Errors:
			return err
		case <-cancel:
			return ErrCancelled
		}
	}
}
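
// Usage sketch (not part of the original code; the file name and timeout are
// hypothetical): per the NOTE above, cancel must be buffered so the caller can
// always send a cancellation without blocking.
func waitWithTimeout() error {
	cancel := make(chan struct{}, 1)
	result := make(chan error, 1)
	go func() { result <- WaitFile("repo/commit/file", cancel) }()
	select {
	case err := <-result:
		return err
	case <-time.After(30 * time.Second):
		cancel <- struct{}{} // never blocks because cancel is buffered
		return ErrCancelled
	}
}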
// Run runs a command in the container, it assumes that `branch` has already
// been created.
// Notice that any failure in this function leads to the branch having
// uncommitted dirty changes. This state needs to be cleaned up before the
// pipeline is rerun. The reason we don't do it here is that even if we try our
// best the process crashing at the wrong time could still leave it in an
// inconsistent state.
func (p *pipeline) run(cmd []string) error {
	// This function always increments counter.
	defer func() { p.counter++ }()
	// Check if the commit already exists.
	exists, err := btrfs.FileExists(path.Join(p.outRepo, p.runCommit()))
	if err != nil {
		return err
	}
	// If the commit exists there's no work to be done.
	if exists {
		return nil
	}
	// Set the command.
	p.config.Config.Cmd = []string{"sh"}
	//p.config.Config.Volumes["/out"] = emptyStruct()
	// Map the out directory in as a bind.
	hostPath := btrfs.HostPath(path.Join(p.outRepo, p.branch))
	bind := fmt.Sprintf("%s:/out", hostPath)
	p.config.HostConfig.Binds = append(p.config.HostConfig.Binds, bind)
	log.Print(p.config.HostConfig.Binds)
	// Make sure this bind is only visible for the duration of run.
	defer func() { p.config.HostConfig.Binds = p.config.HostConfig.Binds[:len(p.config.HostConfig.Binds)-1] }()
	// Start the container.
	p.container, err = startContainer(p.config)
	if err != nil {
		return err
	}
	if err := pipeToStdin(p.container, strings.NewReader(strings.Join(cmd, " ")+"\n")); err != nil {
		return err
	}
	// Create a place to put the logs.
	f, err := btrfs.CreateAll(path.Join(p.outRepo, p.branch, ".log"))
	if err != nil {
		return err
	}
	defer f.Close()
	// Copy the logs from the container in to the file.
	if err = containerLogs(p.container, f); err != nil {
		return err
	}
	// Wait for the command to finish.
	exit, err := waitContainer(p.container)
	if err != nil {
		return err
	}
	if exit != 0 {
		// The command errored.
		return fmt.Errorf("Command:\n\t%s\nhad exit code: %d.\n", strings.Join(cmd, " "), exit)
	}
	return btrfs.Commit(p.outRepo, p.runCommit(), p.branch)
}
func do() error {
	if err := btrfs.CheckVersion(); err != nil {
		return err
	}
	shardNum := flag.Int("shard", -1, "Optional. The shard to service.")
	modulos := flag.Int("modulos", 4, "The total number of shards.")
	address := flag.String("address", "", "Optional. The address to advertise for this node.")
	flag.Parse()
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return err
	}
	if *address == "" {
		// No address, we'll try to use our ip addr instead.
		for _, addr := range addrs {
			if ipnet, ok := addr.(*net.IPNet); ok && !ipnet.IP.IsLoopback() {
				if ipnet.IP.To4() != nil {
					*address = ipnet.IP.String()
					break
				}
			}
		}
	}
	if *address == "" {
		return fmt.Errorf("pfs: Couldn't find machine ip.")
	}
	shard := storage.NewShard(
		"http://"+*address,
		fmt.Sprintf("data-%d-%d", *shardNum, *modulos),
		fmt.Sprintf("pipe-%d-%d", *shardNum, *modulos),
		uint64(*shardNum),
		uint64(*modulos),
		etcache.NewCache(),
	)
	if *shardNum == -1 {
		go shard.FindRole()
	} else {
		if err := shard.EnsureRepos(); err != nil {
			return err
		}
		go shard.FillRole()
	}
	log.Print("Listening on port 80...")
	return http.ListenAndServe(":80", storage.NewShardHTTPHandler(shard))
}
func (s *shard) masters() ([]string, error) {
	result := make([]string, s.modulos)
	client := etcd.NewClient([]string{"http://172.17.42.1:4001", "http://10.1.42.1:4001"})
	defer client.Close()
	response, err := client.Get("/pfs/master", false, true)
	if err == nil {
		log.Print("len(response.Node.Nodes): ", len(response.Node.Nodes))
		for _, node := range response.Node.Nodes {
			// node.Key looks like " /pachyderm.io/pfs/0-5"
			//                       0 1             2   3
			key := strings.Split(node.Key, "/")
			shard, _, err := route.ParseShard(key[3])
			if err != nil {
				return nil, err
			}
			result[shard] = node.Value
		}
	}
	return result, nil
}
func TestWaitAnyFile(t *testing.T) {
	t.Parallel()
	src := "repo_TestWaitAnyFile"
	require.NoError(t, Init(src))
	complete := make(chan struct{})
	go func() {
		file, err := WaitAnyFile(src+"/file1", src+"/file2")
		log.Print("WaitedOn: ", file)
		require.NoError(t, err)
		if file != src+"/file2" {
			t.Fatal("Got the wrong file.")
		}
		complete <- struct{}{}
	}()
	WriteFile(src+"/file2", nil)
	select {
	case <-complete:
		// We passed the test.
		return
	case <-time.After(time.Second * 10):
		t.Fatal("Timeout waiting for file.")
	}
}
func (r *Runner) Cancel() error {
	log.Print("Cancel: ", r)
	// A channel for the errors, notice that its capacity is the same as the
	// number of pipelines. The code below should make sure that each pipeline
	// only sends 1 error, otherwise deadlock may occur.
	errors := make(chan error, len(r.pipelines))
	// Make sure we don't race with Run.
	r.lock.Lock()
	// Indicate that we're cancelling the pipelines.
	r.cancelled = true
	// A waitgroup for the goroutines that cancel the containers.
	var wg sync.WaitGroup
	// We'll have one goroutine per pipeline.
	wg.Add(len(r.pipelines))
	for _, p := range r.pipelines {
		go func(p *pipeline) {
			defer wg.Done()
			if err := p.cancel(); err != nil {
				errors <- err
			}
		}(p)
	}
	// Wait for the cancellations to finish.
	wg.Wait()
	r.lock.Unlock()
	close(errors)
	for err := range errors {
		return err
	}
	// At the end we wait for the pipelines to actually finish, this means that
	// once Cancel is done you can safely fire off a new batch of pipelines.
	r.wait.Wait()
	return nil
}
// runPachFile parses r as a PachFile and executes it. runPachFile is GUARANTEED
// to call either `finish` or `fail`.
func (p *pipeline) runPachFile(r io.Reader) (retErr error) {
	defer func() {
		// This deferred function GUARANTEES that if `p.finish()` didn't happen,
		// `p.fail()` does.
		finished, err := p.finished()
		if err != nil {
			if retErr == nil {
				retErr = err
			}
			return
		}
		if finished {
			return
		}
		if err := p.fail(); err != nil {
			if retErr == nil {
				retErr = err
			}
			return
		}
	}()
	defer func() {
		if err := p.clean(); err != nil {
			if retErr == nil {
				retErr = err
			}
		}
	}()
	lines := bufio.NewScanner(r)
	if err := p.start(); err != nil {
		return err
	}
	var tokens []string
	for lines.Scan() {
		if p.cancelled {
			return ErrCancelled
		}
		if len(tokens) > 0 && tokens[len(tokens)-1] == "\\" {
			// We have tokens from the last loop, remove the \ token which designates the line wrap.
			tokens = tokens[:len(tokens)-1]
		} else {
			// No line wrap, clear the tokens, they were already executed.
			tokens = []string{}
		}
		tokens = append(tokens, strings.Fields(lines.Text())...)
		// These conditions are: empty line, comment line and wrapped line.
		// All 3 cause us to continue, the first 2 because we're skipping them,
		// the last because we need more input.
		if len(tokens) == 0 || tokens[0][0] == '#' || tokens[len(tokens)-1] == "\\" {
			// Comment, empty or wrapped line, skip.
			continue
		}
		var err error
		switch strings.ToLower(tokens[0]) {
		case "input":
			if len(tokens) != 2 {
				return ErrArgCount
			}
			err = p.input(tokens[1])
		case "output":
			if len(tokens) != 2 {
				return ErrArgCount
			}
			err = p.output(tokens[1])
		case "image":
			if len(tokens) != 2 {
				return ErrArgCount
			}
			err = p.image(tokens[1])
		case "run":
			if len(tokens) < 2 {
				return ErrArgCount
			}
			err = p.run(tokens[1:])
		case "shuffle":
			if len(tokens) != 2 {
				return ErrArgCount
			}
			err = p.shuffle(tokens[1])
		default:
			return ErrUnkownKeyword
		}
		if err != nil {
			log.Print(err)
			return err
		}
	}
	if err := p.finish(); err != nil {
		return err
	}
	if err := p.pushToOutputs(); err != nil {
		return err
	}
	return nil
}
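
// Sketch (not part of the original code): an example PachFile illustrating the
// syntax runPachFile accepts, including a comment line and a wrapped `run`
// command. The image, input name, and shell command here are hypothetical.
const examplePachFile = `
# count the lines in every input file
image ubuntu
input data
run cat /in/data/* | \
    sort | uniq -c > /out/counts
`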
// inject injects data from an external source into the output directory.
func (p *pipeline) inject(name string, public bool) error {
	switch {
	case strings.HasPrefix(name, "s3://"):
		bucket, err := s3utils.GetBucket(name)
		if err != nil {
			return err
		}
		client := s3utils.NewClient(public)
		var wg sync.WaitGroup
		s3utils.ForEachFile(name, public, "", func(file string, modtime time.Time) error {
			// Grab the path, it's handy later.
			_path, err := s3utils.GetPath(name)
			if err != nil {
				return err
			}
			// Check if the file belongs on this shard.
			match, err := route.Match(file, p.shard)
			if err != nil {
				return err
			}
			if !match {
				return nil
			}
			// Check if the file has changed.
			changed, err := btrfs.Changed(path.Join(p.outRepo, p.branch, strings.TrimPrefix(file, _path)), modtime)
			if err != nil {
				return err
			}
			if !changed {
				return nil
			}
			// TODO match the on disk timestamps to s3's timestamps and make
			// sure we only pull data that has changed
			wg.Add(1)
			go func() {
				defer wg.Done()
				response, err := client.GetObject(&s3.GetObjectInput{
					Bucket: &bucket,
					Key:    &file,
				})
				if err != nil {
					return
				}
				src := response.Body
				dst, err := btrfs.CreateAll(path.Join(p.outRepo, p.branch, strings.TrimPrefix(file, _path)))
				if err != nil {
					return
				}
				defer dst.Close()
				if _, err := io.Copy(dst, src); err != nil {
					return
				}
				if err := btrfs.Chtimes(path.Join(p.outRepo, p.branch, strings.TrimPrefix(file, _path)), modtime, modtime); err != nil {
					return
				}
			}()
			return nil
		})
		wg.Wait()
	default:
		log.Print("Unknown protocol: ", name)
		return ErrUnknownProtocol
	}
	return nil
}