func rawCat(w io.Writer, name string) error { f, err := btrfs.Open(name) if err != nil { return err } defer f.Close() if _, err := io.Copy(w, f); err != nil { return err } return nil }
// pushToOutputs should be called after `finish` to push the outputted data to s3 func (p *pipeline) pushToOutputs() error { if p.externalOutput == "" { return nil } client := s3utils.NewClient(false) bucket, err := s3utils.GetBucket(p.externalOutput) if err != nil { return err } pathPrefix, err := s3utils.GetPath(p.externalOutput) if err != nil { return err } files, err := btrfs.NewIn(p.outRepo, p.commit) if err != nil { return err } var wg sync.WaitGroup for _, file := range files { wg.Add(1) go func(file string) { defer wg.Done() key := path.Join(pathPrefix, file) f, err := btrfs.Open(path.Join(p.outRepo, p.commit, file)) if err != nil { log.Print(err) return } acl := "private" defer f.Close() if _, err = client.PutObject(&s3.PutObjectInput{ Bucket: &bucket, Key: &key, Body: f, ACL: &acl, }); err != nil { log.Print(err) return } }(file) } wg.Wait() return nil }
func (s *shard) fileGet(dir string, name string) (File, error) { path := path.Join(dir, name) info, err := btrfs.Stat(path) if err != nil { return File{}, err } if info.IsDir() { return File{}, ErrIsDirectory } file, err := btrfs.Open(path) if err != nil { return File{}, err } return File{ name, info.ModTime(), file, }, nil }
// Inputs returns all of the inputs for the pipelines. func (r *Runner) Inputs() ([]string, error) { pipelines, err := btrfs.ReadDir(path.Join(r.inRepo, r.commit, r.pipelineDir)) if err != nil { // Notice we don't return this error but instead no-op. It's fine to not // have a pipeline dir. return nil, nil } var res []string for _, pInfo := range pipelines { f, err := btrfs.Open(path.Join(r.inRepo, r.commit, r.pipelineDir, pInfo.Name())) if err != nil { return nil, err } defer f.Close() lines := bufio.NewScanner(f) // TODO we're copy-pasting code from runPachFile. Let's abstract that. var tokens []string for lines.Scan() { if len(tokens) > 0 && tokens[len(tokens)-1] == "\\" { // We have tokens from last loop, remove the \ token which designates the line wrap tokens = tokens[:len(tokens)-1] } else { // No line wrap, clear the tokens they were already considered tokens = []string{} } tokens = append(tokens, strings.Fields(lines.Text())...) if len(tokens) > 0 && tokens[0] == "input" { if len(tokens) < 2 { return nil, ErrArgCount } res = append(res, tokens[1]) } } } return res, nil }
// Run runs all of the pipelines it finds in pipelineDir. Returns the // first error it encounters. func (r *Runner) Run() error { if err := btrfs.MkdirAll(r.outPrefix); err != nil { return err } if err := r.startInputPipelines(); err != nil { return err } pipelines, err := btrfs.ReadDir(path.Join(r.inRepo, r.commit, r.pipelineDir)) if err != nil { // Notice we don't return this error but instead no-op. It's fine to not // have a pipeline dir. return nil } // A chanel for the errors, notice that it's capacity is the same as the // number of pipelines. The below code should make sure that each pipeline only // sends 1 error otherwise deadlock may occur. errors := make(chan error, len(pipelines)) // Make sure we don't race with cancel this is held while we add pipelines. r.lock.Lock() if r.cancelled { // we were cancelled before we even started r.lock.Unlock() return ErrCancelled } for _, pInfo := range pipelines { if err := r.makeOutRepo(pInfo.Name()); err != nil { return err } p := newPipeline(pInfo.Name(), r.inRepo, path.Join(r.outPrefix, pInfo.Name()), r.commit, r.branch, r.shard, r.outPrefix, r.cache) r.pipelines = append(r.pipelines, p) } // unlocker lets us defer unlocking and explicitly unlock var unlocker sync.Once defer unlocker.Do(r.lock.Unlock) for _, p := range r.pipelines { p := p r.wait.Add(1) go func() { defer r.wait.Done() f, err := btrfs.Open(path.Join(r.inRepo, r.commit, r.pipelineDir, p.name)) if err != nil { errors <- err return } defer f.Close() err = p.runPachFile(f) if err != nil { errors <- err return } }() } // We're done adding pipelines so unlock unlocker.Do(r.lock.Unlock) // Wait for the pipelines to finish r.wait.Wait() close(errors) if r.cancelled { // Pipelines finished because we were cancelled return ErrCancelled } for err := range errors { return err } return nil }