Example 1
// rawCat copies the raw contents of the named btrfs file to w.
func rawCat(w io.Writer, name string) error {
	f, err := btrfs.Open(name)
	if err != nil {
		return err
	}
	defer f.Close()

	if _, err := io.Copy(w, f); err != nil {
		return err
	}
	return nil
}
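For context, a minimal caller sketch (not from the original source): it streams one btrfs-backed file straight to stdout. It assumes it lives in the same package as rawCat, imports os and log, and the file path is purely hypothetical.

func main() {
	// Stream the raw contents of one btrfs-backed file to stdout.
	if err := rawCat(os.Stdout, "repo/commit/data.txt"); err != nil { // hypothetical path
		log.Fatal(err)
	}
}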
Example 2
// pushToOutputs should be called after `finish` to push the output data to S3.
func (p *pipeline) pushToOutputs() error {
	if p.externalOutput == "" {
		return nil
	}
	client := s3utils.NewClient(false)
	bucket, err := s3utils.GetBucket(p.externalOutput)
	if err != nil {
		return err
	}
	pathPrefix, err := s3utils.GetPath(p.externalOutput)
	if err != nil {
		return err
	}
	files, err := btrfs.NewIn(p.outRepo, p.commit)
	if err != nil {
		return err
	}
	var wg sync.WaitGroup
	for _, file := range files {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			key := path.Join(pathPrefix, file)
			f, err := btrfs.Open(path.Join(p.outRepo, p.commit, file))
			if err != nil {
				log.Print(err)
				return
			}
			defer f.Close()
			acl := "private"
			if _, err = client.PutObject(&s3.PutObjectInput{
				Bucket: &bucket,
				Key:    &key,
				Body:   f,
				ACL:    &acl,
			}); err != nil {
				log.Print(err)
				return
			}
		}(file)
	}
	wg.Wait()
	return nil
}
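One thing worth noting: upload failures inside the goroutines are only logged, so pushToOutputs returns nil even when some files never reach S3. Below is a hedged sketch of one way to surface the first failure instead, reusing the buffered-channel pattern from Example 5; pushAll and upload are hypothetical stand-ins for the loop body above, not part of the original code.

package main

import (
	"fmt"
	"sync"
)

// pushAll fans out one goroutine per file and returns the first upload error.
func pushAll(files []string, upload func(string) error) error {
	var wg sync.WaitGroup
	// Capacity len(files) lets every goroutine send an error without blocking.
	errs := make(chan error, len(files))
	for _, file := range files {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			if err := upload(file); err != nil {
				errs <- err
			}
		}(file)
	}
	wg.Wait()
	close(errs)
	for err := range errs {
		return err // first error wins
	}
	return nil
}

func main() {
	err := pushAll([]string{"a", "b"}, func(file string) error {
		fmt.Println("uploading", file)
		return nil
	})
	fmt.Println("err:", err)
}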
Example 3
// fileGet stats and opens the named file under dir, returning it as a File.
func (s *shard) fileGet(dir string, name string) (File, error) {
	// Avoid shadowing the path package with a local variable named path.
	filePath := path.Join(dir, name)
	info, err := btrfs.Stat(filePath)
	if err != nil {
		return File{}, err
	}
	if info.IsDir() {
		return File{}, ErrIsDirectory
	}
	file, err := btrfs.Open(filePath)
	if err != nil {
		return File{}, err
	}
	return File{
		name,
		info.ModTime(),
		file,
	}, nil
}
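The File type itself does not appear in these examples. Judging from the unkeyed literal above, a plausible shape is sketched below; the field names are guesses, and only the order and rough types are implied by the code.

type File struct {
	Name    string        // base name within the directory
	ModTime time.Time     // from btrfs.Stat
	Content io.ReadCloser // handle from btrfs.Open (possibly an *os.File); the caller must Close it
}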
Example 4
// Inputs returns all of the inputs for the pipelines.
func (r *Runner) Inputs() ([]string, error) {
	pipelines, err := btrfs.ReadDir(path.Join(r.inRepo, r.commit, r.pipelineDir))
	if err != nil {
		// Notice we don't return this error but instead no-op; it's fine
		// not to have a pipeline dir.
		return nil, nil
	}
	var res []string
	for _, pInfo := range pipelines {
		f, err := btrfs.Open(path.Join(r.inRepo, r.commit, r.pipelineDir, pInfo.Name()))
		if err != nil {
			return nil, err
		}
		// Note: this defer doesn't run until Inputs returns, so every file
		// opened by the loop stays open until then.
		defer f.Close()
		lines := bufio.NewScanner(f)
		// TODO we're copy-pasting code from runPachFile. Let's abstract that.
		var tokens []string
		for lines.Scan() {
			if len(tokens) > 0 && tokens[len(tokens)-1] == "\\" {
				// We have tokens from last loop, remove the \ token which designates the line wrap
				tokens = tokens[:len(tokens)-1]
			} else {
				// No line wrap; clear the tokens, they were already considered.
				tokens = []string{}
			}
			tokens = append(tokens, strings.Fields(lines.Text())...)
			if len(tokens) > 0 && tokens[0] == "input" {
				if len(tokens) < 2 {
					return nil, ErrArgCount
				}
				res = append(res, tokens[1])
			}
		}
		if err := lines.Err(); err != nil {
			return nil, err
		}
	}
	return res, nil
}
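From the scanner logic, Inputs collects the second token of every line whose first token is input, and a trailing \ joins a line onto the next one. A hypothetical pipeline file it could read is shown below; Inputs would return ["images", "labels"], and the run line (an assumed directive handled by runPachFile, which isn't shown here) is skipped because its first token isn't input.

input images
input labels
run process.sh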
Example 5
// Run runs all of the pipelines it finds in pipelineDir. Returns the
// first error it encounters.
func (r *Runner) Run() error {
	if err := btrfs.MkdirAll(r.outPrefix); err != nil {
		return err
	}
	if err := r.startInputPipelines(); err != nil {
		return err
	}
	pipelines, err := btrfs.ReadDir(path.Join(r.inRepo, r.commit, r.pipelineDir))
	if err != nil {
		// Notice we don't return this error but instead no-op; it's fine
		// not to have a pipeline dir.
		return nil
	}
	// A channel for the errors; note that its capacity is the same as the
	// number of pipelines. The code below makes sure each pipeline sends at
	// most one error, otherwise a deadlock may occur.
	errors := make(chan error, len(pipelines))
	// Make sure we don't race with cancel; the lock is held while we add pipelines.
	r.lock.Lock()
	// unlocker lets us defer unlocking and also unlock explicitly, exactly once.
	// Registering the deferred unlock here (rather than after the loop below)
	// ensures the lock is released even on the early error return.
	var unlocker sync.Once
	defer unlocker.Do(r.lock.Unlock)
	if r.cancelled {
		// We were cancelled before we even started.
		return ErrCancelled
	}
	for _, pInfo := range pipelines {
		if err := r.makeOutRepo(pInfo.Name()); err != nil {
			return err
		}
		p := newPipeline(pInfo.Name(), r.inRepo, path.Join(r.outPrefix, pInfo.Name()), r.commit, r.branch, r.shard, r.outPrefix, r.cache)
		r.pipelines = append(r.pipelines, p)
	}
	for _, p := range r.pipelines {
		p := p
		r.wait.Add(1)
		go func() {
			defer r.wait.Done()
			f, err := btrfs.Open(path.Join(r.inRepo, r.commit, r.pipelineDir, p.name))
			if err != nil {
				errors <- err
				return
			}
			defer f.Close()
			if err := p.runPachFile(f); err != nil {
				errors <- err
			}
		}()
	}
	// We're done adding pipelines, so unlock.
	unlocker.Do(r.lock.Unlock)
	// Wait for the pipelines to finish
	r.wait.Wait()
	close(errors)
	if r.cancelled {
		// Pipelines finished because we were cancelled
		return ErrCancelled
	}
	for err := range errors {
		return err
	}
	return nil
}
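The unlocker idiom above deserves a note: wrapping the mutex unlock in a sync.Once lets Run release the lock explicitly as soon as the pipelines are registered, while the deferred Do still guarantees the unlock on every error path, and the two can never double-unlock. A minimal standalone sketch, independent of the original code:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var mu sync.Mutex
	mu.Lock()
	// Do runs mu.Unlock at most once, whether invoked explicitly or via defer.
	var unlock sync.Once
	defer unlock.Do(mu.Unlock)

	// ... work that must hold the lock goes here ...

	unlock.Do(mu.Unlock) // release early; the deferred Do becomes a no-op
	fmt.Println("lock released before the long-running tail of the function")
}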