예제 #1
0
파일: pipeline.go 프로젝트: plar/pachyderm
// pushToOutputs should be called after `finish` to push the outputted data to s3.
//
// It is a no-op when p.externalOutput is empty. Otherwise each file listed by
// btrfs.NewIn for p.outRepo at p.commit is uploaded in its own goroutine with
// a "private" ACL, keyed by the path prefix of p.externalOutput joined with
// the file name.
//
// Every upload failure is logged. After all uploads have finished, the first
// failure that was recorded is returned so callers can tell an incomplete
// push from a successful one; nil is returned when every upload succeeded.
func (p *pipeline) pushToOutputs() error {
	if p.externalOutput == "" {
		return nil
	}
	client := s3utils.NewClient(false)
	bucket, err := s3utils.GetBucket(p.externalOutput)
	if err != nil {
		return err
	}
	pathPrefix, err := s3utils.GetPath(p.externalOutput)
	if err != nil {
		return err
	}
	files, err := btrfs.NewIn(p.outRepo, p.commit)
	if err != nil {
		return err
	}

	var (
		wg       sync.WaitGroup
		mu       sync.Mutex // guards firstErr
		firstErr error
	)
	// fail logs err and remembers it if it is the first failure seen.
	fail := func(err error) {
		log.Print(err)
		mu.Lock()
		defer mu.Unlock()
		if firstErr == nil {
			firstErr = err
		}
	}

	// The ACL is the same for every object, so it is shared read-only.
	acl := "private"
	for _, file := range files {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			f, err := btrfs.Open(path.Join(p.outRepo, p.commit, file))
			if err != nil {
				fail(err)
				return
			}
			defer f.Close()
			key := path.Join(pathPrefix, file)
			if _, err := client.PutObject(&s3.PutObjectInput{
				Bucket: &bucket,
				Key:    &key,
				Body:   f,
				ACL:    &acl,
			}); err != nil {
				fail(err)
			}
		}(file)
	}
	// Wait for every upload before reporting, so no goroutine outlives the call.
	wg.Wait()
	return firstErr
}
예제 #2
0
파일: pipeline.go 프로젝트: plar/pachyderm
// inject injects data from an external source into the output directory.
//
// Only names starting with "s3://" are handled; any other name is logged and
// rejected with ErrUnknownProtocol.
//
// For an s3 name, each file passed to the s3utils.ForEachFile callback is
// skipped when route.Match reports it does not match p.shard, or when
// btrfs.Changed reports no change for the file's modtime. Every remaining
// file is fetched in its own goroutine, written to
// p.outRepo/p.branch/<key with the URI's path prefix trimmed>, and then has
// its times set to modtime with btrfs.Chtimes.
//
// inject waits for every started download before returning. An error from
// s3utils.ForEachFile takes precedence; otherwise the first recorded download
// failure is returned, or nil if there was none. Download failures are also
// logged.
func (p *pipeline) inject(name string, public bool) error {
	switch {
	case strings.HasPrefix(name, "s3://"):
		bucket, err := s3utils.GetBucket(name)
		if err != nil {
			return err
		}
		// The path prefix depends only on name, so resolve it once up front
		// instead of once per listed file.
		prefix, err := s3utils.GetPath(name)
		if err != nil {
			return err
		}
		client := s3utils.NewClient(public)

		var (
			wg       sync.WaitGroup
			mu       sync.Mutex // guards firstErr
			firstErr error
		)
		// fail logs err and remembers it if it is the first failure seen.
		fail := func(err error) {
			log.Print(err)
			mu.Lock()
			defer mu.Unlock()
			if firstErr == nil {
				firstErr = err
			}
		}

		walkErr := s3utils.ForEachFile(name, public, "", func(file string, modtime time.Time) error {
			// Check if the file belongs on this shard
			match, err := route.Match(file, p.shard)
			if err != nil {
				return err
			}
			if !match {
				return nil
			}
			// Local destination for this key, with the URI's prefix removed.
			local := path.Join(p.outRepo, p.branch, strings.TrimPrefix(file, prefix))
			// Check if the file has changed
			changed, err := btrfs.Changed(local, modtime)
			if err != nil {
				return err
			}
			if !changed {
				return nil
			}
			// TODO match the on disk timestamps to s3's timestamps and make
			// sure we only pull data that has changed
			wg.Add(1)
			go func() {
				defer wg.Done()
				response, err := client.GetObject(&s3.GetObjectInput{
					Bucket: &bucket,
					Key:    &file,
				})
				if err != nil {
					fail(err)
					return
				}
				// Close the body on every exit path so the response's
				// resources are released.
				defer response.Body.Close()

				dst, err := btrfs.CreateAll(local)
				if err != nil {
					fail(err)
					return
				}
				defer dst.Close()
				if _, err := io.Copy(dst, response.Body); err != nil {
					fail(err)
					return
				}
				if err := btrfs.Chtimes(local, modtime, modtime); err != nil {
					fail(err)
				}
			}()
			return nil
		})
		// Wait even when the listing failed, so no download outlives this call.
		wg.Wait()
		if walkErr != nil {
			return walkErr
		}
		return firstErr
	default:
		log.Print("Unknown protocol: ", name)
		return ErrUnknownProtocol
	}
}