Exemple #1
0
// SafeCreateS3Bucket builds an S3Bucket handle for storing files in an
// s3-compatible blobstore reachable at domain.
//
// An empty bucket name is rejected with an error. When either accessKey or
// secretKey is empty, credentials are loaded with s3gof3r.EnvKeys instead, and
// any error from that lookup is returned unchanged.
func SafeCreateS3Bucket(domain, bucket, accessKey, secretKey string) (*S3Bucket, error) {
	if bucket == "" {
		return nil, errors.New("bucket name is undefined")
	}

	// Start from the explicit credentials; fall back to the environment only
	// when they are incomplete.
	keys := s3gof3r.Keys{AccessKey: accessKey, SecretKey: secretKey}
	if accessKey == "" || secretKey == "" {
		envKeys, err := s3gof3r.EnvKeys()
		if err != nil {
			return nil, err
		}
		keys = envKeys
	}

	client := s3gof3r.New(domain, keys)
	return &S3Bucket{
		Bucket:    bucket,
		Name:      "s3",
		Domain:    domain,
		AccessKey: accessKey,
		SecretKey: secretKey,
		bucket:    client.Bucket(bucket),
	}, nil
}
// main uploads the file named by file_name into the bucket named by
// bucket_name, using S3 credentials taken from the environment. Any failure,
// including a failure while closing the upload writer, terminates the program
// through log.Fatal.
func main() {

	s3gof3r.SetLogger(os.Stdout, "", log.LstdFlags, false)

	// STARTEXAMPLE OMIT
	presentation, err := os.Open(file_name) // open presentation file
	if err != nil {
		log.Fatal(err)
	}

	k, err := s3gof3r.EnvKeys() // get S3 keys from environment
	if err != nil {
		log.Fatal(err)
	}
	// Open bucket to put file into
	s3 := s3gof3r.New("", k)
	b := s3.Bucket(bucket_name)

	// Open a PutWriter for upload
	w, err := b.PutWriter(presentation.Name(), nil, nil)
	if err != nil {
		log.Fatal(err)
	}
	if _, err = io.Copy(w, presentation); err != nil { // Copy into S3
		log.Fatal(err)
	}
	// Close explicitly rather than via defer: its error must be checked before
	// success is reported (per the s3gof3r docs, Close is what completes the
	// upload), and a deferred call would be skipped by log.Fatal anyway.
	if err = w.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("%s uploaded to %s", file_name, bucket_name)
	// STOPEXAMPLE OMIT
}
Exemple #3
0
// getAWSKeys looks up AWS credentials, trying s3gof3r.EnvKeys first and
// falling back to s3gof3r.InstanceKeys when that fails. If both lookups fail,
// their individual errors are replaced by a single "no AWS keys found" error.
func getAWSKeys() (keys s3gof3r.Keys, err error) {
	if keys, err = s3gof3r.EnvKeys(); err == nil {
		return keys, nil
	}
	if keys, err = s3gof3r.InstanceKeys(); err == nil {
		return keys, nil
	}
	return keys, errors.New("no AWS keys found")
}
// newS3Output opens an S3 upload writer described by conf.
//
// conf keys read: "bucket" and "filename" (both must be non-empty) and
// "endpoint" (handed to s3gof3r.New as-is). Credentials are loaded with
// s3gof3r.EnvKeys; an error from that lookup or from PutWriter is returned
// unchanged.
func newS3Output(conf map[string]string) (output, error) {
	bucketName, fileName := conf["bucket"], conf["filename"]
	switch {
	case bucketName == "":
		return nil, errors.New("No bucket specified")
	case fileName == "":
		return nil, errors.New("No file name specified")
	}

	creds, err := s3gof3r.EnvKeys()
	if err != nil {
		return nil, err
	}

	client := s3gof3r.New(conf["endpoint"], creds)
	return client.Bucket(bucketName).PutWriter(fileName, nil, nil)
}
// NewS3ImageFactory returns an ImageFactory whose NewImage callback builds
// images through NewS3Image, bound to the S3 bucket named bucketName.
//
// Credentials are loaded with s3gof3r.EnvKeys. The process is terminated via
// log.Fatalf when the credentials cannot be loaded or when no bucket handle
// is obtained.
func NewS3ImageFactory(bucketName string) *ImageFactory {
	k, err := s3gof3r.EnvKeys() // get S3 keys from environment
	if err != nil {
		// Fatalf with an explicit separator: log.Fatal(msg, err) would print
		// the message and the error run together.
		log.Fatalf("Unable to init s3: %v", err)
	}

	// Open bucket to put file into
	s3 := s3gof3r.New("", k)

	bucket := s3.Bucket(bucketName)
	if bucket == nil {
		// err is always nil on this path, so report the bucket name instead.
		log.Fatalf("Unable to init s3: no bucket handle for %q", bucketName)
	}

	factory := new(ImageFactory)
	factory.NewImage = func(r, o string, b bool) ImageFile {
		return NewS3Image(s3, bucket, r, o, b)
	}
	return factory
}
// TestCoreCompliance runs the shared transmat compliance checks against the
// S3 transmat. The test is skipped when s3gof3r.EnvKeys reports an error.
func TestCoreCompliance(t *testing.T) {
	_, err := s3gof3r.EnvKeys()
	if err != nil {
		t.Skipf("skipping s3 output tests; no s3 credentials loaded (err: %s)", err)
	}

	// Everything this run touches is grouped under one per-run "dir", for
	// human reader sanity and for cleanup in extremis.
	runPrefix := "repeatr-test/test-" + guid.New()
	literalURI := "s3://" + runPrefix + "/rt/obj.tar"
	splayURI := "s3+splay://" + runPrefix + "/rt-splay/heap/"

	Convey("Spec Compliance: S3 Transmat", t, testutil.WithTmpdir(func() {
		// Scanning checks.
		tests.CheckScanWithoutMutation(Kind, New)
		tests.CheckScanProducesConsistentHash(Kind, New)
		tests.CheckScanProducesDistinctHashes(Kind, New)
		tests.CheckScanEmptyIsCalm(Kind, New)
		tests.CheckScanWithFilters(Kind, New)

		// Round-trip checks, one per supported URI scheme.
		tests.CheckRoundTrip(Kind, New, literalURI, "literal path")
		tests.CheckRoundTrip(Kind, New, splayURI, "content addressible path")
	}))
}
Exemple #7
0
// UploadToS3 streams file into the bucket named by S3Bucket under the key
// "challenge/<id>", where id is a freshly generated version-4 UUID.
//
// On success it returns the generated id. On any failure (credential lookup,
// UUID generation, opening the writer, copying, or closing the writer) the
// error is logged and returned, and id is the empty string, so callers never
// receive an id for an object that was not fully uploaded.
func UploadToS3(file multipart.File) (id string, err error) {
	k, err := s3gof3r.EnvKeys()
	if err != nil {
		log.Println(err.Error())
		return "", err
	}

	// Open bucket to put file into
	s3 := s3gof3r.New(S3Domain, k)
	// Return value is discarded; the call is kept exactly as in the original.
	// NOTE(review): confirm whether it is needed (e.g. as an early sanity
	// check of S3Domain) or can be dropped.
	s3.Region()
	b := s3.Bucket(S3Bucket)

	// Generate unique name
	u4, err := uuid.NewV4()
	if err != nil {
		log.Println(err.Error())
		return "", err
	}
	name := u4.String()

	// Open a PutWriter for upload
	w, err := b.PutWriter("challenge/"+name, nil, nil)
	if err != nil {
		log.Println(err.Error())
		return "", err
	}

	if _, err = io.Copy(w, file); err != nil {
		log.Println(err.Error())
		return "", err
	}
	// The Close error is checked before the id is handed out.
	if err = w.Close(); err != nil {
		log.Println(err.Error())
		return "", err
	}

	return name, nil
}
Exemple #8
0
// GetStats downloads the object stored under "challenge/<id>" in the bucket
// named by S3Bucket, runs WordStats over its full contents, and returns the
// result with its Name field set to id.
//
// Any error from the credential lookup, from opening the reader, or from
// reading the object body is returned together with a zero-valued Parser;
// statistics are never computed over a partially read object.
func GetStats(id string) (p Parser, err error) {
	k, err := s3gof3r.EnvKeys()
	if err != nil {
		return p, err
	}
	s3 := s3gof3r.New(S3Domain, k)
	// Return value is discarded; the call is kept exactly as in the original.
	// NOTE(review): confirm whether it is needed or can be dropped.
	s3.Region()
	b := s3.Bucket(S3Bucket)

	r, _, err := b.GetReader("challenge/"+id, nil)
	if err != nil {
		return p, err
	}
	defer r.Close()

	// A failed or truncated download must surface as an error rather than
	// silently producing stats for whatever bytes happened to arrive.
	var buf bytes.Buffer
	if _, err = io.Copy(&buf, r); err != nil {
		return p, err
	}

	p = WordStats(buf.String())
	p.Name = id
	return p, nil
}
Exemple #9
0
/*
	Materialize reads a (possibly compressed) tar stream from an S3 warehouse,
	unpacks it into a fresh temporary directory created under `t.workPath`, and
	returns that directory as an Arena.

	Only the first entry of `siloURIs` is consulted (the loop breaks after one
	iteration).  The URI host is used as the bucket name and the URI path as
	the object path; for the "s3+splay" scheme `dataHash` is joined onto that
	path, for "s3" the path is used as given.  S3 credentials are loaded with
	`s3gof3r.EnvKeys`.

	After extraction the tree hash is compared with `dataHash` (base64url
	decoded).  On mismatch this panics unless `config.AcceptHashMismatch` is
	set, in which case the arena reports the actual hash instead.

	All failures are reported by panicking: `integrity.Error` values are
	re-panicked unchanged and anything else is wrapped in
	`integrity.UnknownError`.

	NOTE(review): the previous header here read "Arenas produced by Dir
	Transmats may be relocated by simple `mv`", which looks copied from the
	dir transmat -- confirm whether that property is meant to hold for
	arenas produced here as well.
*/
func (t *S3Transmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Parse URI; Find warehouses.
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			//  but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		//  This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		// (As written, existence is not actually checked yet -- see the TODO below -- so the first URI always wins.)
		var warehouseBucketName string
		var warehousePathPrefix string
		var warehouseCtntAddr bool
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			warehouseBucketName = u.Host
			warehousePathPrefix = u.Path
			// The scheme selects between a literal object path ("s3") and a
			// content-addressed layout keyed by the data hash ("s3+splay").
			switch u.Scheme {
			case "s3":
				warehouseCtntAddr = false
			case "s3+splay":
				warehouseCtntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}
			// TODO figure out how to check for data (or at least warehouse!) presence;
			//  currently just assuming the first one's golden, and blowing up later if it's not.
			break
		}
		// A URI without a host component leaves the bucket name empty.
		if warehouseBucketName == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		// TODO should not require keys!  we're just reading, after all; anon access is 100% valid.
		//   Buuuuut s3gof3r doesn't seem to understand empty keys; it still sends them as if to login, and AWS says 403.  So, foo.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// initialize reader from s3!
		// For content-addressed warehouses the object lives at {prefix}/{dataHash}.
		getPath := warehousePathPrefix
		if warehouseCtntAddr {
			getPath = path.Join(warehousePathPrefix, string(dataHash))
		}
		s3reader := makeS3reader(warehouseBucketName, getPath, keys)
		defer s3reader.Close()
		// prepare decompression as necessary
		reader, err := tartrans.Decompress(s3reader)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		tartrans.Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		// (dataHash is expected to be base64url text; it is decoded to raw bytes for the comparison.)
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				// NOTE(review): this stores the raw hash bytes, whereas the matching branch stores the
				//  caller's (base64url text) dataHash -- confirm the differing encodings are intended.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		// Already one of our own error types: propagate unchanged.
		panic(err)
	}).CatchAll(func(err error) {
		// Anything else gets wrapped so callers only ever see integrity errors.
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
Exemple #10
0
/*
	Scan saves the filesystem at `subjectPath` through `tartrans.Save`, writing
	the resulting stream to one S3 upload per entry in `siloURIs`, and returns
	the commit ID that `tartrans.Save` produced.

	If `subjectPath` does not exist, the zero-valued commit ID is returned and
	no warehouse is contacted.  If `siloURIs` is empty, the stream is written
	to `ioutil.Discard`, so only the commit ID is computed.

	For each URI the host is used as the bucket name and the path as the
	object path.  With the "s3+splay" scheme, data is first uploaded to a
	temporary ".tmp.upload.*" path; with "s3" it is uploaded directly to the
	given path.  After saving, `Commit` is called on every upload controller
	with the commit ID.

	S3 credentials are loaded with `s3gof3r.EnvKeys`.  All failures are
	reported by panicking: `integrity.Error` values are re-panicked unchanged
	and anything else is wrapped in `integrity.UnknownError`.
*/
func (t S3Transmat) Scan(
	kind integrity.TransmatKind,
	subjectPath string,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.CommitID {
	var commitID integrity.CommitID
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// If scan area doesn't exist, bail immediately.
		// No need to even start dialing warehouses if we've got nothing for em.
		_, err := os.Stat(subjectPath)
		if err != nil {
			if os.IsNotExist(err) {
				return // empty commitID
			} else {
				// Any other stat failure is unexpected; the CatchAll below wraps it.
				panic(err)
			}
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// Parse URI; Find warehouses; Open output streams for writing.
		// Since these are all behaving as just one `io.Writer` stream, this could maybe be factored out.
		// Error handling is currently "anything -> panic".  This should probably be more resilient.  (That might need another refactor so we have an upload call per remote.)
		// TODO : both this and the tar code that has a similar single stream idea should use an interface
		//  And that interface should have a concept of mv so we can make atomic commits.
		//  Multiple URIs arguably should have waited until we get that, because the io.Writer interface just
		//   doesn't cut it like it did for tars (and really, it's ignoring a major issue to use it there, too).
		//  ...but multiple URIs are supported here anyway.
		controllers := make([]*s3warehousePut, 0)
		writers := make([]io.Writer, 0) // this is dumb, but we end up making one of these to satisfy the type conversion for MultiWriter anyway
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			controller := &s3warehousePut{}
			controller.bucketName = u.Host
			controller.pathPrefix = u.Path
			// The scheme selects between a literal object path ("s3") and a
			// content-addressed layout ("s3+splay").
			var ctntAddr bool
			switch u.Scheme {
			case "s3":
				ctntAddr = false
			case "s3+splay":
				ctntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}
			// dial it and initialize writer to s3!
			// if the URI indicated splay behavior, first stream data to {$bucketName}:{dirname($storePath)}/.tmp.upload.{basename($storePath)}.{random()};
			// this allows us to start uploading before the final hash is determined and relocate it later.
			// for direct paths, upload into place, because aws already manages atomicity at that scale (and they don't have a rename or copy operation that's free, because uh...?  no time to implement it since 2006, apparently).
			controller.keys = keys
			if ctntAddr {
				controller.tmpPath = path.Join(
					path.Dir(controller.pathPrefix),
					".tmp.upload."+path.Base(controller.pathPrefix)+"."+guid.New(),
				)
				controller.stream = makeS3writer(controller.bucketName, controller.tmpPath, keys)
			} else {
				controller.stream = makeS3writer(controller.bucketName, controller.pathPrefix, keys)
			}
			controllers = append(controllers, controller)
			writers = append(writers, controller.stream)
		}
		// Fan the single saved stream out to every opened upload; with no
		// destinations at all, fall back to discarding the bytes.
		stream := io.MultiWriter(writers...)
		if len(writers) < 1 {
			stream = ioutil.Discard
		}

		// walk, fwrite, hash
		commitID = integrity.CommitID(tartrans.Save(stream, subjectPath, config.FilterSet, hasherFactory))

		// commit
		// NOTE(review): Commit is defined elsewhere; presumably it finalizes the upload and, for
		//  splay uploads, relocates tmpPath to the hash-named path -- confirm against s3warehousePut.
		for _, controller := range controllers {
			controller.Commit(string(commitID))
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		// Already one of our own error types: propagate unchanged.
		panic(err)
	}).CatchAll(func(err error) {
		// Anything else gets wrapped so callers only ever see integrity errors.
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return commitID
}