// SafeCreateS3Bucket creates an s3 bucket for storing files to an s3-compatible blobstore
func SafeCreateS3Bucket(domain, bucket, accessKey, secretKey string) (*S3Bucket, error) {
	s := &S3Bucket{
		Bucket:    bucket,
		Name:      "s3",
		Domain:    domain,
		AccessKey: accessKey,
		SecretKey: secretKey,
	}
	if s.Bucket == "" {
		return nil, errors.New("bucket name is undefined")
	}
	var k s3gof3r.Keys
	var err error
	if s.AccessKey == "" || s.SecretKey == "" {
		k, err = s3gof3r.EnvKeys() // get S3 keys from environment
		if err != nil {
			return nil, err
		}
	} else {
		k = s3gof3r.Keys{
			AccessKey: s.AccessKey,
			SecretKey: s.SecretKey,
		}
	}
	s3 := s3gof3r.New(s.Domain, k)
	s.bucket = s3.Bucket(s.Bucket)
	return s, nil
}
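// Hedged usage sketch (not part of the original source): SafeCreateS3Bucket can be
// called with explicit credentials, or with empty key strings to fall back to the
// environment. The domain, bucket name, and key values below are illustrative
// assumptions, not values taken from the snippet above.
//
//	// explicit keys
//	b, err := SafeCreateS3Bucket("s3.amazonaws.com", "backups", "AKIDEXAMPLE", "secretExample")
//	if err != nil {
//		log.Fatal(err)
//	}
//	// or rely on AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY from the environment
//	b, err = SafeCreateS3Bucket("s3.amazonaws.com", "backups", "", "")
//	if err != nil {
//		log.Fatal(err)
//	}
//	_ = b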
func main() {
	s3gof3r.SetLogger(os.Stdout, "", log.LstdFlags, false)

	// STARTEXAMPLE OMIT
	presentation, err := os.Open(file_name) // open presentation file
	if err != nil {
		log.Fatal(err)
	}

	k, err := s3gof3r.EnvKeys() // get S3 keys from environment
	if err != nil {
		log.Fatal(err)
	}

	// Open bucket to put file into
	s3 := s3gof3r.New("", k)
	b := s3.Bucket(bucket_name)

	// Open a PutWriter for upload
	w, err := b.PutWriter(presentation.Name(), nil, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer w.Close()

	if _, err = io.Copy(w, presentation); err != nil { // Copy into S3
		log.Fatal(err)
	}
	log.Printf("%s uploaded to %s", file_name, bucket_name)
	// STOPEXAMPLE OMIT
}
// getAWSKeys gets the AWS keys from environment variables or the instance-based metadata on EC2.
// Environment variables are attempted first, followed by the instance-based credentials.
func getAWSKeys() (keys s3gof3r.Keys, err error) {
	keys, err = s3gof3r.EnvKeys()
	if err == nil {
		return
	}
	keys, err = s3gof3r.InstanceKeys()
	if err == nil {
		return
	}
	err = errors.New("no AWS keys found")
	return
}
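// Hedged usage sketch (not in the original source): how getAWSKeys would typically
// be consumed from within the same package. The empty domain mirrors the other
// examples in this document (s3gof3r falls back to its default endpoint); the
// bucket name "example-bucket" is an assumption.
//
//	keys, err := getAWSKeys()
//	if err != nil {
//		log.Fatal(err) // neither env vars nor EC2 instance metadata provided credentials
//	}
//	s3 := s3gof3r.New("", keys)
//	b := s3.Bucket("example-bucket")
//	_ = b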
func newS3Output(conf map[string]string) (output, error) {
	bucketName := conf["bucket"]
	if bucketName == "" {
		return nil, errors.New("No bucket specified")
	}
	fileName := conf["filename"]
	if fileName == "" {
		return nil, errors.New("No file name specified")
	}
	keys, err := s3gof3r.EnvKeys()
	if err != nil {
		return nil, err
	}
	s3 := s3gof3r.New(conf["endpoint"], keys)
	bucket := s3.Bucket(bucketName)
	return bucket.PutWriter(fileName, nil, nil)
}
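// Hedged usage sketch (not in the original source): newS3Output reads its settings
// from a plain string map. The keys shown ("bucket", "filename", "endpoint") are the
// ones the function itself consults; the values are assumptions, and the returned
// writer is assumed to satisfy io.WriteCloser since it comes from bucket.PutWriter.
//
//	w, err := newS3Output(map[string]string{
//		"bucket":   "my-bucket",
//		"filename": "dump.tar.gz",
//		"endpoint": "s3.amazonaws.com",
//	})
//	if err != nil {
//		log.Fatal(err)
//	}
//	// ... write the payload, then Close to finish the upload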
func NewS3ImageFactory(bucketName string) *ImageFactory {
	factory := new(ImageFactory)
	//log.Print(imageCollections)
	k, err := s3gof3r.EnvKeys() // get S3 keys from environment
	if err != nil {
		log.Fatal("Unable to init s3", err)
	}

	// Open bucket to put file into
	s3 := s3gof3r.New("", k)
	bucket := s3.Bucket(bucketName)
	if bucket == nil {
		log.Fatal("Unable to init s3", err)
	}

	factory.NewImage = func(r, o string, b bool) ImageFile {
		return NewS3Image(s3, bucket, r, o, b)
	}
	return factory
}
func TestCoreCompliance(t *testing.T) {
	if _, err := s3gof3r.EnvKeys(); err != nil {
		t.Skipf("skipping s3 output tests; no s3 credentials loaded (err: %s)", err)
	}
	// group all effects of this test run under one "dir" for human reader sanity and cleanup in extremis.
	testRunGuid := guid.New()
	Convey("Spec Compliance: S3 Transmat", t, testutil.WithTmpdir(func() {
		// scanning
		tests.CheckScanWithoutMutation(Kind, New)
		tests.CheckScanProducesConsistentHash(Kind, New)
		tests.CheckScanProducesDistinctHashes(Kind, New)
		tests.CheckScanEmptyIsCalm(Kind, New)
		tests.CheckScanWithFilters(Kind, New)
		// round-trip
		tests.CheckRoundTrip(Kind, New, "s3://repeatr-test/test-"+testRunGuid+"/rt/obj.tar", "literal path")
		tests.CheckRoundTrip(Kind, New, "s3+splay://repeatr-test/test-"+testRunGuid+"/rt-splay/heap/", "content addressable path")
	}))
}
func UploadToS3(file multipart.File) (id string, err error) {
	k, err := s3gof3r.EnvKeys()
	if err != nil {
		log.Println(err.Error())
		return
	}

	// Open bucket to put file into
	s3 := s3gof3r.New(S3Domain, k)
	s3.Region()
	b := s3.Bucket(S3Bucket)

	// Generate unique name
	u4, err := uuid.NewV4()
	if err != nil {
		log.Println(err.Error())
		return
	}
	id = u4.String()

	// Open a PutWriter for upload
	w, err := b.PutWriter("challenge/"+id, nil, nil)
	if err != nil {
		log.Println(err.Error())
		return
	}
	if _, err = io.Copy(w, file); err != nil {
		log.Println(err.Error())
		return
	}
	if err = w.Close(); err != nil {
		log.Println(err.Error())
		return
	}
	return
}
func GetStats(id string) (p Parser, err error) {
	k, err := s3gof3r.EnvKeys()
	if err != nil {
		return
	}
	s3 := s3gof3r.New(S3Domain, k)
	s3.Region()
	b := s3.Bucket(S3Bucket)

	r, _, err := b.GetReader("challenge/"+id, nil)
	if err != nil {
		return
	}
	defer r.Close()

	buf := bytes.NewBuffer(nil)
	io.Copy(buf, r)
	p = WordStats(string(buf.Bytes()))
	p.Name = id
	return
}
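// Hedged usage sketch (not in the original source): UploadToS3 and GetStats are
// naturally used together, e.g. in an HTTP handler. The handler shape and the form
// field name "file" are assumptions for illustration.
//
//	func handleChallenge(w http.ResponseWriter, r *http.Request) {
//		file, _, err := r.FormFile("file")
//		if err != nil {
//			http.Error(w, err.Error(), http.StatusBadRequest)
//			return
//		}
//		defer file.Close()
//		id, err := UploadToS3(file)
//		if err != nil {
//			http.Error(w, err.Error(), http.StatusInternalServerError)
//			return
//		}
//		stats, err := GetStats(id)
//		if err != nil {
//			http.Error(w, err.Error(), http.StatusInternalServerError)
//			return
//		}
//		json.NewEncoder(w).Encode(stats)
//	}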
/*
	Arenas produced by Dir Transmats may be relocated by simple `mv`.
*/
func (t *S3Transmat) Materialize(
	kind integrity.TransmatKind,
	dataHash integrity.CommitID,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.Arena {
	var arena dirArena
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// Parse URI; Find warehouses.
		if len(siloURIs) < 1 {
			panic(integrity.ConfigError.New("Materialization requires at least one data source!"))
			// Note that it's possible a caching layer will satisfy things even without data sources...
			// but if that was going to happen, it already would have by now.
		}
		// Our policy is to take the first path that exists.
		// This lets you specify a series of potential locations, and if one is unavailable we'll just take the next.
		var warehouseBucketName string
		var warehousePathPrefix string
		var warehouseCtntAddr bool
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			warehouseBucketName = u.Host
			warehousePathPrefix = u.Path
			switch u.Scheme {
			case "s3":
				warehouseCtntAddr = false
			case "s3+splay":
				warehouseCtntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}
			// TODO figure out how to check for data (or at least warehouse!) presence;
			// currently just assuming the first one's golden, and blowing up later if it's not.
			break
		}
		if warehouseBucketName == "" {
			panic(integrity.WarehouseConnectionError.New("No warehouses were available!"))
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		// TODO should not require keys!  we're just reading, after all; anon access is 100% valid.
		//   Buuuuut s3gof3r doesn't seem to understand empty keys; it still sends them as if to login, and AWS says 403.  So, foo.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// initialize reader from s3!
		getPath := warehousePathPrefix
		if warehouseCtntAddr {
			getPath = path.Join(warehousePathPrefix, string(dataHash))
		}
		s3reader := makeS3reader(warehouseBucketName, getPath, keys)
		defer s3reader.Close()

		// prepare decompression as necessary
		reader, err := tartrans.Decompress(s3reader)
		if err != nil {
			panic(integrity.WarehouseConnectionError.New("could not start decompressing: %s", err))
		}
		tarReader := tar.NewReader(reader)

		// Create staging arena to produce data into.
		arena.path, err = ioutil.TempDir(t.workPath, "")
		if err != nil {
			panic(integrity.TransmatError.New("Unable to create arena: %s", err))
		}

		// walk input tar stream, placing data and accumulating hashes and metadata for integrity check
		bucket := &fshash.MemoryBucket{}
		tartrans.Extract(tarReader, arena.Path(), bucket, hasherFactory)

		// bucket processing may have created a root node if missing.  if so, we need to apply its props.
		fs.PlaceFile(arena.Path(), bucket.Root().Metadata, nil)

		// hash whole tree
		actualTreeHash := fshash.Hash(bucket, hasherFactory)

		// verify total integrity
		expectedTreeHash, err := base64.URLEncoding.DecodeString(string(dataHash))
		if err != nil {
			panic(integrity.ConfigError.New("Could not parse hash: %s", err))
		}
		if bytes.Equal(actualTreeHash, expectedTreeHash) {
			// excellent, got what we asked for.
			arena.hash = dataHash
		} else {
			// this may or may not be grounds for panic, depending on configuration.
			if config.AcceptHashMismatch {
				// if we're tolerating mismatches, report the actual hash through different mechanisms.
				// you probably only ever want to use this in tests or debugging; in prod it's just asking for insanity.
				arena.hash = integrity.CommitID(actualTreeHash)
			} else {
				panic(integrity.NewHashMismatchError(string(dataHash), base64.URLEncoding.EncodeToString(actualTreeHash)))
			}
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return arena
}
func (t S3Transmat) Scan(
	kind integrity.TransmatKind,
	subjectPath string,
	siloURIs []integrity.SiloURI,
	options ...integrity.MaterializerConfigurer,
) integrity.CommitID {
	var commitID integrity.CommitID
	try.Do(func() {
		// Basic validation and config
		config := integrity.EvaluateConfig(options...)
		if kind != Kind {
			panic(errors.ProgrammerError.New("This transmat supports definitions of type %q, not %q", Kind, kind))
		}

		// If scan area doesn't exist, bail immediately.
		// No need to even start dialing warehouses if we've got nothing for em.
		_, err := os.Stat(subjectPath)
		if err != nil {
			if os.IsNotExist(err) {
				return // empty commitID
			} else {
				panic(err)
			}
		}

		// load keys from env
		// TODO someday URIs should grow smart enough to control this in a more general fashion -- but for now, host ENV is actually pretty feasible and plays easily with others.
		keys, err := s3gof3r.EnvKeys()
		if err != nil {
			panic(S3CredentialsMissingError.Wrap(err))
		}

		// Parse URI; Find warehouses; Open output streams for writing.
		// Since these are all behaving as just one `io.Writer` stream, this could maybe be factored out.
		// Error handling is currently "anything -> panic".  This should probably be more resilient.  (That might need another refactor so we have an upload call per remote.)
		// TODO : both this and the tar code that has a similar single stream idea should use an interface
		//  And that interface should have a concept of mv so we can make atomic commits.
		//  I'm not doing multiple URIs here until we get that, because the io.Writer interface just
		//  doesn't cut it like it did for tars (and really, it's ignoring a major issue to use it there, too).
		//  ...F**k it, we're gonna do it
		controllers := make([]*s3warehousePut, 0)
		writers := make([]io.Writer, 0) // this is dumb, but we end up making one of these to satisfy the type conversion for MultiWriter anyway
		for _, givenURI := range siloURIs {
			u, err := url.Parse(string(givenURI))
			if err != nil {
				panic(integrity.ConfigError.New("failed to parse URI: %s", err))
			}
			controller := &s3warehousePut{}
			controller.bucketName = u.Host
			controller.pathPrefix = u.Path
			var ctntAddr bool
			switch u.Scheme {
			case "s3":
				ctntAddr = false
			case "s3+splay":
				ctntAddr = true
			default:
				panic(integrity.ConfigError.New("unrecognized scheme: %q", u.Scheme))
			}

			// dial it and initialize writer to s3!
			// if the URI indicated splay behavior, first stream data to {$bucketName}:{dirname($storePath)}/.tmp.upload.{basename($storePath)}.{random()};
			// this allows us to start uploading before the final hash is determined and relocate it later.
			// for direct paths, upload into place, because aws already manages atomicity at that scale (and they don't have a rename or copy operation that's free, because uh...?  no time to implement it since 2006, apparently).
			controller.keys = keys
			if ctntAddr {
				controller.tmpPath = path.Join(
					path.Dir(controller.pathPrefix),
					".tmp.upload."+path.Base(controller.pathPrefix)+"."+guid.New(),
				)
				controller.stream = makeS3writer(controller.bucketName, controller.tmpPath, keys)
			} else {
				controller.stream = makeS3writer(controller.bucketName, controller.pathPrefix, keys)
			}
			controllers = append(controllers, controller)
			writers = append(writers, controller.stream)
		}
		stream := io.MultiWriter(writers...)
		if len(writers) < 1 {
			stream = ioutil.Discard
		}

		// walk, fwrite, hash
		commitID = integrity.CommitID(tartrans.Save(stream, subjectPath, config.FilterSet, hasherFactory))

		// commit
		for _, controller := range controllers {
			controller.Commit(string(commitID))
		}
	}).Catch(integrity.Error, func(err *errors.Error) {
		panic(err)
	}).CatchAll(func(err error) {
		panic(integrity.UnknownError.Wrap(err))
	}).Done()
	return commitID
}