// List function with retry and support for listing all keys in a prefix
func List(bucket *s3.Bucket, prefix string, delimiter string) <-chan *s3.ListResp {
	ch := make(chan *s3.ListResp, 100)
	go func(pfix string, del string) {
		defer close(ch)
		isTruncated := true
		nextMarker := ""
		for isTruncated {
			attempts := 0
			for {
				attempts++
				res, err := bucket.List(pfix, del, nextMarker, 1000)
				if err != nil {
					if err.Error() == "runtime error: index out of range" {
						break
					}
					if attempts >= maxRetries {
						log.Panic(err)
					}
					time.Sleep(time.Second * 3)
				} else {
					ch <- res
					if len(res.Contents) > 0 {
						nextMarker = res.Contents[len(res.Contents)-1].Key
					} else if len(res.CommonPrefixes) > 0 {
						nextMarker = res.CommonPrefixes[len(res.CommonPrefixes)-1]
					}
					isTruncated = res.IsTruncated
					break
				}
			}
		}
	}(prefix, delimiter)
	return ch
}
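// Hypothetical usage sketch for List: drain every page pushed onto the channel
// and print the keys it contains. Assumes `bucket` is an already-initialized
// *s3.Bucket; the prefix and delimiter values are illustrative only.
func exampleListUsage(bucket *s3.Bucket) {
	for resp := range List(bucket, "logs/2015/", "") {
		for _, key := range resp.Contents {
			fmt.Printf("%s (%d bytes)\n", key.Key, key.Size)
		}
	}
}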
// Recursively descend into an S3 directory tree, filtering based on the given
// schema, and sending results on the given channel. The `level` parameter
// indicates how far down the tree we are, and is used to determine which schema
// field we use for filtering.
func FilterS3(bucket *s3.Bucket, prefix string, level int, schema Schema, kc chan S3ListResult) {
	// Update the marker as we encounter keys / prefixes. If a response is
	// truncated, the next `List` request will start from the next item after
	// the marker.
	marker := ""
	// Keep listing if the response is incomplete (there are more than
	// `listBatchSize` entries or prefixes).
	done := false
	for !done {
		response, err := bucket.List(prefix, "/", marker, listBatchSize)
		if err != nil {
			fmt.Printf("Error listing: %s\n", err)
			// TODO: retry?
			kc <- S3ListResult{s3.Key{}, err}
			break
		}
		if !response.IsTruncated {
			// Response is not truncated, so we're done.
			done = true
		}

		if level >= len(schema.Fields) {
			// We are past all the dimensions - encountered items are now
			// S3 key names. We ignore any further prefixes and assume that the
			// specified schema is correct/complete.
			for _, k := range response.Contents {
				marker = k.Key
				kc <- S3ListResult{k, nil}
			}
		} else {
			// We are still looking at prefixes. Recursively list each one that
			// matches the specified schema's allowed values.
			for _, pf := range response.CommonPrefixes {
				// Get just the last piece of the prefix to check it as a
				// dimension. If we have '/foo/bar/baz', we just want 'baz'.
				stripped := pf[len(prefix) : len(pf)-1]
				allowed := schema.Dims[schema.Fields[level]].IsAllowed(stripped)
				marker = pf
				if allowed {
					FilterS3(bucket, pf, level+1, schema, kc)
				}
			}
		}
	}
	if level == 0 {
		// We traverse the tree in depth-first order, so once we've reached the
		// end at the root (level 0), we know we're done.
		// Note that things could be made faster by parallelizing the recursive
		// listing, but we would need some other mechanism to know when to close
		// the channel.
		close(kc)
	}
}
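// Hypothetical usage sketch for FilterS3: run the recursive listing in the
// background and consume matching keys as they arrive. The Schema value and
// the S3ListResult field names (Key, Err) are assumptions; both types are
// defined elsewhere in the package.
func exampleFilterS3Usage(bucket *s3.Bucket, schema Schema) {
	results := make(chan S3ListResult, 100)
	go FilterS3(bucket, "data/", 0, schema, results)
	for r := range results {
		if r.Err != nil {
			log.Printf("listing error: %s", r.Err)
			continue
		}
		fmt.Println(r.Key.Key)
	}
}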
// Read a single client record using a partial read from S3 using the given
// headers, which should contain a "Range: bytes=M-N" header.
func getClientRecord(bucket *s3.Bucket, o *MessageLocation, headers map[string][]string) ([]byte, error) {
	resp, err := bucket.GetResponseWithHeaders(o.Key, headers)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err == nil && len(body) != int(o.Length) {
		err = fmt.Errorf("Unexpected body length: %d != %d\n", len(body), o.Length)
	}
	return body, err
}
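// Hypothetical sketch of building the "Range: bytes=M-N" header expected by
// getClientRecord. The Offset field on MessageLocation is an assumption; only
// Key and Length are referenced in the function above.
func exampleGetClientRecord(bucket *s3.Bucket, loc *MessageLocation) {
	headers := map[string][]string{
		"Range": {fmt.Sprintf("bytes=%d-%d", loc.Offset, loc.Offset+loc.Length-1)},
	}
	record, err := getClientRecord(bucket, loc, headers)
	if err != nil {
		log.Printf("partial read of %s failed: %s", loc.Key, err)
		return
	}
	fmt.Printf("read %d bytes\n", len(record))
}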
// killBucket deletes a test bucket, first emptying it of any remaining keys
// and aborting in-progress multipart uploads, retrying until the configured
// attempt strategy is exhausted.
func killBucket(b *s3.Bucket) {
	var err error
	for attempt := attempts.Start(); attempt.Next(); {
		err = b.DelBucket()
		if err == nil {
			return
		}
		if _, ok := err.(*net.DNSError); ok {
			return
		}
		e, ok := err.(*s3.Error)
		if ok && e.Code == "NoSuchBucket" {
			return
		}
		if ok && e.Code == "BucketNotEmpty" {
			// Errors are ignored here. Just retry.
			resp, err := b.List("", "", "", 1000)
			if err == nil {
				for _, key := range resp.Contents {
					_ = b.Del(key.Key)
				}
			}
			multis, _, _ := b.ListMulti("", "")
			for _, m := range multis {
				_ = m.Abort()
			}
		}
	}
	message := "cannot delete test bucket"
	if err != nil {
		message += ": " + err.Error()
	}
	panic(message)
}
// Get fetches the contents of a key, retrying up to maxRetries times on error.
func Get(bucket *s3.Bucket, key string) ([]byte, error) {
	attempts := 0
	for {
		attempts++
		buff, err := bucket.Get(key)
		if err == nil {
			return buff, nil
		}
		if attempts >= maxRetries {
			return nil, err
		}
	}
}
// Put writes contents to a key, retrying up to maxRetries times with a short
// pause between attempts.
func Put(bucket *s3.Bucket, key string, contents []byte, contentType string, permissions s3.ACL, options s3.Options) error {
	attempts := 0
	for {
		attempts++
		err := bucket.Put(key, contents, contentType, permissions, options)
		if err == nil {
			return nil
		}
		if attempts >= maxRetries {
			return err
		}
		time.Sleep(time.Second * 3)
	}
}
// DeleteMulti deletes multiple keys
func DeleteMulti(bucket *s3.Bucket, keys []string) error {
	attempts := 0
	for {
		attempts++
		err := bucket.DelMulti(toDeleteStruct(keys))
		if err != nil {
			if attempts >= maxRetries {
				return err
			}
			time.Sleep(time.Second * 3)
		} else {
			break
		}
	}
	return nil
}
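// The helpers above lean on package-level configuration that is not shown in
// this listing: maxRetries bounds the retry loops in Get, Put, and
// DeleteMulti, and listBatchSize caps each page requested in FilterS3. A
// minimal sketch of the assumed declarations follows; the actual values live
// elsewhere and may differ.
const (
	maxRetries    = 5    // assumed retry budget
	listBatchSize = 1000 // assumed page size for bucket.List
)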
// Callers must call Close() on rc.
func getS3Reader(bucket *s3.Bucket, s3Key string, offset uint64) (rc io.ReadCloser, err error) {
	if offset == 0 {
		rc, err = bucket.GetReader(s3Key)
		return
	}

	headers := map[string][]string{
		"Range": []string{fmt.Sprintf("bytes=%d-", offset)},
	}

	resp, err := bucket.GetResponseWithHeaders(s3Key, headers)
	if resp != nil {
		rc = resp.Body
	}
	return
}
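// Hypothetical usage sketch for getS3Reader: resume reading a large object
// from a previously saved offset. The key name and offset are illustrative.
func exampleGetS3Reader(bucket *s3.Bucket, resumeOffset uint64) {
	rc, err := getS3Reader(bucket, "archive/data.log", resumeOffset)
	if err != nil {
		log.Fatalf("open failed: %s", err)
	}
	defer rc.Close()
	n, err := io.Copy(os.Stdout, rc)
	if err != nil {
		log.Printf("copy failed after %d bytes: %s", n, err)
	}
}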
// Generate a function to use for uploading files as we walk the file tree from
// `base`.
func makeupload(base string, pattern *regexp.Regexp, bucket *s3.Bucket,
	bucketPrefix string, dryRun bool, progress *Progress) func(string, os.FileInfo, error) error {

	// Remove any excess path separators from the bucket prefix.
	bucketPrefix = strings.Trim(bucketPrefix, "/")

	// Get a canonical version of the base dir.
	cleanBase := filepath.Clean(base)

	// Create a closure for the upload function.
	return func(path string, fi os.FileInfo, err error) (errOut error) {
		if err != nil {
			return err
		}
		if fi.IsDir() {
			return nil
		}

		//fmt.Printf("Found an item: %s\n", path)
		baseName := filepath.Base(path)
		if !pattern.MatchString(baseName) {
			//fmt.Printf("Item does not match pattern\n")
			return nil
		}

		// Make sure we're comparing apples to apples when stripping off the
		// base path. Use the canonical versions of both.
		cleanPath := filepath.Clean(path)
		relPath := cleanPath[len(cleanBase)+1:]

		// If we encounter a file
		//   /path/to/base/foo/bar/baz
		//   <--- base ---><-- rel -->
		// and our bucket prefix is `hello/files`, our file in S3 will be at
		//   s3://bucket-name/hello/files/foo/bar/baz
		//                    <- prefix -><-- rel -->
		s3Path := fmt.Sprintf("%s/%s", bucketPrefix, relPath)

		// Update progress Count whether we were successful or not.
		progress.Count += 1

		if dryRun {
			fmt.Printf("Dry run. Not uploading item to %s\n", s3Path)
			return
		}

		fmt.Printf("Uploading item to: %s\n", s3Path)
		reader, err := os.Open(path)
		if err != nil {
			fmt.Printf("Error opening %s for reading: %s\n", path, err)
			progress.Errors++
			return err
		}

		err = bucket.PutReader(s3Path, reader, fi.Size(), "binary/octet-stream",
			s3.BucketOwnerFull, s3.Options{})
		if err != nil {
			progress.Errors++
			return err
		}
		// Count the bytes for this file as progress if there were no upload
		// errors.
		progress.Bytes += fi.Size()

		err = reader.Close()
		if err != nil {
			fmt.Printf("Error closing %s: %s\n", path, err)
			progress.Errors++
			errOut = err
		}

		// Now remove the file locally.
		err = os.Remove(path)
		if err != nil {
			fmt.Printf("Error removing %s: %s\n", path, err)
			progress.Errors++
			errOut = err
		}
		return
	}
}
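// Hypothetical usage sketch for makeupload: walk a local spool directory and
// upload every file whose name ends in ".log". The directory, pattern, and
// bucket prefix are illustrative; Progress is the counter type referenced
// above.
func exampleMakeupload(bucket *s3.Bucket) {
	progress := &Progress{}
	uploader := makeupload("/var/spool/uploads", regexp.MustCompile(`\.log$`),
		bucket, "hello/files", false, progress)
	if err := filepath.Walk("/var/spool/uploads", uploader); err != nil {
		log.Printf("walk failed: %s", err)
	}
	fmt.Printf("processed %d files (%d bytes), %d errors\n",
		progress.Count, progress.Bytes, progress.Errors)
}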
// Stream takes a set of prefixes, lists them, and streams their contents by
// line.
func Stream(prefixes []string, searchDepth int, keyRegex string, includeKeyName bool) {
	if len(prefixes) == 0 {
		fmt.Printf("No prefixes provided\n Usage: fasts3 stream <prefix>")
		return
	}

	keys := make(chan string, len(prefixes)*2+1)

	var keyRegexFilter *regexp.Regexp
	if keyRegex != "" {
		keyRegexFilter = regexp.MustCompile(keyRegex)
	}

	var b *s3.Bucket
	go func() {
		for _, prefix := range prefixes {
			bucket, prefix := parseS3Uri(prefix)
			if b == nil {
				b = GetBucket(bucket)
			}

			keyExists, err := b.Exists(prefix)
			if err != nil {
				log.Fatalln(err)
			}

			if keyExists {
				if keyRegexFilter != nil && !keyRegexFilter.MatchString(prefix) {
					continue
				}
				keys <- prefix
			} else {
				for key := range s3wrapper.ListRecurse(b, prefix, searchDepth) {
					if keyRegexFilter != nil && !keyRegexFilter.MatchString(key.Key) {
						continue
					}
					keys <- key.Key
				}
			}
		}
		close(keys)
	}()

	var wg sync.WaitGroup
	msgs := make(chan string, 1000)
	for i := 1; i <= 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for key := range keys {
				bts, err := s3wrapper.Get(b, key)
				if err != nil {
					log.Fatalln(err)
				}
				reader, err := getReaderByExt(bts, key)
				if err != nil {
					panic(err)
				}
				for {
					line, _, err := reader.ReadLine()
					if err != nil {
						if err == io.EOF {
							break
						}
						log.Fatalln(err)
					}
					msg := fmt.Sprintf("%s\n", string(line))
					if includeKeyName {
						msg = fmt.Sprintf("[%s] %s", key, msg)
					}
					msgs <- msg
				}
			}
		}()
	}

	go func() {
		wg.Wait()
		close(msgs)
	}()

	for msg := range msgs {
		fmt.Print(msg)
	}
}
// Get lists and retrieves S3 keys given a list of prefixes.
// searchDepth can also be specified to increase the speed of listing.
func Get(prefixes []string, searchDepth int) {
	if len(prefixes) == 0 {
		fmt.Printf("No prefixes provided\n Usage: fasts3 get <prefix>")
		return
	}

	getRequests := make(chan GetRequest, len(prefixes)*2+1)

	var b *s3.Bucket
	go func() {
		for _, prefix := range prefixes {
			bucket, prefix := parseS3Uri(prefix)
			if b == nil {
				b = GetBucket(bucket)
			}

			keyExists, err := b.Exists(prefix)
			if err != nil {
				log.Fatalln(err)
			}

			if keyExists {
				keyParts := strings.Split(prefix, "/")
				ogPrefix := strings.Join(keyParts[0:len(keyParts)-1], "/") + "/"
				getRequests <- GetRequest{Key: prefix, OriginalPrefix: ogPrefix}
			} else {
				for key := range s3wrapper.ListRecurse(b, prefix, searchDepth) {
					getRequests <- GetRequest{Key: key.Key, OriginalPrefix: prefix}
				}
			}
		}
		close(getRequests)
	}()

	var wg sync.WaitGroup
	msgs := make(chan string, 1000)

	workingDirectory, err := os.Getwd()
	if err != nil {
		log.Fatalln(err)
	}

	for i := 1; i <= 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for rq := range getRequests {
				dest := path.Join(workingDirectory, strings.Replace(rq.Key, rq.OriginalPrefix, "", 1))
				msgs <- fmt.Sprintf("Getting %s -> %s\n", rq.Key, dest)
				err := s3wrapper.GetToFile(b, rq.Key, dest)
				if err != nil {
					log.Fatalln(err)
				}
			}
		}()
	}

	go func() {
		wg.Wait()
		close(msgs)
	}()

	for msg := range msgs {
		fmt.Print(msg)
	}
}
// Del deletes a set of prefixes (S3 keys or partial keys).
func Del(prefixes []string, searchDepth int, isRecursive bool) {
	if len(prefixes) == 0 {
		fmt.Printf("No prefixes provided\n Usage: fasts3 del <prefix>")
		return
	}

	keys := make(chan string, len(prefixes)*2+1)

	var b *s3.Bucket
	go func() {
		for _, delPrefix := range prefixes {
			bucket, prefix := parseS3Uri(delPrefix)
			if b == nil {
				b = GetBucket(bucket)
			}

			keyExists, err := b.Exists(prefix)
			if err != nil {
				log.Fatalln(err)
			}

			if keyExists {
				keys <- prefix
			} else if isRecursive {
				for key := range s3wrapper.ListRecurse(b, prefix, searchDepth) {
					keys <- key.Key
				}
			} else {
				fmt.Printf("trying to delete a prefix, please add --recursive or -r to proceed\n")
			}
		}
		close(keys)
	}()

	var wg sync.WaitGroup
	msgs := make(chan string, 1000)
	for i := 1; i <= 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			batch := make([]string, 0, 100)
			for key := range keys {
				batch = append(batch, key)
				if len(batch) >= 100 {
					if err := s3wrapper.DeleteMulti(b, batch); err != nil {
						log.Fatalln(err)
					}
					for _, k := range batch {
						msgs <- fmt.Sprintf("File %s Deleted\n", k)
					}
					batch = batch[:0]
				}
			}
			// Flush any keys left over in the final, partial batch.
			if len(batch) > 0 {
				if err := s3wrapper.DeleteMulti(b, batch); err != nil {
					log.Fatalln(err)
				}
				for _, k := range batch {
					msgs <- fmt.Sprintf("File %s Deleted\n", k)
				}
			}
		}()
	}

	go func() {
		wg.Wait()
		close(msgs)
	}()

	for msg := range msgs {
		fmt.Print(msg)
	}
}