// List lists every key under the given prefix, retrying transient errors up
// to the package-level maxRetries limit, and streams each page of results
// on the returned channel. The channel is closed when the listing finishes.
func List(bucket *s3.Bucket, prefix string, delimiter string) <-chan *s3.ListResp {
    ch := make(chan *s3.ListResp, 100)
    go func(pfix string, del string) {
        defer close(ch)
        isTruncated := true
        nextMarker := ""
        for isTruncated {
            attempts := 0
            for {
                attempts++
                res, err := bucket.List(pfix, del, nextMarker, 1000)
                if err != nil {
                    if err.Error() == "runtime error: index out of range" {
                        // Treat this library quirk as the end of the listing:
                        // terminate the outer loop too, rather than retrying.
                        isTruncated = false
                        break
                    }
                    if attempts >= maxRetries {
                        log.Panic(err)
                    }
                    time.Sleep(time.Second * 3)
                } else {
                    ch <- res
                    // Advance the marker past the last item in this page so
                    // the next request resumes where this one left off.
                    if len(res.Contents) > 0 {
                        nextMarker = res.Contents[len(res.Contents)-1].Key
                    } else if len(res.CommonPrefixes) > 0 {
                        nextMarker = res.CommonPrefixes[len(res.CommonPrefixes)-1]
                    }
                    isTruncated = res.IsTruncated
                    break
                }
            }
        }
    }(prefix, delimiter)
    return ch
}
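
// A minimal usage sketch for List, assuming the goamz aws and s3 packages
// and credentials in the environment; the bucket name, region, and prefix
// below are placeholders, not values from the original code.
func listAllKeysExample() {
    auth, err := aws.EnvAuth() // AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
    if err != nil {
        log.Fatal(err)
    }
    bucket := s3.New(auth, aws.USEast).Bucket("example-bucket")
    // An empty delimiter yields no CommonPrefixes: every page holds only keys.
    for page := range List(bucket, "logs/", "") {
        for _, key := range page.Contents {
            fmt.Println(key.Key)
        }
    }
}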
// killBucket empties and deletes a test bucket, retrying according to the
// package-level `attempts` strategy, and panics if the bucket still cannot
// be deleted once the attempts are exhausted.
func killBucket(b *s3.Bucket) {
    var err error
    for attempt := attempts.Start(); attempt.Next(); {
        err = b.DelBucket()
        if err == nil {
            return
        }
        // A DNS error means the bucket's virtual host no longer resolves;
        // treat it as already deleted.
        if _, ok := err.(*net.DNSError); ok {
            return
        }
        e, ok := err.(*s3.Error)
        if ok && e.Code == "NoSuchBucket" {
            return
        }
        if ok && e.Code == "BucketNotEmpty" {
            // Empty the bucket before the next delete attempt. Errors are
            // ignored here; just retry.
            resp, err := b.List("", "", "", 1000)
            if err == nil {
                for _, key := range resp.Contents {
                    _ = b.Del(key.Key)
                }
            }
            // In-progress multipart uploads also keep a bucket non-empty,
            // so abort any that are found.
            multis, _, _ := b.ListMulti("", "")
            for _, m := range multis {
                _ = m.Abort()
            }
        }
    }
    message := "cannot delete test bucket"
    if err != nil {
        message += ": " + err.Error()
    }
    panic(message)
}
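
// killBucket relies on a package-level `attempts` retry strategy. A sketch
// of a plausible definition, assuming goamz's aws.AttemptStrategy; the
// specific counts and durations are illustrative, not the original's values.
var attempts = aws.AttemptStrategy{
    Min:   5,                      // always make at least five attempts
    Total: 5 * time.Second,        // keep retrying for up to five seconds
    Delay: 200 * time.Millisecond, // pause between attempts
}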
// FilterS3 recursively descends into an S3 directory tree, filtering based
// on the given schema, and sending results on the given channel. The `level`
// parameter indicates how far down the tree we are, and is used to determine
// which schema field we use for filtering.
func FilterS3(bucket *s3.Bucket, prefix string, level int, schema Schema, kc chan S3ListResult) {
    // Update the marker as we encounter keys / prefixes. If a response is
    // truncated, the next `List` request will start from the next item after
    // the marker.
    marker := ""
    // Keep listing while the response is incomplete (i.e. there are more
    // than `listBatchSize` entries or prefixes remaining).
    done := false
    for !done {
        response, err := bucket.List(prefix, "/", marker, listBatchSize)
        if err != nil {
            fmt.Printf("Error listing: %s\n", err)
            // TODO: retry?
            kc <- S3ListResult{s3.Key{}, err}
            break
        }
        if !response.IsTruncated {
            // Response is not truncated, so we're done.
            done = true
        }
        if level >= len(schema.Fields) {
            // We are past all the dimensions - encountered items are now
            // S3 key names. We ignore any further prefixes and assume that
            // the specified schema is correct/complete.
            for _, k := range response.Contents {
                marker = k.Key
                kc <- S3ListResult{k, nil}
            }
        } else {
            // We are still looking at prefixes. Recursively list each one
            // that matches the specified schema's allowed values.
            for _, pf := range response.CommonPrefixes {
                // Get just the last piece of the prefix to check it as a
                // dimension. If prefix is 'foo/bar/' and pf is 'foo/bar/baz/',
                // we just want 'baz'.
                stripped := pf[len(prefix) : len(pf)-1]
                allowed := schema.Dims[schema.Fields[level]].IsAllowed(stripped)
                marker = pf
                if allowed {
                    FilterS3(bucket, pf, level+1, schema, kc)
                }
            }
        }
    }
    if level == 0 {
        // We traverse the tree in depth-first order, so once we've reached
        // the end at the root (level 0), we know we're done. Parallelizing
        // the recursive listing could speed this up, but then we would need
        // some other mechanism to know when to close the channel.
        close(kc)
    }
}
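
// A usage sketch for FilterS3: start the traversal in a goroutine and drain
// the channel until FilterS3 closes it at level 0. This assumes S3ListResult
// is a struct whose two fields (per the positional literals above) are an
// s3.Key and an error; the field names Key and Err are a guess.
func filterExample(bucket *s3.Bucket, schema Schema) {
    kc := make(chan S3ListResult, 100)
    go FilterS3(bucket, "", 0, schema, kc)
    for result := range kc {
        if result.Err != nil {
            log.Printf("list error: %s", result.Err)
            continue
        }
        fmt.Println(result.Key.Key)
    }
}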