Ejemplo n.º 1
0
// List lists the keys under prefix in bucket and streams every page of
// results on the returned channel, following truncated responses until the
// listing is complete. The channel is closed when listing ends.
//
// Each page is requested with a limit of 1000 and is retried up to maxRetries
// times, sleeping three seconds between attempts. When the retries for a page
// are exhausted the listing goroutine panics via log.Panic.
func List(bucket *s3.Bucket, prefix string, delimiter string) <-chan *s3.ListResp {
	ch := make(chan *s3.ListResp, 100)
	go func(pfix string, del string) {
		defer close(ch)
		isTruncated := true
		nextMarker := ""
		for isTruncated {
			attempts := 0
			for {
				attempts++
				res, err := bucket.List(pfix, del, nextMarker, 1000)
				if err != nil {
					if err.Error() == "runtime error: index out of range" {
						// This error is treated as non-retryable. Stop the
						// whole listing: merely leaving the retry loop would
						// keep isTruncated set and re-issue the very same
						// request forever without any delay.
						return
					}
					if attempts >= maxRetries {
						log.Panic(err)
					}

					time.Sleep(time.Second * 3)
					continue
				}

				ch <- res
				// Resume the next page after the last entry of this one.
				if len(res.Contents) > 0 {
					nextMarker = res.Contents[len(res.Contents)-1].Key
				} else if len(res.CommonPrefixes) > 0 {
					nextMarker = res.CommonPrefixes[len(res.CommonPrefixes)-1]
				}
				isTruncated = res.IsTruncated
				break
			}
		}
	}(prefix, delimiter)
	return ch
}
Ejemplo n.º 2
0
// FilterS3 walks an S3 "directory" tree depth-first starting at prefix and
// sends the results on kc.
//
// level is the current depth in the tree and selects which schema field is
// used to filter the prefixes found at that depth. Once level is past the last
// schema field, the keys found are sent on kc and deeper prefixes are ignored.
// A listing error is sent on kc (with an empty key) and ends the walk of the
// current prefix. The call made with level 0 closes kc when it finishes.
func FilterS3(bucket *s3.Bucket, prefix string, level int, schema Schema, kc chan S3ListResult) {
	// Last key / prefix seen so far. A follow-up `List` request after a
	// truncated response starts from the item after this marker.
	marker := ""

	// True once every schema dimension has been consumed, i.e. the entries at
	// this depth are treated as S3 keys rather than dimension prefixes.
	pastDimensions := level >= len(schema.Fields)

	for {
		page, err := bucket.List(prefix, "/", marker, listBatchSize)
		if err != nil {
			fmt.Printf("Error listing: %s\n", err)
			// TODO: retry?
			kc <- S3ListResult{s3.Key{}, err}
			break
		}

		if pastDimensions {
			// Any further prefixes are ignored; the schema is assumed to be
			// correct/complete.
			for _, key := range page.Contents {
				marker = key.Key
				kc <- S3ListResult{key, nil}
			}
		} else {
			// Still looking at prefixes: recurse into each one whose last path
			// component is allowed by the schema dimension for this level.
			for _, child := range page.CommonPrefixes {
				// Given '/foo/bar/baz/' under '/foo/bar/', keep only 'baz'.
				dimValue := child[len(prefix) : len(child)-1]
				descend := schema.Dims[schema.Fields[level]].IsAllowed(dimValue)
				marker = child
				if descend {
					FilterS3(bucket, child, level+1, schema, kc)
				}
			}
		}

		// A response that is not truncated was the last page.
		if !page.IsTruncated {
			break
		}
	}

	// The traversal is depth-first, so when the root call (level 0) gets here
	// everything has been visited. Parallelizing the recursive listing would
	// be faster but would need another way to know when to close the channel.
	if level == 0 {
		close(kc)
	}
}
Ejemplo n.º 3
0
// getClientRecord reads a single client record from S3 with a partial read.
//
// headers is passed through to the request and should contain a
// "Range: bytes=M-N" header. The bytes read are returned together with an
// error if the request or the read fails, or if the number of bytes read
// differs from o.Length.
func getClientRecord(bucket *s3.Bucket, o *MessageLocation, headers map[string][]string) ([]byte, error) {
	resp, err := bucket.GetResponseWithHeaders(o.Key, headers)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	record, readErr := ioutil.ReadAll(resp.Body)
	if readErr != nil {
		// Whatever was read before the failure is still handed back.
		return record, readErr
	}
	if got := len(record); got != int(o.Length) {
		return record, fmt.Errorf("Unexpected body length: %d != %d\n", got, o.Length)
	}
	return record, nil
}
Ejemplo n.º 4
0
// killBucket deletes the test bucket b, retrying according to the `attempts`
// strategy.
//
// It returns quietly when the bucket is deleted, when it does not exist, or
// when the request fails with a DNS error. If the bucket is not empty, its
// keys are deleted and its multipart uploads aborted on a best-effort basis
// before the next attempt. It panics when all attempts are used up.
func killBucket(b *s3.Bucket) {
	var lastErr error
	for attempt := attempts.Start(); attempt.Next(); {
		lastErr = b.DelBucket()
		if lastErr == nil {
			return
		}

		switch e := lastErr.(type) {
		case *net.DNSError:
			return
		case *s3.Error:
			switch e.Code {
			case "NoSuchBucket":
				return
			case "BucketNotEmpty":
				// Errors are ignored here. Just retry.
				if listing, listErr := b.List("", "", "", 1000); listErr == nil {
					for _, obj := range listing.Contents {
						_ = b.Del(obj.Key)
					}
				}
				uploads, _, _ := b.ListMulti("", "")
				for _, upload := range uploads {
					_ = upload.Abort()
				}
			}
		}
	}

	if lastErr == nil {
		panic("cannot delete test bucket")
	}
	panic("cannot delete test bucket" + ": " + lastErr.Error())
}
Ejemplo n.º 5
0
// Get fetches the contents stored under key in bucket, retrying on failure.
//
// At most maxRetries attempts are made, sleeping three seconds between
// attempts (the same pause Put and DeleteMulti use) so that a failing request
// is not re-sent in a tight loop. The error of the last attempt is returned
// once the retries are exhausted.
func Get(bucket *s3.Bucket, key string) ([]byte, error) {
	attempts := 0
	for {
		attempts++
		buff, err := bucket.Get(key)
		if err == nil {
			return buff, nil
		}
		if attempts >= maxRetries {
			return nil, err
		}

		time.Sleep(time.Second * 3)
	}
}
Ejemplo n.º 6
0
// Put stores contents under key in bucket with the given content type, ACL and
// options, retrying on failure.
//
// At most maxRetries attempts are made, with a three second pause after each
// failed attempt that is not the last. The error of the final attempt is
// returned when every attempt fails.
func Put(bucket *s3.Bucket, key string, contents []byte, contentType string, permissions s3.ACL, options s3.Options) error {
	for tries := 1; ; tries++ {
		putErr := bucket.Put(key, contents, contentType, permissions, options)
		switch {
		case putErr == nil:
			return nil
		case tries >= maxRetries:
			return putErr
		}
		time.Sleep(3 * time.Second)
	}
}
Ejemplo n.º 7
0
// DeleteMulti deletes multiple keys from bucket in a single request, retrying
// on failure.
//
// At most maxRetries attempts are made, with a three second pause after each
// failed attempt that is not the last. The error of the final attempt is
// returned when every attempt fails.
func DeleteMulti(bucket *s3.Bucket, keys []string) error {
	for tries := 1; ; tries++ {
		delErr := bucket.DelMulti(toDeleteStruct(keys))
		if delErr == nil {
			return nil
		}
		if tries >= maxRetries {
			return delErr
		}
		time.Sleep(3 * time.Second)
	}
}
Ejemplo n.º 8
0
// getS3Reader opens a reader on s3Key in bucket, starting at byte offset.
//
// With an offset of zero the whole object is read; otherwise the request
// carries a "Range: bytes=<offset>-" header. Callers must call Close() on rc.
func getS3Reader(bucket *s3.Bucket, s3Key string, offset uint64) (rc io.ReadCloser, err error) {
	if offset == 0 {
		return bucket.GetReader(s3Key)
	}

	byteRange := fmt.Sprintf("bytes=%d-", offset)
	resp, err := bucket.GetResponseWithHeaders(s3Key, map[string][]string{
		"Range": {byteRange},
	})
	if resp == nil {
		return nil, err
	}
	// The body is handed back even when err is set, alongside that error.
	return resp.Body, err
}
Ejemplo n.º 9
0
// makeupload generates the function used for uploading files as we walk the
// file tree from `base`.
//
// The returned function has the signature of a file tree walk callback. For
// every non-directory item whose base name matches pattern it uploads the file
// to bucket under bucketPrefix, keeping the path relative to base, and then
// removes the local file. With dryRun set it only reports what would be
// uploaded. progress is updated as it goes: Count for every matching file,
// Bytes for every successful upload and Errors for every failure.
func makeupload(base string, pattern *regexp.Regexp, bucket *s3.Bucket, bucketPrefix string, dryRun bool, progress *Progress) func(string, os.FileInfo, error) error {
	// Remove any excess path separators from the bucket prefix.
	bucketPrefix = strings.Trim(bucketPrefix, "/")

	// Get a canonical version of the base dir
	cleanBase := filepath.Clean(base)

	// Create a closure for the upload function.
	return func(path string, fi os.FileInfo, err error) (errOut error) {
		if err != nil {
			return err
		}

		if fi.IsDir() {
			return nil
		}

		baseName := filepath.Base(path)
		if !pattern.MatchString(baseName) {
			return nil
		}

		// Make sure we're comparing apples to apples when stripping off the
		// base path. Use the canonical versions of both.
		// NOTE(review): this slicing assumes path lies strictly below base.
		cleanPath := filepath.Clean(path)
		relPath := cleanPath[len(cleanBase)+1:]

		// If we encounter a file
		//    /path/to/base/foo/bar/baz
		//    <--- base ---><-- rel -->
		// and our bucket prefix is `hello/files`, our file in S3 will be at
		//    s3://bucket-name/hello/files/foo/bar/baz
		//                     <- prefix -><-- rel -->
		s3Path := fmt.Sprintf("%s/%s", bucketPrefix, relPath)

		// Update progress Count whether we were successful or not.
		progress.Count++

		if dryRun {
			fmt.Printf("Dry run. Not uploading item to %s\n", s3Path)
			return nil
		}

		fmt.Printf("Uploading item to: %s\n", s3Path)
		reader, err := os.Open(path)
		if err != nil {
			fmt.Printf("Error opening %s for reading: %s\n", path, err)
			progress.Errors++
			return err
		}

		err = bucket.PutReader(s3Path, reader, fi.Size(), "binary/octet-stream", s3.BucketOwnerFull, s3.Options{})
		if err != nil {
			// Release the file handle before bailing out so a failed upload
			// does not leak a descriptor. The upload error is the one worth
			// reporting, so a close failure here is deliberately ignored.
			_ = reader.Close()
			progress.Errors++
			return err
		}

		// Count the bytes for this file as progress if there were
		// no upload errors.
		progress.Bytes += fi.Size()

		err = reader.Close()
		if err != nil {
			fmt.Printf("Error closing %s: %s\n", path, err)
			progress.Errors++
			errOut = err
		}

		// Now remove the file locally.
		err = os.Remove(path)
		if err != nil {
			fmt.Printf("Error removing %s: %s\n", path, err)
			progress.Errors++
			errOut = err
		}

		return errOut
	}
}
Ejemplo n.º 10
0
// Stream takes a set of prefixes, lists them and streams the contents of the
// keys found to standard output, line by line.
//
// A prefix that names an existing key is streamed directly; any other prefix
// is listed recursively with searchDepth. When keyRegex is not empty only keys
// matching it are streamed. With includeKeyName set every line is prefixed
// with "[<key>] ". The bucket handle is created from the first prefix and
// reused for all the others. Keys are fetched by ten concurrent workers, so
// lines of different keys may interleave. Any failure terminates the program.
func Stream(prefixes []string, searchDepth int, keyRegex string, includeKeyName bool) {
	if len(prefixes) == 0 {
		fmt.Printf("No prefixes provided\n Usage: fasts3 get <prefix>")
		return
	}
	keys := make(chan string, len(prefixes)*2+1)

	// A nil filter means "accept every key".
	var keyRegexFilter *regexp.Regexp
	if keyRegex != "" {
		keyRegexFilter = regexp.MustCompile(keyRegex)
	}

	var b *s3.Bucket
	go func() {
		for _, prefix := range prefixes {
			bucket, prefix := parseS3Uri(prefix)

			if b == nil {
				b = GetBucket(bucket)
			}

			keyExists, err := b.Exists(prefix)
			if err != nil {
				log.Fatalln(err)
			}

			if keyExists {
				if keyRegexFilter != nil && !keyRegexFilter.MatchString(prefix) {
					continue
				}
				keys <- prefix
				continue
			}

			for key := range s3wrapper.ListRecurse(b, prefix, searchDepth) {
				if keyRegexFilter != nil && !keyRegexFilter.MatchString(key.Key) {
					continue
				}
				keys <- key.Key
			}
		}
		close(keys)
	}()

	var wg sync.WaitGroup
	msgs := make(chan string, 1000)
	for i := 1; i <= 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for key := range keys {
				bts, err := s3wrapper.Get(b, key)
				if err != nil {
					// Do not go on with the bytes of a failed download: the
					// error used to be overwritten by the next call and lost.
					log.Fatalln(err)
				}
				reader, err := getReaderByExt(bts, key)
				if err != nil {
					panic(err)
				}
				for {
					line, _, err := reader.ReadLine()
					if err != nil {
						// End of this key's contents; anything else is fatal.
						if err.Error() == "EOF" {
							break
						}
						log.Fatalln(err)
					}
					msg := fmt.Sprintf("%s\n", string(line))
					if includeKeyName {
						msg = fmt.Sprintf("[%s] %s", key, msg)
					}
					msgs <- msg
				}
			}
		}()
	}

	// Close msgs once every worker is done so the print loop below ends.
	go func() {
		wg.Wait()
		close(msgs)
	}()
	for msg := range msgs {
		fmt.Print(msg)
	}
}
Ejemplo n.º 11
0
// Get lists the given prefixes and downloads every key found into the current
// working directory.
//
// A prefix that names an existing key is downloaded as is; any other prefix is
// listed recursively, where searchDepth can be specified to increase the speed
// of listing. The destination of a key is its name with the originating prefix
// removed, joined to the working directory. The bucket handle is created from
// the first prefix and reused for all the others. Downloads are performed by
// ten concurrent workers, and any failure terminates the program.
func Get(prefixes []string, searchDepth int) {
	if len(prefixes) == 0 {
		fmt.Printf("No prefixes provided\n Usage: fasts3 get <prefix>")
		return
	}

	requests := make(chan GetRequest, len(prefixes)*2+1)
	var b *s3.Bucket

	// Producer: turn every prefix into one or more download requests.
	go func() {
		for _, uri := range prefixes {
			bucketName, keyPrefix := parseS3Uri(uri)
			if b == nil {
				b = GetBucket(bucketName)
			}

			found, err := b.Exists(keyPrefix)
			if err != nil {
				log.Fatalln(err)
			}

			if !found {
				for listed := range s3wrapper.ListRecurse(b, keyPrefix, searchDepth) {
					requests <- GetRequest{Key: listed.Key, OriginalPrefix: keyPrefix}
				}
				continue
			}

			// A single key: everything up to its last path component is the
			// prefix stripped off when building the destination.
			segments := strings.Split(keyPrefix, "/")
			parent := strings.Join(segments[:len(segments)-1], "/") + "/"
			requests <- GetRequest{Key: keyPrefix, OriginalPrefix: parent}
		}
		close(requests)
	}()

	var workers sync.WaitGroup
	msgs := make(chan string, 1000)
	cwd, err := os.Getwd()
	if err != nil {
		log.Fatalln(err)
	}

	// Consumers: download the requested keys.
	for n := 0; n < 10; n++ {
		workers.Add(1)
		go func() {
			defer workers.Done()
			for req := range requests {
				relative := strings.Replace(req.Key, req.OriginalPrefix, "", 1)
				target := path.Join(cwd, relative)
				msgs <- fmt.Sprintf("Getting %s -> %s\n", req.Key, target)
				if getErr := s3wrapper.GetToFile(b, req.Key, target); getErr != nil {
					log.Fatalln(getErr)
				}
			}
		}()
	}

	// Close msgs once every worker is done so the print loop below ends.
	go func() {
		workers.Wait()
		close(msgs)
	}()
	for line := range msgs {
		fmt.Print(line)
	}
}
Ejemplo n.º 12
0
// Del deletes a set of prefixes(s3 keys or partial keys
func Del(prefixes []string, searchDepth int, isRecursive bool) {
	if len(*delPrefixes) == 0 {
		fmt.Printf("No prefixes provided\n Usage: fasts3 del <prefix>")
		return
	}
	keys := make(chan string, len(prefixes)*2+1)
	var b *s3.Bucket = nil
	go func() {
		for _, delPrefix := range prefixes {
			bucket, prefix := parseS3Uri(delPrefix)

			if b == nil {
				b = GetBucket(bucket)
			}

			keys <- prefix
			if *delRecurse {
				keyExists, err := b.Exists(prefix)
				if err != nil {
					log.Fatalln(err)
				}

				if keyExists {
					keys <- prefix
				} else if *delRecurse {
					for key := range s3wrapper.ListRecurse(b, prefix, searchDepth) {
						keys <- key.Key
					}

				} else {
					fmt.Printf("trying to delete a prefix, please add --recursive or -r to proceed\n")
				}
			}
		}
		close(keys)
	}()

	var wg sync.WaitGroup
	msgs := make(chan string, 1000)
	for i := 1; i <= 10; i++ {
		wg.Add(1)
		go func() {
			batch := make([]string, 0, 100)
			for key := range keys {
				batch = append(batch, key)
				if len(batch) >= 100 {
					err := s3wrapper.DeleteMulti(b, batch)
					if err != nil {
						log.Fatalln(err)
					}
					for _, k := range batch {
						msgs <- fmt.Sprintf("File %s Deleted\n", k)
					}
					batch = batch[:0]
				}
			}

			if len(batch) > 0 {
				err := s3wrapper.DeleteMulti(b, batch)
				if err != nil {
					log.Fatalln(err)
				}
				for _, k := range batch {
					msgs <- fmt.Sprintf("File %s Deleted\n", k)
				}
			}
			wg.Done()
		}()
	}
	go func() {
		wg.Wait()
		close(msgs)
	}()
	for msg := range msgs {
		fmt.Print(msg)
	}
}