// getFiles downloads every key in fileList concurrently, writes each object
// to disk as "<date>-<timestamp>.gz", and sends the filename on filesChan.
// It blocks until all downloads have completed.
func getFiles(fileList []string, bucket *s3.Bucket, date string, filesChan chan string) {
	var wg sync.WaitGroup
	myHeader := make(http.Header)
	myHeader.Add("Accept-Encoding", "compress, gzip")
	for _, file := range fileList {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			// Keys look like "<prefix>/<timestamp>.<ext>"; extract the timestamp.
			timeStamp := strings.Split(strings.SplitN(file, ".", 2)[0], "/")[1]
			fmt.Println(timeStamp)
			response, err := bucket.GetResponseWithHeaders(file, myHeader)
			if err != nil {
				log.Fatal(err)
			}
			defer response.Body.Close()
			data, err := ioutil.ReadAll(response.Body)
			if err != nil {
				log.Fatal(err)
			}
			filename := date + "-" + timeStamp + ".gz"
			if err := ioutil.WriteFile(filename, data, 0666); err != nil {
				log.Fatal(err)
			}
			filesChan <- filename
		}(file)
	}
	wg.Wait()
}
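// downloadForDate is a hypothetical caller (not part of the original code)
// sketching how getFiles is meant to be driven: because getFiles only returns
// after every worker has sent on filesChan, the channel must be drained
// concurrently (or buffered to at least len(fileList)), otherwise the workers
// and wg.Wait deadlock.
func downloadForDate(fileList []string, bucket *s3.Bucket, date string) []string {
	filesChan := make(chan string)
	done := make(chan []string)
	go func() {
		var names []string
		for name := range filesChan {
			names = append(names, name)
		}
		done <- names
	}()
	getFiles(fileList, bucket, date, filesChan)
	close(filesChan) // safe: all sends have completed once getFiles returns
	return <-done
}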
// headName issues a HEAD request for name and translates a 404 into
// afero.ErrFileNotFound, so callers can distinguish a missing key from
// other request failures.
func headName(name string, bucket *s3.Bucket) (*http.Response, error) {
	resp, err := bucket.Head(name, make(map[string][]string))
	if err != nil && err.Error() == "404 Not Found" {
		return nil, afero.ErrFileNotFound
	} else if err != nil {
		return nil, err
	}
	return resp, nil
}
// asyncGetFile fetches path from the bucket in a background goroutine and
// sends the object's bytes on fileData, or nil if the download failed.
func asyncGetFile(bucket *s3.Bucket, path string, fileData chan []byte) {
	go func() {
		data, getErr := bucket.Get(path)
		if getErr != nil {
			fileData <- nil
			return
		}
		fileData <- data
	}()
}
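// fetchAll is a hypothetical usage sketch (not part of the original code):
// it starts one asyncGetFile per path and collects the results. Note that
// results arrive in completion order, not path order, and a failed fetch
// shows up as a nil slice; the buffered channel keeps workers from blocking.
func fetchAll(bucket *s3.Bucket, paths []string) [][]byte {
	fileData := make(chan []byte, len(paths))
	for _, p := range paths {
		asyncGetFile(bucket, p, fileData)
	}
	results := make([][]byte, 0, len(paths))
	for range paths {
		results = append(results, <-fileData)
	}
	return results
}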
// Put puts a serialize.Msgpack object into bucket b at path. The caller
// supplies the object's checksum, which is sent as the Content-MD5 header
// so S3 can verify the uploaded payload.
func Put(b *s3.Bucket, data []byte, path string, checksum string) error {
	// The AWS API wants the payload as a []byte.
	options := s3.Options{ContentMD5: checksum}
	return b.Put(path, data, "", s3.Private, options)
}
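// computeContentMD5 is a hypothetical helper (not part of the original code,
// and it assumes crypto/md5 and encoding/base64 are imported) showing how
// the checksum argument to Put would typically be produced: S3's Content-MD5
// header is the base64 encoding of the raw 16-byte MD5 digest, not its hex
// form.
func computeContentMD5(data []byte) string {
	digest := md5.Sum(data) // [16]byte
	return base64.StdEncoding.EncodeToString(digest[:])
}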
// killBucket deletes a test bucket, retrying until it succeeds or the retry
// budget is exhausted. A non-empty bucket is drained (keys deleted, multipart
// uploads aborted) before the delete is retried; an already-missing bucket or
// a DNS failure is treated as success.
func killBucket(b *s3.Bucket) {
	var err error
	for attempt := attempts.Start(); attempt.Next(); {
		err = b.DelBucket()
		if err == nil {
			return
		}
		if _, ok := err.(*net.DNSError); ok {
			return
		}
		e, ok := err.(*s3.Error)
		if ok && e.Code == "NoSuchBucket" {
			return
		}
		if ok && e.Code == "BucketNotEmpty" {
			// Errors are ignored here. Just retry.
			resp, err := b.List("", "", "", 1000)
			if err == nil {
				for _, key := range resp.Contents {
					_ = b.Del(key.Key)
				}
			}
			multis, _, _ := b.ListMulti("", "")
			for _, m := range multis {
				_ = m.Abort()
			}
		}
	}
	message := "cannot delete test bucket"
	if err != nil {
		message += ": " + err.Error()
	}
	panic(message)
}
// getFileList returns the keys in the bucket that begin with the given date
// prefix. Note that this issues a single List call, so at most the first 500
// matching keys are returned; the result is not paginated.
func getFileList(date string, bucket *s3.Bucket) []string {
	resultsSlice := make([]string, 0)
	response, err := bucket.List(date, "", "", 500)
	if err != nil {
		log.Fatal(err)
	}
	for _, object := range response.Contents {
		resultsSlice = append(resultsSlice, object.Key)
	}
	return resultsSlice
}
// ApplyToMultiList applies the action a to every key in the bucket that
// matches prefix. To select ALL of the bucket's contents, pass "" for both
// prefix and delim. The listing is paginated 1000 keys at a time, and each
// key is handed to a in its own goroutine.
func ApplyToMultiList(b *s3.Bucket, prefix, delim string, a Action) {
	// Print throughput stats every three seconds for the life of the process.
	go func() {
		for range time.Tick(3 * time.Second) {
			fmt.Printf("read %v keys\n", atomic.LoadUint64(&donecounter))
			fmt.Printf("processed %v keys\n", atomic.LoadUint64(&counter))
		}
	}()
	resp, err := b.List(prefix, delim, "", 1000)
	if err != nil {
		panic(err)
	}
	if len(resp.Contents) < 1 {
		log.Infof("got no Contents")
		return
	}
	lastSeen := resp.Contents[len(resp.Contents)-1]
	for _, obj := range resp.Contents {
		atomic.AddUint64(&counter, 1)
		wg.Add(1) // Add before starting the goroutine so wg.Wait cannot race it.
		go func(kb KeyBucket) {
			defer wg.Done()
			a(kb)
		}(KeyBucket{b, obj})
	}
	// Keep listing from the last key seen until S3 reports no more pages.
	for resp.IsTruncated {
		resp, err = b.List(prefix, delim, lastSeen.Key, 1000)
		if err != nil {
			panic(err)
		}
		if len(resp.Contents) == 0 {
			break
		}
		lastSeen = resp.Contents[len(resp.Contents)-1]
		fmt.Printf("------ \n %v \n-----", lastSeen.Key)
		// TODO: allow setting a max number of workers.
		for _, obj := range resp.Contents {
			atomic.AddUint64(&counter, 1)
			wg.Add(1)
			// Pass obj by value so each goroutine gets its own key.
			go func(kb KeyBucket) {
				defer wg.Done()
				a(kb)
			}(KeyBucket{b, obj})
		}
	}
	wg.Wait()
}
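// A minimal usage sketch, under the assumption (inferred from the call
// a(KeyBucket{b, obj}) above) that Action is func(KeyBucket) and KeyBucket
// bundles the bucket with one listed key. markDone and applyExample are
// hypothetical, as is the "logs/" prefix; the action just counts completions
// via the same donecounter the stats goroutine reports.
func markDone(kb KeyBucket) {
	// A real action would inspect or mutate the key carried by kb here.
	atomic.AddUint64(&donecounter, 1)
}

func applyExample(b *s3.Bucket) {
	ApplyToMultiList(b, "logs/", "", markDone)
}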
// Get gets the s3 object at the specified path and saves it to disk under
// its own filename with root prepended, creating directories as needed.
func Get(b *s3.Bucket, path string, root string) error {
	var conn Connection
	err := conn.Connect()
	if err != nil {
		return err
	}
	data, err := b.Get(path)
	if err != nil {
		return fmt.Errorf("can't download %v: %v", path, err)
	}
	localpath := fmt.Sprintf("%v/%v", root, path)
	// Strip the filename to get the directory portion of the local path.
	stripped := strings.Split(localpath, "/")
	pathonly := strings.Join(stripped[:len(stripped)-1], "/")
	if err = os.MkdirAll(pathonly, 0777); err != nil {
		return err
	}
	if err = ioutil.WriteFile(localpath, data, 0666); err != nil {
		return err
	}
	fmt.Printf("Downloaded: %v\n", localpath)
	return nil
}
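// getExample is a hypothetical usage sketch (the key and root are
// placeholders): downloading "logs/2015/day.gz" with root "/tmp/mirror"
// writes /tmp/mirror/logs/2015/day.gz, creating the intermediate
// directories along the way.
func getExample(b *s3.Bucket) {
	if err := Get(b, "logs/2015/day.gz", "/tmp/mirror"); err != nil {
		log.Fatal(err)
	}
}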
// processDir walks dirName, uploading each regular file to the bucket under
// s3KeyPrefix. A file is skipped when the bucket already holds an object
// whose ETag matches the file's MD5 digest. Symlinks are ignored, and
// subdirectories are recursed into when shouldRecurseInto allows it.
func processDir(dirName string, s3KeyPrefix string, bucket *s3.Bucket) {
	if verbose {
		log.Printf("Processing directory %s", dirName)
	}
	fileInfos, err := ioutil.ReadDir(dirName)
	if err != nil {
		log.Fatal(err)
	}
	for _, fileInfo := range fileInfos {
		if time.Now().After(stopTime) {
			log.Fatal("Timeout limit reached")
		}
		filePath := path.Join(dirName, fileInfo.Name())
		// Ignore symlinks for now.
		// TODO: add option to follow symlinks
		if (fileInfo.Mode() & os.ModeSymlink) != 0 {
			continue
		}
		if fileInfo.IsDir() {
			if shouldRecurseInto(fileInfo.Name()) {
				processDir(filePath, s3KeyPrefix+fileInfo.Name()+"/", bucket)
			}
			continue
		}
		if ignoreNames[fileInfo.Name()] != "" {
			continue
		}
		s3Key := s3KeyPrefix + fileInfo.Name()
		putRequired := false
		s3ETag := s3Objects[s3Key]
		if s3ETag == "" {
			if verbose {
				log.Printf("Not found in S3 bucket: %s", s3Key)
			}
			putRequired = true
		}
		data, err := ioutil.ReadFile(filePath)
		if err != nil {
			log.Fatal(err)
		}
		// If the object already exists, compare the file's MD5 with the object's
		// ETag to determine whether the file needs to be uploaded.
		if !putRequired {
			digest := md5.Sum(data)
			// Convert digest to a slice because it is a byte array ([16]byte).
			fileETag := "\"" + hex.EncodeToString(digest[:]) + "\""
			if fileETag != s3ETag {
				if verbose {
					log.Printf("Need to upload %s: expected ETag = %s, actual = %s", filePath, fileETag, s3ETag)
				}
				putRequired = true
			}
		}
		if putRequired {
			// TODO: this should be configurable, but for now if the mime-type
			// cannot be determined, do not upload.
			contentType := mime.TypeByExtension(path.Ext(strings.ToLower(fileInfo.Name())))
			if contentType == "" && includeUnknownMimeTypes {
				contentType = "application/octet-stream"
			}
			if contentType != "" {
				err = bucket.Put(s3Key, data, contentType, s3.Private, s3.Options{})
				if err != nil {
					log.Fatal(err)
				}
				log.Printf("Uploaded %s\n", s3Key)
			}
		} else if verbose {
			log.Printf("Identical file, no upload required: %s", filePath)
		}
	}
}
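// loadS3Objects is a hypothetical sketch of how the s3Objects map consulted
// above (key -> ETag) could be built; the real program's loader is not part
// of this excerpt. Each listed key carries its ETag, which for simple
// (non-multipart) uploads is the quoted hex MD5 digest that processDir
// compares against.
func loadS3Objects(bucket *s3.Bucket, prefix string) (map[string]string, error) {
	objects := make(map[string]string)
	marker := ""
	for {
		resp, err := bucket.List(prefix, "", marker, 1000)
		if err != nil {
			return nil, err
		}
		for _, key := range resp.Contents {
			objects[key.Key] = key.ETag
		}
		if !resp.IsTruncated || len(resp.Contents) == 0 {
			break
		}
		marker = resp.Contents[len(resp.Contents)-1].Key
	}
	return objects, nil
}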
// List returns one page of the bucket's contents, filtered by prefix and
// delim, starting after marker and containing at most max keys.
func List(bucket *s3.Bucket, prefix, delim, marker string, max int) (*s3.ListResp, error) {
	return bucket.List(prefix, delim, marker, max)
}
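// listDir is a hypothetical usage sketch (the prefix is a placeholder): with
// a "/" delimiter, List behaves like a directory read, returning the keys
// directly under the prefix in Contents and the "subdirectory" prefixes in
// CommonPrefixes.
func listDir(bucket *s3.Bucket, prefix string) error {
	resp, err := List(bucket, prefix, "/", "", 1000)
	if err != nil {
		return err
	}
	for _, dir := range resp.CommonPrefixes {
		fmt.Println("dir: ", dir)
	}
	for _, key := range resp.Contents {
		fmt.Println("file:", key.Key)
	}
	return nil
}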