func main() {
    flag.Parse()
    if *region == "" {
        *region = aws.InstanceRegion()
    }
    auth, err := aws.GetAuth("", "", "", time.Now())
    if err != nil {
        log.Panic(err)
    }
    s3service := s3.New(auth, aws.GetRegion(*region))
    bucket := s3service.Bucket(flag.Arg(0))
    for {
        var entries []RoutingEntry
        data, err := bucket.Get("/routing-table.json")
        if err == nil {
            err = json.Unmarshal(data, &entries)
            if err == nil {
                updateProxies(entries)
            }
        } else {
            log.Print("could not get routing table: ", err)
        }
        time.Sleep(time.Second * 10)
    }
}
// NewS3Storage initializes the S3Storage with the required AWS arguments.
func NewS3Storage(region aws.Region, auth aws.Auth, bucketName string, prefix string, bucketACL s3.ACL) (*S3Storage, error) {
    s3obj := s3.New(auth, region)
    bucket := s3obj.Bucket(bucketName)

    // Running PutBucket too many times in parallel (such as from a distributed cron) can generate the error:
    //   "A conflicting conditional operation is currently in progress against this resource. Please try again"
    // We should only call PutBucket when we suspect that the bucket doesn't exist. Unfortunately, the
    // current AdRoll/goamz lib doesn't implement ListBuckets, so to check whether the bucket exists we
    // do a List and see if it returns an error before calling PutBucket.
    _, err := bucket.List("", "/", "", 1)
    // Technically there are many reasons this could fail (such as access denied, or another network error),
    // but it should sufficiently limit the number of times PutBucket is called in normal operation.
    if err != nil {
        err = bucket.PutBucket(bucketACL)
        if err != nil {
            return nil, err
        }
    }

    return &S3Storage{
        s3:     s3obj,
        bucket: bucket,
        region: region,
        auth:   auth,
        prefix: prefix,
    }, nil
}
// GetBucket builds an S3 connection and retrieves the bucket.
func GetBucket(bucket string) *s3.Bucket {
    auth, err := awswrapper.GetAwsAuth()
    if err != nil {
        log.Fatalln(err)
    }
    b := s3.New(auth, aws.USEast).Bucket(bucket)
    loc, err := b.Location()
    if err != nil {
        log.Fatalln(err)
    }
    // If the bucket actually lives in another region, reconnect there.
    if aws.GetRegion(loc) != aws.USEast {
        b = s3.New(auth, aws.GetRegion(loc)).Bucket(bucket)
    }
    return b
}
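// For context, a minimal, hypothetical caller of GetBucket might look like this.
// The bucket and key names below are illustrative only; (*s3.Bucket).Get is the
// same goamz call used in the other snippets here.
func fetchExampleObject() ([]byte, error) {
    b := GetBucket("example-reports") // reconnects in the bucket's real region if needed
    return b.Get("2015/summary.json") // returns the object bytes, or an error
}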
func (u *S3Uploader) Setup(destination string, debugHTTP bool) error {
    u.Destination = destination
    u.DebugHTTP = debugHTTP

    // Try to auth with S3
    auth, err := awsS3Auth()
    if err != nil {
        return fmt.Errorf("Error creating AWS S3 authentication: %s", err.Error())
    }

    // Try to get the region
    region, err := awsS3Region()
    if err != nil {
        return err
    }

    logger.Debug("Authorizing S3 credentials and finding bucket `%s` in region `%s`...", u.BucketName(), region.Name)

    // Find the bucket
    s3Client := s3.New(auth, region)
    bucket := s3Client.Bucket(u.BucketName())

    // If the list doesn't return an error, then we've got our bucket
    _, err = bucket.List("", "", "", 0)
    if err != nil {
        return errors.New("Could not find bucket `" + u.BucketName() + "` in region `" + region.Name + "` (" + err.Error() + ")")
    }

    u.Bucket = bucket

    return nil
}
// Communicate with all endpoints to see if they are alive.
func (s *ClientTests) TestRegions(c *check.C) {
    errs := make(chan error, len(aws.Regions))
    for _, region := range aws.Regions {
        go func(r aws.Region) {
            svc := s3.New(s.s3.Auth, r)
            b := svc.Bucket("goamz-" + svc.Auth.AccessKey)
            _, err := b.Get("non-existent")
            errs <- err
        }(region)
    }
    for range aws.Regions {
        err := <-errs
        if err != nil {
            s3err, ok := err.(*s3.Error)
            if ok {
                c.Check(s3err.Code, check.Matches, "NoSuchBucket")
            } else if _, ok = err.(*net.DNSError); ok {
                // Okay as well.
            } else {
                c.Errorf("Non-S3 error: %s", err)
            }
        } else {
            c.Errorf("Test should have errored but it seems to have succeeded")
        }
    }
}
func (s *LocalServerSuite) SetUpSuite(c *check.C) {
    s.srv.SetUp(c)
    s.clientTests.s3 = s3.New(s.srv.auth, s.srv.region)

    // TODO Sadly the fake server ignores auth completely right now. :-(
    s.clientTests.authIsBroken = true

    s.clientTests.Cleanup()
}
// New constructs a new Driver with the given AWS credentials, region, encryption flag, and
// bucketName
func New(params DriverParameters) (*Driver, error) {
    auth, err := aws.GetAuth(params.AccessKey, params.SecretKey, "", time.Time{})
    if err != nil {
        return nil, err
    }

    if !params.Secure {
        params.Region.S3Endpoint = strings.Replace(params.Region.S3Endpoint, "https", "http", 1)
    }

    s3obj := s3.New(auth, params.Region)
    bucket := s3obj.Bucket(params.Bucket)

    if params.V4Auth {
        s3obj.Signature = aws.V4Signature
    } else {
        if params.Region.Name == "eu-central-1" {
            return nil, fmt.Errorf("The eu-central-1 region only works with v4 authentication")
        }
    }

    // Validate that the given credentials have at least read permissions in the
    // given bucket scope.
    if _, err := bucket.List(strings.TrimRight(params.RootDirectory, "/"), "", "", 1); err != nil {
        return nil, err
    }

    // TODO Currently multipart uploads have no timestamps, so this would be unwise
    // if you initiated a new s3driver while another one is running on the same bucket.
    // multis, _, err := bucket.ListMulti("", "")
    // if err != nil {
    //     return nil, err
    // }
    // for _, multi := range multis {
    //     err := multi.Abort()
    //     //TODO appropriate to do this error checking?
    //     if err != nil {
    //         return nil, err
    //     }
    // }

    d := &driver{
        S3:            s3obj,
        Bucket:        bucket,
        ChunkSize:     params.ChunkSize,
        Encrypt:       params.Encrypt,
        RootDirectory: params.RootDirectory,
    }

    return &Driver{
        baseEmbed: baseEmbed{
            Base: base.Base{
                StorageDriver: d,
            },
        },
    }, nil
}
// New constructs a new Driver with the given AWS credentials, region, encryption flag, and
// bucketName
func New(params DriverParameters) (*Driver, error) {
    auth, err := aws.GetAuth(params.AccessKey, params.SecretKey, "", time.Time{})
    if err != nil {
        return nil, fmt.Errorf("unable to resolve aws credentials, please ensure that 'accesskey' and 'secretkey' are properly set or the credentials are available in $HOME/.aws/credentials: %v", err)
    }

    if !params.Secure {
        params.Region.S3Endpoint = strings.Replace(params.Region.S3Endpoint, "https", "http", 1)
    }

    s3obj := s3.New(auth, params.Region)
    bucket := s3obj.Bucket(params.Bucket)

    if params.V4Auth {
        s3obj.Signature = aws.V4Signature
    } else {
        if params.Region.Name == "eu-central-1" {
            return nil, fmt.Errorf("The eu-central-1 region only works with v4 authentication")
        }
    }

    // TODO Currently multipart uploads have no timestamps, so this would be unwise
    // if you initiated a new s3driver while another one is running on the same bucket.
    // multis, _, err := bucket.ListMulti("", "")
    // if err != nil {
    //     return nil, err
    // }
    // for _, multi := range multis {
    //     err := multi.Abort()
    //     //TODO appropriate to do this error checking?
    //     if err != nil {
    //         return nil, err
    //     }
    // }

    d := &driver{
        S3:            s3obj,
        Bucket:        bucket,
        ChunkSize:     params.ChunkSize,
        Encrypt:       params.Encrypt,
        RootDirectory: params.RootDirectory,
        zeros:         make([]byte, params.ChunkSize),
    }

    d.pool.New = func() interface{} {
        return make([]byte, d.ChunkSize)
    }

    return &Driver{
        baseEmbed: baseEmbed{
            Base: base.Base{
                StorageDriver: d,
            },
        },
    }, nil
}
func InitBucket(name string) error {
    auth, err := aws.EnvAuth()
    if err != nil {
        return err
    }
    client := s3.New(auth, aws.GetRegion("ap-southeast-1"))
    Bucket = client.Bucket(name)
    return nil
}
func initAwsBucket() {
    expiration := time.Now().Add(time.Hour * 1)
    auth, err := aws.GetAuth(config.AccessKey, config.SecretKey, "", expiration) // "" = token, which isn't needed
    if err != nil {
        panic(err)
    }
    aws_bucket = s3.New(auth, aws.GetRegion(config.Region)).Bucket(config.Bucket)
}
func (s *AmazonClientSuite) SetUpSuite(c *check.C) {
    if !testutil.Amazon {
        c.Skip("live tests against AWS disabled (no -amazon)")
    }
    s.srv.SetUp(c)
    s.s3 = s3.New(s.srv.auth, s.Region)
    // In case tests were interrupted in the middle of an earlier run.
    s.ClientTests.Cleanup()
}
func (s *AmazonDomainClientSuite) SetUpSuite(c *check.C) {
    if !testutil.Amazon {
        c.Skip("live tests against AWS disabled (no -amazon)")
    }
    s.srv.SetUp(c)

    region := s.Region
    region.S3BucketEndpoint = "https://${bucket}.s3.amazonaws.com"
    s.s3 = s3.New(s.srv.auth, region)

    s.ClientTests.Cleanup()
}
// NewS3Volume returns a new S3Volume using the given auth, region,
// and bucket name. The replication argument specifies the replication
// level to report when writing data.
func NewS3Volume(auth aws.Auth, region aws.Region, bucket string, readonly bool, replication int) *S3Volume {
    return &S3Volume{
        Bucket: &s3.Bucket{
            S3:   s3.New(auth, region),
            Name: bucket,
        },
        readonly:      readonly,
        replication:   replication,
        indexPageSize: 1000,
    }
}
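// A minimal sketch (not from the original source) of wiring NewS3Volume up from
// environment credentials, using the aws.EnvAuth and aws.Regions helpers shown in
// other snippets. The bucket name, region, and replication level are placeholders.
func newVolumeFromEnv() (*S3Volume, error) {
    auth, err := aws.EnvAuth() // reads AWS credentials from the environment
    if err != nil {
        return nil, err
    }
    region := aws.Regions["us-east-1"]
    return NewS3Volume(auth, region, "example-keep-volume", false, 2), nil
}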
func initAwsBucket() {
    fmt.Println("Initializing aws bucket!", config.Port)

    expiration := time.Now().Add(time.Hour * 1)
    auth, err := aws.GetAuth(config.AccessKey, config.SecretKey, "", expiration) // "" = token, which isn't needed
    if err != nil {
        panic(err)
    }

    aws_bucket = s3.New(auth, aws.GetRegion(config.Region)).Bucket(config.Bucket)
}
func (d S3Downloader) Start() error {
    // Try to auth with S3
    auth, err := awsS3Auth()
    if err != nil {
        return fmt.Errorf("Error creating AWS S3 authentication: %s", err.Error())
    }

    // Try to get the region
    region, err := awsS3Region()
    if err != nil {
        return err
    }

    // Split apart the bucket (TrimPrefix, not TrimLeft, so bucket names starting
    // with "s", "3", ":" or "/" aren't mangled)
    bucketParts := strings.Split(strings.TrimPrefix(d.Bucket, "s3://"), "/")
    bucketName := bucketParts[0]
    bucketPath := strings.Join(bucketParts[1:], "/")

    logger.Debug("Authorizing S3 credentials and finding bucket `%s` in region `%s`...", bucketName, region.Name)

    // Find the bucket
    s3Client := s3.New(auth, region)
    bucket := s3Client.Bucket(bucketName)

    // If the list doesn't return an error, then we've got our bucket
    _, err = bucket.List("", "", "", 0)
    if err != nil {
        return errors.New("Could not find bucket `" + bucketName + "` in region `" + region.Name + "` (" + err.Error() + ")")
    }

    // Create the location of the file
    var s3Location string
    if bucketPath != "" {
        s3Location = strings.TrimRight(bucketPath, "/") + "/" + strings.TrimLeft(d.Path, "/")
    } else {
        s3Location = d.Path
    }

    // Generate a signed URL
    signedURL := bucket.SignedURL(s3Location, time.Now().Add(time.Hour))

    // We can now cheat and pass the URL onto our regular downloader
    return Download{
        URL:         signedURL,
        Path:        d.Path,
        Destination: d.Destination,
        Retries:     d.Retries,
        DebugHTTP:   d.DebugHTTP,
    }.Start()
}
func writeToBucket(f string, a string) {
    p, s, setErr := getSettings()
    if setErr != nil {
        log.Println("Error:", setErr)
        return
    }
    auth := aws.Auth{AccessKey: p, SecretKey: s}
    client := s3.New(auth, aws.APNortheast)
    bucket := client.Bucket("math-results")
    err := bucket.Put(f, []byte(a), "text/plain", s3.PublicRead, s3.Options{})
    if err != nil {
        log.Println("ERROR:", err)
    }
}
// NewS3Storage initializes the S3Storage with the required AWS arguments.
func NewS3Storage(region aws.Region, auth aws.Auth, bucketName string, prefix string, bucketACL s3.ACL) (*S3Storage, error) {
    s3obj := s3.New(auth, region)
    bucket := s3obj.Bucket(bucketName)

    err := bucket.PutBucket(bucketACL)
    if err != nil {
        return nil, err
    }

    return &S3Storage{
        s3:     s3obj,
        bucket: bucket,
        region: region,
        auth:   auth,
        prefix: prefix,
    }, nil
}
func downloadFromBucket(b string, f string) ([]byte, error) {
    p, s, setErr := getSettings()
    if setErr != nil {
        return nil, setErr
    }
    auth := aws.Auth{AccessKey: p, SecretKey: s}
    client := s3.New(auth, aws.USEast)
    bucket := client.Bucket(b)
    log.Println("Starting Get...")
    data, err := bucket.Get(f)
    if err != nil {
        return nil, err
    }
    log.Println("Completed Get!", len(data))
    return data, nil
}
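// Hypothetical caller (not part of the original code): download an object with
// downloadFromBucket and persist it locally. The bucket, key, and file names are
// placeholders; ioutil.WriteFile is from the standard "io/ioutil" package.
func saveResult() error {
    data, err := downloadFromBucket("math-results", "results/latest.txt")
    if err != nil {
        return err
    }
    return ioutil.WriteFile("latest.txt", data, 0644)
}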
func (input *S3OffsetInput) Init(config interface{}) (err error) {
    conf := config.(*S3OffsetInputConfig)
    input.S3OffsetInputConfig = conf

    if conf.MetaFile != "" {
        // We already have the required metadata. Don't need to fetch it.
        input.metaFileName = conf.MetaFile
    } else if conf.ClientIdListFile != "" {
        // Load clientids from file.
        input.clientids, err = readLines(conf.ClientIdListFile)
        if err != nil {
            return fmt.Errorf("Error reading file %s for 'client_id_list': %s", conf.ClientIdListFile, err)
        }
    } else {
        return fmt.Errorf("Missing parameter: You must specify either 'client_id_list' or 'metadata_file'")
    }

    auth, err := aws.GetAuth(conf.AWSKey, conf.AWSSecretKey, "", time.Now())
    if err != nil {
        return fmt.Errorf("Authentication error: %s\n", err)
    }
    region, ok := aws.Regions[conf.AWSRegion]
    if !ok {
        return fmt.Errorf("Parameter 'aws_region' must be a valid AWS Region")
    }
    s := s3.New(auth, region)
    s.ConnectTimeout = time.Duration(conf.S3ConnectTimeout) * time.Second
    s.ReadTimeout = time.Duration(conf.S3ReadTimeout) * time.Second

    // TODO: ensure we can read from (and list, for meta) the buckets.
    input.bucket = s.Bucket(conf.S3Bucket)

    if conf.S3MetaBucket != "" {
        input.metaBucket = s.Bucket(conf.S3MetaBucket)
    } else if conf.MetaFile == "" {
        return fmt.Errorf("Parameter 's3_meta_bucket' is required unless using 'metadata_file'")
    }

    // Remove any excess path separators from the bucket prefix.
    conf.S3MetaBucketPrefix = CleanBucketPrefix(conf.S3MetaBucketPrefix)

    input.stop = make(chan bool)
    input.offsetChan = make(chan MessageLocation, 1000)

    return nil
}
func getS3(u *url.URL) (io.ReadCloser, error) {
    var err error
    if AWSAuth.AccessKey == "" || AWSRegion.Name == "" {
        AWSAuth, AWSRegion, err = LoadAWSCredentials("")
        if err != nil {
            return nil, err
        }
        log.Println("region:", AWSRegion.Name)
        log.Println("aws_access_key_id:", AWSAuth.AccessKey)
    }
    client := s3.New(AWSAuth, AWSRegion)
    bucket := client.Bucket(u.Host)
    rc, err := bucket.GetReader(u.Path)
    if err != nil {
        return nil, err
    }
    return rc, nil
}
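// A hedged usage sketch (not from the original source): parse an s3:// URL and
// stream the object returned by getS3 to stdout. The URL is made up; url.Parse,
// io.Copy, and os.Stdout are standard library.
func catObject() error {
    u, err := url.Parse("s3://example-bucket/path/to/object.log")
    if err != nil {
        return err
    }
    rc, err := getS3(u) // bucket = u.Host, key = u.Path
    if err != nil {
        return err
    }
    defer rc.Close()
    _, err = io.Copy(os.Stdout, rc)
    return err
}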
func (input *S3SplitFileInput) Init(config interface{}) (err error) {
    conf := config.(*S3SplitFileInputConfig)
    input.S3SplitFileInputConfig = conf

    input.schema, err = LoadSchema(conf.SchemaFile)
    if err != nil {
        return fmt.Errorf("Parameter 'schema_file' must be a valid JSON file: %s", err)
    }

    if conf.S3Bucket != "" {
        auth, err := aws.GetAuth(conf.AWSKey, conf.AWSSecretKey, "", time.Now())
        if err != nil {
            return fmt.Errorf("Authentication error: %s\n", err)
        }
        region, ok := aws.Regions[conf.AWSRegion]
        if !ok {
            return fmt.Errorf("Parameter 'aws_region' must be a valid AWS Region")
        }
        s := s3.New(auth, region)
        s.ConnectTimeout = time.Duration(conf.S3ConnectTimeout) * time.Second
        s.ReadTimeout = time.Duration(conf.S3ReadTimeout) * time.Second
        // TODO: ensure we can read from the bucket.
        input.bucket = s.Bucket(conf.S3Bucket)
    } else {
        input.bucket = nil
    }

    if conf.S3ObjectMatchRegex != "" {
        if input.objectMatch, err = regexp.Compile(conf.S3ObjectMatchRegex); err != nil {
            err = fmt.Errorf("S3SplitFileInput: %s", err)
            return
        }
    } else {
        input.objectMatch = nil
    }

    // Remove any excess path separators from the bucket prefix.
    conf.S3BucketPrefix = CleanBucketPrefix(conf.S3BucketPrefix)

    input.stop = make(chan bool)
    input.listChan = make(chan string, 1000)

    return nil
}
func (u *S3Uploader) Setup(destination string) error {
    u.Destination = destination

    // Try to auth with S3
    auth, err := awsS3Auth()
    if err != nil {
        return fmt.Errorf("Error creating AWS S3 authentication: %s", err.Error())
    }

    regionName := "us-east-1"
    if os.Getenv("BUILDKITE_S3_DEFAULT_REGION") != "" {
        regionName = os.Getenv("BUILDKITE_S3_DEFAULT_REGION")
    } else if os.Getenv("AWS_DEFAULT_REGION") != "" {
        regionName = os.Getenv("AWS_DEFAULT_REGION")
    }

    // Check to make sure the region exists. There is a GetRegion API, but
    // there doesn't seem to be a way to make it error out if the region
    // doesn't exist.
    region, ok := aws.Regions[regionName]
    if !ok {
        return errors.New("Unknown AWS S3 Region `" + regionName + "`")
    }

    logger.Debug("Authorizing S3 credentials and finding bucket `%s` in region `%s`...", u.bucketName(), regionName)

    // Find the bucket
    s3Client := s3.New(auth, region)
    bucket := s3Client.Bucket(u.bucketName())

    // If the list doesn't return an error, then we've got our bucket
    _, err = bucket.List("", "", "", 0)
    if err != nil {
        return errors.New("Could not find bucket `" + u.bucketName() + "` in region `" + region.Name + "` (" + err.Error() + ")")
    }

    u.Bucket = bucket

    return nil
}
func (d S3Downloader) Start() error {
    // Try to auth with S3
    auth, err := awsS3Auth()
    if err != nil {
        return fmt.Errorf("Error creating AWS S3 authentication: %s", err.Error())
    }

    // Try to get the region
    region, err := awsS3Region()
    if err != nil {
        return err
    }

    logger.Debug("Authorizing S3 credentials and finding bucket `%s` in region `%s`...", d.BucketName(), region.Name)

    // Find the bucket
    s3Client := s3.New(auth, region)
    bucket := s3Client.Bucket(d.BucketName())

    // If the list doesn't return an error, then we've got our bucket
    _, err = bucket.List("", "", "", 0)
    if err != nil {
        return errors.New("Could not find bucket `" + d.BucketName() + "` in region `" + region.Name + "` (" + err.Error() + ")")
    }

    // Generate a signed URL
    signedURL := bucket.SignedURL(d.BucketFileLocation(), time.Now().Add(time.Hour))

    // We can now cheat and pass the URL onto our regular downloader
    return Download{
        Client:      *http.DefaultClient,
        URL:         signedURL,
        Path:        d.Path,
        Destination: d.Destination,
        Retries:     d.Retries,
        DebugHTTP:   d.DebugHTTP,
    }.Start()
}
func NewTestableS3Volume(c *check.C, readonly bool, replication int) *TestableS3Volume {
    clock := &fakeClock{}
    srv, err := s3test.NewServer(&s3test.Config{Clock: clock})
    c.Assert(err, check.IsNil)

    auth := aws.Auth{}
    region := aws.Region{
        Name:                 "test-region-1",
        S3Endpoint:           srv.URL(),
        S3LocationConstraint: true,
    }
    bucket := &s3.Bucket{
        S3:   s3.New(auth, region),
        Name: TestBucketName,
    }
    err = bucket.PutBucket(s3.ACL("private"))
    c.Assert(err, check.IsNil)

    return &TestableS3Volume{
        S3Volume:    NewS3Volume(auth, region, TestBucketName, readonly, replication),
        server:      srv,
        serverClock: clock,
    }
}
func main() {
    flagSchema := flag.String("schema", "", "Filename of the schema to use as a filter")
    flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
    flagBucketPrefix := flag.String("bucket-prefix", "", "S3 Bucket path prefix")
    flagAWSKey := flag.String("aws-key", "", "AWS Key")
    flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
    flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
    flagDryRun := flag.Bool("dry-run", false, "Don't actually do anything, just output what would be done")
    flagVerbose := flag.Bool("verbose", false, "Print detailed info")
    flag.Parse()

    if flag.NArg() != 0 {
        flag.PrintDefaults()
        os.Exit(1)
    }

    var err error
    var schema s3splitfile.Schema
    schema, err = s3splitfile.LoadSchema(*flagSchema)
    if err != nil {
        fmt.Printf("schema: %s\n", err)
        os.Exit(2)
    }

    if *flagDryRun {
        fmt.Printf("Dry Run: Would have listed files in s3://%s/%s according to filter schema %s\n",
            *flagBucket, *flagBucketPrefix, *flagSchema)
        os.Exit(0)
    }

    var b *s3.Bucket
    prefix := s3splitfile.CleanBucketPrefix(*flagBucketPrefix)

    // Initialize the S3 bucket
    auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
    if err != nil {
        fmt.Printf("Authentication error: %s\n", err)
        os.Exit(4)
    }
    region, ok := aws.Regions[*flagAWSRegion]
    if !ok {
        fmt.Printf("Parameter 'aws-region' must be a valid AWS Region\n")
        os.Exit(5)
    }
    s := s3.New(auth, region)
    b = s.Bucket(*flagBucket)

    var errCount int
    var totalCount int
    var totalSize int64

    startTime := time.Now().UTC()

    // List the keys as we see them
    for k := range s3splitfile.S3Iterator(b, prefix, schema) {
        if k.Err != nil {
            fmt.Printf("ERROR fetching key: %s\n", k.Err)
            errCount++
        } else {
            totalCount++
            totalSize += k.Key.Size
            fmt.Printf("%s\n", k.Key.Key)
        }
    }

    duration := time.Now().UTC().Sub(startTime).Seconds()

    if *flagVerbose {
        fmt.Printf("Filter matched %d files totaling %s in %.02fs (%d errors)\n",
            totalCount, s3splitfile.PrettySize(totalSize), duration, errCount)
    }
}
func main() {
    flag.Parse() // Read argv

    if shouldOutputVersion {
        fmt.Printf("redshift-tracking-copy-from-s3 %s\n", VERSION)
        os.Exit(0)
    }

    // Read config file
    parseConfigfile()

    // ----------------------------- Start a goroutine for each Bucket/Prefix/Table & repeat the migration check per table -----------------------------
    done := make(chan bool, len(cfg.Redshift.Tables))

    for i := range cfg.Redshift.Tables {
        quitSignal := make(chan os.Signal, 1)
        signal.Notify(quitSignal, os.Interrupt)

        go func(currentTable string, currentBucket string, currentPrefix string) {
            quitReceived := false
            go func() {
                <-quitSignal
                if cfg.Default.Debug {
                    fmt.Printf("Quit signal received on %s watcher. Going down...\n", currentTable)
                }
                quitReceived = true
            }()

            db, err := sql.Open("postgres", fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=disable",
                cfg.Redshift.Host, cfg.Redshift.Port, cfg.Redshift.User, cfg.Redshift.Password, cfg.Redshift.Database))
            if err != nil {
                reportError("Couldn't connect to redshift database: ", err)
            }

            rows, err := db.Query(fmt.Sprintf("select COLUMN_NAME, DATA_TYPE from INFORMATION_SCHEMA.COLUMNS where table_name = '%s' limit 1000", currentTable))
            if err != nil {
                reportError("Couldn't execute statement for INFORMATION_SCHEMA.COLUMNS: ", err)
            }

            if cfg.Default.Debug {
                fmt.Println("Looking for table, columns will display below.")
            }
            anyRows := false
            for rows.Next() {
                var column_name string
                var data_type string
                err = rows.Scan(&column_name, &data_type)
                if err != nil {
                    reportError("Couldn't scan row for table: ", err)
                }
                if cfg.Default.Debug {
                    fmt.Printf(" %s, %s\n", column_name, data_type)
                }
                anyRows = true
            }
            if !anyRows {
                reportError("Table had no columns: ", err)
            } else if cfg.Default.Debug {
                fmt.Println("Table found, will not migrate")
            }

            // ----------------------------- Take a look at STL_FILE_SCAN on this Table to see if any files have already been imported -----------------------------
            for !quitReceived {
                if cfg.Default.Debug {
                    fmt.Printf("Re-polling with %s watcher.\n", currentTable)
                }

                loadedFiles := map[string]bool{}
                rows, err = db.Query("select * from STL_FILE_SCAN")
                if err != nil {
                    reportError("Couldn't execute STL_FILE_SCAN: ", err)
                }
                anyRows = false
                for rows.Next() {
                    var (
                        userid   int
                        query    int
                        slice    int
                        name     string
                        lines    int64
                        bytes    int64
                        loadtime int64
                        curtime  time.Time
                    )
                    err = rows.Scan(&userid, &query, &slice, &name, &lines, &bytes, &loadtime, &curtime)
                    if err != nil {
                        reportError("Couldn't scan row for STL_FILE_SCAN: ", err)
                    }
                    if cfg.Default.Debug {
                        fmt.Printf(" Already loaded: %d|%d|%d|%s|%d|%d|%d|%s\n", userid, query, slice, name, lines, bytes, loadtime, curtime)
                    }
                    loadedFiles[strings.TrimPrefix(strings.TrimSpace(name), fmt.Sprintf("s3://%s/", currentBucket))] = true
                    anyRows = true
                }

                // ----------------------------- If not: run a generic COPY for this Bucket/Prefix/Table -----------------------------
                if !anyRows {
                    copyStmt := defaultCopyStmt(&currentTable, &currentBucket, &currentPrefix)
                    if cfg.Default.Debug {
                        fmt.Printf("No records found in STL_FILE_SCAN, running `%s`\n", copyStmt)
                    }
                    _, err = db.Exec(copyStmt)
                    if err != nil {
                        reportError("Couldn't execute default copy statement: ", err)
                    }
                } else {
                    // ----------------------------- If yes: diff STL_FILE_SCAN with the S3 bucket file list, and COPY any missing files into this Table -----------------------------
                    if cfg.Default.Debug {
                        fmt.Printf("Records found, have to do manual copies from now on.\n")
                    }

                    s3bucket := s3.New(*aws.NewAuth(cfg.Aws.Accesskey, cfg.Aws.Secretkey, "", time.Now()),
                        aws.Regions[cfg.Aws.Region]).Bucket(currentBucket)

                    // List all files and collect the ones that haven't been loaded yet.
                    nonLoadedFiles := []string{}
                    keyMarker := ""
                    moreResults := true
                    for moreResults {
                        if cfg.Default.Debug {
                            fmt.Printf("Checking s3 bucket %s.\n", currentBucket)
                        }
                        results, err := s3bucket.List(currentPrefix, "", keyMarker, 0)
                        if err != nil {
                            reportError("Couldn't list default s3 bucket: ", err)
                        }
                        if cfg.Default.Debug {
                            fmt.Printf("s3bucket.List returned %#v.\n", results)
                        }
                        if len(results.Contents) == 0 {
                            break // empty response, assume we found every file
                        }
                        for _, s3obj := range results.Contents {
                            if cfg.Default.Debug {
                                fmt.Printf("Checking whether or not %s was preloaded.\n", strings.TrimSpace(s3obj.Key))
                            }
                            if !loadedFiles[strings.TrimSpace(s3obj.Key)] {
                                nonLoadedFiles = append(nonLoadedFiles, s3obj.Key)
                            }
                        }
                        keyMarker = results.Contents[len(results.Contents)-1].Key
                        moreResults = results.IsTruncated
                    }
                    if cfg.Default.Debug {
                        fmt.Printf("Haven't ever loaded %#v.\n", nonLoadedFiles)
                    }

                    for _, s3key := range nonLoadedFiles {
                        copyStmt := defaultCopyStmt(&currentTable, &currentBucket, &s3key)
                        if cfg.Default.Debug {
                            fmt.Printf(" Copying `%s`\n", copyStmt)
                        }
                        _, err = db.Exec(copyStmt)
                        if err != nil {
                            reportError("Couldn't execute default copy statement: ", err)
                        }
                    }
                }

                time.Sleep(time.Duration(cfg.Default.Pollsleepinseconds*1000) * time.Millisecond)
            }

            done <- true
        }(cfg.Redshift.Tables[i], cfg.S3.Buckets[i], cfg.S3.Prefixes[i])
    }

    // Wait until every table's watcher finishes before returning.
    for range cfg.Redshift.Tables {
        <-done
    }
}
func main() {
    flagBase := flag.String("base-dir", "/", "Base directory in which to look for files to export")
    flagPattern := flag.String("pattern", ".*", "Filenames must match this regular expression to be uploaded")
    flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
    flagBucketPrefix := flag.String("bucket-prefix", "", "S3 Bucket path prefix")
    flagAWSKey := flag.String("aws-key", "", "AWS Key")
    flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
    flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
    flagLoop := flag.Bool("loop", false, "Run in a loop and keep watching for more files to export")
    flagDryRun := flag.Bool("dry-run", false, "Don't actually do anything, just output what would be done")
    flag.Parse()

    if flag.NArg() != 0 {
        flag.PrintDefaults()
        os.Exit(1)
    }

    var err error
    baseStat, err := os.Stat(*flagBase)
    if err != nil || !baseStat.IsDir() {
        fmt.Printf("base-dir: %s\n", err)
        os.Exit(2)
    }

    pattern, err := regexp.Compile(*flagPattern)
    if err != nil {
        fmt.Printf("pattern: %s\n", err)
        os.Exit(3)
    }

    // fmt.Printf("Base:%s Pattern:%s Bucket: s3://%s/%s AWSKey:%s / %s Region:%s Dry Run:%t Loop:%t\n",
    //     *flagBase, *flagPattern, *flagBucket, *flagBucketPrefix, *flagAWSKey, *flagAWSSecretKey, *flagAWSRegion, *flagDryRun, *flagLoop)

    var progress Progress
    var rate float64
    var uploadMB float64

    var b *s3.Bucket
    if !*flagDryRun {
        auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
        if err != nil {
            fmt.Printf("Authentication error: %s\n", err)
            os.Exit(4)
        }
        region, ok := aws.Regions[*flagAWSRegion]
        if !ok {
            fmt.Printf("Parameter 'aws-region' must be a valid AWS Region\n")
            os.Exit(5)
        }
        s := s3.New(auth, region)
        b = s.Bucket(*flagBucket)
    } else {
        // b declared and not used :(
        _ = b
    }

    for {
        progress = Progress{}
        startTime := time.Now().UTC()
        err = filepath.Walk(*flagBase, makeupload(*flagBase, pattern, b, *flagBucketPrefix, *flagDryRun, &progress))

        if err != nil {
            fmt.Printf("Error reading files from %s: %s\n", *flagBase, err)
        }

        if progress.Count > 0 {
            uploadMB = float64(progress.Bytes) / 1024.0 / 1024.0
            duration := time.Now().UTC().Sub(startTime).Seconds()
            if duration > 0 {
                rate = uploadMB / duration
            } else {
                rate = 0
            }
            fmt.Printf("Uploaded %d files containing %.2fMB in %.02fs (%.02fMB/s). Encountered %d errors.\n",
                progress.Count, uploadMB, duration, rate, progress.Errors)
        } else {
            // We didn't upload any files.
            if !*flagLoop {
                fmt.Println("Nothing to upload")
            } else {
                // Only sleep if we didn't find anything to upload. If we did upload
                // something, we want to try again right away.
                fmt.Println("Waiting for files to upload...")
                time.Sleep(10 * time.Second)
            }
        }

        if !*flagLoop {
            break
        }
    }
}
func (o *S3SplitFileOutput) Init(config interface{}) (err error) {
    conf := config.(*S3SplitFileOutputConfig)
    o.S3SplitFileOutputConfig = conf

    var intPerm int64
    if intPerm, err = strconv.ParseInt(conf.FolderPerm, 8, 32); err != nil {
        err = fmt.Errorf("S3SplitFileOutput '%s' can't parse `folder_perm`, is it an octal integer string?", o.Path)
        return
    }
    o.folderPerm = os.FileMode(intPerm)

    if intPerm, err = strconv.ParseInt(conf.Perm, 8, 32); err != nil {
        err = fmt.Errorf("S3SplitFileOutput '%s' can't parse `perm`, is it an octal integer string?", o.Path)
        return
    }
    o.perm = os.FileMode(intPerm)

    if conf.MaxFileSize < 1 {
        err = fmt.Errorf("Parameter 'max_file_size' must be greater than 0.")
        return
    }
    if conf.MaxFileAge < 1 {
        err = fmt.Errorf("Parameter 'max_file_age' must be greater than 0.")
        return
    }

    if conf.MaxOpenFiles < 0 {
        err = fmt.Errorf("Parameter 'max_open_files' must not be negative.")
        return
    }
    o.fopenCache, err = lru.New(conf.MaxOpenFiles)
    if err != nil {
        // This should never happen since we already checked for negative size.
        return
    }
    // Close files as they are evicted / removed from the cache.
    o.fopenCache.OnEvicted = func(key interface{}, val interface{}) {
        // If it's not a file, we don't care about it.
        switch t := val.(type) {
        case *os.File:
            t.Close()
        }
    }

    o.dimFiles = map[string]*SplitFileInfo{}

    // TODO: fall back to default schema.
    //fmt.Printf("schema_file = '%s'\n", conf.SchemaFile)
    if conf.SchemaFile == "" {
        err = fmt.Errorf("Parameter 'schema_file' is missing")
        return
    }
    o.schema, err = LoadSchema(conf.SchemaFile)
    if err != nil {
        return fmt.Errorf("Parameter 'schema_file' must be a valid JSON file: %s", err)
    }

    if conf.S3Bucket != "" {
        auth, err := aws.GetAuth(conf.AWSKey, conf.AWSSecretKey, "", time.Now())
        if err != nil {
            return fmt.Errorf("Authentication error: %s\n", err)
        }
        region, ok := aws.Regions[conf.AWSRegion]
        if !ok {
            return fmt.Errorf("Parameter 'aws_region' must be a valid AWS Region")
        }
        s := s3.New(auth, region)
        s.ConnectTimeout = time.Duration(conf.S3ConnectTimeout) * time.Second
        s.ReadTimeout = time.Duration(conf.S3ReadTimeout) * time.Second
        // TODO: ensure we can write to the bucket.
        o.bucket = s.Bucket(conf.S3Bucket)
    } else {
        o.bucket = nil
    }

    // Remove any excess path separators from the bucket prefix.
    conf.S3BucketPrefix = fmt.Sprintf("/%s", strings.Trim(conf.S3BucketPrefix, "/"))

    o.publishChan = make(chan PublishAttempt, 1000)
    o.shuttingDown = false

    return
}
func main() {
    flagStdin := flag.Bool("stdin", false, "read list of s3 key names from stdin")
    flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
    flagAWSKey := flag.String("aws-key", "", "AWS Key")
    flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
    flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
    flagConnectTimeout := flag.Uint64("connect_timeout", 60, "Max seconds to wait for an S3 connection")
    flagReadTimeout := flag.Uint64("read_timeout", 300, "Max seconds to wait for an S3 file read to complete")
    flag.Parse()

    if !*flagStdin && flag.NArg() < 1 {
        flag.PrintDefaults()
        os.Exit(1)
    }

    var connectTimeout uint32
    if *flagConnectTimeout < math.MaxUint32 {
        connectTimeout = uint32(*flagConnectTimeout)
    } else {
        fmt.Fprintf(os.Stderr, "Connection Timeout is too large: %d.\n", *flagConnectTimeout)
        os.Exit(8)
    }

    var readTimeout uint32
    if *flagReadTimeout < math.MaxUint32 {
        readTimeout = uint32(*flagReadTimeout)
    } else {
        fmt.Fprintf(os.Stderr, "Read Timeout is too large: %d.\n", *flagReadTimeout)
        os.Exit(8)
    }

    auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
    if err != nil {
        fmt.Fprintf(os.Stderr, "Authentication error: %s\n", err)
        os.Exit(4)
    }
    region, ok := aws.Regions[*flagAWSRegion]
    if !ok {
        fmt.Fprintf(os.Stderr, "Parameter 'aws-region' must be a valid AWS Region\n")
        os.Exit(5)
    }
    s := s3.New(auth, region)
    if connectTimeout > 0 {
        s.ConnectTimeout = time.Duration(connectTimeout) * time.Second
    }
    if readTimeout > 0 {
        s.ReadTimeout = time.Duration(readTimeout) * time.Second
    }
    bucket := s.Bucket(*flagBucket)

    startTime := time.Now().UTC()
    totalFiles := 0
    if *flagStdin {
        scanner := bufio.NewScanner(os.Stdin)
        for scanner.Scan() {
            filename := scanner.Text()
            totalFiles++
            cat(bucket, filename)
        }
    } else {
        for _, filename := range flag.Args() {
            totalFiles++
            cat(bucket, filename)
        }
    }

    duration := time.Now().UTC().Sub(startTime).Seconds()
    mb := float64(bytesRead) / 1024.0 / 1024.0
    if duration == 0.0 {
        duration = 1.0
    }
    fmt.Fprintf(os.Stderr, "All done processing %d files, %.2fMB in %.2f seconds (%.2fMB/s)\n",
        totalFiles, mb, duration, (mb / duration))
}
func main() {
    flagMatch := flag.String("match", "TRUE", "message_matcher filter expression")
    flagFormat := flag.String("format", "txt", "output format [txt|json|heka|count]")
    flagOutput := flag.String("output", "", "output filename, defaults to stdout")
    flagStdin := flag.Bool("stdin", false, "read list of s3 key names from stdin")
    flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
    flagAWSKey := flag.String("aws-key", "", "AWS Key")
    flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
    flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
    flagMaxMessageSize := flag.Uint64("max-message-size", 4*1024*1024, "maximum message size in bytes")
    flagWorkers := flag.Uint64("workers", 16, "number of parallel workers")
    flagConnectTimeout := flag.Uint64("connect_timeout", 60, "Max seconds to wait for an S3 connection")
    flagReadTimeout := flag.Uint64("read_timeout", 300, "Max seconds to wait for an S3 file read to complete")
    flag.Parse()

    if !*flagStdin && flag.NArg() < 1 {
        flag.PrintDefaults()
        os.Exit(1)
    }

    if *flagMaxMessageSize < math.MaxUint32 {
        maxSize := uint32(*flagMaxMessageSize)
        message.SetMaxMessageSize(maxSize)
    } else {
        fmt.Fprintf(os.Stderr, "Message size is too large: %d\n", *flagMaxMessageSize)
        os.Exit(8)
    }

    workers := 1
    if *flagWorkers == 0 {
        fmt.Fprintf(os.Stderr, "Cannot run with zero workers. Using 1.\n")
    } else if *flagWorkers < 2000 {
        workers = int(*flagWorkers)
    } else {
        fmt.Fprintf(os.Stderr, "Too many workers: %d. Use a reasonable value (up to a few hundred).\n", *flagWorkers)
        os.Exit(8)
    }

    var connectTimeout uint32
    if *flagConnectTimeout < math.MaxUint32 {
        connectTimeout = uint32(*flagConnectTimeout)
    } else {
        fmt.Fprintf(os.Stderr, "Connection Timeout is too large: %d.\n", *flagConnectTimeout)
        os.Exit(8)
    }

    var readTimeout uint32
    if *flagReadTimeout < math.MaxUint32 {
        readTimeout = uint32(*flagReadTimeout)
    } else {
        fmt.Fprintf(os.Stderr, "Read Timeout is too large: %d.\n", *flagReadTimeout)
        os.Exit(8)
    }

    var err error
    var match *message.MatcherSpecification
    if match, err = message.CreateMatcherSpecification(*flagMatch); err != nil {
        fmt.Fprintf(os.Stderr, "Match specification - %s\n", err)
        os.Exit(2)
    }

    var out *os.File
    if "" == *flagOutput {
        out = os.Stdout
    } else {
        if out, err = os.OpenFile(*flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
            fmt.Fprintf(os.Stderr, "%s\n", err)
            os.Exit(3)
        }
        defer out.Close()
    }

    auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
    if err != nil {
        fmt.Fprintf(os.Stderr, "Authentication error: %s\n", err)
        os.Exit(4)
    }
    region, ok := aws.Regions[*flagAWSRegion]
    if !ok {
        fmt.Fprintf(os.Stderr, "Parameter 'aws-region' must be a valid AWS Region\n")
        os.Exit(5)
    }
    s := s3.New(auth, region)
    if connectTimeout > 0 {
        s.ConnectTimeout = time.Duration(connectTimeout) * time.Second
    }
    if readTimeout > 0 {
        s.ReadTimeout = time.Duration(readTimeout) * time.Second
    }
    bucket := s.Bucket(*flagBucket)

    filenameChannel := make(chan string, 1000)
    recordChannel := make(chan s3splitfile.S3Record, 1000)
    doneChannel := make(chan string, 1000)
    allDone := make(chan int)

    for i := 1; i <= workers; i++ {
        go cat(bucket, filenameChannel, recordChannel, doneChannel)
    }
    go save(recordChannel, match, *flagFormat, out, allDone)

    startTime := time.Now().UTC()
    totalFiles := 0
    pendingFiles := 0
    if *flagStdin {
        scanner := bufio.NewScanner(os.Stdin)
        for scanner.Scan() {
            filename := scanner.Text()
            totalFiles++
            pendingFiles++
            filenameChannel <- filename
            if pendingFiles >= 1000 {
                waitFor(doneChannel, 1)
                pendingFiles--
            }
        }
        close(filenameChannel)
    } else {
        for _, filename := range flag.Args() {
            totalFiles++
            pendingFiles++
            filenameChannel <- filename
            if pendingFiles >= 1000 {
                waitFor(doneChannel, 1)
                pendingFiles--
            }
        }
        close(filenameChannel)
    }
    fmt.Fprintf(os.Stderr, "Waiting for last %d files\n", pendingFiles)
    waitFor(doneChannel, pendingFiles)
    close(recordChannel)
    bytesRead := <-allDone
    // All done! Win!
    duration := time.Now().UTC().Sub(startTime).Seconds()
    mb := float64(bytesRead) / 1024.0 / 1024.0
    if duration == 0.0 {
        duration = 1.0
    }
    fmt.Fprintf(os.Stderr, "All done processing %d files, %.2fMB in %.2f seconds (%.2fMB/s)\n",
        totalFiles, mb, duration, (mb / duration))
}