Example #1
func main() {
	flag.Parse()

	if *region == "" {
		*region = aws.InstanceRegion()
	}

	auth, err := aws.GetAuth("", "", "", time.Now())
	if err != nil {
		log.Panic(err)
	}

	s3service := s3.New(auth, aws.GetRegion(*region))
	bucket := s3service.Bucket(flag.Arg(0))

	for {
		var entries []RoutingEntry

		data, err := bucket.Get("/routing-table.json")
		if err == nil {
			err = json.Unmarshal(data, &entries)
			if err == nil {
				updateProxies(entries)
			}
		} else {
			log.Print("failed to get routing table: ", err)
		}

		time.Sleep(time.Second * 10)
	}
}
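Example #1 relies on a region flag, a RoutingEntry type and an updateProxies helper that are declared elsewhere in its source file. A minimal sketch of what those declarations might look like; every name and field below is an assumption, not code from the original project, and imports are elided as in the surrounding examples:

// Hypothetical declarations assumed by Example #1, not taken from the original source.
var region = flag.String("region", "", "AWS region; detected from EC2 instance metadata when empty")

// RoutingEntry is a guess at the shape of one record in routing-table.json.
type RoutingEntry struct {
	Domain   string   `json:"domain"`
	Backends []string `json:"backends"`
}

// updateProxies would apply the freshly fetched routing table to the running proxies.
func updateProxies(entries []RoutingEntry) {
	// implementation omitted
}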
Example #2
// NewS3Storage initializes the S3Storage with required AWS arguments
func NewS3Storage(region aws.Region, auth aws.Auth, bucketName string, prefix string, bucketACL s3.ACL) (*S3Storage, error) {
	s3obj := s3.New(auth, region)
	bucket := s3obj.Bucket(bucketName)

	// Running PutBucket too many times in parallel (such as distributed cron) can generate the error:
	// "A conflicting conditional operation is currently in progress against this resource. Please try again"
	// We should only call PutBucket when we suspect that the bucket doesn't exist. Unfortunately, the
	// current AdRoll/goamz lib doesn't implement ListBuckets, so to check that the bucket exists
	// do a List and see if we get an error before calling PutBucket.
	_, err := bucket.List("", "/", "", 1)
	// technically, there are many reasons this could fail (such as access denied, or other network error)
	// but this should sufficiently limit the number of times PutBucket is called in normal operations
	if err != nil {
		err = bucket.PutBucket(bucketACL)
		if err != nil {
			return nil, err
		}
	}
	return &S3Storage{
		s3:     s3obj,
		bucket: bucket,
		region: region,
		auth:   auth,
		prefix: prefix,
	}, nil
}
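A hedged usage sketch for NewS3Storage above; the region, bucket name, prefix and ACL are placeholder values, and passing empty credentials to aws.GetAuth makes goamz fall back to the environment or to $HOME/.aws/credentials (imports elided as in the surrounding examples):

func newBackupStorage() (*S3Storage, error) {
	// Empty access key and secret ask goamz to resolve credentials itself.
	auth, err := aws.GetAuth("", "", "", time.Time{})
	if err != nil {
		return nil, err
	}
	// "my-backup-bucket", "backups/" and s3.Private are illustrative values only.
	return NewS3Storage(aws.USWest2, auth, "my-backup-bucket", "backups/", s3.Private)
}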
Example #3
// GetBucket builds a s3 connection retrieving the bucket
func GetBucket(bucket string) *s3.Bucket {
	auth, err := awswrapper.GetAwsAuth()
	if err != nil {
		log.Fatalln(err)
	}
	b := s3.New(auth, aws.USEast).Bucket(bucket)
	loc, err := b.Location()
	if err != nil {
		log.Fatalln(err)

	}
	if aws.GetRegion(loc) != aws.USEast {
		b = s3.New(auth, aws.GetRegion(loc)).Bucket(bucket)
	}
	return b
}
Example #4
func (u *S3Uploader) Setup(destination string, debugHTTP bool) error {
	u.Destination = destination
	u.DebugHTTP = debugHTTP

	// Try to auth with S3
	auth, err := awsS3Auth()
	if err != nil {
		return fmt.Errorf("Error creating AWS S3 authentication: %s", err.Error())
	}

	// Try and get the region
	region, err := awsS3Region()
	if err != nil {
		return err
	}

	logger.Debug("Authorizing S3 credentials and finding bucket `%s` in region `%s`...", u.BucketName(), region.Name)

	// Find the bucket
	s3 := s3.New(auth, region)
	bucket := s3.Bucket(u.BucketName())

	// If the list doesn't return an error, then we've got our bucket
	_, err = bucket.List("", "", "", 0)
	if err != nil {
		return errors.New("Could not find bucket `" + u.BucketName() + "` in region `" + region.Name + "` (" + err.Error() + ")")
	}

	u.Bucket = bucket

	return nil
}
Example #5
// Communicate with all endpoints to see if they are alive.
func (s *ClientTests) TestRegions(c *check.C) {
	errs := make(chan error, len(aws.Regions))
	for _, region := range aws.Regions {
		go func(r aws.Region) {
			s := s3.New(s.s3.Auth, r)
			b := s.Bucket("goamz-" + s.Auth.AccessKey)
			_, err := b.Get("non-existent")
			errs <- err
		}(region)
	}
	for range aws.Regions {
		err := <-errs
		if err != nil {
			s3_err, ok := err.(*s3.Error)
			if ok {
				c.Check(s3_err.Code, check.Matches, "NoSuchBucket")
			} else if _, ok = err.(*net.DNSError); ok {
				// Okay as well.
			} else {
				c.Errorf("Non-S3 error: %s", err)
			}
		} else {
			c.Errorf("Test should have errored but it seems to have succeeded")
		}
	}
}
Example #6
func (s *LocalServerSuite) SetUpSuite(c *check.C) {
	s.srv.SetUp(c)
	s.clientTests.s3 = s3.New(s.srv.auth, s.srv.region)

	// TODO Sadly the fake server ignores auth completely right now. :-(
	s.clientTests.authIsBroken = true
	s.clientTests.Cleanup()
}
Example #7
// New constructs a new Driver with the given AWS credentials, region, encryption flag, and
// bucketName
func New(params DriverParameters) (*Driver, error) {
	auth, err := aws.GetAuth(params.AccessKey, params.SecretKey, "", time.Time{})
	if err != nil {
		return nil, err
	}

	if !params.Secure {
		params.Region.S3Endpoint = strings.Replace(params.Region.S3Endpoint, "https", "http", 1)
	}

	s3obj := s3.New(auth, params.Region)
	bucket := s3obj.Bucket(params.Bucket)

	if params.V4Auth {
		s3obj.Signature = aws.V4Signature
	} else {
		if params.Region.Name == "eu-central-1" {
			return nil, fmt.Errorf("The eu-central-1 region only works with v4 authentication")
		}
	}

	// Validate that the given credentials have at least read permissions in the
	// given bucket scope.
	if _, err := bucket.List(strings.TrimRight(params.RootDirectory, "/"), "", "", 1); err != nil {
		return nil, err
	}

	// TODO Currently multipart uploads have no timestamps, so this would be unwise
	// if you initiated a new s3driver while another one is running on the same bucket.
	// multis, _, err := bucket.ListMulti("", "")
	// if err != nil {
	// 	return nil, err
	// }

	// for _, multi := range multis {
	// 	err := multi.Abort()
	// 	//TODO appropriate to do this error checking?
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// }

	d := &driver{
		S3:            s3obj,
		Bucket:        bucket,
		ChunkSize:     params.ChunkSize,
		Encrypt:       params.Encrypt,
		RootDirectory: params.RootDirectory,
	}

	return &Driver{
		baseEmbed: baseEmbed{
			Base: base.Base{
				StorageDriver: d,
			},
		},
	}, nil
}
Example #8
// New constructs a new Driver with the given AWS credentials, region, encryption flag, and
// bucketName
func New(params DriverParameters) (*Driver, error) {
	auth, err := aws.GetAuth(params.AccessKey, params.SecretKey, "", time.Time{})
	if err != nil {
		return nil, fmt.Errorf("unable to resolve aws credentials, please ensure that 'accesskey' and 'secretkey' are properly set or the credentials are available in $HOME/.aws/credentials: %v", err)
	}

	if !params.Secure {
		params.Region.S3Endpoint = strings.Replace(params.Region.S3Endpoint, "https", "http", 1)
	}

	s3obj := s3.New(auth, params.Region)
	bucket := s3obj.Bucket(params.Bucket)

	if params.V4Auth {
		s3obj.Signature = aws.V4Signature
	} else {
		if params.Region.Name == "eu-central-1" {
			return nil, fmt.Errorf("The eu-central-1 region only works with v4 authentication")
		}
	}

	// TODO Currently multipart uploads have no timestamps, so this would be unwise
	// if you initiated a new s3driver while another one is running on the same bucket.
	// multis, _, err := bucket.ListMulti("", "")
	// if err != nil {
	// 	return nil, err
	// }

	// for _, multi := range multis {
	// 	err := multi.Abort()
	// 	//TODO appropriate to do this error checking?
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// }

	d := &driver{
		S3:            s3obj,
		Bucket:        bucket,
		ChunkSize:     params.ChunkSize,
		Encrypt:       params.Encrypt,
		RootDirectory: params.RootDirectory,
		zeros:         make([]byte, params.ChunkSize),
	}

	d.pool.New = func() interface{} {
		return make([]byte, d.ChunkSize)
	}

	return &Driver{
		baseEmbed: baseEmbed{
			Base: base.Base{
				StorageDriver: d,
			},
		},
	}, nil
}
Example #9
func InitBucket(name string) error {
	auth, err := aws.EnvAuth()
	if err != nil {
		return err
	}
	s3 := s3.New(auth, aws.GetRegion("ap-southeast-1"))
	Bucket = s3.Bucket(name)
	return nil
}
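After InitBucket succeeds, the package-level Bucket variable is ready to use. A minimal sketch with placeholder bucket and key names (imports elided as in the surrounding examples):

func printObjectSize() {
	if err := InitBucket("my-app-assets"); err != nil {
		log.Fatal(err)
	}
	// Bucket.Get returns the object body as a byte slice.
	data, err := Bucket.Get("config/settings.json")
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("fetched %d bytes", len(data))
}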
Example #10
func initAwsBucket() {
	expiration := time.Now().Add(time.Hour * 1)
	auth, err := aws.GetAuth(config.AccessKey, config.SecretKey, "", expiration) //"" = token which isn't needed
	if err != nil {
		panic(err)
	}

	aws_bucket = s3.New(auth, aws.GetRegion(config.Region)).Bucket(config.Bucket)
}
Example #11
func (s *AmazonClientSuite) SetUpSuite(c *check.C) {
	if !testutil.Amazon {
		c.Skip("live tests against AWS disabled (no -amazon)")
	}
	s.srv.SetUp(c)
	s.s3 = s3.New(s.srv.auth, s.Region)
	// In case tests were interrupted in the middle before.
	s.ClientTests.Cleanup()
}
Example #12
func (s *AmazonDomainClientSuite) SetUpSuite(c *check.C) {
	if !testutil.Amazon {
		c.Skip("live tests against AWS disabled (no -amazon)")
	}
	s.srv.SetUp(c)
	region := s.Region
	region.S3BucketEndpoint = "https://${bucket}.s3.amazonaws.com"
	s.s3 = s3.New(s.srv.auth, region)
	s.ClientTests.Cleanup()
}
Example #13
// NewS3Volume returns a new S3Volume using the given auth, region,
// and bucket name. The replication argument specifies the replication
// level to report when writing data.
func NewS3Volume(auth aws.Auth, region aws.Region, bucket string, readonly bool, replication int) *S3Volume {
	return &S3Volume{
		Bucket: &s3.Bucket{
			S3:   s3.New(auth, region),
			Name: bucket,
		},
		readonly:      readonly,
		replication:   replication,
		indexPageSize: 1000,
	}
}
Example #14
func initAwsBucket() {

	fmt.Println("Initializing aws bucket bear!", config.Port)
	expiration := time.Now().Add(time.Hour * 1)
	auth, err := aws.GetAuth(config.AccessKey, config.SecretKey, "", expiration) //"" = token which isn't needed
	if err != nil {
		panic(err)
	}

	aws_bucket = s3.New(auth, aws.GetRegion(config.Region)).Bucket(config.Bucket)
}
Example #15
func (d S3Downloader) Start() error {
	// Try to auth with S3
	auth, err := awsS3Auth()
	if err != nil {
		return fmt.Errorf("Error creating AWS S3 authentication: %s", err.Error())
	}

	// Try and get the region
	region, err := awsS3Region()
	if err != nil {
		return err
	}

	// Split apart the bucket
	// TrimPrefix (not TrimLeft, which takes a cutset) strips only the literal "s3://" scheme.
	bucketParts := strings.Split(strings.TrimPrefix(d.Bucket, "s3://"), "/")
	bucketName := bucketParts[0]
	bucketPath := strings.Join(bucketParts[1:], "/")

	logger.Debug("Authorizing S3 credentials and finding bucket `%s` in region `%s`...", bucketName, region.Name)

	// Find the bucket
	s3 := s3.New(auth, region)
	bucket := s3.Bucket(bucketName)

	// If the list doesn't return an error, then we've got our bucket
	_, err = bucket.List("", "", "", 0)
	if err != nil {
		return errors.New("Could not find bucket `" + bucketName + "` in region `" + region.Name + "` (" + err.Error() + ")")
	}

	// Create the location of the file
	var s3Location string
	if bucketPath != "" {
		s3Location = strings.TrimRight(bucketPath, "/") + "/" + strings.TrimLeft(d.Path, "/")
	} else {
		s3Location = d.Path
	}

	// Generate a Signed URL
	signedURL := bucket.SignedURL(s3Location, time.Now().Add(time.Hour))

	// We can now cheat and pass the URL onto our regular downloader
	return Download{
		URL:         signedURL,
		Path:        d.Path,
		Destination: d.Destination,
		Retries:     d.Retries,
		DebugHTTP:   d.DebugHTTP,
	}.Start()
}
Example #16
func writeToBucket(f string, a string) {
	p, s, setErr := getSettings()
	if setErr != nil {
		log.Println("Error:", setErr)
		return
	}
	auth := aws.Auth{AccessKey: p, SecretKey: s}

	S3 := s3.New(auth, aws.APNortheast)
	bucket := S3.Bucket("math-results")
	err := bucket.Put(f, []byte(a), "text/plain", s3.PublicRead, s3.Options{})
	if err != nil {
		log.Println("ERROR:", err)
	}
}
Example #17
// NewS3Storage initializes the S3Storage with required AWS arguments
func NewS3Storage(region aws.Region, auth aws.Auth, bucketName string, prefix string, bucketACL s3.ACL) (*S3Storage, error) {
	s3obj := s3.New(auth, region)
	bucket := s3obj.Bucket(bucketName)

	err := bucket.PutBucket(bucketACL)
	if err != nil {
		return nil, err
	}
	return &S3Storage{
		s3:     s3obj,
		bucket: bucket,
		region: region,
		auth:   auth,
		prefix: prefix,
	}, nil
}
Example #18
func downloadFromBucket(b string, f string) ([]byte, error) {
	p, s, setErr := getSettings()
	if setErr != nil {
		return nil, setErr
	}
	auth := aws.Auth{AccessKey: p, SecretKey: s}

	S3 := s3.New(auth, aws.USEast)
	bucket := S3.Bucket(b)
	log.Println("Starting Get...")
	data, err := bucket.Get(f)
	if err != nil {
		return nil, err
	}
	log.Println("Completed Get!", len(data))
	return data, nil
}
Example #19
func (input *S3OffsetInput) Init(config interface{}) (err error) {
	conf := config.(*S3OffsetInputConfig)
	input.S3OffsetInputConfig = conf

	if conf.MetaFile != "" {
		// We already have the required metadata. Don't need to fetch it.
		input.metaFileName = conf.MetaFile
	} else if conf.ClientIdListFile != "" {
		// Load clientids from file.
		input.clientids, err = readLines(conf.ClientIdListFile)
		if err != nil {
			return fmt.Errorf("Error reading file %s for 'client_id_list': %s", conf.ClientIdListFile, err)
		}
	} else {
		return fmt.Errorf("Missing parameter: You must specify either 'client_id_list' or 'metadata_file'")
	}

	auth, err := aws.GetAuth(conf.AWSKey, conf.AWSSecretKey, "", time.Now())
	if err != nil {
		return fmt.Errorf("Authentication error: %s\n", err)
	}
	region, ok := aws.Regions[conf.AWSRegion]
	if !ok {
		return fmt.Errorf("Parameter 'aws_region' must be a valid AWS Region")
	}
	s := s3.New(auth, region)
	s.ConnectTimeout = time.Duration(conf.S3ConnectTimeout) * time.Second
	s.ReadTimeout = time.Duration(conf.S3ReadTimeout) * time.Second

	// TODO: ensure we can read from (and list, for meta) the buckets.
	input.bucket = s.Bucket(conf.S3Bucket)

	if conf.S3MetaBucket != "" {
		input.metaBucket = s.Bucket(conf.S3MetaBucket)
	} else if conf.MetaFile == "" {
		return fmt.Errorf("Parameter 's3_meta_bucket' is required unless using 'metadata_file'")
	}

	// Remove any excess path separators from the bucket prefix.
	conf.S3MetaBucketPrefix = CleanBucketPrefix(conf.S3MetaBucketPrefix)

	input.stop = make(chan bool)
	input.offsetChan = make(chan MessageLocation, 1000)

	return nil
}
Example #20
func getS3(u *url.URL) (io.ReadCloser, error) {
	var err error
	if AWSAuth.AccessKey == "" || AWSRegion.Name == "" {
		AWSAuth, AWSRegion, err = LoadAWSCredentials("")
		if err != nil {
			return nil, err
		}
		log.Println("region:", AWSRegion.Name)
		log.Println("aws_access_key_id:", AWSAuth.AccessKey)
	}
	client := s3.New(AWSAuth, AWSRegion)
	bucket := client.Bucket(u.Host)
	rc, err := bucket.GetReader(u.Path)
	if err != nil {
		return nil, err
	}
	return rc, nil
}
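The caller of getS3 owns the returned io.ReadCloser and should close it when done. A hedged usage sketch with a placeholder object URL (imports elided); note that the function treats the URL host as the bucket name and the URL path as the object key:

func readObject() ([]byte, error) {
	// "s3://my-bucket/path/to/object" is an illustrative URL only.
	u, err := url.Parse("s3://my-bucket/path/to/object")
	if err != nil {
		return nil, err
	}
	rc, err := getS3(u)
	if err != nil {
		return nil, err
	}
	defer rc.Close()
	return ioutil.ReadAll(rc)
}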
Example #21
func (input *S3SplitFileInput) Init(config interface{}) (err error) {
	conf := config.(*S3SplitFileInputConfig)
	input.S3SplitFileInputConfig = conf

	input.schema, err = LoadSchema(conf.SchemaFile)
	if err != nil {
		return fmt.Errorf("Parameter 'schema_file' must be a valid JSON file: %s", err)
	}

	if conf.S3Bucket != "" {
		auth, err := aws.GetAuth(conf.AWSKey, conf.AWSSecretKey, "", time.Now())
		if err != nil {
			return fmt.Errorf("Authentication error: %s\n", err)
		}
		region, ok := aws.Regions[conf.AWSRegion]
		if !ok {
			return fmt.Errorf("Parameter 'aws_region' must be a valid AWS Region")
		}
		s := s3.New(auth, region)
		s.ConnectTimeout = time.Duration(conf.S3ConnectTimeout) * time.Second
		s.ReadTimeout = time.Duration(conf.S3ReadTimeout) * time.Second
		// TODO: ensure we can read from the bucket.
		input.bucket = s.Bucket(conf.S3Bucket)
	} else {
		input.bucket = nil
	}

	if conf.S3ObjectMatchRegex != "" {
		if input.objectMatch, err = regexp.Compile(conf.S3ObjectMatchRegex); err != nil {
			err = fmt.Errorf("S3SplitFileInput: %s", err)
			return
		}
	} else {
		input.objectMatch = nil
	}

	// Remove any excess path separators from the bucket prefix.
	conf.S3BucketPrefix = CleanBucketPrefix(conf.S3BucketPrefix)

	input.stop = make(chan bool)
	input.listChan = make(chan string, 1000)

	return nil
}
Example #22
func (u *S3Uploader) Setup(destination string) error {
	u.Destination = destination

	// Try to auth with S3
	auth, err := awsS3Auth()
	if err != nil {
		return fmt.Errorf("Error creating AWS S3 authentication: %s", err.Error())
	}

	regionName := "us-east-1"
	if os.Getenv("BUILDKITE_S3_DEFAULT_REGION") != "" {
		regionName = os.Getenv("BUILDKITE_S3_DEFAULT_REGION")
	} else if os.Getenv("AWS_DEFAULT_REGION") != "" {
		regionName = os.Getenv("AWS_DEFAULT_REGION")
	}

	// Check to make sure the region exists. There is a GetRegion API, but
	// there doesn't seem to be a way to make it error out if the region
	// doesn't exist.
	region, ok := aws.Regions[regionName]
	if !ok {
		return errors.New("Unknown AWS S3 Region `" + regionName + "`")
	}

	logger.Debug("Authorizing S3 credentials and finding bucket `%s` in region `%s`...", u.bucketName(), regionName)

	// Find the bucket
	s3 := s3.New(auth, region)
	bucket := s3.Bucket(u.bucketName())

	// If the list doesn't return an error, then we've got our bucket
	_, err = bucket.List("", "", "", 0)
	if err != nil {
		return errors.New("Could not find bucket `" + u.bucketName() + "` in region `" + region.Name + "` (" + err.Error() + ")")
	}

	u.Bucket = bucket

	return nil
}
Example #23
func (d S3Downloader) Start() error {
	// Try to auth with S3
	auth, err := awsS3Auth()
	if err != nil {
		return fmt.Errorf("Error creating AWS S3 authentication: %s", err.Error())
	}

	// Try and get the region
	region, err := awsS3Region()
	if err != nil {
		return err
	}

	logger.Debug("Authorizing S3 credentials and finding bucket `%s` in region `%s`...", d.BucketName(), region.Name)

	// Find the bucket
	s3 := s3.New(auth, region)
	bucket := s3.Bucket(d.BucketName())

	// If the list doesn't return an error, then we've got our bucket
	_, err = bucket.List("", "", "", 0)
	if err != nil {
		return errors.New("Could not find bucket `" + d.BucketName() + "` in region `" + region.Name + "` (" + err.Error() + ")")
	}

	// Generate a Signed URL
	signedURL := bucket.SignedURL(d.BucketFileLocation(), time.Now().Add(time.Hour))

	// We can now cheat and pass the URL onto our regular downloader
	return Download{
		Client:      *http.DefaultClient,
		URL:         signedURL,
		Path:        d.Path,
		Destination: d.Destination,
		Retries:     d.Retries,
		DebugHTTP:   d.DebugHTTP,
	}.Start()
}
Example #24
func NewTestableS3Volume(c *check.C, readonly bool, replication int) *TestableS3Volume {
	clock := &fakeClock{}
	srv, err := s3test.NewServer(&s3test.Config{Clock: clock})
	c.Assert(err, check.IsNil)
	auth := aws.Auth{}
	region := aws.Region{
		Name:                 "test-region-1",
		S3Endpoint:           srv.URL(),
		S3LocationConstraint: true,
	}
	bucket := &s3.Bucket{
		S3:   s3.New(auth, region),
		Name: TestBucketName,
	}
	err = bucket.PutBucket(s3.ACL("private"))
	c.Assert(err, check.IsNil)

	return &TestableS3Volume{
		S3Volume:    NewS3Volume(auth, region, TestBucketName, readonly, replication),
		server:      srv,
		serverClock: clock,
	}
}
Example #25
func main() {
	flagSchema := flag.String("schema", "", "Filename of the schema to use as a filter")
	flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
	flagBucketPrefix := flag.String("bucket-prefix", "", "S3 Bucket path prefix")
	flagAWSKey := flag.String("aws-key", "", "AWS Key")
	flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
	flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
	flagDryRun := flag.Bool("dry-run", false, "Don't actually do anything, just output what would be done")
	flagVerbose := flag.Bool("verbose", false, "Print detailed info")
	flag.Parse()

	if flag.NArg() != 0 {
		flag.PrintDefaults()
		os.Exit(1)
	}

	var err error
	var schema s3splitfile.Schema
	schema, err = s3splitfile.LoadSchema(*flagSchema)
	if err != nil {
		fmt.Printf("schema: %s\n", err)
		os.Exit(2)
	}

	if *flagDryRun {
		fmt.Printf("Dry Run: Would have listed files in s3://%s/%s according to filter schema %s\n",
			*flagBucket, *flagBucketPrefix, *flagSchema)
		os.Exit(0)
	}

	var b *s3.Bucket

	prefix := s3splitfile.CleanBucketPrefix(*flagBucketPrefix)

	// Initialize the S3 bucket
	auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
	if err != nil {
		fmt.Printf("Authentication error: %s\n", err)
		os.Exit(4)
	}
	region, ok := aws.Regions[*flagAWSRegion]
	if !ok {
		fmt.Printf("Parameter 'aws-region' must be a valid AWS Region\n")
		os.Exit(5)
	}
	s := s3.New(auth, region)
	b = s.Bucket(*flagBucket)

	var errCount int
	var totalCount int
	var totalSize int64

	startTime := time.Now().UTC()

	// List the keys as we see them
	for k := range s3splitfile.S3Iterator(b, prefix, schema) {
		if k.Err != nil {
			fmt.Printf("ERROR fetching key: %s\n", k.Err)
			errCount++
		} else {
			totalCount++
			totalSize += k.Key.Size
			fmt.Printf("%s\n", k.Key.Key)
		}
	}

	duration := time.Now().UTC().Sub(startTime).Seconds()

	if *flagVerbose {
		fmt.Printf("Filter matched %d files totaling %s in %.02fs (%d errors)\n",
			totalCount, s3splitfile.PrettySize(totalSize), duration, errCount)
	}
}
Example #26
func main() {
	flag.Parse() // Read argv

	if shouldOutputVersion {
		fmt.Printf("redshift-tracking-copy-from-s3 %s\n", VERSION)
		os.Exit(0)
	}

	// Read config file
	parseConfigfile()

	// ----------------------------- Startup goroutine for each Bucket/Prefix/Table & Repeat migration check per table -----------------------------

	done := make(chan bool, len(cfg.Redshift.Tables))
	for i := range cfg.Redshift.Tables {
		quitSignal := make(chan os.Signal, 1)
		signal.Notify(quitSignal, os.Interrupt)

		go func(currentTable string, currentBucket string, currentPrefix string) {
			quitReceived := false

			go func() {
				<-quitSignal
				if cfg.Default.Debug {
					fmt.Printf("Quit signal received on %s watcher. Going down...\n", currentTable)
				}
				quitReceived = true
			}()

			db, err := sql.Open("postgres", fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=disable", cfg.Redshift.Host, cfg.Redshift.Port, cfg.Redshift.User, cfg.Redshift.Password, cfg.Redshift.Database))
			if err != nil {
				reportError("Couldn't connect to redshift database: ", err)
			}
			rows, err := db.Query(fmt.Sprintf("select COLUMN_NAME, DATA_TYPE from INFORMATION_SCHEMA.COLUMNS where table_name = '%s' limit 1000", currentTable))
			if err != nil {
				reportError("Couldn't execute statement for INFORMATION_SCHEMA.COLUMNS: ", err)
			}
			if cfg.Default.Debug {
				fmt.Println("Looking for table, columns will display below.")
			}
			anyRows := false
			for rows.Next() {
				var column_name string
				var data_type string
				err = rows.Scan(&column_name, &data_type)
				if err != nil {
					reportError("Couldn't scan row for table: ", err)
				}
				if cfg.Default.Debug {
					fmt.Printf("   %s, %s\n", column_name, data_type)
				}
				anyRows = true
			}

			if !anyRows {
				reportError("Table had no columns: ", err)
			} else {
				if cfg.Default.Debug {
					fmt.Println("Table found, will not migrate")
				}
			}

			// ----------------------------- Take a look at STL_FILE_SCAN on this Table to see if any files have already been imported -----------------------------

			for !quitReceived {
				if cfg.Default.Debug {
					fmt.Printf("Re-polling with %s watcher.\n", currentTable)
				}
				loadedFiles := map[string]bool{}

				rows, err = db.Query("select * from STL_FILE_SCAN")
				if err != nil {
					reportError("Couldn't execute STL_FILE_SCAN: ", err)
				}
				anyRows = false
				for rows.Next() {
					var (
						userid   int
						query    int
						slice    int
						name     string
						lines    int64
						bytes    int64
						loadtime int64
						curtime  time.Time
					)
					err = rows.Scan(&userid, &query, &slice, &name, &lines, &bytes, &loadtime, &curtime)
					if err != nil {
						reportError("Couldn't scan row for STL_FILE_SCAN: ", err)
					}
					if cfg.Default.Debug {
						fmt.Printf("  Already loaded: %d|%d|%d|%s|%d|%d|%d|%s\n", userid, query, slice, name, lines, bytes, loadtime, curtime)
					}
					loadedFiles[strings.TrimPrefix(strings.TrimSpace(name), fmt.Sprintf("s3://%s/", currentBucket))] = true
					anyRows = true
				}

				// ----------------------------- If not: run generic COPY for this Bucket/Prefix/Table -----------------------------
				if !anyRows {
					copyStmt := defaultCopyStmt(&currentTable, &currentBucket, &currentPrefix)
					if cfg.Default.Debug {
						fmt.Printf("No records found in STL_FILE_SCAN, running `%s`\n", copyStmt)
					}
					_, err = db.Exec(copyStmt)
					if err != nil {
						reportError("Couldn't execute default copy statement: ", err)
					}
				} else {

					// ----------------------------- If yes: diff STL_FILE_SCAN with S3 bucket files list, COPY and missing files into this Table -----------------------------
					if cfg.Default.Debug {
						fmt.Printf("Records found, have to do manual copies from now on.\n")
					}
					s3bucket := s3.New(*aws.NewAuth(cfg.Aws.Accesskey, cfg.Aws.Secretkey, "", time.Now()), aws.Regions[cfg.Aws.Region]).Bucket(currentBucket)

					// list all missing files and copy in the ones that are missing
					nonLoadedFiles := []string{}
					keyMarker := ""
					moreResults := true
					for moreResults {
						if cfg.Default.Debug {
							fmt.Printf("Checking s3 bucket %s.\n", currentBucket)
						}
						results, err := s3bucket.List(currentPrefix, "", keyMarker, 0)
						if err != nil {
							reportError("Couldn't list default s3 bucket: ", err)
						}
						if cfg.Default.Debug {
							fmt.Printf("s3bucket.List returned %#v.\n", results)
						}
						if len(results.Contents) == 0 {
							break
						} // empty request, assume we found every file
						for _, s3obj := range results.Contents {
							if cfg.Default.Debug {
								fmt.Printf("Checking whether or not %s was preloaded.\n", strings.TrimSpace(s3obj.Key))
							}
							if !loadedFiles[strings.TrimSpace(s3obj.Key)] {
								nonLoadedFiles = append(nonLoadedFiles, s3obj.Key)
							}
						}
						keyMarker = results.Contents[len(results.Contents)-1].Key
						moreResults = results.IsTruncated
					}

					if cfg.Default.Debug {
						fmt.Printf("Haven't ever loaded %#v.\n", nonLoadedFiles)
					}
					for _, s3key := range nonLoadedFiles {
						copyStmt := defaultCopyStmt(&currentTable, &currentBucket, &s3key)
						if cfg.Default.Debug {
							fmt.Printf("  Copying `%s`\n", copyStmt)
						}
						_, err = db.Exec(copyStmt)
						if err != nil {
							reportError("Couldn't execute default copy statement: ", err)
						}
					}

				}

				time.Sleep(time.Duration(cfg.Default.Pollsleepinseconds*1000) * time.Millisecond)
			}

			done <- true
		}(cfg.Redshift.Tables[i], cfg.S3.Buckets[i], cfg.S3.Prefixes[i])

	}

	<-done // wait until the last iteration finishes before returning
}
Example #27
func main() {
	flagBase := flag.String("base-dir", "/", "Base directory in which to look for files to export")
	flagPattern := flag.String("pattern", ".*", "Filenames must match this regular expression to be uploaded")
	flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
	flagBucketPrefix := flag.String("bucket-prefix", "", "S3 Bucket path prefix")
	flagAWSKey := flag.String("aws-key", "", "AWS Key")
	flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
	flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
	flagLoop := flag.Bool("loop", false, "Run in a loop and keep watching for more files to export")
	flagDryRun := flag.Bool("dry-run", false, "Don't actually do anything, just output what would be done")
	flag.Parse()

	if flag.NArg() != 0 {
		flag.PrintDefaults()
		os.Exit(1)
	}

	var err error
	baseStat, err := os.Stat(*flagBase)
	if err != nil || !baseStat.IsDir() {
		fmt.Printf("base-dir: %s\n", err)
		os.Exit(2)
	}

	pattern, err := regexp.Compile(*flagPattern)
	if err != nil {
		fmt.Printf("pattern: %s\n", err)
		os.Exit(3)
	}

	// fmt.Printf("Base:%s  Pattern:%s  Bucket: s3://%s/%s  AWSKey:%s / %s  Region:%s  Dry Run:%t  Loop:%t\n",
	// 	*flagBase, *flagPattern, *flagBucket, *flagBucketPrefix, *flagAWSKey, *flagAWSSecretKey, *flagAWSRegion, *flagDryRun, *flagLoop)

	var progress Progress
	var rate float64
	var uploadMB float64

	var b *s3.Bucket
	if !*flagDryRun {
		auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
		if err != nil {
			fmt.Printf("Authentication error: %s\n", err)
			os.Exit(4)
		}

		region, ok := aws.Regions[*flagAWSRegion]
		if !ok {
			fmt.Printf("Parameter 'aws-region' must be a valid AWS Region\n")
			os.Exit(5)
		}
		s := s3.New(auth, region)
		b = s.Bucket(*flagBucket)
	} else {
		// b declared and not used :(
		_ = b
	}

	for {
		progress = Progress{}
		startTime := time.Now().UTC()
		err = filepath.Walk(*flagBase, makeupload(*flagBase, pattern, b, *flagBucketPrefix, *flagDryRun, &progress))
		if err != nil {
			fmt.Printf("Error reading files from %s: %s\n", *flagBase, err)
		}

		if progress.Count > 0 {
			uploadMB = float64(progress.Bytes) / 1024.0 / 1024.0
			duration := time.Now().UTC().Sub(startTime).Seconds()

			if duration > 0 {
				rate = uploadMB / duration
			} else {
				rate = 0
			}
			fmt.Printf("Uploaded %d files containing %.2fMB in %.02fs (%.02fMB/s). Encountered %d errors.\n", progress.Count, uploadMB, duration, rate, progress.Errors)
		} else {
			// We didn't upload any files.
			if !*flagLoop {
				fmt.Println("Nothing to upload")
			} else {
				// Only sleep if we didn't find anything to upload. If we did upload
				// something, we want to try again right away.
				fmt.Println("Waiting for files to upload...")
				time.Sleep(10 * time.Second)
			}
		}

		if !*flagLoop {
			break
		}
	}
}
Example #28
func (o *S3SplitFileOutput) Init(config interface{}) (err error) {
	conf := config.(*S3SplitFileOutputConfig)
	o.S3SplitFileOutputConfig = conf
	var intPerm int64

	if intPerm, err = strconv.ParseInt(conf.FolderPerm, 8, 32); err != nil {
		err = fmt.Errorf("S3SplitFileOutput '%s' can't parse `folder_perm`, is it an octal integer string?",
			o.Path)
		return
	}
	o.folderPerm = os.FileMode(intPerm)

	if intPerm, err = strconv.ParseInt(conf.Perm, 8, 32); err != nil {
		err = fmt.Errorf("S3SplitFileOutput '%s' can't parse `perm`, is it an octal integer string?",
			o.Path)
		return
	}
	o.perm = os.FileMode(intPerm)

	if conf.MaxFileSize < 1 {
		err = fmt.Errorf("Parameter 'max_file_size' must be greater than 0.")
		return
	}
	if conf.MaxFileAge < 1 {
		err = fmt.Errorf("Parameter 'max_file_age' must be greater than 0.")
		return
	}

	if conf.MaxOpenFiles < 0 {
		err = fmt.Errorf("Parameter 'max_open_files' must not be negative.")
		return
	}
	o.fopenCache, err = lru.New(conf.MaxOpenFiles)
	if err != nil {
		// This should never happen since we already checked for negative size.
		return
	}

	// Close files as they are evicted / removed from the cache.
	o.fopenCache.OnEvicted = func(key interface{}, val interface{}) {
		// If it's not a file, we don't care about it.
		switch t := val.(type) {
		case *os.File:
			t.Close()
		}
	}

	o.dimFiles = map[string]*SplitFileInfo{}

	// TODO: fall back to default schema.
	//fmt.Printf("schema_file = '%s'\n", conf.SchemaFile)
	if conf.SchemaFile == "" {
		err = fmt.Errorf("Parameter 'schema_file' is missing")
		return
	}

	o.schema, err = LoadSchema(conf.SchemaFile)
	if err != nil {
		return fmt.Errorf("Parameter 'schema_file' must be a valid JSON file: %s", err)
	}

	if conf.S3Bucket != "" {
		auth, err := aws.GetAuth(conf.AWSKey, conf.AWSSecretKey, "", time.Now())
		if err != nil {
			return fmt.Errorf("Authentication error: %s\n", err)
		}
		region, ok := aws.Regions[conf.AWSRegion]
		if !ok {
			return fmt.Errorf("Parameter 'aws_region' must be a valid AWS Region")
		}
		s := s3.New(auth, region)
		s.ConnectTimeout = time.Duration(conf.S3ConnectTimeout) * time.Second
		s.ReadTimeout = time.Duration(conf.S3ReadTimeout) * time.Second
		// TODO: ensure we can write to the bucket.
		o.bucket = s.Bucket(conf.S3Bucket)
	} else {
		o.bucket = nil
	}

	// Remove any excess path separators from the bucket prefix.
	conf.S3BucketPrefix = fmt.Sprintf("/%s", strings.Trim(conf.S3BucketPrefix, "/"))

	o.publishChan = make(chan PublishAttempt, 1000)

	o.shuttingDown = false

	return
}
Example #29
func main() {
	flagStdin := flag.Bool("stdin", false, "read list of s3 key names from stdin")
	flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
	flagAWSKey := flag.String("aws-key", "", "AWS Key")
	flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
	flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
	flagConnectTimeout := flag.Uint64("connect_timeout", 60, "Max seconds to wait for an S3 connection")
	flagReadTimeout := flag.Uint64("read_timeout", 300, "Max seconds to wait for an S3 file read to complete")
	flag.Parse()

	if !*flagStdin && flag.NArg() < 1 {
		flag.PrintDefaults()
		os.Exit(1)
	}

	var connectTimeout uint32
	if *flagConnectTimeout < math.MaxUint32 {
		connectTimeout = uint32(*flagConnectTimeout)
	} else {
		fmt.Fprintf(os.Stderr, "Connection Timeout is too large: %d.\n", *flagConnectTimeout)
		os.Exit(8)
	}

	var readTimeout uint32
	if *flagReadTimeout < math.MaxUint32 {
		readTimeout = uint32(*flagReadTimeout)
	} else {
		fmt.Fprintf(os.Stderr, "Read Timeout is too large: %d.\n", *flagReadTimeout)
		os.Exit(8)
	}

	auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
	if err != nil {
		fmt.Fprintf(os.Stderr, "Authentication error: %s\n", err)
		os.Exit(4)
	}
	region, ok := aws.Regions[*flagAWSRegion]
	if !ok {
		fmt.Fprintf(os.Stderr, "Parameter 'aws-region' must be a valid AWS Region\n")
		os.Exit(5)
	}
	s := s3.New(auth, region)
	if connectTimeout > 0 {
		s.ConnectTimeout = time.Duration(connectTimeout) * time.Second
	}
	if readTimeout > 0 {
		s.ReadTimeout = time.Duration(readTimeout) * time.Second
	}
	bucket := s.Bucket(*flagBucket)

	startTime := time.Now().UTC()
	totalFiles := 0
	if *flagStdin {
		scanner := bufio.NewScanner(os.Stdin)
		for scanner.Scan() {
			filename := scanner.Text()
			totalFiles++
			cat(bucket, filename)
		}
	} else {
		for _, filename := range flag.Args() {
			totalFiles++
			cat(bucket, filename)
		}
	}

	duration := time.Now().UTC().Sub(startTime).Seconds()
	mb := float64(bytesRead) / 1024.0 / 1024.0
	if duration == 0.0 {
		duration = 1.0
	}
	fmt.Fprintf(os.Stderr, "All done processing %d files, %.2fMB in %.2f seconds (%.2fMB/s)\n", totalFiles, mb, duration, (mb / duration))
}
Example #30
func main() {
	flagMatch := flag.String("match", "TRUE", "message_matcher filter expression")
	flagFormat := flag.String("format", "txt", "output format [txt|json|heka|count]")
	flagOutput := flag.String("output", "", "output filename, defaults to stdout")
	flagStdin := flag.Bool("stdin", false, "read list of s3 key names from stdin")
	flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
	flagAWSKey := flag.String("aws-key", "", "AWS Key")
	flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
	flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
	flagMaxMessageSize := flag.Uint64("max-message-size", 4*1024*1024, "maximum message size in bytes")
	flagWorkers := flag.Uint64("workers", 16, "number of parallel workers")
	flagConnectTimeout := flag.Uint64("connect_timeout", 60, "Max seconds to wait for an S3 connection")
	flagReadTimeout := flag.Uint64("read_timeout", 300, "Max seconds to wait for an S3 file read to complete")
	flag.Parse()

	if !*flagStdin && flag.NArg() < 1 {
		flag.PrintDefaults()
		os.Exit(1)
	}

	if *flagMaxMessageSize < math.MaxUint32 {
		maxSize := uint32(*flagMaxMessageSize)
		message.SetMaxMessageSize(maxSize)
	} else {
		fmt.Fprintf(os.Stderr, "Message size is too large: %d\n", *flagMaxMessageSize)
		os.Exit(8)
	}

	workers := 1
	if *flagWorkers == 0 {
		fmt.Fprintf(os.Stderr, "Cannot run with zero workers. Using 1.\n")
	} else if *flagWorkers < 2000 {
		workers = int(*flagWorkers)
	} else {
		fmt.Fprintf(os.Stderr, "Too many workers: %d. Use a reasonable value (up to a few hundred).\n", *flagWorkers)
		os.Exit(8)
	}

	var connectTimeout uint32
	if *flagConnectTimeout < math.MaxUint32 {
		connectTimeout = uint32(*flagConnectTimeout)
	} else {
		fmt.Fprintf(os.Stderr, "Connection Timeout is too large: %d.\n", *flagConnectTimeout)
		os.Exit(8)
	}

	var readTimeout uint32
	if *flagReadTimeout < math.MaxUint32 {
		readTimeout = uint32(*flagReadTimeout)
	} else {
		fmt.Fprintf(os.Stderr, "Read Timeout is too large: %d.\n", *flagReadTimeout)
		os.Exit(8)
	}

	var err error
	var match *message.MatcherSpecification
	if match, err = message.CreateMatcherSpecification(*flagMatch); err != nil {
		fmt.Fprintf(os.Stderr, "Match specification - %s\n", err)
		os.Exit(2)
	}

	var out *os.File
	if "" == *flagOutput {
		out = os.Stdout
	} else {
		if out, err = os.OpenFile(*flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
			fmt.Fprintf(os.Stderr, "%s\n", err)
			os.Exit(3)
		}
		defer out.Close()
	}

	auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
	if err != nil {
		fmt.Fprintf(os.Stderr, "Authentication error: %s\n", err)
		os.Exit(4)
	}
	region, ok := aws.Regions[*flagAWSRegion]
	if !ok {
		fmt.Fprintf(os.Stderr, "Parameter 'aws-region' must be a valid AWS Region\n")
		os.Exit(5)
	}
	s := s3.New(auth, region)
	if connectTimeout > 0 {
		s.ConnectTimeout = time.Duration(connectTimeout) * time.Second
	}
	if readTimeout > 0 {
		s.ReadTimeout = time.Duration(readTimeout) * time.Second
	}
	bucket := s.Bucket(*flagBucket)

	filenameChannel := make(chan string, 1000)
	recordChannel := make(chan s3splitfile.S3Record, 1000)
	doneChannel := make(chan string, 1000)
	allDone := make(chan int)

	for i := 1; i <= workers; i++ {
		go cat(bucket, filenameChannel, recordChannel, doneChannel)
	}
	go save(recordChannel, match, *flagFormat, out, allDone)

	startTime := time.Now().UTC()
	totalFiles := 0
	pendingFiles := 0
	if *flagStdin {
		scanner := bufio.NewScanner(os.Stdin)
		for scanner.Scan() {
			filename := scanner.Text()
			totalFiles++
			pendingFiles++
			filenameChannel <- filename
			if pendingFiles >= 1000 {
				waitFor(doneChannel, 1)
				pendingFiles--
			}
		}
		close(filenameChannel)
	} else {
		for _, filename := range flag.Args() {
			totalFiles++
			pendingFiles++
			filenameChannel <- filename
			if pendingFiles >= 1000 {
				waitFor(doneChannel, 1)
				pendingFiles--
			}
		}
		close(filenameChannel)
	}

	fmt.Fprintf(os.Stderr, "Waiting for last %d files\n", pendingFiles)
	waitFor(doneChannel, pendingFiles)
	close(recordChannel)
	bytesRead := <-allDone
	// All done! Win!
	duration := time.Now().UTC().Sub(startTime).Seconds()
	mb := float64(bytesRead) / 1024.0 / 1024.0
	if duration == 0.0 {
		duration = 1.0
	}
	fmt.Fprintf(os.Stderr, "All done processing %d files, %.2fMB in %.2f seconds (%.2fMB/s)\n", totalFiles, mb, duration, (mb / duration))
}