Ejemplo n.º 1
0
// listObjects returns the entries of the connection's bucket that match
// prefix, following pagination until the listing is exhausted.
//
// When delimiter is "/", the result holds one S3Object per common prefix
// ("folder") with the trailing slash removed. For any other delimiter it holds
// one S3Object per object key. If a ListObjects call fails, the error is logged
// at debug level and returned together with a nil slice.
func (conn S3Connection) listObjects(prefix string, delimiter string) ([]Object, error) {
	s3Objs := make([]Object, 0)

	// marker is nil for the first page and then points at where the next page
	// must resume.
	var marker *string
	for {
		input := s3.ListObjectsInput{
			Bucket:    aws.String(conn.BucketName),
			Prefix:    aws.String(prefix),
			Delimiter: aws.String(delimiter),
			Marker:    marker,
		}
		result, err := conn.Connection.ListObjects(&input)
		if err != nil {
			log.Debugf("Failed to ListObjects for bucket %s, prefix %s: %s", conn.BucketName, prefix, err)
			return nil, err
		}

		if delimiter == "/" { // folders
			for _, commonPrefix := range result.CommonPrefixes {
				s3Objs = append(s3Objs, S3Object{
					S3FullPath: strings.TrimSuffix(*commonPrefix.Prefix, "/"),
				})
			}
		} else { // regular files
			for _, contents := range result.Contents {
				s3Objs = append(s3Objs, S3Object{
					S3FullPath: *contents.Key,
				})
			}
		}

		// A missing IsTruncated flag is treated as "no more pages".
		if result.IsTruncated == nil || !*result.IsTruncated {
			break
		}

		// S3 only fills NextMarker when a delimiter was supplied. Without it,
		// the last key of the page is the documented resume point.
		marker = result.NextMarker
		if marker == nil || *marker == "" {
			if n := len(result.Contents); n > 0 {
				marker = result.Contents[n-1].Key
			}
		}
		if marker == nil || *marker == "" {
			// No way to resume: stop instead of re-requesting the first page forever.
			log.Debugf("Truncated listing without a marker for bucket %s, prefix %s", conn.BucketName, prefix)
			break
		}

		// Brief pause between consecutive page requests.
		time.Sleep(100 * time.Millisecond)
	}
	return s3Objs, nil
}
Ejemplo n.º 2
0
// ReadFromS3 starts a background goroutine that repeatedly lists the objects
// in options.Bucket (optionally restricted to options.Prefix) and emits every
// non-blank line of every not-yet-seen object on the returned channel.
//
// Each emitted map carries the keys "text" (the trimmed line), "s3_bucket",
// "s3_key" and "s3_line" (1-based count of lines read from the object,
// including blank ones). Keys ending in ".gz" are gunzipped on the fly.
// options.Include / options.Exclude, when non-empty, are regular expressions
// that a key must match / must not match.
//
// Processed keys are recorded in a seendb at options.StateFile. When that is
// empty, the file defaults to ~/.logfetch/s3_<sha1> where the hash covers
// bucket, prefix, include and exclude.
// NOTE(review): crypto.SHA1.New panics unless crypto/sha1 is linked into the
// binary; confirm the file imports it.
//
// The function panics if a regular expression is invalid, or if the current
// user, the state directory or the state database cannot be obtained. The
// goroutine panics if a key cannot be marked as seen. The channel is never
// closed: after each full pass the goroutine sleeps for a minute and starts
// over.
func ReadFromS3(options S3Options) <-chan map[string]interface{} {
	ch := make(chan map[string]interface{})

	cfg := options.AWSConfig
	if cfg == nil {
		cfg = aws.DefaultConfig
	}

	c := s3.New(cfg)

	var includeRegexp *regexp.Regexp
	if options.Include != "" {
		includeRegexp = regexp.MustCompile(options.Include)
	}

	var excludeRegexp *regexp.Regexp
	if options.Exclude != "" {
		excludeRegexp = regexp.MustCompile(options.Exclude)
	}

	stateFile := options.StateFile

	if stateFile == "" {
		u, err := user.Current()
		if err != nil {
			panic(err)
		}

		sha := crypto.SHA1.New()
		sha.Write([]byte(strings.Join([]string{
			options.Bucket,
			options.Prefix,
			options.Include,
			options.Exclude,
		}, "$$")))

		stateFile = path.Join(u.HomeDir, ".logfetch", "s3_"+hex.EncodeToString(sha.Sum(nil)))
	}

	if err := os.MkdirAll(path.Dir(stateFile), 0755); err != nil {
		panic(err)
	}

	seen, err := seendb.New(stateFile)
	if err != nil {
		panic(err)
	}

	// streamObject downloads one object and sends its non-blank lines on ch.
	// It reports whether the key should be marked as seen. It runs as its own
	// function so that the deferred Close calls fire once per object instead
	// of piling up in the never-returning polling goroutine.
	streamObject := func(key string) bool {
		f, err := c.GetObject(&s3.GetObjectInput{
			Bucket: aws.String(options.Bucket),
			Key:    aws.String(key),
		})
		if err != nil {
			// Leave the key unmarked so the next pass retries it.
			return false
		}
		defer f.Body.Close()

		var b io.Reader = f.Body

		if strings.HasSuffix(key, ".gz") {
			gz, err := gzip.NewReader(b)
			if err != nil {
				// Not a valid gzip stream: mark it so it is not retried forever.
				return true
			}
			defer gz.Close()

			b = gz
		}

		r := bufio.NewReader(b)

		var i int

		for {
			l, err := r.ReadString('\n')

			// ReadString returns the final unterminated line together with
			// io.EOF, so the data must be consumed before looking at err.
			if l != "" && (err == nil || err == io.EOF) {
				i++

				if text := strings.TrimSpace(l); text != "" {
					ch <- map[string]interface{}{
						"text":      text,
						"s3_bucket": options.Bucket,
						"s3_key":    key,
						"s3_line":   i,
					}
				}
			}

			if err != nil {
				// EOF or a read failure: either way the object is finished and
				// gets marked, so already-emitted lines are not sent again.
				break
			}
		}

		return true
	}

	go func() {
		for {
			marker := ""

			for {
				listConfig := s3.ListObjectsInput{
					Bucket: aws.String(options.Bucket),
				}

				if options.Prefix != "" {
					listConfig.Prefix = aws.String(options.Prefix)
				}

				if marker != "" {
					listConfig.Marker = aws.String(marker)
				}

				res, err := c.ListObjects(&listConfig)
				if err != nil {
					// Give up on this pass; retry after the sleep below.
					break
				}

				for _, o := range res.Contents {
					key := *o.Key

					if includeRegexp != nil && !includeRegexp.MatchString(key) {
						continue
					}

					if excludeRegexp != nil && excludeRegexp.MatchString(key) {
						continue
					}

					if seen.Seen(key) {
						continue
					}

					if !streamObject(key) {
						continue
					}

					if err := seen.Mark(key); err != nil {
						panic(err)
					}
				}

				// Stop paging when the listing is complete, or when there is
				// no last key to resume from.
				if res.IsTruncated == nil || !*res.IsTruncated || len(res.Contents) == 0 {
					break
				}

				last := res.Contents[len(res.Contents)-1].Key
				if last == nil || *last == "" {
					break
				}

				marker = *last
			}

			time.Sleep(time.Minute)
		}
	}()

	return ch
}