func parseConfigfile() {
	config, err := configfile.ReadConfigFile(cfgFile)
	if err != nil {
		fmt.Printf("Couldn't read config file %s because: %#v\n", cfgFile, err)
		panic(err)
	}

	// Prioritize config values required to send SNS notifications
	cfg.Aws.Accesskey, err = config.GetString("aws", "accesskey")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Aws.Secretkey, err = config.GetString("aws", "secretkey")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Sns.FailureNotifications, err = config.GetBool("sns", "failure_notifications")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Sns.Topic, err = config.GetString("sns", "topic")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}

	// Everything else
	cfg.Default.Debug, err = config.GetBool("default", "debug")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Default.Pollsleepinseconds, err = config.GetInt64("default", "pollsleepinseconds")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Aws.Region, err = config.GetString("aws", "region")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}

	var arrayString string
	var stringsArray []string

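	// list-valued settings (s3 buckets/prefixes, redshift tables) are
	// stored as comma-separated strings; each is split and the entries
	// trimmed of surrounding whitespace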
	arrayString, err = config.GetString("s3", "buckets")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	stringsArray = strings.Split(arrayString, ",")
	cfg.S3.Buckets = make([]string, len(stringsArray))
	for i := range stringsArray {
		cfg.S3.Buckets[i] = strings.TrimSpace(stringsArray[i])
	}

	arrayString, err = config.GetString("s3", "prefixes")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	stringsArray = strings.Split(arrayString, ",")
	cfg.S3.Prefixes = make([]string, len(stringsArray))
	for i := range stringsArray {
		cfg.S3.Prefixes[i] = strings.TrimSpace(stringsArray[i])
	}

	arrayString, err = config.GetString("redshift", "tables")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	stringsArray = strings.Split(arrayString, ",")
	cfg.Redshift.Tables = make([]string, len(stringsArray))
	for i := range stringsArray {
		cfg.Redshift.Tables[i] = strings.TrimSpace(stringsArray[i])
	}

	cfg.Redshift.Host, err = config.GetString("redshift", "host")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Port, err = config.GetInt64("redshift", "port")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Database, err = config.GetString("redshift", "database")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.User, err = config.GetString("redshift", "user")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Password, err = config.GetString("redshift", "password")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Emptyasnull, err = config.GetBool("redshift", "emptyasnull")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Blanksasnull, err = config.GetBool("redshift", "blanksasnull")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Fillrecord, err = config.GetBool("redshift", "fillrecord")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Maxerror, err = config.GetInt64("redshift", "maxerror")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.JsonPath, err = config.GetString("redshift", "json_path")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.TimeFormat, err = config.GetString("redshift", "time_format")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Gzip, err = config.GetBool("redshift", "gzip")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.TruncateColumns, err = config.GetBool("redshift", "truncate_columns")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
}
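// cfg is package-level state populated above; its definition isn't part
// of this example. A shape consistent with the assignments (field names
// taken from the code, the grouping into sub-structs is assumed) would be:
//
//	var cfg struct {
//		Default  struct{ Debug bool; Pollsleepinseconds int64 }
//		Aws      struct{ Accesskey, Secretkey, Region string }
//		Sns      struct{ FailureNotifications bool; Topic string }
//		S3       struct{ Buckets, Prefixes []string }
//		Redshift struct {
//			Tables                                               []string
//			Host, Database, User, Password, JsonPath, TimeFormat string
//			Port, Maxerror                                       int64
//			Emptyasnull, Blanksasnull, Fillrecord, Gzip          bool
//			TruncateColumns                                      bool
//		}
//	}
//
// reportError is also defined elsewhere in the package; since parsing
// continues after each failure, it presumably logs (and, per the comment
// above, may notify via SNS) rather than exiting on the first bad key.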
Example #2
func main() {
	flag.Parse() // Read argv

	if shouldOutputVersion {
		fmt.Printf("kafka-s3-consumer %s\n", VERSION)
		os.Exit(0)
	}

	config, err := configfile.ReadConfigFile(configFilename)
	if err != nil {
		fmt.Printf("Couldn't read config file %s because: %#v\n", configFilename, err)
		panic(err)
	}

	// Read configuration file
	host, _ := config.GetString("kafka", "host")
	debug, _ = config.GetBool("default", "debug")
	bufferMaxSizeInBytes, _ := config.GetInt64("default", "maxchunksizebytes")
	bufferMaxAgeInMinutes, _ := config.GetInt64("default", "maxchunkagemins")
	port, _ := config.GetString("kafka", "port")
	hostname := fmt.Sprintf("%s:%s", host, port)
	awsKey, _ := config.GetString("s3", "accesskey")
	awsSecret, _ := config.GetString("s3", "secretkey")
	awsRegion, _ := config.GetString("s3", "region")
	s3BucketName, _ := config.GetString("s3", "bucket")
	s3bucket := s3.New(aws.Auth{AccessKey: awsKey, SecretKey: awsSecret}, aws.Regions[awsRegion]).Bucket(s3BucketName)

	kafkaPollSleepMilliSeconds, _ := config.GetInt64("default", "pollsleepmillis")
	maxSize, _ := config.GetInt64("kafka", "maxmessagesize")
	tempfilePath, _ := config.GetString("default", "filebufferpath")
	topicsRaw, _ := config.GetString("kafka", "topics")
	topics := strings.Split(topicsRaw, ",")
	for i := range topics {
		topics[i] = strings.TrimSpace(topics[i])
	}
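	// topics and partitions are parallel lists: entry i of "partitions"
	// is the partition consumed for topics[i], so both config values must
	// have the same number of comma-separated entries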
	partitionsRaw, _ := config.GetString("kafka", "partitions")
	partitionStrings := strings.Split(partitionsRaw, ",")
	partitions := make([]int64, len(partitionStrings))
	for i := range partitionStrings {
		partitions[i], _ = strconv.ParseInt(strings.TrimSpace(partitionStrings[i]), 10, 64)
	}

	// Fetch Offsets from S3 (look for last written file and guid)
	if debug {
		fmt.Printf("Fetching offsets for each topic from s3 bucket %s ...\n", s3bucket.Name)
	}
	offsets := make([]uint64, len(topics))
	for i := range offsets {
		prefix := S3TopicPartitionPrefix(&topics[i], partitions[i])
		if debug {
			fmt.Printf("  Looking at %s object versions: ", prefix)
		}
		latestKey, err := LastS3KeyWithPrefix(s3bucket, &prefix)
		if err != nil {
			panic(err)
		}

		if debug {
			fmt.Printf("Got: %#v\n", latestKey)
		}

		if len(latestKey) == 0 { // no keys found, there aren't any files written, so start at 0 offset
			offsets[i] = 0
			if debug {
				fmt.Printf("  No s3 object found, assuming Offset:%d\n", offsets[i])
			}
		} else { // if a key was found we have to open the object and find the last offset
			if debug {
				fmt.Printf("  Found s3 object %s, got: ", latestKey)
			}
			contentBytes, err := s3bucket.Get(latestKey)
			if err != nil {
				panic(err)
			}
			guidPrefix := KafkaMsgGuidPrefix(&topics[i], partitions[i])
			lines := strings.Split(string(contentBytes), "\n")
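			// each buffered line looks like "<guidPrefix><offset>|<message>";
			// scan from the last line backwards so the first match is the
			// highest offset written, and resume consumption from there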
			for l := len(lines) - 1; l >= 0; l-- {
				if debug {
					fmt.Printf("    Looking at Line '%s'\n", lines[l])
				}
				if strings.HasPrefix(lines[l], guidPrefix) { // found a line with a guid, extract offset and escape out
					guidSplits := strings.SplitN(strings.SplitN(lines[l], "|", 2)[0], guidPrefix, 2)
					offsetString := guidSplits[len(guidSplits)-1]
					offsets[i], err = strconv.ParseUint(offsetString, 10, 64)
					if err != nil {
						panic(err)
					}
					if debug {
						fmt.Printf("OffsetString:%s(L#%d), Offset:%d\n", offsetString, l, offsets[i])
					}
					break
				}
			}
		}
	}

	if debug {
		fmt.Printf("Making sure chunkbuffer directory structure exists at %s\n", tempfilePath)
	}
	err = os.MkdirAll(tempfilePath, 0700)
	if err != nil {
		fmt.Printf("Error ensuring chunkbuffer directory structure %s: %#v\n", tempfilePath, err)
		panic(err)
	}

	if debug {
		fmt.Printf("Watching %d topics, opening a chunkbuffer for each.\n", len(topics))
	}
	buffers := make([]*ChunkBuffer, len(topics))
	for i := range topics {
		buffers[i] = &ChunkBuffer{FilePath: &tempfilePath,
			MaxSizeInBytes: bufferMaxSizeInBytes,
			MaxAgeInMins:   bufferMaxAgeInMinutes,
			Topic:          &topics[i],
			Partition:      partitions[i],
			Offset:         offsets[i],
		}
		buffers[i].CreateBufferFileOrPanic()
		if debug {
			fmt.Printf("Consumer[%s#%d][chunkbuffer]: %s\n", hostname, i, buffers[i].File.Name())
		}
	}

	if debug {
		fmt.Printf("Setting up a broker for each of the %d topics.\n", len(topics))
	}
	brokers := make([]*kafka.BrokerConsumer, len(topics))
	for i := range brokers {
		fmt.Printf("Setup Consumer[%s#%d]: { topic: %s, partition: %d, offset: %d, maxMessageSize: %d }\n",
			hostname,
			i,
			topics[i],
			partitions[i],
			offsets[i],
			maxSize,
		)
		brokers[i] = kafka.NewBrokerConsumer(hostname, topics[i], int(partitions[i]), offsets[i], uint32(maxSize))
	}

	if debug {
		fmt.Printf("Brokers created, starting to listen with %d brokers...\n", len(brokers))
	}

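	// one goroutine per broker: each installs its own interrupt handler,
	// consumes until signaled, flushes its buffer to S3, then reports
	// completion on brokerFinishes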
	brokerFinishes := make(chan bool, len(brokers))
	for idx, currentBroker := range brokers {
		go func(i int, broker *kafka.BrokerConsumer) {
			quitSignal := make(chan os.Signal, 1)
			signal.Notify(quitSignal, os.Interrupt)
			consumedCount, skippedCount, err := broker.ConsumeUntilQuit(kafkaPollSleepMilliSeconds, quitSignal, func(msg *kafka.Message) {
				if msg != nil {
					if debug {
						fmt.Printf("`%s` { ", topics[i])
						msg.Print()
						fmt.Printf("}\n")
					}
					buffers[i].PutMessage(msg)
				}

				// check for max size and max age ... if over, rotate
				// to new buffer file and upload the old one.
				if buffers[i].NeedsRotation() {
					rotatedOutBuffer := buffers[i]

					if debug {
						fmt.Printf("Broker#%d: Log Rotation needed! Rotating out of %s\n", i, rotatedOutBuffer.File.Name())
					}

					// msg can be nil when the consumer ticks without new
					// data, so fall back to the rotated buffer's offset
					// instead of dereferencing a nil message
					nextOffset := rotatedOutBuffer.Offset
					if msg != nil {
						nextOffset = msg.Offset()
					}
					buffers[i] = &ChunkBuffer{FilePath: &tempfilePath,
						MaxSizeInBytes: bufferMaxSizeInBytes,
						MaxAgeInMins:   bufferMaxAgeInMinutes,
						Topic:          &topics[i],
						Partition:      partitions[i],
						Offset:         nextOffset,
					}
					buffers[i].CreateBufferFileOrPanic()

					if debug {
						fmt.Printf("Broker#%d: Rotating into %s\n", i, buffers[i].File.Name())
					}

					rotatedOutBuffer.StoreToS3AndRelease(s3bucket)
				}
			})

			if err != nil {
				fmt.Printf("ERROR in Broker#%d:\n", i)
				panic(err)
			}

			if debug {
				fmt.Printf("Quit signal handled by Broker Consumer #%d (Topic `%s`)\n", i, topics[i])
				fmt.Printf("%s Report:  %d messages successfully consumed, %d messages skipped (typically corrupted, check logs)\n", topics[i], consumedCount, skippedCount)
			}

			// buffer stopped, let's clean up nicely
			buffers[i].StoreToS3AndRelease(s3bucket)

			brokerFinishes <- true
		}(idx, currentBroker)
	}

	// wait for every broker goroutine, not just the first one to finish
	for range brokers {
		<-brokerFinishes
	}

	fmt.Printf("All %d brokers finished.\n", len(brokers))
}
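// S3TopicPartitionPrefix, KafkaMsgGuidPrefix, LastS3KeyWithPrefix, and
// ChunkBuffer are package helpers not shown in this example. A minimal
// sketch of LastS3KeyWithPrefix, assuming the newest chunk file simply
// sorts lexicographically last under its prefix (using goamz's
// Bucket.List; the debug output above mentions object versions, so the
// real helper may page through results or consult versioning instead):
//
//	func LastS3KeyWithPrefix(bucket *s3.Bucket, prefix *string) (string, error) {
//		res, err := bucket.List(*prefix, "", "", 1000)
//		if err != nil {
//			return "", err
//		}
//		last := ""
//		for _, key := range res.Contents {
//			if key.Key > last {
//				last = key.Key
//			}
//		}
//		return last, nil
//	}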