func parseConfigfile() {
	config, err := configfile.ReadConfigFile(cfgFile)
	if err != nil {
		fmt.Printf("Couldn't read config file %s because: %#v\n", cfgFile, err)
		panic(err)
	}

	// Prioritize config values required to send SNS notifications
	cfg.Aws.Accesskey, err = config.GetString("aws", "accesskey")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Aws.Secretkey, err = config.GetString("aws", "secretkey")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Sns.FailureNotifications, err = config.GetBool("sns", "failure_notifications")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Sns.Topic, err = config.GetString("sns", "topic")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}

	// Everything else
	cfg.Default.Debug, err = config.GetBool("default", "debug")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Default.Pollsleepinseconds, err = config.GetInt64("default", "pollsleepinseconds")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Aws.Region, err = config.GetString("aws", "region")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}

	// Comma-separated values are split and whitespace-trimmed into slices.
	var arrayString string
	var stringsArray []string

	arrayString, err = config.GetString("s3", "buckets")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	stringsArray = strings.Split(arrayString, ",")
	cfg.S3.Buckets = make([]string, len(stringsArray))
	for i := range stringsArray {
		cfg.S3.Buckets[i] = strings.TrimSpace(stringsArray[i])
	}

	arrayString, err = config.GetString("s3", "prefixes")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	stringsArray = strings.Split(arrayString, ",")
	cfg.S3.Prefixes = make([]string, len(stringsArray))
	for i := range stringsArray {
		cfg.S3.Prefixes[i] = strings.TrimSpace(stringsArray[i])
	}

	arrayString, err = config.GetString("redshift", "tables")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	stringsArray = strings.Split(arrayString, ",")
	cfg.Redshift.Tables = make([]string, len(stringsArray))
	for i := range stringsArray {
		cfg.Redshift.Tables[i] = strings.TrimSpace(stringsArray[i])
	}

	cfg.Redshift.Host, err = config.GetString("redshift", "host")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Port, err = config.GetInt64("redshift", "port")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Database, err = config.GetString("redshift", "database")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.User, err = config.GetString("redshift", "user")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Password, err = config.GetString("redshift", "password")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Emptyasnull, err = config.GetBool("redshift", "emptyasnull")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Blanksasnull, err = config.GetBool("redshift", "blanksasnull")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Fillrecord, err = config.GetBool("redshift", "fillrecord")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Maxerror, err = config.GetInt64("redshift", "maxerror")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.JsonPath, err = config.GetString("redshift", "json_path")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.TimeFormat, err = config.GetString("redshift", "time_format")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.Gzip, err = config.GetBool("redshift", "gzip")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
	cfg.Redshift.TruncateColumns, err = config.GetBool("redshift", "truncate_columns")
	if err != nil {
		reportError("Couldn't parse config: ", err)
	}
}
func main() {
	flag.Parse() // Read argv

	if shouldOutputVersion {
		fmt.Printf("kafka-s3-consumer %s\n", VERSION)
		os.Exit(0)
	}

	config, err := configfile.ReadConfigFile(configFilename)
	if err != nil {
		fmt.Printf("Couldn't read config file %s because: %#v\n", configFilename, err)
		panic(err)
	}

	// Read configuration file
	host, _ := config.GetString("kafka", "host")
	debug, _ = config.GetBool("default", "debug")
	bufferMaxSizeInBytes, _ := config.GetInt64("default", "maxchunksizebytes")
	bufferMaxAgeInMinutes, _ := config.GetInt64("default", "maxchunkagemins")
	port, _ := config.GetString("kafka", "port")
	hostname := fmt.Sprintf("%s:%s", host, port)
	awsKey, _ := config.GetString("s3", "accesskey")
	awsSecret, _ := config.GetString("s3", "secretkey")
	awsRegion, _ := config.GetString("s3", "region")
	s3BucketName, _ := config.GetString("s3", "bucket")
	s3bucket := s3.New(aws.Auth{AccessKey: awsKey, SecretKey: awsSecret}, aws.Regions[awsRegion]).Bucket(s3BucketName)
	kafkaPollSleepMilliSeconds, _ := config.GetInt64("default", "pollsleepmillis")
	maxSize, _ := config.GetInt64("kafka", "maxmessagesize")
	tempfilePath, _ := config.GetString("default", "filebufferpath")
	topicsRaw, _ := config.GetString("kafka", "topics")
	topics := strings.Split(topicsRaw, ",")
	for i := range topics {
		topics[i] = strings.TrimSpace(topics[i])
	}
	partitionsRaw, _ := config.GetString("kafka", "partitions")
	partitionStrings := strings.Split(partitionsRaw, ",")
	partitions := make([]int64, len(partitionStrings))
	for i := range partitionStrings {
		partitions[i], _ = strconv.ParseInt(strings.TrimSpace(partitionStrings[i]), 10, 64)
	}

	// Fetch Offsets from S3 (look for last written file and guid)
	if debug {
		fmt.Printf("Fetching offsets for each topic from s3 bucket %s ...\n", s3bucket.Name)
	}
	offsets := make([]uint64, len(topics))
	for i := range offsets {
		prefix := S3TopicPartitionPrefix(&topics[i], partitions[i])
		if debug {
			fmt.Printf("  Looking at %s object versions: ", prefix)
		}
		latestKey, err := LastS3KeyWithPrefix(s3bucket, &prefix)
		if err != nil {
			panic(err)
		}
		if debug {
			fmt.Printf("Got: %#v\n", latestKey)
		}
		if len(latestKey) == 0 {
			// No keys found: no files have been written yet, so start at offset 0.
			offsets[i] = 0
			if debug {
				fmt.Printf("  No s3 object found, assuming Offset:%d\n", offsets[i])
			}
		} else {
			// A key was found, so we have to open the object and find the last offset.
			if debug {
				fmt.Printf("  Found s3 object %s, got: ", latestKey)
			}
			contentBytes, err := s3bucket.Get(latestKey)
			if err != nil {
				panic(err)
			}
			guidPrefix := KafkaMsgGuidPrefix(&topics[i], partitions[i])
			lines := strings.Split(string(contentBytes), "\n")
			// Guid lines look like "<guidPrefix><offset>|<payload>"; scan
			// backwards so the first match is the most recent offset.
			for l := len(lines) - 1; l >= 0; l-- {
				if debug {
					fmt.Printf("  Looking at Line '%s'\n", lines[l])
				}
				if strings.HasPrefix(lines[l], guidPrefix) {
					// Found a line with a guid: extract the offset and escape out.
					guidSplits := strings.SplitN(strings.SplitN(lines[l], "|", 2)[0], guidPrefix, 2)
					offsetString := guidSplits[len(guidSplits)-1]
					offsets[i], err = strconv.ParseUint(offsetString, 10, 64)
					if err != nil {
						panic(err)
					}
					if debug {
						fmt.Printf("OffsetString:%s(L#%d), Offset:%d\n", offsetString, l, offsets[i])
					}
					break
				}
			}
		}
	}

	if debug {
		fmt.Printf("Making sure chunkbuffer directory structure exists at %s\n", tempfilePath)
	}
	err = os.MkdirAll(tempfilePath, 0700)
	if err != nil {
		fmt.Printf("Error ensuring chunkbuffer directory structure %s: %#v\n", tempfilePath, err)
		panic(err)
	}

	if debug {
		fmt.Printf("Watching %d topics, opening a chunkbuffer for each.\n", len(topics))
	}
	buffers := make([]*ChunkBuffer, len(topics))
	for i := range topics {
		buffers[i] = &ChunkBuffer{
			FilePath:       &tempfilePath,
			MaxSizeInBytes: bufferMaxSizeInBytes,
			MaxAgeInMins:   bufferMaxAgeInMinutes,
			Topic:          &topics[i],
			Partition:      partitions[i],
			Offset:         offsets[i],
		}
		buffers[i].CreateBufferFileOrPanic()
		if debug {
			fmt.Printf("Consumer[%s#%d][chunkbuffer]: %s\n", hostname, i, buffers[i].File.Name())
		}
	}

	if debug {
		fmt.Printf("Setting up a broker for each of the %d topics.\n", len(topics))
	}
	brokers := make([]*kafka.BrokerConsumer, len(topics))
	for i := range partitionStrings {
		fmt.Printf("Setup Consumer[%s#%d]: { topic: %s, partition: %d, offset: %d, maxMessageSize: %d }\n",
			hostname, i, topics[i], partitions[i], offsets[i], maxSize,
		)
		brokers[i] = kafka.NewBrokerConsumer(hostname, topics[i], int(partitions[i]), offsets[i], uint32(maxSize))
	}

	if debug {
		fmt.Printf("Brokers created, starting to listen with %d brokers...\n", len(brokers))
	}
	brokerFinishes := make(chan bool, len(brokers))
	for idx, currentBroker := range brokers {
		go func(i int, broker *kafka.BrokerConsumer) {
			quitSignal := make(chan os.Signal, 1)
			signal.Notify(quitSignal, os.Interrupt)
			consumedCount, skippedCount, err := broker.ConsumeUntilQuit(kafkaPollSleepMilliSeconds, quitSignal, func(msg *kafka.Message) {
				if msg != nil {
					if debug {
						fmt.Printf("`%s` { ", topics[i])
						msg.Print()
						fmt.Printf("}\n")
					}
					buffers[i].PutMessage(msg)
				}
				// Check for max size and max age; if either is exceeded, rotate
				// to a new buffer file and upload the old one.
				if buffers[i].NeedsRotation() {
					rotatedOutBuffer := buffers[i]
					if debug {
						fmt.Printf("Broker#%d: Log Rotation needed! Rotating out of %s\n", i, rotatedOutBuffer.File.Name())
					}
					buffers[i] = &ChunkBuffer{
						FilePath:       &tempfilePath,
						MaxSizeInBytes: bufferMaxSizeInBytes,
						MaxAgeInMins:   bufferMaxAgeInMinutes,
						Topic:          &topics[i],
						Partition:      partitions[i],
						Offset:         msg.Offset(),
					}
					buffers[i].CreateBufferFileOrPanic()
					if debug {
						fmt.Printf("Broker#%d: Rotating into %s\n", i, buffers[i].File.Name())
					}
					rotatedOutBuffer.StoreToS3AndRelease(s3bucket)
				}
			})
			if err != nil {
				fmt.Printf("ERROR in Broker#%d:\n", i)
				panic(err)
			}
			if debug {
				fmt.Printf("Quit signal handled by Broker Consumer #%d (Topic `%s`)\n", i, topics[i])
				fmt.Printf("%s Report: %d messages successfully consumed, %d messages skipped (typically corrupted, check logs)\n",
					topics[i], consumedCount, skippedCount)
			}
			// Broker stopped, let's clean up nicely.
			buffers[i].StoreToS3AndRelease(s3bucket)
			brokerFinishes <- true
		}(idx, currentBroker)
	}
	// Wait for every broker goroutine, not just the first one to signal.
	for range brokers {
		<-brokerFinishes
	}
	fmt.Printf("All %d brokers finished.\n", len(brokers))
}
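
// A hypothetical sketch of the INI file main() above reads; only the section
// and key names are taken from the Get* calls, all values below are invented
// placeholders:
//
//   [default]
//   debug = true
//   maxchunksizebytes = 10485760
//   maxchunkagemins = 15
//   pollsleepmillis = 1000
//   filebufferpath = /tmp/kafka-s3-consumer
//
//   [kafka]
//   host = localhost
//   port = 9092
//   topics = events, clicks
//   partitions = 0, 0
//   maxmessagesize = 1048576
//
//   [s3]
//   accesskey = AKIAEXAMPLE
//   secretkey = examplesecret
//   region = us-east-1
//   bucket = my-kafka-archive
//
// topics and partitions are parallel comma-separated lists: the i-th topic is
// consumed from the i-th partition, and each pair gets its own chunkbuffer and
// broker consumer goroutine.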