func (input *FilePollingInput) Run(runner pipeline.InputRunner,
    helper pipeline.PluginHelper) error {

    input.runner = runner
    input.hostname = helper.PipelineConfig().Hostname()
    tickChan := runner.Ticker()

    sRunner := runner.NewSplitterRunner("")
    if !sRunner.UseMsgBytes() {
        sRunner.SetPackDecorator(input.packDecorator)
    }
    // Release the splitter runner when the input shuts down.
    defer sRunner.Done()

    for {
        select {
        case <-input.stop:
            return nil
        case <-tickChan:
        }

        f, err := os.Open(input.FilePath)
        if err != nil {
            runner.LogError(fmt.Errorf("Error opening file: %s", err.Error()))
            continue
        }
        for err == nil {
            err = sRunner.SplitStream(f, nil)
            if err != io.EOF && err != nil {
                runner.LogError(fmt.Errorf("Error reading file: %s", err.Error()))
            }
        }
        // Close the file before waiting for the next tick so descriptors
        // don't leak across polls.
        f.Close()
    }
}
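// FilePollingInput.Run above wires input.packDecorator into the splitter
// whenever the splitter delivers full packs rather than raw message bytes.
// The decorator itself is not part of this listing; the following is a
// minimal sketch, and the specific fields set here (Type, Hostname,
// FilePath) are assumptions about what a polling input would record, not a
// confirmed implementation. Assumes heka's "message" package is imported.
func (input *FilePollingInput) packDecorator(pack *pipeline.PipelinePack) {
    // Stamp every pack produced from the polled file with enough metadata
    // for downstream filters to identify its origin.
    pack.Message.SetType("heka.file.polling")
    pack.Message.SetHostname(input.hostname)
    if field, err := message.NewField("FilePath", input.FilePath, ""); err == nil {
        pack.Message.AddField(field)
    }
}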
// Creates deliverer and stop channel and starts the provided LogstreamInput.
func (li *LogstreamerInput) startLogstreamInput(logstream *LogstreamInput, i int,
    ir p.InputRunner, h p.PluginHelper) {

    stop := make(chan chan bool, 1)
    token := strconv.Itoa(i)
    deliverer := ir.NewDeliverer(token)
    sRunner := ir.NewSplitterRunner(token)
    li.stopLogstreamChans = append(li.stopLogstreamChans, stop)
    go logstream.Run(ir, h, stop, deliverer, sRunner)
}
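// The shutdown side implied by the stop channels collected above is not part
// of this listing. A minimal sketch, inferred purely from the chan chan bool
// element type and therefore an assumption: the stopper hands each logstream
// a reply channel and blocks until the stream acknowledges it has exited.
func (li *LogstreamerInput) stopAllLogstreamsSketch() {
    for _, stop := range li.stopLogstreamChans {
        done := make(chan bool)
        stop <- done // ask the logstream goroutine to stop
        <-done       // wait for it to acknowledge
    }
}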
func (input *Sqs3Input) Run(runner pipeline.InputRunner,
    helper pipeline.PluginHelper) error {

    // Initialize the AWS clients and resolve the queue URL.
    input.runner = runner
    input.sqs = sqs.New(session.New())
    input.s3 = s3.New(session.New())
    queue_url, err := get_queue(input.sqs, input.SqsQueue)
    if err != nil {
        return err
    }
    input.queue_url = queue_url
    //input.hostname = helper.PipelineConfig().Hostname()

    tickChan := runner.Ticker()
    sRunner := runner.NewSplitterRunner("")
    if !sRunner.UseMsgBytes() {
        sRunner.SetPackDecorator(input.packDecorator)
    }
    defer sRunner.Done()

    for {
        select {
        case <-input.stop:
            return nil
        case <-tickChan:
        }

        receipt_handle, bucket, key, err := receive_from_queue(input.sqs, input.queue_url)
        if err != nil {
            runner.LogError(fmt.Errorf("Error reading queue: %s", err.Error()))
            continue
        }

        o, _, err := get_object(input.s3, bucket, key)
        if err != nil {
            runner.LogError(fmt.Errorf("Error opening file: %s", err.Error()))
            // If the object is gone this queue entry can never succeed, so
            // delete the message instead of retrying it forever.
            if aws_err, ok := err.(awserr.Error); ok {
                if aws_err.Code() == "NoSuchBucket" || aws_err.Code() == "NoSuchKey" {
                    delete_message(input.sqs, input.queue_url, receipt_handle)
                }
            }
            continue
        }
        for err == nil {
            err = sRunner.SplitStream(o, nil)
            if err != io.EOF && err != nil {
                runner.LogError(fmt.Errorf("Error reading file: %s", err.Error()))
            }
        }
        o.Close()
    }
}
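// The queue and object helpers called by Sqs3Input.Run (get_queue,
// receive_from_queue, get_object, delete_message) are defined elsewhere.
// Below is a minimal sketch of three of them against aws-sdk-go v1, with
// signatures inferred from the call sites above; the bodies are assumptions,
// not the confirmed implementation. receive_from_queue is omitted because
// the message body format it parses is not shown. Assumes the "aws", "sqs",
// "s3", and "io" packages are imported.
func get_queue(svc *sqs.SQS, name string) (string, error) {
    out, err := svc.GetQueueUrl(&sqs.GetQueueUrlInput{QueueName: aws.String(name)})
    if err != nil {
        return "", err
    }
    return *out.QueueUrl, nil
}

func get_object(svc *s3.S3, bucket, key string) (io.ReadCloser, int64, error) {
    out, err := svc.GetObject(&s3.GetObjectInput{
        Bucket: aws.String(bucket),
        Key:    aws.String(key),
    })
    if err != nil {
        return nil, 0, err
    }
    return out.Body, *out.ContentLength, nil
}

func delete_message(svc *sqs.SQS, queueUrl, receiptHandle string) error {
    _, err := svc.DeleteMessage(&sqs.DeleteMessageInput{
        QueueUrl:      aws.String(queueUrl),
        ReceiptHandle: aws.String(receiptHandle),
    })
    return err
}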
func (input *S3SplitFileInput) fetcher(runner pipeline.InputRunner,
    wg *sync.WaitGroup, workerId uint32) {

    var (
        s3Key     string
        startTime time.Time
        duration  float64
    )

    fetcherName := fmt.Sprintf("S3Reader%d", workerId)
    deliverer := runner.NewDeliverer(fetcherName)
    defer deliverer.Done()
    splitterRunner := runner.NewSplitterRunner(fetcherName)

    ok := true
    for ok {
        select {
        case s3Key, ok = <-input.listChan:
            if !ok {
                // Channel is closed => we're shutting down, exit cleanly.
                // runner.LogMessage("Fetcher all done! shutting down.")
                break
            }

            startTime = time.Now().UTC()
            err := input.readS3File(runner, &deliverer, &splitterRunner, s3Key)
            atomic.AddInt64(&input.processFileCount, 1)
            leftovers := splitterRunner.GetRemainingData()
            lenLeftovers := len(leftovers)
            if lenLeftovers > 0 {
                atomic.AddInt64(&input.processFileDiscardedBytes, int64(lenLeftovers))
                runner.LogError(fmt.Errorf("Trailing data, possible corruption: %d bytes left in stream at EOF: %s",
                    lenLeftovers, s3Key))
            }
            if err != nil && err != io.EOF {
                runner.LogError(fmt.Errorf("Error reading %s: %s", s3Key, err))
                atomic.AddInt64(&input.processFileFailures, 1)
                continue
            }
            duration = time.Now().UTC().Sub(startTime).Seconds()
            runner.LogMessage(fmt.Sprintf("Successfully fetched %s in %.2fs ", s3Key, duration))

        case <-input.stop:
            for _ = range input.listChan {
                // Drain the channel without processing the files.
                // Technically the S3Iterator can still add one back on to the
                // channel but this ensures there is room so it won't block.
            }
            ok = false
        }
    }

    wg.Done()
}
func (k *KafkaInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
    sRunner := ir.NewSplitterRunner("")

    defer func() {
        k.partitionConsumer.Close()
        k.consumer.Close()
        if k.checkpointFile != nil {
            k.checkpointFile.Close()
        }
        sRunner.Done()
    }()

    k.ir = ir
    k.stopChan = make(chan bool)

    var (
        hostname = k.pConfig.Hostname()
        event    *sarama.ConsumerMessage
        cError   *sarama.ConsumerError
        ok       bool
        n        int
    )

    packDec := func(pack *pipeline.PipelinePack) {
        pack.Message.SetType("heka.kafka")
        pack.Message.SetLogger(k.name)
        pack.Message.SetHostname(hostname)
        k.addField(pack, "Key", event.Key, "")
        k.addField(pack, "Topic", event.Topic, "")
        k.addField(pack, "Partition", event.Partition, "")
        k.addField(pack, "Offset", event.Offset, "")
    }
    if !sRunner.UseMsgBytes() {
        sRunner.SetPackDecorator(packDec)
    }

    eventChan := k.partitionConsumer.Messages()
    cErrChan := k.partitionConsumer.Errors()
    for {
        select {
        case event, ok = <-eventChan:
            if !ok {
                return nil
            }
            atomic.AddInt64(&k.processMessageCount, 1)
            if n, err = sRunner.SplitBytes(event.Value, nil); err != nil {
                ir.LogError(fmt.Errorf("processing message from topic %s: %s",
                    event.Topic, err))
            }
            if n > 0 && n != len(event.Value) {
                ir.LogError(fmt.Errorf("extra data dropped in message from topic %s",
                    event.Topic))
            }
            if k.config.OffsetMethod == "Manual" {
                if err = k.writeCheckpoint(event.Offset + 1); err != nil {
                    return err
                }
            }

        case cError, ok = <-cErrChan:
            if !ok {
                // Don't exit until the eventChan is closed.
                ok = true
                continue
            }
            if cError.Err == sarama.ErrOffsetOutOfRange {
                ir.LogError(fmt.Errorf(
                    "removing the out of range checkpoint file and stopping"))
                if k.checkpointFile != nil {
                    k.checkpointFile.Close()
                    k.checkpointFile = nil
                }
                if err := os.Remove(k.checkpointFilename); err != nil {
                    ir.LogError(err)
                }
                return err
            }
            atomic.AddInt64(&k.processMessageFailures, 1)
            ir.LogError(cError.Err)

        case <-k.stopChan:
            return nil
        }
    }
}
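// Both Kafka inputs in this listing call an addField helper from their pack
// decorators to attach Kafka metadata to each pack; the helper itself is not
// shown. A minimal sketch built on heka's message.NewField, with the error
// handling an assumption (KafkaConsumerGroupInput would carry its own
// equivalent on its receiver type).
func (k *KafkaInput) addField(pack *pipeline.PipelinePack, name string,
    value interface{}, representation string) {

    if field, err := message.NewField(name, value, representation); err == nil {
        pack.Message.AddField(field)
    } else {
        k.ir.LogError(fmt.Errorf("can't add '%s' field: %s", name, err.Error()))
    }
}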
func (input *S3OffsetInput) fetcher(runner pipeline.InputRunner,
    wg *sync.WaitGroup, workerId uint32) {

    var (
        loc       MessageLocation
        startTime time.Time
        duration  float64
        headers   map[string][]string
        record    []byte
        err       error
    )

    headers = map[string][]string{
        "Range": []string{""},
    }

    fetcherName := fmt.Sprintf("S3Reader%d", workerId)
    deliverer := runner.NewDeliverer(fetcherName)
    defer deliverer.Done()
    splitterRunner := runner.NewSplitterRunner(fetcherName)

    ok := true
    for ok {
        select {
        case loc, ok = <-input.offsetChan:
            if !ok {
                // Channel is closed => we're shutting down, exit cleanly.
                runner.LogMessage("Fetcher all done! shutting down.")
                break
            }

            startTime = time.Now().UTC()

            // Read one message from the given location.
            headers["Range"][0] = fmt.Sprintf("bytes=%d-%d", loc.Offset,
                loc.Offset+loc.Length-1)
            atomic.AddInt64(&input.processMessageCount, 1)
            atomic.AddInt64(&input.processMessageBytes, int64(loc.Length))
            for attempt := uint32(1); attempt <= input.S3Retries; attempt++ {
                record, err = getClientRecord(input.bucket, &loc, headers)
                if err != nil {
                    runner.LogMessage(fmt.Sprintf("Error #%d fetching %s @ %d+%d: %s\n",
                        attempt, loc.Key, loc.Offset, loc.Length, err))
                } else {
                    break
                }
            }
            if err != nil {
                atomic.AddInt64(&input.processMessageFailures, 1)
                continue
            }
            splitterRunner.DeliverRecord(record, deliverer)
            duration = time.Now().UTC().Sub(startTime).Seconds()
            runner.LogMessage(fmt.Sprintf("Successfully fetched %s in %.2fs ", loc.Key, duration))

        case <-input.stop:
            runner.LogMessage("Stopping fetcher...")
            for _ = range input.offsetChan {
                // Drain the channel without processing anything.
            }
            ok = false
        }
    }

    wg.Done()
}
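// getClientRecord, used above to fetch a single message's byte span, is not
// part of this listing; the real implementation presumably goes through an
// S3 client bound to input.bucket. As an illustration only, here is a sketch
// of the same ranged read expressed with net/http against a hypothetical
// baseURL, so the Range-header mechanics are visible. Assumes "net/http",
// "io/ioutil", and "fmt" are imported.
func getClientRecordSketch(baseURL string, loc *MessageLocation,
    headers map[string][]string) ([]byte, error) {

    req, err := http.NewRequest("GET", baseURL+"/"+loc.Key, nil)
    if err != nil {
        return nil, err
    }
    // Copy the caller-provided headers (notably "Range", which selects one
    // message's bytes) onto the request.
    for name, values := range headers {
        for _, v := range values {
            req.Header.Add(name, v)
        }
    }
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()
    if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("unexpected status %s fetching %s", resp.Status, loc.Key)
    }
    return ioutil.ReadAll(resp.Body)
}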
func (k *KafkaConsumerGroupInput) Run(ir pipeline.InputRunner,
    h pipeline.PluginHelper) (err error) {

    sRunner := ir.NewSplitterRunner("")
    defer func() {
        if err := k.consumer.Close(); err != nil {
            k.ir.LogError(fmt.Errorf("error closing the consumer: %s", err.Error()))
        }
        sRunner.Done()
    }()

    k.ir = ir

    go func() {
        for err := range k.consumer.Errors() {
            atomic.AddInt64(&k.processMessageFailures, 1)
            ir.LogError(err)
        }
    }()

    var (
        hostname = k.pConfig.Hostname()
        event    *sarama.ConsumerMessage
        ok       bool
        n        int
    )

    packDec := func(pack *pipeline.PipelinePack) {
        pack.Message.SetType("heka.kafka")
        pack.Message.SetLogger(k.name)
        pack.Message.SetHostname(hostname)
        k.addField(pack, "Key", event.Key, "")
        k.addField(pack, "Topic", event.Topic, "")
        k.addField(pack, "Partition", event.Partition, "")
        k.addField(pack, "Offset", event.Offset, "")
    }
    if !sRunner.UseMsgBytes() {
        sRunner.SetPackDecorator(packDec)
    }

    offsets := make(map[string]map[int32]int64)
    for {
        select {
        case event, ok = <-k.consumer.Messages():
            if !ok {
                return
            }
            if offsets[event.Topic] == nil {
                offsets[event.Topic] = make(map[int32]int64)
            }
            // Warn when the consumed offset isn't contiguous with the last
            // one we saw for this partition; diff is found minus expected.
            if offsets[event.Topic][event.Partition] != 0 &&
                offsets[event.Topic][event.Partition] != event.Offset-1 {
                ir.LogError(fmt.Errorf("unexpected offset on %s:%d. Expected %d, found %d, diff %d.\n",
                    event.Topic, event.Partition,
                    offsets[event.Topic][event.Partition]+1, event.Offset,
                    event.Offset-offsets[event.Topic][event.Partition]-1))
            }
            atomic.AddInt64(&k.processMessageCount, 1)
            if n, err = sRunner.SplitBytes(event.Value, nil); err != nil {
                ir.LogError(fmt.Errorf("processing message from topic %s: %s",
                    event.Topic, err))
            }
            if n > 0 && n != len(event.Value) {
                ir.LogError(fmt.Errorf("extra data dropped in message from topic %s",
                    event.Topic))
            }
            offsets[event.Topic][event.Partition] = event.Offset
            k.consumer.CommitUpto(event)

        case <-k.stopChan:
            return
        }
    }
}