// Standard text log file parser func (lsi *LogstreamInput) payloadParser(ir p.InputRunner, deliver Deliver, stop chan chan bool) (err error) { var ( pack *p.PipelinePack record []byte n int ) for err == nil { select { case lsi.stopped = <-stop: return default: } n, record, err = lsi.parser.Parse(lsi.stream) if err == io.ErrShortBuffer { ir.LogError(fmt.Errorf("record exceeded MAX_RECORD_SIZE %d", message.MAX_RECORD_SIZE)) err = nil // non-fatal, keep going } if n > 0 { lsi.stream.FlushBuffer(n) } if len(record) > 0 { payload := string(record) pack = <-ir.InChan() pack.Message.SetUuid(uuid.NewRandom()) pack.Message.SetTimestamp(time.Now().UnixNano()) pack.Message.SetType("logfile") pack.Message.SetHostname(lsi.hostName) pack.Message.SetLogger(lsi.loggerIdent) pack.Message.SetPayload(payload) deliver(pack) lsi.countRecord() } } return }
// TODO: handle "no such file" func (input *S3SplitFileInput) readS3File(runner pipeline.InputRunner, d *pipeline.Deliverer, sr *pipeline.SplitterRunner, s3Key string) (err error) { runner.LogMessage(fmt.Sprintf("Preparing to read: %s", s3Key)) if input.bucket == nil { runner.LogMessage(fmt.Sprintf("Dude, where's my bucket: %s", s3Key)) return } var lastGoodOffset uint64 var attempt uint32 RetryS3: for attempt = 1; attempt <= input.S3Retries; attempt++ { for r := range S3FileIterator(input.bucket, s3Key, lastGoodOffset) { record := r.Record err := r.Err if err != nil && err != io.EOF { runner.LogError(fmt.Errorf("Error in attempt %d reading %s at offset %d: %s", attempt, s3Key, lastGoodOffset, err)) atomic.AddInt64(&input.processMessageFailures, 1) continue RetryS3 } if len(record) > 0 { lastGoodOffset += uint64(r.BytesRead) atomic.AddInt64(&input.processMessageCount, 1) atomic.AddInt64(&input.processMessageBytes, int64(len(record))) (*sr).DeliverRecord(record, *d) } } break } return }
// Main Logstreamer Input runner. This runner kicks off all the other
// logstream inputs, and handles rescanning for updates to the filesystem that
// might affect file visibility for the logstream inputs.
func (li *LogstreamerInput) Run(ir p.InputRunner, h p.PluginHelper) (err error) {
	var (
		ok         bool
		errs       *ls.MultipleError
		newstreams []string
	)

	// Kick off all the current logstreams we know of.
	// i is a running 1-based index handed to each started logstream input.
	i := 0
	for _, logstream := range li.plugins {
		i++
		li.startLogstreamInput(logstream, i, ir, h)
	}

	ok = true
	rescan := time.Tick(li.rescanInterval)

	// Our main rescan loop that handles shutting down
	for ok {
		select {
		case <-li.stopChan:
			ok = false
			returnChans := make([]chan bool, len(li.stopLogstreamChans))
			// Send out all the stop signals. Each logstream is handed its
			// own ack channel to report back on.
			for i, ch := range li.stopLogstreamChans {
				ret := make(chan bool)
				ch <- ret
				returnChans[i] = ret
			}

			// Wait for all the stops
			for _, ch := range returnChans {
				<-ch
			}

			// Close our own stopChan to indicate we shut down
			close(li.stopChan)
		case <-rescan:
			// Periodic filesystem rescan: pick up any logstreams that
			// appeared since the last scan and start inputs for them.
			li.logstreamSetLock.Lock()
			newstreams, errs = li.logstreamSet.ScanForLogstreams()
			if errs.IsError() {
				ir.LogError(errs)
			}
			for _, name := range newstreams {
				stream, ok := li.logstreamSet.GetLogstream(name)
				if !ok {
					ir.LogError(fmt.Errorf("Found new logstream: %s, but couldn't fetch it.", name))
					continue
				}

				lsi := NewLogstreamInput(stream, name, li.hostName)
				li.plugins[name] = lsi
				i++
				li.startLogstreamInput(lsi, i, ir, h)
			}
			li.logstreamSetLock.Unlock()
		}
	}
	return nil
}
func (rli *RedisInput) InsertMessage(ir pipeline.InputRunner, decoder pipeline.Decoder, msg string) { var ( pack *pipeline.PipelinePack e error ) // Get the InputRunner's chan to receive empty PipelinePacks packSupply := ir.InChan() pack = <-packSupply pack.Message.SetType(rli.conf.Key) pack.Message.SetLogger("Redis") pack.Message.SetPayload(msg) pack.Message.SetTimestamp(time.Now().UnixNano()) var packs []*pipeline.PipelinePack if decoder == nil { packs = []*pipeline.PipelinePack{pack} } else { packs, e = decoder.Decode(pack) } if packs != nil { for _, p := range packs { ir.Inject(p) } } else { if e != nil { ir.LogError(fmt.Errorf("Couldn't parse %s", msg)) pack.Recycle(e) } else { pack.Recycle(nil) fmt.Println("pack recycle!") } } }
func (rpsi *RedisPubSubInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error { var ( dRunner pipeline.DecoderRunner decoder pipeline.Decoder pack *pipeline.PipelinePack e error ok bool ) // Get the InputRunner's chan to receive empty PipelinePacks packSupply := ir.InChan() if rpsi.conf.DecoderName != "" { if dRunner, ok = h.DecoderRunner(rpsi.conf.DecoderName, fmt.Sprintf("%s-%s", ir.Name(), rpsi.conf.DecoderName)); !ok { return fmt.Errorf("Decoder not found: %s", rpsi.conf.DecoderName) } decoder = dRunner.Decoder() } //Connect to the channel psc := redis.PubSubConn{Conn: rpsi.conn} psc.PSubscribe(rpsi.conf.Channel) for { switch n := psc.Receive().(type) { case redis.PMessage: // Grab an empty PipelinePack from the InputRunner pack = <-packSupply pack.Message.SetType("redis_pub_sub") pack.Message.SetLogger(n.Channel) pack.Message.SetPayload(string(n.Data)) pack.Message.SetTimestamp(time.Now().UnixNano()) var packs []*pipeline.PipelinePack if decoder == nil { packs = []*pipeline.PipelinePack{pack} } else { packs, e = decoder.Decode(pack) } if packs != nil { for _, p := range packs { ir.Inject(p) } } else { if e != nil { ir.LogError(fmt.Errorf("Couldn't parse Redis message: %s", n.Data)) } pack.Recycle(nil) } case redis.Subscription: ir.LogMessage(fmt.Sprintf("Subscription: %s %s %d\n", n.Kind, n.Channel, n.Count)) if n.Count == 0 { return errors.New("No channel to subscribe") } case error: fmt.Printf("error: %v\n", n) return n } } return nil }
func (input *FilePollingInput) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error { input.runner = runner input.hostname = helper.PipelineConfig().Hostname() tickChan := runner.Ticker() sRunner := runner.NewSplitterRunner("") if !sRunner.UseMsgBytes() { sRunner.SetPackDecorator(input.packDecorator) } for { select { case <-input.stop: return nil case <-tickChan: } f, err := os.Open(input.FilePath) if err != nil { runner.LogError(fmt.Errorf("Error opening file: %s", err.Error())) continue } for err == nil { err = sRunner.SplitStream(f, nil) if err != io.EOF && err != nil { runner.LogError(fmt.Errorf("Error reading file: %s", err.Error())) } } } return nil }
func (rli *RedisListInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error { var ( pack *pipeline.PipelinePack packs []*pipeline.PipelinePack ) // Get the InputRunner's chan to receive empty PipelinePacks inChan := ir.InChan() for { message, err := rli.conn.Do("RPOP", rli.conf.ListName) if err != nil { ir.LogError(fmt.Errorf("Redis RPOP error: %s", err)) // TODO: should reconnect redis rather than close it rli.Stop() break } if message != nil { pack = <-inChan pack.Message.SetType("redis_list") pack.Message.SetPayload(string(message.([]uint8))) packs = []*pipeline.PipelinePack{pack} if packs != nil { for _, p := range packs { ir.Inject(p) } } else { pack.Recycle(nil) } } else { time.Sleep(time.Second) } } return nil }
func (lsi *LogstreamInput) Run(ir p.InputRunner, h p.PluginHelper, stopChan chan chan bool, dRunner p.DecoderRunner) { var ( parser func(ir p.InputRunner, deliver Deliver, stop chan chan bool) error err error ) if lsi.parseFunction == "payload" { parser = lsi.payloadParser } else if lsi.parseFunction == "messageProto" { parser = lsi.messageProtoParser } // Setup our pack delivery function appropriately for the configuration deliver := func(pack *p.PipelinePack) { if dRunner == nil { ir.Inject(pack) } else { dRunner.InChan() <- pack } } // Check for more data interval interval, _ := time.ParseDuration("250ms") tick := time.Tick(interval) ok := true for ok { // Clear our error err = nil // Attempt to read as many as we can err = parser(ir, deliver, stopChan) // Save our location after reading as much as we can lsi.stream.SavePosition() lsi.recordCount = 0 if err != nil && err != io.EOF { ir.LogError(err) } // Did our parser func get stopped? if lsi.stopped != nil { ok = false continue } // Wait for our next interval, stop if needed select { case lsi.stopped = <-stopChan: ok = false case <-tick: continue } } close(lsi.stopped) }
// Run wires the sandbox's InjectMessage callback into the Heka pipeline and
// then drives the sandbox's ProcessMessage loop until shutdown, finally
// destroying the sandbox (optionally preserving its state to disk).
func (s *SandboxInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	// Callback the sandbox invokes to emit a protobuf-encoded message into
	// the pipeline. Returns 0 on success, 1 if the payload can't be
	// unmarshaled (the pack is recycled in that case).
	s.sb.InjectMessage(func(payload, payload_type, payload_name string) int {
		pack := <-ir.InChan()
		if err := proto.Unmarshal([]byte(payload), pack.Message); err != nil {
			pack.Recycle()
			return 1
		}
		if s.tz != time.UTC {
			// Reinterpret the wall-clock time in the configured zone.
			const layout = "2006-01-02T15:04:05.999999999" // remove the incorrect UTC tz info
			t := time.Unix(0, pack.Message.GetTimestamp())
			t = t.In(time.UTC)
			ct, _ := time.ParseInLocation(layout, t.Format(layout), s.tz)
			pack.Message.SetTimestamp(ct.UnixNano())
		}
		ir.Inject(pack)
		atomic.AddInt64(&s.processMessageCount, 1)
		atomic.AddInt64(&s.processMessageBytes, int64(len(payload)))
		return 0
	})

	ticker := ir.Ticker()

	for true {
		// retval <= 0: polling mode (negative means a recoverable error).
		// retval > 0: the sandbox is shutting down.
		retval := s.sb.ProcessMessage(nil)
		if retval <= 0 { // Sandbox is in polling mode
			if retval < 0 {
				atomic.AddInt64(&s.processMessageFailures, 1)
				em := s.sb.LastError()
				if len(em) > 0 {
					ir.LogError(errors.New(em))
				}
			}
			if ticker == nil {
				// No poll interval configured: this was a one-shot run.
				ir.LogMessage("single run completed")
				break
			}
			select { // block until stop or poll interval
			// NOTE(review): receiving from stopChan does not break the loop
			// directly; shutdown appears to be detected via the next
			// ProcessMessage returning > 0 — confirm.
			case <-s.stopChan:
			case <-ticker:
			}
		} else { // Sandbox is shutting down
			em := s.sb.LastError()
			if !strings.HasSuffix(em, "shutting down") {
				ir.LogError(errors.New(em))
			}
			break
		}
	}

	// Tear down the sandbox, preserving its state on disk when configured.
	s.reportLock.Lock()
	if s.sbc.PreserveData {
		err = s.sb.Destroy(s.preservationFile)
	} else {
		err = s.sb.Destroy("")
	}
	s.sb = nil
	s.reportLock.Unlock()
	return
}
func (cwi *CloudwatchInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) { cwi.stopChan = make(chan bool) cwi.req.StartTime = time.Now() ticker := time.NewTicker(cwi.pollInterval) ok := true var ( resp *cloudwatch.GetMetricStatisticsResponse point cloudwatch.Datapoint pack *pipeline.PipelinePack dim cloudwatch.Dimension ) metricLoop: for ok { select { case _, ok = <-cwi.stopChan: continue case <-ticker.C: cwi.req.EndTime = time.Now() resp, err = cwi.cw.GetMetricStatistics(cwi.req) if err != nil { ir.LogError(err) err = nil continue } for _, point = range resp.GetMetricStatisticsResult.Datapoints { pack, ok = <-ir.InChan() if !ok { break metricLoop } pack.Message.SetType("cloudwatch") for _, dim = range cwi.req.Dimensions { newField(pack, "Dimension."+dim.Name, dim.Value) } newField(pack, "Period", cwi.req.Period) newField(pack, "Average", point.Average) newField(pack, "Maximum", point.Maximum) newField(pack, "Minimum", point.Minimum) newField(pack, "SampleCount", point.SampleCount) newField(pack, "Unit", point.Unit) newField(pack, "Sum", point.Sum) pack.Message.SetUuid(uuid.NewRandom()) pack.Message.SetTimestamp(point.Timestamp.UTC().UnixNano()) pack.Message.SetLogger(cwi.namespace) pack.Message.SetPayload(cwi.req.MetricName) ir.Inject(pack) } cwi.req.StartTime = cwi.req.EndTime.Add(time.Duration(1) * time.Nanosecond) } } return nil }
// Run drives a single logstream using the deliverer/splitter pair created for
// it: it repeatedly delivers as many records as possible, saves the stream
// position, and sleeps 250ms between passes. On stop it acknowledges through
// lsi.stopped and releases the deliverer and splitter runner.
func (lsi *LogstreamInput) Run(ir p.InputRunner, h p.PluginHelper, stopChan chan chan bool, deliverer p.Deliverer, sRunner p.SplitterRunner) {
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(lsi.packDecorator)
	}
	lsi.ir = ir
	lsi.stopChan = stopChan
	lsi.deliverer = deliverer
	lsi.sRunner = sRunner
	var err error

	// Check for more data interval
	interval, _ := time.ParseDuration("250ms")
	tick := time.Tick(interval)

	ok := true
	for ok {
		// Clear our error
		err = nil

		// Attempt to read and deliver as many as we can.
		err = lsi.deliverRecords()

		// Save our position if the stream hasn't done so for us.
		if err != io.EOF {
			lsi.stream.SavePosition()
		}
		lsi.recordCount = 0

		if err != nil && err != io.EOF {
			ir.LogError(err)
		}

		// Did our parser func get stopped?
		if lsi.stopped != nil {
			ok = false
			continue
		}

		// Wait for our next interval, stop if needed
		select {
		case lsi.stopped = <-stopChan:
			ok = false
		case <-tick:
			continue
		}
	}
	// Acknowledge the stop request, then release per-stream resources.
	close(lsi.stopped)
	deliverer.Done()
	sRunner.Done()
}
// Run starts one goroutine that lists matching S3 keys onto input.listChan,
// plus a pool of S3WorkerCount fetcher goroutines that read and deliver the
// listed files. Blocks until the listing and all fetchers have finished.
func (input *S3SplitFileInput) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error {
	// Begin listing the files (either straight from S3 or from a cache)
	// Write matching filenames on a "lister" channel
	// Read from the lister channel:
	//   - fetch the filename
	//   - read records from it
	//   - write them to a "reader" channel

	var (
		wg sync.WaitGroup
		i  uint32
	)

	wg.Add(1)
	go func() {
		runner.LogMessage("Starting S3 list")
	iteratorLoop:
		for r := range S3Iterator(input.bucket, input.S3BucketPrefix, input.schema) {
			// Non-blocking stop check between keys.
			select {
			case <-input.stop:
				runner.LogMessage("Stopping S3 list")
				break iteratorLoop
			default:
			}
			if r.Err != nil {
				runner.LogError(fmt.Errorf("Error getting S3 list: %s", r.Err))
			} else {
				// Only enqueue keys whose basename matches objectMatch (when
				// a matcher is configured).
				basename := r.Key.Key[strings.LastIndex(r.Key.Key, "/")+1:]
				if input.objectMatch == nil || input.objectMatch.MatchString(basename) {
					runner.LogMessage(fmt.Sprintf("Found: %s", r.Key.Key))
					input.listChan <- r.Key.Key
				} else {
					runner.LogMessage(fmt.Sprintf("Skipping: %s", r.Key.Key))
				}
			}
		}
		// All done listing, close the channel so fetchers can drain and exit.
		runner.LogMessage("All done listing. Closing channel")
		close(input.listChan)
		wg.Done()
	}()

	// Run a pool of concurrent readers.
	for i = 0; i < input.S3WorkerCount; i++ {
		wg.Add(1)
		go input.fetcher(runner, &wg, i)
	}
	wg.Wait()

	return nil
}
func (zi *ZeroMQInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error { // Get the InputRunner's chan to receive empty PipelinePacks packs := ir.InChan() var decoding chan<- *pipeline.PipelinePack if zi.conf.Decoder != "" { // Fetch specified decoder decoder, ok := h.DecoderSet().ByName(zi.conf.Decoder) if !ok { err := fmt.Errorf("Could not find decoder", zi.conf.Decoder) return err } // Get the decoder's receiving chan decoding = decoder.InChan() } var pack *pipeline.PipelinePack var count int var b []byte var err error // Read data from websocket broadcast chan for { b, err = zi.socket.Recv(0) if err != nil { ir.LogError(err) continue } // Grab an empty PipelinePack from the InputRunner pack = <-packs // Trim the excess empty bytes count = len(b) pack.MsgBytes = pack.MsgBytes[:count] // Copy ws bytes into pack's bytes copy(pack.MsgBytes, b) if decoding != nil { // Send pack onto decoder decoding <- pack } else { // Send pack into Heka pipeline ir.Inject(pack) } } return nil }
// splitStream reads records from r via the SplitterRunner and delivers each
// one downstream, handling oversized records (truncate-or-drop per
// KeepTruncated) and an optional incomplete final record. When the splitter
// is a NullSplitter the whole stream is accumulated and delivered as a single
// record at EOF. Closes r before returning the final error (io.EOF on a
// normal end of stream).
func splitStream(ir p.InputRunner, sRunner p.SplitterRunner, r io.ReadCloser) error {
	var (
		record       []byte
		longRecord   []byte
		err          error
		deliver      bool
		nullSplitter bool
	)
	// If we're using a NullSplitter we want to make sure we capture the
	// entire HTTP request or response body and not be subject to what we get
	// from a single Read() call.
	if _, ok := sRunner.Splitter().(*p.NullSplitter); ok {
		nullSplitter = true
	}
	for err == nil {
		deliver = true
		_, record, err = sRunner.GetRecordFromStream(r)
		if err == io.ErrShortBuffer {
			// Record was too big for the buffer: keep the truncated portion
			// or drop it, depending on configuration.
			if sRunner.KeepTruncated() {
				err = fmt.Errorf("record exceeded MAX_RECORD_SIZE %d and was truncated",
					message.MAX_RECORD_SIZE)
			} else {
				deliver = false
				err = fmt.Errorf("record exceeded MAX_RECORD_SIZE %d and was dropped",
					message.MAX_RECORD_SIZE)
			}
			ir.LogError(err)
			err = nil // non-fatal, keep going
		} else if sRunner.IncompleteFinal() && err == io.EOF && len(record) == 0 {
			// Deliver whatever is left in the buffer as a final record.
			record = sRunner.GetRemainingData()
		}
		if len(record) > 0 && deliver {
			if nullSplitter {
				// Concatenate all the records until EOF. This should be safe
				// b/c NullSplitter means FindRecord will always return the
				// full buffer contents, we don't have to worry about
				// GetRecordFromStream trying to append multiple reads to a
				// single record and triggering an io.ErrShortBuffer error.
				longRecord = append(longRecord, record...)
			} else {
				sRunner.DeliverRecord(record, nil)
			}
		}
	}
	r.Close()
	if err == io.EOF && nullSplitter && len(longRecord) > 0 {
		sRunner.DeliverRecord(longRecord, nil)
	}
	return err
}
// fetcher is one worker in the reader pool: it pulls S3 keys off
// input.listChan, reads each file via readS3File, and records
// success/failure metrics. Exits when the list channel closes or a stop is
// signaled (draining the channel so the lister never blocks).
func (input *S3SplitFileInput) fetcher(runner pipeline.InputRunner, wg *sync.WaitGroup, workerId uint32) {
	var (
		s3Key     string
		startTime time.Time
		duration  float64
	)

	fetcherName := fmt.Sprintf("S3Reader%d", workerId)
	deliverer := runner.NewDeliverer(fetcherName)
	defer deliverer.Done()
	splitterRunner := runner.NewSplitterRunner(fetcherName)

	ok := true
	for ok {
		select {
		case s3Key, ok = <-input.listChan:
			if !ok {
				// Channel is closed => we're shutting down, exit cleanly.
				// runner.LogMessage("Fetcher all done! shutting down.")
				break
			}

			startTime = time.Now().UTC()
			err := input.readS3File(runner, &deliverer, &splitterRunner, s3Key)
			atomic.AddInt64(&input.processFileCount, 1)
			// Any bytes left in the splitter at EOF suggest a corrupt or
			// truncated file; count them as discarded.
			leftovers := splitterRunner.GetRemainingData()
			lenLeftovers := len(leftovers)
			if lenLeftovers > 0 {
				atomic.AddInt64(&input.processFileDiscardedBytes, int64(lenLeftovers))
				runner.LogError(fmt.Errorf("Trailing data, possible corruption: %d bytes left in stream at EOF: %s", lenLeftovers, s3Key))
			}
			if err != nil && err != io.EOF {
				runner.LogError(fmt.Errorf("Error reading %s: %s", s3Key, err))
				atomic.AddInt64(&input.processFileFailures, 1)
				continue
			}
			duration = time.Now().UTC().Sub(startTime).Seconds()
			runner.LogMessage(fmt.Sprintf("Successfully fetched %s in %.2fs ", s3Key, duration))

		case <-input.stop:
			for _ = range input.listChan {
				// Drain the channel without processing the files.
				// Technically the S3Iterator can still add one back on to the
				// channel but this ensures there is room so it won't block.
			}
			ok = false
		}
	}

	wg.Done()
}
// Run listens for Docker log lines on di.logstream and turns each one into a
// "DockerLog" message, attaching any extra fields supplied by the attach
// manager. Exits on a stop signal or when the attach-error channel closes,
// then signals the listener to detach.
func (di *DockerLogInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	var (
		pack *pipeline.PipelinePack
		ok   bool
	)

	hostname := h.Hostname()

	// Start feeding di.logstream from the Docker attach manager.
	go di.attachMgr.Listen(di.logstream, di.closer)

	// Get the InputRunner's chan to receive empty PipelinePacks
	packSupply := ir.InChan()

	ok = true
	var err error
	for ok {
		select {
		case logline := <-di.logstream:
			pack = <-packSupply
			pack.Message.SetType("DockerLog")
			pack.Message.SetLogger(logline.Type) // stderr or stdout
			pack.Message.SetHostname(hostname)   // Use the host's hostname
			pack.Message.SetPayload(logline.Data)
			pack.Message.SetTimestamp(time.Now().UnixNano())
			pack.Message.SetUuid(uuid.NewRandom())
			for k, v := range logline.Fields {
				message.NewStringField(pack.Message, k, v)
			}
			ir.Deliver(pack)

		case err, ok = <-di.attachErrors:
			if !ok {
				// Error channel closed: record a fatal error; ok is false so
				// the loop exits.
				err = errors.New("Docker event channel closed")
				break
			}
			ir.LogError(fmt.Errorf("Attacher error: %s", err))

		case err = <-di.stopChan:
			ok = false
		}
	}

	// Tell the listener to detach and stop feeding the logstream.
	di.closer <- struct{}{}
	close(di.logstream)
	return err
}
func (input *FilePollingInput) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error { var ( data []byte pack *pipeline.PipelinePack dRunner pipeline.DecoderRunner ok bool err error ) if input.DecoderName != "" { if dRunner, ok = helper.DecoderRunner(input.DecoderName, fmt.Sprintf("%s-%s", runner.Name(), input.DecoderName)); !ok { return fmt.Errorf("Decoder not found: %s", input.DecoderName) } input.decoderChan = dRunner.InChan() } input.runner = runner hostname := helper.PipelineConfig().Hostname() packSupply := runner.InChan() tickChan := runner.Ticker() for { select { case <-input.stop: return nil case <-tickChan: } data, err = ioutil.ReadFile(input.FilePath) if err != nil { runner.LogError(fmt.Errorf("Error reading file: %s", err)) continue } pack = <-packSupply pack.Message.SetUuid(uuid.NewRandom()) pack.Message.SetTimestamp(time.Now().UnixNano()) pack.Message.SetType("heka.file.polling") pack.Message.SetHostname(hostname) pack.Message.SetPayload(string(data)) if field, err := message.NewField("TickerInterval", int(input.TickerInterval), ""); err != nil { runner.LogError(err) } else { pack.Message.AddField(field) } if field, err := message.NewField("FilePath", input.FilePath, ""); err != nil { runner.LogError(err) } else { pack.Message.AddField(field) } input.sendPack(pack) } return nil }
func (input *Sqs3Input) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error { // initialize input.runner = runner input.sqs = sqs.New(session.New()) input.s3 = s3.New(session.New()) queue_url, err := get_queue(input.sqs, input.SqsQueue) if err != nil { return err } input.queue_url = queue_url //input.hostname = helper.PipelineConfig().Hostname() tickChan := runner.Ticker() sRunner := runner.NewSplitterRunner("") if !sRunner.UseMsgBytes() { sRunner.SetPackDecorator(input.packDecorator) } defer sRunner.Done() for { select { case <-input.stop: return nil case <-tickChan: } receipt_handle, bucket, key, err := receive_from_queue(input.sqs, input.queue_url) if err != nil { runner.LogError(fmt.Errorf("Error reading queue: %s", err.Error())) continue } o, _, err := get_object(input.s3, bucket, key) if err != nil { runner.LogError(fmt.Errorf("Error opening file: %s", err.Error())) if aws_err := awserr.Error(err); aws_err != nil { f aws_err.Code == "NoSuchBucket" or aws_err.Code == "NoSuchKey" { delete_message(input.sqs, input.queue_url, receipt_handle) } } continue } for err == nil { err = sRunner.SplitStream(o, nil) if err != io.EOF && err != nil { runner.LogError(fmt.Errorf("Error reading file: %s", err.Error())) } } o.Close() } }
func (ri *RedisMQInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error { // Get the InputRunner's chan to receive empty PipelinePacks packs := ir.InChan() var decoding chan<- *pipeline.PipelinePack if ri.conf.Decoder != "" { // Fetch specified decoder decoder, ok := h.DecoderRunner(ri.conf.Decoder) if !ok { err := fmt.Errorf("Could not find decoder", ri.conf.Decoder) return err } // Get the decoder's receiving chan decoding = decoder.InChan() } var pack *pipeline.PipelinePack //var p []*redismq.Package var p *redismq.Package var count int var b []byte var err error for { p, err = ri.rdconsumer.Get() if err != nil { ir.LogError(err) continue } err = p.Ack() if err != nil { ir.LogError(err) } b = []byte(p.Payload) // Grab an empty PipelinePack from the InputRunner pack = <-packs // Trim the excess empty bytes count = len(b) pack.MsgBytes = pack.MsgBytes[:count] // Copy ws bytes into pack's bytes copy(pack.MsgBytes, b) if decoding != nil { // Send pack onto decoder decoding <- pack } else { // Send pack into Heka pipeline ir.Inject(pack) } } /* checkStat := time.Tick(ri.statInterval) ok := true for ok { select { case _, ok = <-ri.stopChan: break case <-checkStat: p, err = ri.rdconsumer.MultiGet(500) if err != nil { ir.LogError(err) continue } err = p[len(p)-1].MultiAck() if err != nil { ir.LogError(err) } for _, v := range p { b = []byte(v.Payload) // Grab an empty PipelinePack from the InputRunner pack = <-packs // Trim the excess empty bytes count = len(b) pack.MsgBytes = pack.MsgBytes[:count] // Copy ws bytes into pack's bytes copy(pack.MsgBytes, b) if decoding != nil { // Send pack onto decoder decoding <- pack } else { // Send pack into Heka pipeline ir.Inject(pack) } } } } */ return nil }
// Run consumes messages from the Kafka partition consumer, splitting each
// message value into records for the pipeline. Supports manual offset
// checkpointing and recovers from out-of-range offsets by removing the
// checkpoint file. Exits on stopChan or when the event channel closes.
func (k *KafkaInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	sRunner := ir.NewSplitterRunner("")

	defer func() {
		k.partitionConsumer.Close()
		k.consumer.Close()
		if k.checkpointFile != nil {
			k.checkpointFile.Close()
		}
		sRunner.Done()
	}()

	k.ir = ir
	k.stopChan = make(chan bool)

	var (
		hostname = k.pConfig.Hostname()
		event    *sarama.ConsumerMessage
		cError   *sarama.ConsumerError
		ok       bool
		n        int
	)

	// Decorates each pack with Kafka metadata (key/topic/partition/offset).
	// Reads the `event` variable captured from the consuming loop below.
	packDec := func(pack *pipeline.PipelinePack) {
		pack.Message.SetType("heka.kafka")
		pack.Message.SetLogger(k.name)
		pack.Message.SetHostname(hostname)
		k.addField(pack, "Key", event.Key, "")
		k.addField(pack, "Topic", event.Topic, "")
		k.addField(pack, "Partition", event.Partition, "")
		k.addField(pack, "Offset", event.Offset, "")
	}
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(packDec)
	}

	eventChan := k.partitionConsumer.Messages()
	cErrChan := k.partitionConsumer.Errors()
	for {
		select {
		case event, ok = <-eventChan:
			if !ok {
				// Event stream closed: normal shutdown.
				return nil
			}
			atomic.AddInt64(&k.processMessageCount, 1)
			if n, err = sRunner.SplitBytes(event.Value, nil); err != nil {
				ir.LogError(fmt.Errorf("processing message from topic %s: %s", event.Topic, err))
			}
			if n > 0 && n != len(event.Value) {
				ir.LogError(fmt.Errorf("extra data dropped in message from topic %s", event.Topic))
			}
			if k.config.OffsetMethod == "Manual" {
				// Persist the next offset to resume from.
				if err = k.writeCheckpoint(event.Offset + 1); err != nil {
					return err
				}
			}

		case cError, ok = <-cErrChan:
			if !ok {
				// Don't exit until the eventChan is closed.
				ok = true
				continue
			}
			if cError.Err == sarama.ErrOffsetOutOfRange {
				ir.LogError(fmt.Errorf(
					"removing the out of range checkpoint file and stopping"))
				if k.checkpointFile != nil {
					k.checkpointFile.Close()
					k.checkpointFile = nil
				}
				if err := os.Remove(k.checkpointFilename); err != nil {
					ir.LogError(err)
				}
				// NOTE(review): the `err` from os.Remove is shadowed, so the
				// outer (likely nil) err is returned here — confirm whether
				// the removal error was meant to propagate.
				return err
			}
			atomic.AddInt64(&k.processMessageFailures, 1)
			ir.LogError(cError.Err)

		case <-k.stopChan:
			return nil
		}
	}
}
func (input *S3OffsetInput) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error { // List offset metadata index files // For each index D >= start and <= end // Read index D // Write offsets for any desired clients to offsetChan // Meanwhile, for each item in offsetChan // Go fetch that record, inject resulting message into pipeline. var ( wg sync.WaitGroup i uint32 emptySchema Schema ) if input.metaFileName != "" { wg.Add(1) go func() { reader, err := os.Open(input.metaFileName) if err != nil { runner.LogMessage(fmt.Sprintf("Error opening metadata file '%s': %s", input.metaFileName, err)) } defer reader.Close() err = input.parseMessageLocations(reader, input.metaFileName) if err != nil { runner.LogMessage(fmt.Sprintf("Error reading metadata: %s", err)) } // All done with metadata, close the channel runner.LogMessage("All done with metadata. Closing channel") close(input.offsetChan) wg.Done() }() } else if input.metaBucket != nil { wg.Add(1) go func() { runner.LogMessage("Starting S3 list") iteratorLoop: for r := range S3Iterator(input.metaBucket, input.S3MetaBucketPrefix, emptySchema) { select { case <-input.stop: runner.LogMessage("Stopping S3 list") break iteratorLoop default: } if r.Err != nil { runner.LogError(fmt.Errorf("Error getting S3 list: %s", r.Err)) } else { base := path.Base(r.Key.Key)[0:8] // Check if r is in the desired date range. if base >= input.StartDate && base <= input.EndDate { err := input.grep(r) if err != nil { runner.LogMessage(fmt.Sprintf("Error reading index: %s", err)) } } } } // All done listing, close the channel runner.LogMessage("All done listing. Closing channel") close(input.offsetChan) wg.Done() }() } else { runner.LogMessage("Nothing to do, no metadata available. Closing channel") close(input.offsetChan) wg.Done() } // Run a pool of concurrent readers. for i = 0; i < input.S3WorkerCount; i++ { wg.Add(1) go input.fetcher(runner, &wg, i) } wg.Wait() return nil }
// Run consumes messages from NSQ (discovered via lookupd) and feeds them into
// the pipeline, either as serialized Heka protobuf messages (which require a
// decoder) or as plain "nsq" payload messages run through the optional
// decoder. Each NSQ message is acked back via its returnChannel.
func (ni *NsqInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	// Get the InputRunner's chan to receive empty PipelinePacks
	var pack *pipeline.PipelinePack
	var err error
	var dRunner pipeline.DecoderRunner
	var decoder pipeline.Decoder
	var ok bool
	var e error

	//pos := 0
	//output := make([]*Message, 2)

	packSupply := ir.InChan()

	if ni.conf.Decoder != "" {
		if dRunner, ok = h.DecoderRunner(ni.conf.Decoder); !ok {
			return fmt.Errorf("Decoder not found: %s", ni.conf.Decoder)
		}
		decoder = dRunner.Decoder()
	}

	err = ni.nsqReader.ConnectToLookupd(ni.conf.Address)
	if err != nil {
		// NOTE(review): a connect failure is only logged and the consume
		// loop below still runs — confirm whether this should return instead.
		ir.LogError(errors.New("ConnectToLookupd failed."))
	}

	header := &message.Header{}
	stopped := false
	//readLoop:
	for !stopped {
		//stopped = true
		select {
		case <-ni.stopChan:
			ir.LogError(errors.New("get ni.stopChan, set stopped=true"))
			stopped = true
		default:
			pack = <-packSupply
			m, ok1 := <-ni.handler.logChan
			if !ok1 {
				// Log channel closed: shut down.
				stopped = true
				break
			}
			if ni.conf.Serialize {
				// Serialized mode: the NSQ body carries a framed Heka
				// protobuf message destined for the decoder.
				if dRunner == nil {
					pack.Recycle()
					ir.LogError(errors.New("Serialize messages require a decoder."))
				}
				//header := &message.Header{}
				_, msgOk := findMessage(m.msg.Body, header, &(pack.MsgBytes))
				if msgOk {
					dRunner.InChan() <- pack
				} else {
					pack.Recycle()
					ir.LogError(errors.New("Can't find Heka message."))
				}
				header.Reset()
			} else {
				//ir.LogError(fmt.Errorf("message body: %s", m.msg.Body))
				// Plain mode: deliver the raw body as an "nsq" payload,
				// optionally via the decoder.
				pack.Message.SetType("nsq")
				pack.Message.SetPayload(string(m.msg.Body))
				pack.Message.SetTimestamp(time.Now().UnixNano())

				var packs []*pipeline.PipelinePack
				if decoder == nil {
					packs = []*pipeline.PipelinePack{pack}
				} else {
					packs, e = decoder.Decode(pack)
				}
				if packs != nil {
					for _, p := range packs {
						ir.Inject(p)
					}
				} else {
					if e != nil {
						ir.LogError(fmt.Errorf("Couldn't parse Nsq message: %s", m.msg.Body))
					}
					pack.Recycle()
				}
			}
			// Ack the message back to NSQ.
			m.returnChannel <- &nsq.FinishedMessage{m.msg.Id, 0, true}
			/*
				output[pos] = m
				pos++
				if pos == 2 {
					for pos > 0 {
						pos--
						m1 := output[pos]
						m1.returnChannel <- &nsq.FinishedMessage{m1.msg.Id, 0, true}
						output[pos] = nil
					}
				}
			*/
		}
	}
	return nil
}
// Run consumes messages from the Kafka consumer group, splits each message
// value into records for the pipeline, tracks per-topic/partition offsets to
// report unexpected gaps, and commits each offset back to the group after
// processing. Exits on stopChan or when the message channel closes.
func (k *KafkaConsumerGroupInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	sRunner := ir.NewSplitterRunner("")

	defer func() {
		if err := k.consumer.Close(); err != nil {
			k.ir.LogError(fmt.Errorf("error closing the consumer: %s", err.Error()))
		}
		sRunner.Done()
	}()

	k.ir = ir

	// Consumer errors are counted and logged in the background.
	go func() {
		for err := range k.consumer.Errors() {
			atomic.AddInt64(&k.processMessageFailures, 1)
			ir.LogError(err)
		}
	}()

	var (
		hostname = k.pConfig.Hostname()
		event    *sarama.ConsumerMessage
		ok       bool
		n        int
	)

	// Decorates each pack with Kafka metadata (key/topic/partition/offset).
	// Reads the `event` variable captured from the consuming loop below.
	packDec := func(pack *pipeline.PipelinePack) {
		pack.Message.SetType("heka.kafka")
		pack.Message.SetLogger(k.name)
		pack.Message.SetHostname(hostname)
		k.addField(pack, "Key", event.Key, "")
		k.addField(pack, "Topic", event.Topic, "")
		k.addField(pack, "Partition", event.Partition, "")
		k.addField(pack, "Offset", event.Offset, "")
	}
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(packDec)
	}

	// offsets tracks the last seen offset per topic/partition so gaps can be
	// reported.
	offsets := make(map[string]map[int32]int64)

	for {
		select {
		case event, ok = <-k.consumer.Messages():
			if !ok {
				return
			}
			if offsets[event.Topic] == nil {
				offsets[event.Topic] = make(map[int32]int64)
			}
			if offsets[event.Topic][event.Partition] != 0 &&
				offsets[event.Topic][event.Partition] != event.Offset-1 {
				ir.LogError(fmt.Errorf("unexpected offset on %s:%d. Expected %d, found %d, diff %d.\n",
					event.Topic, event.Partition,
					offsets[event.Topic][event.Partition]+1,
					event.Offset,
					event.Offset-offsets[event.Topic][event.Partition]+1))
			}
			atomic.AddInt64(&k.processMessageCount, 1)
			if n, err = sRunner.SplitBytes(event.Value, nil); err != nil {
				ir.LogError(fmt.Errorf("processing message from topic %s: %s", event.Topic, err))
			}
			if n > 0 && n != len(event.Value) {
				ir.LogError(fmt.Errorf("extra data dropped in message from topic %s", event.Topic))
			}
			// Record and commit the processed offset.
			offsets[event.Topic][event.Partition] = event.Offset
			k.consumer.CommitUpto(event)
		case <-k.stopChan:
			return
		}
	}
}
// Main Logstreamer Input runner
// This runner kicks off all the other logstream inputs, and handles rescanning for
// updates to the filesystem that might affect file visibility for the logstream
// inputs
func (li *LogstreamerInput) Run(ir p.InputRunner, h p.PluginHelper) (err error) {
	var (
		ok         bool
		dRunner    p.DecoderRunner
		errs       *ls.MultipleError
		newstreams []string
	)

	// Setup the decoder runner that will be used
	if li.decoderName != "" {
		if dRunner, ok = h.DecoderRunner(li.decoderName,
			fmt.Sprintf("%s-%s", li.pluginName, li.decoderName)); !ok {
			return fmt.Errorf("Decoder not found: %s", li.decoderName)
		}
	}

	// Kick off all the current logstreams we know of. Each gets its own
	// stop channel so it can be shut down (and acknowledge) individually.
	for _, logstream := range li.plugins {
		stop := make(chan chan bool, 1)
		go logstream.Run(ir, h, stop, dRunner)
		li.stopLogstreamChans = append(li.stopLogstreamChans, stop)
	}

	ok = true
	rescan := time.Tick(li.rescanInterval)

	// Our main rescan loop that handles shutting down
	for ok {
		select {
		case <-li.stopChan:
			ok = false
			returnChans := make([]chan bool, len(li.stopLogstreamChans))
			// Send out all the stop signals
			for i, ch := range li.stopLogstreamChans {
				ret := make(chan bool)
				ch <- ret
				returnChans[i] = ret
			}

			// Wait for all the stops
			for _, ch := range returnChans {
				<-ch
			}

			// Close our own stopChan to indicate we shut down
			close(li.stopChan)
		case <-rescan:
			// Periodic filesystem rescan: start inputs for any logstreams
			// that appeared since the last scan.
			li.logstreamSetLock.Lock()
			newstreams, errs = li.logstreamSet.ScanForLogstreams()
			if errs.IsError() {
				ir.LogError(errs)
			}
			for _, name := range newstreams {
				stream, ok := li.logstreamSet.GetLogstream(name)
				if !ok {
					ir.LogError(fmt.Errorf("Found new logstream: %s, but couldn't fetch it.", name))
					continue
				}

				// Setup a new logstream input for this logstream and start it running
				stParser, parserFunc, _ := CreateParser(li.parser, li.delimiter,
					li.delimiterLocation, li.decoderName)
				lsi := NewLogstreamInput(stream, stParser, parserFunc, name,
					li.hostName)
				li.plugins[name] = lsi
				stop := make(chan chan bool, 1)
				go lsi.Run(ir, h, stop, dRunner)
				li.stopLogstreamChans = append(li.stopLogstreamChans, stop)
			}
			li.logstreamSetLock.Unlock()
		}
	}
	err = nil
	return
}