Example #1
// Standard text log file parser
func (lsi *LogstreamInput) payloadParser(ir p.InputRunner, deliver Deliver, stop chan chan bool) (err error) {
	var (
		pack   *p.PipelinePack
		record []byte
		n      int
	)
	for err == nil {
		select {
		case lsi.stopped = <-stop:
			return
		default:
		}
		n, record, err = lsi.parser.Parse(lsi.stream)
		if err == io.ErrShortBuffer {
			ir.LogError(fmt.Errorf("record exceeded MAX_RECORD_SIZE %d", message.MAX_RECORD_SIZE))
			err = nil // non-fatal, keep going
		}
		if n > 0 {
			lsi.stream.FlushBuffer(n)
		}
		if len(record) > 0 {
			payload := string(record)
			pack = <-ir.InChan()
			pack.Message.SetUuid(uuid.NewRandom())
			pack.Message.SetTimestamp(time.Now().UnixNano())
			pack.Message.SetType("logfile")
			pack.Message.SetHostname(lsi.hostName)
			pack.Message.SetLogger(lsi.loggerIdent)
			pack.Message.SetPayload(payload)
			deliver(pack)
			lsi.countRecord()
		}
	}
	return
}
Example #2
// TODO: handle "no such file"
func (input *S3SplitFileInput) readS3File(runner pipeline.InputRunner, d *pipeline.Deliverer, sr *pipeline.SplitterRunner, s3Key string) (err error) {
	runner.LogMessage(fmt.Sprintf("Preparing to read: %s", s3Key))
	if input.bucket == nil {
		runner.LogMessage(fmt.Sprintf("Dude, where's my bucket: %s", s3Key))
		return
	}

	var lastGoodOffset uint64
	var attempt uint32

RetryS3:
	for attempt = 1; attempt <= input.S3Retries; attempt++ {
		for r := range S3FileIterator(input.bucket, s3Key, lastGoodOffset) {
			record := r.Record
			err := r.Err

			if err != nil && err != io.EOF {
				runner.LogError(fmt.Errorf("Error in attempt %d reading %s at offset %d: %s", attempt, s3Key, lastGoodOffset, err))
				atomic.AddInt64(&input.processMessageFailures, 1)
				continue RetryS3
			}
			if len(record) > 0 {
				lastGoodOffset += uint64(r.BytesRead)
				atomic.AddInt64(&input.processMessageCount, 1)
				atomic.AddInt64(&input.processMessageBytes, int64(len(record)))
				(*sr).DeliverRecord(record, *d)
			}
		}
		break
	}

	return
}
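The TODO above asks for "no such file" handling. A minimal sketch of one way to classify that condition, borrowing the aws-sdk-go error check that Example #18 uses; the S3SplitFileInput itself is built against a different S3 client, so the isNoSuchKey helper below is an illustration, not a drop-in:

package main

import (
	"fmt"

	"github.com/aws/aws-sdk-go/aws/awserr"
)

// isNoSuchKey reports whether err represents an S3 object or bucket that
// does not exist, so a caller like readS3File could stop retrying keys that
// will never appear. Hypothetical helper for illustration only.
func isNoSuchKey(err error) bool {
	if aerr, ok := err.(awserr.Error); ok {
		return aerr.Code() == "NoSuchKey" || aerr.Code() == "NoSuchBucket"
	}
	return false
}

func main() {
	err := awserr.New("NoSuchKey", "the specified key does not exist", nil)
	fmt.Println(isNoSuchKey(err)) // true
}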
Example #3
// Main Logstreamer Input runner. This runner kicks off all the other
// logstream inputs, and handles rescanning for updates to the filesystem that
// might affect file visibility for the logstream inputs.
func (li *LogstreamerInput) Run(ir p.InputRunner, h p.PluginHelper) (err error) {
	var (
		ok         bool
		errs       *ls.MultipleError
		newstreams []string
	)

	// Kick off all the current logstreams we know of
	i := 0
	for _, logstream := range li.plugins {
		i++
		li.startLogstreamInput(logstream, i, ir, h)
	}

	ok = true
	rescan := time.Tick(li.rescanInterval)
	// Our main rescan loop that handles shutting down
	for ok {
		select {
		case <-li.stopChan:
			ok = false
			returnChans := make([]chan bool, len(li.stopLogstreamChans))
			// Send out all the stop signals
			for i, ch := range li.stopLogstreamChans {
				ret := make(chan bool)
				ch <- ret
				returnChans[i] = ret
			}

			// Wait for all the stops
			for _, ch := range returnChans {
				<-ch
			}

			// Close our own stopChan to indicate we shut down
			close(li.stopChan)
		case <-rescan:
			li.logstreamSetLock.Lock()
			newstreams, errs = li.logstreamSet.ScanForLogstreams()
			if errs.IsError() {
				ir.LogError(errs)
			}
			for _, name := range newstreams {
				stream, ok := li.logstreamSet.GetLogstream(name)
				if !ok {
					ir.LogError(fmt.Errorf("Found new logstream: %s, but couldn't fetch it.",
						name))
					continue
				}

				lsi := NewLogstreamInput(stream, name, li.hostName)
				li.plugins[name] = lsi
				i++
				li.startLogstreamInput(lsi, i, ir, h)
			}
			li.logstreamSetLock.Unlock()
		}
	}
	return nil
}
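The shutdown path above relies on a channel-of-channels handshake: each logstream listens on a chan chan bool, the runner sends a fresh reply channel down each one, then blocks until every reply arrives. A standalone sketch of that pattern (worker and the 50ms sleep are stand-ins, not Heka code):

package main

import (
	"fmt"
	"time"
)

// worker loops on fake work until it receives a reply channel on stop,
// acknowledges on that channel, and exits.
func worker(id int, stop chan chan bool) {
	for {
		select {
		case ret := <-stop:
			fmt.Printf("worker %d stopping\n", id)
			ret <- true
			return
		default:
			time.Sleep(50 * time.Millisecond) // stand-in for real work
		}
	}
}

func main() {
	stopChans := make([]chan chan bool, 3)
	for i := range stopChans {
		stopChans[i] = make(chan chan bool, 1)
		go worker(i, stopChans[i])
	}

	// Send out all the stop signals, then wait for every acknowledgement.
	returnChans := make([]chan bool, len(stopChans))
	for i, ch := range stopChans {
		ret := make(chan bool)
		ch <- ret
		returnChans[i] = ret
	}
	for _, ch := range returnChans {
		<-ch
	}
	fmt.Println("all workers stopped")
}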
Example #4
func (rli *RedisInput) InsertMessage(ir pipeline.InputRunner, decoder pipeline.Decoder, msg string) {
	var (
		pack *pipeline.PipelinePack
		e    error
	)
	// Get the InputRunner's chan to receive empty PipelinePacks
	packSupply := ir.InChan()

	pack = <-packSupply
	pack.Message.SetType(rli.conf.Key)
	pack.Message.SetLogger("Redis")
	pack.Message.SetPayload(msg)
	pack.Message.SetTimestamp(time.Now().UnixNano())

	var packs []*pipeline.PipelinePack
	if decoder == nil {
		packs = []*pipeline.PipelinePack{pack}
	} else {
		packs, e = decoder.Decode(pack)
	}

	if packs != nil {
		for _, p := range packs {
			ir.Inject(p)
		}
	} else {
		if e != nil {
			ir.LogError(fmt.Errorf("Couldn't parse %s", msg))
			pack.Recycle(e)
		} else {
			pack.Recycle(nil)
			fmt.Println("pack recycle!")
		}
	}
}
Example #5
func (rpsi *RedisPubSubInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	var (
		dRunner pipeline.DecoderRunner
		decoder pipeline.Decoder
		pack    *pipeline.PipelinePack
		e       error
		ok      bool
	)
	// Get the InputRunner's chan to receive empty PipelinePacks
	packSupply := ir.InChan()

	if rpsi.conf.DecoderName != "" {
		if dRunner, ok = h.DecoderRunner(rpsi.conf.DecoderName, fmt.Sprintf("%s-%s", ir.Name(), rpsi.conf.DecoderName)); !ok {
			return fmt.Errorf("Decoder not found: %s", rpsi.conf.DecoderName)
		}
		decoder = dRunner.Decoder()
	}

	//Connect to the channel
	psc := redis.PubSubConn{Conn: rpsi.conn}
	psc.PSubscribe(rpsi.conf.Channel)

	for {
		switch n := psc.Receive().(type) {
		case redis.PMessage:
			// Grab an empty PipelinePack from the InputRunner
			pack = <-packSupply
			pack.Message.SetType("redis_pub_sub")
			pack.Message.SetLogger(n.Channel)
			pack.Message.SetPayload(string(n.Data))
			pack.Message.SetTimestamp(time.Now().UnixNano())
			var packs []*pipeline.PipelinePack
			if decoder == nil {
				packs = []*pipeline.PipelinePack{pack}
			} else {
				packs, e = decoder.Decode(pack)
			}
			if packs != nil {
				for _, p := range packs {
					ir.Inject(p)
				}
			} else {
				if e != nil {
					ir.LogError(fmt.Errorf("Couldn't parse Redis message: %s", n.Data))
				}
				pack.Recycle(nil)
			}
		case redis.Subscription:
			ir.LogMessage(fmt.Sprintf("Subscription: %s %s %d\n", n.Kind, n.Channel, n.Count))
			if n.Count == 0 {
				return errors.New("No channel to subscribe")
			}
		case error:
			fmt.Printf("error: %v\n", n)
			return n
		}
	}

	return nil
}
Example #6
func (input *FilePollingInput) Run(runner pipeline.InputRunner,
	helper pipeline.PluginHelper) error {

	input.runner = runner
	input.hostname = helper.PipelineConfig().Hostname()
	tickChan := runner.Ticker()
	sRunner := runner.NewSplitterRunner("")
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(input.packDecorator)
	}

	for {
		select {
		case <-input.stop:
			return nil
		case <-tickChan:
		}

		f, err := os.Open(input.FilePath)
		if err != nil {
			runner.LogError(fmt.Errorf("Error opening file: %s", err.Error()))
			continue
		}
		for err == nil {
			err = sRunner.SplitStream(f, nil)
			if err != io.EOF && err != nil {
				runner.LogError(fmt.Errorf("Error reading file: %s", err.Error()))
			}
		}
		f.Close() // release the file descriptor before the next tick
	}

	return nil
}
Example #7
func (rli *RedisListInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	var (
		pack  *pipeline.PipelinePack
		packs []*pipeline.PipelinePack
	)

	// Get the InputRunner's chan to receive empty PipelinePacks
	inChan := ir.InChan()

	for {
		message, err := rli.conn.Do("RPOP", rli.conf.ListName)
		if err != nil {
			ir.LogError(fmt.Errorf("Redis RPOP error: %s", err))
			// TODO: should reconnect redis rather than close it
			rli.Stop()
			break
		}
		if message != nil {
			pack = <-inChan
			pack.Message.SetType("redis_list")
			pack.Message.SetPayload(string(message.([]uint8)))
			packs = []*pipeline.PipelinePack{pack}
			if packs != nil {
				for _, p := range packs {
					ir.Inject(p)
				}
			} else {
				pack.Recycle(nil)
			}
		} else {
			time.Sleep(time.Second)
		}
	}
	return nil
}
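The loop above busy-polls with RPOP and sleeps a second whenever the list is empty. As a hedged alternative sketch (assuming a redigo connection like rli.conn and a reachable Redis instance at the address shown), BRPOP blocks server-side for up to a timeout, which removes the manual sleep:

package main

import (
	"fmt"
	"log"

	"github.com/garyburd/redigo/redis"
)

func main() {
	conn, err := redis.Dial("tcp", "localhost:6379") // assumed address
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Block for up to 1 second waiting for an item on "redis_list".
	reply, err := redis.Values(conn.Do("BRPOP", "redis_list", 1))
	if err == redis.ErrNil {
		fmt.Println("list empty, nothing popped")
		return
	}
	if err != nil {
		log.Fatal(err)
	}

	// BRPOP replies with [key, value].
	var key string
	var value []byte
	if _, err := redis.Scan(reply, &key, &value); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("popped %q from %s\n", value, key)
}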
Example #8
func (lsi *LogstreamInput) Run(ir p.InputRunner, h p.PluginHelper, stopChan chan chan bool,
	dRunner p.DecoderRunner) {

	var (
		parser func(ir p.InputRunner, deliver Deliver, stop chan chan bool) error
		err    error
	)

	if lsi.parseFunction == "payload" {
		parser = lsi.payloadParser
	} else if lsi.parseFunction == "messageProto" {
		parser = lsi.messageProtoParser
	}

	// Setup our pack delivery function appropriately for the configuration
	deliver := func(pack *p.PipelinePack) {
		if dRunner == nil {
			ir.Inject(pack)
		} else {
			dRunner.InChan() <- pack
		}
	}

	// Check for more data interval
	interval, _ := time.ParseDuration("250ms")
	tick := time.Tick(interval)

	ok := true
	for ok {
		// Clear our error
		err = nil

		// Attempt to read as many as we can
		err = parser(ir, deliver, stopChan)

		// Save our location after reading as much as we can
		lsi.stream.SavePosition()
		lsi.recordCount = 0

		if err != nil && err != io.EOF {
			ir.LogError(err)
		}

		// Did our parser func get stopped?
		if lsi.stopped != nil {
			ok = false
			continue
		}

		// Wait for our next interval, stop if needed
		select {
		case lsi.stopped = <-stopChan:
			ok = false
		case <-tick:
			continue
		}
	}
	close(lsi.stopped)
}
Example #9
func (s *SandboxInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	s.sb.InjectMessage(func(payload, payload_type, payload_name string) int {
		pack := <-ir.InChan()
		if err := proto.Unmarshal([]byte(payload), pack.Message); err != nil {
			pack.Recycle()
			return 1
		}
		if s.tz != time.UTC {
			const layout = "2006-01-02T15:04:05.999999999" // remove the incorrect UTC tz info
			t := time.Unix(0, pack.Message.GetTimestamp())
			t = t.In(time.UTC)
			ct, _ := time.ParseInLocation(layout, t.Format(layout), s.tz)
			pack.Message.SetTimestamp(ct.UnixNano())
		}
		ir.Inject(pack)
		atomic.AddInt64(&s.processMessageCount, 1)
		atomic.AddInt64(&s.processMessageBytes, int64(len(payload)))
		return 0
	})

	ticker := ir.Ticker()

	for {
		retval := s.sb.ProcessMessage(nil)
		if retval <= 0 { // Sandbox is in polling mode
			if retval < 0 {
				atomic.AddInt64(&s.processMessageFailures, 1)
				em := s.sb.LastError()
				if len(em) > 0 {
					ir.LogError(errors.New(em))
				}
			}
			if ticker == nil {
				ir.LogMessage("single run completed")
				break
			}
			select { // block until stop or poll interval
			case <-s.stopChan:
			case <-ticker:
			}
		} else { // Sandbox is shutting down
			em := s.sb.LastError()
			if !strings.HasSuffix(em, "shutting down") {
				ir.LogError(errors.New(em))
			}
			break
		}
	}

	s.reportLock.Lock()
	if s.sbc.PreserveData {
		err = s.sb.Destroy(s.preservationFile)
	} else {
		err = s.sb.Destroy("")
	}
	s.sb = nil
	s.reportLock.Unlock()
	return
}
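The timezone block above uses a small trick: format the timestamp without any zone information, then re-parse it with time.ParseInLocation so the same wall-clock value is re-interpreted in the configured zone. A standalone demonstration (America/Los_Angeles stands in for s.tz):

package main

import (
	"fmt"
	"time"
)

func main() {
	const layout = "2006-01-02T15:04:05.999999999" // no zone info on purpose

	loc, err := time.LoadLocation("America/Los_Angeles") // stand-in for s.tz
	if err != nil {
		panic(err)
	}

	// A wall-clock time that was wrongly labeled as UTC upstream.
	wrong := time.Date(2015, 6, 1, 12, 0, 0, 0, time.UTC)

	// Re-interpret the same wall-clock value in the intended zone.
	fixed, err := time.ParseInLocation(layout, wrong.Format(layout), loc)
	if err != nil {
		panic(err)
	}

	fmt.Println(wrong.Format(time.RFC3339)) // 2015-06-01T12:00:00Z
	fmt.Println(fixed.Format(time.RFC3339)) // 2015-06-01T12:00:00-07:00
}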
Example #10
func (cwi *CloudwatchInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	cwi.stopChan = make(chan bool)
	cwi.req.StartTime = time.Now()
	ticker := time.NewTicker(cwi.pollInterval)

	ok := true
	var (
		resp  *cloudwatch.GetMetricStatisticsResponse
		point cloudwatch.Datapoint
		pack  *pipeline.PipelinePack
		dim   cloudwatch.Dimension
	)

metricLoop:
	for ok {
		select {
		case _, ok = <-cwi.stopChan:
			continue
		case <-ticker.C:
			cwi.req.EndTime = time.Now()
			resp, err = cwi.cw.GetMetricStatistics(cwi.req)
			if err != nil {
				ir.LogError(err)
				err = nil
				continue
			}
			for _, point = range resp.GetMetricStatisticsResult.Datapoints {
				pack, ok = <-ir.InChan()
				if !ok {
					break metricLoop
				}
				pack.Message.SetType("cloudwatch")
				for _, dim = range cwi.req.Dimensions {
					newField(pack, "Dimension."+dim.Name, dim.Value)
				}
				newField(pack, "Period", cwi.req.Period)
				newField(pack, "Average", point.Average)
				newField(pack, "Maximum", point.Maximum)
				newField(pack, "Minimum", point.Minimum)
				newField(pack, "SampleCount", point.SampleCount)
				newField(pack, "Unit", point.Unit)
				newField(pack, "Sum", point.Sum)
				pack.Message.SetUuid(uuid.NewRandom())
				pack.Message.SetTimestamp(point.Timestamp.UTC().UnixNano())
				pack.Message.SetLogger(cwi.namespace)
				pack.Message.SetPayload(cwi.req.MetricName)
				ir.Inject(pack)
			}
			cwi.req.StartTime = cwi.req.EndTime.Add(time.Duration(1) * time.Nanosecond)
		}
	}
	return nil
}
Example #11
func (lsi *LogstreamInput) Run(ir p.InputRunner, h p.PluginHelper, stopChan chan chan bool,
	deliverer p.Deliverer, sRunner p.SplitterRunner) {

	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(lsi.packDecorator)
	}

	lsi.ir = ir
	lsi.stopChan = stopChan
	lsi.deliverer = deliverer
	lsi.sRunner = sRunner
	var err error

	// Check for more data interval
	interval, _ := time.ParseDuration("250ms")
	tick := time.Tick(interval)

	ok := true
	for ok {
		// Clear our error
		err = nil

		// Attempt to read and deliver as many as we can.
		err = lsi.deliverRecords()
		// Save our position if the stream hasn't done so for us.
		if err != io.EOF {
			lsi.stream.SavePosition()
		}
		lsi.recordCount = 0

		if err != nil && err != io.EOF {
			ir.LogError(err)
		}

		// Did our parser func get stopped?
		if lsi.stopped != nil {
			ok = false
			continue
		}

		// Wait for our next interval, stop if needed
		select {
		case lsi.stopped = <-stopChan:
			ok = false
		case <-tick:
			continue
		}
	}
	close(lsi.stopped)
	deliverer.Done()
	sRunner.Done()
}
Example #12
func (input *S3SplitFileInput) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error {
	// Begin listing the files (either straight from S3 or from a cache)
	// Write matching filenames on a "lister" channel
	// Read from the lister channel:
	//   - fetch the filename
	//   - read records from it
	//   - write them to a "reader" channel

	var (
		wg sync.WaitGroup
		i  uint32
	)

	wg.Add(1)
	go func() {
		runner.LogMessage("Starting S3 list")
	iteratorLoop:
		for r := range S3Iterator(input.bucket, input.S3BucketPrefix, input.schema) {
			select {
			case <-input.stop:
				runner.LogMessage("Stopping S3 list")
				break iteratorLoop
			default:
			}
			if r.Err != nil {
				runner.LogError(fmt.Errorf("Error getting S3 list: %s", r.Err))
			} else {
				basename := r.Key.Key[strings.LastIndex(r.Key.Key, "/")+1:]
				if input.objectMatch == nil || input.objectMatch.MatchString(basename) {
					runner.LogMessage(fmt.Sprintf("Found: %s", r.Key.Key))
					input.listChan <- r.Key.Key
				} else {
					runner.LogMessage(fmt.Sprintf("Skipping: %s", r.Key.Key))
				}
			}
		}
		// All done listing, close the channel
		runner.LogMessage("All done listing. Closing channel")
		close(input.listChan)
		wg.Done()
	}()

	// Run a pool of concurrent readers.
	for i = 0; i < input.S3WorkerCount; i++ {
		wg.Add(1)
		go input.fetcher(runner, &wg, i)
	}
	wg.Wait()

	return nil
}
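The structure above (one lister goroutine that closes listChan when it finishes, a pool of fetcher workers that range over it, and a WaitGroup that keeps Run alive until both sides are done) reduces to a standalone sketch:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	listChan := make(chan string)

	// Lister: push keys, then close the channel to end the workers' loops.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for _, key := range []string{"a/1", "a/2", "b/3"} {
			listChan <- key
		}
		close(listChan)
	}()

	// Pool of concurrent readers.
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func(worker int) {
			defer wg.Done()
			for key := range listChan {
				fmt.Printf("worker %d fetched %s\n", worker, key)
			}
		}(i)
	}

	wg.Wait()
}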
Example #13
func (zi *ZeroMQInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	// Get the InputRunner's chan to receive empty PipelinePacks
	packs := ir.InChan()

	var decoding chan<- *pipeline.PipelinePack
	if zi.conf.Decoder != "" {
		// Fetch specified decoder
		decoder, ok := h.DecoderSet().ByName(zi.conf.Decoder)
		if !ok {
			err := fmt.Errorf("Could not find decoder: %s", zi.conf.Decoder)
			return err
		}

		// Get the decoder's receiving chan
		decoding = decoder.InChan()
	}

	var pack *pipeline.PipelinePack
	var count int
	var b []byte
	var err error

	// Read data from the ZeroMQ socket
	for {
		b, err = zi.socket.Recv(0)
		if err != nil {
			ir.LogError(err)
			continue
		}

		// Grab an empty PipelinePack from the InputRunner
		pack = <-packs

		// Trim the excess empty bytes
		count = len(b)
		pack.MsgBytes = pack.MsgBytes[:count]

		// Copy the received bytes into the pack's buffer
		copy(pack.MsgBytes, b)

		if decoding != nil {
			// Send pack onto decoder
			decoding <- pack
		} else {
			// Send pack into Heka pipeline
			ir.Inject(pack)
		}
	}

	return nil
}
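One thing this example and Example #19 both skip is a bounds check: pack.MsgBytes is preallocated, and reslicing it past its capacity panics. A minimal standalone sketch of a guarded copy (copyBounded is a hypothetical helper, not part of Heka):

package main

import "fmt"

// copyBounded copies src into dst's backing array without growing it past
// its capacity; it returns the resliced dst and the number of bytes copied.
func copyBounded(dst, src []byte) ([]byte, int) {
	n := len(src)
	if n > cap(dst) {
		n = cap(dst) // truncate rather than panic on oversized records
	}
	dst = dst[:n]
	copy(dst, src[:n])
	return dst, n
}

func main() {
	buf := make([]byte, 0, 8) // stand-in for a pack's preallocated buffer
	buf, n := copyBounded(buf, []byte("hello zeromq"))
	fmt.Printf("copied %d bytes: %q\n", n, buf)
}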
Example #14
func splitStream(ir p.InputRunner, sRunner p.SplitterRunner, r io.ReadCloser) error {
	var (
		record       []byte
		longRecord   []byte
		err          error
		deliver      bool
		nullSplitter bool
	)
	// If we're using a NullSplitter we want to make sure we capture the
	// entire HTTP request or response body and not be subject to what we get
	// from a single Read() call.
	if _, ok := sRunner.Splitter().(*p.NullSplitter); ok {
		nullSplitter = true
	}
	for err == nil {
		deliver = true
		_, record, err = sRunner.GetRecordFromStream(r)
		if err == io.ErrShortBuffer {
			if sRunner.KeepTruncated() {
				err = fmt.Errorf("record exceeded MAX_RECORD_SIZE %d and was truncated",
					message.MAX_RECORD_SIZE)
			} else {
				deliver = false
				err = fmt.Errorf("record exceeded MAX_RECORD_SIZE %d and was dropped",
					message.MAX_RECORD_SIZE)
			}
			ir.LogError(err)
			err = nil // non-fatal, keep going
		} else if sRunner.IncompleteFinal() && err == io.EOF && len(record) == 0 {
			record = sRunner.GetRemainingData()
		}
		if len(record) > 0 && deliver {
			if nullSplitter {
				// Concatenate all the records until EOF. This should be safe
				// b/c NullSplitter means FindRecord will always return the
				// full buffer contents, we don't have to worry about
				// GetRecordFromStream trying to append multiple reads to a
				// single record and triggering an io.ErrShortBuffer error.
				longRecord = append(longRecord, record...)
			} else {
				sRunner.DeliverRecord(record, nil)
			}
		}
	}
	r.Close()
	if err == io.EOF && nullSplitter && len(longRecord) > 0 {
		sRunner.DeliverRecord(longRecord, nil)
	}
	return err
}
Example #15
func (input *S3SplitFileInput) fetcher(runner pipeline.InputRunner, wg *sync.WaitGroup, workerId uint32) {
	var (
		s3Key     string
		startTime time.Time
		duration  float64
	)

	fetcherName := fmt.Sprintf("S3Reader%d", workerId)
	deliverer := runner.NewDeliverer(fetcherName)
	defer deliverer.Done()
	splitterRunner := runner.NewSplitterRunner(fetcherName)

	ok := true
	for ok {
		select {
		case s3Key, ok = <-input.listChan:
			if !ok {
				// Channel is closed => we're shutting down, exit cleanly.
				// runner.LogMessage("Fetcher all done! shutting down.")
				break
			}

			startTime = time.Now().UTC()
			err := input.readS3File(runner, &deliverer, &splitterRunner, s3Key)
			atomic.AddInt64(&input.processFileCount, 1)
			leftovers := splitterRunner.GetRemainingData()
			lenLeftovers := len(leftovers)
			if lenLeftovers > 0 {
				atomic.AddInt64(&input.processFileDiscardedBytes, int64(lenLeftovers))
				runner.LogError(fmt.Errorf("Trailing data, possible corruption: %d bytes left in stream at EOF: %s", lenLeftovers, s3Key))
			}
			if err != nil && err != io.EOF {
				runner.LogError(fmt.Errorf("Error reading %s: %s", s3Key, err))
				atomic.AddInt64(&input.processFileFailures, 1)
				continue
			}
			duration = time.Now().UTC().Sub(startTime).Seconds()
			runner.LogMessage(fmt.Sprintf("Successfully fetched %s in %.2fs ", s3Key, duration))
		case <-input.stop:
			for range input.listChan {
				// Drain the channel without processing the files.
				// Technically the S3Iterator can still add one back on to the
				// channel but this ensures there is room so it won't block.
			}
			ok = false
		}
	}

	wg.Done()
}
Example #16
func (di *DockerLogInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	var (
		pack *pipeline.PipelinePack
		ok   bool
	)

	hostname := h.Hostname()

	go di.attachMgr.Listen(di.logstream, di.closer)

	// Get the InputRunner's chan to receive empty PipelinePacks
	packSupply := ir.InChan()

	ok = true
	var err error
	for ok {
		select {
		case logline := <-di.logstream:
			pack = <-packSupply

			pack.Message.SetType("DockerLog")
			pack.Message.SetLogger(logline.Type) // stderr or stdout
			pack.Message.SetHostname(hostname)   // Use the host's hostname
			pack.Message.SetPayload(logline.Data)
			pack.Message.SetTimestamp(time.Now().UnixNano())
			pack.Message.SetUuid(uuid.NewRandom())
			for k, v := range logline.Fields {
				message.NewStringField(pack.Message, k, v)
			}

			ir.Deliver(pack)

		case err, ok = <-di.attachErrors:
			if !ok {
				err = errors.New("Docker event channel closed")
				break
			}
			ir.LogError(fmt.Errorf("Attacher error: %s", err))

		case err = <-di.stopChan:
			ok = false
		}
	}

	di.closer <- struct{}{}
	close(di.logstream)
	return err
}
Example #17
func (input *FilePollingInput) Run(runner pipeline.InputRunner,
	helper pipeline.PluginHelper) error {

	var (
		data    []byte
		pack    *pipeline.PipelinePack
		dRunner pipeline.DecoderRunner
		ok      bool
		err     error
	)

	if input.DecoderName != "" {
		if dRunner, ok = helper.DecoderRunner(input.DecoderName,
			fmt.Sprintf("%s-%s", runner.Name(), input.DecoderName)); !ok {
			return fmt.Errorf("Decoder not found: %s", input.DecoderName)
		}
		input.decoderChan = dRunner.InChan()
	}
	input.runner = runner

	hostname := helper.PipelineConfig().Hostname()
	packSupply := runner.InChan()
	tickChan := runner.Ticker()

	for {
		select {
		case <-input.stop:
			return nil
		case <-tickChan:
		}

		data, err = ioutil.ReadFile(input.FilePath)
		if err != nil {
			runner.LogError(fmt.Errorf("Error reading file: %s", err))
			continue
		}

		pack = <-packSupply
		pack.Message.SetUuid(uuid.NewRandom())
		pack.Message.SetTimestamp(time.Now().UnixNano())
		pack.Message.SetType("heka.file.polling")
		pack.Message.SetHostname(hostname)
		pack.Message.SetPayload(string(data))
		if field, err := message.NewField("TickerInterval", int(input.TickerInterval), ""); err != nil {
			runner.LogError(err)
		} else {
			pack.Message.AddField(field)
		}
		if field, err := message.NewField("FilePath", input.FilePath, ""); err != nil {
			runner.LogError(err)
		} else {
			pack.Message.AddField(field)
		}
		input.sendPack(pack)
	}

	return nil
}
Example #18
func (input *Sqs3Input) Run(runner pipeline.InputRunner,
    helper pipeline.PluginHelper) error {

    // initialize
    input.runner = runner
    input.sqs = sqs.New(session.New())
    input.s3 = s3.New(session.New())
    queue_url, err := get_queue(input.sqs, input.SqsQueue)
    if err != nil { return err }
    input.queue_url = queue_url
    //input.hostname = helper.PipelineConfig().Hostname()
    tickChan := runner.Ticker()
    sRunner := runner.NewSplitterRunner("")
    if !sRunner.UseMsgBytes() {
        sRunner.SetPackDecorator(input.packDecorator)
    }
    defer sRunner.Done()

    for {
        select {
        case <-input.stop:
            return nil
        case <-tickChan:
        }

        receipt_handle, bucket, key, err := receive_from_queue(input.sqs, input.queue_url)
        if err != nil {
            runner.LogError(fmt.Errorf("Error reading queue: %s", err.Error()))
            continue
        }

        o, _, err := get_object(input.s3, bucket, key)
        if err != nil {
            runner.LogError(fmt.Errorf("Error opening file: %s", err.Error()))
            if aws_err, ok := err.(awserr.Error); ok {
                if aws_err.Code() == "NoSuchBucket" || aws_err.Code() == "NoSuchKey" {
                    delete_message(input.sqs, input.queue_url, receipt_handle)
                }
            }
            continue
        }
        for err == nil {
            err = sRunner.SplitStream(o, nil)
            if err != io.EOF && err != nil {
                runner.LogError(fmt.Errorf("Error reading file: %s", err.Error()))
            }
        }
        o.Close()
    }
}
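The get_queue, get_object and delete_message helpers aren't shown in this example. As a hedged guess at the simplest of them, a delete_message built on aws-sdk-go could look like the sketch below; the string-based signature and the placeholder queue URL are assumptions, not the plugin's actual code:

package main

import (
	"log"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/sqs"
)

// delete_message removes a processed message from the queue so it is not
// redelivered. Hypothetical reconstruction; the real helper may differ.
func delete_message(svc *sqs.SQS, queueUrl, receiptHandle string) error {
	_, err := svc.DeleteMessage(&sqs.DeleteMessageInput{
		QueueUrl:      aws.String(queueUrl),
		ReceiptHandle: aws.String(receiptHandle),
	})
	return err
}

func main() {
	svc := sqs.New(session.New())
	// Placeholder queue URL and receipt handle for illustration only.
	err := delete_message(svc, "https://sqs.us-east-1.amazonaws.com/123456789012/my-queue", "example-receipt-handle")
	if err != nil {
		log.Fatal(err)
	}
}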
Example #19
func (ri *RedisMQInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	// Get the InputRunner's chan to receive empty PipelinePacks
	packs := ir.InChan()

	var decoding chan<- *pipeline.PipelinePack
	if ri.conf.Decoder != "" {
		// Fetch specified decoder
		decoder, ok := h.DecoderRunner(ri.conf.Decoder)
		if !ok {
			err := fmt.Errorf("Could not find decoder: %s", ri.conf.Decoder)
			return err
		}

		// Get the decoder's receiving chan
		decoding = decoder.InChan()
	}

	var pack *pipeline.PipelinePack
	//var p []*redismq.Package
	var p *redismq.Package
	var count int
	var b []byte
	var err error

	for {
		p, err = ri.rdconsumer.Get()
		if err != nil {
			ir.LogError(err)
			continue
		}
		err = p.Ack()
		if err != nil {
			ir.LogError(err)
		}
		b = []byte(p.Payload)
		// Grab an empty PipelinePack from the InputRunner
		pack = <-packs

		// Trim the excess empty bytes
		count = len(b)
		pack.MsgBytes = pack.MsgBytes[:count]

		// Copy the payload bytes into the pack's buffer
		copy(pack.MsgBytes, b)

		if decoding != nil {
			// Send pack onto decoder
			decoding <- pack
		} else {
			// Send pack into Heka pipeline
			ir.Inject(pack)
		}
	}
	/*
	           checkStat := time.Tick(ri.statInterval)
	           ok := true
	           for ok {
	               select {
	   		case _, ok = <-ri.stopChan:
	   			break
	   		case <-checkStat:
	                       p, err = ri.rdconsumer.MultiGet(500)
	                       if err != nil {
	                           ir.LogError(err)
	                           continue
	                       }
	                       err = p[len(p)-1].MultiAck()
	                       if err != nil {
	                           ir.LogError(err)
	                       }
	                       for _, v := range p {
	                         b = []byte(v.Payload)
	                         // Grab an empty PipelinePack from the InputRunner
	                         pack = <-packs

	                         // Trim the excess empty bytes
	                         count = len(b)
	                         pack.MsgBytes = pack.MsgBytes[:count]

	                         // Copy ws bytes into pack's bytes
	                         copy(pack.MsgBytes, b)

	                         if decoding != nil {
	                           // Send pack onto decoder
	                           decoding <- pack
	                         } else {
	                           // Send pack into Heka pipeline
	                           ir.Inject(pack)
	                         }
	                       }
	                   }
	           }
	*/
	return nil
}
Example #20
func (k *KafkaInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	sRunner := ir.NewSplitterRunner("")

	defer func() {
		k.partitionConsumer.Close()
		k.consumer.Close()
		if k.checkpointFile != nil {
			k.checkpointFile.Close()
		}
		sRunner.Done()
	}()
	k.ir = ir
	k.stopChan = make(chan bool)

	var (
		hostname = k.pConfig.Hostname()
		event    *sarama.ConsumerMessage
		cError   *sarama.ConsumerError
		ok       bool
		n        int
	)

	packDec := func(pack *pipeline.PipelinePack) {
		pack.Message.SetType("heka.kafka")
		pack.Message.SetLogger(k.name)
		pack.Message.SetHostname(hostname)
		k.addField(pack, "Key", event.Key, "")
		k.addField(pack, "Topic", event.Topic, "")
		k.addField(pack, "Partition", event.Partition, "")
		k.addField(pack, "Offset", event.Offset, "")
	}
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(packDec)
	}

	eventChan := k.partitionConsumer.Messages()
	cErrChan := k.partitionConsumer.Errors()
	for {
		select {
		case event, ok = <-eventChan:
			if !ok {
				return nil
			}
			atomic.AddInt64(&k.processMessageCount, 1)
			if n, err = sRunner.SplitBytes(event.Value, nil); err != nil {
				ir.LogError(fmt.Errorf("processing message from topic %s: %s",
					event.Topic, err))
			}
			if n > 0 && n != len(event.Value) {
				ir.LogError(fmt.Errorf("extra data dropped in message from topic %s",
					event.Topic))
			}

			if k.config.OffsetMethod == "Manual" {
				if err = k.writeCheckpoint(event.Offset + 1); err != nil {
					return err
				}
			}

		case cError, ok = <-cErrChan:
			if !ok {
				// Don't exit until the eventChan is closed.
				ok = true
				continue
			}
			if cError.Err == sarama.ErrOffsetOutOfRange {
				ir.LogError(fmt.Errorf(
					"removing the out of range checkpoint file and stopping"))
				if k.checkpointFile != nil {
					k.checkpointFile.Close()
					k.checkpointFile = nil
				}
				if err := os.Remove(k.checkpointFilename); err != nil {
					ir.LogError(err)
				}
				return err
			}
			atomic.AddInt64(&k.processMessageFailures, 1)
			ir.LogError(cError.Err)

		case <-k.stopChan:
			return nil
		}
	}
}
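When OffsetMethod is "Manual" the loop above calls writeCheckpoint after every message. That helper isn't shown here; a hypothetical standalone version that simply rewrites a small file with the next offset to consume (not Heka's actual implementation) could look like:

package main

import (
	"encoding/binary"
	"log"
	"os"
)

// writeCheckpoint persists the next offset to consume so a restart can
// resume where it left off. Hypothetical sketch only.
func writeCheckpoint(path string, offset int64) error {
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}
	defer f.Close()
	return binary.Write(f, binary.LittleEndian, offset)
}

func main() {
	if err := writeCheckpoint("/tmp/kafka.offset", 42); err != nil {
		log.Fatal(err)
	}
}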
Example #21
func (input *S3OffsetInput) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error {
	// List offset metadata index files
	// For each index D >= start and <= end
	//   Read index D
	//   Write offsets for any desired clients to offsetChan
	// Meanwhile, for each item in offsetChan
	//   Go fetch that record, inject resulting message into pipeline.

	var (
		wg          sync.WaitGroup
		i           uint32
		emptySchema Schema
	)

	if input.metaFileName != "" {
		wg.Add(1)
		go func() {
			reader, err := os.Open(input.metaFileName)
			if err != nil {
				runner.LogMessage(fmt.Sprintf("Error opening metadata file '%s': %s", input.metaFileName, err))
			}
			defer reader.Close()
			err = input.parseMessageLocations(reader, input.metaFileName)
			if err != nil {
				runner.LogMessage(fmt.Sprintf("Error reading metadata: %s", err))
			}
			// All done with metadata, close the channel
			runner.LogMessage("All done with metadata. Closing channel")
			close(input.offsetChan)
			wg.Done()
		}()
	} else if input.metaBucket != nil {
		wg.Add(1)
		go func() {
			runner.LogMessage("Starting S3 list")
		iteratorLoop:
			for r := range S3Iterator(input.metaBucket, input.S3MetaBucketPrefix, emptySchema) {
				select {
				case <-input.stop:
					runner.LogMessage("Stopping S3 list")
					break iteratorLoop
				default:
				}
				if r.Err != nil {
					runner.LogError(fmt.Errorf("Error getting S3 list: %s", r.Err))
				} else {
					base := path.Base(r.Key.Key)[0:8]
					// Check if r is in the desired date range.
					if base >= input.StartDate && base <= input.EndDate {
						err := input.grep(r)
						if err != nil {
							runner.LogMessage(fmt.Sprintf("Error reading index: %s", err))
						}
					}
				}
			}
			// All done listing, close the channel
			runner.LogMessage("All done listing. Closing channel")
			close(input.offsetChan)
			wg.Done()
		}()
	} else {
		runner.LogMessage("Nothing to do, no metadata available. Closing channel")
		close(input.offsetChan)
		wg.Done()
	}

	// Run a pool of concurrent readers.
	for i = 0; i < input.S3WorkerCount; i++ {
		wg.Add(1)
		go input.fetcher(runner, &wg, i)
	}
	wg.Wait()

	return nil
}
Example #22
func (ni *NsqInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	// Get the InputRunner's chan to receive empty PipelinePacks
	var pack *pipeline.PipelinePack
	var err error
	var dRunner pipeline.DecoderRunner
	var decoder pipeline.Decoder
	var ok bool
	var e error

	//pos := 0
	//output := make([]*Message, 2)
	packSupply := ir.InChan()

	if ni.conf.Decoder != "" {
		if dRunner, ok = h.DecoderRunner(ni.conf.Decoder); !ok {
			return fmt.Errorf("Decoder not found: %s", ni.conf.Decoder)
		}
		decoder = dRunner.Decoder()
	}

	err = ni.nsqReader.ConnectToLookupd(ni.conf.Address)
	if err != nil {
		ir.LogError(errors.New("ConnectToLookupd failed."))
	}

	header := &message.Header{}

	stopped := false
	//readLoop:
	for !stopped {
		//stopped = true
		select {
		case <-ni.stopChan:
			ir.LogError(errors.New("get ni.stopChan, set stopped=true"))
			stopped = true
		default:
			pack = <-packSupply
			m, ok1 := <-ni.handler.logChan
			if !ok1 {
				stopped = true
				break
			}

			if ni.conf.Serialize {
				if dRunner == nil {
					pack.Recycle()
					ir.LogError(errors.New("Serialize messages require a decoder."))
				}
				//header := &message.Header{}
				_, msgOk := findMessage(m.msg.Body, header, &(pack.MsgBytes))
				if msgOk {
					dRunner.InChan() <- pack
				} else {
					pack.Recycle()
					ir.LogError(errors.New("Can't find Heka message."))
				}
				header.Reset()
			} else {
				//ir.LogError(fmt.Errorf("message body: %s", m.msg.Body))
				pack.Message.SetType("nsq")
				pack.Message.SetPayload(string(m.msg.Body))
				pack.Message.SetTimestamp(time.Now().UnixNano())
				var packs []*pipeline.PipelinePack
				if decoder == nil {
					packs = []*pipeline.PipelinePack{pack}
				} else {
					packs, e = decoder.Decode(pack)
				}
				if packs != nil {
					for _, p := range packs {
						ir.Inject(p)
					}
				} else {
					if e != nil {
						ir.LogError(fmt.Errorf("Couldn't parse Nsq message: %s", m.msg.Body))
					}
					pack.Recycle()
				}
			}
			m.returnChannel <- &nsq.FinishedMessage{m.msg.Id, 0, true}
			/*
			   output[pos] = m
			   pos++
			   if pos == 2 {
			           for pos > 0 {
			                   pos--
			                   m1 := output[pos]
			                   m1.returnChannel <- &nsq.FinishedMessage{m1.msg.Id, 0, true}
			                   output[pos] = nil
			           }
			   }
			*/
		}
	}
	return nil
}
Example #23
func (k *KafkaConsumerGroupInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	sRunner := ir.NewSplitterRunner("")

	defer func() {
		if err := k.consumer.Close(); err != nil {
			k.ir.LogError(fmt.Errorf("error closing the consumer: %s", err.Error()))
		}
		sRunner.Done()
	}()
	k.ir = ir

	go func() {
		for err := range k.consumer.Errors() {
			atomic.AddInt64(&k.processMessageFailures, 1)
			ir.LogError(err)
		}
	}()

	var (
		hostname = k.pConfig.Hostname()
		event    *sarama.ConsumerMessage
		ok       bool
		n        int
	)

	packDec := func(pack *pipeline.PipelinePack) {
		pack.Message.SetType("heka.kafka")
		pack.Message.SetLogger(k.name)
		pack.Message.SetHostname(hostname)
		k.addField(pack, "Key", event.Key, "")
		k.addField(pack, "Topic", event.Topic, "")
		k.addField(pack, "Partition", event.Partition, "")
		k.addField(pack, "Offset", event.Offset, "")
	}
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(packDec)
	}

	offsets := make(map[string]map[int32]int64)
	for {
		select {
		case event, ok = <-k.consumer.Messages():
			if !ok {
				return
			}

			if offsets[event.Topic] == nil {
				offsets[event.Topic] = make(map[int32]int64)
			}

			if offsets[event.Topic][event.Partition] != 0 && offsets[event.Topic][event.Partition] != event.Offset-1 {
				ir.LogError(fmt.Errorf("unexpected offset on %s:%d. Expected %d, found %d, diff %d.\n",
					event.Topic, event.Partition,
					offsets[event.Topic][event.Partition]+1, event.Offset,
					event.Offset-offsets[event.Topic][event.Partition]+1))
			}

			atomic.AddInt64(&k.processMessageCount, 1)

			if n, err = sRunner.SplitBytes(event.Value, nil); err != nil {
				ir.LogError(fmt.Errorf("processing message from topic %s: %s",
					event.Topic, err))
			}
			if n > 0 && n != len(event.Value) {
				ir.LogError(fmt.Errorf("extra data dropped in message from topic %s",
					event.Topic))
			}

			offsets[event.Topic][event.Partition] = event.Offset
			k.consumer.CommitUpto(event)
		case <-k.stopChan:
			return
		}
	}
}
Example #24
// Main Logstreamer Input runner
// This runner kicks off all the other logstream inputs, and handles rescanning for
// updates to the filesystem that might affect file visibility for the logstream
// inputs
func (li *LogstreamerInput) Run(ir p.InputRunner, h p.PluginHelper) (err error) {
	var (
		ok         bool
		dRunner    p.DecoderRunner
		errs       *ls.MultipleError
		newstreams []string
	)

	// Setup the decoder runner that will be used
	if li.decoderName != "" {
		if dRunner, ok = h.DecoderRunner(li.decoderName, fmt.Sprintf("%s-%s",
			li.pluginName, li.decoderName)); !ok {

			return fmt.Errorf("Decoder not found: %s", li.decoderName)
		}
	}

	// Kick off all the current logstreams we know of
	for _, logstream := range li.plugins {
		stop := make(chan chan bool, 1)
		go logstream.Run(ir, h, stop, dRunner)
		li.stopLogstreamChans = append(li.stopLogstreamChans, stop)
	}

	ok = true
	rescan := time.Tick(li.rescanInterval)
	// Our main rescan loop that handles shutting down
	for ok {
		select {
		case <-li.stopChan:
			ok = false
			returnChans := make([]chan bool, len(li.stopLogstreamChans))
			// Send out all the stop signals
			for i, ch := range li.stopLogstreamChans {
				ret := make(chan bool)
				ch <- ret
				returnChans[i] = ret
			}

			// Wait for all the stops
			for _, ch := range returnChans {
				<-ch
			}

			// Close our own stopChan to indicate we shut down
			close(li.stopChan)
		case <-rescan:
			li.logstreamSetLock.Lock()
			newstreams, errs = li.logstreamSet.ScanForLogstreams()
			if errs.IsError() {
				ir.LogError(errs)
			}
			for _, name := range newstreams {
				stream, ok := li.logstreamSet.GetLogstream(name)
				if !ok {
					ir.LogError(fmt.Errorf("Found new logstream: %s, but couldn't fetch it.", name))
					continue
				}

				// Setup a new logstream input for this logstream and start it running
				stParser, parserFunc, _ := CreateParser(li.parser, li.delimiter,
					li.delimiterLocation, li.decoderName)

				lsi := NewLogstreamInput(stream, stParser, parserFunc, name,
					li.hostName)
				li.plugins[name] = lsi
				stop := make(chan chan bool, 1)
				go lsi.Run(ir, h, stop, dRunner)
				li.stopLogstreamChans = append(li.stopLogstreamChans, stop)
			}
			li.logstreamSetLock.Unlock()
		}
	}
	err = nil
	return
}