Example #1
func (input *FilePollingInput) Run(runner pipeline.InputRunner,
	helper pipeline.PluginHelper) error {

	input.runner = runner
	input.hostname = helper.PipelineConfig().Hostname()
	tickChan := runner.Ticker()
	sRunner := runner.NewSplitterRunner("")
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(input.packDecorator)
	}

	for {
		select {
		case <-input.stop:
			return nil
		case <-tickChan:
		}

		f, err := os.Open(input.FilePath)
		if err != nil {
			runner.LogError(fmt.Errorf("Error opening file: %s", err.Error()))
			continue
		}
		for err == nil {
			err = sRunner.SplitStream(f, nil)
			if err != io.EOF && err != nil {
				runner.LogError(fmt.Errorf("Error reading file: %s", err.Error()))
			}
		}
		// Close the file before waiting for the next tick so handles aren't leaked.
		f.Close()
	}

	return nil
}
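Examples #1 and #3 hand input.packDecorator to SetPackDecorator without showing it. A pack decorator is simply a func(*pipeline.PipelinePack) that stamps metadata onto each message the splitter produces (compare the packDec closure in Example #5). A minimal sketch; the field values here are assumptions, not the plugin's actual ones:

func (input *FilePollingInput) packDecorator(pack *pipeline.PipelinePack) {
	// Hypothetical values; the real plugin may set different Type/Logger fields.
	pack.Message.SetType("heka.file.polling")
	pack.Message.SetHostname(input.hostname)
	pack.Message.SetLogger(input.FilePath)
}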
Example #2
// Creates deliverer and stop channel and starts the provided LogstreamInput.
func (li *LogstreamerInput) startLogstreamInput(logstream *LogstreamInput, i int,
	ir p.InputRunner, h p.PluginHelper) {

	stop := make(chan chan bool, 1)
	token := strconv.Itoa(i)
	deliverer := ir.NewDeliverer(token)
	sRunner := ir.NewSplitterRunner(token)
	li.stopLogstreamChans = append(li.stopLogstreamChans, stop)
	go logstream.Run(ir, h, stop, deliverer, sRunner)
}
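The chan chan bool created here is the usual Heka shutdown handshake: the stopper sends a reply channel in, and the logstream goroutine answers on it once it has finished. A hedged sketch of the matching shutdown side (the method name and exact semantics are assumptions based on the pattern above):

func (li *LogstreamerInput) stopLogstreams() {
	for _, stop := range li.stopLogstreamChans {
		done := make(chan bool)
		stop <- done // ask the logstream goroutine to stop
		<-done       // wait until it acknowledges
	}
}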
Example #3
func (input *Sqs3Input) Run(runner pipeline.InputRunner,
    helper pipeline.PluginHelper) error {

    // initialize
    input.runner = runner
    input.sqs = sqs.New(session.New())
    input.s3 = s3.New(session.New())
    queue_url, err := get_queue(input.sqs, input.SqsQueue)
    if err != nil { return err }
    input.queue_url = queue_url
    //input.hostname = helper.PipelineConfig().Hostname()
    tickChan := runner.Ticker()
    sRunner := runner.NewSplitterRunner("")
    if !sRunner.UseMsgBytes() {
        sRunner.SetPackDecorator(input.packDecorator)
    }
    defer sRunner.Done()

    for {
        select {
        case <-input.stop:
            return nil
        case <-tickChan:
        }

        receipt_handle, bucket, key, err := receive_from_queue(input.sqs, input.queue_url)
        if err != nil {
            runner.LogError(fmt.Errorf("Error reading queue: %s", err.Error()))
            continue
        }

        o, _, err := get_object(input.s3, bucket, key)
        if err != nil {
            runner.LogError(fmt.Errorf("Error opening file: %s", err.Error()))
            if aws_err, ok := err.(awserr.Error); ok {
                // Delete the queue message when the referenced object can never be fetched.
                if aws_err.Code() == "NoSuchBucket" || aws_err.Code() == "NoSuchKey" {
                    delete_message(input.sqs, input.queue_url, receipt_handle)
                }
            }
            continue
        }
        for err == nil {
            err = sRunner.SplitStream(o, nil)
            if err != io.EOF && err != nil {
                runner.LogError(fmt.Errorf("Error reading file: %s", err.Error()))
            }
        }
        o.Close()
    }
}
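The get_queue, receive_from_queue, get_object and delete_message helpers are not shown. As one illustration, a hedged sketch of get_object using the aws-sdk-go v1 API (imports "io", "github.com/aws/aws-sdk-go/aws" and ".../service/s3"; the real helper may differ):

func get_object(svc *s3.S3, bucket, key string) (io.ReadCloser, int64, error) {
    out, err := svc.GetObject(&s3.GetObjectInput{
        Bucket: aws.String(bucket),
        Key:    aws.String(key),
    })
    if err != nil {
        return nil, 0, err
    }
    // The body satisfies io.Reader for SplitStream and io.Closer for o.Close().
    return out.Body, aws.Int64Value(out.ContentLength), nil
}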
Example #4
func (input *S3SplitFileInput) fetcher(runner pipeline.InputRunner, wg *sync.WaitGroup, workerId uint32) {
	var (
		s3Key     string
		startTime time.Time
		duration  float64
	)

	fetcherName := fmt.Sprintf("S3Reader%d", workerId)
	deliverer := runner.NewDeliverer(fetcherName)
	defer deliverer.Done()
	splitterRunner := runner.NewSplitterRunner(fetcherName)

	ok := true
	for ok {
		select {
		case s3Key, ok = <-input.listChan:
			if !ok {
				// Channel is closed => we're shutting down, exit cleanly.
				// runner.LogMessage("Fetcher all done! shutting down.")
				break
			}

			startTime = time.Now().UTC()
			err := input.readS3File(runner, &deliverer, &splitterRunner, s3Key)
			atomic.AddInt64(&input.processFileCount, 1)
			leftovers := splitterRunner.GetRemainingData()
			lenLeftovers := len(leftovers)
			if lenLeftovers > 0 {
				atomic.AddInt64(&input.processFileDiscardedBytes, int64(lenLeftovers))
				runner.LogError(fmt.Errorf("Trailing data, possible corruption: %d bytes left in stream at EOF: %s", lenLeftovers, s3Key))
			}
			if err != nil && err != io.EOF {
				runner.LogError(fmt.Errorf("Error reading %s: %s", s3Key, err))
				atomic.AddInt64(&input.processFileFailures, 1)
				continue
			}
			duration = time.Now().UTC().Sub(startTime).Seconds()
			runner.LogMessage(fmt.Sprintf("Successfully fetched %s in %.2fs ", s3Key, duration))
		case <-input.stop:
			for _ = range input.listChan {
				// Drain the channel without processing the files.
				// Technically the S3Iterator can still add one back on to the
				// channel but this ensures there is room so it won't block.
			}
			ok = false
		}
	}

	wg.Done()
}
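The fetcher is written as a worker tracked by a WaitGroup. A sketch of how such workers are typically launched from the plugin's Run method (the S3WorkerCount field name is an assumption):

	var wg sync.WaitGroup
	for i := uint32(0); i < input.S3WorkerCount; i++ { // S3WorkerCount is hypothetical
		wg.Add(1)
		go input.fetcher(runner, &wg, i)
	}
	wg.Wait() // Run blocks until every fetcher has drained and exited.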
Example #5
func (k *KafkaInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	sRunner := ir.NewSplitterRunner("")

	defer func() {
		k.partitionConsumer.Close()
		k.consumer.Close()
		if k.checkpointFile != nil {
			k.checkpointFile.Close()
		}
		sRunner.Done()
	}()
	k.ir = ir
	k.stopChan = make(chan bool)

	var (
		hostname = k.pConfig.Hostname()
		event    *sarama.ConsumerMessage
		cError   *sarama.ConsumerError
		ok       bool
		n        int
	)

	packDec := func(pack *pipeline.PipelinePack) {
		pack.Message.SetType("heka.kafka")
		pack.Message.SetLogger(k.name)
		pack.Message.SetHostname(hostname)
		k.addField(pack, "Key", event.Key, "")
		k.addField(pack, "Topic", event.Topic, "")
		k.addField(pack, "Partition", event.Partition, "")
		k.addField(pack, "Offset", event.Offset, "")
	}
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(packDec)
	}

	eventChan := k.partitionConsumer.Messages()
	cErrChan := k.partitionConsumer.Errors()
	for {
		select {
		case event, ok = <-eventChan:
			if !ok {
				return nil
			}
			atomic.AddInt64(&k.processMessageCount, 1)
			if n, err = sRunner.SplitBytes(event.Value, nil); err != nil {
				ir.LogError(fmt.Errorf("processing message from topic %s: %s",
					event.Topic, err))
			}
			if n > 0 && n != len(event.Value) {
				ir.LogError(fmt.Errorf("extra data dropped in message from topic %s",
					event.Topic))
			}

			if k.config.OffsetMethod == "Manual" {
				if err = k.writeCheckpoint(event.Offset + 1); err != nil {
					return err
				}
			}

		case cError, ok = <-cErrChan:
			if !ok {
				// Don't exit until the eventChan is closed; nil the channel so
				// this case stops firing in a busy loop once errors are done.
				cErrChan = nil
				ok = true
				continue
			}
			if cError.Err == sarama.ErrOffsetOutOfRange {
				ir.LogError(fmt.Errorf(
					"removing the out of range checkpoint file and stopping"))
				if k.checkpointFile != nil {
					k.checkpointFile.Close()
					k.checkpointFile = nil
				}
				if err := os.Remove(k.checkpointFilename); err != nil {
					ir.LogError(err)
				}
				return err
			}
			atomic.AddInt64(&k.processMessageFailures, 1)
			ir.LogError(cError.Err)

		case <-k.stopChan:
			return nil
		}
	}
}
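With OffsetMethod set to "Manual" the plugin persists the next offset itself via writeCheckpoint, which is not shown above. A minimal sketch, assuming a simple encoding/binary on-disk format rather than the plugin's actual one:

func (k *KafkaInput) writeCheckpoint(offset int64) (err error) {
	if k.checkpointFile == nil {
		k.checkpointFile, err = os.OpenFile(k.checkpointFilename,
			os.O_WRONLY|os.O_SYNC|os.O_CREATE, 0644)
		if err != nil {
			return err
		}
	}
	// Overwrite the previous checkpoint in place.
	if _, err = k.checkpointFile.Seek(0, 0); err != nil {
		return err
	}
	return binary.Write(k.checkpointFile, binary.LittleEndian, offset)
}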
Example #6
func (input *S3OffsetInput) fetcher(runner pipeline.InputRunner, wg *sync.WaitGroup, workerId uint32) {
	var (
		loc       MessageLocation
		startTime time.Time
		duration  float64
		headers   map[string][]string
		record    []byte
		err       error
	)

	headers = map[string][]string{
		"Range": []string{""},
	}

	fetcherName := fmt.Sprintf("S3Reader%d", workerId)
	deliverer := runner.NewDeliverer(fetcherName)
	defer deliverer.Done()
	splitterRunner := runner.NewSplitterRunner(fetcherName)

	ok := true
	for ok {
		select {
		case loc, ok = <-input.offsetChan:
			if !ok {
				// Channel is closed => we're shutting down, exit cleanly.
				runner.LogMessage("Fetcher all done! shutting down.")
				break
			}

			startTime = time.Now().UTC()
			// Read one message from the given location
			headers["Range"][0] = fmt.Sprintf("bytes=%d-%d", loc.Offset, loc.Offset+loc.Length-1)
			atomic.AddInt64(&input.processMessageCount, 1)
			atomic.AddInt64(&input.processMessageBytes, int64(loc.Length))
			for attempt := uint32(1); attempt <= input.S3Retries; attempt++ {
				record, err = getClientRecord(input.bucket, &loc, headers)
				if err != nil {
					runner.LogMessage(fmt.Sprintf("Error #%d fetching %s @ %d+%d: %s\n", attempt, loc.Key, loc.Offset, loc.Length, err))
				} else {
					break
				}
			}
			if err != nil {
				atomic.AddInt64(&input.processMessageFailures, 1)
				continue
			}
			splitterRunner.DeliverRecord(record, deliverer)
			duration = time.Now().UTC().Sub(startTime).Seconds()
			runner.LogMessage(fmt.Sprintf("Successfully fetched %s in %.2fs ", loc.Key, duration))

		case <-input.stop:
			runner.LogMessage("Stopping fetcher...")
			for _ = range input.offsetChan {
				// Drain the channel without processing anything.
			}
			ok = false
		}
	}

	wg.Done()
}
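MessageLocation is not defined in this snippet; from the way Key, Offset and Length feed the HTTP Range header and the byte counters, a plausible (assumed) definition is:

type MessageLocation struct {
	Key    string // S3 object key
	Offset uint32 // byte offset of the record within the object
	Length uint32 // record length in bytes
}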
Example #7
func (k *KafkaConsumerGroupInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	sRunner := ir.NewSplitterRunner("")

	defer func() {
		if err := k.consumer.Close(); err != nil {
			k.ir.LogError(fmt.Errorf("error closing the consumer: %s", err.Error()))
		}
		sRunner.Done()
	}()
	k.ir = ir

	go func() {
		for err := range k.consumer.Errors() {
			atomic.AddInt64(&k.processMessageFailures, 1)
			ir.LogError(err)
		}
	}()

	var (
		hostname = k.pConfig.Hostname()
		event    *sarama.ConsumerMessage
		ok       bool
		n        int
	)

	packDec := func(pack *pipeline.PipelinePack) {
		pack.Message.SetType("heka.kafka")
		pack.Message.SetLogger(k.name)
		pack.Message.SetHostname(hostname)
		k.addField(pack, "Key", event.Key, "")
		k.addField(pack, "Topic", event.Topic, "")
		k.addField(pack, "Partition", event.Partition, "")
		k.addField(pack, "Offset", event.Offset, "")
	}
	if !sRunner.UseMsgBytes() {
		sRunner.SetPackDecorator(packDec)
	}

	offsets := make(map[string]map[int32]int64)
	for {
		select {
		case event, ok = <-k.consumer.Messages():
			if !ok {
				return
			}

			if offsets[event.Topic] == nil {
				offsets[event.Topic] = make(map[int32]int64)
			}

			if offsets[event.Topic][event.Partition] != 0 && offsets[event.Topic][event.Partition] != event.Offset-1 {
				ir.LogError(fmt.Errorf("unexpected offset on %s:%d. Expected %d, found %d, diff %d.\n",
					event.Topic, event.Partition,
					offsets[event.Topic][event.Partition]+1, event.Offset,
					event.Offset-offsets[event.Topic][event.Partition]-1))
			}

			atomic.AddInt64(&k.processMessageCount, 1)

			if n, err = sRunner.SplitBytes(event.Value, nil); err != nil {
				ir.LogError(fmt.Errorf("processing message from topic %s: %s",
					event.Topic, err))
			}
			if n > 0 && n != len(event.Value) {
				ir.LogError(fmt.Errorf("extra data dropped in message from topic %s",
					event.Topic))
			}

			offsets[event.Topic][event.Partition] = event.Offset
			k.consumer.CommitUpto(event)
		case <-k.stopChan:
			return
		}
	}
}
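Heka terminates an input by calling its Stop method; the select above only needs k.stopChan to be closed. A minimal sketch of that side, assuming stopChan is a chan bool created during plugin initialization:

func (k *KafkaConsumerGroupInput) Stop() {
	close(k.stopChan) // unblocks the select in Run, which then returns
}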