Example #1
0
func (p *ProspectorLog) Run() {
	logp.Debug("prospector", "Start next scan")

	p.scan()

	// It is important that a first scan is run before cleanup to make sure all new states are read first
	if p.config.CleanInactive > 0 || p.config.CleanRemoved {
		beforeCount := p.Prospector.states.Count()
		cleanedStates := p.Prospector.states.Cleanup()
		logp.Debug("prospector", "Prospector states cleaned up. Before: %d, After: %d", beforeCount, beforeCount-cleanedStates)
	}

	// Mark removed files to be cleaned up. Cleanup happens after the next scan to make sure all states are updated first
	if p.config.CleanRemoved {
		for _, state := range p.Prospector.states.GetStates() {
			// os.Stat will return an error in case the file does not exist
			_, err := os.Stat(state.Source)
			if err != nil {
				// Only clean up files where state is Finished
				if state.Finished {
					state.TTL = 0
					err := p.Prospector.updateState(input.NewEvent(state))
					if err != nil {
						logp.Err("File cleanup state update error: %s", err)
					}
					logp.Debug("prospector", "Remove state for file as file removed: %s", state.Source)
				} else {
					logp.Debug("prospector", "State for file not removed because not finished: %s", state.Source)
				}
			}
		}
	}
}
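The cleanup above relies on two TTL conventions that also appear in the other examples: a TTL of -1 means a state never expires, and a TTL of 0 marks a state for removal on the next Cleanup() pass. The following standalone sketch only models those semantics with a hypothetical stateRegistry type; it is not the actual filebeat registrar.

// Minimal model of the TTL semantics used above; the stateRegistry type and
// its fields are illustrative only, not the real filebeat registrar.
package main

import (
	"fmt"
	"time"
)

type fileState struct {
	Source   string
	TTL      time.Duration // -1: never expire, 0: remove on next cleanup, >0: expire after inactivity
	Modified time.Time
}

type stateRegistry struct {
	states []fileState
}

// Cleanup drops expired states and returns how many were removed.
func (r *stateRegistry) Cleanup() int {
	kept := r.states[:0]
	removed := 0
	for _, s := range r.states {
		expired := s.TTL == 0 || (s.TTL > 0 && time.Since(s.Modified) > s.TTL)
		if expired {
			removed++
			continue
		}
		kept = append(kept, s)
	}
	r.states = kept
	return removed
}

func main() {
	r := &stateRegistry{states: []fileState{
		{Source: "/var/log/a.log", TTL: -1, Modified: time.Now()},
		{Source: "/var/log/removed.log", TTL: 0}, // marked for removal, as in Run() above
	}}
	before := len(r.states)
	cleaned := r.Cleanup()
	fmt.Printf("Before: %d, After: %d\n", before, before-cleaned) // Before: 2, After: 1
}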
Example #2
0
// handleIgnoreOlder handles states which fall under ignore_older.
// Based on the state information it decides whether the state has to be updated or not.
func (p *ProspectorLog) handleIgnoreOlder(lastState, newState file.State) error {
	logp.Debug("prospector", "Ignore file because ignore_older reached: %s", newState.Source)

	if !lastState.IsEmpty() {
		if !lastState.Finished {
			logp.Info("File is falling under ignore_older before harvesting is finished. Adjust your close_* settings: %s", newState.Source)
		}
		// An old state exists, no need to update it
		return nil
	}

	// Make sure file is not falling under clean_inactive yet
	if p.isCleanInactive(newState) {
		logp.Debug("prospector", "Do not write state for ignore_older because clean_inactive reached")
		return nil
	}

	// Set offset to end of file to be consistent with files which were harvested before
	// See https://github.com/elastic/beats/pull/2907
	newState.Offset = newState.Fileinfo.Size()

	// Write state for ignore_older file as none exists yet
	newState.Finished = true
	err := p.Prospector.updateState(input.NewEvent(newState))
	if err != nil {
		return err
	}

	return nil
}
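The ordering of the checks above matters: an existing state short-circuits everything, clean_inactive wins over ignore_older, and only when neither applies is a finished state written with the offset at the end of the file. The sketch below condenses the age thresholds into a single decision helper; the ageAction name is purely illustrative and assumes clean_inactive is configured larger than ignore_older.

// Illustrative decision helper for the ignore_older / clean_inactive thresholds
// checked in handleIgnoreOlder; not part of the prospector API.
package main

import (
	"fmt"
	"time"
)

func ageAction(age, ignoreOlder, cleanInactive time.Duration) string {
	switch {
	case cleanInactive > 0 && age >= cleanInactive:
		return "skip: clean_inactive reached, do not write a state"
	case ignoreOlder > 0 && age >= ignoreOlder:
		return "ignore: write a finished state with offset at the end of the file"
	default:
		return "harvest"
	}
}

func main() {
	ignoreOlder := 24 * time.Hour
	cleanInactive := 72 * time.Hour
	fmt.Println(ageAction(1*time.Hour, ignoreOlder, cleanInactive))  // harvest
	fmt.Println(ageAction(36*time.Hour, ignoreOlder, cleanInactive)) // ignore: ...
	fmt.Println(ageAction(96*time.Hour, ignoreOlder, cleanInactive)) // skip: ...
}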
Example #3
0
func (p *ProspectorLog) Init() {
	logp.Debug("prospector", "exclude_files: %s", p.config.ExcludeFiles)

	logp.Info("Load previous states from registry into memory")
	fileStates := p.Prospector.states.GetStates()

	// Make sure all states are set as finished
	for _, state := range fileStates {

		// Check if state source belongs to this prospector. If yes, update the state.
		if p.hasFile(state.Source) {
			// Reset all states to an infinite TTL to make sure they are only removed if the config is still the same
			// clean_inactive / clean_removed could have been changed between restarts
			state.TTL = -1

			// Update prospector states and send new states to registry
			err := p.Prospector.updateState(input.NewEvent(state))
			if err != nil {
				logp.Err("Problem putting initial state: %+v", err)
			}
		} else {
			// Only update the internal state, do not report it to the registry
			// Having all states could be useful in case a file is later moved into this prospector
			// TODO: Think about whether this is expected or unexpected
			p.Prospector.states.Update(state)
		}
	}

	p.lastClean = time.Now()

	logp.Info("Previous states loaded: %v", p.Prospector.states.Count())
}
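Init only touches states whose Source belongs to this prospector, which p.hasFile decides; the real check is not shown in these excerpts. The sketch below only illustrates what such a check could look like using glob patterns and filepath.Match, with matchesAnyGlob being a hypothetical stand-in.

// Hypothetical stand-in for a hasFile-style check based on glob patterns;
// the real prospector implementation may differ.
package main

import (
	"fmt"
	"path/filepath"
)

func matchesAnyGlob(globs []string, source string) bool {
	for _, glob := range globs {
		// filepath.Match returns an error only for malformed patterns
		if ok, err := filepath.Match(glob, source); err == nil && ok {
			return true
		}
	}
	return false
}

func main() {
	globs := []string{"/var/log/*.log"}
	fmt.Println(matchesAnyGlob(globs, "/var/log/syslog.log")) // true
	fmt.Println(matchesAnyGlob(globs, "/tmp/other.txt"))      // false
}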
Example #4
0
// handleIgnoreOlder handles states which fall under ignore_older.
// Based on the state information it decides whether the state has to be updated or not.
func (p *ProspectorLog) handleIgnoreOlder(lastState, newState file.State) error {
	logp.Debug("prospector", "Ignore file because ignore_older reached: %s", newState.Source)

	if !lastState.IsEmpty() {
		if !lastState.Finished {
			logp.Info("File is falling under ignore_older before harvesting is finished. Adjust your close_* settings: %s", newState.Source)
		}
		// An old state exists, no need to update it
		return nil
	}

	// Make sure file is not falling under clean_inactive yet
	if p.isCleanInactive(newState) {
		logp.Debug("prospector", "Do not write state for ignore_older because clean_inactive reached")
		return nil
	}

	// Write state for ignore_older file as none exists yet
	newState.Finished = true
	err := p.Prospector.updateState(input.NewEvent(newState))
	if err != nil {
		return err
	}

	return nil
}
Example #5
0
// harvestExistingFile continues harvesting a file with a known state if needed
func (p *ProspectorLog) harvestExistingFile(newState file.State, oldState file.State) {

	logp.Debug("prospector", "Update existing file for harvesting: %s, offset: %v", newState.Source, oldState.Offset)

	// No harvester is running for the file, start a new harvester
	// It is important here that only the size is checked and not the modification time, as the modification time could be incorrect on Windows
	// https://blogs.technet.microsoft.com/asiasupp/2010/12/14/file-date-modified-property-are-not-updating-while-modifying-a-file-without-closing-it/
	if oldState.Finished && newState.Fileinfo.Size() > oldState.Offset {
		// Resume harvesting of an old file we've stopped harvesting from
		// Could this also be an issue with force_close_older, where a new harvester is started after each scan even though it is not needed?
		// One problem with comparing modTime is that it is in seconds, and scans can happen more than once a second
		logp.Debug("prospector", "Resuming harvesting of file: %s, offset: %v", newState.Source, oldState.Offset)
		err := p.Prospector.startHarvester(newState, oldState.Offset)
		if err != nil {
			logp.Err("Harvester could not be started on existing file: %s, Err: %s", newState.Source, err)
		}
		return
	}

	// File size was reduced -> truncated file
	if oldState.Finished && newState.Fileinfo.Size() < oldState.Offset {
		logp.Debug("prospector", "Old file was truncated. Starting from the beginning: %s", newState.Source)
		err := p.Prospector.startHarvester(newState, 0)
		if err != nil {
			logp.Err("Harvester could not be started on truncated file: %s, Err: %s", newState.Source, err)
		}

		filesTrucated.Add(1)
		return
	}

	// Check if file was renamed
	if oldState.Source != "" && oldState.Source != newState.Source {
		// This does not start a new harvester as it is assumed that the old harvester is still running
		// or no new lines were detected. It only sends an event status update to make sure the new name is persisted.
		logp.Debug("prospector", "File rename was detected: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset)

		if oldState.Finished {
			logp.Debug("prospector", "Updating state for renamed file: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset)
			// Update state because of file rotation
			oldState.Source = newState.Source
			event := input.NewEvent(oldState)
			err := p.Prospector.updateState(event)
			if err != nil {
				logp.Err("File rotation state update error: %s", err)
			}

			filesRenamed.Add(1)
		} else {
			logp.Debug("prospector", "File rename detected but harvester not finished yet.")
		}
	}

	if !oldState.Finished {
		// Nothing to do. Harvester is still running and file was not renamed
		logp.Debug("prospector", "Harvester for file is still running: %s", newState.Source)
	} else {
		logp.Debug("prospector", "File didn't change: %s", newState.Source)
	}
}
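The branches in harvestExistingFile reduce to comparing the current file size against the last known offset, deliberately ignoring the modification time. A standalone sketch of just that comparison follows; the transition type and its constant names are illustrative and not part of the original code.

// Illustrative helper mirroring the size/offset checks in harvestExistingFile.
package main

import "fmt"

type transition int

const (
	fileUnchanged transition = iota
	fileAppended              // size grew past the stored offset -> resume harvesting
	fileTruncated             // size shrank below the stored offset -> restart at offset 0
)

// classify compares only the size against the last known offset, never the modification time.
func classify(size, offset int64) transition {
	switch {
	case size > offset:
		return fileAppended
	case size < offset:
		return fileTruncated
	default:
		return fileUnchanged
	}
}

func main() {
	fmt.Println(classify(2048, 1024)) // 1 (fileAppended)
	fmt.Println(classify(512, 1024))  // 2 (fileTruncated)
	fmt.Println(classify(1024, 1024)) // 0 (fileUnchanged)
}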
Example #6
0
func (p *ProspectorLog) Run() {
	logp.Debug("prospector", "Start next scan")

	p.scan()

	// It is important that a first scan is run before cleanup to make sure all new states are read first
	if p.config.CleanInactive > 0 || p.config.CleanRemoved {
		beforeCount := p.Prospector.states.Count()
		p.Prospector.states.Cleanup()
		logp.Debug("prospector", "Prospector states cleaned up. Before: %d, After: %d", beforeCount, p.Prospector.states.Count())
	}

	// Cleanup of removed files only happens after the next scan. Otherwise it could happen that not all states
	// were updated before cleanup is called
	if p.config.CleanRemoved {
		for _, state := range p.Prospector.states.GetStates() {
			// os.Stat will return an error in case the file does not exist
			_, err := os.Stat(state.Source)
			if err != nil {
				state.TTL = 0
				event := input.NewEvent(state)
				p.Prospector.harvesterChan <- event
				logp.Debug("prospector", "Remove state for file as file removed: %s", state.Source)
			}
		}
	}
}
Example #7
0
// startHarvester starts a new harvester with the given offset
// In case the HarvesterLimit is reached, an error is returned
func (p *Prospector) startHarvester(state file.State, offset int64) error {

	if p.config.HarvesterLimit > 0 && atomic.LoadUint64(&p.harvesterCounter) >= p.config.HarvesterLimit {
		harvesterSkipped.Add(1)
		return fmt.Errorf("Harvester limit reached.")
	}

	state.Offset = offset
	// Set state to "not" finished to indicate that a harvester is running
	state.Finished = false

	// Create harvester with state
	h, err := p.createHarvester(state)
	if err != nil {
		return err
	}

	reader, err := h.Setup()
	if err != nil {
		return fmt.Errorf("Error setting up harvester: %s", err)
	}

	// The state is updated directly and not through the channel to make the state update immediate
	// The state is only updated after setup has completed successfully
	err = p.updateState(input.NewEvent(state))
	if err != nil {
		return err
	}

	p.wg.Add(1)
	// startHarvester is not run concurrently, but atomic operations are needed for decrementing the counter
	// inside the following goroutine
	atomic.AddUint64(&p.harvesterCounter, 1)
	go func() {
		defer func() {
			atomic.AddUint64(&p.harvesterCounter, ^uint64(0))
			p.wg.Done()
		}()

		// Starts the harvester and picks the right type. In case the type is not set, it is set to the default (log)
		h.Harvest(reader)
	}()

	return nil
}
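startHarvester keeps the number of running harvesters below HarvesterLimit with a shared uint64 counter: it is read with atomic.LoadUint64 before starting, incremented before the goroutine launches, and decremented on exit via atomic.AddUint64(&counter, ^uint64(0)). A minimal standalone sketch of that pattern follows; the limit value and the simulated work are illustrative only.

// Minimal sketch of the harvester-limit pattern above.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

func main() {
	const limit = 2
	var running uint64
	var wg sync.WaitGroup
	started, skipped := 0, 0

	for i := 0; i < 5; i++ {
		if atomic.LoadUint64(&running) >= limit {
			skipped++ // corresponds to harvesterSkipped.Add(1) above
			continue
		}
		atomic.AddUint64(&running, 1)
		wg.Add(1)
		started++
		go func() {
			defer func() {
				atomic.AddUint64(&running, ^uint64(0)) // decrement by one
				wg.Done()
			}()
			time.Sleep(10 * time.Millisecond) // simulated harvesting work
		}()
	}

	wg.Wait()
	fmt.Printf("started=%d skipped=%d\n", started, skipped) // typically started=2 skipped=3
}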
Example #8
0
// Init sets up the prospector
// It goes through all states coming from the registry. Only the states which match the glob patterns of
// the prospector will be loaded and updated. All other states will not be touched.
func (p *ProspectorLog) Init(states []file.State) error {
	logp.Debug("prospector", "exclude_files: %s", p.config.ExcludeFiles)

	for _, state := range states {
		// Check if state source belongs to this prospector. If yes, update the state.
		if p.matchesFile(state.Source) {
			state.TTL = -1
			// Update prospector states and send new states to registry
			err := p.Prospector.updateState(input.NewEvent(state))
			if err != nil {
				logp.Err("Problem putting initial state: %+v", err)
				return err
			}
		}
	}

	logp.Info("Prospector with previous states loaded: %v", p.Prospector.states.Count())
	return nil
}
Example #9
0
// Harvest reads files line by line and sends events to the defined output
func (h *Harvester) Harvest(r reader.Reader) {

	harvesterStarted.Add(1)
	harvesterRunning.Add(1)
	defer harvesterRunning.Add(-1)

	// Makes sure file is properly closed when the harvester is stopped
	defer h.close()

	// Channel to stop internal harvester routines
	harvestDone := make(chan struct{})
	defer close(harvestDone)

	// Closes the reader after the timeout or when the done channel is closed
	// This routine is also responsible for properly stopping the reader
	go func() {
		var closeTimeout <-chan time.Time
		if h.config.CloseTimeout > 0 {
			closeTimeout = time.After(h.config.CloseTimeout)
		}

		select {
		// Applies when timeout is reached
		case <-closeTimeout:
			logp.Info("Closing harvester because close_timeout was reached: %s", h.state.Source)
		// Required for shutdown when hanging inside reader
		case <-h.done:
		// Required when reader loop returns and reader finished
		case <-harvestDone:
		}
		h.fileReader.Close()
	}()

	logp.Info("Harvester started for file: %s", h.state.Source)

	for {
		select {
		case <-h.done:
			return
		default:
		}

		message, err := r.Next()
		if err != nil {
			switch err {
			case ErrFileTruncate:
				logp.Info("File was truncated. Begin reading file from offset 0: %s", h.state.Source)
				h.state.Offset = 0
				filesTruncated.Add(1)
			case ErrRemoved:
				logp.Info("File was removed: %s. Closing because close_removed is enabled.", h.state.Source)
			case ErrRenamed:
				logp.Info("File was renamed: %s. Closing because close_renamed is enabled.", h.state.Source)
			case ErrClosed:
				logp.Info("Reader was closed: %s. Closing.", h.state.Source)
			case io.EOF:
				logp.Info("End of file reached: %s. Closing because close_eof is enabled.", h.state.Source)
			case ErrInactive:
				logp.Info("File is inactive: %s. Closing because close_inactive of %v reached.", h.state.Source, h.config.CloseInactive)
			default:
				logp.Err("Read line error: %s; File: ", err, h.state.Source)
			}
			return
		}

		// Strip the UTF-8 BOM at the beginning of the file
		// As all BOMs are converted to UTF-8, it is enough to remove only this one
		if h.state.Offset == 0 {
			message.Content = bytes.Trim(message.Content, "\xef\xbb\xbf")
		}

		// Update offset
		h.state.Offset += int64(message.Bytes)

		// Create state event
		event := input.NewEvent(h.getState())

		text := string(message.Content)

		// Check if data should be added to the event. Only export non-empty events.
		if !message.IsEmpty() && h.shouldExportLine(text) {
			event.ReadTime = message.Ts
			event.Bytes = message.Bytes
			event.Text = &text
			event.JSONFields = message.Fields
			event.EventMetadata = h.config.EventMetadata
			event.InputType = h.config.InputType
			event.DocumentType = h.config.DocumentType
			event.JSONConfig = h.config.JSON
		}

		// Always send the event to update the state, even if the line was skipped
		// Stop harvester in case of an error
		if !h.sendEvent(event) {
			return
		}
	}
}
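The close_timeout goroutine at the top of Harvest exists to unblock a reader that might otherwise hang forever: whichever of the three channels fires first, the underlying file reader is closed, which forces a blocked read to return. The sketch below models that behaviour with a hypothetical slowReader type standing in for the real log file reader; the channel names mirror the ones above but the rest is illustrative.

// Standalone model of the "close the reader on timeout, shutdown, or completion" pattern.
package main

import (
	"fmt"
	"io"
	"time"
)

type slowReader struct{ closed chan struct{} }

func (r *slowReader) Read(p []byte) (int, error) {
	<-r.closed // block until closed, like a tailed file with no new data
	return 0, io.EOF
}

func (r *slowReader) Close() error {
	close(r.closed)
	return nil
}

func main() {
	reader := &slowReader{closed: make(chan struct{})}
	done := make(chan struct{})        // external shutdown signal
	harvestDone := make(chan struct{}) // closed when the read loop returns
	defer close(harvestDone)

	closeTimeout := time.After(100 * time.Millisecond)
	go func() {
		select {
		case <-closeTimeout:
			fmt.Println("closing reader: close_timeout reached")
		case <-done:
		case <-harvestDone:
		}
		reader.Close()
	}()

	_, err := reader.Read(make([]byte, 64))
	fmt.Println("read returned:", err) // read returned: EOF
}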
Example #10
0
// sendStateUpdate sends an empty event with the current state to update the registry
func (h *Harvester) sendStateUpdate() bool {
	logp.Debug("harvester", "Update state: %s, offset: %v", h.state.Source, h.state.Offset)
	event := input.NewEvent(h.getState())
	return h.sendEvent(event)
}
Example #11
0
// Harvest reads files line by line and sends events to the defined output
func (h *Harvester) Harvest() {

	harvesterStarted.Add(1)
	harvesterRunning.Add(1)
	defer harvesterRunning.Add(-1)

	h.state.Finished = false

	// Makes sure file is properly closed when the harvester is stopped
	defer h.close()

	err := h.open()
	if err != nil {
		logp.Err("Stop Harvesting. Unexpected file opening error: %s", err)
		return
	}

	logp.Info("Harvester started for file: %s", h.state.Source)

	r, err := h.newLogFileReader()
	if err != nil {
		logp.Err("Stop Harvesting. Unexpected encoding line reader error: %s", err)
		return
	}

	// Always report the state before starting a harvester
	// This is useful in case the file was renamed
	if !h.sendStateUpdate() {
		return
	}

	for {
		select {
		case <-h.done:
			return
		default:
		}

		message, err := r.Next()
		if err != nil {
			switch err {
			case ErrFileTruncate:
				logp.Info("File was truncated. Begin reading file from offset 0: %s", h.state.Source)
				h.state.Offset = 0
				filesTruncated.Add(1)
			case ErrRemoved:
				logp.Info("File was removed: %s. Closing because close_removed is enabled.", h.state.Source)
			case ErrRenamed:
				logp.Info("File was renamed: %s. Closing because close_renamed is enabled.", h.state.Source)
			case ErrClosed:
				logp.Info("Reader was closed: %s. Closing.", h.state.Source)
			case io.EOF:
				logp.Info("End of file reached: %s. Closing because close_eof is enabled.", h.state.Source)
			default:
				logp.Info("Read line error: %s", err)
			}
			return
		}

		// Update offset
		h.state.Offset += int64(message.Bytes)

		// Create state event
		event := input.NewEvent(h.getState())

		text := string(message.Content)

		// Check if data should be added to the event. Only export non-empty events.
		if !message.IsEmpty() && h.shouldExportLine(text) {
			event.ReadTime = message.Ts
			event.Bytes = message.Bytes
			event.Text = &text
			event.JSONFields = message.Fields
			event.EventMetadata = h.config.EventMetadata
			event.InputType = h.config.InputType
			event.DocumentType = h.config.DocumentType
			event.JSONConfig = h.config.JSON
		}

		// Always send the event to update the state, even if the line was skipped
		// Stop harvester in case of an error
		if !h.sendEvent(event) {
			return
		}
	}
}
Example #12
0
// Harvest reads files line by line and sends events to the defined output
func (h *Harvester) Harvest(r reader.Reader) {

	harvesterStarted.Add(1)
	harvesterRunning.Add(1)
	defer harvesterRunning.Add(-1)

	// Makes sure file is properly closed when the harvester is stopped
	defer h.close()

	logp.Info("Harvester started for file: %s", h.state.Source)

	for {
		select {
		case <-h.done:
			return
		default:
		}

		message, err := r.Next()
		if err != nil {
			switch err {
			case ErrFileTruncate:
				logp.Info("File was truncated. Begin reading file from offset 0: %s", h.state.Source)
				h.state.Offset = 0
				filesTruncated.Add(1)
			case ErrRemoved:
				logp.Info("File was removed: %s. Closing because close_removed is enabled.", h.state.Source)
			case ErrRenamed:
				logp.Info("File was renamed: %s. Closing because close_renamed is enabled.", h.state.Source)
			case ErrClosed:
				logp.Info("Reader was closed: %s. Closing.", h.state.Source)
			case io.EOF:
				logp.Info("End of file reached: %s. Closing because close_eof is enabled.", h.state.Source)
			case ErrInactive:
				logp.Info("File is inactive: %s. Closing because close_inactive of %v reached.", h.state.Source, h.config.CloseInactive)
			default:
				logp.Err("Read line error: %s; File: ", err, h.state.Source)
			}
			return
		}

		// Strip the UTF-8 BOM at the beginning of the file
		// As all BOMs are converted to UTF-8, it is enough to remove only this one
		if h.state.Offset == 0 {
			message.Content = bytes.Trim(message.Content, "\xef\xbb\xbf")
		}

		// Update offset
		h.state.Offset += int64(message.Bytes)

		// Create state event
		event := input.NewEvent(h.getState())

		text := string(message.Content)

		// Check if data should be added to the event. Only export non-empty events.
		if !message.IsEmpty() && h.shouldExportLine(text) {
			event.ReadTime = message.Ts
			event.Bytes = message.Bytes
			event.Text = &text
			event.JSONFields = message.Fields
			event.EventMetadata = h.config.EventMetadata
			event.InputType = h.config.InputType
			event.DocumentType = h.config.DocumentType
			event.JSONConfig = h.config.JSON
		}

		// Always send the event to update the state, even if the line was skipped
		// Stop harvester in case of an error
		if !h.sendEvent(event) {
			return
		}
	}
}
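The BOM handling in Harvest only ever has to deal with the UTF-8 sequence EF BB BF, because earlier decoding normalizes other encodings to UTF-8. A small standalone illustration of that stripping step follows; note that bytes.Trim treats its second argument as a set of bytes trimmed from both ends, whereas bytes.TrimPrefix removes only an exact leading sequence, and either works for a line that simply starts with the BOM.

// Illustration of stripping a leading UTF-8 BOM from the first line of a file.
package main

import (
	"bytes"
	"fmt"
)

func main() {
	line := []byte("\xef\xbb\xbffirst line of the file")

	trimmed := bytes.Trim(line, "\xef\xbb\xbf")                // as in Harvest above
	prefixed := bytes.TrimPrefix(line, []byte("\xef\xbb\xbf")) // alternative: exact prefix only

	fmt.Printf("%q\n", trimmed)  // "first line of the file"
	fmt.Printf("%q\n", prefixed) // "first line of the file"
}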