func (p *ProspectorLog) Run() { logp.Debug("prospector", "Start next scan") p.scan() // It is important that a first scan is run before cleanup to make sure all new states are read first if p.config.CleanInactive > 0 || p.config.CleanRemoved { beforeCount := p.Prospector.states.Count() cleanedStates := p.Prospector.states.Cleanup() logp.Debug("prospector", "Prospector states cleaned up. Before: %d, After: %d", beforeCount, beforeCount-cleanedStates) } // Marking removed files to be cleaned up. Cleanup happens after next scan to make sure all states are updated first if p.config.CleanRemoved { for _, state := range p.Prospector.states.GetStates() { // os.Stat will return an error in case the file does not exist _, err := os.Stat(state.Source) if err != nil { // Only clean up files where state is Finished if state.Finished { state.TTL = 0 err := p.Prospector.updateState(input.NewEvent(state)) if err != nil { logp.Err("File cleanup state update error: %s", err) } logp.Debug("prospector", "Remove state for file as file removed: %s", state.Source) } else { logp.Debug("prospector", "State for file not removed because not finished: %s", state.Source) } } } } }
// handleIgnoreOlder handles states which fall under ignore older // Based on the state information it is decided if the state information has to be updated or not func (p *ProspectorLog) handleIgnoreOlder(lastState, newState file.State) error { logp.Debug("prospector", "Ignore file because ignore_older reached: %s", newState.Source) if !lastState.IsEmpty() { if !lastState.Finished { logp.Info("File is falling under ignore_older before harvesting is finished. Adjust your close_* settings: %s", newState.Source) } // Old state exist, no need to update it return nil } // Make sure file is not falling under clean_inactive yet if p.isCleanInactive(newState) { logp.Debug("prospector", "Do not write state for ignore_older because clean_inactive reached") return nil } // Set offset to end of file to be consistent with files which were harvested before // See https://github.com/elastic/beats/pull/2907 newState.Offset = newState.Fileinfo.Size() // Write state for ignore_older file as none exists yet newState.Finished = true err := p.Prospector.updateState(input.NewEvent(newState)) if err != nil { return err } return nil }
func (p *ProspectorLog) Init() { logp.Debug("prospector", "exclude_files: %s", p.config.ExcludeFiles) logp.Info("Load previous states from registry into memory") fileStates := p.Prospector.states.GetStates() // Make sure all states are set as finished for _, state := range fileStates { // Check if state source belongs to this prospector. If yes, update the state. if p.hasFile(state.Source) { // Set all states again to infinity TTL to make sure only removed if config still same // clean_inactive / clean_removed could have been changed between restarts state.TTL = -1 // Update prospector states and send new states to registry err := p.Prospector.updateState(input.NewEvent(state)) if err != nil { logp.Err("Problem putting initial state: %+v", err) } } else { // Only update internal state, do not report it to registry // Having all states could be useful in case later a file is moved into this prospector // TODO: Think about if this is expected or unexpected p.Prospector.states.Update(state) } } p.lastClean = time.Now() logp.Info("Previous states loaded: %v", p.Prospector.states.Count()) }
// handleIgnoreOlder handles states which fall under ignore older // Based on the state information it is decided if the state information has to be updated or not func (p *ProspectorLog) handleIgnoreOlder(lastState, newState file.State) error { logp.Debug("prospector", "Ignore file because ignore_older reached: %s", newState.Source) if !lastState.IsEmpty() { if !lastState.Finished { logp.Info("File is falling under ignore_older before harvesting is finished. Adjust your close_* settings: %s", newState.Source) } // Old state exist, no need to update it return nil } // Make sure file is not falling under clean_inactive yet if p.isCleanInactive(newState) { logp.Debug("prospector", "Do not write state for ignore_older because clean_inactive reached") return nil } // Write state for ignore_older file as none exists yet newState.Finished = true err := p.Prospector.updateState(input.NewEvent(newState)) if err != nil { return err } return nil }
// harvestExistingFile continues harvesting a file with a known state if needed func (p *ProspectorLog) harvestExistingFile(newState file.State, oldState file.State) { logp.Debug("prospector", "Update existing file for harvesting: %s, offset: %v", newState.Source, oldState.Offset) // No harvester is running for the file, start a new harvester // It is important here that only the size is checked and not modification time, as modification time could be incorrect on windows // https://blogs.technet.microsoft.com/asiasupp/2010/12/14/file-date-modified-property-are-not-updating-while-modifying-a-file-without-closing-it/ if oldState.Finished && newState.Fileinfo.Size() > oldState.Offset { // Resume harvesting of an old file we've stopped harvesting from // This could also be an issue with force_close_older that a new harvester is started after each scan but not needed? // One problem with comparing modTime is that it is in seconds, and scans can happen more then once a second logp.Debug("prospector", "Resuming harvesting of file: %s, offset: %v", newState.Source, oldState.Offset) err := p.Prospector.startHarvester(newState, oldState.Offset) if err != nil { logp.Err("Harvester could not be started on existing file: %s, Err: %s", newState.Source, err) } return } // File size was reduced -> truncated file if oldState.Finished && newState.Fileinfo.Size() < oldState.Offset { logp.Debug("prospector", "Old file was truncated. Starting from the beginning: %s", newState.Source) err := p.Prospector.startHarvester(newState, 0) if err != nil { logp.Err("Harvester could not be started on truncated file: %s, Err: %s", newState.Source, err) } filesTrucated.Add(1) return } // Check if file was renamed if oldState.Source != "" && oldState.Source != newState.Source { // This does not start a new harvester as it is assume that the older harvester is still running // or no new lines were detected. It sends only an event status update to make sure the new name is persisted. logp.Debug("prospector", "File rename was detected: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset) if oldState.Finished { logp.Debug("prospector", "Updating state for renamed file: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset) // Update state because of file rotation oldState.Source = newState.Source event := input.NewEvent(oldState) err := p.Prospector.updateState(event) if err != nil { logp.Err("File rotation state update error: %s", err) } filesRenamed.Add(1) } else { logp.Debug("prospector", "File rename detected but harvester not finished yet.") } } if !oldState.Finished { // Nothing to do. Harvester is still running and file was not renamed logp.Debug("prospector", "Harvester for file is still running: %s", newState.Source) } else { logp.Debug("prospector", "File didn't change: %s", newState.Source) } }
func (p *ProspectorLog) Run() { logp.Debug("prospector", "Start next scan") p.scan() // It is important that a first scan is run before cleanup to make sure all new states are read first if p.config.CleanInactive > 0 || p.config.CleanRemoved { beforeCount := p.Prospector.states.Count() p.Prospector.states.Cleanup() logp.Debug("prospector", "Prospector states cleaned up. Before: %d, After: %d", beforeCount, p.Prospector.states.Count()) } // Cleanup of removed files will only happen after next scan. Otherwise it can happen that not all states // were updated before cleanup is called if p.config.CleanRemoved { for _, state := range p.Prospector.states.GetStates() { // os.Stat will return an error in case the file does not exist _, err := os.Stat(state.Source) if err != nil { state.TTL = 0 event := input.NewEvent(state) p.Prospector.harvesterChan <- event logp.Debug("prospector", "Remove state for file as file removed: %s", state.Source) } } } }
// startHarvester starts a new harvester with the given offset // In case the HarvesterLimit is reached, an error is returned func (p *Prospector) startHarvester(state file.State, offset int64) error { if p.config.HarvesterLimit > 0 && atomic.LoadUint64(&p.harvesterCounter) >= p.config.HarvesterLimit { harvesterSkipped.Add(1) return fmt.Errorf("Harvester limit reached.") } state.Offset = offset // Set state to "not" finished to indicate that a harvester is running state.Finished = false // Create harvester with state h, err := p.createHarvester(state) if err != nil { return err } reader, err := h.Setup() if err != nil { return fmt.Errorf("Error setting up harvester: %s", err) } // State is directly updated and not through channel to make state update immediate // State is only updated after setup is completed successfully err = p.updateState(input.NewEvent(state)) if err != nil { return err } p.wg.Add(1) // startHarvester is not run concurrently, but atomic operations are need for the decrementing of the counter // inside the following go routine atomic.AddUint64(&p.harvesterCounter, 1) go func() { defer func() { atomic.AddUint64(&p.harvesterCounter, ^uint64(0)) p.wg.Done() }() // Starts harvester and picks the right type. In case type is not set, set it to defeault (log) h.Harvest(reader) }() return nil }
// Init sets up the prospector // It goes through all states coming from the registry. Only the states which match the glob patterns of // the prospector will be loaded and updated. All other states will not be touched. func (p *ProspectorLog) Init(states []file.State) error { logp.Debug("prospector", "exclude_files: %s", p.config.ExcludeFiles) for _, state := range states { // Check if state source belongs to this prospector. If yes, update the state. if p.matchesFile(state.Source) { state.TTL = -1 // Update prospector states and send new states to registry err := p.Prospector.updateState(input.NewEvent(state)) if err != nil { logp.Err("Problem putting initial state: %+v", err) return err } } } logp.Info("Prospector with previous states loaded: %v", p.Prospector.states.Count()) return nil }
// Harvest reads files line by line and sends events to the defined output func (h *Harvester) Harvest(r reader.Reader) { harvesterStarted.Add(1) harvesterRunning.Add(1) defer harvesterRunning.Add(-1) // Makes sure file is properly closed when the harvester is stopped defer h.close() // Channel to stop internal harvester routines harvestDone := make(chan struct{}) defer close(harvestDone) // Closes reader after timeout or when done channel is closed // This routine is also responsible to properly stop the reader go func() { var closeTimeout <-chan time.Time if h.config.CloseTimeout > 0 { closeTimeout = time.After(h.config.CloseTimeout) } select { // Applies when timeout is reached case <-closeTimeout: logp.Info("Closing harvester because close_timeout was reached: %s", h.state.Source) // Required for shutdown when hanging inside reader case <-h.done: // Required when reader loop returns and reader finished case <-harvestDone: } h.fileReader.Close() }() logp.Info("Harvester started for file: %s", h.state.Source) for { select { case <-h.done: return default: } message, err := r.Next() if err != nil { switch err { case ErrFileTruncate: logp.Info("File was truncated. Begin reading file from offset 0: %s", h.state.Source) h.state.Offset = 0 filesTruncated.Add(1) case ErrRemoved: logp.Info("File was removed: %s. Closing because close_removed is enabled.", h.state.Source) case ErrRenamed: logp.Info("File was renamed: %s. Closing because close_renamed is enabled.", h.state.Source) case ErrClosed: logp.Info("Reader was closed: %s. Closing.", h.state.Source) case io.EOF: logp.Info("End of file reached: %s. Closing because close_eof is enabled.", h.state.Source) case ErrInactive: logp.Info("File is inactive: %s. Closing because close_inactive of %v reached.", h.state.Source, h.config.CloseInactive) default: logp.Err("Read line error: %s; File: ", err, h.state.Source) } return } // Strip UTF-8 BOM if beginning of file // As all BOMS are converted to UTF-8 it is enough to only remove this one if h.state.Offset == 0 { message.Content = bytes.Trim(message.Content, "\xef\xbb\xbf") } // Update offset h.state.Offset += int64(message.Bytes) // Create state event event := input.NewEvent(h.getState()) text := string(message.Content) // Check if data should be added to event. Only export non empty events. if !message.IsEmpty() && h.shouldExportLine(text) { event.ReadTime = message.Ts event.Bytes = message.Bytes event.Text = &text event.JSONFields = message.Fields event.EventMetadata = h.config.EventMetadata event.InputType = h.config.InputType event.DocumentType = h.config.DocumentType event.JSONConfig = h.config.JSON } // Always send event to update state, also if lines was skipped // Stop harvester in case of an error if !h.sendEvent(event) { return } } }
// sendStateUpdate send an empty event with the current state to update the registry func (h *Harvester) sendStateUpdate() bool { logp.Debug("harvester", "Update state: %s, offset: %v", h.state.Source, h.state.Offset) event := input.NewEvent(h.getState()) return h.sendEvent(event) }
// Log harvester reads files line by line and sends events to the defined output func (h *Harvester) Harvest() { harvesterStarted.Add(1) harvesterRunning.Add(1) defer harvesterRunning.Add(-1) h.state.Finished = false // Makes sure file is properly closed when the harvester is stopped defer h.close() err := h.open() if err != nil { logp.Err("Stop Harvesting. Unexpected file opening error: %s", err) return } logp.Info("Harvester started for file: %s", h.state.Source) r, err := h.newLogFileReader() if err != nil { logp.Err("Stop Harvesting. Unexpected encoding line reader error: %s", err) return } // Always report the state before starting a harvester // This is useful in case the file was renamed if !h.sendStateUpdate() { return } for { select { case <-h.done: return default: } message, err := r.Next() if err != nil { switch err { case ErrFileTruncate: logp.Info("File was truncated. Begin reading file from offset 0: %s", h.state.Source) h.state.Offset = 0 filesTruncated.Add(1) case ErrRemoved: logp.Info("File was removed: %s. Closing because close_removed is enabled.", h.state.Source) case ErrRenamed: logp.Info("File was renamed: %s. Closing because close_renamed is enabled.", h.state.Source) case ErrClosed: logp.Info("Reader was closed: %s. Closing.", h.state.Source) case io.EOF: logp.Info("End of file reached: %s. Closing because close_eof is enabled.", h.state.Source) default: logp.Info("Read line error: %s", err) } return } // Update offset h.state.Offset += int64(message.Bytes) // Create state event event := input.NewEvent(h.getState()) text := string(message.Content) // Check if data should be added to event. Only export non empty events. if !message.IsEmpty() && h.shouldExportLine(text) { event.ReadTime = message.Ts event.Bytes = message.Bytes event.Text = &text event.JSONFields = message.Fields event.EventMetadata = h.config.EventMetadata event.InputType = h.config.InputType event.DocumentType = h.config.DocumentType event.JSONConfig = h.config.JSON } // Always send event to update state, also if lines was skipped // Stop harvester in case of an error if !h.sendEvent(event) { return } } }
// Harvest reads files line by line and sends events to the defined output func (h *Harvester) Harvest(r reader.Reader) { harvesterStarted.Add(1) harvesterRunning.Add(1) defer harvesterRunning.Add(-1) // Makes sure file is properly closed when the harvester is stopped defer h.close() logp.Info("Harvester started for file: %s", h.state.Source) for { select { case <-h.done: return default: } message, err := r.Next() if err != nil { switch err { case ErrFileTruncate: logp.Info("File was truncated. Begin reading file from offset 0: %s", h.state.Source) h.state.Offset = 0 filesTruncated.Add(1) case ErrRemoved: logp.Info("File was removed: %s. Closing because close_removed is enabled.", h.state.Source) case ErrRenamed: logp.Info("File was renamed: %s. Closing because close_renamed is enabled.", h.state.Source) case ErrClosed: logp.Info("Reader was closed: %s. Closing.", h.state.Source) case io.EOF: logp.Info("End of file reached: %s. Closing because close_eof is enabled.", h.state.Source) case ErrInactive: logp.Info("File is inactive: %s. Closing because close_inactive of %v reached.", h.state.Source, h.config.CloseInactive) default: logp.Err("Read line error: %s; File: ", err, h.state.Source) } return } // Strip UTF-8 BOM if beginning of file // As all BOMS are converted to UTF-8 it is enough to only remove this one if h.state.Offset == 0 { message.Content = bytes.Trim(message.Content, "\xef\xbb\xbf") } // Update offset h.state.Offset += int64(message.Bytes) // Create state event event := input.NewEvent(h.getState()) text := string(message.Content) // Check if data should be added to event. Only export non empty events. if !message.IsEmpty() && h.shouldExportLine(text) { event.ReadTime = message.Ts event.Bytes = message.Bytes event.Text = &text event.JSONFields = message.Fields event.EventMetadata = h.config.EventMetadata event.InputType = h.config.InputType event.DocumentType = h.config.DocumentType event.JSONConfig = h.config.JSON } // Always send event to update state, also if lines was skipped // Stop harvester in case of an error if !h.sendEvent(event) { return } } }