// Check if harvester for new file has to be started // For a new file the following options exist: func (p *Prospector) checkNewFile(newinfo *harvester.FileStat, file string, output chan *input.FileEvent) { logp.Debug("prospector", "Start harvesting unknown file: %s", file) // Init harvester with info h, err := harvester.NewHarvester( p.ProspectorConfig, &p.ProspectorConfig.Harvester, file, newinfo, output) if err != nil { logp.Err("Error initializing harvester: %v", err) return } // Check for unmodified time, but only if the file modification time is before the last scan started // This ensures we don't skip genuine creations with dead times less than 10s if newinfo.Fileinfo.ModTime().Before(p.lastscan) && time.Since(newinfo.Fileinfo.ModTime()) > p.ProspectorConfig.IgnoreOlderDuration { logp.Debug("prospector", "Fetching old state of file to resume: %s", file) // Call crawler if there if there exists a state for the given file offset, resuming := p.registrar.fetchState(file, newinfo.Fileinfo) // Are we resuming a dead file? We have to resume even if dead so we catch any old updates to the file // This is safe as the harvester, once it hits the EOF and a timeout, will stop harvesting // Once we detect changes again we can resume another harvester again - this keeps number of go routines to a minimum if resuming { logp.Debug("prospector", "Resuming harvester on a previously harvested file: %s", file) h.Offset = offset h.Start() } else { // Old file, skip it, but push offset of file size so we start from the end if this file changes and needs picking up logp.Debug("prospector", "Skipping file (older than ignore older of %v, %v): %s", p.ProspectorConfig.IgnoreOlderDuration, time.Since(newinfo.Fileinfo.ModTime()), file) newinfo.Skip(newinfo.Fileinfo.Size()) } } else if previousFile, err := p.getPreviousFile(file, newinfo.Fileinfo); err == nil { // This file was simply renamed (known inode+dev) - link the same harvester channel as the old file logp.Debug("prospector", "File rename was detected: %s -> %s", previousFile, file) lastinfo := p.prospectorList[previousFile] newinfo.Continue(&lastinfo) } else { // Call crawler if there if there exists a state for the given file offset, resuming := p.registrar.fetchState(file, newinfo.Fileinfo) // Are we resuming a file or is this a completely new file? if resuming { logp.Debug("prospector", "Resuming harvester on a previously harvested file: %s", file) } else { logp.Debug("prospector", "Launching harvester on new file: %s", file) } // Launch the harvester h.Offset = offset h.Start() } }