func (p *Prospector) startHarvester(info *prospectorInfo, config *core.FileConfig) { var offset int64 if p.from_beginning { offset = 0 } else { offset = info.identity.Stat().Size() } // Send a new file event to allow registrar to begin persisting for this harvester p.registrar_spool.Add(registrar.NewDiscoverEvent(info, info.file, offset, info.identity.Stat())) p.startHarvesterWithOffset(info, config, offset) }
func (p *Prospector) scan(path string, config *core.FileConfig) { // Evaluate the path as a wildcards/shell glob matches, err := filepath.Glob(path) if err != nil { log.Error("glob(%s) failed: %v", path, err) return } // Check any matched files to see if we need to start a harvester for _, file := range matches { // Check the current info against our index info, is_known := p.prospectorindex[file] // Stat the file, following any symlinks // TODO: Low priority. Trigger loadFileId here for Windows instead of // waiting for Harvester or Registrar to do it fileinfo, err := os.Stat(file) if err == nil { if fileinfo.IsDir() { err = newProspectorSkipError("Directory") } } if err != nil { // Do we know this entry? if is_known { if info.status != Status_Invalid { // The current entry is not an error, orphan it so we can log one info.orphaned = Orphaned_Yes } else if info.err != err { // The error is different, remove this entry we'll log a new one delete(p.prospectors, info) } else { // The same error occurred - don't log it again info.update(nil, p.iteration) continue } } // This is a new error info = newProspectorInfoInvalid(file, err) info.update(nil, p.iteration) // Print a friendly log message if _, ok := err.(*ProspectorSkipError); ok { log.Info("Skipping %s: %s", file, err) } else { log.Error("Error prospecting %s: %s", file, err) } p.prospectors[info] = info p.prospectorindex[file] = info continue } else if is_known && info.status == Status_Invalid { // We have an error stub and we've just successfully got fileinfo // Mark is_known so we treat as a new file still is_known = false } // Conditions for starting a new harvester: // - file path hasn't been seen before // - the file's inode or device changed if !is_known { // Check for dead time, but only if the file modification time is before the last scan started // This ensures we don't skip genuine creations with dead times less than 10s if previous, previousinfo := p.lookupFileIds(file, fileinfo); previous != "" { // Symlinks could mean we see the same file twice - skip if we have if previousinfo == nil { p.flagDuplicateError(file, info) continue } // This file was simply renamed (known inode+dev) - link the same harvester channel as the old file log.Info("File rename was detected: %s -> %s", previous, file) info = previousinfo info.file = file p.registrar_spool.Add(registrar.NewRenamedEvent(info, file)) } else { // This is a new entry info = newProspectorInfoFromFileInfo(file, fileinfo) if fileinfo.ModTime().Before(p.lastscan) && time.Since(fileinfo.ModTime()) > config.DeadTime { // Old file, skip it, but push offset of file size so we start from the end if this file changes and needs picking up log.Info("Skipping file (older than dead time of %v): %s", config.DeadTime, file) // Store the offset that we should resume from if we notice a modification info.finish_offset = fileinfo.Size() p.registrar_spool.Add(registrar.NewDiscoverEvent(info, file, fileinfo.Size(), fileinfo)) } else { // Process new file log.Info("Launching harvester on new file: %s", file) p.startHarvester(info, config) } } // Store the new entry p.prospectors[info] = info } else { if !info.identity.SameAs(fileinfo) { // Keep the old file in case we find it again shortly info.orphaned = Orphaned_Maybe if previous, previousinfo := p.lookupFileIds(file, fileinfo); previous != "" { // Symlinks could mean we see the same file twice - skip if we have if previousinfo == nil { p.flagDuplicateError(file, nil) continue } // This file was renamed from another file we know - link the same harvester channel as the old file log.Info("File rename was detected: %s -> %s", previous, file) info = previousinfo info.file = file p.registrar_spool.Add(registrar.NewRenamedEvent(info, file)) } else { // File is not the same file we saw previously, it must have rotated and is a new file log.Info("Launching harvester on rotated file: %s", file) // Forget about the previous harvester and let it continue on the old file - so start a new channel to use with the new harvester info = newProspectorInfoFromFileInfo(file, fileinfo) // Process new file p.startHarvester(info, config) } // Store it p.prospectors[info] = info } } // Resume stopped harvesters resume := !info.isRunning() if resume { if info.status == Status_Resume { // This is a filestate that was saved, resume the harvester log.Info("Resuming harvester on a previously harvested file: %s", file) } else if info.status == Status_Failed { // Last attempt we failed to start, try again log.Info("Attempting to restart failed harvester: %s", file) } else if info.identity.Stat().ModTime() != fileinfo.ModTime() { // Resume harvesting of an old file we've stopped harvesting from log.Info("Resuming harvester on an old file that was just modified: %s", file) } else { resume = false } } info.update(fileinfo, p.iteration) if resume { p.startHarvesterWithOffset(info, config, info.finish_offset) } p.prospectorindex[file] = info } // for each file matched by the glob }