Ejemplo n.º 1
0
// startHarvester chooses the initial read offset for info, announces the file
// to the registrar spool and then launches a harvester at that offset.
//
// When p.from_beginning is set the harvester starts at offset zero; otherwise
// it starts at the size currently reported by info.identity.Stat().
func (p *Prospector) startHarvester(info *prospectorInfo, config *core.FileConfig) {
	// Default to the start of the file, moving to the current size only
	// when we were not asked to read from the beginning
	startAt := int64(0)
	if !p.from_beginning {
		startAt = info.identity.Stat().Size()
	}

	// Queue a discover event so the registrar can begin persisting state
	// for this harvester
	discovered := registrar.NewDiscoverEvent(info, info.file, startAt, info.identity.Stat())
	p.registrar_spool.Add(discovered)

	p.startHarvesterWithOffset(info, config, startAt)
}
Ejemplo n.º 2
0
// scan expands path as a shell glob and reconciles every match against the
// prospector's index, starting, resuming or re-linking harvesters as needed.
//
// For each matched file it:
//   - records stat failures and directories as invalid entries, logging a
//     given error only when it first appears or when its message changes;
//   - detects renames through lookupFileIds and re-links the existing entry;
//   - starts a harvester for new or rotated files, unless a new file is older
//     than config.DeadTime, in which case only its end offset is remembered;
//   - resumes harvesters that are not running when the entry is flagged for
//     resume, previously failed, or the file's modification time has changed.
//
// A glob failure is logged and the scan is abandoned.
func (p *Prospector) scan(path string, config *core.FileConfig) {
	// Evaluate the path as a wildcards/shell glob
	matches, err := filepath.Glob(path)
	if err != nil {
		log.Error("glob(%s) failed: %v", path, err)
		return
	}

	// Check any matched files to see if we need to start a harvester
	for _, file := range matches {
		// Check the current info against our index
		info, is_known := p.prospectorindex[file]

		// Stat the file, following any symlinks
		// TODO: Low priority. Trigger loadFileId here for Windows instead of
		//       waiting for Harvester or Registrar to do it
		fileinfo, err := os.Stat(file)
		if err == nil {
			if fileinfo.IsDir() {
				// Directories cannot be harvested; treat them as a skip error
				err = newProspectorSkipError("Directory")
			}
		}

		if err != nil {
			// Do we know this entry?
			if is_known {
				if info.status != Status_Invalid {
					// The current entry is not an error, orphan it so we can log one
					info.orphaned = Orphaned_Yes
				} else if info.err == nil || info.err.Error() != err.Error() {
					// The error is different, remove this entry we'll log a new one.
					// Messages are compared rather than the error values themselves:
					// os.Stat allocates a new *PathError on every call, so comparing
					// the interface values would compare pointer identity and report
					// a "different" error on every scan.
					delete(p.prospectors, info)
				} else {
					// The same error occurred - don't log it again
					info.update(nil, p.iteration)
					continue
				}
			}

			// This is a new error
			info = newProspectorInfoInvalid(file, err)
			info.update(nil, p.iteration)

			// Print a friendly log message
			if _, ok := err.(*ProspectorSkipError); ok {
				log.Info("Skipping %s: %s", file, err)
			} else {
				log.Error("Error prospecting %s: %s", file, err)
			}

			p.prospectors[info] = info
			p.prospectorindex[file] = info
			continue
		} else if is_known && info.status == Status_Invalid {
			// We have an error stub and we've just successfully got fileinfo
			// Mark is_known so we treat as a new file still
			is_known = false
		}

		// Conditions for starting a new harvester:
		// - file path hasn't been seen before
		// - the file's inode or device changed
		if !is_known {
			// Check for dead time, but only if the file modification time is before the last scan started
			// This ensures we don't skip genuine creations with dead times less than 10s
			if previous, previousinfo := p.lookupFileIds(file, fileinfo); previous != "" {
				// Symlinks could mean we see the same file twice - skip if we have
				if previousinfo == nil {
					p.flagDuplicateError(file, info)
					continue
				}

				// This file was simply renamed (known inode+dev) - link the same harvester channel as the old file
				log.Info("File rename was detected: %s -> %s", previous, file)
				info = previousinfo
				info.file = file

				p.registrar_spool.Add(registrar.NewRenamedEvent(info, file))
			} else {
				// This is a new entry
				info = newProspectorInfoFromFileInfo(file, fileinfo)

				if fileinfo.ModTime().Before(p.lastscan) && time.Since(fileinfo.ModTime()) > config.DeadTime {
					// Old file, skip it, but push offset of file size so we start from the end if this file changes and needs picking up
					log.Info("Skipping file (older than dead time of %v): %s", config.DeadTime, file)

					// Store the offset that we should resume from if we notice a modification
					info.finish_offset = fileinfo.Size()
					p.registrar_spool.Add(registrar.NewDiscoverEvent(info, file, fileinfo.Size(), fileinfo))
				} else {
					// Process new file
					log.Info("Launching harvester on new file: %s", file)
					p.startHarvester(info, config)
				}
			}

			// Store the new entry
			p.prospectors[info] = info
		} else {
			if !info.identity.SameAs(fileinfo) {
				// Keep the old file in case we find it again shortly
				info.orphaned = Orphaned_Maybe

				if previous, previousinfo := p.lookupFileIds(file, fileinfo); previous != "" {
					// Symlinks could mean we see the same file twice - skip if we have
					if previousinfo == nil {
						p.flagDuplicateError(file, nil)
						continue
					}

					// This file was renamed from another file we know - link the same harvester channel as the old file
					log.Info("File rename was detected: %s -> %s", previous, file)
					info = previousinfo
					info.file = file

					p.registrar_spool.Add(registrar.NewRenamedEvent(info, file))
				} else {
					// File is not the same file we saw previously, it must have rotated and is a new file
					log.Info("Launching harvester on rotated file: %s", file)

					// Forget about the previous harvester and let it continue on the old file - so start a new channel to use with the new harvester
					info = newProspectorInfoFromFileInfo(file, fileinfo)

					// Process new file
					p.startHarvester(info, config)
				}

				// Store it
				p.prospectors[info] = info
			}
		}

		// Resume stopped harvesters
		resume := !info.isRunning()
		if resume {
			if info.status == Status_Resume {
				// This is a filestate that was saved, resume the harvester
				log.Info("Resuming harvester on a previously harvested file: %s", file)
			} else if info.status == Status_Failed {
				// Last attempt we failed to start, try again
				log.Info("Attempting to restart failed harvester: %s", file)
			} else if !info.identity.Stat().ModTime().Equal(fileinfo.ModTime()) {
				// Resume harvesting of an old file we've stopped harvesting from.
				// Equal compares the instants, so a difference only in the
				// time.Time location does not count as a modification.
				log.Info("Resuming harvester on an old file that was just modified: %s", file)
			} else {
				resume = false
			}
		}

		// The modification time check above must see the previously stored
		// stat, so the entry is only refreshed once the decision is made
		info.update(fileinfo, p.iteration)

		if resume {
			p.startHarvesterWithOffset(info, config, info.finish_offset)
		}

		p.prospectorindex[file] = info
	} // for each file matched by the glob
}