示例#1
0
// checkNewFile decides what to do with a file the prospector has not seen
// before. A harvester is always initialised first; if that fails the error is
// logged and nothing else happens. After that one of these paths is taken:
//
//   - The file was last modified before the previous scan started and is older
//     than IgnoreOlderDuration: resume harvesting from the registrar's stored
//     offset if a state exists, otherwise skip the file.
//   - getPreviousFile reports a previous name for the file (rename): continue
//     from the entry recorded under that previous name.
//   - Anything else: start the harvester at the offset returned by the
//     registrar.
func (p *Prospector) checkNewFile(newinfo *harvester.FileStat, file string, output chan *input.FileEvent) {
	logp.Debug("prospector", "Start harvesting unknown file: %s", file)

	// The harvester is created up front; only some of the paths below start it.
	hv, initErr := harvester.NewHarvester(
		p.ProspectorConfig, &p.ProspectorConfig.Harvester, file, newinfo, output)
	if initErr != nil {
		logp.Err("Error initializing harvester: %v", initErr)
		return
	}

	// A file only counts as stale when its modification time predates the last
	// scan AND exceeds the ignore-older threshold. The first condition keeps
	// files that were genuinely created since the last scan from being skipped.
	modTime := newinfo.Fileinfo.ModTime()
	stale := modTime.Before(p.lastscan) &&
		time.Since(modTime) > p.ProspectorConfig.IgnoreOlderDuration

	if stale {
		logp.Debug("prospector", "Fetching old state of file to resume: %s", file)
		resumeOffset, known := p.registrar.fetchState(file, newinfo.Fileinfo)

		if !known {
			// Old file without a stored state: skip it, but record the current
			// file size as offset so that harvesting starts from the end should
			// the file change later and need picking up.
			logp.Debug("prospector", "Skipping file (older than ignore older of %v, %v): %s",
				p.ProspectorConfig.IgnoreOlderDuration,
				time.Since(modTime),
				file)
			newinfo.Skip(newinfo.Fileinfo.Size())
			return
		}

		// A state exists, so resume even though the file looks dead; this
		// catches any old updates. The harvester is expected to stop by itself
		// after EOF plus a timeout, which keeps the goroutine count low.
		logp.Debug("prospector", "Resuming harvester on a previously harvested file: %s", file)
		hv.Offset = resumeOffset
		hv.Start()
		return
	}

	if oldPath, lookupErr := p.getPreviousFile(file, newinfo.Fileinfo); lookupErr == nil {
		// The file was merely renamed: carry on from the entry tracked under
		// its previous name instead of starting a new harvester.
		logp.Debug("prospector", "File rename was detected: %s -> %s", oldPath, file)
		prior := p.prospectorList[oldPath]
		newinfo.Continue(&prior)
		return
	}

	// Neither stale nor renamed: ask the registrar whether a state exists for
	// this file and launch the harvester from the offset it returns.
	startOffset, known := p.registrar.fetchState(file, newinfo.Fileinfo)
	if known {
		logp.Debug("prospector", "Resuming harvester on a previously harvested file: %s", file)
	} else {
		logp.Debug("prospector", "Launching harvester on new file: %s", file)
	}

	hv.Offset = startOffset
	hv.Start()
}