Example #1
0
// checkExistingFile decides whether a harvester has to be (re)started for a
// path that is already known from an earlier scan. h is the harvester prepared
// for the path, newFile is the file as it is seen now and oldFile is the file
// as it was recorded before.
//
// The following cases are handled:
//   - oldFile and newFile are not the same file. If getPreviousFile finds a
//     known file for the current file info, the harvester is handed to
//     continueExistingFile together with that previous file (rename).
//     Otherwise the path is treated as a rotated, new file and a fresh
//     harvester is started on it. In both cases the old file info is stored
//     in missingFiles under the path.
//   - The file is the same, h.Stat reports it as finished and the
//     modification time differs from the recorded one. Harvesting is resumed
//     with the value received from h.Stat.Return.
//   - Anything else: nothing is started.
func (p ProspectorLog) checkExistingFile(h *harvester.Harvester, newFile *input.File, oldFile *input.File) {
	logp.Debug("prospector", "Update existing file for harvesting: %s", h.Path)

	switch {
	case !oldFile.IsSameFile(newFile):
		// We assumed it was the same file, but it is not.
		logp.Debug("prospector", "File previously found: %s", h.Path)

		if previousFile, err := p.getPreviousFile(h.Path, h.Stat.Fileinfo); err == nil {
			p.continueExistingFile(h, previousFile)
		} else {
			// File is not the same file we saw previously, it must have rotated and is a new file
			logp.Debug("prospector", "Launching harvester on rotated file: %s", h.Path)

			// Forget about the previous harvester and let it continue on the old file - so start a new channel to use with the new harvester
			h.Stat.Ignore()

			// Start a new harvester on the path
			h.Start()
		}

		// Keep the old file in missingFiles so we don't rescan it if it was renamed and we've not yet reached the new filename.
		// We only need to keep it for the remainder of this iteration, then we can assume it was deleted and forget about it.
		p.missingFiles[h.Path] = oldFile.FileInfo

	case h.Stat.Finished() && !oldFile.FileInfo.ModTime().Equal(h.Stat.Fileinfo.ModTime()):
		// Modification times are compared with Equal rather than != so that only
		// the instant matters, not the Location or monotonic clock reading that
		// the == operator on time.Time also takes into account.
		//
		// Resume harvesting of an old file we've stopped harvesting from: the file
		// was just modified and it doesn't have a harvester. The offset to continue
		// from is stored in the harvester channel - receiving it also clears the channel.
		p.resumeHarvesting(h, <-h.Stat.Return)

	default:
		logp.Debug("prospector", "Not harvesting, file didn't change: %s", h.Path)
	}
}
Example #2
0
// checkExistingFile decides whether a harvester has to be (re)started for a
// path that is already known from an earlier scan. newinfo holds the current
// stat information for the path, newFile is the file as it is seen now,
// oldFile is the file as it was recorded before, file is the path, output is
// the channel passed to the new harvester and oldState carries the offset
// used when a rename is detected.
//
// A harvester object is always created first; if that fails the error is
// logged and nothing else happens. After that the following cases are handled:
//   - oldFile and newFile are not the same file. If getPreviousFile finds a
//     known file for the current file info (rename), the harvester gets the
//     offset from oldState and the new path, and its state is sent to the
//     registrar. Otherwise the path is treated as a rotated, new file: a fresh
//     harvester is started and its state is sent to the registrar. In both
//     cases the old file info is stored in missingFiles under the path.
//   - The file is the same, newinfo reports it as finished and the
//     modification time differs from the recorded one. The harvester is
//     started at the offset received from newinfo.Return and its state is
//     sent to the registrar.
//   - Anything else: nothing is started.
func (p *Prospector) checkExistingFile(newinfo *harvester.FileStat, newFile *input.File, oldFile *input.File, file string, output chan *input.FileEvent, oldState oldState) {
	logp.Debug("prospector", "Update existing file for harvesting: %s", file)

	h, err := harvester.NewHarvester(
		p.ProspectorConfig, &p.ProspectorConfig.Harvester,
		file, newinfo, output)
	if err != nil {
		logp.Err("Error initializing harvester: %v", err)
		return
	}

	switch {
	case !oldFile.IsSameFile(newFile):
		if previousFile, err := p.getPreviousFile(file, newinfo.Fileinfo); err == nil {
			// This file was renamed from another file we know - link the same harvester channel as the old file
			logp.Debug("prospector", "File rename was detected, existing file: %s -> %s", previousFile, file)
			logp.Debug("prospector", "Launching harvester on renamed file: %s", file)

			// NOTE(review): h.Start() is not called in this branch even though the
			// debug message above says a harvester is launched - presumably the
			// harvester already running on the old name keeps reading the renamed
			// file and only the persisted state needs the new path; confirm.
			h.SetOffset(oldState.offset)
			h.SetPath(file)

			p.registrar.Persist <- h.GetState()
		} else {
			// File is not the same file we saw previously, it must have rotated and is a new file
			logp.Debug("prospector", "Launching harvester on new file: %s. Old file was probably rotated", file)

			// Forget about the previous harvester and let it continue on the old file - so start a new channel to use with the new harvester
			newinfo.Ignore()

			// Start a new harvester on the path
			h.Start()
			p.registrar.Persist <- h.GetState()
		}

		// Keep the old file in missingFiles so we don't rescan it if it was renamed and we've not yet reached the new filename.
		// We only need to keep it for the remainder of this iteration, then we can assume it was deleted and forget about it.
		p.missingFiles[file] = oldFile.FileInfo

	case newinfo.Finished() && !oldFile.FileInfo.ModTime().Equal(newinfo.Fileinfo.ModTime()):
		// Modification times are compared with Equal rather than != so that only
		// the instant matters, not the Location or monotonic clock reading that
		// the == operator on time.Time also takes into account.
		//
		// Resume harvesting of an old file we've stopped harvesting from
		logp.Debug("prospector", "Resuming harvester on an old file that was just modified: %s", file)

		// Start a harvester on the path; an old file was just modified and it doesn't have a harvester.
		// The offset to continue from is stored in the harvester channel - receiving it also clears the channel.
		h.SetOffset(<-newinfo.Return)
		h.Start()
		p.registrar.Persist <- h.GetState()

	default:
		logp.Debug("prospector", "Not harvesting, file didn't change: %s", file)
	}
}