Esempio n. 1
0
// stdinRun creates a harvester for the "-" source (logged as the stdin
// harvester), sends an empty state to the registrar, starts the harvester and
// then blocks until p.running becomes false.
//
// spoolChan is handed to the harvester as its event output channel. If the
// harvester cannot be created the error is logged and the method returns
// without sending anything to the registrar.
func (p *Prospector) stdinRun(spoolChan chan *input.FileEvent) {
	h, err := harvester.NewHarvester(
		p.ProspectorConfig,
		&p.ProspectorConfig.Harvester,
		"-",
		nil,
		spoolChan,
	)
	if err != nil {
		logp.Err("Error initializing stdin harvester: %v", err)
		return
	}

	// This signals we finished considering the previous state
	event := &input.FileState{
		Source: nil,
	}
	p.registrar.Persist <- event

	h.Start()

	// Poll the running flag once per second. time.Second is used directly so
	// no duration string is parsed (and no parse error dropped) per iteration.
	for p.running {
		time.Sleep(time.Second)
	}
}
Esempio n. 2
0
// createHarvester builds a harvester for file from the prospector's harvester
// configuration, the supplied stat information and p.harvesterChan. The result
// of harvester.NewHarvester is returned unchanged.
func (p *Prospector) createHarvester(file string, stat *input.FileStat) (*harvester.Harvester, error) {
	harvesterCfg := &p.ProspectorConfig.Harvester
	return harvester.NewHarvester(harvesterCfg, file, stat, p.harvesterChan)
}
Esempio n. 3
0
// AddHarvester builds a harvester for file from the prospector's harvester
// configuration, the supplied stat information and p.channel. The result of
// harvester.NewHarvester is returned unchanged; the harvester is not started
// here.
func (p *Prospector) AddHarvester(file string, stat *harvester.FileStat) (*harvester.Harvester, error) {
	harvesterCfg := &p.ProspectorConfig.Harvester
	return harvester.NewHarvester(harvesterCfg, file, stat, p.channel)
}
Esempio n. 4
0
// checkNewFile decides whether a harvester has to be started for a file path
// that was not known before. A harvester is always initialised first (an
// initialisation error is logged and ends the check); after that exactly one
// of the following applies:
//   - The file was modified before the last scan and longer ago than
//     IgnoreOlderDuration: resume from the registrar state if one exists,
//     otherwise skip the file, passing its current size to newinfo.Skip.
//   - getPreviousFile reports a previous name for the file: it is treated as
//     a rename and newinfo continues from the stat info stored under that name.
//   - Otherwise: the harvester is started at the offset returned by the
//     registrar.
func (p *Prospector) checkNewFile(newinfo *harvester.FileStat, file string, output chan *input.FileEvent) {
	logp.Debug("prospector", "Start harvesting unknown file: %s", file)

	// Init harvester with info
	hv, err := harvester.NewHarvester(
		p.ProspectorConfig,
		&p.ProspectorConfig.Harvester,
		file,
		newinfo,
		output,
	)
	if err != nil {
		logp.Err("Error initializing harvester: %v", err)
		return
	}

	// Check for unmodified time, but only if the file modification time is before the last scan started.
	// This ensures we don't skip genuine creations with dead times less than 10s.
	if newinfo.Fileinfo.ModTime().Before(p.lastscan) &&
		time.Since(newinfo.Fileinfo.ModTime()) > p.ProspectorConfig.IgnoreOlderDuration {

		logp.Debug("prospector", "Fetching old state of file to resume: %s", file)
		// Ask the registrar whether a state exists for the given file
		startAt, hasState := p.registrar.fetchState(file, newinfo.Fileinfo)

		if !hasState {
			// Old file, skip it, but push offset of file size so we start from the end
			// if this file changes and needs picking up
			logp.Debug("prospector", "Skipping file (older than ignore older of %v, %v): %s",
				p.ProspectorConfig.IgnoreOlderDuration,
				time.Since(newinfo.Fileinfo.ModTime()),
				file)
			newinfo.Skip(newinfo.Fileinfo.Size())
			return
		}

		// We resume even if the file is dead so any old updates to it are caught.
		// This is safe as the harvester, once it hits the EOF and a timeout, will stop harvesting.
		// Once changes are detected again another harvester can be resumed, which keeps
		// the number of goroutines to a minimum.
		logp.Debug("prospector", "Resuming harvester on a previously harvested file: %s", file)
		hv.Offset = startAt
		hv.Start()
		return
	}

	if previousFile, err := p.getPreviousFile(file, newinfo.Fileinfo); err == nil {
		// This file was simply renamed (known inode+dev) - link the same harvester channel as the old file
		logp.Debug("prospector", "File rename was detected: %s -> %s", previousFile, file)
		lastinfo := p.prospectorList[previousFile]
		newinfo.Continue(&lastinfo)
		return
	}

	// Ask the registrar whether a state exists for the given file
	startAt, hasState := p.registrar.fetchState(file, newinfo.Fileinfo)

	// Are we resuming a file or is this a completely new file?
	if hasState {
		logp.Debug("prospector", "Resuming harvester on a previously harvested file: %s", file)
	} else {
		logp.Debug("prospector", "Launching harvester on new file: %s", file)
	}

	// Launch the harvester
	hv.Offset = startAt
	hv.Start()
}
Esempio n. 5
0
// createHarvester builds a harvester for the given file state, wiring in the
// prospector's configuration (p.cfg), its harvester channel and its done
// channel. The result of harvester.NewHarvester is returned unchanged.
func (p *Prospector) createHarvester(state file.State) (*harvester.Harvester, error) {
	return harvester.NewHarvester(p.cfg, state, p.harvesterChan, p.done)
}
Esempio n. 6
0
// createHarvester builds a harvester for the given file state. The state's
// Source and Offset are passed to harvester.NewHarvester alongside the state
// itself, the prospector's harvester configuration and p.harvesterChan; the
// constructor's result is returned unchanged.
func (p *Prospector) createHarvester(state input.FileState) (*harvester.Harvester, error) {
	harvesterCfg := &p.ProspectorConfig.Harvester
	return harvester.NewHarvester(harvesterCfg, state.Source, state, p.harvesterChan, state.Offset)
}
Esempio n. 7
0
// checkExistingFile checks if a harvester has to be started for an already
// known file path. A harvester is always initialised first (an initialisation
// error is logged and ends the check); after that one of the following applies:
//   - oldFile and newFile are not the same file:
//     -- getPreviousFile reports a previous name: the file is treated as a
//     rename; the harvester gets oldState.offset and the new path, and its
//     state is persisted. The harvester is not started in this branch.
//     -- otherwise the file is treated as rotated: newinfo.Ignore() is called,
//     a new harvester is started and its state is persisted.
//     In both cases the old file info is stored in p.missingFiles[file].
//   - Same file, newinfo.Finished() is true and the modification time changed:
//     the harvester is started at the offset received from newinfo.Return and
//     its state is persisted.
//   - Otherwise nothing is started.
func (p *Prospector) checkExistingFile(newinfo *harvester.FileStat, newFile *input.File, oldFile *input.File, file string, output chan *input.FileEvent, oldState oldState) {

	logp.Debug("prospector", "Update existing file for harvesting: %s", file)

	h, err := harvester.NewHarvester(
		p.ProspectorConfig, &p.ProspectorConfig.Harvester,
		file, newinfo, output)
	if err != nil {
		logp.Err("Error initializing harvester: %v", err)
		return
	}

	if !oldFile.IsSameFile(newFile) {

		if previousFile, err := p.getPreviousFile(file, newinfo.Fileinfo); err == nil {
			// This file was renamed from another file we know - link the same harvester channel as the old file
			logp.Debug("prospector", "File rename was detected, existing file: %s -> %s", previousFile, file)
			logp.Debug("prospector", "Launching harvester on renamed file: %s", file)

			h.SetOffset(oldState.offset)
			h.SetPath(file)

			p.registrar.Persist <- h.GetState()
		} else {
			// File is not the same file we saw previously, it must have rotated and is a new file
			logp.Debug("prospector", "Launching harvester on new file: %s. Old file was probably rotated", file)

			// Forget about the previous harvester and let it continue on the old file - so start a new channel to use with the new harvester
			newinfo.Ignore()

			// Start a new harvester on the path
			h.Start()
			p.registrar.Persist <- h.GetState()
		}

		// Keep the old file in missingFiles so we don't rescan it if it was renamed and we've not yet reached the new filename
		// We only need to keep it for the remainder of this iteration then we can assume it was deleted and forget about it
		p.missingFiles[file] = oldFile.FileInfo

	} else if newinfo.Finished() && !oldFile.FileInfo.ModTime().Equal(newinfo.Fileinfo.ModTime()) {
		// Times are compared with Equal rather than != so that only the instant
		// matters, not the Location pointer or a monotonic clock reading.

		// Resume harvesting of an old file we've stopped harvesting from
		logp.Debug("prospector", "Resuming harvester on an old file that was just modified: %s", file)

		// Start a harvester on the path; an old file was just modified and it doesn't have a harvester
		// The offset to continue from will be stored in the harvester channel - so take that to use and also clear the channel
		h.SetOffset(<-newinfo.Return)
		h.Start()
		p.registrar.Persist <- h.GetState()
	} else {
		logp.Debug("prospector", "Not harvesting, file didn't change: %s", file)
	}
}
Esempio n. 8
0
// NewProspectorStdin returns a ProspectorStdin whose harvester is created for
// the "-" source with the given configuration and output channel.
//
// If the harvester cannot be created, nil and an error wrapping the cause in
// its message are returned.
func NewProspectorStdin(config cfg.ProspectorConfig, channel chan *input.FileEvent) (*ProspectorStdin, error) {
	h, err := harvester.NewHarvester(config, &config.Harvester, "-", nil, channel)
	if err != nil {
		return nil, fmt.Errorf("Error initializing stdin harvester: %v", err)
	}

	// Only allocate the prospector once the harvester is known to be valid.
	stdin := new(ProspectorStdin)
	stdin.harvester = h
	return stdin, nil
}
Esempio n. 9
0
// scanGlob expands glob with filepath.Glob and, for every match that is not
// excluded, can be stat'ed and is not a directory, initialises a harvester and
// hands it to checkNewFile (path not in p.prospectorList) or checkExistingFile
// (path already in p.prospectorList). The harvester's stat data is then stored
// in p.prospectorList under the harvester's path for comparison on later scans.
//
// Failures of the glob expansion, of os.Stat, or of harvester initialisation
// are logged; the first aborts the scan, the others skip the affected file.
//
// NOTE(review): p is a value receiver, so the reassignment of p.missingFiles
// below only affects this call's copy of the struct - confirm that is intended.
func (p ProspectorLog) scanGlob(glob string) {
	logp.Debug("prospector", "scan path %s", glob)

	// Evaluate the path as a wildcards/shell glob
	matches, globErr := filepath.Glob(glob)
	if globErr != nil {
		logp.Debug("prospector", "glob(%s) failed: %v", glob, globErr)
		return
	}

	p.missingFiles = make(map[string]os.FileInfo)

	// Check any matched files to see if we need to start a harvester
	for _, match := range matches {
		logp.Debug("prospector", "Check file for harvesting: %s", match)

		// Skip anything on the exclude_files list
		if p.isFileExcluded(match) {
			logp.Debug("prospector", "Exclude file: %s", match)
			continue
		}

		// Stat the file, following any symlinks.
		info, statErr := os.Stat(match)
		if statErr != nil {
			logp.Debug("prospector", "stat(%s) failed: %s", match, statErr)
			continue
		}

		newFile := input.NewFile(info)
		if newFile.FileInfo.IsDir() {
			logp.Debug("prospector", "Skipping directory: %s", match)
			continue
		}

		// Look up what was recorded for this path on a previous scan
		lastinfo, isKnown := p.prospectorList[match]
		oldFile := input.NewFile(lastinfo.Fileinfo)

		// Fresh stat record for the current iteration, used for comparison
		newInfo := harvester.NewFileStat(newFile.FileInfo, p.iteration)

		// Init harvester with info
		hv, hvErr := harvester.NewHarvester(
			p.config,
			&p.config.Harvester,
			match,
			newInfo,
			p.channel,
		)
		if hvErr != nil {
			logp.Err("Error initializing harvester: %v", hvErr)
			continue
		}

		// A path seen before continues from its previous stat info; an unseen
		// path goes through the new-file check.
		if isKnown {
			hv.Stat.Continue(&lastinfo)
			p.checkExistingFile(hv, &newFile, &oldFile)
		} else {
			p.checkNewFile(hv)
		}

		// Track the stat data for this file for later comparison to check for
		// rotation/etc
		p.prospectorList[hv.Path] = *hv.Stat
	}
}