示例#1
0
文件: index.go 项目: cardi/timefind
// subIndex loads the CSV index stored for subDir beneath cfg.IndexDir and,
// for every entry whose path is not absolute, recursively loads the index of
// the matching subdirectory.
//
//	cfg    - Configuration file
//	subDir - What subdirectory we're on in our indexing.
//
// If the index file cannot be opened, an empty Index is returned together
// with a nil error. A non-nil error is returned when cfg.Type names no known
// processor, when the file cannot be stat'ed, or when a record is malformed.
func subIndex(cfg *config.Configuration,
	subDir string) (*Index, error) {

	filename := filepath.Join(cfg.IndexDir, subDir, cfg.Name+".csv")

	idx := &Index{
		Filename: filename,
		Config:   cfg,
		subDir:   subDir,
		entries:  map[string]Entry{},
		Period:   tf_time.Times{},
		Modified: time.Time{},
	}

	// Open the index file for reading. Failing to open it is deliberately not
	// an error: the caller simply gets an empty index back.
	f, err := os.Open(filename)
	if err != nil {
		return idx, nil
	}
	defer f.Close()

	// Make sure a reasonable processor exists
	if _, ok := processor.Processors[cfg.Type]; !ok {
		return nil, errors.New("Configuration specified unknown data type.")
	}

	// Stat the handle we already hold, and check the error before touching
	// the result: a failed stat yields a nil FileInfo.
	idxStat, err := f.Stat()
	if err != nil {
		return nil, err
	}
	idx.Modified = idxStat.ModTime()

	cr := csv.NewReader(f)

	// Read in all the existing entries; the loop only exits by returning.
	for {
		recs, err := cr.Read()
		if err == io.EOF {
			return idx, nil
		}
		if err != nil {
			return nil, err
		}
		if len(recs) < 3 {
			return nil, fmt.Errorf("Bad formatting in index %s", filename)
		}

		entry := Entry{Path: recs[0]}

		if entry.Period.Earliest, err = tf_time.UnmarshalTime([]byte(recs[1])); err != nil {
			return nil, err
		}

		if entry.Period.Latest, err = tf_time.UnmarshalTime([]byte(recs[2])); err != nil {
			return nil, err
		}

		// The old format didn't include modification times
		if len(recs) == 4 {
			if entry.Modified, err = tf_time.UnmarshalTime([]byte(recs[3])); err != nil {
				return nil, err
			}
		}

		// Widen the index's overall period to cover this entry.
		if idx.Period.Earliest.IsZero() ||
			entry.Period.Earliest.Before(idx.Period.Earliest) {
			idx.Period.Earliest = entry.Period.Earliest
		}
		if idx.Period.Latest.IsZero() ||
			entry.Period.Latest.After(idx.Period.Latest) {
			idx.Period.Latest = entry.Period.Latest
		}

		// If the file path isn't absolute, this should be a subdirectory.
		if !filepath.IsAbs(entry.Path) {
			childDir := filepath.Join(idx.subDir, entry.Path)
			childIdxPath := filepath.Join(cfg.IndexDir, childDir)

			// Make sure the index subdirectory exists and is a directory.
			// info is only valid when Stat succeeded, so require err == nil.
			info, err := os.Stat(childIdxPath)
			if err == nil && info.IsDir() {
				childIdx, err := subIndex(cfg, childDir)
				if err != nil {
					log.Print("Could not read index from subdirectory: ", childDir, ": ", err)
				}
				// childIdx is nil when the recursive load failed.
				entry.subIndex = childIdx
			}
		}

		vlog("idx - ", entry.Period)
		idx.entries[entry.Path] = entry
	}
}
示例#2
0
// process_fsdb scans the tab-separated file at filename and returns the
// earliest and latest timestamps found in column col (1-based).
//
// Files whose name contains ".gz" are read through gzip; if the gzip header
// cannot be parsed the file is re-read as plain text. Files whose name
// contains ".xz" are read through xz. Lines starting with "#" are skipped.
//
// Errors: os.ErrInvalid when col < 1, io.ErrUnexpectedEOF when a data line
// has fewer than col columns, and otherwise whatever opening, decompressing,
// scanning or timestamp parsing reports. On error the times gathered so far
// are returned alongside it.
func process_fsdb(filename string, col int) (times tf_time.Times, err error) {
	// Column numbers are 1-based; anything lower would index out of range.
	if col < 1 {
		return times, os.ErrInvalid
	}

	var reader io.Reader

	f, err := os.Open(filename)
	if err != nil {
		return times, err
	}
	defer f.Close()

	if strings.Contains(filename, ".gz") {
		// handle gzip
		gf, gzErr := gzip.NewReader(f)
		if gzErr != nil {
			// Not actually gzip data: rewind past the bytes the header probe
			// consumed and read the file as plain text.
			if _, seekErr := f.Seek(0, 0); seekErr != nil {
				return times, seekErr
			}
			reader = f
		} else {
			reader = gf
			defer gf.Close()
		}
	} else if strings.Contains(filename, ".xz") {
		// handle xz
		xf, xzErr := xz.NewReader(f, 0)
		if xzErr != nil {
			log.Printf("error reading .xz file = %s, skipping...\n", xzErr)
			return times, xzErr
		}
		reader = xf
		// XXX xz has no xz.Close()
	} else {
		// just a plain .fsdb file
		reader = f
	}

	// now process files
	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		// read line
		line := scanner.Text()

		if strings.HasPrefix(line, "#") {
			// if a comment or header, continue
			continue
		}

		// only want the column # "col"; splitting into at most col+1 pieces
		// keeps everything after that column in one unused trailing piece.
		fields := strings.SplitN(line, "\t", col+1)
		if len(fields) < col {
			log.Printf("%s: line has fewer than %d tab-separated columns\n", filename, col)
			return times, io.ErrUnexpectedEOF
		}
		ts := fields[col-1]

		// convert unixtimestamp into golang time
		// (per the original note, both second and nanosecond precision
		// are accepted by UnmarshalTime)
		tm, parseErr := tf_time.UnmarshalTime([]byte(ts))
		if parseErr != nil {
			return times, parseErr
		}

		if times.Earliest.IsZero() || tm.Before(times.Earliest) {
			times.Earliest = tm
		}
		if times.Latest.IsZero() || tm.After(times.Latest) {
			times.Latest = tm
		}
	}

	// Scan returns false on both EOF and failure; only Err tells them apart
	// (read errors, corrupt compressed data, over-long lines).
	return times, scanner.Err()
}