func subIndex(cfg *config.Configuration, subDir string) (*Index, error) { // cfg - Configuration file // subDir - What subDirectory we're on in our indexing. filename := filepath.Join(cfg.IndexDir, subDir, cfg.Name+".csv") idx := &Index{ Filename: filename, Config: cfg, subDir: subDir, entries: map[string]Entry{}, Period: tf_time.Times{}, Modified: time.Time{}, } // Open the index file for reading. f, err := os.Open(filename) if err != nil { return idx, nil } defer f.Close() // Make sure a reasonable processor exists if _, ok := processor.Processors[cfg.Type]; ok != true { return nil, errors.New("Configuration specified unknown data type.") } idxStat, err := os.Stat(filename) idx.Modified = idxStat.ModTime() cr := csv.NewReader(f) idx.Filename = filename // Read in all the existing entries for { recs, err := cr.Read() switch err { case nil: case io.EOF: return idx, nil default: return nil, err } if len(recs) < 3 { return nil, fmt.Errorf("Bad formatting in index %s", filename) } entry := Entry{} entry.Path = recs[0] if entry.Period.Earliest, err = tf_time.UnmarshalTime([]byte(recs[1])); err != nil { return nil, err } if entry.Period.Latest, err = tf_time.UnmarshalTime([]byte(recs[2])); err != nil { return nil, err } // The old format didn't include modification times if len(recs) == 4 { if entry.Modified, err = tf_time.UnmarshalTime([]byte(recs[3])); err != nil { return nil, err } } if idx.Period.Earliest.IsZero() || entry.Period.Earliest.Before(idx.Period.Earliest) { idx.Period.Earliest = entry.Period.Earliest } if idx.Period.Latest.IsZero() || entry.Period.Latest.After(idx.Period.Latest) { idx.Period.Latest = entry.Period.Latest } // If the file path isn't absolute, this should be a subdirectory. if filepath.IsAbs(entry.Path) == false { subDir := filepath.Join(idx.subDir, entry.Path) subidx_path := filepath.Join(cfg.IndexDir, subDir) // Make sure the index subdirectory exists and is a directory. info, err := os.Stat(subidx_path) if (err == nil || os.IsExist(err)) && info.IsDir() { subidx, err := subIndex(cfg, subDir) if err != nil { log.Print("Could not read index from subdirectory: ", subDir) } entry.subIndex = subidx } } vlog("idx - ", entry.Period) idx.entries[recs[0]] = entry } return idx, nil }
func process_fsdb(filename string, col int) (times tf_time.Times, err error) { var reader io.Reader f, err := os.Open(filename) if err != nil { return times, err } defer f.Close() if strings.Contains(filename, ".gz") { // handle gzip gf, err := gzip.NewReader(f) if err != nil { f.Seek(0, 0) reader = f } else { reader = gf defer gf.Close() } } else if strings.Contains(filename, ".xz") { // handle xz xf, err := xz.NewReader(f, 0) if err != nil { log.Printf("error reading .xz file = %s, skipping...\n", err) return times, err } else { reader = xf // XXX xz has no xz.Close() } } else { // just a plain .fsdb file reader = f } // now process files scanner := bufio.NewScanner(reader) for scanner.Scan() { // read line line := scanner.Text() if strings.HasPrefix(line, "#") { // if a comment or header, continue continue } // only want the column # "col" ts := strings.SplitN(line, "\t", col+1)[col-1] // convert unixtimestamp into golang time // accepts both second and nanosecond precision tm, err := tf_time.UnmarshalTime([]byte(ts)) if err != nil { return times, err } if times.Earliest.IsZero() || tm.Before(times.Earliest) { times.Earliest = tm } if times.Latest.IsZero() || tm.After(times.Latest) { times.Latest = tm } } return times, err }