func whisperWorker() { for path := range whisperFiles { fd, err := os.Open(path) if err != nil { fmt.Fprintf(os.Stderr, "ERROR: Failed to open whisper file '%s': %s\n", path, err.Error()) if !skipWhisperErrors { return } } w, err := whisper.OpenWhisper(fd) if err != nil { fmt.Fprintf(os.Stderr, "ERROR: Failed to open whisper file '%s': %s\n", path, err.Error()) if !skipWhisperErrors { return } } var earliestArchiveTimestamp uint32 numTotalPoints := uint32(0) for i := range w.Header.Archives { numTotalPoints += w.Header.Archives[i].Points } points := make(map[uint32]whisper.Point) // iterate over archives from high-res to low-res, remembering // the earliest timestamp and filtering out those rows from // subsequent archives so as to never have duplicates. for i, archive := range w.Header.Archives { allPoints, err := w.DumpArchive(i) if err != nil { fmt.Fprintf(os.Stderr, "ERROR: Failed to read archive %d in '%s', skipping: %s\n", i, path, err.Error()) if !skipWhisperErrors { return } } var earliestTimestamp uint32 = 0 var latestTimestamp uint32 = 0 n := 0 for _, point := range allPoints { // we have to filter out the "None" records (where we didn't fill in data) explicitly here! if point.Timestamp != 0 { if earliestArchiveTimestamp == 0 || point.Timestamp < earliestArchiveTimestamp { point.Value = point.Value points[point.Timestamp] = point n += 1 if earliestTimestamp > point.Timestamp || earliestTimestamp == 0 { earliestTimestamp = point.Timestamp } if latestTimestamp < point.Timestamp || latestTimestamp == 0 { latestTimestamp = point.Timestamp } } } } // And now we need to remove all points that are latest - retention if earliestTimestamp < latestTimestamp-archive.SecondsPerPoint*archive.Points { earliestArchiveTimestamp = latestTimestamp - archive.SecondsPerPoint*archive.Points } else { earliestArchiveTimestamp = earliestTimestamp } n = 0 for ts, _ := range points { if ts < earliestArchiveTimestamp { delete(points, ts) n += 1 } } } w.Close() serie := &abstractSerie{path, points} series <- serie } whisperWorkersWg.Done() }
func whisperWorker() { for path := range whisperFiles { fd, err := os.Open(path) if err != nil { fmt.Fprintf(os.Stderr, "ERROR: Failed to open whisper file '%s': %s\n", path, err.Error()) if skipWhisperErrors { continue } else { exit <- 2 } } w, err := whisper.OpenWhisper(fd) if err != nil { fmt.Fprintf(os.Stderr, "ERROR: Failed to open whisper file '%s': %s\n", path, err.Error()) if skipWhisperErrors { continue } else { exit <- 2 } } pre := time.Now() var duration time.Duration var points []whisper.Point if all { numTotalPoints := uint32(0) for i := range w.Header.Archives { numTotalPoints += w.Header.Archives[i].Points } points = make([]whisper.Point, 0, numTotalPoints) // iterate in backwards archive order (low res to high res) // so that if you write points of multiple archives to the same series, the high res ones will overwrite the low res ones for i := len(w.Header.Archives) - 1; i >= 0; i-- { allPoints, err := w.DumpArchive(i) if err != nil { fmt.Fprintf(os.Stderr, "ERROR: Failed to read archive %d in '%s', skipping: %s\n", i, path, err.Error()) if skipWhisperErrors { continue } else { exit <- 2 } } for _, point := range allPoints { // we have to filter out the "None" records (where we didn't fill in data) explicitly here! if point.Timestamp != 0 { points = append(points, point) } } } duration = time.Since(pre) } else { // not sure how it works, but i've emperically verified that this ignores null records, which is what we want // i.e. if whisper has a slot every minute, but you only have data every 3 minutes, we'll only process those records _, points, err = w.FetchUntil(fromTime, untilTime) duration = time.Since(pre) if err != nil { fmt.Fprintf(os.Stderr, "ERROR: Failed to read file '%s' from %d to %d, skipping: %s (operation took %v)\n", path, fromTime, untilTime, err.Error(), duration) if skipWhisperErrors { w.Close() continue } else { exit <- 2 } } } w.Close() whisperReadTimer.Update(duration) serie := &abstractSerie{path, points} influxSeries <- serie } whisperWorkersWg.Done() }