// mergeSegment deserializes every row in segment, attaches the segment's
// timestamp, converts all numeric values to float64, and inserts the rows
// into newDB.
func mergeSegment(newDB, db *gumshoe.DB, segment *timestampSegment) error {
	// NOTE(caleb): Have to do more nasty float conversion in this function.
	// See NOTE(caleb) in migrate.go.
	at := float64(segment.at.Unix())
	rows := make([]gumshoe.UnpackedRow, 0, len(segment.Bytes)/db.RowSize)
	for i := 0; i < len(segment.Bytes); i += db.RowSize {
		row := gumshoe.RowBytes(segment.Bytes[i : i+db.RowSize])
		unpacked := db.DeserializeRow(row)
		unpacked.RowMap[db.TimestampColumn.Name] = at
		// Convert each non-nil, non-string dimension value to float64.
		for _, dim := range db.Schema.DimensionColumns {
			if dim.String {
				continue
			}
			if unpacked.RowMap[dim.Name] == nil {
				continue
			}
			convertValueToFloat64(unpacked.RowMap, dim.Name)
		}
		// Metric values are always numeric, so convert them unconditionally.
		for _, metric := range db.Schema.MetricColumns {
			convertValueToFloat64(unpacked.RowMap, metric.Name)
		}
		rows = append(rows, unpacked)
	}
	return newDB.InsertUnpacked(rows)
}
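// getColumnExtrema scans every segment of db using parallelism worker
// goroutines and returns the minimum and maximum value observed in each
// column. The result slices are indexed with dimension columns first,
// followed by metric columns.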
func getColumnExtrema(db *gumshoe.DB, parallelism int) (mins, maxes []gumshoe.Untyped) {
	resp := db.MakeRequest()
	defer resp.Done()
	numColumns := len(db.DimensionColumns) + len(db.MetricColumns)
	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()

	segments := make(chan *timestampSegment)
	partialResults := make([]minsMaxes, parallelism)
	var wg sync.WaitGroup
	wg.Add(parallelism)
	for i := 0; i < parallelism; i++ {
		i := i // capture the loop variable for the goroutine below
		go func() {
			defer wg.Done()
			// Each worker accumulates its own mins/maxes so no locking is
			// needed; the partial results are combined after wg.Wait().
			partial := minsMaxes{
				Mins:  make([]gumshoe.Untyped, numColumns),
				Maxes: make([]gumshoe.Untyped, numColumns),
			}
			for segment := range segments {
				for j := 0; j < len(segment.Bytes); j += db.RowSize {
					dimensions := gumshoe.DimensionBytes(segment.Bytes[j+db.DimensionStartOffset : j+db.MetricStartOffset])
					for k, col := range db.DimensionColumns {
						if dimensions.IsNil(k) {
							continue
						}
						value := gumshoe.NumericCellValue(unsafe.Pointer(&dimensions[db.DimensionOffsets[k]]), col.Type)
						partial.update(value, k)
					}
					metrics := gumshoe.MetricBytes(segment.Bytes[j+db.MetricStartOffset : j+db.RowSize])
					for k, col := range db.MetricColumns {
						value := gumshoe.NumericCellValue(unsafe.Pointer(&metrics[db.MetricOffsets[k]]), col.Type)
						// Metric columns are indexed after all dimension columns.
						partial.update(value, k+len(db.DimensionColumns))
					}
				}
				progress.Add(1)
			}
			partialResults[i] = partial
		}()
	}
	for _, segment := range allSegments {
		segments <- segment
	}
	close(segments)
	wg.Wait()
	return combinePartialStats(partialResults)
}
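// migrateSegment unpacks every row of segment using oldDB's schema, attaches
// the segment's timestamp, applies convert to each unpacked row, and inserts
// the results into newDB.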
func migrateSegment(newDB, oldDB *gumshoe.DB, segment *timestampSegment, convert func(gumshoe.UnpackedRow)) error {
	at := uint32(segment.at.Unix())
	rows := make([]gumshoe.UnpackedRow, 0, len(segment.Bytes)/oldDB.RowSize)
	for i := 0; i < len(segment.Bytes); i += oldDB.RowSize {
		row := gumshoe.RowBytes(segment.Bytes[i : i+oldDB.RowSize])
		unpacked := oldDB.DeserializeRow(row)
		// Attach a timestamp
		unpacked.RowMap[oldDB.TimestampColumn.Name] = at
		convert(unpacked)
		rows = append(rows, unpacked)
	}
	return newDB.InsertUnpacked(rows)
}
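// mergeDB merges every segment of db into newDB, fanning the segments out to
// parallelism worker goroutines and flushing newDB after every flushSegments
// segments handed out (and once more at the end).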
func mergeDB(newDB, db *gumshoe.DB, parallelism, flushSegments int) error {
	resp := db.MakeRequest()
	defer resp.Done()
	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()

	segments := make(chan *timestampSegment)
	var wg wait.Group
	// Workers: merge segments until the channel is closed or quit fires.
	for i := 0; i < parallelism; i++ {
		wg.Go(func(quit <-chan struct{}) error {
			for {
				select {
				case <-quit:
					return nil
				case segment, ok := <-segments:
					if !ok {
						return nil
					}
					if err := mergeSegment(newDB, db, segment); err != nil {
						return err
					}
					progress.Add(1)
				}
			}
		})
	}
	// Feeder: hand out segments, flushing newDB after every flushSegments
	// segments. The outer non-blocking select gives quit priority over
	// blocking on a send.
	wg.Go(func(quit <-chan struct{}) error {
		flushSegmentCount := 0
		for _, segment := range allSegments {
			select {
			case <-quit:
				return nil
			default:
				select {
				case <-quit:
					return nil
				case segments <- segment:
					flushSegmentCount++
					if flushSegmentCount == flushSegments {
						flushSegmentCount = 0
						if err := newDB.Flush(); err != nil {
							return err
						}
					}
				}
			}
		}
		close(segments)
		return nil
	})
	if err := wg.Wait(); err != nil {
		return err
	}
	return newDB.Flush()
}
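// migrateDBs migrates every segment of oldDB into newDB, converting each row
// with the conversion function built by makeConversionFunc and flushing newDB
// after every flushSegments segments. It has the same worker/feeder structure
// as mergeDB, but wires up shutdown and error propagation by hand with the
// shutdown and errc channels rather than using wait.Group.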
func migrateDBs(newDB, oldDB *gumshoe.DB, parallelism, flushSegments int) error {
	resp := oldDB.MakeRequest()
	defer resp.Done()
	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()

	segments := make(chan *timestampSegment)
	shutdown := make(chan struct{})
	var workerErr error
	errc := make(chan error)
	var wg sync.WaitGroup
	wg.Add(parallelism)

	conversionFunc, err := makeConversionFunc(newDB, oldDB)
	if err != nil {
		return err
	}

	for i := 0; i < parallelism; i++ {
		go func() {
			defer wg.Done()
			for {
				select {
				case segment := <-segments:
					if err := migrateSegment(newDB, oldDB, segment, conversionFunc); err != nil {
						// Report the error unless we're already shutting down.
						select {
						case errc <- err:
						case <-shutdown:
						}
						return
					}
					progress.Add(1)
				case <-shutdown:
					return
				}
			}
		}()
	}

	// Hand out segments, flushing newDB after every flushSegments segments
	// and bailing out as soon as any worker reports an error.
	flushSegmentCount := 0
outer:
	for _, segment := range allSegments {
		select {
		case err := <-errc:
			workerErr = err
			break outer
		default:
			select {
			case segments <- segment:
				flushSegmentCount++
				if flushSegmentCount == flushSegments {
					flushSegmentCount = 0
					if err := newDB.Flush(); err != nil {
						workerErr = err
						break outer
					}
				}
			case err := <-errc:
				workerErr = err
				break outer
			}
		}
	}
	close(shutdown)
	wg.Wait()
	if workerErr != nil {
		return workerErr
	}
	return newDB.Flush()
}
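// For reference: timestampSegment, used throughout this file, pairs a
// segment's raw bytes with the timestamp of the interval it belongs to. The
// real definition lives elsewhere in this package; a minimal sketch of the
// assumed shape, kept in a comment to avoid redefining it:
//
//	type timestampSegment struct {
//		at time.Time
//		*gumshoe.Segment // provides the Bytes field read above
//	}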