// mergeDB merges every segment of db into newDB using parallelism worker
// goroutines, flushing newDB after every flushSegments segments have been
// handed to workers and once more at the end. It returns the first error
// from any worker or flush.
//
// NOTE(review): error propagation relies on wait.Group semantics —
// presumably the quit channel is closed when any goroutine returns an
// error and Wait returns that first error; confirm against the wait
// package.
func mergeDB(newDB, db *gumshoe.DB, parallelism, flushSegments int) error {
	resp := db.MakeRequest()
	defer resp.Done()
	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()
	segments := make(chan *timestampSegment)
	var wg wait.Group
	// Consumers: merge segments until the channel is closed (normal
	// completion) or quit fires (some goroutine failed).
	for i := 0; i < parallelism; i++ {
		wg.Go(func(quit <-chan struct{}) error {
			for {
				select {
				case <-quit:
					return nil
				case segment, ok := <-segments:
					if !ok {
						// Producer closed the channel: all work delivered.
						return nil
					}
					if err := mergeSegment(newDB, db, segment); err != nil {
						return err
					}
					progress.Add(1)
				}
			}
		})
	}
	// Producer: feed segments to the workers, flushing newDB every
	// flushSegments sends.
	wg.Go(func(quit <-chan struct{}) error {
		flushSegmentCount := 0
		for _, segment := range allSegments {
			// Outer non-blocking check gives quit strict priority; a single
			// select could pick the send even when quit is already closed.
			select {
			case <-quit:
				return nil
			default:
				select {
				case <-quit:
					return nil
				case segments <- segment:
					flushSegmentCount++
					if flushSegmentCount == flushSegments {
						flushSegmentCount = 0
						if err := newDB.Flush(); err != nil {
							return err
						}
					}
				}
			}
		}
		// Closing tells the consumers there is no more work.
		close(segments)
		return nil
	})
	err := wg.Wait()
	if err != nil {
		return err
	}
	// Final flush for any segments merged since the last periodic flush.
	return newDB.Flush()
}
// getColumnExtrema scans every row of every segment of db in parallel and
// returns the minimum and maximum value observed for each column. The
// result slices are indexed by column: dimension columns first (in
// db.DimensionColumns order), then metric columns.
func getColumnExtrema(db *gumshoe.DB, parallelism int) (mins, maxes []gumshoe.Untyped) {
	resp := db.MakeRequest()
	defer resp.Done()
	numColumns := len(db.DimensionColumns) + len(db.MetricColumns)
	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()
	segments := make(chan *timestampSegment)
	// One private minsMaxes per worker; they are merged after wg.Wait so no
	// locking is needed during the scan.
	partialResults := make([]minsMaxes, parallelism)
	var wg sync.WaitGroup
	wg.Add(parallelism)
	for i := 0; i < parallelism; i++ {
		i := i // capture a per-iteration copy (pre-Go 1.22 loop semantics)
		go func() {
			defer wg.Done()
			partial := minsMaxes{
				Mins:  make([]gumshoe.Untyped, numColumns),
				Maxes: make([]gumshoe.Untyped, numColumns),
			}
			for segment := range segments {
				// Walk the raw segment bytes one fixed-size row at a time.
				for j := 0; j < len(segment.Bytes); j += db.RowSize {
					dimensions := gumshoe.DimensionBytes(segment.Bytes[j+db.DimensionStartOffset : j+db.MetricStartOffset])
					for k, col := range db.DimensionColumns {
						if dimensions.IsNil(k) {
							// Nil cells do not participate in min/max.
							continue
						}
						value := gumshoe.NumericCellValue(unsafe.Pointer(&dimensions[db.DimensionOffsets[k]]), col.Type)
						partial.update(value, k)
					}
					metrics := gumshoe.MetricBytes(segment.Bytes[j+db.MetricStartOffset : j+db.RowSize])
					for k, col := range db.MetricColumns {
						value := gumshoe.NumericCellValue(unsafe.Pointer(&metrics[db.MetricOffsets[k]]), col.Type)
						// Metric columns follow the dimension columns in the
						// combined column index space.
						partial.update(value, k+len(db.DimensionColumns))
					}
				}
				progress.Add(1)
			}
			partialResults[i] = partial
		}()
	}
	// Hand every segment to the workers, then signal completion by closing.
	for _, segment := range allSegments {
		segments <- segment
	}
	close(segments)
	wg.Wait()
	return combinePartialStats(partialResults)
}
func migrateDBs(newDB, oldDB *gumshoe.DB, parallelism, flushSegments int) error { resp := oldDB.MakeRequest() defer resp.Done() allSegments := findSegments(resp.StaticTable) progress := NewProgress("segments processed", len(allSegments)) progress.Print() segments := make(chan *timestampSegment) shutdown := make(chan struct{}) var workerErr error errc := make(chan error) var wg sync.WaitGroup wg.Add(parallelism) conversionFunc, err := makeConversionFunc(newDB, oldDB) if err != nil { return err } for i := 0; i < parallelism; i++ { go func() { defer wg.Done() for { select { case segment := <-segments: if err := migrateSegment(newDB, oldDB, segment, conversionFunc); err != nil { select { case errc <- err: case <-shutdown: } return } progress.Add(1) case <-shutdown: return } } }() } flushSegmentCount := 0 outer: for _, segment := range allSegments { select { case err := <-errc: workerErr = err break outer default: select { case segments <- segment: flushSegmentCount++ if flushSegmentCount == flushSegments { flushSegmentCount = 0 if err := newDB.Flush(); err != nil { workerErr = err break outer } } case err := <-errc: workerErr = err break outer } } } close(shutdown) wg.Wait() if workerErr != nil { return workerErr } return newDB.Flush() }