Example 1
func mergeSegment(newDB, db *gumshoe.DB, segment *timestampSegment) error {
	// NOTE(caleb): Have to do more nasty float conversion in this function. See NOTE(caleb) in migrate.go.
	at := float64(segment.at.Unix())
	rows := make([]gumshoe.UnpackedRow, 0, len(segment.Bytes)/db.RowSize)
	for i := 0; i < len(segment.Bytes); i += db.RowSize {
		row := gumshoe.RowBytes(segment.Bytes[i : i+db.RowSize])
		unpacked := db.DeserializeRow(row)
		// Attach the segment's interval timestamp to every row
		unpacked.RowMap[db.TimestampColumn.Name] = at
		// Normalize non-string, non-nil dimension values to float64
		for _, dim := range db.Schema.DimensionColumns {
			if dim.String {
				continue
			}
			value := unpacked.RowMap[dim.Name]
			if value == nil {
				continue
			}
			convertValueToFloat64(unpacked.RowMap, dim.Name)
		}
		// Metric values are always numeric, so convert them unconditionally
		for _, metric := range db.Schema.MetricColumns {
			convertValueToFloat64(unpacked.RowMap, metric.Name)
		}
		rows = append(rows, unpacked)
	}
	return newDB.InsertUnpacked(rows)
}
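convertValueToFloat64 is defined elsewhere in this tool. A minimal sketch of what it presumably does, assuming gumshoe.RowMap maps column names to gumshoe.Untyped values and that deserialized cells arrive as Go's fixed-size numeric types:

// Sketch of convertValueToFloat64 (assumed, not the original): normalize a
// numeric cell in the row map to float64 in place.
func convertValueToFloat64(row gumshoe.RowMap, name string) {
	switch v := row[name].(type) {
	case uint8:
		row[name] = float64(v)
	case int8:
		row[name] = float64(v)
	case uint16:
		row[name] = float64(v)
	case int16:
		row[name] = float64(v)
	case uint32:
		row[name] = float64(v)
	case int32:
		row[name] = float64(v)
	case float32:
		row[name] = float64(v)
	}
	// float64 cells (and strings, which the caller filters out) pass through
}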
Example 2
func getColumnExtrema(db *gumshoe.DB, parallelism int) (mins, maxes []gumshoe.Untyped) {
	resp := db.MakeRequest()
	defer resp.Done()

	numColumns := len(db.DimensionColumns) + len(db.MetricColumns)
	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()
	segments := make(chan *timestampSegment)
	partialResults := make([]minsMaxes, parallelism)

	var wg sync.WaitGroup
	wg.Add(parallelism)
	for i := 0; i < parallelism; i++ {
		i := i // capture the loop variable for this worker's goroutine
		go func() {
			defer wg.Done()

			partial := minsMaxes{
				Mins:  make([]gumshoe.Untyped, numColumns),
				Maxes: make([]gumshoe.Untyped, numColumns),
			}

			// Scan each row's packed cells directly out of the segment bytes,
			// folding every numeric value into this worker's running extrema
			for segment := range segments {
				for j := 0; j < len(segment.Bytes); j += db.RowSize {
					dimensions := gumshoe.DimensionBytes(segment.Bytes[j+db.DimensionStartOffset : j+db.MetricStartOffset])
					for k, col := range db.DimensionColumns {
						if dimensions.IsNil(k) {
							continue
						}
						value := gumshoe.NumericCellValue(unsafe.Pointer(&dimensions[db.DimensionOffsets[k]]), col.Type)
						partial.update(value, k)
					}
					metrics := gumshoe.MetricBytes(segment.Bytes[j+db.MetricStartOffset : j+db.RowSize])
					for k, col := range db.MetricColumns {
						value := gumshoe.NumericCellValue(unsafe.Pointer(&metrics[db.MetricOffsets[k]]), col.Type)
						partial.update(value, k+len(db.DimensionColumns))
					}
				}
				progress.Add(1)
			}
			partialResults[i] = partial
		}()
	}

	for _, segment := range allSegments {
		segments <- segment
	}
	close(segments)
	wg.Wait()

	return combinePartialStats(partialResults)
}
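The minsMaxes accumulator and combinePartialStats are defined elsewhere. One plausible shape, assuming the values from NumericCellValue are compared as float64 (an assumption, since gumshoe.Untyped is an empty interface):

// Sketch of the per-worker accumulator (assumed, not the original).
// A nil entry means no value has been seen for that column yet.
type minsMaxes struct {
	Mins  []gumshoe.Untyped
	Maxes []gumshoe.Untyped
}

// update folds one cell value into the running extrema for column i.
func (m *minsMaxes) update(value gumshoe.Untyped, i int) {
	v := value.(float64) // assumption: numeric cells arrive as float64
	if m.Mins[i] == nil || v < m.Mins[i].(float64) {
		m.Mins[i] = v
	}
	if m.Maxes[i] == nil || v > m.Maxes[i].(float64) {
		m.Maxes[i] = v
	}
}

// combinePartialStats merges every worker's extrema into one pair of slices.
func combinePartialStats(partials []minsMaxes) (mins, maxes []gumshoe.Untyped) {
	for _, p := range partials {
		if mins == nil {
			mins = make([]gumshoe.Untyped, len(p.Mins))
			maxes = make([]gumshoe.Untyped, len(p.Maxes))
		}
		for i := range p.Mins {
			if p.Mins[i] != nil && (mins[i] == nil || p.Mins[i].(float64) < mins[i].(float64)) {
				mins[i] = p.Mins[i]
			}
			if p.Maxes[i] != nil && (maxes[i] == nil || p.Maxes[i].(float64) > maxes[i].(float64)) {
				maxes[i] = p.Maxes[i]
			}
		}
	}
	return mins, maxes
}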
Example 3
func migrateSegment(newDB, oldDB *gumshoe.DB, segment *timestampSegment,
	convert func(gumshoe.UnpackedRow)) error {

	at := uint32(segment.at.Unix())
	rows := make([]gumshoe.UnpackedRow, 0, len(segment.Bytes)/oldDB.RowSize)
	for i := 0; i < len(segment.Bytes); i += oldDB.RowSize {
		row := gumshoe.RowBytes(segment.Bytes[i : i+oldDB.RowSize])
		unpacked := oldDB.DeserializeRow(row)
		// Attach the segment's interval timestamp, then let the caller-supplied
		// convert callback rewrite the row for the new schema
		unpacked.RowMap[oldDB.TimestampColumn.Name] = at
		convert(unpacked)
		rows = append(rows, unpacked)
	}
	return newDB.InsertUnpacked(rows)
}
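The convert callback is produced by makeConversionFunc from the old and new schemas (see migrateDBs below). Purely as an illustration, a hypothetical callback that renames one dimension column could look like:

// renameCountry is a hypothetical convert callback, for illustration only;
// the real one is built by makeConversionFunc from the two schemas.
func renameCountry(row gumshoe.UnpackedRow) {
	if v, ok := row.RowMap["country"]; ok {
		delete(row.RowMap, "country")
		row.RowMap["geo_country"] = v
	}
}

UnpackedRow is passed by value, but RowMap is a map, so mutations made here are visible to migrateSegment when it appends the row.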
Example 4
func mergeDB(newDB, db *gumshoe.DB, parallelism, flushSegments int) error {
	resp := db.MakeRequest()
	defer resp.Done()

	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()
	segments := make(chan *timestampSegment)
	var wg wait.Group
	for i := 0; i < parallelism; i++ {
		wg.Go(func(quit <-chan struct{}) error {
			for {
				select {
				case <-quit:
					return nil
				case segment, ok := <-segments:
					if !ok {
						return nil
					}
					if err := mergeSegment(newDB, db, segment); err != nil {
						return err
					}
					progress.Add(1)
				}
			}
		})
	}

	wg.Go(func(quit <-chan struct{}) error {
		flushSegmentCount := 0
		for _, segment := range allSegments {
			select {
			case <-quit:
				return nil
			default:
				// Prefer quit when both it and a send are ready; the inner
				// select alone would choose between them at random
				select {
				case <-quit:
					return nil
				case segments <- segment:
					flushSegmentCount++
					if flushSegmentCount == flushSegments {
						flushSegmentCount = 0
						if err := newDB.Flush(); err != nil {
							return err
						}
					}
				}
			}
		}
		close(segments)
		return nil
	})

	err := wg.Wait()
	if err != nil {
		return err
	}
	return newDB.Flush()
}
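wait.Group here is not the standard library's sync.WaitGroup: its Go method takes a function that receives a quit channel and returns an error, and Wait reports the first failure. A minimal sketch of such a group, with semantics inferred from how mergeDB uses it:

// Group runs error-returning functions that watch a shared quit channel.
// The first error closes quit so the other functions can bail out early.
// Sketch only; not safe for calling Go from multiple goroutines.
type Group struct {
	wg   sync.WaitGroup
	quit chan struct{}
	once sync.Once
	err  error
}

func (g *Group) Go(f func(quit <-chan struct{}) error) {
	if g.quit == nil {
		g.quit = make(chan struct{})
	}
	g.wg.Add(1)
	go func() {
		defer g.wg.Done()
		if err := f(g.quit); err != nil {
			g.once.Do(func() {
				g.err = err
				close(g.quit)
			})
		}
	}()
}

// Wait blocks until every function has returned and yields the first error.
func (g *Group) Wait() error {
	g.wg.Wait()
	return g.err
}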
Example 5
func migrateDBs(newDB, oldDB *gumshoe.DB, parallelism, flushSegments int) error {
	resp := oldDB.MakeRequest()
	defer resp.Done()

	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()
	conversionFunc, err := makeConversionFunc(newDB, oldDB)
	if err != nil {
		return err
	}

	segments := make(chan *timestampSegment)
	shutdown := make(chan struct{})
	var workerErr error
	// Buffered so a worker can record an error even after the feed loop below
	// has finished and closed shutdown; see the drain after wg.Wait
	errc := make(chan error, parallelism)
	var wg sync.WaitGroup
	wg.Add(parallelism)

	for i := 0; i < parallelism; i++ {
		go func() {
			defer wg.Done()
			for {
				select {
				case segment := <-segments:
					if err := migrateSegment(newDB, oldDB, segment, conversionFunc); err != nil {
					// errc has one buffered slot per worker, so this send
					// never blocks and the error cannot be lost
					errc <- err
						return
					}
					progress.Add(1)
				case <-shutdown:
					return
				}
			}
		}()
	}

	flushSegmentCount := 0

outer:
	for _, segment := range allSegments {
		select {
		case err := <-errc:
			workerErr = err
			break outer
		default:
			// Prefer a pending worker error over sending; the inner select
			// alone would choose between them at random
			select {
			case segments <- segment:
				flushSegmentCount++
				if flushSegmentCount == flushSegments {
					flushSegmentCount = 0
					if err := newDB.Flush(); err != nil {
						workerErr = err
						break outer
					}
				}
			case err := <-errc:
				workerErr = err
				break outer
			}
		}
	}

	close(shutdown)
	wg.Wait()
	if workerErr == nil {
		// Collect an error a worker hit after the feed loop had already
		// drained allSegments; without this check it would be dropped
		select {
		case workerErr = <-errc:
		default:
		}
	}
	if workerErr != nil {
		return workerErr
	}
	return newDB.Flush()
}
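All five functions lean on timestampSegment and findSegments, which pair each static-table segment's raw bytes with the timestamp of the interval it came from. Their definitions are not shown here; a sketch inferred from the usage above, assuming the static table exposes its intervals as a map keyed by time:

// Sketch inferred from usage; the real definitions live alongside these tools.
type timestampSegment struct {
	at    time.Time // interval timestamp, attached to every row on migration
	Bytes []byte    // the segment's packed rows
}

// findSegments flattens a static table into one slice of timestamped segments.
// The Intervals/Segments field names are assumptions about gumshoe's layout.
func findSegments(table *gumshoe.StaticTable) []*timestampSegment {
	var all []*timestampSegment
	for t, interval := range table.Intervals {
		for _, segment := range interval.Segments {
			all = append(all, &timestampSegment{at: t, Bytes: segment.Bytes})
		}
	}
	return all
}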