Beispiel #1
0
// schemaFixture builds a gumshoe.Schema for tests out of testSchema.
//
// The timestamp column is always a uint32 column named "at", the segment size is 100 and the interval
// duration is one hour. One dimension column and one metric column is created for every entry in
// testSchema.Dimensions and testSchema.Metrics, in order. Any column-construction error causes a panic.
func schemaFixture(testSchema *migrateTestSchema) *gumshoe.Schema {
	// All failures here indicate a broken fixture, so they abort the test run immediately.
	mustSucceed := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	timestamp, err := gumshoe.MakeMetricColumn("at", "uint32")
	mustSucceed(err)

	fixture := new(gumshoe.Schema)
	fixture.TimestampColumn = gumshoe.Column(timestamp)
	fixture.SegmentSize = 100
	fixture.IntervalDuration = time.Hour

	for _, d := range testSchema.Dimensions {
		col, err := gumshoe.MakeDimensionColumn(d.Name, d.Type, d.String)
		mustSucceed(err)
		fixture.DimensionColumns = append(fixture.DimensionColumns, col)
	}

	for _, m := range testSchema.Metrics {
		col, err := gumshoe.MakeMetricColumn(m.Name, m.Type)
		mustSucceed(err)
		fixture.MetricColumns = append(fixture.MetricColumns, col)
	}

	return fixture
}
Beispiel #2
0
// makeSchema produces a gumshoe Schema based on a Config's values.
//
// The configuration is validated along the way and the first problem found is returned as an error:
//
//   - DatabaseDir must be non-empty. The special value "MEMORY" selects a non-disk-backed DB; any other
//     value is used as the database directory.
//   - Schema.SegmentSize must be parseable by humanize.ParseBytes, be at least 100, and fit in an int.
//   - Schema.TimestampColumn must be a non-string uint32 column.
//   - String dimension columns must use one of the types uint8, uint16 or uint32.
//   - At least one metric column is required, and metric columns may not be strings.
//   - Column names must be unique across the timestamp, dimension and metric columns.
//   - FlushInterval must be at least one second, QueryParallelism and RetentionDays at least 1, and
//     Schema.IntervalDuration at least one minute.
//
// On success the returned schema uses fixed retention of RetentionDays days.
func (c *Config) makeSchema() (*gumshoe.Schema, error) {
	dir := ""
	diskBacked := true
	switch c.DatabaseDir {
	case "":
		return nil, errors.New("database directory must be provided. Use 'MEMORY' to specify an in-memory DB.")
	case "MEMORY":
		diskBacked = false
	default:
		dir = c.DatabaseDir
	}

	segmentSize, err := humanize.ParseBytes(c.Schema.SegmentSize)
	if err != nil {
		return nil, err
	}

	name, typ, isString := parseColumn(c.Schema.TimestampColumn)
	if typ != "uint32" {
		return nil, fmt.Errorf("timestamp column (%q) must be uint32", name)
	}
	if isString {
		return nil, errors.New("timestamp column cannot be a string")
	}
	// The timestamp is built as a dimension column; only its embedded Column is used in the schema.
	timestampColumn, err := gumshoe.MakeDimensionColumn(name, typ, isString)
	if err != nil {
		return nil, err
	}

	dimensions := make([]gumshoe.DimensionColumn, len(c.Schema.DimensionColumns))
	for i, colPair := range c.Schema.DimensionColumns {
		name, typ, isString := parseColumn(colPair)
		if isString {
			// Only string dimensions are restricted to the unsigned integer types here.
			switch typ {
			case "uint8", "uint16", "uint32":
			default:
				return nil, fmt.Errorf("got type %q for column %q (must be unsigned int type)", typ, name)
			}
		}
		col, err := gumshoe.MakeDimensionColumn(name, typ, isString)
		if err != nil {
			return nil, err
		}
		dimensions[i] = col
	}

	if len(c.Schema.MetricColumns) == 0 {
		return nil, errors.New("schema must include at least one metric column")
	}
	metrics := make([]gumshoe.MetricColumn, len(c.Schema.MetricColumns))
	for i, colPair := range c.Schema.MetricColumns {
		name, typ, isString := parseColumn(colPair)
		if isString {
			return nil, fmt.Errorf("metric column (%q) has string type; not allowed for metric columns", name)
		}
		col, err := gumshoe.MakeMetricColumn(name, typ)
		if err != nil {
			return nil, err
		}
		metrics[i] = col
	}

	// Check that we haven't duplicated any column names
	names := map[string]bool{timestampColumn.Name: true}
	for _, col := range dimensions {
		if names[col.Name] {
			return nil, fmt.Errorf("duplicate column name %q", col.Name)
		}
		names[col.Name] = true
	}
	for _, col := range metrics {
		if names[col.Name] {
			return nil, fmt.Errorf("duplicate column name %q", col.Name)
		}
		names[col.Name] = true
	}

	// Sanity checks
	if c.FlushInterval.Duration < time.Second {
		return nil, fmt.Errorf("flush interval is too small: %s", c.FlushInterval)
	}
	if c.QueryParallelism < 1 {
		return nil, fmt.Errorf("bad query parallelism (must be positive): %d", c.QueryParallelism)
	}
	if c.RetentionDays < 1 {
		return nil, fmt.Errorf("retention days is too small: %d", c.RetentionDays)
	}
	if segmentSize < 100 {
		return nil, fmt.Errorf("segment size seems too small: %s", c.Schema.SegmentSize)
	}
	// The parsed size is converted to int below. Reject values that do not fit so the conversion cannot
	// silently wrap to a negative or truncated segment size (easy to hit on 32-bit platforms).
	const maxInt = int(^uint(0) >> 1)
	if segmentSize > uint64(maxInt) {
		return nil, fmt.Errorf("segment size is too large: %s", c.Schema.SegmentSize)
	}
	if c.Schema.IntervalDuration.Duration < time.Minute {
		return nil, fmt.Errorf("interval duration is too short: %s", c.Schema.IntervalDuration)
	}

	return &gumshoe.Schema{
		TimestampColumn:  timestampColumn.Column,
		DimensionColumns: dimensions,
		MetricColumns:    metrics,
		SegmentSize:      int(segmentSize),
		IntervalDuration: c.Schema.IntervalDuration.Duration,
		DiskBacked:       diskBacked,
		Dir:              dir,
		RunConfig: gumshoe.RunConfig{
			FixedRetention: true,
			Retention:      time.Duration(c.RetentionDays) * 24 * time.Hour,
		},
	}, nil
}
Beispiel #3
0
// BenchmarkInsertion measures inserting rows one at a time into a disk-backed gumshoe DB.
//
// Setup (outside the timed region) builds a schema with numDimensions uint16 dimension columns,
// numMetrics uint32 metric columns and a uint32 "at" timestamp column, creates the DB in tempDirName,
// and inserts numInsertRows random rows. The timed region inserts b.N rows (cycling through a
// pre-generated set), flushing after every 50k inserted rows and once more at the end. Teardown (closing
// the DB and removing tempDirName) is excluded from the measurement.
func BenchmarkInsertion(b *testing.B) {
	// Number of single-row inserts between flushes inside the timed loop (somewhat arbitrary).
	const flushEvery = 50000

	var err error
	dimensions := make([]gumshoe.DimensionColumn, numDimensions)
	for i := range dimensions {
		dimensions[i], err = gumshoe.MakeDimensionColumn(dimColumn(i), "uint16", false)
		if err != nil {
			b.Fatal(err)
		}
	}
	metrics := make([]gumshoe.MetricColumn, numMetrics)
	for i := range metrics {
		metrics[i], err = gumshoe.MakeMetricColumn(metricColumn(i), "uint32")
		if err != nil {
			b.Fatal(err)
		}
	}
	timestampColumn, err := gumshoe.MakeDimensionColumn("at", "uint32", false)
	if err != nil {
		b.Fatal(err)
	}

	// Best-effort removal of leftovers from a previous run; the error is deliberately ignored.
	_ = os.RemoveAll(tempDirName)
	schema := &gumshoe.Schema{
		TimestampColumn:  timestampColumn.Column,
		DimensionColumns: dimensions,
		MetricColumns:    metrics,
		SegmentSize:      5e5, // 500KB
		IntervalDuration: time.Hour,
		DiskBacked:       true,
		Dir:              tempDirName,
	}
	// Registered before the DB is opened so that it runs after the deferred db.Close below.
	defer os.RemoveAll(tempDirName)

	db, err := gumshoe.NewDB(schema)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			b.Fatal(err)
		}
	}()

	// Generate the fixed rows the table will start with
	if err := db.Insert(nRandomRows(numInsertRows)); err != nil {
		b.Fatal(err)
	}

	// Generate the test rows to insert during the benchmark
	rows := nRandomRows(numInsertRows)

	b.SetBytes(int64(db.RowSize))
	b.ResetTimer()

	// NOTE: Flushing after every flushEvery inserted rows and at the end. This could lead to some quirks
	// (steps) in benchmark results. Pay attention to the number of iterations the benchmark runs.
	for i := 0; i < b.N; i++ {
		if err := db.Insert([]gumshoe.RowMap{rows[i%len(rows)]}); err != nil {
			b.Fatal(err)
		}
		// Count completed inserts (i+1) so the first flush happens after flushEvery rows rather than
		// immediately after the very first row.
		if (i+1)%flushEvery == 0 {
			if err := db.Flush(); err != nil {
				b.Fatal(err)
			}
		}
	}
	if err := db.Flush(); err != nil {
		b.Fatal(err)
	}

	// Stop the clock before the deferred teardown runs so that closing the DB and deleting the data
	// directory are not billed to the per-insert timing.
	b.StopTimer()
}