// schemaFixture builds a *gumshoe.Schema from a migrateTestSchema's dimension and metric
// definitions. It panics on any malformed column, since it is only used from tests.
func schemaFixture(testSchema *migrateTestSchema) *gumshoe.Schema {
	atColumn, err := gumshoe.MakeMetricColumn("at", "uint32")
	if err != nil {
		panic(err)
	}
	schema := &gumshoe.Schema{
		TimestampColumn:  gumshoe.Column(atColumn),
		SegmentSize:      100, // small, test-friendly segment size
		IntervalDuration: time.Hour,
	}
	for _, dimension := range testSchema.Dimensions {
		dimColumn, err := gumshoe.MakeDimensionColumn(dimension.Name, dimension.Type, dimension.String)
		if err != nil {
			panic(err)
		}
		schema.DimensionColumns = append(schema.DimensionColumns, dimColumn)
	}
	for _, metric := range testSchema.Metrics {
		metricColumn, err := gumshoe.MakeMetricColumn(metric.Name, metric.Type)
		if err != nil {
			panic(err)
		}
		schema.MetricColumns = append(schema.MetricColumns, metricColumn)
	}
	return schema
}
// Produces a gumshoe Schema based on a Config's values.
func (c *Config) makeSchema() (*gumshoe.Schema, error) {
	dir := ""
	diskBacked := true
	switch c.DatabaseDir {
	case "":
		return nil, errors.New("database directory must be provided. Use 'MEMORY' to specify an in-memory DB.")
	case "MEMORY":
		diskBacked = false
	default:
		dir = c.DatabaseDir
	}

	segmentSize, err := humanize.ParseBytes(c.Schema.SegmentSize)
	if err != nil {
		return nil, err
	}

	name, typ, isString := parseColumn(c.Schema.TimestampColumn)
	if typ != "uint32" {
		return nil, fmt.Errorf("timestamp column (%q) must be uint32", name)
	}
	if isString {
		return nil, errors.New("timestamp column cannot be a string")
	}
	timestampColumn, err := gumshoe.MakeDimensionColumn(name, typ, isString)
	if err != nil {
		return nil, err
	}

	dimensions := make([]gumshoe.DimensionColumn, len(c.Schema.DimensionColumns))
	for i, colPair := range c.Schema.DimensionColumns {
		name, typ, isString := parseColumn(colPair)
		if isString {
			switch typ {
			case "uint8", "uint16", "uint32":
			default:
				return nil, fmt.Errorf("got type %q for column %q (must be unsigned int type)", typ, name)
			}
		}
		col, err := gumshoe.MakeDimensionColumn(name, typ, isString)
		if err != nil {
			return nil, err
		}
		dimensions[i] = col
	}

	if len(c.Schema.MetricColumns) == 0 {
		return nil, fmt.Errorf("schema must include at least one metric column")
	}
	metrics := make([]gumshoe.MetricColumn, len(c.Schema.MetricColumns))
	for i, colPair := range c.Schema.MetricColumns {
		name, typ, isString := parseColumn(colPair)
		if isString {
			return nil, fmt.Errorf("metric column (%q) has string type; not allowed for metric columns", name)
		}
		col, err := gumshoe.MakeMetricColumn(name, typ)
		if err != nil {
			return nil, err
		}
		metrics[i] = col
	}

	// Check that we haven't duplicated any column names
	names := map[string]bool{timestampColumn.Name: true}
	for _, col := range dimensions {
		if names[col.Name] {
			return nil, fmt.Errorf("duplicate column name %q", col.Name)
		}
		names[col.Name] = true
	}
	for _, col := range metrics {
		if names[col.Name] {
			return nil, fmt.Errorf("duplicate column name %q", col.Name)
		}
		names[col.Name] = true
	}

	// Sanity checks
	if c.FlushInterval.Duration < time.Second {
		return nil, fmt.Errorf("flush interval is too small: %s", c.FlushInterval)
	}
	if c.QueryParallelism < 1 {
		return nil, fmt.Errorf("bad query parallelism (must be positive): %d", c.QueryParallelism)
	}
	if c.RetentionDays < 1 {
		return nil, fmt.Errorf("retention days is too small: %d", c.RetentionDays)
	}
	if segmentSize < 100 {
		return nil, fmt.Errorf("segment size seems too small: %s", c.Schema.SegmentSize)
	}
	if c.Schema.IntervalDuration.Duration < time.Minute {
		return nil, fmt.Errorf("interval duration is too short: %s", c.Schema.IntervalDuration)
	}

	return &gumshoe.Schema{
		TimestampColumn:  timestampColumn.Column,
		DimensionColumns: dimensions,
		MetricColumns:    metrics,
		SegmentSize:      int(segmentSize),
		IntervalDuration: c.Schema.IntervalDuration.Duration,
		DiskBacked:       diskBacked,
		Dir:              dir,
		RunConfig: gumshoe.RunConfig{
			FixedRetention: true,
			Retention:      time.Duration(c.RetentionDays) * 24 * time.Hour,
		},
	}, nil
}
// BenchmarkInsertion measures single-row insertion throughput against a disk-backed DB,
// flushing periodically.
func BenchmarkInsertion(b *testing.B) {
	var err error
	dimensions := make([]gumshoe.DimensionColumn, numDimensions)
	for i := range dimensions {
		dimensions[i], err = gumshoe.MakeDimensionColumn(dimColumn(i), "uint16", false)
		if err != nil {
			b.Fatal(err)
		}
	}
	metrics := make([]gumshoe.MetricColumn, numMetrics)
	for i := range metrics {
		metrics[i], err = gumshoe.MakeMetricColumn(metricColumn(i), "uint32")
		if err != nil {
			b.Fatal(err)
		}
	}
	timestampColumn, err := gumshoe.MakeDimensionColumn("at", "uint32", false)
	if err != nil {
		b.Fatal(err)
	}

	_ = os.RemoveAll(tempDirName)
	schema := &gumshoe.Schema{
		TimestampColumn:  timestampColumn.Column,
		DimensionColumns: dimensions,
		MetricColumns:    metrics,
		SegmentSize:      5e5, // 500KB
		IntervalDuration: time.Hour,
		DiskBacked:       true,
		Dir:              tempDirName,
	}
	defer os.RemoveAll(tempDirName)

	db, err := gumshoe.NewDB(schema)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			b.Fatal(err)
		}
	}()

	// Generate the fixed rows the table will start with
	if err := db.Insert(nRandomRows(numInsertRows)); err != nil {
		b.Fatal(err)
	}

	// Generate the test rows to insert during the benchmark
	rows := nRandomRows(numInsertRows)

	b.SetBytes(int64(db.RowSize))
	b.ResetTimer()

	// NOTE(caleb): Flushing every 50k rows (somewhat arbitrarily) and at the end. Note that this could
	// lead to some quirks (steps) in benchmark results. Pay attention to the number of iterations the
	// benchmark runs.
	for i := 0; i < b.N; i++ {
		if err := db.Insert([]gumshoe.RowMap{rows[i%len(rows)]}); err != nil {
			b.Fatal(err)
		}
		if i%50000 == 0 {
			if err := db.Flush(); err != nil {
				b.Fatal(err)
			}
		}
	}
	if err := db.Flush(); err != nil {
		b.Fatal(err)
	}
}
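// The sketch below reuses the gumshoe API exercised in BenchmarkInsertion to stand up a tiny
// in-memory DB (DiskBacked: false) and insert a single row. It is illustrative only and not part
// of the benchmark: the function name exampleInMemoryDB, the column names "country" and "clicks",
// and the idea that gumshoe.RowMap can be written as a map-style literal keyed by column name
// (with float64/string values, as JSON-decoded rows would carry) are assumptions, not facts
// taken from the surrounding code.
func exampleInMemoryDB() error {
	at, err := gumshoe.MakeDimensionColumn("at", "uint32", false)
	if err != nil {
		return err
	}
	country, err := gumshoe.MakeDimensionColumn("country", "uint16", true)
	if err != nil {
		return err
	}
	clicks, err := gumshoe.MakeMetricColumn("clicks", "uint32")
	if err != nil {
		return err
	}
	schema := &gumshoe.Schema{
		TimestampColumn:  at.Column,
		DimensionColumns: []gumshoe.DimensionColumn{country},
		MetricColumns:    []gumshoe.MetricColumn{clicks},
		SegmentSize:      1 << 20, // any value comfortably larger than one row works here
		IntervalDuration: time.Hour,
		DiskBacked:       false, // the "MEMORY" case in Config terms; no Dir needed
	}
	db, err := gumshoe.NewDB(schema)
	if err != nil {
		return err
	}
	// Assumed RowMap literal: one value per column, keyed by column name.
	row := gumshoe.RowMap{"at": 0.0, "country": "US", "clicks": 1.0}
	if err := db.Insert([]gumshoe.RowMap{row}); err != nil {
		return err
	}
	if err := db.Flush(); err != nil {
		return err
	}
	return db.Close()
}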