func runMigrateTestCase(t *testing.T, testCase *migrateTestCase) { oldDB, err := gumshoe.NewDB(schemaFixture(testCase.OldSchema)) if err != nil { t.Fatal(err) } defer oldDB.Close() newDB, err := gumshoe.NewDB(schemaFixture(testCase.NewSchema)) if err != nil { t.Fatal(err) } defer newDB.Close() if err := oldDB.Insert(testCase.InsertRows); err != nil { t.Fatal(err) } if err := oldDB.Flush(); err != nil { t.Fatal(err) } if err := migrateDBs(newDB, oldDB, 4, 10); err != nil { t.Fatal(err) } a.Assert(t, testCase.ExpectedRows, util.DeepConvertibleEquals, newDB.GetDebugRows()) }
func merge(args []string) { flags := flag.NewFlagSet("gumtool merge", flag.ExitOnError) var ( newConfigFilename string oldDBPaths stringsFlag parallelism int numOpenFiles int flushSegments int ) flags.StringVar(&newConfigFilename, "new-db-config", "", "Filename of the new DB config") flags.Var(&oldDBPaths, "db-paths", "Paths to dirs of DBs to merge") flags.IntVar(¶llelism, "parallelism", 4, "Parallelism for merge workers") flags.IntVar(&numOpenFiles, "rlimit-nofile", 10000, "Value for RLIMIT_NOFILE") flags.IntVar(&flushSegments, "flush-segments", 500, "Flush after merging each N segments") flags.Parse(args) if len(oldDBPaths) == 0 { log.Fatalln("Need at least one entry in -db-paths; got 0") } setRlimit(numOpenFiles) f, err := os.Open(newConfigFilename) if err != nil { log.Fatal(err) } defer f.Close() _, schema, err := config.LoadTOMLConfig(f) if err != nil { log.Fatal(err) } newDB, err := gumshoe.NewDB(schema) if err != nil { log.Fatal(err) } defer newDB.Close() dbs := make([]*gumshoe.DB, len(oldDBPaths)) for i, path := range oldDBPaths { db, err := gumshoe.OpenDBDir(path) if err != nil { log.Fatalf("Error opening DB at %s: %s", path, err) } if err := db.Schema.Equivalent(schema); err != nil { log.Fatalf("Schema of DB at %s didn't match config at %s: %s", path, newConfigFilename, err) } dbs[i] = db } for _, db := range dbs { log.Printf("Merging db %s", db.Schema.Dir) if err := mergeDB(newDB, db, parallelism, flushSegments); err != nil { log.Fatalln("Error merging:", err) } db.Close() } }
// loadDB opens the database if it exists, or else creates a new one. func (s *Server) loadDB(schema *gumshoe.Schema) { dir := s.Config.DatabaseDir Log.Printf(`Trying to load %q...`, dir) db, err := gumshoe.OpenDB(schema) if err != nil { if err != gumshoe.DBDoesNotExistErr { Log.Fatal(err) } Log.Printf(`Database %q does not exist; creating`, dir) db, err = gumshoe.NewDB(schema) if err != nil { Log.Fatal(err) } Log.Printf("Database at %q created successfully", dir) } else { stats := db.GetDebugStats() Log.Printf("Loaded database with %d rows", stats.Rows) } s.DB = db }
func migrate(args []string) { flags := flag.NewFlagSet("gumtool migrate", flag.ExitOnError) oldDBPath := flags.String("old-db-path", "", "Path of old DB directory") newConfigFilename := flags.String("new-db-config", "", "Filename of new DB config file") parallelism := flags.Int("parallelism", 4, "Parallelism for reading old DB") numOpenFiles := flags.Int("rlimit-nofile", 10000, "The value to set RLIMIT_NOFILE") flushSegments := flags.Int("flush-segments", 500, "Flush after every N (old) segments") flags.Parse(args) // Attempt to raise the open file limit; necessary for big migrations setRlimit(*numOpenFiles) oldDB, err := gumshoe.OpenDBDir(*oldDBPath) if err != nil { log.Fatal(err) } defer oldDB.Close() f, err := os.Open(*newConfigFilename) if err != nil { log.Fatal(err) } defer f.Close() _, schema, err := config.LoadTOMLConfig(f) if err != nil { log.Fatal(err) } newDB, err := gumshoe.NewDB(schema) if err != nil { log.Fatal(err) } defer newDB.Close() if err := migrateDBs(newDB, oldDB, *parallelism, *flushSegments); err != nil { log.Fatal(err) } fmt.Println("done") }
func BenchmarkInsertion(b *testing.B) { var err error dimensions := make([]gumshoe.DimensionColumn, numDimensions) for i := range dimensions { dimensions[i], err = gumshoe.MakeDimensionColumn(dimColumn(i), "uint16", false) if err != nil { b.Fatal(err) } } metrics := make([]gumshoe.MetricColumn, numMetrics) for i := range metrics { metrics[i], err = gumshoe.MakeMetricColumn(metricColumn(i), "uint32") if err != nil { b.Fatal(err) } } timestampColumn, err := gumshoe.MakeDimensionColumn("at", "uint32", false) if err != nil { b.Fatal(err) } _ = os.RemoveAll(tempDirName) schema := &gumshoe.Schema{ TimestampColumn: timestampColumn.Column, DimensionColumns: dimensions, MetricColumns: metrics, SegmentSize: 5e5, // 500KB IntervalDuration: time.Hour, DiskBacked: true, Dir: tempDirName, } defer os.RemoveAll(tempDirName) db, err := gumshoe.NewDB(schema) if err != nil { b.Fatal(err) } defer func() { if err := db.Close(); err != nil { b.Fatal(err) } }() // Generate the fixed rows the table will start with if err := db.Insert(nRandomRows(numInsertRows)); err != nil { b.Fatal(err) } // Generate the test rows to insert during the benchmark rows := nRandomRows(numInsertRows) b.SetBytes(int64(db.RowSize)) b.ResetTimer() // NOTE(caleb): Flushing every 50k lines (somewhat arbitrarily) and at the end. Note that this could lead // to some quirks (steps) in benchmark results. Pay attention to the number of iterations the benchmark // runs. for i := 0; i < b.N; i++ { if err := db.Insert([]gumshoe.RowMap{rows[i%len(rows)]}); err != nil { b.Fatal(err) } if i%50000 == 0 { if err := db.Flush(); err != nil { b.Fatal(err) } } } if err := db.Flush(); err != nil { b.Fatal(err) } }