Example #1
0
// runMigrateTestCase exercises one migration scenario end to end: it builds a
// DB from the old schema, inserts and flushes the test rows, migrates into a
// DB built from the new schema, and checks the migrated rows against the
// expectation in the test case.
func runMigrateTestCase(t *testing.T, testCase *migrateTestCase) {
	sourceDB, err := gumshoe.NewDB(schemaFixture(testCase.OldSchema))
	if err != nil {
		t.Fatal(err)
	}
	defer sourceDB.Close()
	destDB, err := gumshoe.NewDB(schemaFixture(testCase.NewSchema))
	if err != nil {
		t.Fatal(err)
	}
	defer destDB.Close()

	// Seed the source DB and force its contents out before migrating.
	if err = sourceDB.Insert(testCase.InsertRows); err != nil {
		t.Fatal(err)
	}
	if err = sourceDB.Flush(); err != nil {
		t.Fatal(err)
	}

	if err = migrateDBs(destDB, sourceDB, 4, 10); err != nil {
		t.Fatal(err)
	}

	a.Assert(t, testCase.ExpectedRows, util.DeepConvertibleEquals, destDB.GetDebugRows())
}
Example #2
0
// merge implements the "gumtool merge" subcommand: it folds the contents of
// several existing DB directories (which must all have a schema equivalent to
// the new config's) into a single freshly-created DB.
func merge(args []string) {
	fs := flag.NewFlagSet("gumtool merge", flag.ExitOnError)
	var (
		newConfigFilename string
		oldDBPaths        stringsFlag
		parallelism       int
		numOpenFiles      int
		flushSegments     int
	)
	fs.StringVar(&newConfigFilename, "new-db-config", "", "Filename of the new DB config")
	fs.Var(&oldDBPaths, "db-paths", "Paths to dirs of DBs to merge")
	fs.IntVar(&parallelism, "parallelism", 4, "Parallelism for merge workers")
	fs.IntVar(&numOpenFiles, "rlimit-nofile", 10000, "Value for RLIMIT_NOFILE")
	fs.IntVar(&flushSegments, "flush-segments", 500, "Flush after merging each N segments")
	fs.Parse(args)

	if len(oldDBPaths) == 0 {
		log.Fatalln("Need at least one entry in -db-paths; got 0")
	}

	// Merging holds many segment files open at once, so raise the limit first.
	setRlimit(numOpenFiles)

	configFile, err := os.Open(newConfigFilename)
	if err != nil {
		log.Fatal(err)
	}
	defer configFile.Close()
	_, schema, err := config.LoadTOMLConfig(configFile)
	if err != nil {
		log.Fatal(err)
	}
	newDB, err := gumshoe.NewDB(schema)
	if err != nil {
		log.Fatal(err)
	}
	defer newDB.Close()

	// Open every source DB up front and verify schema compatibility before
	// starting any merge work.
	oldDBs := make([]*gumshoe.DB, len(oldDBPaths))
	for i, path := range oldDBPaths {
		db, err := gumshoe.OpenDBDir(path)
		if err != nil {
			log.Fatalf("Error opening DB at %s: %s", path, err)
		}
		if err := db.Schema.Equivalent(schema); err != nil {
			log.Fatalf("Schema of DB at %s didn't match config at %s: %s", path, newConfigFilename, err)
		}
		oldDBs[i] = db
	}

	// Merge the sources one at a time, releasing each as it finishes.
	for _, source := range oldDBs {
		log.Printf("Merging db %s", source.Schema.Dir)
		if err := mergeDB(newDB, source, parallelism, flushSegments); err != nil {
			log.Fatalln("Error merging:", err)
		}
		source.Close()
	}
}
Example #3
0
// loadDB opens the database if it exists, or else creates a new one.
// On any other failure it terminates the process via Log.Fatal.
func (s *Server) loadDB(schema *gumshoe.Schema) {
	dir := s.Config.DatabaseDir
	Log.Printf(`Trying to load %q...`, dir)
	db, err := gumshoe.OpenDB(schema)
	switch {
	case err == nil:
		stats := db.GetDebugStats()
		Log.Printf("Loaded database with %d rows", stats.Rows)
	case err == gumshoe.DBDoesNotExistErr:
		// Missing DB is expected on first run; create it fresh.
		Log.Printf(`Database %q does not exist; creating`, dir)
		if db, err = gumshoe.NewDB(schema); err != nil {
			Log.Fatal(err)
		}
		Log.Printf("Database at %q created successfully", dir)
	default:
		Log.Fatal(err)
	}
	s.DB = db
}
Example #4
0
// migrate implements the "gumtool migrate" subcommand: it reads every row of
// an existing DB and rewrites it into a new DB created from the given config.
func migrate(args []string) {
	fs := flag.NewFlagSet("gumtool migrate", flag.ExitOnError)
	oldDBPath := fs.String("old-db-path", "", "Path of old DB directory")
	newConfigFilename := fs.String("new-db-config", "", "Filename of new DB config file")
	parallelism := fs.Int("parallelism", 4, "Parallelism for reading old DB")
	numOpenFiles := fs.Int("rlimit-nofile", 10000, "The value to set RLIMIT_NOFILE")
	flushSegments := fs.Int("flush-segments", 500, "Flush after every N (old) segments")
	fs.Parse(args)

	// Attempt to raise the open file limit; necessary for big migrations
	setRlimit(*numOpenFiles)

	sourceDB, err := gumshoe.OpenDBDir(*oldDBPath)
	if err != nil {
		log.Fatal(err)
	}
	defer sourceDB.Close()

	configFile, err := os.Open(*newConfigFilename)
	if err != nil {
		log.Fatal(err)
	}
	defer configFile.Close()
	_, schema, err := config.LoadTOMLConfig(configFile)
	if err != nil {
		log.Fatal(err)
	}
	destDB, err := gumshoe.NewDB(schema)
	if err != nil {
		log.Fatal(err)
	}
	defer destDB.Close()

	if err := migrateDBs(destDB, sourceDB, *parallelism, *flushSegments); err != nil {
		log.Fatal(err)
	}
	fmt.Println("done")
}
Example #5
0
// BenchmarkInsertion measures single-row insert throughput against a
// disk-backed DB with numDimensions uint16 dimension columns and numMetrics
// uint32 metric columns, pre-populated with numInsertRows random rows.
func BenchmarkInsertion(b *testing.B) {
	var err error
	dimensions := make([]gumshoe.DimensionColumn, numDimensions)
	for d := range dimensions {
		if dimensions[d], err = gumshoe.MakeDimensionColumn(dimColumn(d), "uint16", false); err != nil {
			b.Fatal(err)
		}
	}
	metrics := make([]gumshoe.MetricColumn, numMetrics)
	for m := range metrics {
		if metrics[m], err = gumshoe.MakeMetricColumn(metricColumn(m), "uint32"); err != nil {
			b.Fatal(err)
		}
	}
	timestampColumn, err := gumshoe.MakeDimensionColumn("at", "uint32", false)
	if err != nil {
		b.Fatal(err)
	}
	// Best-effort removal of leftovers from a previous run; errors ignored.
	_ = os.RemoveAll(tempDirName)
	schema := &gumshoe.Schema{
		TimestampColumn:  timestampColumn.Column,
		DimensionColumns: dimensions,
		MetricColumns:    metrics,
		SegmentSize:      5e5, // 500KB
		IntervalDuration: time.Hour,
		DiskBacked:       true,
		Dir:              tempDirName,
	}
	defer os.RemoveAll(tempDirName)

	db, err := gumshoe.NewDB(schema)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := db.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	// Seed the table with its fixed starting rows before timing anything.
	if err = db.Insert(nRandomRows(numInsertRows)); err != nil {
		b.Fatal(err)
	}

	// Rows cycled through during the timed loop.
	rows := nRandomRows(numInsertRows)

	b.SetBytes(int64(db.RowSize))
	b.ResetTimer()

	// NOTE(caleb): Flushing every 50k lines (somewhat arbitrarily) and at the end. Note that this could lead
	// to some quirks (steps) in benchmark results. Pay attention to the number of iterations the benchmark
	// runs.
	for n := 0; n < b.N; n++ {
		if err = db.Insert([]gumshoe.RowMap{rows[n%len(rows)]}); err != nil {
			b.Fatal(err)
		}
		if n%50000 == 0 {
			if err = db.Flush(); err != nil {
				b.Fatal(err)
			}
		}
	}
	if err = db.Flush(); err != nil {
		b.Fatal(err)
	}
}