func main() { configFile := flag.String("config", "config.toml", "path to a DB config (to get the schema)") shardsFlag := flag.String("shards", "", "comma-separated list of shard addresses (with ports)") port := flag.Int("port", 9090, "port on which to listen") flag.Parse() shardAddrs := strings.Split(*shardsFlag, ",") if *shardsFlag == "" || len(shardAddrs) == 0 { Log.Fatal("At least one shard required") } f, err := os.Open(*configFile) if err != nil { Log.Fatal(err) } defer f.Close() _, schema, err := config.LoadTOMLConfig(f) if err != nil { Log.Fatal(err) } schema.Initialize() r := NewRouter(shardAddrs, schema) addr := fmt.Sprintf(":%d", *port) server := &http.Server{ Addr: addr, Handler: r, } Log.Println("Now serving on", addr) Log.Fatal(server.ListenAndServe()) }
func TestSanity(t *testing.T) { const configText = ` listen_addr = "" database_dir = "MEMORY" flush_interval = "1h" statsd_addr = "localhost:8125" open_file_limit = 1000 query_parallelism = 10 retention_days = 7 [schema] segment_size = "1MB" interval_duration = "1h" timestamp_column = ["at", "uint32"] dimension_columns = [["dim1", "uint32"]] metric_columns = [["metric1", "uint32"]] ` conf, schema, err := config.LoadTOMLConfig(strings.NewReader(configText)) if err != nil { t.Fatal(err) } server := httptest.NewServer(NewServer(conf, schema)) defer server.Close() resp, err := http.Get(server.URL + "/") if err != nil { t.Fatal(err) } if resp.StatusCode != 200 { t.Error("Expected 200 at /") } resp.Body.Close() }
func merge(args []string) { flags := flag.NewFlagSet("gumtool merge", flag.ExitOnError) var ( newConfigFilename string oldDBPaths stringsFlag parallelism int numOpenFiles int flushSegments int ) flags.StringVar(&newConfigFilename, "new-db-config", "", "Filename of the new DB config") flags.Var(&oldDBPaths, "db-paths", "Paths to dirs of DBs to merge") flags.IntVar(¶llelism, "parallelism", 4, "Parallelism for merge workers") flags.IntVar(&numOpenFiles, "rlimit-nofile", 10000, "Value for RLIMIT_NOFILE") flags.IntVar(&flushSegments, "flush-segments", 500, "Flush after merging each N segments") flags.Parse(args) if len(oldDBPaths) == 0 { log.Fatalln("Need at least one entry in -db-paths; got 0") } setRlimit(numOpenFiles) f, err := os.Open(newConfigFilename) if err != nil { log.Fatal(err) } defer f.Close() _, schema, err := config.LoadTOMLConfig(f) if err != nil { log.Fatal(err) } newDB, err := gumshoe.NewDB(schema) if err != nil { log.Fatal(err) } defer newDB.Close() dbs := make([]*gumshoe.DB, len(oldDBPaths)) for i, path := range oldDBPaths { db, err := gumshoe.OpenDBDir(path) if err != nil { log.Fatalf("Error opening DB at %s: %s", path, err) } if err := db.Schema.Equivalent(schema); err != nil { log.Fatalf("Schema of DB at %s didn't match config at %s: %s", path, newConfigFilename, err) } dbs[i] = db } for _, db := range dbs { log.Printf("Merging db %s", db.Schema.Dir) if err := mergeDB(newDB, db, parallelism, flushSegments); err != nil { log.Fatalln("Error merging:", err) } db.Close() } }
func main() { flag.Parse() f, err := os.Open(*configFile) if err != nil { Log.Fatal(err) } defer f.Close() conf, schema, err := config.LoadTOMLConfig(f) if err != nil { Log.Fatal(err) } // Try to set the RLIMIT_NOFILE to the config value. This might fail if the binary lacks sufficient // permissions/capabilities, or on non-Linux OSes. rlimit := &syscall.Rlimit{uint64(conf.OpenFileLimit), uint64(conf.OpenFileLimit)} if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, rlimit); err != nil { Log.Println("Error raising RLIMIT_NOFILE:", err) } // Set up the pprof server, if enabled. if *profileAddr != "" { go func() { Log.Println("Pprof listening on", *profileAddr) Log.Printf("Go to http://%s/debug/pprof to see more", *profileAddr) Log.Fatal(http.ListenAndServe(*profileAddr, nil)) }() } // Configure the gumshoe logger gumshoe.Log = log.New(os.Stdout, "[gumshoe] ", logFlags) // Configure the statsd client statsd, err = gostc.NewClient(conf.StatsdAddr) if err != nil { Log.Fatal(err) } // Listen for signals so we can try to flush before shutdown go func() { c := make(chan os.Signal) signal.Notify(c, syscall.SIGINT, syscall.SIGTERM) <-c close(shutdown) }() server := NewServer(conf, schema) Log.Fatal(server.ListenAndServe()) }
func migrate(args []string) { flags := flag.NewFlagSet("gumtool migrate", flag.ExitOnError) oldDBPath := flags.String("old-db-path", "", "Path of old DB directory") newConfigFilename := flags.String("new-db-config", "", "Filename of new DB config file") parallelism := flags.Int("parallelism", 4, "Parallelism for reading old DB") numOpenFiles := flags.Int("rlimit-nofile", 10000, "The value to set RLIMIT_NOFILE") flushSegments := flags.Int("flush-segments", 500, "Flush after every N (old) segments") flags.Parse(args) // Attempt to raise the open file limit; necessary for big migrations setRlimit(*numOpenFiles) oldDB, err := gumshoe.OpenDBDir(*oldDBPath) if err != nil { log.Fatal(err) } defer oldDB.Close() f, err := os.Open(*newConfigFilename) if err != nil { log.Fatal(err) } defer f.Close() _, schema, err := config.LoadTOMLConfig(f) if err != nil { log.Fatal(err) } newDB, err := gumshoe.NewDB(schema) if err != nil { log.Fatal(err) } defer newDB.Close() if err := migrateDBs(newDB, oldDB, *parallelism, *flushSegments); err != nil { log.Fatal(err) } fmt.Println("done") }
// balance implements the "gumtool balance" subcommand: it redistributes the
// intervals of several source gumshoe DBs (reached over SSH/SFTP) across a
// set of destination DBs. It verifies all schemas are mutually equivalent,
// then prepares per-dest partial DBs from each source and merges them on
// each destination host. Any error is fatal (log.Fatal*).
func balance(args []string) {
	flags := flag.NewFlagSet("gumtool balance", flag.ExitOnError)
	var (
		username      string
		sourceDBDirs  stringsFlag
		destDBConfigs stringsFlag
		binDir        string
		workDir       string
	)
	flags.StringVar(&username, "username", "", "SSH username")
	flags.Var(&sourceDBDirs, "source-db-dirs", "Dirs of source DBs in form host.com:/path/to/db; comma-separated")
	flags.Var(&destDBConfigs, "dest-db-configs", "Filenames of dest DB configs in form host.com:/path/to/conf.toml; comma-separated")
	flags.StringVar(&binDir, "bindir", "", "Path to dir containing gumshoedb binaries (same on all servers)")
	flags.StringVar(&workDir, "workdir", "", "Path to dir for temporary partial DBs (same on all servers)")
	flags.Parse(args)

	// All four inputs are required; fail fast on anything missing.
	if len(sourceDBDirs) == 0 {
		log.Fatal("Need at least one dir in -source-db-dirs")
	}
	if len(destDBConfigs) == 0 {
		log.Fatal("Need at least one config in -dest-db-configs")
	}
	if binDir == "" {
		log.Fatal("-bindir must be given")
	}
	if workDir == "" {
		log.Fatal("-workdir must be given")
	}

	// Parse "host:path" pairs. SplitN with n=2 keeps any further colons in
	// the path part intact.
	sources := make([]balanceSource, len(sourceDBDirs))
	for i, hostDir := range sourceDBDirs {
		parts := strings.SplitN(hostDir, ":", 2)
		if len(parts) != 2 {
			log.Fatalln("Bad host:dir:", hostDir)
		}
		sources[i] = balanceSource{parts[0], parts[1]}
	}
	dests := make([]balanceDest, len(destDBConfigs))
	for i, hostConfig := range destDBConfigs {
		parts := strings.SplitN(hostConfig, ":", 2)
		if len(parts) != 2 {
			log.Fatalln("Bad host:config:", hostConfig)
		}
		dests[i] = balanceDest{parts[0], parts[1]}
	}

	s := NewSSH(username)

	//
	// 1. Load all the schemas from the source DB dirs and check mutual equivalence.
	//
	// These *gumshoe.DBs only have schemas and static table metadata filled out
	// (decoded straight from db.json; the DBs are never opened for reading).
	sourceDBs := make([]*gumshoe.DB, len(sources))
	for i, source := range sources {
		sftpClient, err := s.getSFTPClient(source.host)
		if err != nil {
			log.Fatal(err)
		}
		dbJSONPath := path.Join(source.dbDir, "db.json")
		f, err := sftpClient.Open(dbJSONPath)
		if err != nil {
			log.Fatalf("Cannot open %s:%s: %s", source.host, dbJSONPath, err)
		}
		db := new(gumshoe.DB)
		if err := json.NewDecoder(f).Decode(db); err != nil {
			log.Fatalf("Error loading db.json schema from %s: %s", source.host, err)
		}
		f.Close()
		sourceDBs[i] = db
		// Compare every schema against the first; equivalence is transitive,
		// so this establishes mutual equivalence.
		if i > 0 {
			if err := sourceDBs[0].Schema.Equivalent(db.Schema); err != nil {
				log.Fatalln("Found non-equivalent schemas amongst source DBs:", err)
			}
		}
	}
	log.Printf("Loaded %d source DBs and confirmed equivalence", len(sourceDBs))

	//
	// 2. Load all the dest DB configs and check equivalence.
	//
	for _, dest := range dests {
		sftpClient, err := s.getSFTPClient(dest.host)
		if err != nil {
			log.Fatal(err)
		}
		f, err := sftpClient.Open(dest.config)
		if err != nil {
			log.Fatalf("Error opening toml config at %s:%s: %s", dest.host, dest.config, err)
		}
		_, schema, err := config.LoadTOMLConfig(f)
		if err != nil {
			log.Fatalf("Error loading config TOML from %s: %s", dest.host, err)
		}
		f.Close()
		if err := sourceDBs[0].Schema.Equivalent(schema); err != nil {
			log.Fatalf("Found schema in DB config on %s that is not equivalent to the source DBs: %s", dest.host, err)
		}
	}
	log.Printf("Checked %d dest DB configs and confirmed equivalence to source DBs", len(dests))

	//
	// 3. Do the per-source work (in parallel). For each source DB:
	//   a. Partition the DB intervals by dest DB
	//   b. Create new DB dirs called /tmp/gumshoedb/db.partial.shardN
	//   c. Copy in the segment files for the interval to the appropriate partial DB
	//   d. Copy in the source dimension files to each partial DB
	//   e. Write out a db.json for each partial DB
	//   f. Gzip each partial DB
	//   g. SCP each partial DB to the dest DB host at /tmp/gumshoedb/db.partial.fromhostXXX.gz
	//
	if err := MakeWorkDirs(sources, dests, workDir, s); err != nil {
		log.Fatal(err)
	}
	var (
		wg sync.WaitGroup
		mu sync.Mutex // protects partialsByDest
		// Map each dest to all the sources that have a partial for it.
		partialsByDest = make(map[balanceDest][]balanceSource)
	)
	for i := range sourceDBs {
		// Fresh per-iteration variables so each goroutine captures its own
		// source/db (required before Go 1.22's per-iteration loop variables).
		source := sources[i]
		db := sourceDBs[i]
		wg.Add(1)
		go func() {
			defer wg.Done()
			// NOTE(review): log.Fatal inside a goroutine exits the whole
			// process without running other goroutines' deferred cleanup —
			// presumably acceptable for this one-shot tool; confirm.
			partialDests, err := PreparePartials(source, dests, db, workDir, s)
			if err != nil {
				log.Fatal(err)
			}
			mu.Lock()
			for _, dest := range partialDests {
				partialsByDest[dest] = append(partialsByDest[dest], source)
			}
			mu.Unlock()
		}()
	}
	wg.Wait()

	//
	// 4. Do the per-dest work (in parallel). For each dest DB:
	//   a. Un-gzip each partial DB
	//   b. gumtool merge the partial DBs
	//
	for i := range dests {
		dest := dests[i]
		// Shadows the outer sources: just this dest's contributing sources.
		sources := partialsByDest[dest]
		wg.Add(1)
		go func() {
			defer wg.Done()
			if err := MergeDBs(dest, sources, workDir, binDir, s); err != nil {
				log.Fatal(err)
			}
		}()
	}
	wg.Wait()

	// Clean up the temporary work dirs on every host.
	if err := RemoveWorkDirs(sources, dests, workDir, s); err != nil {
		log.Fatal(err)
	}
}