func (r *RDPG) registerConsulWatches() (err error) { log.Info(`rdpg.RDPG#registerConsulWatches() TODO: Registering Consul Watches...`) /* "type": "service", "service": "haproxy", "handler": "/var/vcap/jobs/rdpgd-service/bin/consul-watch-notification" "type": "service", "service": "postgres", "handler": "/var/vcap/jobs/rdpgd-service/bin/consul-watch-notification" */ return }
func service() (err error) { log.Info(`Starting with 'service' role...`) err = bootstrap() if err != nil { log.Error(fmt.Sprintf(`main.service() bootstrap() ! %s`, err)) os.Exit(1) } go admin.API() go tasks.Scheduler() go tasks.Work() err = signalHandler() return }
func manager() (err error) { log.Info(`Starting with 'manager' role...`) err = bootstrap() if err != nil { log.Error(fmt.Sprintf(`main.manager() bootstrap() ! %s`, err)) os.Exit(1) } go admin.API() go cfsb.API() go tasks.Scheduler(Role) go tasks.Work(Role) err = signalHandler() return }
func signalHandler() (err error) { ch := make(chan os.Signal, 1) signal.Notify(ch, os.Interrupt, syscall.SIGTERM) for sig := range ch { log.Info(fmt.Sprintf("main.signalHandler() Received signal %v, shutting down gracefully...", sig)) if _, err := os.Stat(pidFile); err == nil { if err := os.Remove(pidFile); err != nil { log.Error(err.Error()) os.Exit(1) } } os.Exit(0) } return }
//Work - Select a task from the queue for this server func Work() { mcUser = os.Getenv(`RDPGD_PG_USER`) mcPass = os.Getenv(`RDPGD_PG_PASS`) mcPort = os.Getenv(`RDPGD_PG_PORT`) mcIP = os.Getenv(`RDPGD_PG_IP`) mcConsulIP = os.Getenv(`RDPGD_CONSUL_IP`) pgbFrequency, _ = strconv.Atoi(os.Getenv(`RDPGD_FREQUENCY`)) for { err := configureGlobalPGBouncer() if err != nil { log.Error(fmt.Sprintf(`gpb.configureGlobalPGBouncer() ! Error: %s`, err)) } log.Info(fmt.Sprintf(`Time goes by, sleeping for %d seconds...`, pgbFrequency)) time.Sleep(time.Duration(pgbFrequency) * time.Second) } }
// InitSchema - Initialize the rdpg system database schemas.
// Connects to the local rdpg database, waits for the BDR node group to be
// ready, creates the rdpg schemas/tables (retrying while the cluster-wide
// BDR DDL lock is held elsewhere), seeds default schedules, config values,
// services and plans, then installs postgres-database helper functions on
// every node of the cluster. Most errors are logged and skipped; only a
// failed connection or a non-ErrNoRows query error aborts with an error.
func (r *RDPG) InitSchema(role string) (err error) {
	log.Trace(fmt.Sprintf(`rdpg.RDPG<%s>#InitSchema() Initializing Schema for Cluster...`, ClusterID))
	var name string
	p := pg.NewPG(`127.0.0.1`, pgPort, `rdpg`, `rdpg`, pgPass)
	db, err := p.Connect()
	if err != nil {
		log.Error(fmt.Sprintf(`rdpg.RDPG#InitSchema(%s) Opening db connection ! %s`, role, err))
		return err
	}
	defer db.Close()
	// Block until this BDR node has fully joined its node group; an error
	// here is logged but deliberately not fatal.
	_, err = db.Exec(`SELECT bdr.bdr_node_join_wait_for_ready();`)
	if err != nil {
		log.Error(fmt.Sprintf(`RDPG#initSchema() bdr.bdr_node_join_wait_for_ready ! %s`, err))
	}
	// BDR serializes DDL cluster-wide; match the known "locked" messages so
	// we can wait and retry rather than fail outright.
	ddlLockRE := regexp.MustCompile(`cannot acquire DDL lock|Database is locked against DDL operations`)
	for { // Retry loop for acquiring DDL schema lock.
		log.Trace(fmt.Sprintf("RDPG#initSchema() SQL[%s]", "rdpg_schemas"))
		_, err = db.Exec(SQL["rdpg_schemas"])
		if err != nil {
			if ddlLockRE.MatchString(err.Error()) {
				log.Trace("RDPG#initSchema() DDL Lock not available, waiting...")
				time.Sleep(1 * time.Second)
				continue
			}
			log.Error(fmt.Sprintf("RDPG#initSchema() ! %s", err))
		}
		break
	}
	// Table-creation SQL keys; each encodes "<schema>_<table>" after the
	// create_table_ prefix (only the first underscore splits schema/table).
	keys := []string{
		"create_table_cfsb_services",
		"create_table_cfsb_plans",
		"create_table_cfsb_instances",
		"create_table_cfsb_bindings",
		"create_table_cfsb_credentials",
		"create_table_tasks_schedules",
		"create_table_tasks_tasks",
		"create_table_rdpg_consul_watch_notifications",
		"create_table_rdpg_events",
		"create_table_rdpg_config",
		"create_table_backups_file_history",
	}
	for _, key := range keys {
		// k[0] = schema name, k[1] = table name.
		k := strings.Split(strings.Replace(strings.Replace(key, "create_table_", "", 1), "_", ".", 1), ".")
		sq := fmt.Sprintf(`SELECT table_name FROM information_schema.tables where table_schema='%s' AND table_name='%s';`, k[0], k[1])
		log.Trace(fmt.Sprintf("RDPG#initSchema() %s", sq))
		// Create the table only when it does not already exist.
		if err := db.QueryRow(sq).Scan(&name); err != nil {
			if err == sql.ErrNoRows {
				log.Trace(fmt.Sprintf("RDPG#initSchema() SQL[%s]", key))
				_, err = db.Exec(SQL[key])
				if err != nil {
					log.Error(fmt.Sprintf("RDPG#initSchema() ! %s", err))
				}
			} else {
				log.Error(fmt.Sprintf("rdpg.initSchema() ! %s", err))
			}
		}
	}
	err = insertDefaultSchedules(role, db)
	if err != nil {
		log.Error(fmt.Sprintf(`rdpg.initSchema() service task schedules ! %s`, err))
	}
	// Seed per-cluster configuration defaults (backup path/port, pg_dump
	// location, file-history retention).
	sq := fmt.Sprintf(`INSERT INTO rdpg.config (key,cluster_id,value) VALUES ('BackupsPath', '%s','/var/vcap/store/pgbdr/backups'), ('BackupPort', '%s', '7432'), ('pgDumpBinaryLocation', '%s', '/var/vcap/packages/pgbdr/bin/pg_dump'),('defaultDaysToKeepFileHistory', '%s', '180')`, ClusterID, ClusterID, ClusterID, ClusterID)
	log.Trace(fmt.Sprintf(`rdpg.InitSchema() > %s`, sq))
	_, err = db.Exec(sq)
	if err != nil {
		log.Error(fmt.Sprintf(`rdpg.initSchema() service task schedules ! %s`, err))
	}
	// TODO: Move initial population of services out of rdpg to Admin API.
	if err := db.QueryRow(`SELECT name FROM cfsb.services WHERE name='rdpg' LIMIT 1;`).Scan(&name); err != nil {
		if err == sql.ErrNoRows {
			if _, err = db.Exec(SQL["insert_default_cfsb_services"]); err != nil {
				log.Error(fmt.Sprintf("rdpg.initSchema(insert_default_cfsb_services) %s", err))
			}
		} else {
			log.Error(fmt.Sprintf("rdpg.initSchema() ! %s", err))
		}
	}
	// TODO: Move initial population of services out of rdpg to Admin API.
	if err = db.QueryRow(`SELECT name FROM cfsb.plans WHERE name='shared' LIMIT 1;`).Scan(&name); err != nil {
		if err == sql.ErrNoRows {
			if _, err = db.Exec(SQL["insert_default_cfsb_plans"]); err != nil {
				log.Error(fmt.Sprintf("rdpg.initSchema(insert_default_cfsb_plans) %s", err))
			}
		} else {
			log.Error(fmt.Sprintf("rdpg.initSchema() ! %s", err))
		}
	}
	db.Close()
	// Install helper functions on the `postgres` database of every cluster
	// node (these are node-local, not replicated via BDR).
	cluster, err := NewCluster(ClusterID, r.ConsulClient)
	for _, pg := range cluster.Nodes {
		pg.PG.Set(`database`, `postgres`)
		db, err := pg.PG.Connect()
		if err != nil {
			log.Error(fmt.Sprintf("RDPG#DropUser(%s) %s ! %s", name, pg.PG.IP, err))
		}
		log.Trace(fmt.Sprintf("RDPG#initSchema() SQL[%s]", "postgres_schemas"))
		_, err = db.Exec(SQL["postgres_schemas"])
		if err != nil {
			log.Error(fmt.Sprintf("RDPG#initSchema() ! %s", err))
		}
		keys = []string{ // These are for the postgres database only
			"create_function_rdpg_disable_database",
		}
		for _, key := range keys {
			// k[0] = routine schema, k[1] = routine name.
			k := strings.Split(strings.Replace(strings.Replace(key, "create_function_", "", 1), "_", ".", 1), ".")
			// TODO: move this into a pg.PG#FunctionExists()
			sq := fmt.Sprintf(`SELECT routine_name FROM information_schema.routines WHERE routine_type='FUNCTION' AND routine_schema='%s' AND routine_name='%s';`, k[0], k[1])
			log.Trace(fmt.Sprintf("RDPG#initSchema() %s", sq))
			if err := db.QueryRow(sq).Scan(&name); err != nil {
				if err == sql.ErrNoRows {
					log.Trace(fmt.Sprintf("RDPG#initSchema() SQL[%s]", key))
					_, err = db.Exec(SQL[key])
					if err != nil {
						log.Error(fmt.Sprintf("RDPG#initSchema() %s", err))
					}
				} else {
					log.Error(fmt.Sprintf("rdpg.initSchema() %s", err))
					db.Close()
					return err
				}
			}
		}
		db.Close()
	}
	log.Info(fmt.Sprintf(`rdpg.RDPG<%s>#InitSchema() Schema Initialized.`, ClusterID))
	return nil
}
// Bootstrap the RDPG Database and associated services. func Bootstrap() (err error) { r := newRDPG() log.Info(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() Bootstrapping Cluster Node...`, ClusterID)) err = r.initialBootstrap() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.initialBootstrap() ! %s`, ClusterID, err)) return } // Record clusterService in consul kv := r.ConsulClient.KV() key := fmt.Sprintf(`rdpg/%s/cluster/service`, ClusterID) kvp := &consulapi.KVPair{Key: key, Value: []byte(globals.ClusterService)} _, err = kv.Put(kvp, &consulapi.WriteOptions{}) if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#BootStrap(): key=%s globals.ClusterService=%s ! %s`, ClusterID, key, globals.ClusterService, err)) } s, err := services.NewService(globals.ClusterService) // postgresql or pgbdr err = s.Configure() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() s.Configure(%s) ! %s`, ClusterID, globals.ClusterService, err)) } if globals.ClusterService == "pgbdr" { r.bdrBootstrap() } else { // TODO: This will be a switch statement when we have more than 2 service types. err = r.serviceClusterCapacityStore() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#bootstrapSystem() Store Service CLuster Instance Capacity in Consul KeyValue! %s`, ClusterID, err)) return } err = r.bootstrapSystem() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#bdrLeaderBootstrap() r.bootstrapSystem(%s,%s) ! %s`, ClusterID, globals.ServiceRole, globals.ClusterService, err)) return } } svcs := []string{`pgbouncer`, `haproxy`} for index := range svcs { s, err := services.NewService(svcs[index]) err = s.Configure() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() s.Configure(%s) ! %s`, ClusterID, svcs[index], err)) } } err = r.registerConsulServices() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.registerConsulServices() ! 
%s`, ClusterID, err)) } err = r.registerConsulWatches() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.registerConsulWatches() ! %s`, ClusterID, err)) } log.Trace(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() Bootstrapping Cluster Node Completed.`, ClusterID)) return }
func pgbouncer() (err error) { log.Info(`Starting with 'pgbouncer' role...`) go gpb.Work() err = signalHandler() return }
func configureGlobalPGBouncer() (err error) { iniHeaderFile := `/var/vcap/jobs/global-pgbouncer/config/pgbouncer.ini.header` iniOutputFile := `/var/vcap/store/global-pgbouncer/config/pgbouncer.ini` userHeaderFile := `/var/vcap/jobs/global-pgbouncer/config/users.header` userOutputFile := `/var/vcap/store/global-pgbouncer/config/users` log.Info("gpb.configureGlobalPGBouncer()...") dir := `/var/vcap/jobs/rdpgd-global-pgbouncer` if _, err := os.Stat(dir); os.IsNotExist(err) { log.Trace(fmt.Sprintf("gpb.configureGlobalPGBouncer() Not a global pgbouncer node since %s doesn't exist, skipping.", dir)) return nil } instances, err := active() if err != nil { log.Error(fmt.Sprintf("gpb.configureGlobalPGBouncer() ! %s", err)) return err } pgbIni, err := ioutil.ReadFile(iniHeaderFile) if err != nil { log.Error(fmt.Sprintf("gpb.configureGlobalPGBouncer() Attempted to read file %s ! %s", iniHeaderFile, err)) return err } pgbUsers, err := ioutil.ReadFile(userHeaderFile) if err != nil { log.Error(fmt.Sprintf("gpb.configureGlobalPGBouncer() Attempted to read file %s ! %s", userHeaderFile, err)) return err } pi := []string{string(pgbIni)} pu := []string{string(pgbUsers)} for index := range instances { i := instances[index] log.Trace(fmt.Sprintf("gpb.configureGlobalPGBouncer() Looking up master IP for database %s on cluster %s", i.Database, i.ClusterID)) hostIP, err := getMasterIP(i.ClusterID) if err != nil { log.Error(fmt.Sprintf("gpb.configureGlobalPGBouncer() Could not resolve master ip for database %s on cluster %s, pgbouncer.ini will not be overwritten or reloaded", i.Database, i.ClusterID)) return err } log.Trace(fmt.Sprintf("gpb.configureGlobalPGBouncer() master ip for database %s on cluster! 
%s", hostIP, i.Database)) pi = append(pi, fmt.Sprintf(`%s = host=%s port=%s dbname=%s`, i.Database, hostIP, "7432", i.Database)) pu = append(pu, fmt.Sprintf(`"%s" "%s"`, i.User, i.Pass)) } pi = append(pi, "") pu = append(pu, "") beforeChecksum, _ := getFileChecksum(iniOutputFile) err = ioutil.WriteFile(iniOutputFile, []byte(strings.Join(pi, "\n")), 0640) if err != nil { log.Error(fmt.Sprintf("gpb.configureGlobalPGBouncer() ! %s", err)) return err } afterChecksum, err := getFileChecksum(iniOutputFile) if err != nil { log.Error(fmt.Sprintf("gpb.configureGlobalPGBouncer() Could not determine the checksum of the pgbouncer.ini file ! %s", err)) return err } err = ioutil.WriteFile(userOutputFile, []byte(strings.Join(pu, "\n")), 0640) if err != nil { log.Error(fmt.Sprintf("gpb.configureGlobalPGBouncer() ! %s", err)) return err } if bytes.Equal(beforeChecksum, afterChecksum) { log.Info(fmt.Sprintf("gpb.configureGlobalPGBouncer() Checksum before: %x after: %x, since there are no changes not reloading pgBouncer", beforeChecksum, afterChecksum)) } else { log.Info(fmt.Sprintf("gpb.configureGlobalPGBouncer() Checksum before: %x after: %x, since there are changes reloading pgBouncer", beforeChecksum, afterChecksum)) cmd := exec.Command("/var/vcap/jobs/global-pgbouncer/bin/control", "reload") err = cmd.Run() if err != nil { log.Error(fmt.Sprintf("gpb.configureGlobalPGBouncer() ! %s", err)) return err } } return }
// Bootstrap the RDPG Database and associated services. func Bootstrap(role string) (err error) { r := newRDPG() log.Info(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() Bootstrapping Cluster Node...`, ClusterID)) err = r.initialBootstrap() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.initialBootstrap() ! %s`, ClusterID, err)) return } _, err = r.bootstrapLock() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.bootstrapLock() ! %s`, ClusterID, err)) return } defer r.bootstrapUnlock() leader := false key := fmt.Sprintf(`rdpg/%s/bdr/join/ip`, ClusterID) bdrJoinIP, err = r.getKey(key) if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() kv.getKey(%s) ! %s ...`, ClusterID, key, err)) return err } if len(bdrJoinIP) == 0 { log.Trace(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() kv.getKey(%s) BDR Join IP has not been set`, ClusterID, key)) leader = true } else { log.Trace(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() kv.getKey(%s) BDR Join Node IP has been set to %s`, ClusterID, key, bdrJoinIP)) leader = false } if leader { err = r.leaderBootstrap(role) if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.leaderBootstrap() ! %s`, ClusterID, err)) } } else { err = r.nonLeaderBootstrap(role) if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.nonLeaderBootstrap() ! %s`, ClusterID, err)) } } err = r.reconfigureServices() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() reconfigureServices() ! %s`, ClusterID, err)) } err = r.registerConsulServices() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.registerConsulServices() ! %s`, ClusterID, err)) } err = r.registerConsulWatches() if err != nil { log.Error(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() r.registerConsulWatches() ! %s`, ClusterID, err)) } log.Trace(fmt.Sprintf(`rdpg.RDPG<%s>#Bootstrap() Bootstrapping Cluster Node Completed.`, ClusterID)) return }