/* Refresh a server object */ func (sm *ServerMonitor) refresh() error { err := sm.Conn.Ping() if err != nil { return err } sv, err := dbhelper.GetVariables(sm.Conn) if err != nil { return err } sm.BinlogPos = sv["GTID_BINLOG_POS"] sm.Strict = sv["GTID_STRICT_MODE"] sm.LogBin = sv["LOG_BIN"] sm.ReadOnly = sv["READ_ONLY"] sm.CurrentGtid = sv["GTID_CURRENT_POS"] sm.SlaveGtid = sv["GTID_SLAVE_POS"] sid, _ := strconv.ParseUint(sv["SERVER_ID"], 10, 0) sm.ServerId = uint(sid) slaveStatus, err := dbhelper.GetSlaveStatus(sm.Conn) if err != nil { return err } sm.UsingGtid = slaveStatus.Using_Gtid sm.IOThread = slaveStatus.Slave_IO_Running sm.SQLThread = slaveStatus.Slave_SQL_Running sm.Delay = slaveStatus.Seconds_Behind_Master sm.MasterServerId = slaveStatus.Master_Server_Id sm.MasterHost = slaveStatus.Master_Host return err }
/* Refresh a server object */ func (server *ServerMonitor) refresh() error { err := server.Conn.Ping() if err != nil { // we want the failed state for masters to be set by the monitor if server.State != stateMaster { server.State = stateFailed // remove from slave list server.delete(&slaves) } return err } sv, err := dbhelper.GetVariables(server.Conn) if err != nil { return err } server.PrevState = server.State server.BinlogPos = sv["GTID_BINLOG_POS"] server.Strict = sv["GTID_STRICT_MODE"] server.LogBin = sv["LOG_BIN"] server.ReadOnly = sv["READ_ONLY"] server.CurrentGtid = sv["GTID_CURRENT_POS"] server.SlaveGtid = sv["GTID_SLAVE_POS"] sid, _ := strconv.ParseUint(sv["SERVER_ID"], 10, 0) server.ServerID = uint(sid) slaveStatus, err := dbhelper.GetSlaveStatus(server.Conn) if err != nil { // If we reached this stage with a previously failed server, reintroduce // it as unconnected server. if server.State == stateFailed { server.State = stateUnconn if autorejoin { if verbose { logprint("INFO : Rejoining previously failed server", server.URL) } err := server.rejoin() if err != nil { logprint("ERROR: Failed to autojoin previously failed server", server.URL) } } } return err } server.UsingGtid = slaveStatus.Using_Gtid server.IOThread = slaveStatus.Slave_IO_Running server.SQLThread = slaveStatus.Slave_SQL_Running server.Delay = slaveStatus.Seconds_Behind_Master server.MasterServerID = slaveStatus.Master_Server_Id server.MasterHost = slaveStatus.Master_Host // In case of state change, reintroduce the server in the slave list if server.PrevState == stateFailed || server.PrevState == stateUnconn { server.State = stateSlave slaves = append(slaves, server) } return err }
/* Returns a candidate from a list of slaves. If there's only one slave it will be the de facto candidate. */ func (master *ServerMonitor) electCandidate(l []*ServerMonitor) int { ll := len(l) if *verbose { logprintf("DEBUG: Processing %d candidates", ll) } seqList := make([]uint64, ll) i := 0 hiseq := 0 for _, sl := range l { if *failover == "" { if *verbose { logprintf("DEBUG: Checking eligibility of slave server %s", sl.URL) } if dbhelper.CheckSlavePrerequisites(sl.Conn, sl.Host) == false { continue } if dbhelper.CheckBinlogFilters(master.Conn, sl.Conn) == false { logprintf("WARN : Binlog filters differ on master and slave %s. Skipping", sl.URL) continue } if dbhelper.CheckReplicationFilters(master.Conn, sl.Conn) == false { logprintf("WARN : Replication filters differ on master and slave %s. Skipping", sl.URL) continue } ss, _ := dbhelper.GetSlaveStatus(sl.Conn) if ss.Seconds_Behind_Master.Valid == false { logprintf("WARN : Slave %s is stopped. Skipping", sl.URL) continue } if ss.Seconds_Behind_Master.Int64 > *maxDelay { logprintf("WARN : Slave %s has more than %d seconds of replication delay (%d). Skipping", sl.URL, *maxDelay, ss.Seconds_Behind_Master.Int64) continue } if *gtidCheck && dbhelper.CheckSlaveSync(sl.Conn, master.Conn) == false { logprintf("WARN : Slave %s not in sync. Skipping", sl.URL) continue } } /* If server is in the ignore list, do not elect it */ if contains(ignoreList, sl.URL) { if *verbose { logprintf("DEBUG: %s is in the ignore list. Skipping", sl.URL) } continue } /* Rig the election if the examined slave is preferred candidate master */ if sl.URL == *prefMaster { if *verbose { logprintf("DEBUG: Election rig: %s elected as preferred master", sl.URL) } return i } seqList[i] = getSeqFromGtid(dbhelper.GetVariableByName(sl.Conn, "GTID_CURRENT_POS")) var max uint64 if i == 0 { max = seqList[0] } else if seqList[i] > max { max = seqList[i] hiseq = i } i++ } if i > 0 { /* Return key of slave with the highest seqno. */ return hiseq } else { log.Println("ERROR: No suitable candidates found.") return -1 } }
func main() { flag.Parse() if *version == true { fmt.Println("MariaDB Replication Manager version", repmgrVersion) } // if slaves option has been supplied, split into a slice. if *hosts != "" { hostList = strings.Split(*hosts, ",") } else { log.Fatal("ERROR: No hosts list specified.") } // validate users. if *user == "" { log.Fatal("ERROR: No master user/pair specified.") } dbUser, dbPass = splitPair(*user) if *rpluser == "" { log.Fatal("ERROR: No replication user/pair specified.") } rplUser, rplPass = splitPair(*rpluser) // Create a connection to each host. hostCount := len(hostList) hhdls = make([]*ServerMonitor, hostCount) slaveCount := 0 for k, url := range hostList { var err error hhdls[k], err = newServerMonitor(url) if *verbose { log.Printf("DEBUG: Creating new server: %v", hhdls[k].URL) } if err != nil { if *state == "dead" { log.Printf("INFO: Server %s is dead. Assuming old master.", hhdls[k].URL) master = hhdls[k] continue } log.Fatalln("ERROR: Error when establishing initial connection to host", err) } defer hhdls[k].Conn.Close() if *verbose { log.Printf("DEBUG: Checking if server %s is slave", hhdls[k].URL) } ss, err := dbhelper.GetSlaveStatus(hhdls[k].Conn) if ss.Master_Host != "" { log.Printf("INFO : Server %s is configured as a slave", hhdls[k].URL) slave = append(slave, hhdls[k]) slaveCount++ } else { log.Printf("INFO : Server %s is not a slave. Assuming master status.", hhdls[k].URL) master = hhdls[k] } } if (hostCount - slaveCount) == 0 { log.Fatalln("ERROR: Multi-master topologies are not yet supported.") } for _, sl := range slave { if *verbose { log.Printf("DEBUG: Checking if server %s is a slave of server %s", sl.Host, master.Host) } if dbhelper.IsSlaveof(sl.Conn, sl.Host, master.IP) == false { log.Fatalf("ERROR: Server %s is not a slave of declared master %s", master.URL, master.Host) } } // Check if preferred master is included in Host List ret := func() bool { for _, v := range hostList { if v == *prefMaster { return true } } return false } if ret() == false && *prefMaster != "" { log.Fatal("ERROR: Preferred master is not included in the hosts option") } // Do failover or switchover interactively, else start the interactive monitor. if *state == "dead" { master.failover() } else if *interactive == false { master.switchover() } else { MainLoop: err := termbox.Init() if err != nil { log.Fatalln("Termbox initialization error", err) } termboxChan := new_tb_chan() interval := time.Second ticker := time.NewTicker(interval * 3) var command string for exit == false { select { case <-ticker.C: drawHeader() master.refresh() master.drawMaster() vy = 6 for k, _ := range slave { slave[k].refresh() slave[k].drawSlave(&vy) } drawFooter(&vy) termbox.Flush() case event := <-termboxChan: switch event.Type { case termbox.EventKey: if event.Key == termbox.KeyCtrlS { command = "switchover" exit = true } if event.Key == termbox.KeyCtrlF { command = "failover" exit = true } if event.Key == termbox.KeyCtrlQ { exit = true } } switch event.Ch { case 's': termbox.Sync() } } } termbox.Close() switch command { case "switchover": nmUrl, nsKey := master.switchover() if nmUrl != "" && nsKey >= 0 { if *verbose { log.Printf("DEBUG: Reinstancing new master: %s and new slave: %s [%d]", nmUrl, slave[nsKey].URL, nsKey) } master, err = newServerMonitor(nmUrl) slave[nsKey], err = newServerMonitor(slave[nsKey].URL) } log.Println("###### Restarting monitor console in 5 seconds. Press Ctrl-C to exit") time.Sleep(5 * time.Second) exit = false goto MainLoop case "failover": nmUrl, _ := master.failover() if nmUrl != "" { if *verbose { log.Printf("DEBUG: Reinstancing new master: %s", nmUrl) } master, err = newServerMonitor(nmUrl) } log.Println("###### Restarting monitor console in 5 seconds. Press Ctrl-C to exit") time.Sleep(5 * time.Second) exit = false goto MainLoop } } }
func main() { flag.Parse() if *version == true { fmt.Println("MariaDB Tools version 0.0.1") os.Exit(0) } var address string if *socket != "" { address = "unix(" + *socket + ")" } if *host != "" { address = "tcp(" + *host + ":" + *port + ")" } db, _ := sqlx.Open("mysql", *user+":"+*password+"@"+address+"/") err := db.Ping() if err != nil { log.Fatal(err) } defer db.Close() status = dbhelper.GetStatusAsInt(db) variable, _ = dbhelper.GetVariables(db) out, err := exec.Command("uname", "-srm").Output() if err != nil { log.Fatal(err) } hostname, _ := os.Hostname() fmt.Printf("### MariaDB Server report for host %s\n", hostname) fmt.Printf("### %-25s%s", "Kernel version", out) fmt.Printf("### %-25s%s\n", "System Time", time.Now().Format("2006-01-02 at 03:04 (MST)")) fmt.Println(common.DrawHashline("General", 60)) var server_version string db.QueryRow("SELECT VERSION()").Scan(&server_version) pPrintStr("Version", server_version) now := time.Now().Unix() uptime := status["UPTIME"] start_time := time.Unix(now-uptime, 0).Local() pPrintStr("Started", humanize.Time(start_time)) var count int64 db.Get(&count, "SELECT COUNT(*) FROM information_schema.schemata") pPrintInt("Databases", count) db.Get(&count, "SELECT COUNT(*) FROM information_schema.tables") pPrintInt("Tables", count) /* Potentially unsafe for large systems */ pPrintStr("Datadir", variable["DATADIR"]) pPrintStr("Binary Log", variable["LOG_BIN"]) if variable["LOG_BIN"] == "ON" { pPrintStr("Binlog writes per hour", humanize.IBytes(uint64(status["BINLOG_BYTES_WRITTEN"]/status["UPTIME"])*3600)) } // Add stuff for slow logs slaveStatus, err := dbhelper.GetSlaveStatus(db) if err != nil { slaveIO := slaveStatus.Slave_IO_Running slaveSQL := slaveStatus.Slave_SQL_Running var slaveState string if slaveIO == "Yes" && slaveSQL == "Yes" { slaveState = "Slave configured, threads running" } else { slaveState = "Slave configured, threads stopped" } pPrintStr("Replication", slaveState) } else { pPrintStr("Replication", "Not configured") } // InnoDB fmt.Println(common.DrawHashline("InnoDB", 60)) ibps := humanize.IBytes(common.StrtoUint(variable["INNODB_BUFFER_POOL_SIZE"])) pPrintStr("InnoDB Buffer Pool", ibps) ibpsPages := float64(status["INNODB_BUFFER_POOL_PAGES_TOTAL"]) ibpsFree := float64(status["INNODB_BUFFER_POOL_PAGES_FREE"]) ibpsUsed := common.DecimaltoPctLow(ibpsFree, ibpsPages) pPrintStr("InnoDB Buffer Used", strconv.Itoa(ibpsUsed)+"%") ibpsDirty := float64(status["INNODB_BUFFER_POOL_PAGES_DIRTY"]) ibpsDirtyPct := common.DecimaltoPct(ibpsDirty, ibpsPages) pPrintStr("InnoDB Buffer Dirty", strconv.Itoa(ibpsDirtyPct)+"%") pPrintStr("InnoDB Log Files", string(variable["INNODB_LOG_FILES_IN_GROUP"])+" files of "+humanize.IBytes(common.StrtoUint(variable["INNODB_LOG_FILE_SIZE"]))) pPrintStr("InnoDB log writes per hour", humanize.IBytes(uint64(status["INNODB_OS_LOG_WRITTEN"]/status["UPTIME"])*3600)) pPrintStr("InnoDB Log Buffer", humanize.IBytes(common.StrtoUint(variable["INNODB_LOG_BUFFER_SIZE"]))) var iftc string switch variable["INNODB_FLUSH_LOG_AT_TRX_COMMIT"] { case "0": iftc = "0 - Flush log and write buffer every sec" case "1": iftc = "1 - Write buffer and Flush log at each trx commit" case "2": iftc = "2 - Write buffer at each trx commit, Flush log every sec" } pPrintStr("InnoDB Flush Log", iftc) ifm := variable["INNODB_FLUSH_METHOD"] if ifm == "" { ifm = "fsync" } pPrintStr("InnoDB Flush Method", ifm) pPrintStr("InnoDB IO Capacity", variable["INNODB_IO_CAPACITY"]) // MyISAM fmt.Println(common.DrawHashline("MyISAM", 60)) kbs := humanize.IBytes(common.StrtoUint(variable["KEY_BUFFER_SIZE"])) pPrintStr("MyISAM Key Cache", kbs) kbs_free := float64(status["KEY_BLOCKS_UNUSED"]) kbs_used := float64(status["KEY_BLOCKS_USED"]) kbsUsedPct := int(((1 - (kbs_free / (kbs_free + kbs_used))) * 100) + 0.5) pPrintStr("MyISAM Cache Used", strconv.Itoa(kbsUsedPct)+"%") // Handlers pPrintInt("Open tables", status["OPEN_TABLES"]) pPrintInt("Open files", status["OPEN_FILES"]) }