Esempio n. 1
0
func main() {
	flag.Parse()
	if *version == true {
		fmt.Println("MariaDB Replication Manager version", repmgrVersion)
	}
	// if slaves option has been supplied, split into a slice.
	if *hosts != "" {
		hostList = strings.Split(*hosts, ",")
	} else {
		log.Fatal("ERROR: No hosts list specified.")
	}
	// validate users.
	if *user == "" {
		log.Fatal("ERROR: No master user/pair specified.")
	}
	dbUser, dbPass = splitPair(*user)
	if *rpluser == "" {
		log.Fatal("ERROR: No replication user/pair specified.")
	}
	rplUser, rplPass = splitPair(*rpluser)

	// Create a connection to each host.
	hostCount := len(hostList)
	hhdls = make([]*ServerMonitor, hostCount)
	slaveCount := 0
	for k, url := range hostList {
		var err error
		hhdls[k], err = newServerMonitor(url)
		if *verbose {
			log.Printf("DEBUG: Creating new server: %v", hhdls[k].URL)
		}
		if err != nil {
			if *state == "dead" {
				log.Printf("INFO: Server %s is dead. Assuming old master.", hhdls[k].URL)
				master = hhdls[k]
				continue
			}
			log.Fatalln("ERROR: Error when establishing initial connection to host", err)
		}
		defer hhdls[k].Conn.Close()
		if *verbose {
			log.Printf("DEBUG: Checking if server %s is slave", hhdls[k].URL)
		}
		ss, err := dbhelper.GetSlaveStatus(hhdls[k].Conn)
		if ss.Master_Host != "" {
			log.Printf("INFO : Server %s is configured as a slave", hhdls[k].URL)
			slave = append(slave, hhdls[k])
			slaveCount++
		} else {
			log.Printf("INFO : Server %s is not a slave. Assuming master status.", hhdls[k].URL)
			master = hhdls[k]
		}
	}
	if (hostCount - slaveCount) == 0 {
		log.Fatalln("ERROR: Multi-master topologies are not yet supported.")
	}

	for _, sl := range slave {
		if *verbose {
			log.Printf("DEBUG: Checking if server %s is a slave of server %s", sl.Host, master.Host)
		}
		if dbhelper.IsSlaveof(sl.Conn, sl.Host, master.IP) == false {
			log.Fatalf("ERROR: Server %s is not a slave of declared master %s", master.URL, master.Host)
		}
	}

	// Check if preferred master is included in Host List
	ret := func() bool {
		for _, v := range hostList {
			if v == *prefMaster {
				return true
			}
		}
		return false
	}
	if ret() == false && *prefMaster != "" {
		log.Fatal("ERROR: Preferred master is not included in the hosts option")
	}

	// Do failover or switchover interactively, else start the interactive monitor.
	if *state == "dead" {
		master.failover()
	} else if *interactive == false {
		master.switchover()
	} else {
	MainLoop:
		err := termbox.Init()
		if err != nil {
			log.Fatalln("Termbox initialization error", err)
		}
		termboxChan := new_tb_chan()
		interval := time.Second
		ticker := time.NewTicker(interval * 3)
		var command string
		for exit == false {
			select {
			case <-ticker.C:
				drawHeader()
				master.refresh()
				master.drawMaster()
				vy = 6
				for k, _ := range slave {
					slave[k].refresh()
					slave[k].drawSlave(&vy)
				}
				drawFooter(&vy)
				termbox.Flush()
			case event := <-termboxChan:
				switch event.Type {
				case termbox.EventKey:
					if event.Key == termbox.KeyCtrlS {
						command = "switchover"
						exit = true
					}
					if event.Key == termbox.KeyCtrlF {
						command = "failover"
						exit = true
					}
					if event.Key == termbox.KeyCtrlQ {
						exit = true
					}
				}
				switch event.Ch {
				case 's':
					termbox.Sync()
				}
			}
		}
		termbox.Close()
		switch command {
		case "switchover":
			nmUrl, nsKey := master.switchover()
			if nmUrl != "" && nsKey >= 0 {
				if *verbose {
					log.Printf("DEBUG: Reinstancing new master: %s and new slave: %s [%d]", nmUrl, slave[nsKey].URL, nsKey)
				}
				master, err = newServerMonitor(nmUrl)
				slave[nsKey], err = newServerMonitor(slave[nsKey].URL)
			}
			log.Println("###### Restarting monitor console in 5 seconds. Press Ctrl-C to exit")
			time.Sleep(5 * time.Second)
			exit = false
			goto MainLoop
		case "failover":
			nmUrl, _ := master.failover()
			if nmUrl != "" {
				if *verbose {
					log.Printf("DEBUG: Reinstancing new master: %s", nmUrl)
				}
				master, err = newServerMonitor(nmUrl)
			}
			log.Println("###### Restarting monitor console in 5 seconds. Press Ctrl-C to exit")
			time.Sleep(5 * time.Second)
			exit = false
			goto MainLoop
		}
	}
}
func main() {
	var errLog = mysql.Logger(log.New(ioutil.Discard, "", 0))
	mysql.SetLogger(errLog)
	flag.Parse()
	if version == true {
		fmt.Println("MariaDB Replication Manager version", repmgrVersion)
		os.Exit(0)
	}
	if logfile != "" {
		var err error
		logPtr, err = os.Create(logfile)
		if err != nil {
			log.Println("ERROR: Error opening logfile, disabling for the rest of the session.")
			logfile = ""
		}
	}
	// if slaves option has been supplied, split into a slice.
	if hosts != "" {
		hostList = strings.Split(hosts, ",")
	} else {
		log.Fatal("ERROR: No hosts list specified.")
	}
	// validate users.
	if user == "" {
		log.Fatal("ERROR: No master user/pair specified.")
	}
	dbUser, dbPass = splitPair(user)
	if rpluser == "" {
		log.Fatal("ERROR: No replication user/pair specified.")
	}
	rplUser, rplPass = splitPair(rpluser)

	// Check that failover and switchover modes are set correctly.
	if switchover == "" && failover == "" {
		log.Fatal("ERROR: None of the switchover or failover modes are set.")
	}
	if switchover != "" && failover != "" {
		log.Fatal("ERROR: Both switchover and failover modes are set.")
	}
	if !contains(failOptions, failover) && failover != "" {
		log.Fatalf("ERROR: Incorrect failover mode: %s", failover)
	}
	if !contains(switchOptions, switchover) && switchover != "" {
		log.Fatalf("ERROR: Incorrect switchover mode: %s", switchover)
	}
	// Forced failover implies interactive == false
	if failover == "force" && interactive == true {
		interactive = false
	}

	if ignoreSrv != "" {
		ignoreList = strings.Split(ignoreSrv, ",")
	}

	// Create a connection to each host and build list of slaves.
	hostCount := len(hostList)
	servers = make([]*ServerMonitor, hostCount)
	slaveCount := 0
	for k, url := range hostList {
		var err error
		servers[k], err = newServerMonitor(url)
		if verbose {
			log.Printf("DEBUG: Creating new server: %v", servers[k].URL)
		}
		if err != nil {
			if driverErr, ok := err.(*mysql.MySQLError); ok {
				if driverErr.Number == 1045 {
					log.Fatalln("ERROR: Database access denied:", err.Error())
				}
			}
			if verbose {
				log.Println("ERROR:", err)
			}
			log.Printf("INFO : Server %s is dead.", servers[k].URL)
			servers[k].State = stateFailed
			continue
		}
		defer servers[k].Conn.Close()
		if verbose {
			log.Printf("DEBUG: Checking if server %s is slave", servers[k].URL)
		}

		servers[k].refresh()
		if servers[k].UsingGtid != "" {
			if verbose {
				log.Printf("DEBUG: Server %s is configured as a slave", servers[k].URL)
			}
			servers[k].State = stateSlave
			slaves = append(slaves, servers[k])
			slaveCount++
		} else {
			if verbose {
				log.Printf("DEBUG: Server %s is not a slave. Setting aside", servers[k].URL)
			}
			servers[k].State = stateUnconn
		}
	}

	// If no slaves are detected, then bail out
	if len(slaves) == 0 {
		log.Fatal("ERROR: No slaves were detected.")
	}

	// Check that all slave servers have the same master.
	for _, sl := range slaves {
		if sl.hasSiblings(slaves) == false {
			log.Fatalln("ERROR: Multi-master topologies are not yet supported.")
		}
	}

	// Check user privileges on live servers
	for _, sv := range servers {
		if sv.State != stateFailed {
			priv, err := dbhelper.GetPrivileges(sv.Conn, dbUser, sv.Host)
			if err != nil {
				log.Fatalf("ERROR: Error getting privileges for user %s on host %s: %s", dbUser, sv.Host, err)
			}
			if priv.Repl_client_priv == "N" {
				log.Fatalln("ERROR: User must have REPLICATION_CLIENT privilege")
			} else if priv.Repl_slave_priv == "N" {
				log.Fatalln("ERROR: User must have REPLICATION_SLAVE privilege")
			} else if priv.Super_priv == "N" {
				log.Fatalln("ERROR: User must have SUPER privilege")
			}
		}
	}

	// Depending if we are doing a failover or a switchover, we will find the master in the list of
	// dead hosts or unconnected hosts.
	if switchover != "" || failover == "monitor" {
		// First of all, get a server id from the slaves slice, they should be all the same
		sid := slaves[0].MasterServerID
		for k, s := range servers {
			if s.State == stateUnconn {
				if s.ServerID == sid {
					master = servers[k]
					master.State = stateMaster
					if verbose {
						log.Printf("DEBUG: Server %s was autodetected as a master", s.URL)
					}
					break
				}
			}
		}
	} else {
		// Slave master_host variable must point to dead master
		smh := slaves[0].MasterHost
		for k, s := range servers {
			if s.State == stateFailed {
				if s.Host == smh || s.IP == smh {
					master = servers[k]
					master.State = stateMaster
					if verbose {
						log.Printf("DEBUG: Server %s was autodetected as a master", s.URL)
					}
					break
				}
			}
		}
	}
	// Final check if master has been found
	if master == nil {
		if switchover != "" || failover == "monitor" {
			log.Fatalln("ERROR: Could not autodetect a master!")
		} else {
			log.Fatalln("ERROR: Could not autodetect a failed master!")
		}
	}

	for _, sl := range slaves {
		if verbose {
			log.Printf("DEBUG: Checking if server %s is a slave of server %s", sl.Host, master.Host)
		}
		if dbhelper.IsSlaveof(sl.Conn, sl.Host, master.IP) == false {
			log.Printf("WARN : Server %s is not a slave of declared master %s", master.URL, master.Host)
		}
	}

	// Check if preferred master is included in Host List
	ret := func() bool {
		for _, v := range hostList {
			if v == prefMaster {
				return true
			}
		}
		return false
	}
	if ret() == false && prefMaster != "" {
		log.Fatal("ERROR: Preferred master is not included in the hosts option")
	}

	// Do failover or switchover manually, or start the interactive monitor.

	if failover == "force" {
		masterFailover(true)
	} else if switchover != "" && interactive == false {
		masterFailover(false)
	} else {
		err := termbox.Init()
		if err != nil {
			log.Fatalln("Termbox initialization error", err)
		}
		_, termlength = termbox.Size()
		loglen := termlength - 9 - (hostCount * 3)
		tlog = NewTermLog(loglen)
		if failover != "" {
			tlog.Add("Monitor started in failover mode")
		} else {
			tlog.Add("Monitor started in switchover mode")
		}
		termboxChan := newTbChan()
		interval := time.Second
		ticker := time.NewTicker(interval * 1)
		for exit == false {
			select {
			case <-ticker.C:
				display()
			case event := <-termboxChan:
				switch event.Type {
				case termbox.EventKey:
					if event.Key == termbox.KeyCtrlS {
						if master.State != stateFailed || failCount > 0 {
							masterFailover(false)
						} else {
							logprint("ERROR: Master failed, cannot initiate switchover")
						}
					}
					if event.Key == termbox.KeyCtrlF {
						if master.State == stateFailed {
							masterFailover(true)
						} else {
							logprint("ERROR: Master not failed, cannot initiate failover")
						}
					}
					if event.Key == termbox.KeyCtrlD {
						for k, v := range servers {
							logprint("Servers", k, v)
						}
						logprint("Master", master)
						for k, v := range slaves {
							logprint("Slaves", k, v)
						}
					}
					if event.Key == termbox.KeyCtrlR {
						logprint("INFO: Setting slaves read-only")
						for _, sl := range slaves {
							dbhelper.SetReadOnly(sl.Conn, true)
						}
					}
					if event.Key == termbox.KeyCtrlW {
						logprint("INFO: Setting slaves read-write")
						for _, sl := range slaves {
							dbhelper.SetReadOnly(sl.Conn, false)
						}
					}
					if event.Key == termbox.KeyCtrlQ {
						exit = true
					}
				}
				switch event.Ch {
				case 's':
					termbox.Sync()
				}
			}
			if master.State == stateFailed && interactive == false {
				rem := (failoverTs + failtime) - time.Now().Unix()
				if (failtime == 0) || (failtime > 0 && (rem <= 0 || failoverCtr == 0)) {
					masterFailover(true)
					if failoverCtr == faillimit {
						exitMsg = "INFO : Failover limit reached. Exiting on failover completion."
						exit = true
					}
				} else if failtime > 0 && rem%10 == 0 {
					logprintf("WARN : Failover time limit enforced. Next failover available in %d seconds.", rem)
				}
			}
		}
		termbox.Close()
		if exitMsg != "" {
			log.Println(exitMsg)
		}
	}
}
func main() {
	flag.Parse()
	if *version == true {
		fmt.Println("MariaDB Replication Manager version", repmgrVersion)
	}
	// if slaves option has been supplied, split into a slice.
	if *hosts != "" {
		hostList = strings.Split(*hosts, ",")
	} else {
		log.Fatal("ERROR: No hosts list specified.")
	}
	// validate users.
	if *user == "" {
		log.Fatal("ERROR: No master user/pair specified.")
	}
	dbUser, dbPass = splitPair(*user)
	if *rpluser == "" {
		log.Fatal("ERROR: No replication user/pair specified.")
	}
	rplUser, rplPass = splitPair(*rpluser)

	// Check that failover and switchover modes are set correctly.
	if *switchover == "" && *failover == "" {
		log.Fatal("ERROR: None of the switchover or failover modes are set.")
	}
	if *switchover != "" && *failover != "" {
		log.Fatal("ERROR: Both switchover and failover modes are set.")
	}
	if !contains(failOptions, *failover) && *failover != "" {
		log.Fatalf("ERROR: Incorrect failover mode: %s", *failover)
	}
	if !contains(switchOptions, *switchover) && *switchover != "" {
		log.Fatalf("ERROR: Incorrect switchover mode: %s", *switchover)
	}

	if *ignoreSrv != "" {
		ignoreList = strings.Split(*ignoreSrv, ",")
	}

	// Create a connection to each host and build list of slaves.
	hostCount := len(hostList)
	servers = make([]*ServerMonitor, hostCount)
	slaveCount := 0
	for k, url := range hostList {
		var err error
		servers[k], err = newServerMonitor(url)
		if *verbose {
			log.Printf("DEBUG: Creating new server: %v", servers[k].URL)
		}
		if err != nil {
			log.Printf("INFO : Server %s is dead.", servers[k].URL)
			servers[k].State = STATE_FAILED
			continue
		}
		defer servers[k].Conn.Close()
		if *verbose {
			log.Printf("DEBUG: Checking if server %s is slave", servers[k].URL)
		}

		servers[k].refresh()
		if servers[k].UsingGtid != "" {
			if *verbose {
				log.Printf("DEBUG: Server %s is configured as a slave", servers[k].URL)
			}
			servers[k].State = STATE_SLAVE
			slaves = append(slaves, servers[k])
			slaveCount++
		} else {
			if *verbose {
				log.Printf("DEBUG: Server %s is not a slave. Setting aside", servers[k].URL)
			}
		}
	}

	// Check that all slave servers have the same master.
	for _, sl := range slaves {
		if sl.hasSiblings(slaves) == false {
			log.Fatalln("ERROR: Multi-master topologies are not yet supported.")
		}
	}

	// Depending if we are doing a failover or a switchover, we will find the master in the list of
	// dead hosts or unconnected hosts.
	if *switchover != "" || *failover == "monitor" {
		// First of all, get a server id from the slaves slice, they should be all the same
		sid := slaves[0].MasterServerId
		for k, s := range servers {
			if s.State == STATE_UNCONN {
				if s.ServerId == sid {
					master = servers[k]
					master.State = STATE_MASTER
					if *verbose {
						log.Printf("DEBUG: Server %s was autodetected as a master", s.URL)
					}
					break
				}
			}
		}
	} else {
		// Slave master_host variable must point to dead master
		smh := slaves[0].MasterHost
		for k, s := range servers {
			if s.State == STATE_FAILED {
				if s.Host == smh || s.IP == smh {
					master = servers[k]
					master.State = STATE_MASTER
					if *verbose {
						log.Printf("DEBUG: Server %s was autodetected as a master", s.URL)
					}
					break
				}
			}
		}
	}
	// Final check if master has been found
	if master == nil {
		log.Fatalln("ERROR: Could not autodetect a master!")
	}

	for _, sl := range slaves {
		if *verbose {
			log.Printf("DEBUG: Checking if server %s is a slave of server %s", sl.Host, master.Host)
		}
		if dbhelper.IsSlaveof(sl.Conn, sl.Host, master.IP) == false {
			log.Printf("WARN : Server %s is not a slave of declared master %s", master.URL, master.Host)
		}
	}

	// Check if preferred master is included in Host List
	ret := func() bool {
		for _, v := range hostList {
			if v == *prefMaster {
				return true
			}
		}
		return false
	}
	if ret() == false && *prefMaster != "" {
		log.Fatal("ERROR: Preferred master is not included in the hosts option")
	}

	// Do failover or switchover manually, or start the interactive monitor.

	if *failover == "force" {
		master.failover()
	} else if *switchover != "" && *interactive == false {
		master.switchover()
	} else {
	MainLoop:
		err := termbox.Init()
		if err != nil {
			log.Fatalln("Termbox initialization error", err)
		}
		tlog = NewTermLog(20)
		if *failover != "" {
			tlog.Add("Monitor started in failover mode")
		} else {
			tlog.Add("Monitor started in switchover mode")
		}
		termboxChan := new_tb_chan()
		interval := time.Second
		ticker := time.NewTicker(interval * 3)
		var command string
		for exit == false {
			select {
			case <-ticker.C:
				display()
			case event := <-termboxChan:
				switch event.Type {
				case termbox.EventKey:
					if event.Key == termbox.KeyCtrlS {
						nmUrl, nsKey := master.switchover()
						if nmUrl != "" && nsKey >= 0 {
							if *verbose {
								logprintf("DEBUG: Reinstancing new master: %s and new slave: %s [%d]", nmUrl, slaves[nsKey].URL, nsKey)
							}
							master, err = newServerMonitor(nmUrl)
							slaves[nsKey], err = newServerMonitor(slaves[nsKey].URL)
						}
					}
					if event.Key == termbox.KeyCtrlF {
						command = "failover"
						exit = true
					}
					if event.Key == termbox.KeyCtrlQ {
						exit = true
					}
				}
				switch event.Ch {
				case 's':
					termbox.Sync()
				}
			}
			if master.State == STATE_FAILED && *interactive == false {
				command = "failover"
				exit = true
			}
		}
		switch command {
		case "failover":
			termbox.Close()
			nmUrl, nmKey := master.failover()
			if nmUrl != "" {
				if *verbose {
					log.Printf("DEBUG: Reinstancing new master: %s", nmUrl)
				}
				master, err = newServerMonitor(nmUrl)
				// Remove new master from slave slice
				slaves = append(slaves[:nmKey], slaves[nmKey+1:]...)
			}
			log.Println("###### Restarting monitor console in 5 seconds. Press Ctrl-C to exit")
			time.Sleep(5 * time.Second)
			exit = false
			goto MainLoop
		}
		termbox.Close()
	}
}