func (l *localnet) Healthy() bool { _, err := util.EnsureNetworkHealthy(l.Servers(), l.NetworkPassword) return err == nil }
func main() { defer glog.Flush() glog.CopyStandardLogTo("INFO") flag.Parse() if strings.TrimSpace(*network) == "" { log.Fatalf("You need to specify -network") } binaryHash := fileHash(*binaryPath) glog.Infof("binaryHash = %s", binaryHash) servers := util.ResolveNetwork(*network) log.Printf("Checking network health\n") if statuses, err := util.EnsureNetworkHealthy(servers, *networkPassword); err != nil { log.Fatalf("Aborting upgrade for safety: %v", err) } else { if allNodesUpdated(statuses, binaryHash) { log.Printf("All nodes are already running the requested version.\n") return } } log.Printf("Restarting %q nodes until their binary hash is %s\n", *network, binaryHash) for rtry := 0; rtry < 5; rtry++ { for _, server := range servers { var statuses map[string]util.ServerStatus var err error started := time.Now() for time.Since(started) < *networkHealthTimeout { statuses, err = util.EnsureNetworkHealthy(servers, *networkPassword) if err != nil { log.Printf("Network is not healthy: %v\n", err) time.Sleep(1 * time.Second) continue } log.Printf("Network became healthy.\n") break } if err != nil { log.Fatalf("Network did not become healthy within %v, aborting. (reason: %v)\n", *networkHealthTimeout, err) } if statuses[server].ExecutableHash == binaryHash { if allNodesUpdated(statuses, binaryHash) { log.Printf("All done!\n") return } log.Printf("Skipping %q which is already running the requested version\n", server) continue } lastApplied := statuses[server].AppliedIndex log.Printf("Killing node %q\n", server) if err := quit(server); err != nil { log.Printf("%v\n", err) } for htry := 0; htry < 60; htry++ { time.Sleep(1 * time.Second) current, err := util.GetServerStatus(server, *networkPassword) if err != nil { log.Printf("Node unhealthy: %v\n", err) continue } if current.ExecutableHash != binaryHash { log.Printf("Node %q came up with hash %s instead of %s?!\n", server, current.ExecutableHash, binaryHash) break } if current.AppliedIndex < lastApplied { log.Printf("Node %q has not yet applied all messages it saw before, waiting (got %d, want ≥ %d)\n", server, current.AppliedIndex, lastApplied) continue } log.Printf("Node %q was upgraded and is healthy again\n", server) break } } } log.Printf("All done!\n") }