Example #1
0
// Healthy reports whether util.EnsureNetworkHealthy succeeds for the servers
// of this localnet, using the localnet's network password. The statuses
// returned by the check are discarded; only the error decides the result.
func (l *localnet) Healthy() bool {
	if _, err := util.EnsureNetworkHealthy(l.Servers(), l.NetworkPassword); err != nil {
		return false
	}
	return true
}
Example #2
0
// main performs a rolling upgrade of the nodes in -network: it goes through
// the servers one at a time, asks each outdated node to quit, and waits for it
// to come back reporting the hash of the binary at -binaryPath.
//
// Before each node is touched the whole network must be healthy, so that only
// one node is deliberately taken down at a time. The program exits fatally
// when the network does not become healthy within -networkHealthTimeout, or
// when the pass budget is exhausted while some node still runs another binary.
//
// NOTE(review): log.Fatalf exits via os.Exit, so the deferred glog.Flush does
// not run on the fatal paths below — confirm buffered log output is not lost.
func main() {
	defer glog.Flush()
	glog.CopyStandardLogTo("INFO")
	flag.Parse()
	if strings.TrimSpace(*network) == "" {
		log.Fatalf("You need to specify -network")
	}

	const (
		// maxPasses bounds the number of full passes over the server list.
		maxPasses = 5
		// maxNodePolls bounds the one-second polls spent waiting for a
		// single node to come back after it was asked to quit.
		maxNodePolls = 60
	)

	binaryHash := fileHash(*binaryPath)
	glog.Infof("binaryHash = %s", binaryHash)

	servers := util.ResolveNetwork(*network)
	log.Printf("Checking network health\n")
	initial, err := util.EnsureNetworkHealthy(servers, *networkPassword)
	if err != nil {
		log.Fatalf("Aborting upgrade for safety: %v", err)
	}
	if allNodesUpdated(initial, binaryHash) {
		log.Printf("All nodes are already running the requested version.\n")
		return
	}

	// awaitHealthyNetwork polls once per second until the network is healthy
	// and returns the statuses of that successful check. It always performs
	// at least one check, even with a zero timeout, so callers never act on
	// an unverified network. It exits the program when the timeout elapses.
	awaitHealthyNetwork := func() map[string]util.ServerStatus {
		started := time.Now()
		for {
			snapshot, err := util.EnsureNetworkHealthy(servers, *networkPassword)
			if err == nil {
				log.Printf("Network became healthy.\n")
				return snapshot
			}
			log.Printf("Network is not healthy: %v\n", err)
			if time.Since(started) >= *networkHealthTimeout {
				log.Fatalf("Network did not become healthy within %v, aborting. (reason: %v)\n", *networkHealthTimeout, err)
			}
			time.Sleep(1 * time.Second)
		}
	}

	log.Printf("Restarting %q nodes until their binary hash is %s\n", *network, binaryHash)

	for pass := 0; pass < maxPasses; pass++ {
		for _, server := range servers {
			snapshot := awaitHealthyNetwork()

			if snapshot[server].ExecutableHash == binaryHash {
				if allNodesUpdated(snapshot, binaryHash) {
					log.Printf("All done!\n")
					return
				}
				log.Printf("Skipping %q which is already running the requested version\n", server)
				continue
			}

			// Remember how far the node had gotten so that we can wait
			// for it to catch up again after the restart.
			lastApplied := snapshot[server].AppliedIndex

			log.Printf("Killing node %q\n", server)
			// Best effort: a failed quit request is only logged, the
			// polling below decides whether the node actually changed.
			if err := quit(server); err != nil {
				log.Printf("%v\n", err)
			}

			poll := 0
			for ; poll < maxNodePolls; poll++ {
				time.Sleep(1 * time.Second)
				current, err := util.GetServerStatus(server, *networkPassword)
				if err != nil {
					log.Printf("Node unhealthy: %v\n", err)
					continue
				}
				if current.ExecutableHash != binaryHash {
					// Give up on this node for now; a later pass
					// will pick it up again.
					log.Printf("Node %q came up with hash %s instead of %s?!\n",
						server, current.ExecutableHash, binaryHash)
					break
				}
				if current.AppliedIndex < lastApplied {
					log.Printf("Node %q has not yet applied all messages it saw before, waiting (got %d, want ≥ %d)\n",
						server, current.AppliedIndex, lastApplied)
					continue
				}
				log.Printf("Node %q was upgraded and is healthy again\n", server)
				break
			}
			// poll only reaches maxNodePolls when no break happened, i.e.
			// the node never reported back as upgraded and caught up.
			if poll == maxNodePolls {
				log.Printf("Node %q did not come back upgraded and caught up within %d polls, moving on\n",
					server, maxNodePolls)
			}
		}
	}

	// The pass budget is used up without the in-loop check having confirmed
	// success. Verify the final state instead of claiming success blindly.
	final := awaitHealthyNetwork()
	if !allNodesUpdated(final, binaryHash) {
		log.Fatalf("Not all nodes are running the requested version after %d passes, giving up.\n", maxPasses)
	}
	log.Printf("All done!\n")
}