示例#1
0
// removeNodeHandler unregisters a node from the docker cluster and, when the
// "remove_iaas" parameter parses as true, also destroys the IaaS machine
// found for the node's host.
//
// The request body is decoded with unmarshal and must carry a non-empty
// "address" entry; otherwise an error is returned before anything is
// unregistered. A missing or unparsable "remove_iaas" is treated as false.
func removeNodeHandler(w http.ResponseWriter, r *http.Request, t auth.Token) error {
	params, err := unmarshal(r.Body)
	if err != nil {
		return err
	}
	address := params["address"]
	if address == "" {
		return fmt.Errorf("Node address is required.")
	}
	if err = dockerCluster().Unregister(address); err != nil {
		return err
	}
	// The parse error is deliberately ignored: anything that is not a valid
	// boolean leaves removeIaaS false and the machine is kept.
	removeIaaS, _ := strconv.ParseBool(params["remove_iaas"])
	if !removeIaaS {
		return nil
	}
	m, err := iaas.FindMachineByAddress(urlToHost(address))
	if err == mgo.ErrNotFound {
		// No machine is recorded for this host, so there is nothing to
		// destroy; calling Destroy on the not-found result would act on a
		// machine that was never loaded.
		return nil
	}
	if err != nil {
		return err
	}
	return m.Destroy()
}
// HandleError tries to replace a repeatedly failing cluster node with a newly
// created IaaS machine and returns a duration to the caller.
//
// One minute is returned, and no replacement is completed, when:
//   - the node has fewer than 5 recorded failures;
//   - the containers on the node's host cannot be listed, or there are none;
//   - the node metadata has no "iaas" entry;
//   - creating the machine, formatting its address, unregistering the old
//     node or registering the new one fails.
//
// Once the new node is registered the function returns 0, even if moving the
// containers or destroying the old machine fails; those failures are only
// logged.
//
// NOTE(review): the returned duration is presumably how long the caller waits
// before retrying the node — confirm against the cluster healer contract.
func (h *Healer) HandleError(node cluster.Node) time.Duration {
	defaultWait := 1 * time.Minute
	failures := node.FailureCount()
	if failures < 5 {
		return defaultWait
	}
	failingAddr := node.Address
	failingHost := urlToHost(failingAddr)
	containers, err := listContainersByHost(failingHost)
	if err != nil {
		log.Errorf("Error in cluster healer, trying to list containers: %s", err.Error())
		return defaultWait
	}
	// Empty host let's just try again in the future
	if len(containers) == 0 {
		return defaultWait
	}
	iaasName, hasIaas := node.Metadata["iaas"]
	if !hasIaas {
		log.Errorf("Can't auto-heal after %d failures for node %s: no IaaS information.", failures, failingHost)
		return defaultWait
	}
	machine, err := iaas.CreateMachineForIaaS(iaasName, node.Metadata)
	if err != nil {
		log.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error())
		return defaultWait
	}
	// destroyReplacement releases the machine created above when a later step
	// fails, logging (rather than dropping) any error from the cleanup itself.
	destroyReplacement := func() {
		if derr := machine.Destroy(); derr != nil {
			log.Errorf("Unable to destroy replacement machine %s after failed auto-heal of node %s: %s", machine.Address, failingHost, derr.Error())
		}
	}
	newAddr, err := machine.FormatNodeAddress()
	if err != nil {
		log.Errorf("Can't auto-heal after %d failures for node %s: error formatting address: %s", failures, failingHost, err.Error())
		destroyReplacement()
		return defaultWait
	}
	// Named clu so the imported cluster package is not shadowed.
	clu := dockerCluster()
	err = clu.Unregister(failingAddr)
	if err != nil {
		log.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error())
		// The old node stays in place, so the replacement machine would
		// otherwise be left running with nothing referencing it.
		destroyReplacement()
		return defaultWait
	}
	err = clu.WaitAndRegister(newAddr, node.Metadata, 2*time.Minute)
	if err != nil {
		log.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error())
		destroyReplacement()
		return defaultWait
	}
	// moveContainers needs a JSON encoder to write to; its output is collected
	// in a local buffer and not used afterwards.
	var buf bytes.Buffer
	encoder := json.NewEncoder(&buf)
	err = moveContainers(failingHost, machine.Address, encoder)
	if err != nil {
		log.Errorf("Unable to move containers from: %s to: %s - %s", failingHost, machine.Address, err.Error())
		return 0
	}
	failingMachine, err := iaas.FindMachineByAddress(failingHost)
	if err != nil {
		log.Errorf("Unable to find failing machine %s in IaaS: %s", failingHost, err.Error())
		return 0
	}
	err = failingMachine.Destroy()
	if err != nil {
		log.Errorf("Unable to destroy machine %s from IaaS: %s", failingHost, err.Error())
	}
	return 0
}