func (a *autoScaleConfig) removeNode(chosenNode *cluster.Node) error { _, hasIaas := chosenNode.Metadata["iaas"] if !hasIaas { return fmt.Errorf("no IaaS information in node (%s) metadata: %#v", chosenNode.Address, chosenNode.Metadata) } err := a.provisioner.getCluster().Unregister(chosenNode.Address) if err != nil { return fmt.Errorf("unable to unregister node (%s) for removal: %s", chosenNode.Address, err) } buf := safe.NewBuffer(nil) err = a.provisioner.moveContainers(urlToHost(chosenNode.Address), "", buf) if err != nil { a.provisioner.getCluster().Register(chosenNode.Address, chosenNode.Metadata) return fmt.Errorf("unable to move containers from node (%s): %s - log: %s", chosenNode.Address, err, buf.String()) } m, err := iaas.FindMachineByIdOrAddress(chosenNode.Metadata["iaas-id"], urlToHost(chosenNode.Address)) if err != nil { a.logError("unable to find machine for removal in iaas: %s", err) return nil } err = m.Destroy() if err != nil { a.logError("unable to destroy machine in IaaS: %s", err) } return nil }
// removeNodeHandler calls scheduler.Unregister to unregistering a node into it. func removeNodeHandler(w http.ResponseWriter, r *http.Request, t auth.Token) error { params, err := unmarshal(r.Body) if err != nil { return err } address, _ := params["address"] if address == "" { return fmt.Errorf("Node address is required.") } node, err := mainDockerProvisioner.Cluster().GetNode(address) if err != nil { return err } allowedNodeRemove := permission.Check(t, permission.PermNodeDelete, permission.Context(permission.CtxPool, node.Metadata["pool"]), ) if !allowedNodeRemove { return permission.ErrUnauthorized } removeIaaS, _ := strconv.ParseBool(params["remove_iaas"]) if removeIaaS { allowedIaasRemove := permission.Check(t, permission.PermMachineDelete, permission.Context(permission.CtxIaaS, node.Metadata["iaas"]), ) if !allowedIaasRemove { return permission.ErrUnauthorized } } node.CreationStatus = cluster.NodeCreationStatusDisabled _, err = mainDockerProvisioner.Cluster().UpdateNode(node) if err != nil { return err } noRebalance, err := strconv.ParseBool(r.URL.Query().Get("no-rebalance")) if !noRebalance { err = mainDockerProvisioner.rebalanceContainersByHost(urlToHost(address), w) if err != nil { return err } } err = mainDockerProvisioner.Cluster().Unregister(address) if err != nil { return err } if removeIaaS { var m iaas.Machine m, err = iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], urlToHost(address)) if err != nil && err != mgo.ErrNotFound { return err } return m.Destroy() } return nil }
func (a *autoScaleConfig) removeMultipleNodes(event *autoScaleEvent, chosenNodes []cluster.Node) error { nodeAddrs := make([]string, len(chosenNodes)) nodeHosts := make([]string, len(chosenNodes)) for i, node := range chosenNodes { _, hasIaas := node.Metadata["iaas"] if !hasIaas { return fmt.Errorf("no IaaS information in node (%s) metadata: %#v", node.Address, node.Metadata) } nodeAddrs[i] = node.Address nodeHosts[i] = urlToHost(node.Address) } err := a.provisioner.Cluster().UnregisterNodes(nodeAddrs...) if err != nil { return fmt.Errorf("unable to unregister nodes (%s) for removal: %s", strings.Join(nodeAddrs, ", "), err) } buf := safe.NewBuffer(nil) err = a.provisioner.moveContainersFromHosts(nodeHosts, "", buf) if err != nil { for _, node := range chosenNodes { a.provisioner.Cluster().Register(node) } return fmt.Errorf("unable to move containers from nodes (%s): %s - log: %s", strings.Join(nodeAddrs, ", "), err, buf.String()) } wg := sync.WaitGroup{} for i := range chosenNodes { wg.Add(1) go func(i int) { defer wg.Done() node := chosenNodes[i] m, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], urlToHost(node.Address)) if err != nil { event.logMsg("unable to find machine for removal in iaas: %s", err) return } err = m.Destroy() if err != nil { event.logMsg("unable to destroy machine in IaaS: %s", err) } }(i) } wg.Wait() return nil }
// removeNodeHandler calls scheduler.Unregister to unregistering a node into it. func removeNodeHandler(w http.ResponseWriter, r *http.Request, t auth.Token) error { params, err := unmarshal(r.Body) if err != nil { return err } address, _ := params["address"] if address == "" { return fmt.Errorf("Node address is required.") } nodes, err := mainDockerProvisioner.Cluster().UnfilteredNodes() if err != nil { return err } var node *cluster.Node for i := range nodes { if nodes[i].Address == address { node = &nodes[i] break } } if node == nil { return fmt.Errorf("node with address %q not found in cluster", address) } err = mainDockerProvisioner.Cluster().Unregister(address) if err != nil { return err } removeIaaS, _ := strconv.ParseBool(params["remove_iaas"]) if removeIaaS { var m iaas.Machine m, err = iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], urlToHost(address)) if err != nil && err != mgo.ErrNotFound { return err } return m.Destroy() } noRebalance, err := strconv.ParseBool(r.URL.Query().Get("no-rebalance")) if !noRebalance { return mainDockerProvisioner.rebalanceContainersByHost(urlToHost(address), w) } return nil }
func (h *nodeHealer) healNode(node *cluster.Node) (cluster.Node, error) { emptyNode := cluster.Node{} failingAddr := node.Address nodeMetadata := node.CleanMetadata() failingHost := urlToHost(failingAddr) failures := node.FailureCount() machine, err := iaas.CreateMachineForIaaS(nodeMetadata["iaas"], nodeMetadata) if err != nil { node.ResetFailures() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error()) } err = h.provisioner.getCluster().Unregister(failingAddr) if err != nil { machine.Destroy() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error()) } newAddr := machine.FormatNodeAddress() log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr) createdNode, err := h.provisioner.getCluster().WaitAndRegister(newAddr, nodeMetadata, h.waitTimeNewMachine) if err != nil { node.ResetFailures() h.provisioner.getCluster().Register(failingAddr, nodeMetadata) machine.Destroy() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error()) } var buf bytes.Buffer err = h.provisioner.moveContainers(failingHost, "", &buf) if err != nil { log.Errorf("Unable to move containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err.Error(), buf.String()) } failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], failingHost) if err != nil { return createdNode, fmt.Errorf("Unable to find failing machine %s in IaaS: %s", failingHost, err.Error()) } err = failingMachine.Destroy() if err != nil { return createdNode, fmt.Errorf("Unable to destroy machine %s from IaaS: %s", failingHost, err.Error()) } log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address) return createdNode, nil }
// healNode replaces a failing cluster node with a newly created machine.
//
// It creates a machine through iaas.CreateMachineForIaaS, unregisters the
// failing node, registers the new address with CreationStatus
// NodeCreationStatusPending, enqueues a bs.QueueTaskName job and waits for its
// result, calls the provisioner's MoveContainers for the failing host, and
// finally looks up and destroys the failing machine.
//
// Until the queued job succeeds, any failure returns a zero cluster.Node and
// an error. After that point createdNode is always returned, accompanied by an
// error when the old machine cannot be found or destroyed.
func (h *NodeHealer) healNode(node *cluster.Node) (cluster.Node, error) {
	emptyNode := cluster.Node{}
	failingAddr := node.Address
	nodeMetadata := node.CleanMetadata()
	failingHost := net.URLToHost(failingAddr)
	failures := node.FailureCount()
	machine, err := iaas.CreateMachineForIaaS(nodeMetadata["iaas"], nodeMetadata)
	if err != nil {
		node.ResetFailures()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error())
	}
	err = h.provisioner.Cluster().Unregister(failingAddr)
	if err != nil {
		// The old node is still registered; discard the machine just created.
		machine.Destroy()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error())
	}
	newAddr := machine.FormatNodeAddress()
	log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr)
	// The replacement reuses the failing node's cleaned metadata and starts
	// in the pending creation status.
	createdNode := cluster.Node{
		Address:        newAddr,
		Metadata:       nodeMetadata,
		CreationStatus: cluster.NodeCreationStatusPending,
	}
	err = h.provisioner.Cluster().Register(createdNode)
	if err != nil {
		// Roll back: register the old address again and drop the new machine.
		node.ResetFailures()
		h.provisioner.Cluster().Register(cluster.Node{Address: failingAddr, Metadata: nodeMetadata})
		machine.Destroy()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error())
	}
	q, err := queue.Queue()
	if err != nil {
		// NOTE(review): unlike the other failure branches, nothing is rolled
		// back here: the failing node stays unregistered, createdNode stays
		// registered as pending, and machine is not destroyed. Confirm whether
		// that is intended.
		return emptyNode, err
	}
	jobParams := monsterqueue.JobParams{
		"endpoint": createdNode.Address,
		"machine":  machine.Id,
		"metadata": createdNode.Metadata,
	}
	// h.waitTimeNewMachine is handed to EnqueueWait; presumably it bounds how
	// long the job is awaited - TODO confirm against the queue API.
	job, err := q.EnqueueWait(bs.QueueTaskName, jobParams, h.waitTimeNewMachine)
	if err == nil {
		_, err = job.Result()
	}
	if err != nil {
		// The old node is registered again, but this branch neither destroys
		// machine nor unregisters createdNode.
		// NOTE(review): presumably the queued task cleans those up on failure
		// - verify.
		node.ResetFailures()
		h.provisioner.Cluster().Register(cluster.Node{Address: failingAddr, Metadata: nodeMetadata})
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error waiting for the bs task: %s", failures, failingHost, err.Error())
	}
	var buf bytes.Buffer
	err = h.provisioner.MoveContainers(failingHost, "", &buf)
	if err != nil {
		// Not fatal: healing continues and the old machine is still removed.
		log.Errorf("Unable to move containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err.Error(), buf.String())
	}
	// The lookup uses the node's original metadata (node.Metadata), not the
	// cleaned copy.
	failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], failingHost)
	if err != nil {
		return createdNode, fmt.Errorf("Unable to find failing machine %s in IaaS: %s", failingHost, err.Error())
	}
	err = failingMachine.Destroy()
	if err != nil {
		return createdNode, fmt.Errorf("Unable to destroy machine %s from IaaS: %s", failingHost, err.Error())
	}
	log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address)
	return createdNode, nil
}
func (h *NodeHealer) healNode(node provision.Node) (*provision.NodeSpec, error) { failingAddr := node.Address() // Copy metadata to ensure underlying data structure is not modified. newNodeMetadata := map[string]string{} for k, v := range node.Metadata() { newNodeMetadata[k] = v } failingHost := net.URLToHost(failingAddr) healthNode, isHealthNode := node.(provision.NodeHealthChecker) failures := 0 if isHealthNode { failures = healthNode.FailureCount() } machine, err := iaas.CreateMachineForIaaS(newNodeMetadata["iaas"], newNodeMetadata) if err != nil { if isHealthNode { healthNode.ResetFailures() } return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error creating new machine", failures, failingHost) } err = node.Provisioner().UpdateNode(provision.UpdateNodeOptions{ Address: failingAddr, Disable: true, }) if err != nil { machine.Destroy() return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error unregistering old node", failures, failingHost) } newAddr := machine.FormatNodeAddress() log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr) createOpts := provision.AddNodeOptions{ Address: newAddr, Metadata: newNodeMetadata, WaitTO: h.waitTimeNewMachine, CaCert: machine.CaCert, ClientCert: machine.ClientCert, ClientKey: machine.ClientKey, } err = node.Provisioner().AddNode(createOpts) if err != nil { if isHealthNode { healthNode.ResetFailures() } node.Provisioner().UpdateNode(provision.UpdateNodeOptions{Address: failingAddr, Enable: true}) machine.Destroy() return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error registering new node", failures, failingHost) } nodeSpec := provision.NodeToSpec(node) nodeSpec.Address = newAddr nodeSpec.Metadata = newNodeMetadata var buf bytes.Buffer err = node.Provisioner().RemoveNode(provision.RemoveNodeOptions{ Address: failingAddr, Rebalance: true, Writer: &buf, }) if err != nil { log.Errorf("Unable to move 
containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err, buf.String()) } failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata()["iaas-id"], failingHost) if err != nil { return &nodeSpec, errors.Wrapf(err, "Unable to find failing machine %s in IaaS", failingHost) } err = failingMachine.Destroy() if err != nil { return &nodeSpec, errors.Wrapf(err, "Unable to destroy machine %s from IaaS", failingHost) } log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address) return &nodeSpec, nil }
// title: remove node // path: /{provisioner}/node/{address} // method: DELETE // responses: // 200: Ok // 401: Unauthorized // 404: Not found func removeNodeHandler(w http.ResponseWriter, r *http.Request, t auth.Token) (err error) { r.ParseForm() address := r.URL.Query().Get(":address") if address == "" { return errors.Errorf("Node address is required.") } prov, node, err := provision.FindNode(address) if err != nil { if err == provision.ErrNodeNotFound { return &tsuruErrors.HTTP{ Code: http.StatusNotFound, Message: err.Error(), } } return err } nodeProv := prov.(provision.NodeProvisioner) pool := node.Pool() allowedNodeRemove := permission.Check(t, permission.PermNodeDelete, permission.Context(permission.CtxPool, pool), ) if !allowedNodeRemove { return permission.ErrUnauthorized } removeIaaS, _ := strconv.ParseBool(r.URL.Query().Get("remove-iaas")) if removeIaaS { allowedIaasRemove := permission.Check(t, permission.PermMachineDelete, permission.Context(permission.CtxIaaS, node.Metadata()["iaas"]), ) if !allowedIaasRemove { return permission.ErrUnauthorized } } evt, err := event.New(&event.Opts{ Target: event.Target{Type: event.TargetTypeNode, Value: node.Address()}, Kind: permission.PermNodeDelete, Owner: t, CustomData: event.FormToCustomData(r.Form), Allowed: event.Allowed(permission.PermPoolReadEvents, permission.Context(permission.CtxPool, pool)), }) if err != nil { return err } defer func() { evt.Done(err) }() noRebalance, _ := strconv.ParseBool(r.URL.Query().Get("no-rebalance")) err = nodeProv.RemoveNode(provision.RemoveNodeOptions{ Address: address, Rebalance: !noRebalance, Writer: w, }) if err != nil { return err } if removeIaaS { var m iaas.Machine m, err = iaas.FindMachineByIdOrAddress(node.Metadata()["iaas-id"], net.URLToHost(address)) if err != nil && err != mgo.ErrNotFound { return nil } return m.Destroy() } return nil }