func (s *S) TestMachinesList(c *check.C) { iaas.RegisterIaasProvider("test-iaas", newTestIaaS) _, err := iaas.CreateMachineForIaaS("test-iaas", map[string]string{"id": "myid1"}) defer (&iaas.Machine{Id: "myid1"}).Destroy() c.Assert(err, check.IsNil) _, err = iaas.CreateMachineForIaaS("test-iaas", map[string]string{"id": "myid2"}) defer (&iaas.Machine{Id: "myid2"}).Destroy() c.Assert(err, check.IsNil) recorder := httptest.NewRecorder() request, err := http.NewRequest("GET", "/iaas/machines", nil) c.Assert(err, check.IsNil) request.Header.Set("Authorization", "bearer "+s.admintoken.GetValue()) m := RunServer(true) m.ServeHTTP(recorder, request) c.Assert(recorder.Code, check.Equals, http.StatusOK) var machines []iaas.Machine err = json.NewDecoder(recorder.Body).Decode(&machines) c.Assert(err, check.IsNil) c.Assert(machines[0].Id, check.Equals, "myid1") c.Assert(machines[0].Address, check.Equals, "myid1.somewhere.com") c.Assert(machines[0].CreationParams, check.DeepEquals, map[string]string{ "id": "myid1", "iaas": "test-iaas", "iaas-id": "myid1", }) c.Assert(machines[1].Id, check.Equals, "myid2") c.Assert(machines[1].Address, check.Equals, "myid2.somewhere.com") c.Assert(machines[1].CreationParams, check.DeepEquals, map[string]string{ "id": "myid2", "iaas": "test-iaas", "iaas-id": "myid2", }) }
func (s *S) TestHealerHealNodeWaitAndRegisterError(c *check.C) { iaas.RegisterIaasProvider("my-healer-iaas", iaasTesting.NewHealerIaaSConstructor("addr1", nil)) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaas.RegisterIaasProvider("my-healer-iaas", iaasTesting.NewHealerIaaSConstructor("addr2", nil)) config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) p.PrepareFailure("AddNode", fmt.Errorf("add node error")) healer := newNodeHealer(nodeHealerArgs{ WaitTimeNewMachine: time.Second, }) healer.Shutdown() nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") created, err := healer.healNode(nodes[0]) c.Assert(err, check.ErrorMatches, ".*error registering new node: add node error.*") c.Assert(created, check.IsNil) nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") c.Assert(nodes[0].Status(), check.Equals, "enabled") }
func (s *S) TestHealerHandleErrorThrottled(c *check.C) { factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) node, err := p.GetNode("http://addr1:1") c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) healer.Shutdown() healer.started = time.Now().Add(-3 * time.Second) conf := healerConfig() err = conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)}) c.Assert(err, check.IsNil) err = healer.UpdateNodeData(node, []provision.NodeCheckResult{}) c.Assert(err, check.IsNil) time.Sleep(1200 * time.Millisecond) nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") for i := 0; i < 3; i++ { var evt *event.Event evt, err = event.NewInternal(&event.Opts{ Target: event.Target{Type: "node", Value: nodes[0].Address()}, InternalKind: "healer", Allowed: event.Allowed(permission.PermPoolReadEvents), }) c.Assert(err, check.IsNil) err = evt.Done(nil) c.Assert(err, check.IsNil) } err = healer.tryHealingNode(nodes[0], "myreason", nil) c.Assert(err, check.ErrorMatches, "Error trying to insert node healing event, healing aborted: event throttled, limit for healer on node \".*?\" is 3 every 5m0s") nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") }
func (s *S) TestHealerHealNodeWaitAndRegisterError(c *check.C) { defer func() { machines, _ := iaas.ListMachines() for _, m := range machines { m.Destroy() } }() iaas.RegisterIaasProvider("my-healer-iaas", newHealerIaaSConstructor("127.0.0.1", nil)) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaas.RegisterIaasProvider("my-healer-iaas", newHealerIaaSConstructor("localhost", nil)) node1, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) node2, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) node2.PrepareFailure("ping-failure", "/_ping") config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", urlPort(node2.URL())) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") cluster, err := cluster.New(nil, &cluster.MapStorage{}, cluster.Node{Address: node1.URL()}, ) c.Assert(err, check.IsNil) node1.PrepareFailure("pingErr", "/_ping") cluster.StartActiveMonitoring(100 * time.Millisecond) time.Sleep(300 * time.Millisecond) cluster.StopActiveMonitoring() var p dockerProvisioner err = p.Initialize() c.Assert(err, check.IsNil) p.cluster = cluster healer := nodeHealer{ locks: make(map[string]*sync.Mutex), provisioner: &p, disabledTime: 0, failuresBeforeHealing: 1, waitTimeNewMachine: 1 * time.Second, } nodes, err := p.getCluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node1.URL())) c.Assert(urlToHost(nodes[0].Address), check.Equals, "127.0.0.1") c.Assert(nodes[0].FailureCount() > 0, check.Equals, true) nodes[0].Metadata["iaas"] = "my-healer-iaas" created, err := healer.healNode(&nodes[0]) c.Assert(err, check.ErrorMatches, ".*error registering new node.*") c.Assert(created.Address, check.Equals, "") c.Assert(nodes[0].FailureCount(), check.Equals, 0) nodes, err = p.getCluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node1.URL())) c.Assert(urlToHost(nodes[0].Address), check.Equals, "127.0.0.1") }
func (s *S) TestMachinesDestroy(c *check.C) { iaas.RegisterIaasProvider("test-iaas", newTestIaaS) _, err := iaas.CreateMachineForIaaS("test-iaas", map[string]string{"id": "myid1"}) c.Assert(err, check.IsNil) recorder := httptest.NewRecorder() request, err := http.NewRequest("DELETE", "/iaas/machines/myid1", nil) c.Assert(err, check.IsNil) request.Header.Set("Authorization", "bearer "+s.admintoken.GetValue()) m := RunServer(true) m.ServeHTTP(recorder, request) c.Assert(recorder.Code, check.Equals, http.StatusOK) }
func (s *S) TestHealerHealNodeDestroyError(c *check.C) { factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaasInst.DelErr = fmt.Errorf("my destroy error") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ WaitTimeNewMachine: time.Minute, }) healer.Shutdown() nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") buf := bytes.Buffer{} log.SetLogger(log.NewWriterLogger(&buf, false)) defer log.SetLogger(nil) created, err := healer.healNode(nodes[0]) c.Assert(err, check.IsNil) c.Assert(created.Address, check.Equals, "http://addr2:2") c.Assert(buf.String(), check.Matches, "(?s).*my destroy error.*") nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr2:2") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr2") }
func (a *autoScaleConfig) addNode(event *autoScaleEvent, modelNodes []*cluster.Node) (*cluster.Node, error) { metadata, err := chooseMetadataFromNodes(modelNodes) if err != nil { return nil, err } _, hasIaas := metadata["iaas"] if !hasIaas { return nil, fmt.Errorf("no IaaS information in nodes metadata: %#v", metadata) } machine, err := iaas.CreateMachineForIaaS(metadata["iaas"], metadata) if err != nil { return nil, fmt.Errorf("unable to create machine: %s", err.Error()) } newAddr := machine.FormatNodeAddress() event.logMsg("new machine created: %s - Waiting for docker to start...", newAddr) createdNode := cluster.Node{ Address: newAddr, Metadata: metadata, CreationStatus: cluster.NodeCreationStatusPending, } err = a.provisioner.Cluster().Register(createdNode) if err != nil { machine.Destroy() return nil, fmt.Errorf("error registering new node %s: %s", newAddr, err.Error()) } q, err := queue.Queue() if err == nil { jobParams := monsterqueue.JobParams{ "endpoint": createdNode.Address, "machine": machine.Id, "metadata": createdNode.Metadata, } var job monsterqueue.Job job, err = q.EnqueueWait(bs.QueueTaskName, jobParams, a.WaitTimeNewMachine) if err == nil { _, err = job.Result() } } if err != nil { machine.Destroy() a.provisioner.Cluster().Unregister(newAddr) return nil, fmt.Errorf("error running bs task: %s", err) } event.logMsg("new machine created: %s - started!", newAddr) return &createdNode, nil }
func (s *S) TestHealerHealNode(c *check.C) { factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) healer.Shutdown() nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") created, err := healer.healNode(nodes[0]) c.Assert(err, check.IsNil) c.Assert(created.Address, check.Equals, "http://addr2:2") nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr2:2") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr2") }
func (s *HandlersSuite) TestRemoveNodeHandlerRemoveIaaS(c *check.C) { iaas.RegisterIaasProvider("my-xxx-iaas", newTestIaaS) machine, err := iaas.CreateMachineForIaaS("my-xxx-iaas", map[string]string{}) c.Assert(err, check.IsNil) mainDockerProvisioner.cluster, err = cluster.New(nil, &cluster.MapStorage{}) c.Assert(err, check.IsNil) _, err = mainDockerProvisioner.getCluster().Register(fmt.Sprintf("http://%s:2375", machine.Address), nil) c.Assert(err, check.IsNil) b := bytes.NewBufferString(fmt.Sprintf(`{"address": "http://%s:2375", "remove_iaas": "true"}`, machine.Address)) req, err := http.NewRequest("POST", "/node/remove", b) c.Assert(err, check.IsNil) rec := httptest.NewRecorder() err = removeNodeHandler(rec, req, nil) c.Assert(err, check.IsNil) nodes, err := mainDockerProvisioner.getCluster().Nodes() c.Assert(len(nodes), check.Equals, 0) _, err = iaas.FindMachineById(machine.Id) c.Assert(err, check.Equals, mgo.ErrNotFound) }
func (s *S) TestHealerHealNodeWaitAndRegisterError(c *check.C) { iaas.RegisterIaasProvider("my-healer-iaas", dockertest.NewHealerIaaSConstructor("127.0.0.1", nil)) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaas.RegisterIaasProvider("my-healer-iaas", dockertest.NewHealerIaaSConstructor("localhost", nil)) node1, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) node2, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) node2.PrepareFailure("ping-failure", "/_ping") config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", dockertest.URLPort(node2.URL())) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p, err := s.newFakeDockerProvisioner(node1.URL()) c.Assert(err, check.IsNil) defer p.Destroy() node1.PrepareFailure("pingErr", "/_ping") p.Cluster().StartActiveMonitoring(100 * time.Millisecond) time.Sleep(300 * time.Millisecond) p.Cluster().StopActiveMonitoring() healer := NewNodeHealer(NodeHealerArgs{ Provisioner: p, FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Second, }) nodes, err := p.Cluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL())) c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1") c.Assert(nodes[0].FailureCount() > 0, check.Equals, true) nodes[0].Metadata["iaas"] = "my-healer-iaas" created, err := healer.healNode(&nodes[0]) c.Assert(err, check.ErrorMatches, ".*timeout waiting for result.*") c.Assert(created.Address, check.Equals, "") c.Assert(nodes[0].FailureCount(), check.Equals, 0) nodes, err = p.Cluster().Nodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL())) c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1") }
func (s *S) TestMachinesDestroy(c *check.C) { iaas.RegisterIaasProvider("test-iaas", newTestIaaS) _, err := iaas.CreateMachineForIaaS("test-iaas", map[string]string{"id": "myid1"}) c.Assert(err, check.IsNil) recorder := httptest.NewRecorder() request, err := http.NewRequest("DELETE", "/iaas/machines/myid1", nil) c.Assert(err, check.IsNil) request.Header.Set("Authorization", "bearer "+s.token.GetValue()) m := RunServer(true) m.ServeHTTP(recorder, request) c.Assert(recorder.Code, check.Equals, http.StatusOK) c.Assert(eventtest.EventDesc{ Target: event.Target{Type: event.TargetTypeIaas, Value: "test-iaas"}, Owner: s.token.GetUserName(), Kind: "machine.delete", StartCustomData: []map[string]interface{}{ {"name": ":machine_id", "value": "myid1"}, }, }, eventtest.HasEvent) }
func (s *HandlersSuite) TestRemoveNodeHandlerWithoutRemoveIaaS(c *gocheck.C) { iaas.RegisterIaasProvider("some-iaas", TestIaaS{}) machine, err := iaas.CreateMachineForIaaS("some-iaas", map[string]string{}) c.Assert(err, gocheck.IsNil) dCluster, err = cluster.New(nil, &cluster.MapStorage{}) c.Assert(err, gocheck.IsNil) err = dCluster.Register(fmt.Sprintf("http://%s:4243", machine.Address), nil) c.Assert(err, gocheck.IsNil) b := bytes.NewBufferString(fmt.Sprintf(`{"address": "http://%s:4243", "remove_iaas": "false"}`, machine.Address)) req, err := http.NewRequest("POST", "/node/remove", b) c.Assert(err, gocheck.IsNil) rec := httptest.NewRecorder() err = removeNodeHandler(rec, req, nil) c.Assert(err, gocheck.IsNil) nodes, err := dCluster.Nodes() c.Assert(len(nodes), gocheck.Equals, 0) dbM, err := iaas.FindMachineById(machine.Id) c.Assert(err, gocheck.IsNil) c.Assert(dbM.Id, gocheck.Equals, machine.Id) }
func (h *nodeHealer) healNode(node *cluster.Node) (cluster.Node, error) { emptyNode := cluster.Node{} failingAddr := node.Address nodeMetadata := node.CleanMetadata() failingHost := urlToHost(failingAddr) failures := node.FailureCount() machine, err := iaas.CreateMachineForIaaS(nodeMetadata["iaas"], nodeMetadata) if err != nil { node.ResetFailures() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error()) } err = h.provisioner.getCluster().Unregister(failingAddr) if err != nil { machine.Destroy() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error()) } newAddr := machine.FormatNodeAddress() log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr) createdNode, err := h.provisioner.getCluster().WaitAndRegister(newAddr, nodeMetadata, h.waitTimeNewMachine) if err != nil { node.ResetFailures() h.provisioner.getCluster().Register(failingAddr, nodeMetadata) machine.Destroy() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error()) } var buf bytes.Buffer err = h.provisioner.moveContainers(failingHost, "", &buf) if err != nil { log.Errorf("Unable to move containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err.Error(), buf.String()) } failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], failingHost) if err != nil { return createdNode, fmt.Errorf("Unable to find failing machine %s in IaaS: %s", failingHost, err.Error()) } err = failingMachine.Destroy() if err != nil { return createdNode, fmt.Errorf("Unable to destroy machine %s from IaaS: %s", failingHost, err.Error()) } log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address) return createdNode, nil }
func addNodeForParams(params map[string]string, isRegister bool) (map[string]string, error) { response := make(map[string]string) var address string if isRegister { address, _ = params["address"] delete(params, "address") } else { iaasName, _ := params["iaas"] desc, err := iaas.Describe(iaasName) if err != nil { return response, err } response["description"] = desc var m *iaas.Machine if iaasName != "" { m, err = iaas.CreateMachineForIaaS(iaasName, params) } else { m, err = iaas.CreateMachine(params) } if err != nil { return response, err } nodeAddress, err := m.FormatNodeAddress() if err != nil { return response, err } params["iaas"] = m.Iaas address = nodeAddress } err := validateNodeAddress(address) if err != nil { return response, err } err = dockerCluster().Register(address, params) if err != nil { return response, err } return response, err }
func (s *S) TestHealerHealNodeCreateMachineError(c *check.C) { factory, iaasInst := dockertest.NewHealerIaaSConstructorWithInst("127.0.0.1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "localhost" iaasInst.Err = fmt.Errorf("my create machine error") node1, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) p, err := s.newFakeDockerProvisioner(node1.URL()) c.Assert(err, check.IsNil) defer p.Destroy() node1.PrepareFailure("pingErr", "/_ping") p.Cluster().StartActiveMonitoring(100 * time.Millisecond) time.Sleep(300 * time.Millisecond) p.Cluster().StopActiveMonitoring() healer := NewNodeHealer(NodeHealerArgs{ Provisioner: p, FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) nodes, err := p.Cluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL())) c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1") c.Assert(nodes[0].FailureCount() > 0, check.Equals, true) nodes[0].Metadata["iaas"] = "my-healer-iaas" created, err := healer.healNode(&nodes[0]) c.Assert(err, check.ErrorMatches, ".*my create machine error.*") c.Assert(created.Address, check.Equals, "") c.Assert(nodes[0].FailureCount(), check.Equals, 0) nodes, err = p.Cluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL())) c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1") }
func (a *autoScaleConfig) addNode(evt *event.Event, modelNodes []*cluster.Node) (*cluster.Node, error) { metadata, err := chooseMetadataFromNodes(modelNodes) if err != nil { return nil, err } _, hasIaas := metadata["iaas"] if !hasIaas { return nil, errors.Errorf("no IaaS information in nodes metadata: %#v", metadata) } machine, err := iaas.CreateMachineForIaaS(metadata["iaas"], metadata) if err != nil { return nil, errors.Wrap(err, "unable to create machine") } newAddr := machine.FormatNodeAddress() evt.Logf("new machine created: %s - Waiting for docker to start...", newAddr) createOpts := provision.AddNodeOptions{ Address: newAddr, Metadata: metadata, WaitTO: a.WaitTimeNewMachine, CaCert: machine.CaCert, ClientCert: machine.ClientCert, ClientKey: machine.ClientKey, } err = a.provisioner.AddNode(createOpts) if err != nil { machine.Destroy() a.provisioner.Cluster().Unregister(newAddr) return nil, errors.Wrapf(err, "error adding new node %s", newAddr) } createdNode, err := a.provisioner.Cluster().GetNode(newAddr) if err != nil { machine.Destroy() a.provisioner.Cluster().Unregister(newAddr) return nil, errors.Wrapf(err, "error retrieving new node %s", newAddr) } evt.Logf("new machine created: %s - started!", newAddr) return &createdNode, nil }
func (s *S) TestTryHealingNodeDoubleCheck(c *check.C) { factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) healer.started = time.Now().Add(-3 * time.Second) nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) healErr := healer.tryHealingNode(nodes[0], "something", nil) c.Assert(healErr, check.IsNil) nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") c.Assert(eventtest.EventDesc{ IsEmpty: true, }, eventtest.HasEvent) }
func (s *S) TestRemoveNodeHandlerWithRemoveIaaS(c *check.C) { iaas.RegisterIaasProvider("some-iaas", newTestIaaS) machine, err := iaas.CreateMachineForIaaS("some-iaas", map[string]string{"id": "m1"}) c.Assert(err, check.IsNil) err = s.provisioner.AddNode(provision.AddNodeOptions{ Address: fmt.Sprintf("http://%s:2375", machine.Address), }) c.Assert(err, check.IsNil) u := fmt.Sprintf("/node/http://%s:2375?remove-iaas=true", machine.Address) req, err := http.NewRequest("DELETE", u, nil) c.Assert(err, check.IsNil) req.Header.Set("Authorization", "bearer "+s.token.GetValue()) rec := httptest.NewRecorder() server := RunServer(true) server.ServeHTTP(rec, req) c.Assert(rec.Body.String(), check.Equals, "rebalancing...remove done!") c.Assert(rec.Code, check.Equals, http.StatusOK) nodes, err := s.provisioner.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 0) _, err = iaas.FindMachineById(machine.Id) c.Assert(err, check.Equals, mgo.ErrNotFound) }
func (a *autoScaleConfig) addNode(modelNodes []*cluster.Node) (*cluster.Node, error) { metadata, err := chooseMetadataFromNodes(modelNodes) if err != nil { return nil, err } _, hasIaas := metadata["iaas"] if !hasIaas { return nil, fmt.Errorf("no IaaS information in nodes metadata: %#v", metadata) } machine, err := iaas.CreateMachineForIaaS(metadata["iaas"], metadata) if err != nil { return nil, fmt.Errorf("unable to create machine: %s", err.Error()) } newAddr := machine.FormatNodeAddress() a.logDebug("new machine created: %s - Waiting for docker to start...", newAddr) createdNode, err := a.provisioner.getCluster().WaitAndRegister(newAddr, metadata, a.waitTimeNewMachine) if err != nil { machine.Destroy() return nil, fmt.Errorf("error registering new node %s: %s", newAddr, err.Error()) } a.logDebug("new machine created: %s - started!", newAddr) return &createdNode, nil }
func (s *S) TestHealerHealNodeCreateMachineError(c *check.C) { factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" iaasInst.Err = fmt.Errorf("my create machine error") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) healer.Shutdown() nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") fakeNode := nodes[0].(*provisiontest.FakeNode) fakeNode.SetHealth(1, false) c.Assert(fakeNode.FailureCount() > 0, check.Equals, true) created, err := healer.healNode(nodes[0]) c.Assert(err, check.ErrorMatches, ".*my create machine error.*") c.Assert(created, check.IsNil) c.Assert(fakeNode.FailureCount(), check.Equals, 0) nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") c.Assert(nodes[0].Status(), check.Equals, "enabled") }
func (s *S) TestHealerHandleErrorFailureEvent(c *check.C) { factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) node, err := p.GetNode("http://addr1:1") c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) healer.Shutdown() healer.started = time.Now().Add(-3 * time.Second) conf := healerConfig() err = conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)}) c.Assert(err, check.IsNil) err = healer.UpdateNodeData(node, []provision.NodeCheckResult{}) c.Assert(err, check.IsNil) time.Sleep(1200 * time.Millisecond) nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") p.PrepareFailure("AddNode", fmt.Errorf("error registering new node")) nodes[0].(*provisiontest.FakeNode).SetHealth(2, true) waitTime := healer.HandleError(nodes[0].(provision.NodeHealthChecker)) c.Assert(waitTime, check.Equals, time.Duration(0)) nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") c.Assert(eventtest.EventDesc{ Target: event.Target{Type: "node", Value: "http://addr1:1"}, Kind: "healer", StartCustomData: map[string]interface{}{ "reason": "2 consecutive failures", "node._id": "http://addr1:1", }, ErrorMatches: `Can't auto-heal after 2 failures for node addr1: error registering new node: error registering new node`, }, eventtest.HasEvent) }
func (s *S) TestCheckActiveHealing(c *check.C) { conf := healerConfig() err := conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)}) c.Assert(err, check.IsNil) factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err = iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) node, err := p.GetNode("http://addr1:1") c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ WaitTimeNewMachine: time.Minute, }) healer.Shutdown() healer.started = time.Now().Add(-3 * time.Second) err = healer.UpdateNodeData(node, []provision.NodeCheckResult{}) c.Assert(err, check.IsNil) time.Sleep(1200 * time.Millisecond) nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") healer.runActiveHealing() nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr2:2") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr2") c.Assert(eventtest.EventDesc{ Target: event.Target{Type: "node", Value: "http://addr1:1"}, Kind: "healer", StartCustomData: map[string]interface{}{ "reason": bson.M{"$regex": `last update \d+\.\d*?s ago, last success \d+\.\d*?s ago`}, "lastcheck.time": bson.M{"$exists": true}, "node._id": "http://addr1:1", }, EndCustomData: map[string]interface{}{ "_id": "http://addr2:2", }, }, eventtest.HasEvent) }
func (h *Healer) HandleError(node cluster.Node) time.Duration { defaultWait := 1 * time.Minute failures := node.FailureCount() if failures < 5 { return defaultWait } failingAddr := node.Address failingHost := urlToHost(failingAddr) containers, err := listContainersByHost(failingHost) if err != nil { log.Errorf("Error in cluster healer, trying to list containers: %s", err.Error()) return defaultWait } // Empty host let's just try again in the future if len(containers) == 0 { return defaultWait } iaasName, hasIaas := node.Metadata["iaas"] if !hasIaas { log.Errorf("Can't auto-heal after %d failures for node %s: no IaaS information.", failures, failingHost) return defaultWait } machine, err := iaas.CreateMachineForIaaS(iaasName, node.Metadata) if err != nil { log.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error()) return defaultWait } newAddr, err := machine.FormatNodeAddress() if err != nil { log.Errorf("Can't auto-heal after %d failures for node %s: error formatting address: %s", failures, failingHost, err.Error()) machine.Destroy() return defaultWait } cluster := dockerCluster() err = cluster.Unregister(failingAddr) if err != nil { log.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error()) return defaultWait } err = cluster.WaitAndRegister(newAddr, node.Metadata, 2*time.Minute) if err != nil { log.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error()) machine.Destroy() return defaultWait } var buf bytes.Buffer encoder := json.NewEncoder(&buf) err = moveContainers(failingHost, machine.Address, encoder) if err != nil { log.Errorf("Unable to move containers from: %s to: %s - %s", failingHost, machine.Address, err.Error()) return 0 } failingMachine, err := iaas.FindMachineByAddress(failingHost) if err != nil { log.Errorf("Unable to find failing machine %s in IaaS", failingHost) return 0 } err = failingMachine.Destroy() if err != nil { log.Errorf("Unable to find destroy machine %s from IaaS", failingHost) } return 0 }
func (s *S) TestTryHealingNodeConcurrent(c *check.C) { defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(10)) factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) node, err := p.GetNode("http://addr1:1") c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) healer.started = time.Now().Add(-3 * time.Second) conf := healerConfig() err = conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)}) c.Assert(err, check.IsNil) err = healer.UpdateNodeData(node, []provision.NodeCheckResult{}) c.Assert(err, check.IsNil) time.Sleep(1200 * time.Millisecond) nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") wg := sync.WaitGroup{} for i := 0; i < 100; i++ { wg.Add(1) go func() { defer wg.Done() healErr := healer.tryHealingNode(nodes[0], "something", nil) c.Assert(healErr, check.IsNil) }() } wg.Wait() nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr2:2") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr2") c.Assert(eventtest.EventDesc{ Target: event.Target{Type: "node", Value: "http://addr1:1"}, Kind: "healer", StartCustomData: map[string]interface{}{ "reason": "something", "node._id": "http://addr1:1", }, EndCustomData: map[string]interface{}{ "_id": "http://addr2:2", }, }, eventtest.HasEvent) }
func (h *NodeHealer) healNode(node provision.Node) (*provision.NodeSpec, error) { failingAddr := node.Address() // Copy metadata to ensure underlying data structure is not modified. newNodeMetadata := map[string]string{} for k, v := range node.Metadata() { newNodeMetadata[k] = v } failingHost := net.URLToHost(failingAddr) healthNode, isHealthNode := node.(provision.NodeHealthChecker) failures := 0 if isHealthNode { failures = healthNode.FailureCount() } machine, err := iaas.CreateMachineForIaaS(newNodeMetadata["iaas"], newNodeMetadata) if err != nil { if isHealthNode { healthNode.ResetFailures() } return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error creating new machine", failures, failingHost) } err = node.Provisioner().UpdateNode(provision.UpdateNodeOptions{ Address: failingAddr, Disable: true, }) if err != nil { machine.Destroy() return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error unregistering old node", failures, failingHost) } newAddr := machine.FormatNodeAddress() log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr) createOpts := provision.AddNodeOptions{ Address: newAddr, Metadata: newNodeMetadata, WaitTO: h.waitTimeNewMachine, CaCert: machine.CaCert, ClientCert: machine.ClientCert, ClientKey: machine.ClientKey, } err = node.Provisioner().AddNode(createOpts) if err != nil { if isHealthNode { healthNode.ResetFailures() } node.Provisioner().UpdateNode(provision.UpdateNodeOptions{Address: failingAddr, Enable: true}) machine.Destroy() return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error registering new node", failures, failingHost) } nodeSpec := provision.NodeToSpec(node) nodeSpec.Address = newAddr nodeSpec.Metadata = newNodeMetadata var buf bytes.Buffer err = node.Provisioner().RemoveNode(provision.RemoveNodeOptions{ Address: failingAddr, Rebalance: true, Writer: &buf, }) if err != nil { log.Errorf("Unable to move containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err, buf.String()) } failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata()["iaas-id"], failingHost) if err != nil { return &nodeSpec, errors.Wrapf(err, "Unable to find failing machine %s in IaaS", failingHost) } err = failingMachine.Destroy() if err != nil { return &nodeSpec, errors.Wrapf(err, "Unable to destroy machine %s from IaaS", failingHost) } log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address) return &nodeSpec, nil }
func (s *S) TestHealerHealNode(c *check.C) { factory, iaasInst := dockertest.NewHealerIaaSConstructorWithInst("127.0.0.1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "localhost" node1, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) node2, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", dockertest.URLPort(node2.URL())) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p, err := s.newFakeDockerProvisioner(node1.URL()) c.Assert(err, check.IsNil) defer p.Destroy() app := provisiontest.NewFakeApp("myapp", "python", 0) _, err = p.StartContainers(dockertest.StartContainersArgs{ Endpoint: node1.URL(), App: app, Amount: map[string]int{"web": 1}, Image: "tsuru/python", PullImage: true, }) c.Assert(err, check.IsNil) healer := NewNodeHealer(NodeHealerArgs{ Provisioner: p, FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) nodes, err := p.Cluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL())) c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1") containers := p.AllContainers() c.Assert(err, check.IsNil) c.Assert(containers, check.HasLen, 1) c.Assert(containers[0].HostAddr, check.Equals, "127.0.0.1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "127.0.0.1") nodes[0].Metadata["iaas"] = "my-healer-iaas" created, err := healer.healNode(&nodes[0]) c.Assert(err, check.IsNil) c.Assert(created.Address, check.Equals, fmt.Sprintf("http://localhost:%d", dockertest.URLPort(node2.URL()))) nodes, err = p.Cluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node2.URL())) c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "localhost") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "localhost") err = tsurutest.WaitCondition(5*time.Second, func() bool { containers := p.AllContainers() return len(containers) == 1 && containers[0].HostAddr == "localhost" }) c.Assert(err, check.IsNil) }
func (s *S) TestHealerHandleError(c *check.C) { factory, iaasInst := dockertest.NewHealerIaaSConstructorWithInst("127.0.0.1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "localhost" node1, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) node2, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", dockertest.URLPort(node2.URL())) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p, err := s.newFakeDockerProvisioner(node1.URL()) c.Assert(err, check.IsNil) defer p.Destroy() app := provisiontest.NewFakeApp("myapp", "python", 0) _, err = p.StartContainers(dockertest.StartContainersArgs{ Endpoint: node1.URL(), App: app, Amount: map[string]int{"web": 1}, Image: "tsuru/python", PullImage: true, }) c.Assert(err, check.IsNil) healer := NewNodeHealer(NodeHealerArgs{ Provisioner: p, FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) nodes, err := p.Cluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL())) c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "127.0.0.1") nodes[0].Metadata["iaas"] = "my-healer-iaas" nodes[0].Metadata["Failures"] = "2" waitTime := healer.HandleError(&nodes[0]) c.Assert(waitTime, check.Equals, time.Duration(0)) nodes, err = p.Cluster().Nodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node2.URL())) c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "localhost") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "localhost") healingColl, err := healingCollection() c.Assert(err, check.IsNil) defer healingColl.Close() var events []HealingEvent err = healingColl.Find(nil).All(&events) c.Assert(err, check.IsNil) c.Assert(events, check.HasLen, 1) c.Assert(events[0].Action, check.Equals, "node-healing") c.Assert(events[0].StartTime, check.Not(check.DeepEquals), time.Time{}) c.Assert(events[0].EndTime, check.Not(check.DeepEquals), time.Time{}) c.Assert(events[0].Error, check.Equals, "") c.Assert(events[0].Successful, check.Equals, true) c.Assert(events[0].FailingNode.Address, check.Equals, fmt.Sprintf("http://127.0.0.1:%d/", dockertest.URLPort(node1.URL()))) c.Assert(events[0].CreatedNode.Address, check.Equals, fmt.Sprintf("http://localhost:%d", dockertest.URLPort(node2.URL()))) }
func (s *S) TestHealerHandleError(c *check.C) { rollback := startTestRepositoryServer() defer rollback() defer func() { machines, _ := iaas.ListMachines() for _, m := range machines { m.Destroy() } }() factory, iaasInst := newHealerIaaSConstructorWithInst("127.0.0.1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.addr = "localhost" node1, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) node2, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", urlPort(node2.URL())) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") cluster, err := cluster.New(nil, &cluster.MapStorage{}, cluster.Node{Address: node1.URL()}, ) c.Assert(err, check.IsNil) var p dockerProvisioner err = p.Initialize() c.Assert(err, check.IsNil) p.cluster = cluster appInstance := provisiontest.NewFakeApp("myapp", "python", 0) defer p.Destroy(appInstance) p.Provision(appInstance) imageId, err := appCurrentImageName(appInstance.GetName()) c.Assert(err, check.IsNil) customData := map[string]interface{}{ "procfile": "web: python ./myapp", } err = saveImageCustomData(imageId, customData) c.Assert(err, check.IsNil) _, err = addContainersWithHost(&changeUnitsPipelineArgs{ toHost: "127.0.0.1", toAdd: map[string]*containersToAdd{"web": {Quantity: 1}}, app: appInstance, imageId: imageId, provisioner: &p, }) c.Assert(err, check.IsNil) conn, err := db.Conn() c.Assert(err, check.IsNil) defer conn.Close() appStruct := &app.App{ Name: appInstance.GetName(), } err = conn.Apps().Insert(appStruct) c.Assert(err, check.IsNil) defer conn.Apps().Remove(bson.M{"name": appStruct.Name}) healer := nodeHealer{ locks: make(map[string]*sync.Mutex), provisioner: &p, disabledTime: 0, failuresBeforeHealing: 1, waitTimeNewMachine: 1 * time.Second, } nodes, err := cluster.UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node1.URL())) c.Assert(urlToHost(nodes[0].Address), check.Equals, "127.0.0.1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "127.0.0.1") nodes[0].Metadata["iaas"] = "my-healer-iaas" nodes[0].Metadata["Failures"] = "2" waitTime := healer.HandleError(&nodes[0]) c.Assert(waitTime, check.Equals, time.Duration(0)) nodes, err = cluster.UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node2.URL())) c.Assert(urlToHost(nodes[0].Address), check.Equals, "localhost") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "localhost") healingColl, err := healingCollection() c.Assert(err, check.IsNil) defer healingColl.Close() var events []healingEvent err = healingColl.Find(nil).All(&events) c.Assert(err, check.IsNil) c.Assert(events, check.HasLen, 1) c.Assert(events[0].Action, check.Equals, "node-healing") c.Assert(events[0].StartTime, check.Not(check.DeepEquals), time.Time{}) c.Assert(events[0].EndTime, check.Not(check.DeepEquals), time.Time{}) c.Assert(events[0].Error, check.Equals, "") c.Assert(events[0].Successful, check.Equals, true) c.Assert(events[0].FailingNode.Address, check.Equals, fmt.Sprintf("http://127.0.0.1:%d/", urlPort(node1.URL()))) c.Assert(events[0].CreatedNode.Address, check.Equals, fmt.Sprintf("http://localhost:%d", urlPort(node2.URL()))) }
func (s *S) TestHealerHealNode(c *check.C) { rollback := startTestRepositoryServer() defer rollback() defer func() { machines, _ := iaas.ListMachines() for _, m := range machines { m.Destroy() } }() factory, iaasInst := newHealerIaaSConstructorWithInst("127.0.0.1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.addr = "localhost" node1, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) node2, err := testing.NewServer("127.0.0.1:0", nil, nil) c.Assert(err, check.IsNil) config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", urlPort(node2.URL())) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") cluster, err := cluster.New(nil, &cluster.MapStorage{}, cluster.Node{Address: node1.URL()}, ) c.Assert(err, check.IsNil) appInstance := provisiontest.NewFakeApp("myapp", "python", 0) var p dockerProvisioner err = p.Initialize() c.Assert(err, check.IsNil) p.cluster = cluster defer p.Destroy(appInstance) p.Provision(appInstance) imageId, err := appCurrentImageName(appInstance.GetName()) c.Assert(err, check.IsNil) customData := map[string]interface{}{ "procfile": "web: python ./myapp", } err = saveImageCustomData(imageId, customData) c.Assert(err, check.IsNil) _, err = addContainersWithHost(&changeUnitsPipelineArgs{ toHost: "127.0.0.1", toAdd: map[string]*containersToAdd{"web": {Quantity: 1}}, app: appInstance, imageId: imageId, provisioner: &p, }) c.Assert(err, check.IsNil) conn, err := db.Conn() c.Assert(err, check.IsNil) defer conn.Close() appStruct := &app.App{ Name: appInstance.GetName(), } err = conn.Apps().Insert(appStruct) c.Assert(err, check.IsNil) defer conn.Apps().Remove(bson.M{"name": appStruct.Name}) healer := nodeHealer{ locks: make(map[string]*sync.Mutex), provisioner: &p, disabledTime: 0, failuresBeforeHealing: 1, waitTimeNewMachine: 1 * time.Second, } nodes, err := p.getCluster().UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node1.URL())) c.Assert(urlToHost(nodes[0].Address), check.Equals, "127.0.0.1") containers, err := p.listAllContainers() c.Assert(err, check.IsNil) c.Assert(containers, check.HasLen, 1) c.Assert(containers[0].HostAddr, check.Equals, "127.0.0.1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "127.0.0.1") nodes[0].Metadata["iaas"] = "my-healer-iaas" created, err := healer.healNode(&nodes[0]) c.Assert(err, check.IsNil) c.Assert(created.Address, check.Equals, fmt.Sprintf("http://localhost:%d", urlPort(node2.URL()))) nodes, err = cluster.UnfilteredNodes() c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node2.URL())) c.Assert(urlToHost(nodes[0].Address), check.Equals, "localhost") machines, err = iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "localhost") done := make(chan bool) go func() { for range time.Tick(100 * time.Millisecond) { containers, err := p.listAllContainers() if err == nil && len(containers) == 1 && containers[0].HostAddr == "localhost" { close(done) return } } }() select { case <-done: case <-time.After(5 * time.Second): c.Fatal("Timed out waiting for containers to move") } }
func (h *NodeHealer) healNode(node *cluster.Node) (cluster.Node, error) { emptyNode := cluster.Node{} failingAddr := node.Address nodeMetadata := node.CleanMetadata() failingHost := net.URLToHost(failingAddr) failures := node.FailureCount() machine, err := iaas.CreateMachineForIaaS(nodeMetadata["iaas"], nodeMetadata) if err != nil { node.ResetFailures() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error()) } err = h.provisioner.Cluster().Unregister(failingAddr) if err != nil { machine.Destroy() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error()) } newAddr := machine.FormatNodeAddress() log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr) createdNode := cluster.Node{ Address: newAddr, Metadata: nodeMetadata, CreationStatus: cluster.NodeCreationStatusPending, } err = h.provisioner.Cluster().Register(createdNode) if err != nil { node.ResetFailures() h.provisioner.Cluster().Register(cluster.Node{Address: failingAddr, Metadata: nodeMetadata}) machine.Destroy() return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error()) } q, err := queue.Queue() if err != nil { return emptyNode, err } jobParams := monsterqueue.JobParams{ "endpoint": createdNode.Address, "machine": machine.Id, "metadata": createdNode.Metadata, } job, err := q.EnqueueWait(bs.QueueTaskName, jobParams, h.waitTimeNewMachine) if err == nil { _, err = job.Result() } if err != nil { node.ResetFailures() h.provisioner.Cluster().Register(cluster.Node{Address: failingAddr, Metadata: nodeMetadata}) return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error waiting for the bs task: %s", failures, failingHost, err.Error()) } var buf bytes.Buffer err = h.provisioner.MoveContainers(failingHost, "", &buf) if err != nil { log.Errorf("Unable to move containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err.Error(), buf.String()) } failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], failingHost) if err != nil { return createdNode, fmt.Errorf("Unable to find failing machine %s in IaaS: %s", failingHost, err.Error()) } err = failingMachine.Destroy() if err != nil { return createdNode, fmt.Errorf("Unable to destroy machine %s from IaaS: %s", failingHost, err.Error()) } log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address) return createdNode, nil }