Exemplo n.º 1
0
func (s *S) TestMachinesList(c *check.C) {
	iaas.RegisterIaasProvider("test-iaas", newTestIaaS)
	_, err := iaas.CreateMachineForIaaS("test-iaas", map[string]string{"id": "myid1"})
	defer (&iaas.Machine{Id: "myid1"}).Destroy()
	c.Assert(err, check.IsNil)
	_, err = iaas.CreateMachineForIaaS("test-iaas", map[string]string{"id": "myid2"})
	defer (&iaas.Machine{Id: "myid2"}).Destroy()
	c.Assert(err, check.IsNil)
	recorder := httptest.NewRecorder()
	request, err := http.NewRequest("GET", "/iaas/machines", nil)
	c.Assert(err, check.IsNil)
	request.Header.Set("Authorization", "bearer "+s.admintoken.GetValue())
	m := RunServer(true)
	m.ServeHTTP(recorder, request)
	c.Assert(recorder.Code, check.Equals, http.StatusOK)
	var machines []iaas.Machine
	err = json.NewDecoder(recorder.Body).Decode(&machines)
	c.Assert(err, check.IsNil)
	c.Assert(machines[0].Id, check.Equals, "myid1")
	c.Assert(machines[0].Address, check.Equals, "myid1.somewhere.com")
	c.Assert(machines[0].CreationParams, check.DeepEquals, map[string]string{
		"id":      "myid1",
		"iaas":    "test-iaas",
		"iaas-id": "myid1",
	})
	c.Assert(machines[1].Id, check.Equals, "myid2")
	c.Assert(machines[1].Address, check.Equals, "myid2.somewhere.com")
	c.Assert(machines[1].CreationParams, check.DeepEquals, map[string]string{
		"id":      "myid2",
		"iaas":    "test-iaas",
		"iaas-id": "myid2",
	})
}
Exemplo n.º 2
0
func (s *S) TestHealerHealNodeWaitAndRegisterError(c *check.C) {
	iaas.RegisterIaasProvider("my-healer-iaas", iaasTesting.NewHealerIaaSConstructor("addr1", nil))
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaas.RegisterIaasProvider("my-healer-iaas", iaasTesting.NewHealerIaaSConstructor("addr2", nil))
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", 2)
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)
	p.PrepareFailure("AddNode", fmt.Errorf("add node error"))
	healer := newNodeHealer(nodeHealerArgs{
		WaitTimeNewMachine: time.Second,
	})
	healer.Shutdown()
	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")
	created, err := healer.healNode(nodes[0])
	c.Assert(err, check.ErrorMatches, ".*error registering new node: add node error.*")
	c.Assert(created, check.IsNil)
	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")
	c.Assert(nodes[0].Status(), check.Equals, "enabled")
}
Exemplo n.º 3
0
func (s *S) TestHealerHandleErrorThrottled(c *check.C) {
	factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "addr2"
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", 2)
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)
	node, err := p.GetNode("http://addr1:1")
	c.Assert(err, check.IsNil)
	healer := newNodeHealer(nodeHealerArgs{
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	healer.Shutdown()
	healer.started = time.Now().Add(-3 * time.Second)
	conf := healerConfig()
	err = conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)})
	c.Assert(err, check.IsNil)
	err = healer.UpdateNodeData(node, []provision.NodeCheckResult{})
	c.Assert(err, check.IsNil)
	time.Sleep(1200 * time.Millisecond)
	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")
	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr1")
	for i := 0; i < 3; i++ {
		var evt *event.Event
		evt, err = event.NewInternal(&event.Opts{
			Target:       event.Target{Type: "node", Value: nodes[0].Address()},
			InternalKind: "healer",
			Allowed:      event.Allowed(permission.PermPoolReadEvents),
		})
		c.Assert(err, check.IsNil)
		err = evt.Done(nil)
		c.Assert(err, check.IsNil)
	}
	err = healer.tryHealingNode(nodes[0], "myreason", nil)
	c.Assert(err, check.ErrorMatches, "Error trying to insert node healing event, healing aborted: event throttled, limit for healer on node \".*?\" is 3 every 5m0s")
	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")
}
Exemplo n.º 4
0
func (s *S) TestHealerHealNodeWaitAndRegisterError(c *check.C) {
	defer func() {
		machines, _ := iaas.ListMachines()
		for _, m := range machines {
			m.Destroy()
		}
	}()
	iaas.RegisterIaasProvider("my-healer-iaas", newHealerIaaSConstructor("127.0.0.1", nil))
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaas.RegisterIaasProvider("my-healer-iaas", newHealerIaaSConstructor("localhost", nil))
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2.PrepareFailure("ping-failure", "/_ping")
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", urlPort(node2.URL()))
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	cluster, err := cluster.New(nil, &cluster.MapStorage{},
		cluster.Node{Address: node1.URL()},
	)
	c.Assert(err, check.IsNil)
	node1.PrepareFailure("pingErr", "/_ping")
	cluster.StartActiveMonitoring(100 * time.Millisecond)
	time.Sleep(300 * time.Millisecond)
	cluster.StopActiveMonitoring()
	var p dockerProvisioner
	err = p.Initialize()
	c.Assert(err, check.IsNil)
	p.cluster = cluster
	healer := nodeHealer{
		locks:                 make(map[string]*sync.Mutex),
		provisioner:           &p,
		disabledTime:          0,
		failuresBeforeHealing: 1,
		waitTimeNewMachine:    1 * time.Second,
	}
	nodes, err := p.getCluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node1.URL()))
	c.Assert(urlToHost(nodes[0].Address), check.Equals, "127.0.0.1")
	c.Assert(nodes[0].FailureCount() > 0, check.Equals, true)
	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	created, err := healer.healNode(&nodes[0])
	c.Assert(err, check.ErrorMatches, ".*error registering new node.*")
	c.Assert(created.Address, check.Equals, "")
	c.Assert(nodes[0].FailureCount(), check.Equals, 0)
	nodes, err = p.getCluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node1.URL()))
	c.Assert(urlToHost(nodes[0].Address), check.Equals, "127.0.0.1")
}
Exemplo n.º 5
0
func (s *S) TestMachinesDestroy(c *check.C) {
	iaas.RegisterIaasProvider("test-iaas", newTestIaaS)
	_, err := iaas.CreateMachineForIaaS("test-iaas", map[string]string{"id": "myid1"})
	c.Assert(err, check.IsNil)
	recorder := httptest.NewRecorder()
	request, err := http.NewRequest("DELETE", "/iaas/machines/myid1", nil)
	c.Assert(err, check.IsNil)
	request.Header.Set("Authorization", "bearer "+s.admintoken.GetValue())
	m := RunServer(true)
	m.ServeHTTP(recorder, request)
	c.Assert(recorder.Code, check.Equals, http.StatusOK)
}
Exemplo n.º 6
0
func (s *S) TestHealerHealNodeDestroyError(c *check.C) {
	factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1")
	iaasInst.DelErr = fmt.Errorf("my destroy error")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "addr2"
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", 2)
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)

	healer := newNodeHealer(nodeHealerArgs{
		WaitTimeNewMachine: time.Minute,
	})
	healer.Shutdown()
	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")

	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr1")

	buf := bytes.Buffer{}
	log.SetLogger(log.NewWriterLogger(&buf, false))
	defer log.SetLogger(nil)
	created, err := healer.healNode(nodes[0])
	c.Assert(err, check.IsNil)
	c.Assert(created.Address, check.Equals, "http://addr2:2")
	c.Assert(buf.String(), check.Matches, "(?s).*my destroy error.*")

	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr2:2")

	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr2")
}
Exemplo n.º 7
0
func (a *autoScaleConfig) addNode(event *autoScaleEvent, modelNodes []*cluster.Node) (*cluster.Node, error) {
	metadata, err := chooseMetadataFromNodes(modelNodes)
	if err != nil {
		return nil, err
	}
	_, hasIaas := metadata["iaas"]
	if !hasIaas {
		return nil, fmt.Errorf("no IaaS information in nodes metadata: %#v", metadata)
	}
	machine, err := iaas.CreateMachineForIaaS(metadata["iaas"], metadata)
	if err != nil {
		return nil, fmt.Errorf("unable to create machine: %s", err.Error())
	}
	newAddr := machine.FormatNodeAddress()
	event.logMsg("new machine created: %s - Waiting for docker to start...", newAddr)
	createdNode := cluster.Node{
		Address:        newAddr,
		Metadata:       metadata,
		CreationStatus: cluster.NodeCreationStatusPending,
	}
	err = a.provisioner.Cluster().Register(createdNode)
	if err != nil {
		machine.Destroy()
		return nil, fmt.Errorf("error registering new node %s: %s", newAddr, err.Error())
	}
	q, err := queue.Queue()
	if err == nil {
		jobParams := monsterqueue.JobParams{
			"endpoint": createdNode.Address,
			"machine":  machine.Id,
			"metadata": createdNode.Metadata,
		}
		var job monsterqueue.Job
		job, err = q.EnqueueWait(bs.QueueTaskName, jobParams, a.WaitTimeNewMachine)
		if err == nil {
			_, err = job.Result()
		}
	}
	if err != nil {
		machine.Destroy()
		a.provisioner.Cluster().Unregister(newAddr)
		return nil, fmt.Errorf("error running bs task: %s", err)
	}
	event.logMsg("new machine created: %s - started!", newAddr)
	return &createdNode, nil
}
Exemplo n.º 8
0
func (s *S) TestHealerHealNode(c *check.C) {
	factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "addr2"
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", 2)
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)

	healer := newNodeHealer(nodeHealerArgs{
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	healer.Shutdown()
	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")

	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr1")

	created, err := healer.healNode(nodes[0])
	c.Assert(err, check.IsNil)
	c.Assert(created.Address, check.Equals, "http://addr2:2")
	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr2:2")

	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr2")
}
Exemplo n.º 9
0
func (s *HandlersSuite) TestRemoveNodeHandlerRemoveIaaS(c *check.C) {
	iaas.RegisterIaasProvider("my-xxx-iaas", newTestIaaS)
	machine, err := iaas.CreateMachineForIaaS("my-xxx-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	mainDockerProvisioner.cluster, err = cluster.New(nil, &cluster.MapStorage{})
	c.Assert(err, check.IsNil)
	_, err = mainDockerProvisioner.getCluster().Register(fmt.Sprintf("http://%s:2375", machine.Address), nil)
	c.Assert(err, check.IsNil)
	b := bytes.NewBufferString(fmt.Sprintf(`{"address": "http://%s:2375", "remove_iaas": "true"}`, machine.Address))
	req, err := http.NewRequest("POST", "/node/remove", b)
	c.Assert(err, check.IsNil)
	rec := httptest.NewRecorder()
	err = removeNodeHandler(rec, req, nil)
	c.Assert(err, check.IsNil)
	nodes, err := mainDockerProvisioner.getCluster().Nodes()
	c.Assert(len(nodes), check.Equals, 0)
	_, err = iaas.FindMachineById(machine.Id)
	c.Assert(err, check.Equals, mgo.ErrNotFound)
}
Exemplo n.º 10
0
func (s *S) TestHealerHealNodeWaitAndRegisterError(c *check.C) {
	iaas.RegisterIaasProvider("my-healer-iaas", dockertest.NewHealerIaaSConstructor("127.0.0.1", nil))
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaas.RegisterIaasProvider("my-healer-iaas", dockertest.NewHealerIaaSConstructor("localhost", nil))
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2.PrepareFailure("ping-failure", "/_ping")
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", dockertest.URLPort(node2.URL()))
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p, err := s.newFakeDockerProvisioner(node1.URL())
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	node1.PrepareFailure("pingErr", "/_ping")
	p.Cluster().StartActiveMonitoring(100 * time.Millisecond)
	time.Sleep(300 * time.Millisecond)
	p.Cluster().StopActiveMonitoring()
	healer := NewNodeHealer(NodeHealerArgs{
		Provisioner:           p,
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Second,
	})
	nodes, err := p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
	c.Assert(nodes[0].FailureCount() > 0, check.Equals, true)
	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	created, err := healer.healNode(&nodes[0])
	c.Assert(err, check.ErrorMatches, ".*timeout waiting for result.*")
	c.Assert(created.Address, check.Equals, "")
	c.Assert(nodes[0].FailureCount(), check.Equals, 0)
	nodes, err = p.Cluster().Nodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
}
Exemplo n.º 11
0
func (s *S) TestMachinesDestroy(c *check.C) {
	iaas.RegisterIaasProvider("test-iaas", newTestIaaS)
	_, err := iaas.CreateMachineForIaaS("test-iaas", map[string]string{"id": "myid1"})
	c.Assert(err, check.IsNil)
	recorder := httptest.NewRecorder()
	request, err := http.NewRequest("DELETE", "/iaas/machines/myid1", nil)
	c.Assert(err, check.IsNil)
	request.Header.Set("Authorization", "bearer "+s.token.GetValue())
	m := RunServer(true)
	m.ServeHTTP(recorder, request)
	c.Assert(recorder.Code, check.Equals, http.StatusOK)
	c.Assert(eventtest.EventDesc{
		Target: event.Target{Type: event.TargetTypeIaas, Value: "test-iaas"},
		Owner:  s.token.GetUserName(),
		Kind:   "machine.delete",
		StartCustomData: []map[string]interface{}{
			{"name": ":machine_id", "value": "myid1"},
		},
	}, eventtest.HasEvent)
}
Exemplo n.º 12
0
func (s *HandlersSuite) TestRemoveNodeHandlerWithoutRemoveIaaS(c *gocheck.C) {
	iaas.RegisterIaasProvider("some-iaas", TestIaaS{})
	machine, err := iaas.CreateMachineForIaaS("some-iaas", map[string]string{})
	c.Assert(err, gocheck.IsNil)
	dCluster, err = cluster.New(nil, &cluster.MapStorage{})
	c.Assert(err, gocheck.IsNil)
	err = dCluster.Register(fmt.Sprintf("http://%s:4243", machine.Address), nil)
	c.Assert(err, gocheck.IsNil)
	b := bytes.NewBufferString(fmt.Sprintf(`{"address": "http://%s:4243", "remove_iaas": "false"}`, machine.Address))
	req, err := http.NewRequest("POST", "/node/remove", b)
	c.Assert(err, gocheck.IsNil)
	rec := httptest.NewRecorder()
	err = removeNodeHandler(rec, req, nil)
	c.Assert(err, gocheck.IsNil)
	nodes, err := dCluster.Nodes()
	c.Assert(len(nodes), gocheck.Equals, 0)
	dbM, err := iaas.FindMachineById(machine.Id)
	c.Assert(err, gocheck.IsNil)
	c.Assert(dbM.Id, gocheck.Equals, machine.Id)
}
Exemplo n.º 13
0
func (h *nodeHealer) healNode(node *cluster.Node) (cluster.Node, error) {
	emptyNode := cluster.Node{}
	failingAddr := node.Address
	nodeMetadata := node.CleanMetadata()
	failingHost := urlToHost(failingAddr)
	failures := node.FailureCount()
	machine, err := iaas.CreateMachineForIaaS(nodeMetadata["iaas"], nodeMetadata)
	if err != nil {
		node.ResetFailures()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error())
	}
	err = h.provisioner.getCluster().Unregister(failingAddr)
	if err != nil {
		machine.Destroy()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error())
	}
	newAddr := machine.FormatNodeAddress()
	log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr)
	createdNode, err := h.provisioner.getCluster().WaitAndRegister(newAddr, nodeMetadata, h.waitTimeNewMachine)
	if err != nil {
		node.ResetFailures()
		h.provisioner.getCluster().Register(failingAddr, nodeMetadata)
		machine.Destroy()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error())
	}
	var buf bytes.Buffer
	err = h.provisioner.moveContainers(failingHost, "", &buf)
	if err != nil {
		log.Errorf("Unable to move containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err.Error(), buf.String())
	}
	failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], failingHost)
	if err != nil {
		return createdNode, fmt.Errorf("Unable to find failing machine %s in IaaS: %s", failingHost, err.Error())
	}
	err = failingMachine.Destroy()
	if err != nil {
		return createdNode, fmt.Errorf("Unable to destroy machine %s from IaaS: %s", failingHost, err.Error())
	}
	log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address)
	return createdNode, nil
}
Exemplo n.º 14
0
func addNodeForParams(params map[string]string, isRegister bool) (map[string]string, error) {
	response := make(map[string]string)
	var address string
	if isRegister {
		address, _ = params["address"]
		delete(params, "address")
	} else {
		iaasName, _ := params["iaas"]
		desc, err := iaas.Describe(iaasName)
		if err != nil {
			return response, err
		}
		response["description"] = desc
		var m *iaas.Machine
		if iaasName != "" {
			m, err = iaas.CreateMachineForIaaS(iaasName, params)
		} else {
			m, err = iaas.CreateMachine(params)
		}
		if err != nil {
			return response, err
		}
		nodeAddress, err := m.FormatNodeAddress()
		if err != nil {
			return response, err
		}
		params["iaas"] = m.Iaas
		address = nodeAddress
	}
	err := validateNodeAddress(address)
	if err != nil {
		return response, err
	}
	err = dockerCluster().Register(address, params)
	if err != nil {
		return response, err
	}
	return response, err
}
Exemplo n.º 15
0
func (s *S) TestHealerHealNodeCreateMachineError(c *check.C) {
	factory, iaasInst := dockertest.NewHealerIaaSConstructorWithInst("127.0.0.1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "localhost"
	iaasInst.Err = fmt.Errorf("my create machine error")
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	p, err := s.newFakeDockerProvisioner(node1.URL())
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	node1.PrepareFailure("pingErr", "/_ping")
	p.Cluster().StartActiveMonitoring(100 * time.Millisecond)
	time.Sleep(300 * time.Millisecond)
	p.Cluster().StopActiveMonitoring()
	healer := NewNodeHealer(NodeHealerArgs{
		Provisioner:           p,
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	nodes, err := p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
	c.Assert(nodes[0].FailureCount() > 0, check.Equals, true)
	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	created, err := healer.healNode(&nodes[0])
	c.Assert(err, check.ErrorMatches, ".*my create machine error.*")
	c.Assert(created.Address, check.Equals, "")
	c.Assert(nodes[0].FailureCount(), check.Equals, 0)
	nodes, err = p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
}
Exemplo n.º 16
0
func (a *autoScaleConfig) addNode(evt *event.Event, modelNodes []*cluster.Node) (*cluster.Node, error) {
	metadata, err := chooseMetadataFromNodes(modelNodes)
	if err != nil {
		return nil, err
	}
	_, hasIaas := metadata["iaas"]
	if !hasIaas {
		return nil, errors.Errorf("no IaaS information in nodes metadata: %#v", metadata)
	}
	machine, err := iaas.CreateMachineForIaaS(metadata["iaas"], metadata)
	if err != nil {
		return nil, errors.Wrap(err, "unable to create machine")
	}
	newAddr := machine.FormatNodeAddress()
	evt.Logf("new machine created: %s - Waiting for docker to start...", newAddr)
	createOpts := provision.AddNodeOptions{
		Address:    newAddr,
		Metadata:   metadata,
		WaitTO:     a.WaitTimeNewMachine,
		CaCert:     machine.CaCert,
		ClientCert: machine.ClientCert,
		ClientKey:  machine.ClientKey,
	}
	err = a.provisioner.AddNode(createOpts)
	if err != nil {
		machine.Destroy()
		a.provisioner.Cluster().Unregister(newAddr)
		return nil, errors.Wrapf(err, "error adding new node %s", newAddr)
	}
	createdNode, err := a.provisioner.Cluster().GetNode(newAddr)
	if err != nil {
		machine.Destroy()
		a.provisioner.Cluster().Unregister(newAddr)
		return nil, errors.Wrapf(err, "error retrieving new node %s", newAddr)
	}
	evt.Logf("new machine created: %s - started!", newAddr)
	return &createdNode, nil
}
Exemplo n.º 17
0
func (s *S) TestTryHealingNodeDoubleCheck(c *check.C) {
	factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "addr2"
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", 2)
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)
	healer := newNodeHealer(nodeHealerArgs{
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	healer.started = time.Now().Add(-3 * time.Second)
	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	healErr := healer.tryHealingNode(nodes[0], "something", nil)
	c.Assert(healErr, check.IsNil)
	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")
	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr1")
	c.Assert(eventtest.EventDesc{
		IsEmpty: true,
	}, eventtest.HasEvent)
}
Exemplo n.º 18
0
func (s *S) TestRemoveNodeHandlerWithRemoveIaaS(c *check.C) {
	iaas.RegisterIaasProvider("some-iaas", newTestIaaS)
	machine, err := iaas.CreateMachineForIaaS("some-iaas", map[string]string{"id": "m1"})
	c.Assert(err, check.IsNil)
	err = s.provisioner.AddNode(provision.AddNodeOptions{
		Address: fmt.Sprintf("http://%s:2375", machine.Address),
	})
	c.Assert(err, check.IsNil)
	u := fmt.Sprintf("/node/http://%s:2375?remove-iaas=true", machine.Address)
	req, err := http.NewRequest("DELETE", u, nil)
	c.Assert(err, check.IsNil)
	req.Header.Set("Authorization", "bearer "+s.token.GetValue())
	rec := httptest.NewRecorder()
	server := RunServer(true)
	server.ServeHTTP(rec, req)
	c.Assert(rec.Body.String(), check.Equals, "rebalancing...remove done!")
	c.Assert(rec.Code, check.Equals, http.StatusOK)
	nodes, err := s.provisioner.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 0)
	_, err = iaas.FindMachineById(machine.Id)
	c.Assert(err, check.Equals, mgo.ErrNotFound)
}
Exemplo n.º 19
0
func (a *autoScaleConfig) addNode(modelNodes []*cluster.Node) (*cluster.Node, error) {
	metadata, err := chooseMetadataFromNodes(modelNodes)
	if err != nil {
		return nil, err
	}
	_, hasIaas := metadata["iaas"]
	if !hasIaas {
		return nil, fmt.Errorf("no IaaS information in nodes metadata: %#v", metadata)
	}
	machine, err := iaas.CreateMachineForIaaS(metadata["iaas"], metadata)
	if err != nil {
		return nil, fmt.Errorf("unable to create machine: %s", err.Error())
	}
	newAddr := machine.FormatNodeAddress()
	a.logDebug("new machine created: %s - Waiting for docker to start...", newAddr)
	createdNode, err := a.provisioner.getCluster().WaitAndRegister(newAddr, metadata, a.waitTimeNewMachine)
	if err != nil {
		machine.Destroy()
		return nil, fmt.Errorf("error registering new node %s: %s", newAddr, err.Error())
	}
	a.logDebug("new machine created: %s - started!", newAddr)
	return &createdNode, nil
}
Exemplo n.º 20
0
func (s *S) TestHealerHealNodeCreateMachineError(c *check.C) {
	factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "addr2"
	iaasInst.Err = fmt.Errorf("my create machine error")

	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)

	healer := newNodeHealer(nodeHealerArgs{
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	healer.Shutdown()
	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")
	fakeNode := nodes[0].(*provisiontest.FakeNode)
	fakeNode.SetHealth(1, false)
	c.Assert(fakeNode.FailureCount() > 0, check.Equals, true)
	created, err := healer.healNode(nodes[0])
	c.Assert(err, check.ErrorMatches, ".*my create machine error.*")
	c.Assert(created, check.IsNil)
	c.Assert(fakeNode.FailureCount(), check.Equals, 0)
	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")
	c.Assert(nodes[0].Status(), check.Equals, "enabled")
}
Exemplo n.º 21
0
func (s *S) TestHealerHandleErrorFailureEvent(c *check.C) {
	factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "addr2"
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", 2)
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)
	node, err := p.GetNode("http://addr1:1")
	c.Assert(err, check.IsNil)

	healer := newNodeHealer(nodeHealerArgs{
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	healer.Shutdown()
	healer.started = time.Now().Add(-3 * time.Second)
	conf := healerConfig()
	err = conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)})
	c.Assert(err, check.IsNil)
	err = healer.UpdateNodeData(node, []provision.NodeCheckResult{})
	c.Assert(err, check.IsNil)
	time.Sleep(1200 * time.Millisecond)
	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")

	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr1")

	p.PrepareFailure("AddNode", fmt.Errorf("error registering new node"))
	nodes[0].(*provisiontest.FakeNode).SetHealth(2, true)

	waitTime := healer.HandleError(nodes[0].(provision.NodeHealthChecker))
	c.Assert(waitTime, check.Equals, time.Duration(0))

	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")

	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr1")

	c.Assert(eventtest.EventDesc{
		Target: event.Target{Type: "node", Value: "http://addr1:1"},
		Kind:   "healer",
		StartCustomData: map[string]interface{}{
			"reason":   "2 consecutive failures",
			"node._id": "http://addr1:1",
		},
		ErrorMatches: `Can't auto-heal after 2 failures for node addr1: error registering new node: error registering new node`,
	}, eventtest.HasEvent)
}
Exemplo n.º 22
0
func (s *S) TestCheckActiveHealing(c *check.C) {
	conf := healerConfig()
	err := conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)})
	c.Assert(err, check.IsNil)
	factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err = iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "addr2"
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", 2)
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")

	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)
	node, err := p.GetNode("http://addr1:1")
	c.Assert(err, check.IsNil)

	healer := newNodeHealer(nodeHealerArgs{
		WaitTimeNewMachine: time.Minute,
	})
	healer.Shutdown()
	healer.started = time.Now().Add(-3 * time.Second)

	err = healer.UpdateNodeData(node, []provision.NodeCheckResult{})
	c.Assert(err, check.IsNil)
	time.Sleep(1200 * time.Millisecond)

	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")

	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr1")

	healer.runActiveHealing()

	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr2:2")

	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr2")

	c.Assert(eventtest.EventDesc{
		Target: event.Target{Type: "node", Value: "http://addr1:1"},
		Kind:   "healer",
		StartCustomData: map[string]interface{}{
			"reason":         bson.M{"$regex": `last update \d+\.\d*?s ago, last success \d+\.\d*?s ago`},
			"lastcheck.time": bson.M{"$exists": true},
			"node._id":       "http://addr1:1",
		},
		EndCustomData: map[string]interface{}{
			"_id": "http://addr2:2",
		},
	}, eventtest.HasEvent)
}
func (h *Healer) HandleError(node cluster.Node) time.Duration {
	defaultWait := 1 * time.Minute
	failures := node.FailureCount()
	if failures < 5 {
		return defaultWait
	}
	failingAddr := node.Address
	failingHost := urlToHost(failingAddr)
	containers, err := listContainersByHost(failingHost)
	if err != nil {
		log.Errorf("Error in cluster healer, trying to list containers: %s", err.Error())
		return defaultWait
	}
	// Empty host let's just try again in the future
	if len(containers) == 0 {
		return defaultWait
	}
	iaasName, hasIaas := node.Metadata["iaas"]
	if !hasIaas {
		log.Errorf("Can't auto-heal after %d failures for node %s: no IaaS information.", failures, failingHost)
		return defaultWait
	}
	machine, err := iaas.CreateMachineForIaaS(iaasName, node.Metadata)
	if err != nil {
		log.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error())
		return defaultWait
	}
	newAddr, err := machine.FormatNodeAddress()
	if err != nil {
		log.Errorf("Can't auto-heal after %d failures for node %s: error formatting address: %s", failures, failingHost, err.Error())
		machine.Destroy()
		return defaultWait
	}
	cluster := dockerCluster()
	err = cluster.Unregister(failingAddr)
	if err != nil {
		log.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error())
		return defaultWait
	}
	err = cluster.WaitAndRegister(newAddr, node.Metadata, 2*time.Minute)
	if err != nil {
		log.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error())
		machine.Destroy()
		return defaultWait
	}
	var buf bytes.Buffer
	encoder := json.NewEncoder(&buf)
	err = moveContainers(failingHost, machine.Address, encoder)
	if err != nil {
		log.Errorf("Unable to move containers from: %s to: %s - %s", failingHost, machine.Address, err.Error())
		return 0
	}
	failingMachine, err := iaas.FindMachineByAddress(failingHost)
	if err != nil {
		log.Errorf("Unable to find failing machine %s in IaaS", failingHost)
		return 0
	}
	err = failingMachine.Destroy()
	if err != nil {
		log.Errorf("Unable to find destroy machine %s from IaaS", failingHost)
	}
	return 0
}
Exemplo n.º 24
0
func (s *S) TestTryHealingNodeConcurrent(c *check.C) {
	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(10))
	factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "addr2"
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", 2)
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p := provisiontest.ProvisionerInstance
	err = p.AddNode(provision.AddNodeOptions{
		Address:  "http://addr1:1",
		Metadata: map[string]string{"iaas": "my-healer-iaas"},
	})
	c.Assert(err, check.IsNil)
	node, err := p.GetNode("http://addr1:1")
	c.Assert(err, check.IsNil)
	healer := newNodeHealer(nodeHealerArgs{
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	healer.started = time.Now().Add(-3 * time.Second)
	conf := healerConfig()
	err = conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)})
	c.Assert(err, check.IsNil)
	err = healer.UpdateNodeData(node, []provision.NodeCheckResult{})
	c.Assert(err, check.IsNil)
	time.Sleep(1200 * time.Millisecond)
	nodes, err := p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1")
	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr1")
	wg := sync.WaitGroup{}
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			healErr := healer.tryHealingNode(nodes[0], "something", nil)
			c.Assert(healErr, check.IsNil)
		}()
	}
	wg.Wait()
	nodes, err = p.ListNodes(nil)
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(nodes[0].Address(), check.Equals, "http://addr2:2")
	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "addr2")
	c.Assert(eventtest.EventDesc{
		Target: event.Target{Type: "node", Value: "http://addr1:1"},
		Kind:   "healer",
		StartCustomData: map[string]interface{}{
			"reason":   "something",
			"node._id": "http://addr1:1",
		},
		EndCustomData: map[string]interface{}{
			"_id": "http://addr2:2",
		},
	}, eventtest.HasEvent)
}
Exemplo n.º 25
0
func (h *NodeHealer) healNode(node provision.Node) (*provision.NodeSpec, error) {
	failingAddr := node.Address()
	// Copy metadata to ensure underlying data structure is not modified.
	newNodeMetadata := map[string]string{}
	for k, v := range node.Metadata() {
		newNodeMetadata[k] = v
	}
	failingHost := net.URLToHost(failingAddr)
	healthNode, isHealthNode := node.(provision.NodeHealthChecker)
	failures := 0
	if isHealthNode {
		failures = healthNode.FailureCount()
	}
	machine, err := iaas.CreateMachineForIaaS(newNodeMetadata["iaas"], newNodeMetadata)
	if err != nil {
		if isHealthNode {
			healthNode.ResetFailures()
		}
		return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error creating new machine", failures, failingHost)
	}
	err = node.Provisioner().UpdateNode(provision.UpdateNodeOptions{
		Address: failingAddr,
		Disable: true,
	})
	if err != nil {
		machine.Destroy()
		return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error unregistering old node", failures, failingHost)
	}
	newAddr := machine.FormatNodeAddress()
	log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr)
	createOpts := provision.AddNodeOptions{
		Address:    newAddr,
		Metadata:   newNodeMetadata,
		WaitTO:     h.waitTimeNewMachine,
		CaCert:     machine.CaCert,
		ClientCert: machine.ClientCert,
		ClientKey:  machine.ClientKey,
	}
	err = node.Provisioner().AddNode(createOpts)
	if err != nil {
		if isHealthNode {
			healthNode.ResetFailures()
		}
		node.Provisioner().UpdateNode(provision.UpdateNodeOptions{Address: failingAddr, Enable: true})
		machine.Destroy()
		return nil, errors.Wrapf(err, "Can't auto-heal after %d failures for node %s: error registering new node", failures, failingHost)
	}
	nodeSpec := provision.NodeToSpec(node)
	nodeSpec.Address = newAddr
	nodeSpec.Metadata = newNodeMetadata
	var buf bytes.Buffer
	err = node.Provisioner().RemoveNode(provision.RemoveNodeOptions{
		Address:   failingAddr,
		Rebalance: true,
		Writer:    &buf,
	})
	if err != nil {
		log.Errorf("Unable to move containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err, buf.String())
	}
	failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata()["iaas-id"], failingHost)
	if err != nil {
		return &nodeSpec, errors.Wrapf(err, "Unable to find failing machine %s in IaaS", failingHost)
	}
	err = failingMachine.Destroy()
	if err != nil {
		return &nodeSpec, errors.Wrapf(err, "Unable to destroy machine %s from IaaS", failingHost)
	}
	log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address)
	return &nodeSpec, nil
}
Exemplo n.º 26
0
func (s *S) TestHealerHealNode(c *check.C) {
	factory, iaasInst := dockertest.NewHealerIaaSConstructorWithInst("127.0.0.1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "localhost"
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", dockertest.URLPort(node2.URL()))
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p, err := s.newFakeDockerProvisioner(node1.URL())
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	app := provisiontest.NewFakeApp("myapp", "python", 0)
	_, err = p.StartContainers(dockertest.StartContainersArgs{
		Endpoint:  node1.URL(),
		App:       app,
		Amount:    map[string]int{"web": 1},
		Image:     "tsuru/python",
		PullImage: true,
	})
	c.Assert(err, check.IsNil)

	healer := NewNodeHealer(NodeHealerArgs{
		Provisioner:           p,
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	nodes, err := p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")

	containers := p.AllContainers()
	c.Assert(err, check.IsNil)
	c.Assert(containers, check.HasLen, 1)
	c.Assert(containers[0].HostAddr, check.Equals, "127.0.0.1")

	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "127.0.0.1")

	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	created, err := healer.healNode(&nodes[0])
	c.Assert(err, check.IsNil)
	c.Assert(created.Address, check.Equals, fmt.Sprintf("http://localhost:%d", dockertest.URLPort(node2.URL())))
	nodes, err = p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node2.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "localhost")

	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "localhost")

	err = tsurutest.WaitCondition(5*time.Second, func() bool {
		containers := p.AllContainers()
		return len(containers) == 1 && containers[0].HostAddr == "localhost"
	})
	c.Assert(err, check.IsNil)
}
Exemplo n.º 27
0
func (s *S) TestHealerHandleError(c *check.C) {
	factory, iaasInst := dockertest.NewHealerIaaSConstructorWithInst("127.0.0.1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "localhost"
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", dockertest.URLPort(node2.URL()))
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p, err := s.newFakeDockerProvisioner(node1.URL())
	c.Assert(err, check.IsNil)
	defer p.Destroy()

	app := provisiontest.NewFakeApp("myapp", "python", 0)
	_, err = p.StartContainers(dockertest.StartContainersArgs{
		Endpoint:  node1.URL(),
		App:       app,
		Amount:    map[string]int{"web": 1},
		Image:     "tsuru/python",
		PullImage: true,
	})
	c.Assert(err, check.IsNil)

	healer := NewNodeHealer(NodeHealerArgs{
		Provisioner:           p,
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	nodes, err := p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")

	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "127.0.0.1")

	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	nodes[0].Metadata["Failures"] = "2"

	waitTime := healer.HandleError(&nodes[0])
	c.Assert(waitTime, check.Equals, time.Duration(0))

	nodes, err = p.Cluster().Nodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node2.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "localhost")

	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "localhost")

	healingColl, err := healingCollection()
	c.Assert(err, check.IsNil)
	defer healingColl.Close()
	var events []HealingEvent
	err = healingColl.Find(nil).All(&events)
	c.Assert(err, check.IsNil)
	c.Assert(events, check.HasLen, 1)
	c.Assert(events[0].Action, check.Equals, "node-healing")
	c.Assert(events[0].StartTime, check.Not(check.DeepEquals), time.Time{})
	c.Assert(events[0].EndTime, check.Not(check.DeepEquals), time.Time{})
	c.Assert(events[0].Error, check.Equals, "")
	c.Assert(events[0].Successful, check.Equals, true)
	c.Assert(events[0].FailingNode.Address, check.Equals, fmt.Sprintf("http://127.0.0.1:%d/", dockertest.URLPort(node1.URL())))
	c.Assert(events[0].CreatedNode.Address, check.Equals, fmt.Sprintf("http://localhost:%d", dockertest.URLPort(node2.URL())))
}
Exemplo n.º 28
0
func (s *S) TestHealerHandleError(c *check.C) {
	rollback := startTestRepositoryServer()
	defer rollback()
	defer func() {
		machines, _ := iaas.ListMachines()
		for _, m := range machines {
			m.Destroy()
		}
	}()
	factory, iaasInst := newHealerIaaSConstructorWithInst("127.0.0.1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.addr = "localhost"
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", urlPort(node2.URL()))
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	cluster, err := cluster.New(nil, &cluster.MapStorage{},
		cluster.Node{Address: node1.URL()},
	)
	c.Assert(err, check.IsNil)
	var p dockerProvisioner
	err = p.Initialize()
	c.Assert(err, check.IsNil)
	p.cluster = cluster

	appInstance := provisiontest.NewFakeApp("myapp", "python", 0)
	defer p.Destroy(appInstance)
	p.Provision(appInstance)
	imageId, err := appCurrentImageName(appInstance.GetName())
	c.Assert(err, check.IsNil)
	customData := map[string]interface{}{
		"procfile": "web: python ./myapp",
	}
	err = saveImageCustomData(imageId, customData)
	c.Assert(err, check.IsNil)
	_, err = addContainersWithHost(&changeUnitsPipelineArgs{
		toHost:      "127.0.0.1",
		toAdd:       map[string]*containersToAdd{"web": {Quantity: 1}},
		app:         appInstance,
		imageId:     imageId,
		provisioner: &p,
	})
	c.Assert(err, check.IsNil)

	conn, err := db.Conn()
	c.Assert(err, check.IsNil)
	defer conn.Close()
	appStruct := &app.App{
		Name: appInstance.GetName(),
	}
	err = conn.Apps().Insert(appStruct)
	c.Assert(err, check.IsNil)
	defer conn.Apps().Remove(bson.M{"name": appStruct.Name})

	healer := nodeHealer{
		locks:                 make(map[string]*sync.Mutex),
		provisioner:           &p,
		disabledTime:          0,
		failuresBeforeHealing: 1,
		waitTimeNewMachine:    1 * time.Second,
	}
	nodes, err := cluster.UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node1.URL()))
	c.Assert(urlToHost(nodes[0].Address), check.Equals, "127.0.0.1")

	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "127.0.0.1")

	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	nodes[0].Metadata["Failures"] = "2"

	waitTime := healer.HandleError(&nodes[0])
	c.Assert(waitTime, check.Equals, time.Duration(0))

	nodes, err = cluster.UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node2.URL()))
	c.Assert(urlToHost(nodes[0].Address), check.Equals, "localhost")

	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "localhost")

	healingColl, err := healingCollection()
	c.Assert(err, check.IsNil)
	defer healingColl.Close()
	var events []healingEvent
	err = healingColl.Find(nil).All(&events)
	c.Assert(err, check.IsNil)
	c.Assert(events, check.HasLen, 1)
	c.Assert(events[0].Action, check.Equals, "node-healing")
	c.Assert(events[0].StartTime, check.Not(check.DeepEquals), time.Time{})
	c.Assert(events[0].EndTime, check.Not(check.DeepEquals), time.Time{})
	c.Assert(events[0].Error, check.Equals, "")
	c.Assert(events[0].Successful, check.Equals, true)
	c.Assert(events[0].FailingNode.Address, check.Equals, fmt.Sprintf("http://127.0.0.1:%d/", urlPort(node1.URL())))
	c.Assert(events[0].CreatedNode.Address, check.Equals, fmt.Sprintf("http://localhost:%d", urlPort(node2.URL())))
}
Exemplo n.º 29
0
func (s *S) TestHealerHealNode(c *check.C) {
	rollback := startTestRepositoryServer()
	defer rollback()
	defer func() {
		machines, _ := iaas.ListMachines()
		for _, m := range machines {
			m.Destroy()
		}
	}()
	factory, iaasInst := newHealerIaaSConstructorWithInst("127.0.0.1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.addr = "localhost"
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", urlPort(node2.URL()))
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	cluster, err := cluster.New(nil, &cluster.MapStorage{},
		cluster.Node{Address: node1.URL()},
	)
	c.Assert(err, check.IsNil)
	appInstance := provisiontest.NewFakeApp("myapp", "python", 0)
	var p dockerProvisioner
	err = p.Initialize()
	c.Assert(err, check.IsNil)
	p.cluster = cluster
	defer p.Destroy(appInstance)
	p.Provision(appInstance)
	imageId, err := appCurrentImageName(appInstance.GetName())
	c.Assert(err, check.IsNil)
	customData := map[string]interface{}{
		"procfile": "web: python ./myapp",
	}
	err = saveImageCustomData(imageId, customData)
	c.Assert(err, check.IsNil)
	_, err = addContainersWithHost(&changeUnitsPipelineArgs{
		toHost:      "127.0.0.1",
		toAdd:       map[string]*containersToAdd{"web": {Quantity: 1}},
		app:         appInstance,
		imageId:     imageId,
		provisioner: &p,
	})
	c.Assert(err, check.IsNil)

	conn, err := db.Conn()
	c.Assert(err, check.IsNil)
	defer conn.Close()
	appStruct := &app.App{
		Name: appInstance.GetName(),
	}
	err = conn.Apps().Insert(appStruct)
	c.Assert(err, check.IsNil)
	defer conn.Apps().Remove(bson.M{"name": appStruct.Name})

	healer := nodeHealer{
		locks:                 make(map[string]*sync.Mutex),
		provisioner:           &p,
		disabledTime:          0,
		failuresBeforeHealing: 1,
		waitTimeNewMachine:    1 * time.Second,
	}
	nodes, err := p.getCluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node1.URL()))
	c.Assert(urlToHost(nodes[0].Address), check.Equals, "127.0.0.1")

	containers, err := p.listAllContainers()
	c.Assert(err, check.IsNil)
	c.Assert(containers, check.HasLen, 1)
	c.Assert(containers[0].HostAddr, check.Equals, "127.0.0.1")

	machines, err := iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "127.0.0.1")

	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	created, err := healer.healNode(&nodes[0])
	c.Assert(err, check.IsNil)
	c.Assert(created.Address, check.Equals, fmt.Sprintf("http://localhost:%d", urlPort(node2.URL())))
	nodes, err = cluster.UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(urlPort(nodes[0].Address), check.Equals, urlPort(node2.URL()))
	c.Assert(urlToHost(nodes[0].Address), check.Equals, "localhost")

	machines, err = iaas.ListMachines()
	c.Assert(err, check.IsNil)
	c.Assert(machines, check.HasLen, 1)
	c.Assert(machines[0].Address, check.Equals, "localhost")

	done := make(chan bool)
	go func() {
		for range time.Tick(100 * time.Millisecond) {
			containers, err := p.listAllContainers()
			if err == nil && len(containers) == 1 && containers[0].HostAddr == "localhost" {
				close(done)
				return
			}
		}
	}()

	select {
	case <-done:
	case <-time.After(5 * time.Second):
		c.Fatal("Timed out waiting for containers to move")
	}
}
Exemplo n.º 30
0
func (h *NodeHealer) healNode(node *cluster.Node) (cluster.Node, error) {
	emptyNode := cluster.Node{}
	failingAddr := node.Address
	nodeMetadata := node.CleanMetadata()
	failingHost := net.URLToHost(failingAddr)
	failures := node.FailureCount()
	machine, err := iaas.CreateMachineForIaaS(nodeMetadata["iaas"], nodeMetadata)
	if err != nil {
		node.ResetFailures()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error creating new machine: %s", failures, failingHost, err.Error())
	}
	err = h.provisioner.Cluster().Unregister(failingAddr)
	if err != nil {
		machine.Destroy()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error unregistering old node: %s", failures, failingHost, err.Error())
	}
	newAddr := machine.FormatNodeAddress()
	log.Debugf("New machine created during healing process: %s - Waiting for docker to start...", newAddr)
	createdNode := cluster.Node{
		Address:        newAddr,
		Metadata:       nodeMetadata,
		CreationStatus: cluster.NodeCreationStatusPending,
	}
	err = h.provisioner.Cluster().Register(createdNode)
	if err != nil {
		node.ResetFailures()
		h.provisioner.Cluster().Register(cluster.Node{Address: failingAddr, Metadata: nodeMetadata})
		machine.Destroy()
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error registering new node: %s", failures, failingHost, err.Error())
	}
	q, err := queue.Queue()
	if err != nil {
		return emptyNode, err
	}
	jobParams := monsterqueue.JobParams{
		"endpoint": createdNode.Address,
		"machine":  machine.Id,
		"metadata": createdNode.Metadata,
	}
	job, err := q.EnqueueWait(bs.QueueTaskName, jobParams, h.waitTimeNewMachine)
	if err == nil {
		_, err = job.Result()
	}
	if err != nil {
		node.ResetFailures()
		h.provisioner.Cluster().Register(cluster.Node{Address: failingAddr, Metadata: nodeMetadata})
		return emptyNode, fmt.Errorf("Can't auto-heal after %d failures for node %s: error waiting for the bs task: %s", failures, failingHost, err.Error())
	}
	var buf bytes.Buffer
	err = h.provisioner.MoveContainers(failingHost, "", &buf)
	if err != nil {
		log.Errorf("Unable to move containers, skipping containers healing %q -> %q: %s: %s", failingHost, machine.Address, err.Error(), buf.String())
	}
	failingMachine, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], failingHost)
	if err != nil {
		return createdNode, fmt.Errorf("Unable to find failing machine %s in IaaS: %s", failingHost, err.Error())
	}
	err = failingMachine.Destroy()
	if err != nil {
		return createdNode, fmt.Errorf("Unable to destroy machine %s from IaaS: %s", failingHost, err.Error())
	}
	log.Debugf("Done auto-healing node %q, node %q created in its place.", failingHost, machine.Address)
	return createdNode, nil
}