Пример #1
0
func (s *S) TestRunContainerHealerWithError(c *check.C) {
	p, err := dockertest.StartMultipleServersCluster()
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	node1 := p.Servers()[0]
	app := newFakeAppInDB("myapp", "python", 0)
	_, err = p.StartContainers(dockertest.StartContainersArgs{
		Endpoint:  node1.URL(),
		App:       app,
		Amount:    map[string]int{"web": 2},
		Image:     "tsuru/python",
		PullImage: true,
	})
	c.Assert(err, check.IsNil)

	containers := p.AllContainers()
	c.Assert(err, check.IsNil)
	c.Assert(containers, check.HasLen, 2)
	c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
	node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})

	toMoveCont := containers[1]
	toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-2 * time.Minute)
	p.PrepareListResult([]container.Container{containers[0], toMoveCont}, nil)
	p.FailMove(
		errors.New("cannot move container"),
		errors.New("cannot move container"),
		errors.New("cannot move container"),
	)

	healer := NewContainerHealer(ContainerHealerArgs{
		Provisioner:         p,
		MaxUnresponsiveTime: time.Minute,
		Locker:              dockertest.NewFakeLocker(),
	})
	healer.runContainerHealerOnce()

	containers = p.AllContainers()
	c.Assert(err, check.IsNil)
	c.Assert(containers, check.HasLen, 2)
	hosts := []string{containers[0].HostAddr, containers[1].HostAddr}
	c.Assert(hosts[0], check.Equals, net.URLToHost(node1.URL()))
	c.Assert(hosts[1], check.Equals, net.URLToHost(node1.URL()))

	c.Assert(eventtest.EventDesc{
		Target: event.Target{Type: "container", Value: toMoveCont.ID},
		Kind:   "healer",
		StartCustomData: map[string]interface{}{
			"hostaddr": "127.0.0.1",
			"id":       toMoveCont.ID,
		},
		ErrorMatches: `.*Error trying to heal containers.*`,
		EndCustomData: map[string]interface{}{
			"hostaddr": "",
		},
	}, eventtest.HasEvent)
}
Пример #2
0
func (s *segregatedScheduler) filterByMemoryUsage(a *app.App, nodes []cluster.Node, maxMemoryRatio float32, TotalMemoryMetadata string) ([]cluster.Node, error) {
	if maxMemoryRatio == 0 || TotalMemoryMetadata == "" {
		return nodes, nil
	}
	hosts := make([]string, len(nodes))
	for i := range nodes {
		hosts[i] = net.URLToHost(nodes[i].Address)
	}
	containers, err := s.provisioner.ListContainers(bson.M{"hostaddr": bson.M{"$in": hosts}, "id": bson.M{"$nin": s.ignoredContainers}})
	if err != nil {
		return nil, err
	}
	hostReserved := make(map[string]int64)
	for _, cont := range containers {
		contApp, err := app.GetByName(cont.AppName)
		if err != nil {
			return nil, err
		}
		hostReserved[cont.HostAddr] += contApp.Plan.Memory
	}
	megabyte := float64(1024 * 1024)
	nodeList := make([]cluster.Node, 0, len(nodes))
	for _, node := range nodes {
		totalMemory, _ := strconv.ParseFloat(node.Metadata[TotalMemoryMetadata], 64)
		shouldAdd := true
		if totalMemory != 0 {
			maxMemory := totalMemory * float64(maxMemoryRatio)
			host := net.URLToHost(node.Address)
			nodeReserved := hostReserved[host] + a.Plan.Memory
			if nodeReserved > int64(maxMemory) {
				shouldAdd = false
				tryingToReserveMB := float64(a.Plan.Memory) / megabyte
				reservedMB := float64(hostReserved[host]) / megabyte
				limitMB := maxMemory / megabyte
				log.Errorf("Node %q has reached its memory limit. "+
					"Limit %0.4fMB. Reserved: %0.4fMB. Needed additional %0.4fMB",
					host, limitMB, reservedMB, tryingToReserveMB)
			}
		}
		if shouldAdd {
			nodeList = append(nodeList, node)
		}
	}
	if len(nodeList) == 0 {
		autoScaleEnabled, _ := config.GetBool("docker:auto-scale:enabled")
		errMsg := fmt.Sprintf("no nodes found with enough memory for container of %q: %0.4fMB",
			a.Name, float64(a.Plan.Memory)/megabyte)
		if autoScaleEnabled {
			// Allow going over quota temporarily because auto-scale will be
			// able to detect this and automatically add a new nodes.
			log.Errorf("WARNING: %s. Will ignore memory restrictions.", errMsg)
			return nodes, nil
		}
		return nil, errors.New(errMsg)
	}
	return nodeList, nil
}
Пример #3
0
func (p *FakeProvisioner) AddUnitsToNode(app provision.App, n uint, process string, w io.Writer, nodeAddr string) ([]provision.Unit, error) {
	if err := p.getError("AddUnits"); err != nil {
		return nil, err
	}
	if n == 0 {
		return nil, errors.New("Cannot add 0 units.")
	}
	p.mut.Lock()
	defer p.mut.Unlock()
	pApp, ok := p.apps[app.GetName()]
	if !ok {
		return nil, errNotProvisioned
	}
	name := app.GetName()
	platform := app.GetPlatform()
	length := uint(len(pApp.units))
	for i := uint(0); i < n; i++ {
		val := atomic.AddInt32(&uniqueIpCounter, 1)
		var hostAddr string
		if nodeAddr != "" {
			hostAddr = net.URLToHost(nodeAddr)
		} else if len(p.nodes) > 0 {
			for _, n := range p.nodes {
				hostAddr = net.URLToHost(n.Address())
				break
			}
		} else {
			hostAddr = fmt.Sprintf("10.10.10.%d", val)
		}
		unit := provision.Unit{
			ID:          fmt.Sprintf("%s-%d", name, pApp.unitLen),
			AppName:     name,
			Type:        platform,
			Status:      provision.StatusStarted,
			Ip:          hostAddr,
			ProcessName: process,
			Address: &url.URL{
				Scheme: "http",
				Host:   fmt.Sprintf("%s:%d", hostAddr, val),
			},
		}
		err := routertest.FakeRouter.AddRoute(name, unit.Address)
		if err != nil {
			return nil, err
		}
		pApp.units = append(pApp.units, unit)
		pApp.unitLen++
	}
	result := make([]provision.Unit, int(n))
	copy(result, pApp.units[length:])
	p.apps[app.GetName()] = pApp
	if w != nil {
		fmt.Fprintf(w, "added %d units", n)
	}
	return result, nil
}
Пример #4
0
func (s *S) TestRunContainerHealerAlreadyHealed(c *check.C) {
	p, err := dockertest.StartMultipleServersCluster()
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	node1 := p.Servers()[0]
	app := provisiontest.NewFakeApp("myapp", "python", 0)
	_, err = p.StartContainers(dockertest.StartContainersArgs{
		Endpoint:  node1.URL(),
		App:       app,
		Amount:    map[string]int{"web": 2},
		Image:     "tsuru/python",
		PullImage: true,
	})
	c.Assert(err, check.IsNil)

	containers := p.AllContainers()
	c.Assert(err, check.IsNil)
	c.Assert(containers, check.HasLen, 2)
	c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
	node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})
	toMoveCont := containers[1]
	toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute)

	p.PrepareListResult([]container.Container{containers[0], toMoveCont}, nil)
	node1.PrepareFailure("createError", "/containers/create")

	healer := NewContainerHealer(ContainerHealerArgs{Provisioner: p, Locker: dockertest.NewFakeLocker()})
	healer.healContainerIfNeeded(toMoveCont)
	healer.healContainerIfNeeded(toMoveCont)

	expected := dockertest.ContainerMoving{
		ContainerID: toMoveCont.ID,
		HostFrom:    toMoveCont.HostAddr,
		HostTo:      "",
	}
	movings := p.Movings()
	c.Assert(movings, check.DeepEquals, []dockertest.ContainerMoving{expected})

	healingColl, err := healingCollection()
	c.Assert(err, check.IsNil)
	defer healingColl.Close()
	var events []HealingEvent
	err = healingColl.Find(nil).All(&events)
	c.Assert(err, check.IsNil)
	c.Assert(events, check.HasLen, 1)
	c.Assert(events[0].Action, check.Equals, "container-healing")
	c.Assert(events[0].StartTime.IsZero(), check.Equals, false)
	c.Assert(events[0].EndTime.IsZero(), check.Equals, false)
	c.Assert(events[0].Error, check.Equals, "")
	c.Assert(events[0].Successful, check.Equals, true)
	c.Assert(events[0].FailingContainer.HostAddr, check.Equals, "127.0.0.1")
	c.Assert(events[0].CreatedContainer.HostAddr, check.Equals, "127.0.0.1")
}
Пример #5
0
// removeNodeHandler calls scheduler.Unregister to unregistering a node into it.
func removeNodeHandler(w http.ResponseWriter, r *http.Request, t auth.Token) error {
	params, err := unmarshal(r.Body)
	if err != nil {
		return err
	}
	address, _ := params["address"]
	if address == "" {
		return fmt.Errorf("Node address is required.")
	}
	node, err := mainDockerProvisioner.Cluster().GetNode(address)
	if err != nil {
		return err
	}
	allowedNodeRemove := permission.Check(t, permission.PermNodeDelete,
		permission.Context(permission.CtxPool, node.Metadata["pool"]),
	)
	if !allowedNodeRemove {
		return permission.ErrUnauthorized
	}
	removeIaaS, _ := strconv.ParseBool(params["remove_iaas"])
	if removeIaaS {
		allowedIaasRemove := permission.Check(t, permission.PermMachineDelete,
			permission.Context(permission.CtxIaaS, node.Metadata["iaas"]),
		)
		if !allowedIaasRemove {
			return permission.ErrUnauthorized
		}
	}
	node.CreationStatus = cluster.NodeCreationStatusDisabled
	_, err = mainDockerProvisioner.Cluster().UpdateNode(node)
	if err != nil {
		return err
	}
	noRebalance, err := strconv.ParseBool(r.URL.Query().Get("no-rebalance"))
	if !noRebalance {
		err = mainDockerProvisioner.rebalanceContainersByHost(net.URLToHost(address), w)
		if err != nil {
			return err
		}
	}
	err = mainDockerProvisioner.Cluster().Unregister(address)
	if err != nil {
		return err
	}
	if removeIaaS {
		var m iaas.Machine
		m, err = iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], net.URLToHost(address))
		if err != nil && err != mgo.ErrNotFound {
			return err
		}
		return m.Destroy()
	}
	return nil
}
Пример #6
0
func (s *S) TestRunContainerHealerShutdown(c *check.C) {
	p, err := dockertest.StartMultipleServersCluster()
	c.Assert(err, check.IsNil)
	defer p.Destroy()

	node1 := p.Servers()[0]
	app := provisiontest.NewFakeApp("myapp", "python", 0)
	_, err = p.StartContainers(dockertest.StartContainersArgs{
		Endpoint:  node1.URL(),
		App:       app,
		Amount:    map[string]int{"web": 2},
		Image:     "tsuru/python",
		PullImage: true,
	})
	c.Assert(err, check.IsNil)

	containers := p.AllContainers()
	c.Assert(err, check.IsNil)
	c.Assert(containers, check.HasLen, 2)
	c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
	node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})

	toMoveCont := containers[1]
	toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute)

	p.PrepareListResult([]container.Container{containers[0], toMoveCont}, nil)
	node1.PrepareFailure("createError", "/containers/create")

	healer := NewContainerHealer(ContainerHealerArgs{
		Provisioner:         p,
		MaxUnresponsiveTime: time.Minute,
		Done:                make(chan bool),
		Locker:              dockertest.NewFakeLocker(),
	})
	ch := make(chan bool)
	go func() {
		defer close(ch)
		healer.RunContainerHealer()
	}()
	healer.Shutdown()
	<-ch

	expected := dockertest.ContainerMoving{
		ContainerID: toMoveCont.ID,
		HostFrom:    toMoveCont.HostAddr,
		HostTo:      "",
	}
	movings := p.Movings()
	c.Assert(movings, check.DeepEquals, []dockertest.ContainerMoving{expected})
}
Пример #7
0
func (n *FakeNode) Units() ([]provision.Unit, error) {
	n.p.mut.Lock()
	defer n.p.mut.Unlock()
	var units []provision.Unit
	for _, a := range n.p.apps {
		for _, u := range a.units {
			if net.URLToHost(u.Address.String()) == net.URLToHost(n.address) {
				units = append(units, u)
			}
		}
	}
	return units, nil
}
Пример #8
0
func (s *S) TestRunContainerHealerAlreadyHealed(c *check.C) {
	p, err := dockertest.StartMultipleServersCluster()
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	node1 := p.Servers()[0]
	app := newFakeAppInDB("myapp", "python", 0)
	_, err = p.StartContainers(dockertest.StartContainersArgs{
		Endpoint:  node1.URL(),
		App:       app,
		Amount:    map[string]int{"web": 2},
		Image:     "tsuru/python",
		PullImage: true,
	})
	c.Assert(err, check.IsNil)
	containers := p.AllContainers()
	c.Assert(err, check.IsNil)
	c.Assert(containers, check.HasLen, 2)
	c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
	node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})
	toMoveCont := containers[1]
	toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute)
	p.PrepareListResult([]container.Container{containers[0], toMoveCont}, nil)
	node1.PrepareFailure("createError", "/containers/create")
	healer := NewContainerHealer(ContainerHealerArgs{Provisioner: p, Locker: dockertest.NewFakeLocker()})
	err = healer.healContainerIfNeeded(toMoveCont)
	c.Assert(err, check.IsNil)
	err = healer.healContainerIfNeeded(toMoveCont)
	c.Assert(err, check.IsNil)
	expected := dockertest.ContainerMoving{
		ContainerID: toMoveCont.ID,
		HostFrom:    toMoveCont.HostAddr,
		HostTo:      "",
	}
	movings := p.Movings()
	c.Assert(movings, check.DeepEquals, []dockertest.ContainerMoving{expected})
	c.Assert(eventtest.EventDesc{
		Target: event.Target{Type: "container", Value: toMoveCont.ID},
		Kind:   "healer",
		StartCustomData: map[string]interface{}{
			"hostaddr": "127.0.0.1",
			"id":       toMoveCont.ID,
		},
		EndCustomData: map[string]interface{}{
			"hostaddr": "127.0.0.1",
			"id":       bson.M{"$ne": ""},
		},
	}, eventtest.HasEvent)
}
Пример #9
0
func (p *dockerProvisioner) runningContainersByNode(nodes []*cluster.Node) (map[string][]container.Container, error) {
	appNames, err := p.listAppsForNodes(nodes)
	if err != nil {
		return nil, err
	}
	for _, appName := range appNames {
		locked, err := app.AcquireApplicationLock(appName, app.InternalAppName, "node auto scale")
		if err != nil {
			return nil, err
		}
		if !locked {
			return nil, fmt.Errorf("unable to lock app %s, aborting", appName)
		}
		defer app.ReleaseApplicationLock(appName)
	}
	result := map[string][]container.Container{}
	for _, n := range nodes {
		nodeConts, err := p.listRunningContainersByHost(net.URLToHost(n.Address))
		if err != nil {
			return nil, err
		}
		result[n.Address] = nodeConts
	}
	return result, nil
}
Пример #10
0
func joinSwarm(existingClient *docker.Client, newClient *docker.Client, addr string) error {
	swarmInfo, err := existingClient.InspectSwarm(nil)
	if err != nil {
		return errors.WithStack(err)
	}
	dockerInfo, err := existingClient.Info()
	if err != nil {
		return errors.WithStack(err)
	}
	if len(dockerInfo.Swarm.RemoteManagers) == 0 {
		return errors.Errorf("no remote managers found in node %#v", dockerInfo)
	}
	addrs := make([]string, len(dockerInfo.Swarm.RemoteManagers))
	for i, peer := range dockerInfo.Swarm.RemoteManagers {
		addrs[i] = peer.Addr
	}
	host := tsuruNet.URLToHost(addr)
	opts := docker.JoinSwarmOptions{
		JoinRequest: swarm.JoinRequest{
			ListenAddr:    fmt.Sprintf("0.0.0.0:%d", swarmConfig.swarmPort),
			AdvertiseAddr: host,
			JoinToken:     swarmInfo.JoinTokens.Worker,
			RemoteAddrs:   addrs,
		},
	}
	err = newClient.JoinSwarm(opts)
	if err != nil && err != docker.ErrNodeAlreadyInSwarm {
		return errors.WithStack(err)
	}
	return redistributeManagers(existingClient)
}
Пример #11
0
func listContainersHandler(w http.ResponseWriter, r *http.Request, t auth.Token) error {
	address := r.URL.Query().Get(":address")
	if address != "" {
		node, err := mainDockerProvisioner.Cluster().GetNode(address)
		if err != nil {
			return err
		}
		hasAccess := permission.Check(t, permission.PermNodeRead,
			permission.Context(permission.CtxPool, node.Metadata["pool"]))
		if !hasAccess {
			return permission.ErrUnauthorized
		}
		containerList, err := mainDockerProvisioner.listContainersByHost(net.URLToHost(address))
		if err != nil {
			return err
		}
		return json.NewEncoder(w).Encode(containerList)
	}
	appName := r.URL.Query().Get(":appname")
	a, err := app.GetByName(appName)
	if err != nil {
		return err
	}
	hasAccess := permission.Check(t, permission.PermNodeRead,
		permission.Context(permission.CtxPool, a.Pool))
	if !hasAccess {
		return permission.ErrUnauthorized
	}
	containerList, err := mainDockerProvisioner.listContainersByApp(appName)
	if err != nil {
		return err
	}
	return json.NewEncoder(w).Encode(containerList)
}
Пример #12
0
func (s *segregatedScheduler) Schedule(c *cluster.Cluster, opts docker.CreateContainerOptions, schedulerOpts cluster.SchedulerOptions) (cluster.Node, error) {
	schedOpts, ok := schedulerOpts.(*container.SchedulerOpts)
	if !ok {
		return cluster.Node{}, &container.SchedulerError{
			Base: errors.Errorf("invalid scheduler opts: %#v", schedulerOpts),
		}
	}
	a, _ := app.GetByName(schedOpts.AppName)
	nodes, err := s.provisioner.Nodes(a)
	if err != nil {
		return cluster.Node{}, &container.SchedulerError{Base: err}
	}
	nodes, err = s.filterByMemoryUsage(a, nodes, s.maxMemoryRatio, s.TotalMemoryMetadata)
	if err != nil {
		return cluster.Node{}, &container.SchedulerError{Base: err}
	}
	node, err := s.chooseNodeToAdd(nodes, opts.Name, schedOpts.AppName, schedOpts.ProcessName)
	if err != nil {
		return cluster.Node{}, &container.SchedulerError{Base: err}
	}
	if schedOpts.ActionLimiter != nil {
		schedOpts.LimiterDone = schedOpts.ActionLimiter.Start(net.URLToHost(node))
	}
	return cluster.Node{Address: node}, nil
}
Пример #13
0
func (s *S) TestRebalanceContainersByHost(c *check.C) {
	otherServer, err := dtesting.NewServer("localhost:0", nil, nil)
	c.Assert(err, check.IsNil)
	defer otherServer.Stop()
	otherUrl := strings.Replace(otherServer.URL(), "127.0.0.1", "localhost", 1)
	p := &dockerProvisioner{}
	err = p.Initialize()
	c.Assert(err, check.IsNil)
	p.storage = &cluster.MapStorage{}
	p.scheduler = &segregatedScheduler{provisioner: p}
	p.cluster, err = cluster.New(p.scheduler, p.storage,
		cluster.Node{Address: s.server.URL(), Metadata: map[string]string{"pool": "pool1"}},
		cluster.Node{Address: otherUrl, Metadata: map[string]string{"pool": "pool1"}},
	)
	c.Assert(err, check.IsNil)
	opts := provision.AddPoolOptions{Name: "pool1"}
	err = provision.AddPool(opts)
	c.Assert(err, check.IsNil)
	err = provision.AddTeamsToPool("pool1", []string{"team1"})
	c.Assert(err, check.IsNil)
	err = s.newFakeImage(p, "tsuru/app-myapp", nil)
	c.Assert(err, check.IsNil)
	appInstance := provisiontest.NewFakeApp("myapp", "python", 0)
	defer p.Destroy(appInstance)
	p.Provision(appInstance)
	imageId, err := appCurrentImageName(appInstance.GetName())
	c.Assert(err, check.IsNil)
	_, err = addContainersWithHost(&changeUnitsPipelineArgs{
		toHost:      "localhost",
		toAdd:       map[string]*containersToAdd{"web": {Quantity: 5}},
		app:         appInstance,
		imageId:     imageId,
		provisioner: p,
	})
	c.Assert(err, check.IsNil)
	appStruct := &app.App{
		Name:      appInstance.GetName(),
		TeamOwner: "team1",
		Pool:      "pool1",
	}
	err = s.storage.Apps().Insert(appStruct)
	c.Assert(err, check.IsNil)
	c1, err := p.listContainersByHost("localhost")
	c.Assert(err, check.IsNil)
	c.Assert(c1, check.HasLen, 5)
	c2, err := p.listContainersByHost("127.0.0.1")
	c.Assert(err, check.IsNil)
	c.Assert(c2, check.HasLen, 0)
	err = p.Cluster().Unregister(otherUrl)
	c.Assert(err, check.IsNil)
	buf := safe.NewBuffer(nil)
	err = p.rebalanceContainersByHost(net.URLToHost(otherUrl), buf)
	c.Assert(err, check.IsNil)
	c.Assert(p.scheduler.ignoredContainers, check.IsNil)
	c2, err = p.listContainersByHost("127.0.0.1")
	c.Assert(err, check.IsNil)
	c.Assert(c2, check.HasLen, 5)
}
Пример #14
0
func (h *NodeHealer) findNodeForNodeData(nodeData provision.NodeStatusData) (*cluster.Node, error) {
	nodes, err := h.provisioner.Cluster().UnfilteredNodes()
	if err != nil {
		return nil, err
	}
	nodeSet := map[string]*cluster.Node{}
	for i := range nodes {
		nodeSet[net.URLToHost(nodes[i].Address)] = &nodes[i]
	}
	containerIDs := make([]string, 0, len(nodeData.Units))
	containerNames := make([]string, 0, len(nodeData.Units))
	for _, u := range nodeData.Units {
		if u.ID != "" {
			containerIDs = append(containerIDs, u.ID)
		}
		if u.Name != "" {
			containerNames = append(containerNames, u.Name)
		}
	}
	containersForNode, err := h.provisioner.ListContainers(bson.M{
		"$or": []bson.M{
			{"name": bson.M{"$in": containerNames}},
			{"id": bson.M{"$in": containerIDs}},
		},
	})
	if err != nil {
		return nil, err
	}
	var node *cluster.Node
	for _, c := range containersForNode {
		n := nodeSet[c.HostAddr]
		if n != nil {
			if node != nil && node.Address != n.Address {
				return nil, fmt.Errorf("containers match multiple nodes: %s and %s", node.Address, n.Address)
			}
			node = n
		}
	}
	if node != nil {
		return node, nil
	}
	// Node not found through containers, try finding using addrs.
	for _, addr := range nodeData.Addrs {
		n := nodeSet[addr]
		if n != nil {
			if node != nil {
				return nil, fmt.Errorf("addrs match multiple nodes: %v", nodeData.Addrs)
			}
			node = n
		}
	}
	if node == nil {
		return nil, fmt.Errorf("node not found for addrs: %v", nodeData.Addrs)
	}
	return node, nil
}
Пример #15
0
func (a *autoScaleConfig) removeMultipleNodes(event *autoScaleEvent, chosenNodes []cluster.Node) error {
	nodeAddrs := make([]string, len(chosenNodes))
	nodeHosts := make([]string, len(chosenNodes))
	for i, node := range chosenNodes {
		_, hasIaas := node.Metadata["iaas"]
		if !hasIaas {
			return fmt.Errorf("no IaaS information in node (%s) metadata: %#v", node.Address, node.Metadata)
		}
		nodeAddrs[i] = node.Address
		nodeHosts[i] = net.URLToHost(node.Address)
	}
	err := a.provisioner.Cluster().UnregisterNodes(nodeAddrs...)
	if err != nil {
		return fmt.Errorf("unable to unregister nodes (%s) for removal: %s", strings.Join(nodeAddrs, ", "), err)
	}
	buf := safe.NewBuffer(nil)
	err = a.provisioner.moveContainersFromHosts(nodeHosts, "", buf)
	if err != nil {
		for _, node := range chosenNodes {
			a.provisioner.Cluster().Register(node)
		}
		return fmt.Errorf("unable to move containers from nodes (%s): %s - log: %s", strings.Join(nodeAddrs, ", "), err, buf.String())
	}
	wg := sync.WaitGroup{}
	for i := range chosenNodes {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			node := chosenNodes[i]
			m, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], net.URLToHost(node.Address))
			if err != nil {
				event.logMsg("unable to find machine for removal in iaas: %s", err)
				return
			}
			err = m.Destroy()
			if err != nil {
				event.logMsg("unable to destroy machine in IaaS: %s", err)
			}
		}(i)
	}
	wg.Wait()
	return nil
}
Пример #16
0
func (s *S) TestHealerHealNodeWaitAndRegisterError(c *check.C) {
	iaas.RegisterIaasProvider("my-healer-iaas", dockertest.NewHealerIaaSConstructor("127.0.0.1", nil))
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaas.RegisterIaasProvider("my-healer-iaas", dockertest.NewHealerIaaSConstructor("localhost", nil))
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	node2.PrepareFailure("ping-failure", "/_ping")
	config.Set("iaas:node-protocol", "http")
	config.Set("iaas:node-port", dockertest.URLPort(node2.URL()))
	defer config.Unset("iaas:node-protocol")
	defer config.Unset("iaas:node-port")
	p, err := s.newFakeDockerProvisioner(node1.URL())
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	node1.PrepareFailure("pingErr", "/_ping")
	p.Cluster().StartActiveMonitoring(100 * time.Millisecond)
	time.Sleep(300 * time.Millisecond)
	p.Cluster().StopActiveMonitoring()
	healer := NewNodeHealer(NodeHealerArgs{
		Provisioner:           p,
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Second,
	})
	nodes, err := p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
	c.Assert(nodes[0].FailureCount() > 0, check.Equals, true)
	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	created, err := healer.healNode(&nodes[0])
	c.Assert(err, check.ErrorMatches, ".*timeout waiting for result.*")
	c.Assert(created.Address, check.Equals, "")
	c.Assert(nodes[0].FailureCount(), check.Equals, 0)
	nodes, err = p.Cluster().Nodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
}
Пример #17
0
func (s *segregatedScheduler) getContainerFromHost(host string, appName, process string) (string, error) {
	coll := s.provisioner.Collection()
	defer coll.Close()
	var c container.Container
	query := bson.M{
		"id":       bson.M{"$nin": s.ignoredContainers},
		"appname":  appName,
		"hostaddr": net.URLToHost(host),
	}
	if process == "" {
		query["$or"] = []bson.M{{"processname": bson.M{"$exists": false}}, {"processname": ""}}
	} else {
		query["processname"] = process
	}
	err := coll.Find(query).Select(bson.M{"id": 1}).One(&c)
	if err == mgo.ErrNotFound {
		return "", &errContainerNotFound{AppName: appName, ProcessName: process, HostAddr: net.URLToHost(host)}
	}
	return c.ID, err
}
Пример #18
0
func (p *dockerProvisioner) listAppsForNodes(nodes []*cluster.Node) ([]string, error) {
	coll := p.Collection()
	defer coll.Close()
	nodeNames := make([]string, len(nodes))
	for i, n := range nodes {
		nodeNames[i] = net.URLToHost(n.Address)
	}
	var appNames []string
	err := coll.Find(bson.M{"hostaddr": bson.M{"$in": nodeNames}}).Distinct("appname", &appNames)
	return appNames, err
}
Пример #19
0
func (s *segregatedScheduler) nodesToHosts(nodes []cluster.Node) ([]string, map[string]string) {
	hosts := make([]string, len(nodes))
	hostsMap := make(map[string]string)
	// Only hostname is saved in the docker containers collection
	// so we need to extract and map then to the original node.
	for i, node := range nodes {
		host := net.URLToHost(node.Address)
		hosts[i] = host
		hostsMap[host] = node.Address
	}
	return hosts, hostsMap
}
Пример #20
0
func (p *dockerProvisioner) getNodeByHost(host string) (cluster.Node, error) {
	nodes, err := p.Cluster().Nodes()
	if err != nil {
		return cluster.Node{}, err
	}
	for _, node := range nodes {
		if net.URLToHost(node.Address) == host {
			return node, nil
		}
	}
	return cluster.Node{}, fmt.Errorf("Host `%s` not found", host)
}
Пример #21
0
func (c *Container) hostToNodeAddress(p DockerProvisioner, host string) (string, error) {
	nodes, err := p.Cluster().Nodes()
	if err != nil {
		return "", err
	}
	for _, node := range nodes {
		if net.URLToHost(node.Address) == host {
			return node.Address, nil
		}
	}
	return "", fmt.Errorf("Host `%s` not found", host)
}
Пример #22
0
func (p *FakeDockerProvisioner) GetNodeByHost(host string) (cluster.Node, error) {
	nodes, err := p.cluster.UnfilteredNodes()
	if err != nil {
		return cluster.Node{}, err
	}
	for _, node := range nodes {
		if net.URLToHost(node.Address) == host {
			return node, nil
		}
	}
	return cluster.Node{}, errors.Errorf("node with host %q not found", host)
}
Пример #23
0
func (p *swarmProvisioner) RoutableAddresses(a provision.App) ([]url.URL, error) {
	client, err := chooseDBSwarmNode()
	if err != nil {
		return nil, err
	}
	imgID, err := image.AppCurrentImageName(a.GetName())
	if err != nil {
		if err != image.ErrNoImagesAvailable {
			return nil, err
		}
		return nil, nil
	}
	webProcessName, err := image.GetImageWebProcessName(imgID)
	if err != nil {
		return nil, err
	}
	if webProcessName == "" {
		return nil, nil
	}
	srvName := serviceNameForApp(a, webProcessName)
	srv, err := client.InspectService(srvName)
	if err != nil {
		return nil, err
	}
	var pubPort uint32
	if len(srv.Endpoint.Ports) > 0 {
		pubPort = srv.Endpoint.Ports[0].PublishedPort
	}
	if pubPort == 0 {
		return nil, nil
	}
	nodes, err := listValidNodes(client)
	if err != nil {
		return nil, err
	}
	for i := len(nodes) - 1; i >= 0; i-- {
		if nodes[i].Spec.Annotations.Labels[labelNodePoolName.String()] != a.GetPool() {
			nodes[i], nodes[len(nodes)-1] = nodes[len(nodes)-1], nodes[i]
			nodes = nodes[:len(nodes)-1]
		}
	}
	addrs := make([]url.URL, len(nodes))
	for i, n := range nodes {
		addr := n.Spec.Labels[labelNodeDockerAddr.String()]
		host := tsuruNet.URLToHost(addr)
		addrs[i] = url.URL{
			Scheme: "http",
			Host:   fmt.Sprintf("%s:%d", host, pubPort),
		}
	}
	return addrs, nil
}
Пример #24
0
func (c *Container) Create(args *CreateArgs) error {
	port, err := getPort()
	if err != nil {
		log.Errorf("error on getting port for container %s - %s", c.AppName, port)
		return err
	}
	securityOpts, _ := config.GetList("docker:security-opts")
	var exposedPorts map[docker.Port]struct{}
	if !args.Deploy {
		exposedPorts = map[docker.Port]struct{}{
			docker.Port(port + "/tcp"): {},
		}
	}
	var user string
	if args.Building {
		user = c.user()
	}
	config := docker.Config{
		Image:        args.ImageID,
		Cmd:          args.Commands,
		Entrypoint:   []string{},
		ExposedPorts: exposedPorts,
		AttachStdin:  false,
		AttachStdout: false,
		AttachStderr: false,
		Memory:       args.App.GetMemory(),
		MemorySwap:   args.App.GetMemory() + args.App.GetSwap(),
		CPUShares:    int64(args.App.GetCpuShare()),
		SecurityOpts: securityOpts,
		User:         user,
	}
	c.addEnvsToConfig(args, port, &config)
	opts := docker.CreateContainerOptions{Name: c.Name, Config: &config}
	var nodeList []string
	if len(args.DestinationHosts) > 0 {
		var nodeName string
		nodeName, err = c.hostToNodeAddress(args.Provisioner, args.DestinationHosts[0])
		if err != nil {
			return err
		}
		nodeList = []string{nodeName}
	}
	schedulerOpts := []string{args.App.GetName(), args.ProcessName}
	addr, cont, err := args.Provisioner.Cluster().CreateContainerSchedulerOpts(opts, schedulerOpts, nodeList...)
	if err != nil {
		log.Errorf("error on creating container in docker %s - %s", c.AppName, err)
		return err
	}
	c.ID = cont.ID
	c.HostAddr = net.URLToHost(addr)
	return nil
}
Пример #25
0
func (s *S) TestRunContainerHealerThrottled(c *check.C) {
	p, err := dockertest.StartMultipleServersCluster()
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	node1 := p.Servers()[0]
	app := newFakeAppInDB("myapp", "python", 0)
	_, err = p.StartContainers(dockertest.StartContainersArgs{
		Endpoint:  node1.URL(),
		App:       app,
		Amount:    map[string]int{"web": 2},
		Image:     "tsuru/python",
		PullImage: true,
	})
	c.Assert(err, check.IsNil)

	containers := p.AllContainers()
	c.Assert(containers, check.HasLen, 2)
	c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
	node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
	node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})
	toMoveCont := containers[1]
	toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute)
	for i := 0; i < 3; i++ {
		var evt *event.Event
		evt, err = event.NewInternal(&event.Opts{
			Target:       event.Target{Type: "container", Value: toMoveCont.ID},
			InternalKind: "healer",
			CustomData:   toMoveCont,
			Allowed:      event.Allowed(permission.PermAppReadEvents),
		})
		c.Assert(err, check.IsNil)
		err = evt.DoneCustomData(nil, nil)
		c.Assert(err, check.IsNil)
	}
	healer := NewContainerHealer(ContainerHealerArgs{Provisioner: p, Locker: dockertest.NewFakeLocker()})
	err = healer.healContainerIfNeeded(toMoveCont)
	c.Assert(err, check.ErrorMatches, "Error trying to insert container healing event, healing aborted: event throttled, limit for healer on container \".*?\" is 3 every 5m0s")
}
Пример #26
0
func initSwarm(client *docker.Client, addr string) error {
	host := tsuruNet.URLToHost(addr)
	_, err := client.InitSwarm(docker.InitSwarmOptions{
		InitRequest: swarm.InitRequest{
			ListenAddr:    fmt.Sprintf("0.0.0.0:%d", swarmConfig.swarmPort),
			AdvertiseAddr: host,
		},
	})
	if err != nil && errors.Cause(err) != docker.ErrNodeAlreadyInSwarm {
		return errors.WithStack(err)
	}
	return nil
}
Пример #27
0
func taskToUnit(task *swarm.Task, service *swarm.Service, node *swarm.Node, a provision.App) provision.Unit {
	addr := node.Spec.Labels[labelNodeDockerAddr.String()]
	host := tsuruNet.URLToHost(addr)
	return provision.Unit{
		ID:          task.ID,
		AppName:     a.GetName(),
		ProcessName: service.Spec.Annotations.Labels[labelAppProcess.String()],
		Type:        a.GetPlatform(),
		Ip:          host,
		Status:      stateMap[task.Status.State],
		Address:     &url.URL{},
	}
}
Пример #28
0
func (s *S) TestHealerHealNodeCreateMachineError(c *check.C) {
	factory, iaasInst := dockertest.NewHealerIaaSConstructorWithInst("127.0.0.1")
	iaas.RegisterIaasProvider("my-healer-iaas", factory)
	_, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
	c.Assert(err, check.IsNil)
	iaasInst.Addr = "localhost"
	iaasInst.Err = fmt.Errorf("my create machine error")
	node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
	c.Assert(err, check.IsNil)
	p, err := s.newFakeDockerProvisioner(node1.URL())
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	node1.PrepareFailure("pingErr", "/_ping")
	p.Cluster().StartActiveMonitoring(100 * time.Millisecond)
	time.Sleep(300 * time.Millisecond)
	p.Cluster().StopActiveMonitoring()
	healer := NewNodeHealer(NodeHealerArgs{
		Provisioner:           p,
		FailuresBeforeHealing: 1,
		WaitTimeNewMachine:    time.Minute,
	})
	nodes, err := p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
	c.Assert(nodes[0].FailureCount() > 0, check.Equals, true)
	nodes[0].Metadata["iaas"] = "my-healer-iaas"
	created, err := healer.healNode(&nodes[0])
	c.Assert(err, check.ErrorMatches, ".*my create machine error.*")
	c.Assert(created.Address, check.Equals, "")
	c.Assert(nodes[0].FailureCount(), check.Equals, 0)
	nodes, err = p.Cluster().UnfilteredNodes()
	c.Assert(err, check.IsNil)
	c.Assert(nodes, check.HasLen, 1)
	c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
	c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
}
Пример #29
0
func FindNodeByAddrs(p NodeProvisioner, addrs []string) (Node, error) {
	nodeAddrMap := map[string]Node{}
	nodes, err := p.ListNodes(nil)
	if err != nil {
		return nil, err
	}
	for i, n := range nodes {
		nodeAddrMap[net.URLToHost(n.Address())] = nodes[i]
	}
	var node Node
	for _, addr := range addrs {
		n := nodeAddrMap[net.URLToHost(addr)]
		if n != nil {
			if node != nil {
				return nil, errors.Errorf("addrs match multiple nodes: %v", addrs)
			}
			node = n
		}
	}
	if node == nil {
		return nil, ErrNodeNotFound
	}
	return node, nil
}
Пример #30
0
func (s *S) TestStartContainers(c *check.C) {
	app := provisiontest.NewFakeApp("myapp", "python", 1)
	p, err := StartMultipleServersCluster()
	c.Assert(err, check.IsNil)
	defer p.Destroy()
	containers, err := p.StartContainers(StartContainersArgs{
		Amount:    map[string]int{"web": 2, "worker": 1},
		Image:     "tsuru/python",
		PullImage: true,
		Endpoint:  p.Servers()[0].URL(),
		App:       app,
	})
	c.Assert(err, check.IsNil)
	c.Assert(containers, check.HasLen, 3)
	c.Assert(p.Containers(net.URLToHost(p.Servers()[0].URL())), check.DeepEquals, containers)
}