func (s *S) TestRunContainerHealerWithError(c *check.C) {
    p, err := dockertest.StartMultipleServersCluster()
    c.Assert(err, check.IsNil)
    defer p.Destroy()
    node1 := p.Servers()[0]
    app := newFakeAppInDB("myapp", "python", 0)
    _, err = p.StartContainers(dockertest.StartContainersArgs{
        Endpoint:  node1.URL(),
        App:       app,
        Amount:    map[string]int{"web": 2},
        Image:     "tsuru/python",
        PullImage: true,
    })
    c.Assert(err, check.IsNil)
    containers := p.AllContainers()
    c.Assert(containers, check.HasLen, 2)
    c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
    node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})
    toMoveCont := containers[1]
    toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-2 * time.Minute)
    p.PrepareListResult([]container.Container{containers[0], toMoveCont}, nil)
    p.FailMove(
        errors.New("cannot move container"),
        errors.New("cannot move container"),
        errors.New("cannot move container"),
    )
    healer := NewContainerHealer(ContainerHealerArgs{
        Provisioner:         p,
        MaxUnresponsiveTime: time.Minute,
        Locker:              dockertest.NewFakeLocker(),
    })
    healer.runContainerHealerOnce()
    containers = p.AllContainers()
    c.Assert(containers, check.HasLen, 2)
    hosts := []string{containers[0].HostAddr, containers[1].HostAddr}
    c.Assert(hosts[0], check.Equals, net.URLToHost(node1.URL()))
    c.Assert(hosts[1], check.Equals, net.URLToHost(node1.URL()))
    c.Assert(eventtest.EventDesc{
        Target: event.Target{Type: "container", Value: toMoveCont.ID},
        Kind:   "healer",
        StartCustomData: map[string]interface{}{
            "hostaddr": "127.0.0.1",
            "id":       toMoveCont.ID,
        },
        ErrorMatches: `.*Error trying to heal containers.*`,
        EndCustomData: map[string]interface{}{
            "hostaddr": "",
        },
    }, eventtest.HasEvent)
}
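// filterByMemoryUsage returns only the nodes whose already-reserved memory,
// plus the memory required by the app's plan, stays within maxMemoryRatio of
// the total memory reported in the node metadata. When no node qualifies and
// auto-scale is enabled, all nodes are returned so auto-scale can detect the
// overcommit and add capacity.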
func (s *segregatedScheduler) filterByMemoryUsage(a *app.App, nodes []cluster.Node, maxMemoryRatio float32, TotalMemoryMetadata string) ([]cluster.Node, error) {
    if maxMemoryRatio == 0 || TotalMemoryMetadata == "" {
        return nodes, nil
    }
    hosts := make([]string, len(nodes))
    for i := range nodes {
        hosts[i] = net.URLToHost(nodes[i].Address)
    }
    containers, err := s.provisioner.ListContainers(bson.M{"hostaddr": bson.M{"$in": hosts}, "id": bson.M{"$nin": s.ignoredContainers}})
    if err != nil {
        return nil, err
    }
    hostReserved := make(map[string]int64)
    for _, cont := range containers {
        contApp, err := app.GetByName(cont.AppName)
        if err != nil {
            return nil, err
        }
        hostReserved[cont.HostAddr] += contApp.Plan.Memory
    }
    megabyte := float64(1024 * 1024)
    nodeList := make([]cluster.Node, 0, len(nodes))
    for _, node := range nodes {
        totalMemory, _ := strconv.ParseFloat(node.Metadata[TotalMemoryMetadata], 64)
        shouldAdd := true
        if totalMemory != 0 {
            maxMemory := totalMemory * float64(maxMemoryRatio)
            host := net.URLToHost(node.Address)
            nodeReserved := hostReserved[host] + a.Plan.Memory
            if nodeReserved > int64(maxMemory) {
                shouldAdd = false
                tryingToReserveMB := float64(a.Plan.Memory) / megabyte
                reservedMB := float64(hostReserved[host]) / megabyte
                limitMB := maxMemory / megabyte
                log.Errorf("Node %q has reached its memory limit. "+
                    "Limit: %0.4fMB. Reserved: %0.4fMB. Needed additional: %0.4fMB",
                    host, limitMB, reservedMB, tryingToReserveMB)
            }
        }
        if shouldAdd {
            nodeList = append(nodeList, node)
        }
    }
    if len(nodeList) == 0 {
        autoScaleEnabled, _ := config.GetBool("docker:auto-scale:enabled")
        errMsg := fmt.Sprintf("no nodes found with enough memory for container of %q: %0.4fMB",
            a.Name, float64(a.Plan.Memory)/megabyte)
        if autoScaleEnabled {
            // Allow going over quota temporarily because auto-scale will be
            // able to detect this and automatically add new nodes.
            log.Errorf("WARNING: %s. Will ignore memory restrictions.", errMsg)
            return nodes, nil
        }
        return nil, errors.New(errMsg)
    }
    return nodeList, nil
}
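// AddUnitsToNode adds n units of the given process to the fake provisioner,
// pinning them to nodeAddr when one is provided. When nodeAddr is empty, the
// host is taken from an arbitrary registered node, or synthesized from a
// counter when no nodes exist.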
func (p *FakeProvisioner) AddUnitsToNode(app provision.App, n uint, process string, w io.Writer, nodeAddr string) ([]provision.Unit, error) {
    if err := p.getError("AddUnits"); err != nil {
        return nil, err
    }
    if n == 0 {
        return nil, errors.New("Cannot add 0 units.")
    }
    p.mut.Lock()
    defer p.mut.Unlock()
    pApp, ok := p.apps[app.GetName()]
    if !ok {
        return nil, errNotProvisioned
    }
    name := app.GetName()
    platform := app.GetPlatform()
    length := uint(len(pApp.units))
    for i := uint(0); i < n; i++ {
        val := atomic.AddInt32(&uniqueIpCounter, 1)
        var hostAddr string
        if nodeAddr != "" {
            hostAddr = net.URLToHost(nodeAddr)
        } else if len(p.nodes) > 0 {
            // Pick an arbitrary registered node as the host.
            for _, node := range p.nodes {
                hostAddr = net.URLToHost(node.Address())
                break
            }
        } else {
            hostAddr = fmt.Sprintf("10.10.10.%d", val)
        }
        unit := provision.Unit{
            ID:          fmt.Sprintf("%s-%d", name, pApp.unitLen),
            AppName:     name,
            Type:        platform,
            Status:      provision.StatusStarted,
            Ip:          hostAddr,
            ProcessName: process,
            Address: &url.URL{
                Scheme: "http",
                Host:   fmt.Sprintf("%s:%d", hostAddr, val),
            },
        }
        err := routertest.FakeRouter.AddRoute(name, unit.Address)
        if err != nil {
            return nil, err
        }
        pApp.units = append(pApp.units, unit)
        pApp.unitLen++
    }
    result := make([]provision.Unit, int(n))
    copy(result, pApp.units[length:])
    p.apps[app.GetName()] = pApp
    if w != nil {
        fmt.Fprintf(w, "added %d units", n)
    }
    return result, nil
}
func (s *S) TestRunContainerHealerAlreadyHealed(c *check.C) {
    p, err := dockertest.StartMultipleServersCluster()
    c.Assert(err, check.IsNil)
    defer p.Destroy()
    node1 := p.Servers()[0]
    app := provisiontest.NewFakeApp("myapp", "python", 0)
    _, err = p.StartContainers(dockertest.StartContainersArgs{
        Endpoint:  node1.URL(),
        App:       app,
        Amount:    map[string]int{"web": 2},
        Image:     "tsuru/python",
        PullImage: true,
    })
    c.Assert(err, check.IsNil)
    containers := p.AllContainers()
    c.Assert(containers, check.HasLen, 2)
    c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
    node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})
    toMoveCont := containers[1]
    toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute)
    p.PrepareListResult([]container.Container{containers[0], toMoveCont}, nil)
    node1.PrepareFailure("createError", "/containers/create")
    healer := NewContainerHealer(ContainerHealerArgs{Provisioner: p, Locker: dockertest.NewFakeLocker()})
    healer.healContainerIfNeeded(toMoveCont)
    healer.healContainerIfNeeded(toMoveCont)
    expected := dockertest.ContainerMoving{
        ContainerID: toMoveCont.ID,
        HostFrom:    toMoveCont.HostAddr,
        HostTo:      "",
    }
    movings := p.Movings()
    c.Assert(movings, check.DeepEquals, []dockertest.ContainerMoving{expected})
    healingColl, err := healingCollection()
    c.Assert(err, check.IsNil)
    defer healingColl.Close()
    var events []HealingEvent
    err = healingColl.Find(nil).All(&events)
    c.Assert(err, check.IsNil)
    c.Assert(events, check.HasLen, 1)
    c.Assert(events[0].Action, check.Equals, "container-healing")
    c.Assert(events[0].StartTime.IsZero(), check.Equals, false)
    c.Assert(events[0].EndTime.IsZero(), check.Equals, false)
    c.Assert(events[0].Error, check.Equals, "")
    c.Assert(events[0].Successful, check.Equals, true)
    c.Assert(events[0].FailingContainer.HostAddr, check.Equals, "127.0.0.1")
    c.Assert(events[0].CreatedContainer.HostAddr, check.Equals, "127.0.0.1")
}
// removeNodeHandler calls scheduler.Unregister to unregister a node from the cluster.
func removeNodeHandler(w http.ResponseWriter, r *http.Request, t auth.Token) error {
    params, err := unmarshal(r.Body)
    if err != nil {
        return err
    }
    address := params["address"]
    if address == "" {
        return fmt.Errorf("Node address is required.")
    }
    node, err := mainDockerProvisioner.Cluster().GetNode(address)
    if err != nil {
        return err
    }
    allowedNodeRemove := permission.Check(t, permission.PermNodeDelete,
        permission.Context(permission.CtxPool, node.Metadata["pool"]),
    )
    if !allowedNodeRemove {
        return permission.ErrUnauthorized
    }
    removeIaaS, _ := strconv.ParseBool(params["remove_iaas"])
    if removeIaaS {
        allowedIaasRemove := permission.Check(t, permission.PermMachineDelete,
            permission.Context(permission.CtxIaaS, node.Metadata["iaas"]),
        )
        if !allowedIaasRemove {
            return permission.ErrUnauthorized
        }
    }
    node.CreationStatus = cluster.NodeCreationStatusDisabled
    _, err = mainDockerProvisioner.Cluster().UpdateNode(node)
    if err != nil {
        return err
    }
    // A parse failure leaves noRebalance false, so rebalancing is the default.
    noRebalance, _ := strconv.ParseBool(r.URL.Query().Get("no-rebalance"))
    if !noRebalance {
        err = mainDockerProvisioner.rebalanceContainersByHost(net.URLToHost(address), w)
        if err != nil {
            return err
        }
    }
    err = mainDockerProvisioner.Cluster().Unregister(address)
    if err != nil {
        return err
    }
    if removeIaaS {
        var m iaas.Machine
        m, err = iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], net.URLToHost(address))
        if err != nil && err != mgo.ErrNotFound {
            return err
        }
        return m.Destroy()
    }
    return nil
}
func (s *S) TestRunContainerHealerShutdown(c *check.C) {
    p, err := dockertest.StartMultipleServersCluster()
    c.Assert(err, check.IsNil)
    defer p.Destroy()
    node1 := p.Servers()[0]
    app := provisiontest.NewFakeApp("myapp", "python", 0)
    _, err = p.StartContainers(dockertest.StartContainersArgs{
        Endpoint:  node1.URL(),
        App:       app,
        Amount:    map[string]int{"web": 2},
        Image:     "tsuru/python",
        PullImage: true,
    })
    c.Assert(err, check.IsNil)
    containers := p.AllContainers()
    c.Assert(containers, check.HasLen, 2)
    c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
    node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})
    toMoveCont := containers[1]
    toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute)
    p.PrepareListResult([]container.Container{containers[0], toMoveCont}, nil)
    node1.PrepareFailure("createError", "/containers/create")
    healer := NewContainerHealer(ContainerHealerArgs{
        Provisioner:         p,
        MaxUnresponsiveTime: time.Minute,
        Done:                make(chan bool),
        Locker:              dockertest.NewFakeLocker(),
    })
    ch := make(chan bool)
    go func() {
        defer close(ch)
        healer.RunContainerHealer()
    }()
    healer.Shutdown()
    <-ch
    expected := dockertest.ContainerMoving{
        ContainerID: toMoveCont.ID,
        HostFrom:    toMoveCont.HostAddr,
        HostTo:      "",
    }
    movings := p.Movings()
    c.Assert(movings, check.DeepEquals, []dockertest.ContainerMoving{expected})
}
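// Units reports every unit, across all provisioned apps, whose address
// resolves to the same host as this fake node.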
func (n *FakeNode) Units() ([]provision.Unit, error) {
    n.p.mut.Lock()
    defer n.p.mut.Unlock()
    var units []provision.Unit
    for _, a := range n.p.apps {
        for _, u := range a.units {
            if net.URLToHost(u.Address.String()) == net.URLToHost(n.address) {
                units = append(units, u)
            }
        }
    }
    return units, nil
}
func (s *S) TestRunContainerHealerAlreadyHealed(c *check.C) {
    p, err := dockertest.StartMultipleServersCluster()
    c.Assert(err, check.IsNil)
    defer p.Destroy()
    node1 := p.Servers()[0]
    app := newFakeAppInDB("myapp", "python", 0)
    _, err = p.StartContainers(dockertest.StartContainersArgs{
        Endpoint:  node1.URL(),
        App:       app,
        Amount:    map[string]int{"web": 2},
        Image:     "tsuru/python",
        PullImage: true,
    })
    c.Assert(err, check.IsNil)
    containers := p.AllContainers()
    c.Assert(containers, check.HasLen, 2)
    c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
    node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})
    toMoveCont := containers[1]
    toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute)
    p.PrepareListResult([]container.Container{containers[0], toMoveCont}, nil)
    node1.PrepareFailure("createError", "/containers/create")
    healer := NewContainerHealer(ContainerHealerArgs{Provisioner: p, Locker: dockertest.NewFakeLocker()})
    err = healer.healContainerIfNeeded(toMoveCont)
    c.Assert(err, check.IsNil)
    err = healer.healContainerIfNeeded(toMoveCont)
    c.Assert(err, check.IsNil)
    expected := dockertest.ContainerMoving{
        ContainerID: toMoveCont.ID,
        HostFrom:    toMoveCont.HostAddr,
        HostTo:      "",
    }
    movings := p.Movings()
    c.Assert(movings, check.DeepEquals, []dockertest.ContainerMoving{expected})
    c.Assert(eventtest.EventDesc{
        Target: event.Target{Type: "container", Value: toMoveCont.ID},
        Kind:   "healer",
        StartCustomData: map[string]interface{}{
            "hostaddr": "127.0.0.1",
            "id":       toMoveCont.ID,
        },
        EndCustomData: map[string]interface{}{
            "hostaddr": "127.0.0.1",
            "id":       bson.M{"$ne": ""},
        },
    }, eventtest.HasEvent)
}
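// runningContainersByNode locks every app that has containers on the given
// nodes (the locks are held until this function returns) and maps each node
// address to the containers currently running on its host.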
func (p *dockerProvisioner) runningContainersByNode(nodes []*cluster.Node) (map[string][]container.Container, error) {
    appNames, err := p.listAppsForNodes(nodes)
    if err != nil {
        return nil, err
    }
    for _, appName := range appNames {
        locked, err := app.AcquireApplicationLock(appName, app.InternalAppName, "node auto scale")
        if err != nil {
            return nil, err
        }
        if !locked {
            return nil, fmt.Errorf("unable to lock app %s, aborting", appName)
        }
        // Deferred inside the loop on purpose: every acquired lock is
        // released only when this function returns.
        defer app.ReleaseApplicationLock(appName)
    }
    result := map[string][]container.Container{}
    for _, n := range nodes {
        nodeConts, err := p.listRunningContainersByHost(net.URLToHost(n.Address))
        if err != nil {
            return nil, err
        }
        result[n.Address] = nodeConts
    }
    return result, nil
}
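// joinSwarm makes newClient's node join the swarm that existingClient belongs
// to, advertising the host extracted from addr and using the worker join
// token plus the known remote manager addresses. A node that is already in
// the swarm is not treated as an error.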
func joinSwarm(existingClient *docker.Client, newClient *docker.Client, addr string) error {
    swarmInfo, err := existingClient.InspectSwarm(nil)
    if err != nil {
        return errors.WithStack(err)
    }
    dockerInfo, err := existingClient.Info()
    if err != nil {
        return errors.WithStack(err)
    }
    if len(dockerInfo.Swarm.RemoteManagers) == 0 {
        return errors.Errorf("no remote managers found in node %#v", dockerInfo)
    }
    addrs := make([]string, len(dockerInfo.Swarm.RemoteManagers))
    for i, peer := range dockerInfo.Swarm.RemoteManagers {
        addrs[i] = peer.Addr
    }
    host := tsuruNet.URLToHost(addr)
    opts := docker.JoinSwarmOptions{
        JoinRequest: swarm.JoinRequest{
            ListenAddr:    fmt.Sprintf("0.0.0.0:%d", swarmConfig.swarmPort),
            AdvertiseAddr: host,
            JoinToken:     swarmInfo.JoinTokens.Worker,
            RemoteAddrs:   addrs,
        },
    }
    err = newClient.JoinSwarm(opts)
    if err != nil && err != docker.ErrNodeAlreadyInSwarm {
        return errors.WithStack(err)
    }
    return redistributeManagers(existingClient)
}
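// listContainersHandler lists containers either by node address (":address"
// query parameter) or by app name (":appname"), checking pool-scoped
// node-read permission in both cases.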
func listContainersHandler(w http.ResponseWriter, r *http.Request, t auth.Token) error {
    address := r.URL.Query().Get(":address")
    if address != "" {
        node, err := mainDockerProvisioner.Cluster().GetNode(address)
        if err != nil {
            return err
        }
        hasAccess := permission.Check(t, permission.PermNodeRead,
            permission.Context(permission.CtxPool, node.Metadata["pool"]))
        if !hasAccess {
            return permission.ErrUnauthorized
        }
        containerList, err := mainDockerProvisioner.listContainersByHost(net.URLToHost(address))
        if err != nil {
            return err
        }
        return json.NewEncoder(w).Encode(containerList)
    }
    appName := r.URL.Query().Get(":appname")
    a, err := app.GetByName(appName)
    if err != nil {
        return err
    }
    hasAccess := permission.Check(t, permission.PermNodeRead,
        permission.Context(permission.CtxPool, a.Pool))
    if !hasAccess {
        return permission.ErrUnauthorized
    }
    containerList, err := mainDockerProvisioner.listContainersByApp(appName)
    if err != nil {
        return err
    }
    return json.NewEncoder(w).Encode(containerList)
}
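// Schedule picks a node for a new container: it fetches the nodes available
// to the app, filters them by memory usage, and delegates the final choice to
// chooseNodeToAdd. When an action limiter is configured, the chosen host is
// registered with it before returning.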
func (s *segregatedScheduler) Schedule(c *cluster.Cluster, opts docker.CreateContainerOptions, schedulerOpts cluster.SchedulerOptions) (cluster.Node, error) {
    schedOpts, ok := schedulerOpts.(*container.SchedulerOpts)
    if !ok {
        return cluster.Node{}, &container.SchedulerError{
            Base: errors.Errorf("invalid scheduler opts: %#v", schedulerOpts),
        }
    }
    a, _ := app.GetByName(schedOpts.AppName)
    nodes, err := s.provisioner.Nodes(a)
    if err != nil {
        return cluster.Node{}, &container.SchedulerError{Base: err}
    }
    nodes, err = s.filterByMemoryUsage(a, nodes, s.maxMemoryRatio, s.TotalMemoryMetadata)
    if err != nil {
        return cluster.Node{}, &container.SchedulerError{Base: err}
    }
    node, err := s.chooseNodeToAdd(nodes, opts.Name, schedOpts.AppName, schedOpts.ProcessName)
    if err != nil {
        return cluster.Node{}, &container.SchedulerError{Base: err}
    }
    if schedOpts.ActionLimiter != nil {
        schedOpts.LimiterDone = schedOpts.ActionLimiter.Start(net.URLToHost(node))
    }
    return cluster.Node{Address: node}, nil
}
func (s *S) TestRebalanceContainersByHost(c *check.C) {
    otherServer, err := dtesting.NewServer("localhost:0", nil, nil)
    c.Assert(err, check.IsNil)
    defer otherServer.Stop()
    otherUrl := strings.Replace(otherServer.URL(), "127.0.0.1", "localhost", 1)
    p := &dockerProvisioner{}
    err = p.Initialize()
    c.Assert(err, check.IsNil)
    p.storage = &cluster.MapStorage{}
    p.scheduler = &segregatedScheduler{provisioner: p}
    p.cluster, err = cluster.New(p.scheduler, p.storage,
        cluster.Node{Address: s.server.URL(), Metadata: map[string]string{"pool": "pool1"}},
        cluster.Node{Address: otherUrl, Metadata: map[string]string{"pool": "pool1"}},
    )
    c.Assert(err, check.IsNil)
    opts := provision.AddPoolOptions{Name: "pool1"}
    err = provision.AddPool(opts)
    c.Assert(err, check.IsNil)
    err = provision.AddTeamsToPool("pool1", []string{"team1"})
    c.Assert(err, check.IsNil)
    err = s.newFakeImage(p, "tsuru/app-myapp", nil)
    c.Assert(err, check.IsNil)
    appInstance := provisiontest.NewFakeApp("myapp", "python", 0)
    defer p.Destroy(appInstance)
    p.Provision(appInstance)
    imageId, err := appCurrentImageName(appInstance.GetName())
    c.Assert(err, check.IsNil)
    _, err = addContainersWithHost(&changeUnitsPipelineArgs{
        toHost:      "localhost",
        toAdd:       map[string]*containersToAdd{"web": {Quantity: 5}},
        app:         appInstance,
        imageId:     imageId,
        provisioner: p,
    })
    c.Assert(err, check.IsNil)
    appStruct := &app.App{
        Name:      appInstance.GetName(),
        TeamOwner: "team1",
        Pool:      "pool1",
    }
    err = s.storage.Apps().Insert(appStruct)
    c.Assert(err, check.IsNil)
    c1, err := p.listContainersByHost("localhost")
    c.Assert(err, check.IsNil)
    c.Assert(c1, check.HasLen, 5)
    c2, err := p.listContainersByHost("127.0.0.1")
    c.Assert(err, check.IsNil)
    c.Assert(c2, check.HasLen, 0)
    err = p.Cluster().Unregister(otherUrl)
    c.Assert(err, check.IsNil)
    buf := safe.NewBuffer(nil)
    err = p.rebalanceContainersByHost(net.URLToHost(otherUrl), buf)
    c.Assert(err, check.IsNil)
    c.Assert(p.scheduler.ignoredContainers, check.IsNil)
    c2, err = p.listContainersByHost("127.0.0.1")
    c.Assert(err, check.IsNil)
    c.Assert(c2, check.HasLen, 5)
}
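// findNodeForNodeData locates the cluster node that originated the given
// status data, first by matching the reported units against known containers
// and then falling back to the reported addresses. It fails when the data
// matches more than one node, or none.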
func (h *NodeHealer) findNodeForNodeData(nodeData provision.NodeStatusData) (*cluster.Node, error) {
    nodes, err := h.provisioner.Cluster().UnfilteredNodes()
    if err != nil {
        return nil, err
    }
    nodeSet := map[string]*cluster.Node{}
    for i := range nodes {
        nodeSet[net.URLToHost(nodes[i].Address)] = &nodes[i]
    }
    containerIDs := make([]string, 0, len(nodeData.Units))
    containerNames := make([]string, 0, len(nodeData.Units))
    for _, u := range nodeData.Units {
        if u.ID != "" {
            containerIDs = append(containerIDs, u.ID)
        }
        if u.Name != "" {
            containerNames = append(containerNames, u.Name)
        }
    }
    containersForNode, err := h.provisioner.ListContainers(bson.M{
        "$or": []bson.M{
            {"name": bson.M{"$in": containerNames}},
            {"id": bson.M{"$in": containerIDs}},
        },
    })
    if err != nil {
        return nil, err
    }
    var node *cluster.Node
    for _, c := range containersForNode {
        n := nodeSet[c.HostAddr]
        if n != nil {
            if node != nil && node.Address != n.Address {
                return nil, fmt.Errorf("containers match multiple nodes: %s and %s", node.Address, n.Address)
            }
            node = n
        }
    }
    if node != nil {
        return node, nil
    }
    // Node not found through containers, try finding using addrs.
    for _, addr := range nodeData.Addrs {
        n := nodeSet[addr]
        if n != nil {
            if node != nil {
                return nil, fmt.Errorf("addrs match multiple nodes: %v", nodeData.Addrs)
            }
            node = n
        }
    }
    if node == nil {
        return nil, fmt.Errorf("node not found for addrs: %v", nodeData.Addrs)
    }
    return node, nil
}
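// removeMultipleNodes unregisters the chosen nodes, drains their containers
// to the remaining hosts, and then destroys the backing IaaS machines
// concurrently. If draining fails, the nodes are re-registered and the error
// is returned.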
func (a *autoScaleConfig) removeMultipleNodes(event *autoScaleEvent, chosenNodes []cluster.Node) error {
    nodeAddrs := make([]string, len(chosenNodes))
    nodeHosts := make([]string, len(chosenNodes))
    for i, node := range chosenNodes {
        _, hasIaas := node.Metadata["iaas"]
        if !hasIaas {
            return fmt.Errorf("no IaaS information in node (%s) metadata: %#v", node.Address, node.Metadata)
        }
        nodeAddrs[i] = node.Address
        nodeHosts[i] = net.URLToHost(node.Address)
    }
    err := a.provisioner.Cluster().UnregisterNodes(nodeAddrs...)
    if err != nil {
        return fmt.Errorf("unable to unregister nodes (%s) for removal: %s", strings.Join(nodeAddrs, ", "), err)
    }
    buf := safe.NewBuffer(nil)
    err = a.provisioner.moveContainersFromHosts(nodeHosts, "", buf)
    if err != nil {
        for _, node := range chosenNodes {
            a.provisioner.Cluster().Register(node)
        }
        return fmt.Errorf("unable to move containers from nodes (%s): %s - log: %s", strings.Join(nodeAddrs, ", "), err, buf.String())
    }
    wg := sync.WaitGroup{}
    for i := range chosenNodes {
        wg.Add(1)
        go func(i int) {
            defer wg.Done()
            node := chosenNodes[i]
            m, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], net.URLToHost(node.Address))
            if err != nil {
                event.logMsg("unable to find machine for removal in iaas: %s", err)
                return
            }
            err = m.Destroy()
            if err != nil {
                event.logMsg("unable to destroy machine in IaaS: %s", err)
            }
        }(i)
    }
    wg.Wait()
    return nil
}
func (s *S) TestHealerHealNodeWaitAndRegisterError(c *check.C) {
    iaas.RegisterIaasProvider("my-healer-iaas", dockertest.NewHealerIaaSConstructor("127.0.0.1", nil))
    _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
    c.Assert(err, check.IsNil)
    iaas.RegisterIaasProvider("my-healer-iaas", dockertest.NewHealerIaaSConstructor("localhost", nil))
    node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
    c.Assert(err, check.IsNil)
    node2, err := testing.NewServer("127.0.0.1:0", nil, nil)
    c.Assert(err, check.IsNil)
    node2.PrepareFailure("ping-failure", "/_ping")
    config.Set("iaas:node-protocol", "http")
    config.Set("iaas:node-port", dockertest.URLPort(node2.URL()))
    defer config.Unset("iaas:node-protocol")
    defer config.Unset("iaas:node-port")
    p, err := s.newFakeDockerProvisioner(node1.URL())
    c.Assert(err, check.IsNil)
    defer p.Destroy()
    node1.PrepareFailure("pingErr", "/_ping")
    p.Cluster().StartActiveMonitoring(100 * time.Millisecond)
    time.Sleep(300 * time.Millisecond)
    p.Cluster().StopActiveMonitoring()
    healer := NewNodeHealer(NodeHealerArgs{
        Provisioner:           p,
        FailuresBeforeHealing: 1,
        WaitTimeNewMachine:    time.Second,
    })
    nodes, err := p.Cluster().UnfilteredNodes()
    c.Assert(err, check.IsNil)
    c.Assert(nodes, check.HasLen, 1)
    c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
    c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
    c.Assert(nodes[0].FailureCount() > 0, check.Equals, true)
    nodes[0].Metadata["iaas"] = "my-healer-iaas"
    created, err := healer.healNode(&nodes[0])
    c.Assert(err, check.ErrorMatches, ".*timeout waiting for result.*")
    c.Assert(created.Address, check.Equals, "")
    c.Assert(nodes[0].FailureCount(), check.Equals, 0)
    nodes, err = p.Cluster().Nodes()
    c.Assert(err, check.IsNil)
    c.Assert(nodes, check.HasLen, 1)
    c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
    c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
}
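// getContainerFromHost returns the ID of one container of the given app (and
// optionally process) running on the given host, skipping containers the
// scheduler was told to ignore.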
func (s *segregatedScheduler) getContainerFromHost(host string, appName, process string) (string, error) {
    coll := s.provisioner.Collection()
    defer coll.Close()
    var c container.Container
    query := bson.M{
        "id":       bson.M{"$nin": s.ignoredContainers},
        "appname":  appName,
        "hostaddr": net.URLToHost(host),
    }
    if process == "" {
        query["$or"] = []bson.M{{"processname": bson.M{"$exists": false}}, {"processname": ""}}
    } else {
        query["processname"] = process
    }
    err := coll.Find(query).Select(bson.M{"id": 1}).One(&c)
    if err == mgo.ErrNotFound {
        return "", &errContainerNotFound{AppName: appName, ProcessName: process, HostAddr: net.URLToHost(host)}
    }
    return c.ID, err
}
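// listAppsForNodes returns the distinct names of apps that have containers on
// any of the given nodes' hosts.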
func (p *dockerProvisioner) listAppsForNodes(nodes []*cluster.Node) ([]string, error) {
    coll := p.Collection()
    defer coll.Close()
    nodeHosts := make([]string, len(nodes))
    for i, n := range nodes {
        nodeHosts[i] = net.URLToHost(n.Address)
    }
    var appNames []string
    err := coll.Find(bson.M{"hostaddr": bson.M{"$in": nodeHosts}}).Distinct("appname", &appNames)
    return appNames, err
}
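// nodesToHosts extracts the host of each node address and also returns a map
// from host back to the full node address.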
func (s *segregatedScheduler) nodesToHosts(nodes []cluster.Node) ([]string, map[string]string) {
    hosts := make([]string, len(nodes))
    hostsMap := make(map[string]string)
    // Only the hostname is saved in the docker containers collection, so we
    // need to extract it and map it back to the original node address.
    for i, node := range nodes {
        host := net.URLToHost(node.Address)
        hosts[i] = host
        hostsMap[host] = node.Address
    }
    return hosts, hostsMap
}
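// getNodeByHost finds the cluster node whose address resolves to the given host.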
func (p *dockerProvisioner) getNodeByHost(host string) (cluster.Node, error) {
    nodes, err := p.Cluster().Nodes()
    if err != nil {
        return cluster.Node{}, err
    }
    for _, node := range nodes {
        if net.URLToHost(node.Address) == host {
            return node, nil
        }
    }
    return cluster.Node{}, fmt.Errorf("Host `%s` not found", host)
}
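// hostToNodeAddress maps a bare host back to the full address of the cluster
// node that runs on it.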
func (c *Container) hostToNodeAddress(p DockerProvisioner, host string) (string, error) {
    nodes, err := p.Cluster().Nodes()
    if err != nil {
        return "", err
    }
    for _, node := range nodes {
        if net.URLToHost(node.Address) == host {
            return node.Address, nil
        }
    }
    return "", fmt.Errorf("Host `%s` not found", host)
}
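// GetNodeByHost mirrors the real provisioner's lookup, searching the fake
// cluster's unfiltered node list for a node on the given host.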
func (p *FakeDockerProvisioner) GetNodeByHost(host string) (cluster.Node, error) {
    nodes, err := p.cluster.UnfilteredNodes()
    if err != nil {
        return cluster.Node{}, err
    }
    for _, node := range nodes {
        if net.URLToHost(node.Address) == host {
            return node, nil
        }
    }
    return cluster.Node{}, errors.Errorf("node with host %q not found", host)
}
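// RoutableAddresses returns one URL per node in the app's pool, pointing at
// the published port of the app's web-process swarm service. It returns nil
// when the app has no image, no web process, or no published port yet.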
func (p *swarmProvisioner) RoutableAddresses(a provision.App) ([]url.URL, error) {
    client, err := chooseDBSwarmNode()
    if err != nil {
        return nil, err
    }
    imgID, err := image.AppCurrentImageName(a.GetName())
    if err != nil {
        if err != image.ErrNoImagesAvailable {
            return nil, err
        }
        return nil, nil
    }
    webProcessName, err := image.GetImageWebProcessName(imgID)
    if err != nil {
        return nil, err
    }
    if webProcessName == "" {
        return nil, nil
    }
    srvName := serviceNameForApp(a, webProcessName)
    srv, err := client.InspectService(srvName)
    if err != nil {
        return nil, err
    }
    var pubPort uint32
    if len(srv.Endpoint.Ports) > 0 {
        pubPort = srv.Endpoint.Ports[0].PublishedPort
    }
    if pubPort == 0 {
        return nil, nil
    }
    nodes, err := listValidNodes(client)
    if err != nil {
        return nil, err
    }
    // Drop nodes outside the app's pool using swap-and-truncate.
    for i := len(nodes) - 1; i >= 0; i-- {
        if nodes[i].Spec.Annotations.Labels[labelNodePoolName.String()] != a.GetPool() {
            nodes[i], nodes[len(nodes)-1] = nodes[len(nodes)-1], nodes[i]
            nodes = nodes[:len(nodes)-1]
        }
    }
    addrs := make([]url.URL, len(nodes))
    for i, n := range nodes {
        addr := n.Spec.Labels[labelNodeDockerAddr.String()]
        host := tsuruNet.URLToHost(addr)
        addrs[i] = url.URL{
            Scheme: "http",
            Host:   fmt.Sprintf("%s:%d", host, pubPort),
        }
    }
    return addrs, nil
}
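// Create builds the docker container for this Container: it assembles the
// docker.Config from the app's plan and the given arguments, optionally
// restricts scheduling to the first destination host, and records the
// resulting container ID and host address.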
func (c *Container) Create(args *CreateArgs) error {
    port, err := getPort()
    if err != nil {
        log.Errorf("error on getting port for container %s - %s", c.AppName, err)
        return err
    }
    securityOpts, _ := config.GetList("docker:security-opts")
    var exposedPorts map[docker.Port]struct{}
    if !args.Deploy {
        exposedPorts = map[docker.Port]struct{}{
            docker.Port(port + "/tcp"): {},
        }
    }
    var user string
    if args.Building {
        user = c.user()
    }
    config := docker.Config{
        Image:        args.ImageID,
        Cmd:          args.Commands,
        Entrypoint:   []string{},
        ExposedPorts: exposedPorts,
        AttachStdin:  false,
        AttachStdout: false,
        AttachStderr: false,
        Memory:       args.App.GetMemory(),
        MemorySwap:   args.App.GetMemory() + args.App.GetSwap(),
        CPUShares:    int64(args.App.GetCpuShare()),
        SecurityOpts: securityOpts,
        User:         user,
    }
    c.addEnvsToConfig(args, port, &config)
    opts := docker.CreateContainerOptions{Name: c.Name, Config: &config}
    var nodeList []string
    if len(args.DestinationHosts) > 0 {
        var nodeName string
        nodeName, err = c.hostToNodeAddress(args.Provisioner, args.DestinationHosts[0])
        if err != nil {
            return err
        }
        nodeList = []string{nodeName}
    }
    schedulerOpts := []string{args.App.GetName(), args.ProcessName}
    addr, cont, err := args.Provisioner.Cluster().CreateContainerSchedulerOpts(opts, schedulerOpts, nodeList...)
    if err != nil {
        log.Errorf("error on creating container in docker %s - %s", c.AppName, err)
        return err
    }
    c.ID = cont.ID
    c.HostAddr = net.URLToHost(addr)
    return nil
}
func (s *S) TestRunContainerHealerThrottled(c *check.C) {
    p, err := dockertest.StartMultipleServersCluster()
    c.Assert(err, check.IsNil)
    defer p.Destroy()
    node1 := p.Servers()[0]
    app := newFakeAppInDB("myapp", "python", 0)
    _, err = p.StartContainers(dockertest.StartContainersArgs{
        Endpoint:  node1.URL(),
        App:       app,
        Amount:    map[string]int{"web": 2},
        Image:     "tsuru/python",
        PullImage: true,
    })
    c.Assert(err, check.IsNil)
    containers := p.AllContainers()
    c.Assert(containers, check.HasLen, 2)
    c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL()))
    node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false})
    node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false})
    toMoveCont := containers[1]
    toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute)
    for i := 0; i < 3; i++ {
        var evt *event.Event
        evt, err = event.NewInternal(&event.Opts{
            Target:       event.Target{Type: "container", Value: toMoveCont.ID},
            InternalKind: "healer",
            CustomData:   toMoveCont,
            Allowed:      event.Allowed(permission.PermAppReadEvents),
        })
        c.Assert(err, check.IsNil)
        err = evt.DoneCustomData(nil, nil)
        c.Assert(err, check.IsNil)
    }
    healer := NewContainerHealer(ContainerHealerArgs{Provisioner: p, Locker: dockertest.NewFakeLocker()})
    err = healer.healContainerIfNeeded(toMoveCont)
    c.Assert(err, check.ErrorMatches, "Error trying to insert container healing event, healing aborted: event throttled, limit for healer on container \".*?\" is 3 every 5m0s")
}
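// initSwarm initializes a new swarm on the given client, advertising the host
// extracted from addr. A node that is already part of a swarm is not treated
// as an error.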
func initSwarm(client *docker.Client, addr string) error {
    host := tsuruNet.URLToHost(addr)
    _, err := client.InitSwarm(docker.InitSwarmOptions{
        InitRequest: swarm.InitRequest{
            ListenAddr:    fmt.Sprintf("0.0.0.0:%d", swarmConfig.swarmPort),
            AdvertiseAddr: host,
        },
    })
    if err != nil && errors.Cause(err) != docker.ErrNodeAlreadyInSwarm {
        return errors.WithStack(err)
    }
    return nil
}
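// taskToUnit converts a swarm task into a provision.Unit, resolving the
// unit's host from the node's docker address label and its status from the
// task state. The Address field is left as an empty URL.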
func taskToUnit(task *swarm.Task, service *swarm.Service, node *swarm.Node, a provision.App) provision.Unit {
    addr := node.Spec.Labels[labelNodeDockerAddr.String()]
    host := tsuruNet.URLToHost(addr)
    return provision.Unit{
        ID:          task.ID,
        AppName:     a.GetName(),
        ProcessName: service.Spec.Annotations.Labels[labelAppProcess.String()],
        Type:        a.GetPlatform(),
        Ip:          host,
        Status:      stateMap[task.Status.State],
        Address:     &url.URL{},
    }
}
func (s *S) TestHealerHealNodeCreateMachineError(c *check.C) {
    factory, iaasInst := dockertest.NewHealerIaaSConstructorWithInst("127.0.0.1")
    iaas.RegisterIaasProvider("my-healer-iaas", factory)
    _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{})
    c.Assert(err, check.IsNil)
    iaasInst.Addr = "localhost"
    iaasInst.Err = fmt.Errorf("my create machine error")
    node1, err := testing.NewServer("127.0.0.1:0", nil, nil)
    c.Assert(err, check.IsNil)
    p, err := s.newFakeDockerProvisioner(node1.URL())
    c.Assert(err, check.IsNil)
    defer p.Destroy()
    node1.PrepareFailure("pingErr", "/_ping")
    p.Cluster().StartActiveMonitoring(100 * time.Millisecond)
    time.Sleep(300 * time.Millisecond)
    p.Cluster().StopActiveMonitoring()
    healer := NewNodeHealer(NodeHealerArgs{
        Provisioner:           p,
        FailuresBeforeHealing: 1,
        WaitTimeNewMachine:    time.Minute,
    })
    nodes, err := p.Cluster().UnfilteredNodes()
    c.Assert(err, check.IsNil)
    c.Assert(nodes, check.HasLen, 1)
    c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
    c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
    c.Assert(nodes[0].FailureCount() > 0, check.Equals, true)
    nodes[0].Metadata["iaas"] = "my-healer-iaas"
    created, err := healer.healNode(&nodes[0])
    c.Assert(err, check.ErrorMatches, ".*my create machine error.*")
    c.Assert(created.Address, check.Equals, "")
    c.Assert(nodes[0].FailureCount(), check.Equals, 0)
    nodes, err = p.Cluster().UnfilteredNodes()
    c.Assert(err, check.IsNil)
    c.Assert(nodes, check.HasLen, 1)
    c.Assert(dockertest.URLPort(nodes[0].Address), check.Equals, dockertest.URLPort(node1.URL()))
    c.Assert(net.URLToHost(nodes[0].Address), check.Equals, "127.0.0.1")
}
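// FindNodeByAddrs returns the single node whose host matches one of the given
// addresses. It errors when the addresses match multiple nodes and returns
// ErrNodeNotFound when none match.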
func FindNodeByAddrs(p NodeProvisioner, addrs []string) (Node, error) {
    nodeAddrMap := map[string]Node{}
    nodes, err := p.ListNodes(nil)
    if err != nil {
        return nil, err
    }
    for i, n := range nodes {
        nodeAddrMap[net.URLToHost(n.Address())] = nodes[i]
    }
    var node Node
    for _, addr := range addrs {
        n := nodeAddrMap[net.URLToHost(addr)]
        if n != nil {
            if node != nil {
                return nil, errors.Errorf("addrs match multiple nodes: %v", addrs)
            }
            node = n
        }
    }
    if node == nil {
        return nil, ErrNodeNotFound
    }
    return node, nil
}
func (s *S) TestStartContainers(c *check.C) {
    app := provisiontest.NewFakeApp("myapp", "python", 1)
    p, err := StartMultipleServersCluster()
    c.Assert(err, check.IsNil)
    defer p.Destroy()
    containers, err := p.StartContainers(StartContainersArgs{
        Amount:    map[string]int{"web": 2, "worker": 1},
        Image:     "tsuru/python",
        PullImage: true,
        Endpoint:  p.Servers()[0].URL(),
        App:       app,
    })
    c.Assert(err, check.IsNil)
    c.Assert(containers, check.HasLen, 3)
    c.Assert(p.Containers(net.URLToHost(p.Servers()[0].URL())), check.DeepEquals, containers)
}