func toAutoScaleEvent(evt *event.Event) (autoScaleEvent, error) { var data evtCustomData err := evt.EndData(&data) if err != nil { return autoScaleEvent{}, err } autoScaleEvt := autoScaleEvent{ ID: evt.UniqueID, MetadataValue: evt.Target.Value, Nodes: data.Nodes, StartTime: evt.StartTime, EndTime: evt.EndTime, Successful: evt.Error == "", Error: evt.Error, Log: evt.Log, } if data.Result != nil { if data.Result.ToAdd > 0 { autoScaleEvt.Action = scaleActionAdd } else if len(data.Result.ToRemove) > 0 { autoScaleEvt.Action = scaleActionRemove } else if data.Result.ToRebalance { autoScaleEvt.Action = scaleActionRebalance } autoScaleEvt.Reason = data.Result.Reason } return autoScaleEvt, nil }
func (s *S) TestHealerHandleErrorThrottled(c *check.C) { factory, iaasInst := iaasTesting.NewHealerIaaSConstructorWithInst("addr1") iaas.RegisterIaasProvider("my-healer-iaas", factory) _, err := iaas.CreateMachineForIaaS("my-healer-iaas", map[string]string{}) c.Assert(err, check.IsNil) iaasInst.Addr = "addr2" config.Set("iaas:node-protocol", "http") config.Set("iaas:node-port", 2) defer config.Unset("iaas:node-protocol") defer config.Unset("iaas:node-port") p := provisiontest.ProvisionerInstance err = p.AddNode(provision.AddNodeOptions{ Address: "http://addr1:1", Metadata: map[string]string{"iaas": "my-healer-iaas"}, }) c.Assert(err, check.IsNil) node, err := p.GetNode("http://addr1:1") c.Assert(err, check.IsNil) healer := newNodeHealer(nodeHealerArgs{ FailuresBeforeHealing: 1, WaitTimeNewMachine: time.Minute, }) healer.Shutdown() healer.started = time.Now().Add(-3 * time.Second) conf := healerConfig() err = conf.SaveBase(NodeHealerConfig{Enabled: boolPtr(true), MaxUnresponsiveTime: intPtr(1)}) c.Assert(err, check.IsNil) err = healer.UpdateNodeData(node, []provision.NodeCheckResult{}) c.Assert(err, check.IsNil) time.Sleep(1200 * time.Millisecond) nodes, err := p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") machines, err := iaas.ListMachines() c.Assert(err, check.IsNil) c.Assert(machines, check.HasLen, 1) c.Assert(machines[0].Address, check.Equals, "addr1") for i := 0; i < 3; i++ { var evt *event.Event evt, err = event.NewInternal(&event.Opts{ Target: event.Target{Type: "node", Value: nodes[0].Address()}, InternalKind: "healer", Allowed: event.Allowed(permission.PermPoolReadEvents), }) c.Assert(err, check.IsNil) err = evt.Done(nil) c.Assert(err, check.IsNil) } err = healer.tryHealingNode(nodes[0], "myreason", nil) c.Assert(err, check.ErrorMatches, "Error trying to insert node healing event, healing aborted: event throttled, limit for healer on node \".*?\" is 3 every 5m0s") nodes, err = p.ListNodes(nil) c.Assert(err, check.IsNil) c.Assert(nodes, check.HasLen, 1) c.Assert(nodes[0].Address(), check.Equals, "http://addr1:1") }
func (p *FakeProvisioner) Rollback(app provision.App, img string, evt *event.Event) (string, error) { if err := p.getError("ImageDeploy"); err != nil { return "", err } p.mut.Lock() defer p.mut.Unlock() pApp, ok := p.apps[app.GetName()] if !ok { return "", errNotProvisioned } evt.Write([]byte("Rollback deploy called")) p.apps[app.GetName()] = pApp return img, nil }
func checkCanceled(evt *event.Event) error { if evt == nil { return nil } canceled, err := evt.AckCancel() if err != nil { log.Errorf("unable to check if event should be canceled, ignoring: %s", err) return nil } if canceled { return ErrDeployCanceled } return nil }
func (p *FakeProvisioner) ArchiveDeploy(app provision.App, archiveURL string, evt *event.Event) (string, error) { if err := p.getError("ArchiveDeploy"); err != nil { return "", err } p.mut.Lock() defer p.mut.Unlock() pApp, ok := p.apps[app.GetName()] if !ok { return "", errNotProvisioned } evt.Write([]byte("Archive deploy called")) pApp.lastArchive = archiveURL p.apps[app.GetName()] = pApp return "app-image", nil }
func (s *S) TestMigrateRCEventsNoApp(c *check.C) { now := time.Unix(time.Now().Unix(), 0) id := bson.NewObjectId() var expected event.Event expected.UniqueID = id expected.Target = event.Target{Type: event.TargetTypeApp, Value: "a1"} expected.Owner = event.Owner{Type: event.OwnerTypeUser, Name: "u1"} expected.Kind = event.Kind{Type: event.KindTypePermission, Name: permission.PermAppDeploy.FullName()} expected.StartTime = now expected.EndTime = now.Add(time.Minute) expected.Error = "err1" expected.Log = "log1" expected.Allowed = event.Allowed(permission.PermAppReadEvents) s.checkEvtMatch(&expected, c) }
func (p *FakeProvisioner) UploadDeploy(app provision.App, file io.ReadCloser, fileSize int64, build bool, evt *event.Event) (string, error) { if err := p.getError("UploadDeploy"); err != nil { return "", err } p.mut.Lock() defer p.mut.Unlock() pApp, ok := p.apps[app.GetName()] if !ok { return "", errNotProvisioned } evt.Write([]byte("Upload deploy called")) pApp.lastFile = file p.apps[app.GetName()] = pApp return "app-image", nil }
func (s *S) checkEvtMatch(evt *event.Event, c *check.C) { rawEvt := bson.M{ "_id": evt.UniqueID, "uniqueid": evt.UniqueID, "starttime": evt.StartTime, "endtime": evt.EndTime, "target": evt.Target, "kind": evt.Kind, "owner": evt.Owner, "error": evt.Error, "log": evt.Log, "cancelable": evt.Cancelable, "running": evt.Running, } conn, err := db.Conn() c.Assert(err, check.IsNil) defer conn.Close() err = conn.Events().Insert(rawEvt) c.Assert(err, check.IsNil) err = MigrateRCEvents() c.Assert(err, check.IsNil) evts, err := event.All() c.Assert(err, check.IsNil) c.Assert(evts, check.HasLen, 1) evt.ID = evts[0].ID c.Assert(&evts[0], check.DeepEquals, evt) }
func (a *autoScaleConfig) rebalanceIfNeeded(evt *event.Event, pool string, nodes []*cluster.Node, sResult *scalerResult) error { if len(sResult.ToRemove) > 0 { return nil } if sResult.ToAdd > 0 { sResult.ToRebalance = true } rebalanceFilter := map[string]string{poolMetadataName: pool} if !sResult.ToRebalance { // No action yet, check if we need rebalance _, gap, err := a.provisioner.containerGapInNodes(nodes) if err != nil { return errors.Wrapf(err, "unable to obtain container gap in nodes") } buf := safe.NewBuffer(nil) dryProvisioner, err := a.provisioner.rebalanceContainersByFilter(buf, nil, rebalanceFilter, true) if err != nil { return errors.Wrapf(err, "unable to run dry rebalance to check if rebalance is needed. log: %s", buf.String()) } if dryProvisioner == nil { return nil } _, gapAfter, err := dryProvisioner.containerGapInNodes(nodes) if err != nil { return errors.Wrap(err, "couldn't find containers from rebalanced nodes") } if math.Abs((float64)(gap-gapAfter)) > 2.0 { sResult.ToRebalance = true if sResult.Reason == "" { sResult.Reason = fmt.Sprintf("gap is %d, after rebalance gap will be %d", gap, gapAfter) } } } if sResult.ToRebalance { evt.Logf("running rebalance, for %q: %#v", pool, sResult) buf := safe.NewBuffer(nil) writer := io.MultiWriter(buf, evt) _, err := a.provisioner.rebalanceContainersByFilter(writer, nil, rebalanceFilter, false) if err != nil { return errors.Wrapf(err, "unable to rebalance containers. log: %s", buf.String()) } } return nil }
func (a *autoScaleConfig) removeMultipleNodes(evt *event.Event, chosenNodes []cluster.Node) error { nodeAddrs := make([]string, len(chosenNodes)) nodeHosts := make([]string, len(chosenNodes)) for i, node := range chosenNodes { _, hasIaas := node.Metadata["iaas"] if !hasIaas { return errors.Errorf("no IaaS information in node (%s) metadata: %#v", node.Address, node.Metadata) } nodeAddrs[i] = node.Address nodeHosts[i] = net.URLToHost(node.Address) } err := a.provisioner.Cluster().UnregisterNodes(nodeAddrs...) if err != nil { return errors.Wrapf(err, "unable to unregister nodes (%s) for removal", strings.Join(nodeAddrs, ", ")) } buf := safe.NewBuffer(nil) err = a.provisioner.moveContainersFromHosts(nodeHosts, "", buf) if err != nil { for _, node := range chosenNodes { a.provisioner.Cluster().Register(node) } return errors.Wrapf(err, "unable to move containers from nodes (%s). log: %s", strings.Join(nodeAddrs, ", "), buf.String()) } wg := sync.WaitGroup{} for i := range chosenNodes { wg.Add(1) go func(i int) { defer wg.Done() node := chosenNodes[i] m, err := iaas.FindMachineByIdOrAddress(node.Metadata["iaas-id"], net.URLToHost(node.Address)) if err != nil { evt.Logf("unable to find machine for removal in iaas: %s", err) return } err = m.Destroy() if err != nil { evt.Logf("unable to destroy machine in IaaS: %s", err) } }(i) } wg.Wait() return nil }
func (s *S) TestRunContainerHealerThrottled(c *check.C) { p, err := dockertest.StartMultipleServersCluster() c.Assert(err, check.IsNil) defer p.Destroy() node1 := p.Servers()[0] app := newFakeAppInDB("myapp", "python", 0) _, err = p.StartContainers(dockertest.StartContainersArgs{ Endpoint: node1.URL(), App: app, Amount: map[string]int{"web": 2}, Image: "tsuru/python", PullImage: true, }) c.Assert(err, check.IsNil) containers := p.AllContainers() c.Assert(containers, check.HasLen, 2) c.Assert(containers[0].HostAddr, check.Equals, net.URLToHost(node1.URL())) c.Assert(containers[1].HostAddr, check.Equals, net.URLToHost(node1.URL())) node1.MutateContainer(containers[0].ID, docker.State{Running: false, Restarting: false}) node1.MutateContainer(containers[1].ID, docker.State{Running: false, Restarting: false}) toMoveCont := containers[1] toMoveCont.LastSuccessStatusUpdate = time.Now().Add(-5 * time.Minute) for i := 0; i < 3; i++ { var evt *event.Event evt, err = event.NewInternal(&event.Opts{ Target: event.Target{Type: "container", Value: toMoveCont.ID}, InternalKind: "healer", CustomData: toMoveCont, Allowed: event.Allowed(permission.PermAppReadEvents), }) c.Assert(err, check.IsNil) err = evt.DoneCustomData(nil, nil) c.Assert(err, check.IsNil) } healer := NewContainerHealer(ContainerHealerArgs{Provisioner: p, Locker: dockertest.NewFakeLocker()}) err = healer.healContainerIfNeeded(toMoveCont) c.Assert(err, check.ErrorMatches, "Error trying to insert container healing event, healing aborted: event throttled, limit for healer on container \".*?\" is 3 every 5m0s") }
func (a *autoScaleConfig) addNode(evt *event.Event, modelNodes []*cluster.Node) (*cluster.Node, error) { metadata, err := chooseMetadataFromNodes(modelNodes) if err != nil { return nil, err } _, hasIaas := metadata["iaas"] if !hasIaas { return nil, errors.Errorf("no IaaS information in nodes metadata: %#v", metadata) } machine, err := iaas.CreateMachineForIaaS(metadata["iaas"], metadata) if err != nil { return nil, errors.Wrap(err, "unable to create machine") } newAddr := machine.FormatNodeAddress() evt.Logf("new machine created: %s - Waiting for docker to start...", newAddr) createOpts := provision.AddNodeOptions{ Address: newAddr, Metadata: metadata, WaitTO: a.WaitTimeNewMachine, CaCert: machine.CaCert, ClientCert: machine.ClientCert, ClientKey: machine.ClientKey, } err = a.provisioner.AddNode(createOpts) if err != nil { machine.Destroy() a.provisioner.Cluster().Unregister(newAddr) return nil, errors.Wrapf(err, "error adding new node %s", newAddr) } createdNode, err := a.provisioner.Cluster().GetNode(newAddr) if err != nil { machine.Destroy() a.provisioner.Cluster().Unregister(newAddr) return nil, errors.Wrapf(err, "error retrieving new node %s", newAddr) } evt.Logf("new machine created: %s - started!", newAddr) return &createdNode, nil }
func (s *S) TestMigrateRCEventsWithApp(c *check.C) { a := app.App{Name: "a1", Platform: "zend", TeamOwner: s.team.Name} err := app.CreateApp(&a, s.user) c.Assert(err, check.IsNil) now := time.Unix(time.Now().Unix(), 0) id := bson.NewObjectId() var expected event.Event expected.UniqueID = id expected.Target = event.Target{Type: event.TargetTypeApp, Value: "a1"} expected.Owner = event.Owner{Type: event.OwnerTypeUser, Name: "u1"} expected.Kind = event.Kind{Type: event.KindTypePermission, Name: permission.PermAppDeploy.FullName()} expected.StartTime = now expected.EndTime = now.Add(time.Minute) expected.Error = "err1" expected.Log = "log1" expected.Allowed = event.Allowed(permission.PermAppReadEvents, append(permission.Contexts(permission.CtxTeam, a.Teams), permission.Context(permission.CtxApp, a.Name), permission.Context(permission.CtxPool, a.Pool), )..., ) s.checkEvtMatch(&expected, c) }
func toHealingEvt(evt *event.Event) (HealingEvent, error) { healingEvt := HealingEvent{ ID: evt.UniqueID, StartTime: evt.StartTime, EndTime: evt.EndTime, Action: fmt.Sprintf("%s-healing", evt.Target.Type), Successful: evt.Error == "", Error: evt.Error, } switch evt.Target.Type { case event.TargetTypeContainer: err := evt.StartData(&healingEvt.FailingContainer) if err != nil { return healingEvt, err } err = evt.EndData(&healingEvt.CreatedContainer) if err != nil { return healingEvt, err } case event.TargetTypeNode: var data healer.NodeHealerCustomData err := evt.StartData(&data) if err != nil { return healingEvt, err } if data.LastCheck != nil { healingEvt.Extra = data.LastCheck } healingEvt.Reason = data.Reason healingEvt.FailingNode = data.Node var createdNode provision.NodeSpec err = evt.EndData(&createdNode) if err != nil { return healingEvt, err } healingEvt.CreatedNode = createdNode } return healingEvt, nil }
func eventToDeployData(evt *event.Event, validImages set.Set, full bool) *DeployData { data := &DeployData{ ID: evt.UniqueID, App: evt.Target.Value, Timestamp: evt.StartTime, Duration: evt.EndTime.Sub(evt.StartTime), Error: evt.Error, User: evt.Owner.Name, } var startOpts DeployOptions err := evt.StartData(&startOpts) if err == nil { data.Commit = startOpts.Commit data.Origin = startOpts.GetOrigin() } if full { data.Log = evt.Log var otherData map[string]string err = evt.OtherData(&otherData) if err == nil { data.Diff = otherData["diff"] } } var endData map[string]string err = evt.EndData(&endData) if err == nil { data.Image = endData["image"] if validImages != nil { data.CanRollback = validImages.Includes(data.Image) if reImageVersion.MatchString(data.Image) { parts := reImageVersion.FindAllStringSubmatch(data.Image, -1) data.Image = parts[0][0] } } } return data }
func deployDataToEvent(data *DeployData) error { var evt event.Event evt.UniqueID = data.ID evt.Target = event.Target{Type: event.TargetTypeApp, Value: data.App} evt.Owner = event.Owner{Type: event.OwnerTypeUser, Name: data.User} evt.Kind = event.Kind{Type: event.KindTypePermission, Name: permission.PermAppDeploy.FullName()} evt.StartTime = data.Timestamp evt.EndTime = data.Timestamp.Add(data.Duration) evt.Error = data.Error evt.Log = data.Log evt.RemoveDate = data.RemoveDate a, err := GetByName(data.App) if err == nil { evt.Allowed = event.Allowed(permission.PermAppReadEvents, append(permission.Contexts(permission.CtxTeam, a.Teams), permission.Context(permission.CtxApp, a.Name), permission.Context(permission.CtxPool, a.Pool), )...) } else { evt.Allowed = event.Allowed(permission.PermAppReadEvents) } startOpts := DeployOptions{ Commit: data.Commit, Origin: data.Origin, } var otherData map[string]string if data.Diff != "" { otherData = map[string]string{"diff": data.Diff} } endData := map[string]string{"image": data.Image} err = evt.RawInsert(startOpts, otherData, endData) if mgo.IsDup(err) { return nil } return err }
func healingEventToEvent(data *HealingEvent) error { var evt event.Event evt.UniqueID = data.ID.(bson.ObjectId) var startOpts, endOpts interface{} switch data.Action { case "node-healing": evt.Target = event.Target{Type: event.TargetTypeNode, Value: data.FailingNode.Address} var lastCheck *healer.NodeChecks if data.Extra != nil { checkRaw, err := json.Marshal(data.Extra) if err == nil { json.Unmarshal(checkRaw, &lastCheck) } } startOpts = healer.NodeHealerCustomData{ Node: data.FailingNode, Reason: data.Reason, LastCheck: lastCheck, } endOpts = data.CreatedNode poolName := data.FailingNode.Metadata["pool"] evt.Allowed = event.Allowed(permission.PermPoolReadEvents, permission.Context(permission.CtxPool, poolName)) case "container-healing": evt.Target = event.Target{Type: event.TargetTypeContainer, Value: data.FailingContainer.ID} startOpts = data.FailingContainer endOpts = data.CreatedContainer a, err := app.GetByName(data.FailingContainer.AppName) if err == nil { evt.Allowed = event.Allowed(permission.PermAppReadEvents, append(permission.Contexts(permission.CtxTeam, a.Teams), permission.Context(permission.CtxApp, a.Name), permission.Context(permission.CtxPool, a.Pool), )...) } else { evt.Allowed = event.Allowed(permission.PermAppReadEvents) } default: return errors.Errorf("invalid action %q", data.Action) } evt.Owner = event.Owner{Type: event.OwnerTypeInternal} evt.Kind = event.Kind{Type: event.KindTypeInternal, Name: "healer"} evt.StartTime = data.StartTime evt.EndTime = data.EndTime evt.Error = data.Error err := evt.RawInsert(startOpts, nil, endOpts) if mgo.IsDup(err) { return nil } return err }
func setAllowed(evt *event.Event) (err error) { defer func() { if err != nil { fmt.Printf("setting global context to evt %q: %s\n", evt.String(), err) err = nil } }() switch evt.Target.Type { case event.TargetTypeApp: var a *app.App a, err = app.GetByName(evt.Target.Value) if err != nil { evt.Allowed = event.Allowed(permission.PermAppReadEvents) if evt.Cancelable { evt.Allowed = event.Allowed(permission.PermAppUpdateEvents) } return err } ctxs := append(permission.Contexts(permission.CtxTeam, a.Teams), permission.Context(permission.CtxApp, a.Name), permission.Context(permission.CtxPool, a.Pool), ) evt.Allowed = event.Allowed(permission.PermAppReadEvents, ctxs...) if evt.Cancelable { evt.Allowed = event.Allowed(permission.PermAppUpdateEvents, ctxs...) } case event.TargetTypeTeam: evt.Allowed = event.Allowed(permission.PermTeamReadEvents, permission.Context(permission.CtxTeam, evt.Target.Value)) case event.TargetTypeService: s := service.Service{Name: evt.Target.Value} err = s.Get() if err != nil { evt.Allowed = event.Allowed(permission.PermServiceReadEvents) return err } evt.Allowed = event.Allowed(permission.PermServiceReadEvents, append(permission.Contexts(permission.CtxTeam, s.OwnerTeams), permission.Context(permission.CtxService, s.Name), )..., ) case event.TargetTypeServiceInstance: v := strings.SplitN(evt.Target.Value, "/", 2) if len(v) != 2 { evt.Allowed = event.Allowed(permission.PermServiceInstanceReadEvents) return nil } var si *service.ServiceInstance si, err = service.GetServiceInstance(v[0], v[1]) if err != nil { evt.Allowed = event.Allowed(permission.PermServiceInstanceReadEvents) return err } evt.Allowed = event.Allowed(permission.PermServiceReadEvents, append(permission.Contexts(permission.CtxTeam, si.Teams), permission.Context(permission.CtxServiceInstance, evt.Target.Value), )..., ) case event.TargetTypePool: evt.Allowed = event.Allowed(permission.PermPoolReadEvents, permission.Context(permission.CtxPool, evt.Target.Value)) case event.TargetTypeUser: evt.Allowed = event.Allowed(permission.PermUserReadEvents, permission.Context(permission.CtxUser, evt.Target.Value)) case event.TargetTypeIaas: evt.Allowed = event.Allowed(permission.PermMachineReadEvents, permission.Context(permission.CtxIaaS, evt.Target.Value)) case event.TargetTypeContainer: var provisioners []provision.Provisioner provisioners, err = provision.Registry() if err != nil { return err } var a provision.App for _, p := range provisioners { if finderProv, ok := p.(provision.UnitFinderProvisioner); ok { a, err = finderProv.GetAppFromUnitID(evt.Target.Value) _, isNotFound := err.(*provision.UnitNotFoundError) if err == nil || !isNotFound { break } } } if err != nil { return err } evt.Allowed = event.Allowed(permission.PermAppReadEvents, append(permission.Contexts(permission.CtxTeam, a.GetTeamsName()), permission.Context(permission.CtxApp, a.GetName()), permission.Context(permission.CtxPool, a.GetPool()), )..., ) case event.TargetTypeNode: var provisioners []provision.Provisioner provisioners, err = provision.Registry() if err != nil { return err } var ctxs []permission.PermissionContext for _, p := range provisioners { if nodeProvisioner, ok := p.(provision.NodeProvisioner); ok { var nodes []provision.Node nodes, err = nodeProvisioner.ListNodes([]string{evt.Target.Value}) if err != nil { return err } ctxs = append(ctxs, permission.Context(permission.CtxPool, nodes[0].Pool())) } } evt.Allowed = event.Allowed(permission.PermPoolReadEvents, ctxs...) case event.TargetTypeRole: evt.Allowed = event.Allowed(permission.PermRoleReadEvents) default: evt.Allowed = event.Allowed(permission.PermDebug) } return nil }