func (e *UnregisterSupervisorExecutor) Execute(t *Task) error { if e.arg.Host == "" { return errors.New("Please specify a host to unregister") } // TODO(edanaher): With appropriate confirmation (--force?), tear down containers on the host and delete // metadata. Until then, cowardly refuse to tear down supervisors with containers. listResponse, err := supervisor.List(e.arg.Host) if err != nil { return err } containerCount := len(listResponse.Containers) if containerCount > 0 { plural := "" if containerCount > 1 { plural = "s" } return errors.New(fmt.Sprintf("Supervisor still has %d running container%s", +containerCount, plural)) } supervisor.Teardown(e.arg.Host, []string{}, true) err = datamodel.Supervisor(e.arg.Host).Delete() if err != nil { return err } e.reply.Status = StatusOk return nil }
func (e *RegisterSupervisorExecutor) Execute(t *Task) error { if e.arg.Host == "" { return errors.New("Please specify a host to register") } // check health of to be registered supervisor health, err := supervisor.HealthCheck(e.arg.Host) if err != nil { e.reply.Status = StatusError return err } if health.Status != StatusOk && health.Status != StatusFull { e.reply.Status = health.Status return errors.New("Status is " + health.Status) } if health.Region != Region { e.reply.Status = "Region Mismatch" return errors.New("Supervisor Region (" + health.Region + ") does not match Manager Region (" + Region + ")") } err = datamodel.Supervisor(e.arg.Host).Touch() if err != nil { e.reply.Status = StatusError return err } // try to push all ip groups to this new supervisor if err := netsec.UpdateSupervisor(e.arg.Host); err != nil { return err } if err == nil { e.reply.Status = StatusOk } else { e.reply.Status = StatusError } return err }
func cleanup(removeContainerFromHost bool, deployedContainers []*Container, t *Task) { // kill all references to deployed containers as well as the container itself for _, container := range deployedContainers { supervisor.Teardown(container.Host, []string{container.ID}, false) if instance, err := datamodel.GetInstance(container.ID); err == nil { instance.Delete() } else { t.Log(fmt.Sprintf("Failed to clean up instance %s: %s", container.ID, err.Error())) } DeleteAppShaFromEnv(container.App, container.Sha, container.Env) if removeContainerFromHost { datamodel.Supervisor(container.Host).RemoveContainer(container.ID) } } }
func (e *TeardownExecutor) Execute(t *Task) error { hostMap, err := getContainerIDsToTeardown(t, e.arg) if err != nil { return err } if e.arg.All { tl := datamodel.NewTeardownLock(t.ID) if err := tl.Lock(); err != nil { return err } defer tl.Unlock() } else if e.arg.Env != "" { tl := datamodel.NewTeardownLock(t.ID, e.arg.App, e.arg.Sha, e.arg.Env) if err := tl.Lock(); err != nil { return err } defer tl.Unlock() } else if e.arg.Sha != "" { tl := datamodel.NewTeardownLock(t.ID, e.arg.App, e.arg.Sha) if err := tl.Lock(); err != nil { return err } defer tl.Unlock() } else if e.arg.App != "" { tl := datamodel.NewTeardownLock(t.ID, e.arg.App) if err := tl.Lock(); err != nil { return err } defer tl.Unlock() } tornContainers := []string{} for host, containerIDs := range hostMap { if e.arg.All { t.LogStatus("Tearing Down * from %s", host) } else { t.LogStatus("Tearing Down %v from %s", containerIDs, host) } ihReply, err := supervisor.Teardown(host, containerIDs, e.arg.All) if err != nil { return errors.New(fmt.Sprintf("Error Tearing Down %v from %s : %s", containerIDs, host, err.Error())) } tornContainers = append(tornContainers, ihReply.ContainerIDs...) for _, tornContainerID := range ihReply.ContainerIDs { t.LogStatus("%s has been removed from host %s; removing zookeeper record about the container", tornContainerID, host) err := datamodel.DeleteFromPool([]string{tornContainerID}) if err != nil { t.Log("Error removing %s from pool: %v", tornContainerID, err) } datamodel.Supervisor(host).RemoveContainer(tornContainerID) instance, err := datamodel.GetInstance(tornContainerID) if err != nil { continue } last, _ := instance.Delete() if last { t.LogStatus("%s is the last one of its kind [app: %s SHA: %s Env: %s]", tornContainerID, instance.App, instance.Sha, instance.Env) DeleteAppShaFromEnv(instance.App, instance.Sha, instance.Env) } t.LogStatus("Successfully teardown %s", tornContainerID) } } e.reply.ContainerIDs = tornContainers return nil }
func deployToHostsInZones(deps map[string]DepsType, manifest *Manifest, sha, env string, hosts map[string][]string, zones []string, t *Task) ([]*Container, error) { deployedContainers := []*Container{} // fetch the app zkApp, err := datamodel.GetApp(manifest.Name) if err != nil { return nil, err } // first check if zones have enough hosts for _, zone := range zones { // fail if zone has no hosts if hosts[zone] == nil || len(hosts[zone]) == 0 { return nil, errors.New(fmt.Sprintf("No hosts available for app %s in zone %s", manifest.Name, zone)) } } // now that we know that enough hosts are available t.LogStatus("Deploying to zones: %v", zones) respCh := make(chan *DeployZoneResult, len(zones)) for _, zone := range zones { go deployToZone(respCh, deps, manifest, sha, env, hosts[zone], zone) } numResults := 0 status := "Deployed to zones: " for result := range respCh { deployedContainers = append(deployedContainers, result.Containers...) if result.Error != nil { err = result.Error t.Log(err.Error()) status += result.Zone + ":FAIL " } else { status += result.Zone + ":SUCCESS " } t.LogStatus(status) numResults++ if numResults >= len(zones) { // we're done close(respCh) } } if err != nil { cleanup(false, deployedContainers, t) return nil, err } // set ports on zk supervisor - can't do this in parallel. we may deploy to the same host at the same time // and since we don't lock zookeeper (maybe we should), this would result in a race condition. t.LogStatus("Updating Zookeeper") for _, cont := range deployedContainers { datamodel.Supervisor(cont.Host).SetContainerAndPort(cont.ID, cont.PrimaryPort) } // we're good now, so lets move on t.LogStatus("Updating Router") deployedIDs := make([]string, len(deployedContainers)) count := 0 for _, cont := range deployedContainers { deployedIDs[count] = cont.ID count++ } err = datamodel.AddToPool(deployedIDs) if err != nil { // if we can't add the pool, clean up and fail cleanup(true, deployedContainers, t) return nil, errors.New("Update Pool Error: " + err.Error()) } if zkApp.Internal { // reserve router port if needed and add app+env _, _, err = datamodel.ReserveRouterPortAndUpdateTrie(zkApp.Internal, manifest.Name, sha, env) if err != nil { datamodel.DeleteFromPool(deployedIDs) cleanup(true, deployedContainers, t) return nil, errors.New("Reserve Router Port Error: " + err.Error()) } } else { // only update trie _, err = datamodel.UpdateAppEnvTrie(zkApp.Internal, manifest.Name, sha, env) if err != nil { datamodel.DeleteFromPool(deployedIDs) cleanup(true, deployedContainers, t) return nil, errors.New("Reserve Router Port Error: " + err.Error()) } } return deployedContainers, nil }