func (this *ControlPlaneDao) GetRunningServicesForService(serviceID string, services *[]dao.RunningService) error { // we initialize the data container to something here in case it has not been initialized yet *services = make([]dao.RunningService, 0) poolID, err := this.facade.GetPoolForService(datastore.Get(), serviceID) if err != nil { glog.Errorf("Unable to get service %v: %v", serviceID, err) return err } poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID)) if err != nil { glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, err) return err } svcs, err := zkservice.LoadRunningServicesByService(poolBasedConn, serviceID) if err != nil { glog.Errorf("LoadRunningServicesByService failed (conn: %+v serviceID: %v): %v", poolBasedConn, serviceID, err) return err } for _, svc := range svcs { *services = append(*services, svc) } return nil }
func (z *zkf) RemoveVirtualIP(virtualIP *pool.VirtualIP) error { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(virtualIP.PoolID)) if err != nil { return err } return zkvirtualip.RemoveVirtualIP(conn, virtualIP.IP) }
func (this *ControlPlaneDao) GetRunningServicesForHost(hostID string, services *[]dao.RunningService) error { // we initialize the data container to something here in case it has not been initialized yet *services = make([]dao.RunningService, 0) myHost, err := this.facade.GetHost(datastore.Get(), hostID) if err != nil { glog.Errorf("Unable to get host %v: %v", hostID, err) return err } else if myHost == nil { return nil } poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(myHost.PoolID)) if err != nil { glog.Errorf("Error in getting a connection based on pool %v: %v", myHost.PoolID, err) return err } *services, err = zkservice.LoadRunningServicesByHost(poolBasedConn, hostID) if err != nil { glog.Errorf("zkservice.LoadRunningServicesByHost (conn: %+v host: %v) failed: %v", poolBasedConn, hostID, err) return err } return nil }
func (z *zkf) UpdateHost(host *host.Host) error { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(host.PoolID)) if err != nil { return err } return zkhost.UpdateHost(conn, host) }
func (this *ControlPlaneDao) GetRunningService(request dao.ServiceStateRequest, running *dao.RunningService) error { glog.V(3).Infof("ControlPlaneDao.GetRunningService: request=%v", request) *running = dao.RunningService{} serviceID := request.ServiceID poolID, err := this.facade.GetPoolForService(datastore.Get(), serviceID) if err != nil { glog.Errorf("Unable to get service %v: %v", serviceID, err) return err } poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID)) if err != nil { glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, err) return err } if thisRunning, err := zkservice.LoadRunningService(poolBasedConn, request.ServiceID, request.ServiceStateID); err != nil { glog.Errorf("zkservice.LoadRunningService failed (conn: %+v serviceID: %v): %v", poolBasedConn, request.ServiceID, err) return err } else { if thisRunning != nil { *running = *thisRunning } } return nil }
func (this *ControlPlaneDao) GetRunningServices(request dao.EntityRequest, allRunningServices *[]dao.RunningService) error { // we initialize the data container to something here in case it has not been initialized yet *allRunningServices = make([]dao.RunningService, 0) allPools, err := this.facade.GetResourcePools(datastore.Get()) if err != nil { glog.Error("runningservice.go failed to get resource pool") return err } else if allPools == nil || len(allPools) == 0 { return fmt.Errorf("no resource pools found") } for _, aPool := range allPools { poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(aPool.ID)) if err != nil { glog.Error("runningservice.go Failed to get connection based on pool: %v", aPool.ID) return err } singlePoolRunningServices := []dao.RunningService{} singlePoolRunningServices, err = zkservice.LoadRunningServices(poolBasedConn) if err != nil { glog.Errorf("Failed GetAllRunningServices: %v", err) return err } for _, rs := range singlePoolRunningServices { *allRunningServices = append(*allRunningServices, rs) } } return nil }
// RemoveService deletes a service from its pool's coordinator tree, first
// removing the service's entries from the global vhost registry, then holding
// the pool's service lock so the scheduler cannot start instances of the
// service while it is being deleted.
func (zk *zkf) RemoveService(service *service.Service) error {
	// acquire the service lock to prevent that service from being scheduled
	// as it is being deleted
	conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(service.PoolID))
	if err != nil {
		return err
	}
	// remove the global list of all vhosts deployed
	// (vhosts live under the root "/" path, not the pool path)
	if rootconn, err := zzk.GetLocalConnection("/"); err != nil {
		return err
	} else if err := zkservice.RemoveServiceVhosts(rootconn, service); err != nil {
		return err
	}
	// Ensure that the service's pool is locked for the duration
	// NOTE(review): cancel is nil here — presumably this waits indefinitely
	// for the lock; confirm against EnsureServiceLock's contract.
	finish := make(chan interface{})
	defer close(finish) // closing finish releases the lock when this function returns
	if err := zkservice.EnsureServiceLock(nil, finish, conn); err != nil {
		return err
	}
	// FIXME: this may be a long-running operation, should we institute a timeout?
	return zkservice.RemoveService(conn, service.ID)
}
func (zk *zkf) StopServiceInstance(poolID, hostID, stateID string) error { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID)) if err != nil { return err } return zkservice.StopServiceInstance(conn, hostID, stateID) }
func (zk *zkf) WaitService(service *service.Service, state service.DesiredState, cancel <-chan interface{}) error { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(service.PoolID)) if err != nil { return err } return zkservice.WaitService(cancel, conn, service.ID, state) }
func (z *zkf) GetActiveHosts(poolID string, hosts *[]string) error { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID)) if err != nil { return err } *hosts, err = zkhost.GetActiveHosts(conn) return err }
func (zk *zkf) GetServiceStates(poolID string, states *[]servicestate.ServiceState, serviceIDs ...string) error { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID)) if err != nil { return err } *states, err = zkservice.GetServiceStates(conn, serviceIDs...) return err }
func (dfs *DistributedFilesystem) desynchronize(image *docker.Image) error { // inspect the image dImg, err := image.Inspect() if err != nil { glog.Errorf("Could not inspect image %s (%s): %s", image.ID, image.UUID, err) return err } // look up services for that tenant svcs, err := dfs.facade.GetServices(datastore.Get(), dao.ServiceRequest{TenantID: image.ID.User}) if err != nil { glog.Errorf("Could not get services for tenant %s from %s (%s): %s", image.ID.User, image.ID, image.UUID, err) return err } for _, svc := range svcs { // figure out which services are using the provided image svcImageID, err := commons.ParseImageID(svc.ImageID) if err != nil { glog.Warningf("Could not parse image %s for %s (%s): %s", svc.ImageID, svc.Name, svc.ID) continue } else if !svcImageID.Equals(image.ID) { continue } // TODO: we need to switch to using dao.ControlPlane conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(svc.PoolID)) if err != nil { glog.Warningf("Could not acquire connection to the coordinator (%s): %s", svc.PoolID, err) continue } states, err := zkservice.GetServiceStates(conn, svc.ID) if err != nil { glog.Warningf("Could not get running services for %s (%s): %s", svc.Name, svc.ID) continue } for _, state := range states { // check if the instance has been running since before the commit if state.IsRunning() && state.Started.Before(dImg.Created) { state.InSync = false if err := zkservice.UpdateServiceState(conn, &state); err != nil { glog.Warningf("Could not update service state %s for %s (%s) as out of sync: %s", state.ID, svc.Name, svc.ID, err) continue } } } } return nil }
func (this *ControlPlaneDao) getPoolBasedConnection(serviceID string) (client.Connection, error) { poolID, err := this.facade.GetPoolForService(datastore.Get(), serviceID) if err != nil { glog.V(2).Infof("ControlPlaneDao.GetPoolForService service=%+v err=%s", serviceID, err) return nil, err } poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID)) if err != nil { glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, err) return nil, err } return poolBasedConn, nil }
func (zk *zkf) UpdateService(service *service.Service) error { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(service.PoolID)) if err != nil { return err } if err := zkservice.UpdateService(conn, service); err != nil { return err } rootconn, err := zzk.GetLocalConnection("/") if err != nil { return err } return zkservice.UpdateServiceVhosts(rootconn, service) }
func (z *zkf) RemoveHost(host *host.Host) error { // acquire the service lock to prevent services from being scheduled // to that pool conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(host.PoolID)) if err != nil { return err } cancel := make(chan interface{}) go func() { defer close(cancel) <-time.After(2 * time.Minute) }() // Ensure that the service's pool is locked for the duration finish := make(chan interface{}) defer close(finish) if err := zkservice.EnsureServiceLock(cancel, finish, conn); err != nil { return err } return zkhost.RemoveHost(cancel, conn, host.ID) }
func (this *ControlPlaneDao) StopRunningInstance(request dao.HostServiceRequest, unused *int) error { myHost, err := this.facade.GetHost(datastore.Get(), request.HostID) if err != nil { glog.Errorf("Unable to get host %v: %v", request.HostID, err) return err } if myHost == nil { return fmt.Errorf("Host %s does not exist", request.HostID) } poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(myHost.PoolID)) if err != nil { glog.Errorf("Error in getting a connection based on pool %v: %v", myHost.PoolID, err) return err } if err := zkservice.StopServiceInstance(poolBasedConn, request.HostID, request.ServiceStateID); err != nil { glog.Errorf("zkservice.StopServiceInstance failed (conn: %+v hostID: %v serviceStateID: %v): %v", poolBasedConn, request.HostID, request.ServiceStateID, err) return err } return nil }
func (this *ControlPlaneDao) Action(request dao.AttachRequest, unused *int) error { ctx := datastore.Get() svc, err := this.facade.GetService(ctx, request.Running.ServiceID) if err != nil { return err } var command []string if request.Command == "" { return fmt.Errorf("missing command") } if err := svc.EvaluateActionsTemplate(serviceGetter(ctx, this.facade), childFinder(ctx, this.facade), request.Running.InstanceID); err != nil { return err } action, ok := svc.Actions[request.Command] if !ok { return fmt.Errorf("action not found for service %s: %s", svc.ID, request.Command) } command = append([]string{action}, request.Args...) req := zkdocker.Action{ HostID: request.Running.HostID, DockerID: request.Running.DockerID, Command: command, } conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(svc.PoolID)) if err != nil { return err } _, err = zkdocker.SendAction(conn, &req) return err }
func (s *scheduler) localSync(shutdown <-chan interface{}, rootConn client.Connection) { wait := time.After(0) retry: for { select { case <-wait: case <-shutdown: return } pools, err := s.GetResourcePools() if err != nil { glog.Errorf("Could not get resource pools: %s", err) wait = time.After(minWait) continue } else if err := zkservice.SyncResourcePools(rootConn, pools); err != nil { glog.Errorf("Could not do a local sync of resource pools: %s", err) wait = time.After(minWait) continue } for _, pool := range pools { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(pool.ID)) if err != nil { glog.Errorf("Could not get a pool-based connection for %s to zookeeper: %s", pool.ID, err) wait = time.After(minWait) continue retry } // Update the hosts if hosts, err := s.GetHostsByPool(pool.ID); err != nil { glog.Errorf("Could not get hosts in pool %s: %s", pool.ID, err) wait = time.After(minWait) continue retry } else if err := zkservice.SyncHosts(conn, hosts); err != nil { glog.Errorf("Could not do a local sync of hosts: %s", err) wait = time.After(minWait) continue retry } // Update the services if svcs, err := s.GetServicesByPool(pool.ID); err != nil { glog.Errorf("Could not get services: %s", err) wait = time.After(minWait) continue retry } else if zkservice.SyncServices(conn, svcs); err != nil { glog.Error("Could not do a local sync of services: %s", err) wait = time.After(minWait) continue retry } // Update Virtual IPs if err := zkvirtualips.SyncVirtualIPs(conn, pool.VirtualIPs); err != nil { glog.Errorf("Could not sync virtual ips for %s: %s", pool.ID, err) wait = time.After(minWait) continue retry } } wait = time.After(maxWait) } }
func (dfs *DistributedFilesystem) Restore(filename string) error { // fail if any services are running dfs.log("Checking running services") svcs, err := dfs.facade.GetServices(datastore.Get(), dao.ServiceRequest{}) if err != nil { glog.Errorf("Could not acquire the list of all services: %s", err) return err } for _, svc := range svcs { conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(svc.PoolID)) if err != nil { glog.Errorf("Could not acquire connection to coordinator (%s): %s", svc.PoolID, err) return err } if states, err := zkservice.GetServiceStates(conn, svc.ID); err != nil { glog.Errorf("Could not look up running instances for %s (%s): %s", svc.Name, svc.ID, err) return err } else if running := len(states); running > 0 { err := fmt.Errorf("service %s (%s) has %d running instances", svc.Name, svc.ID, running) glog.Errorf("Cannot restore to %s: %s", filename, err) return err } } var reloadLogstashContainer bool defer func() { if reloadLogstashContainer { go facade.LogstashContainerReloader(datastore.Get(), dfs.facade) // don't block main thread } }() dirpath := filepath.Join(utils.BackupDir(dfs.varpath), "restore") if err := os.RemoveAll(dirpath); err != nil { glog.Errorf("Could not remove %s: %s", dirpath, err) return err } if err := mkdir(dirpath); err != nil { glog.Errorf("Could neither find nor create %s: %s", dirpath, err) return err } defer func() { if err := os.RemoveAll(dirpath); err != nil { glog.Warningf("Could not remove %s: %s", dirpath, err) } }() dfs.log("Extracting backup file %s", filename) if err := importTGZ(dirpath, filename); err != nil { glog.Errorf("Could not expand %s to %s: %s", filename, dirpath, err) return err } var metadata metadata if err := importJSON(filepath.Join(dirpath, meta), &metadata); err != nil { glog.Errorf("Could not import %s: %s", meta, err) return err } else if metadata.FSType != dfs.fsType { err = fmt.Errorf("this backup can only be run on %s", metadata.FSType) glog.Errorf("Could not extract backup %s: %s", 
filename, err) return err } var pools []pool.ResourcePool if err := importJSON(filepath.Join(dirpath, poolJSON), &pools); err != nil { glog.Errorf("Could not read resource pools from %s: %s", filename, err) return err } // restore the resource pools (and virtual ips) dfs.log("Loading resource pools") for _, pool := range pools { pool.DatabaseVersion = 0 glog.V(1).Infof("Restoring resource pool %s", pool.ID) var err error if err = dfs.facade.AddResourcePool(datastore.Get(), &pool); err == facade.ErrPoolExists { err = dfs.facade.UpdateResourcePool(datastore.Get(), &pool) } if err != nil { glog.Errorf("Could not restore resource pool %s: %s", pool.ID, err) return err } } dfs.log("Resource pool load successful") var hosts []host.Host if err := importJSON(filepath.Join(dirpath, hostJSON), &hosts); err != nil { glog.Errorf("Could not read hosts from %s: %s", filename, err) return err } // restore the hosts (add it if it doesn't exist; assume hosts don't need to be updated from backup) dfs.log("Loading static ips") for _, host := range hosts { host.DatabaseVersion = 0 glog.V(1).Infof("Restoring host %s (%s)", host.ID, host.IPAddr) if exists, err := dfs.facade.HasIP(datastore.Get(), host.PoolID, host.IPAddr); err != nil { glog.Errorf("Could not check IP %s for pool %s: %s", host.IPAddr, host.PoolID, err) return err } else if exists { glog.Warningf("Could not restore host %s (%s): ip already exists", host.ID, host.IPAddr) } else if err := dfs.facade.AddHost(datastore.Get(), &host); err != nil { glog.Errorf("Could not add host %s: %s", host.ID, err) return err } } dfs.log("Static ip load successful") var templates map[string]servicetemplate.ServiceTemplate if err := importJSON(filepath.Join(dirpath, templateJSON), &templates); err != nil { glog.Errorf("Could not read templates from %s: %s", filename, err) return err } // restore the service templates dfs.log("Loading service templates") for templateID, template := range templates { template.DatabaseVersion = 0 
glog.V(1).Infof("Restoring service template %s", templateID) template.ID = templateID if err := dfs.facade.UpdateServiceTemplate(datastore.Get(), template); err != nil { glog.Errorf("Could not restore template %s: %s", templateID, err) return err } reloadLogstashContainer = true } dfs.log("Service template load successful") // Get the tenant of all the services to be restored snapshotFiles, err := ls(filepath.Join(dirpath, snapshotDir)) tenantIDs := make(map[string]struct{}) for _, f := range snapshotFiles { tenantIDs[strings.TrimSuffix(f, ".tgz")] = struct{}{} } // restore docker images dfs.log("Loading docker images") var images []imagemeta if err := importJSON(filepath.Join(dirpath, imageJSON), &images); err != nil { glog.Errorf("Could not read images from %s: %s", filename, err) return err } if err := dfs.importImages(filepath.Join(dirpath, imageDir), images, tenantIDs); err != nil { glog.Errorf("Could not import images from %s: %s", filename, err) return err } dfs.log("Docker image load successful") // restore snapshots glog.V(1).Infof("Restoring services and snapshots") for _, f := range snapshotFiles { dfs.log("Loading %s", f) if err := dfs.loadSnapshots(strings.TrimSuffix(f, ".tgz"), filepath.Join(dirpath, snapshotDir, f)); err != nil { glog.Errorf("Could not import snapshot from %s: %s", f, err) return err } dfs.log("Successfully loaded %s", f) } glog.Infof("Restore succeeded with fsType:%s from file:%s", dfs.fsType, filename) return nil }
// startAgent brings up the host agent: it builds the TCP mux, discovers which
// resource pool this host belongs to (retrying via the master until shutdown),
// connects to zookeeper, optionally waits for the NFS storage leader, starts
// the HostAgent loop, and registers the agent RPC services.
func (d *daemon) startAgent() error {
	muxListener, err := createMuxListener()
	if err != nil {
		return err
	}
	mux, err := proxy.NewTCPMux(muxListener)
	if err != nil {
		return err
	}
	// figure out the outbound IP this agent advertises
	agentIP := options.OutboundIP
	if agentIP == "" {
		var err error
		agentIP, err = utils.GetIPAddress()
		if err != nil {
			glog.Fatalf("Failed to acquire ip address: %s", err)
		}
	}
	// derive the RPC port from the listen address ("host:port")
	rpcPort := "0"
	parts := strings.Split(options.Listen, ":")
	if len(parts) > 1 {
		rpcPort = parts[1]
	}
	thisHost, err := host.Build(agentIP, rpcPort, "unknown")
	if err != nil {
		panic(err)
	}
	myHostID, err := utils.HostID()
	if err != nil {
		return fmt.Errorf("HostID failed: %v", err)
	} else if err := validation.ValidHostID(myHostID); err != nil {
		// NOTE(review): an invalid host ID is only logged, not returned —
		// confirm this is intentional
		glog.Errorf("invalid hostid: %s", myHostID)
	}
	// pool discovery and agent startup happen asynchronously; startAgent
	// returns before the agent is fully up
	go func() {
		var poolID string
		for {
			// ask the master which pool this host belongs to; "" means retry
			poolID = func() string {
				glog.Infof("Trying to discover my pool...")
				var myHost *host.Host
				masterClient, err := master.NewClient(d.servicedEndpoint)
				if err != nil {
					glog.Errorf("master.NewClient failed (endpoint %+v) : %v", d.servicedEndpoint, err)
					return ""
				}
				defer masterClient.Close()
				myHost, err = masterClient.GetHost(myHostID)
				if err != nil {
					glog.Warningf("masterClient.GetHost %v failed: %v (has this host been added?)", myHostID, err)
					return ""
				}
				poolID = myHost.PoolID
				glog.Infof(" My PoolID: %v", poolID)
				//send updated host info
				updatedHost, err := host.UpdateHostInfo(*myHost)
				if err != nil {
					glog.Infof("Could not send updated host information: %v", err)
					return poolID
				}
				err = masterClient.UpdateHost(updatedHost)
				if err != nil {
					glog.Warningf("Could not update host information: %v", err)
					return poolID
				}
				glog.V(2).Infof("Sent updated host info %#v", updatedHost)
				return poolID
			}()
			if poolID != "" {
				break
			}
			// not discovered yet: wait 5s and retry, unless we are shutting down
			select {
			case <-d.shutdown:
				return
			case <-time.After(5 * time.Second):
				continue
			}
		}
		thisHost.PoolID = poolID
		basePoolPath := "/pools/" + poolID
		dsn := coordzk.NewDSN(options.Zookeepers, time.Second*15).String()
		glog.Infof("zookeeper dsn: %s", dsn)
		zClient, err := coordclient.New("zookeeper", dsn, basePoolPath, nil)
		if err != nil {
			// NOTE(review): error is only logged; zClient may be nil below — confirm
			glog.Errorf("failed create a new coordclient: %v", err)
		}
		zzk.InitializeLocalClient(zClient)
		poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID))
		if err != nil {
			// NOTE(review): error is only logged; poolBasedConn is later handed
			// to the stats reporter — confirm a nil connection is tolerated
			glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, err)
		}
		if options.NFSClient != "0" {
			nfsClient, err := storage.NewClient(thisHost, path.Join(options.VarPath, "volumes"))
			if err != nil {
				glog.Fatalf("could not create an NFS client: %s", err)
			}
			go func() {
				<-d.shutdown
				glog.Infof("shutting down storage client")
				nfsClient.Close()
			}()
			//loop and log waiting for Storage Leader
			nfsDone := make(chan struct{})
			go func() {
				defer close(nfsDone)
				nfsClient.Wait()
			}()
			//wait indefinitely(?) for storage to work before starting
			glog.Info("Waiting for Storage Leader")
			nfsUp := false
			for !nfsUp {
				select {
				case <-nfsDone:
					nfsUp = true
					glog.Info("Found Storage Leader")
					// this break only exits the select; the loop ends because
					// nfsUp is now true
					break
				case <-time.After(time.Second * 30):
					glog.Info("Waiting for Storage Leader, will not be available for running services. ")
					continue
				}
			}
		} else {
			glog.Info("NFS Client disabled")
		}
		agentOptions := node.AgentOptions{
			PoolID:               thisHost.PoolID,
			Master:               options.Endpoint,
			UIPort:               options.UIPort,
			RPCPort:              options.RPCPort,
			DockerDNS:            options.DockerDNS,
			VarPath:              options.VarPath,
			Mount:                options.Mount,
			FSType:               options.FSType,
			Zookeepers:           options.Zookeepers,
			Mux:                  mux,
			UseTLS:               options.TLS,
			DockerRegistry:       dockerRegistry,
			MaxContainerAge:      time.Duration(int(time.Second) * options.MaxContainerAge),
			VirtualAddressSubnet: options.VirtualAddressSubnet,
		}
		// creates a zClient that is not pool based!
		// NOTE(review): the error from NewHostAgent is not checked before
		// hostAgent is used — confirm
		hostAgent, err := node.NewHostAgent(agentOptions)
		d.hostAgent = hostAgent
		d.waitGroup.Add(1)
		go func() {
			hostAgent.Start(d.shutdown)
			glog.Info("Host Agent has shutdown")
			d.waitGroup.Done()
		}()
		// register the API
		glog.V(0).Infoln("registering ControlPlaneAgent service")
		if err = d.rpcServer.RegisterName("ControlPlaneAgent", hostAgent); err != nil {
			glog.Fatalf("could not register ControlPlaneAgent RPC server: %v", err)
		}
		if options.ReportStats {
			statsdest := fmt.Sprintf("http://%s/api/metrics/store", options.HostStats)
			statsduration := time.Duration(options.StatsPeriod) * time.Second
			glog.V(1).Infoln("Staring container statistics reporter")
			statsReporter, err := stats.NewStatsReporter(statsdest, statsduration, poolBasedConn)
			if err != nil {
				glog.Errorf("Error kicking off stats reporter %v", err)
			} else {
				go func() {
					defer statsReporter.Close()
					<-d.shutdown
				}()
			}
		}
	}()
	glog.Infof("agent start staticips: %v [%d]", d.staticIPs, len(d.staticIPs))
	if err = d.rpcServer.RegisterName("Agent", agent.NewServer(d.staticIPs)); err != nil {
		glog.Fatalf("could not register Agent RPC server: %v", err)
	}
	// NOTE(review): err here is the RegisterName error, which already
	// triggered Fatalf above — this check appears unreachable; confirm
	if err != nil {
		glog.Fatalf("Could not start ControlPlane agent: %v", err)
	}
	// TODO: Integrate this server into the rpc server, or something.
	// Currently its only use is for command execution.
	go func() {
		sio := shell.NewProcessExecutorServer(options.Endpoint, dockerRegistry)
		http.ListenAndServe(":50000", sio)
	}()
	return nil
}
// Start is the main loop of the HostAgent: it configures iptables, connects
// to the pool's zookeeper tree, and runs the state/virtual-IP/action
// listeners until shutdown is signaled, reconnecting when the listeners stop
// without a shutdown.
func (a *HostAgent) Start(shutdown <-chan interface{}) {
	glog.Info("Starting HostAgent")
	var wg sync.WaitGroup
	// block on exit until every agent goroutine has finished
	defer func() {
		glog.Info("Waiting for agent routines...")
		wg.Wait()
		glog.Info("Agent routines ended")
	}()
	wg.Add(1)
	go func() {
		glog.Info("reapOldContainersLoop starting")
		a.reapOldContainersLoop(time.Minute, shutdown)
		glog.Info("reapOldContainersLoop Done")
		wg.Done()
	}()
	// Increase the number of maximal tracked connections for iptables;
	// overridable via SERVICED_IPTABLES_MAX_CONNECTIONS
	maxConnections := "655360"
	if cnxns := strings.TrimSpace(os.Getenv("SERVICED_IPTABLES_MAX_CONNECTIONS")); cnxns != "" {
		maxConnections = cnxns
	}
	glog.Infof("Set sysctl maximum tracked connections for iptables to %s", maxConnections)
	// NOTE(review): SetSysctl's return value is ignored — confirm best-effort
	// is intended here
	utils.SetSysctl("net.netfilter.nf_conntrack_max", maxConnections)
	// Clean up any extant iptables chain, just in case
	a.servicedChain.Remove()
	// Add our chain for assigned IP rules
	if err := a.servicedChain.Inject(); err != nil {
		glog.Errorf("Error creating SERVICED iptables chain (%v)", err)
	}
	// Clean up when we're done
	defer a.servicedChain.Remove()
	for {
		// handle shutdown if we are waiting for a zk connection
		var conn coordclient.Connection
		select {
		case conn = <-zzk.Connect(zzk.GeneratePoolPath(a.poolID), zzk.GetLocalConnection):
		case <-shutdown:
			return
		}
		if conn == nil {
			// connection attempt failed; try again
			continue
		}
		glog.Info("Got a connected client")
		// watch virtual IP zookeeper nodes
		virtualIPListener := virtualips.NewVirtualIPListener(a, a.hostID)
		// watch docker action nodes
		actionListener := zkdocker.NewActionListener(a, a.hostID)
		// watch the host state nodes
		// this blocks until
		// 1) has a connection
		// 2) its node is registered
		// 3) receives signal to shutdown or breaks
		hsListener := zkservice.NewHostStateListener(a, a.hostID)
		glog.Infof("Host Agent successfully started")
		// run all listeners; returns when they stop (shutdown or lost connection)
		zzk.Start(shutdown, conn, hsListener, virtualIPListener, actionListener)
		// decide whether the listeners stopped because of shutdown or a
		// transient failure (in which case we loop and reconnect)
		select {
		case <-shutdown:
			glog.Infof("Host Agent shutting down")
			return
		default:
			glog.Infof("Host Agent restarting")
		}
	}
}
// getEndpoints builds exportedEndpoints and importedEndpoints for the
// controller's service. In "serviced shell" mode (SERVICED_IS_SERVICE_SHELL)
// only imported endpoints are built from a temporary service state; otherwise
// both exports and imports are built from the instance's stored service state.
func (c *Controller) getEndpoints(service *service.Service) error {
	var err error
	c.zkInfo, err = getAgentZkInfo(c.options.ServicedEndpoint)
	if err != nil {
		glog.Errorf("Invalid zk info: %v", err)
		return err //ErrInvalidZkInfo
	}
	glog.Infof(" c.zkInfo: %+v", c.zkInfo)
	// endpoints are created at the root level (not pool aware)
	rootBasePath := ""
	zClient, err := coordclient.New("zookeeper", c.zkInfo.ZkDSN, rootBasePath, nil)
	if err != nil {
		glog.Errorf("failed create a new coordclient: %v", err)
		return err
	}
	zzk.InitializeLocalClient(zClient)
	// get zookeeper connection
	// NOTE(review): the comment above says endpoints are root-level, yet the
	// connection is rooted at the service's pool path — confirm intent
	conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(service.PoolID))
	if err != nil {
		return fmt.Errorf("getEndpoints zzk.GetLocalConnection failed: %v", err)
	}
	if os.Getenv("SERVICED_IS_SERVICE_SHELL") == "true" {
		// this is not a running service, i.e. serviced shell/run
		if hostname, err := os.Hostname(); err != nil {
			glog.Errorf("could not get hostname: %s", err)
			return fmt.Errorf("getEndpoints failed could not get hostname: %v", err)
		} else {
			c.dockerID = hostname
		}
		// TODO: deal with exports in the future when there is a use case for it
		// build a temporary (not persisted) service state to derive imports from
		sstate, err := servicestate.BuildFromService(service, c.hostID)
		if err != nil {
			return fmt.Errorf("Unable to create temporary service state")
		}
		// initialize importedEndpoints
		c.importedEndpoints, err = buildImportedEndpoints(conn, c.tenantID, sstate)
		if err != nil {
			glog.Errorf("Invalid ImportedEndpoints")
			return ErrInvalidImportedEndpoints
		}
	} else {
		// get service state
		glog.Infof("getting service state: %s %v", c.options.Service.ID, c.options.Service.InstanceID)
		sstate, err := getServiceState(conn, c.options.Service.ID, c.options.Service.InstanceID)
		if err != nil {
			return fmt.Errorf("getEndpoints getServiceState failed: %v", err)
		}
		c.dockerID = sstate.DockerID
		// keep a copy of the service EndPoint exports
		c.exportedEndpoints, err = buildExportedEndpoints(conn, c.tenantID, sstate)
		if err != nil {
			glog.Errorf("Invalid ExportedEndpoints")
			return ErrInvalidExportedEndpoints
		}
		// initialize importedEndpoints
		c.importedEndpoints, err = buildImportedEndpoints(conn, c.tenantID, sstate)
		if err != nil {
			glog.Errorf("Invalid ImportedEndpoints")
			return ErrInvalidImportedEndpoints
		}
	}
	return nil
}