// Spawn listens for changes in the host state and manages running instances func (l *HostStateListener) Spawn(shutdown <-chan interface{}, stateID string) { var processDone <-chan struct{} // Let's have exclusive access to this node lock := newInstanceLock(l.conn, stateID) if err := lock.Lock(); err != nil { glog.Errorf("Could not lock service instance %s on host %s: %s", stateID, l.hostID, err) return } // Get the HostState node var hs HostState if err := l.conn.Get(hostpath(l.hostID, stateID), &hs); err != nil { glog.Errorf("Could not load host instance %s on host %s: %s", stateID, l.hostID, err) l.conn.Delete(hostpath(l.hostID, stateID)) lock.Unlock() return } defer removeInstance(l.conn, hs.ServiceID, hs.HostID, hs.ServiceStateID) // Get the ServiceState node var ss servicestate.ServiceState if err := l.conn.Get(servicepath(hs.ServiceID, hs.ServiceStateID), &ServiceStateNode{ServiceState: &ss}); err != nil { glog.Errorf("Could not load service instance %s for service %s on host %s: %s", hs.ServiceStateID, hs.ServiceID, hs.HostID, err) lock.Unlock() return } defer l.stopInstance(processDone, &ss) lock.Unlock() for { // Get the HostState instance hsEvt, err := l.conn.GetW(hostpath(l.hostID, stateID), &hs) if err != nil { glog.Errorf("Could not load host instance %s on host %s: %s", stateID, l.hostID, err) return } // Get the ServiceState instance ssEvt, err := l.conn.GetW(servicepath(hs.ServiceID, stateID), &ServiceStateNode{ServiceState: &ss}) if err != nil { glog.Errorf("Could not load service state %s for service %s on host %s: %s", stateID, hs.ServiceID, l.hostID, err) return } // Get the service var svc service.Service if err := l.conn.Get(servicepath(hs.ServiceID), &ServiceNode{Service: &svc}); err != nil { glog.Errorf("Could not load service %s for service instance %s on host %s: %s", hs.ServiceID, stateID, l.hostID, err) return } // Process the desired state glog.V(2).Infof("Processing %s (%s); Desired State: %d", svc.Name, svc.ID, hs.DesiredState) switch service.DesiredState(hs.DesiredState) { case service.SVCRun: var err error if !ss.IsRunning() { // process has stopped glog.Infof("Starting a new instance for %s (%s): %s", svc.Name, svc.ID, stateID) if processDone, err = l.startInstance(&svc, &ss); err != nil { glog.Errorf("Could not start service instance %s for service %s on host %s: %s", hs.ServiceStateID, hs.ServiceID, hs.HostID, err) return } } else if processDone == nil { glog.Infof("Attaching to instance %s for %s (%s) via %s", stateID, svc.Name, svc.ID, ss.DockerID) if processDone, err = l.attachInstance(&svc, &ss); err != nil { glog.Errorf("Could not start service instance %s for service %s on host %s: %s", hs.ServiceStateID, hs.ServiceID, hs.HostID, err) return } } if ss.IsPaused() { glog.Infof("Resuming paused instance %s for service %s (%s)", stateID, svc.Name, svc.ID) if err := l.resumeInstance(&svc, &ss); err != nil { glog.Errorf("Could not resume paused instance %s for service %s (%s): %s", stateID, svc.Name, svc.ID, err) return } } case service.SVCPause: if !ss.IsPaused() { if err := l.pauseInstance(&svc, &ss); err != nil { glog.Errorf("Could not pause instance %s for service %s (%s): %s", stateID, svc.Name, svc.ID, err) return } } case service.SVCStop: return default: glog.V(2).Infof("Unhandled state (%d) of instance %s for service %s (%s)", hs.DesiredState, stateID, svc.Name, svc.ID, err) } select { case <-processDone: glog.V(2).Infof("Process ended for instance %s for service %s (%s)", stateID, svc.Name, svc.ID) case e := <-hsEvt: glog.V(3).Infof("Host instance %s for service %s (%s) received an event: %+v", stateID, svc.Name, svc.ID, e) if e.Type == client.EventNodeDeleted { return } case e := <-ssEvt: glog.V(3).Infof("Service instance %s for service %s (%s) received an event: %+v", stateID, svc.Name, svc.ID, e) if e.Type == client.EventNodeDeleted { return } case <-shutdown: glog.V(2).Infof("Host instance %s for service %s (%s) received signal to shutdown", stateID, svc.Name, svc.ID) return } } }
// Spawn watches a service and syncs the number of running instances func (l *ServiceListener) Spawn(shutdown <-chan interface{}, serviceID string) { // CC-1050: create the lock path for the service slockpath := path.Join(zkStateLock, serviceID) l.conn.CreateDir(slockpath) defer l.conn.Delete(slockpath) for { var retry <-chan time.Time var err error // set up the global lock var glock <-chan client.Event if exists, err := l.conn.Exists(zkServiceLock); err != nil && err != client.ErrNoNode { glog.Errorf("Could not monitor service lock: %s", err) return } else if exists { if _, glock, err = l.conn.ChildrenW(zkServiceLock); err != nil { glog.Errorf("Could not monitor service lock: %s", err) return } } // CC-1050: set up the service lock var slock <-chan client.Event if _, slock, err = l.conn.ChildrenW(slockpath); err != nil { glog.Errorf("Could not monitor state lock for service %s; %s", serviceID, err) return } var svc service.Service serviceEvent, err := l.conn.GetW(l.GetPath(serviceID), &ServiceNode{Service: &svc}) if err != nil { glog.Errorf("Could not load service %s: %s", serviceID, err) return } stateIDs, stateEvent, err := l.conn.ChildrenW(l.GetPath(serviceID)) if err != nil { glog.Errorf("Could not load service states for %s: %s", serviceID, err) return } rss, err := l.getServiceStates(&svc, stateIDs) if err != nil { glog.Warningf("Could not load service states for %s: %s", serviceID, err) retry = time.After(retryTimeout) } else { // Should the service be running at all? switch service.DesiredState(svc.DesiredState) { case service.SVCStop: l.stop(rss) case service.SVCRun: if !l.sync(&svc, rss) { retry = time.After(retryTimeout) } case service.SVCPause: l.pause(rss) default: glog.Warningf("Unexpected desired state %d for service %s (%s)", svc.DesiredState, svc.Name, svc.ID) } } glog.V(2).Infof("Service %s (%s) waiting for event", svc.Name, svc.ID) select { case <-glock: // passthrough glog.V(3).Infof("Receieved a global lock event, resyncing") case <-slock: // passthrough glog.V(3).Infof("Receieved a service lock event, resyncing") case e := <-serviceEvent: if e.Type == client.EventNodeDeleted { glog.V(2).Infof("Shutting down service %s (%s) due to node delete", svc.Name, svc.ID) l.stop(rss) return } glog.V(2).Infof("Service %s (%s) received event: %v", svc.Name, svc.ID, e) case e := <-stateEvent: if e.Type == client.EventNodeDeleted { glog.V(2).Infof("Shutting down service %s (%s) due to node delete", svc.Name, svc.ID) l.stop(rss) return } glog.V(2).Infof("Service %s (%s) received event: %v", svc.Name, svc.ID, e) case <-retry: glog.Infof("Re-syncing service %s (%s)", svc.Name, svc.ID) case <-shutdown: glog.V(2).Infof("Leader stopping watch for %s (%s)", svc.Name, svc.ID) return } } }
// serviced service status func (c *ServicedCli) cmdServiceStatus(ctx *cli.Context) { var services []service.Service if len(ctx.Args()) > 0 { svc, err := c.searchForService(ctx.Args()[0]) if err != nil { fmt.Fprintln(os.Stderr, err) return } else if svc == nil { fmt.Fprintln(os.Stderr, "service not found") return } services = []service.Service{*svc} // ensure that parent services are in services for _, s := range services { parentID := s.ParentServiceID for parentID != "" { svc, err := c.driver.GetService(parentID) if err != nil || svc == nil { fmt.Fprintln(os.Stderr, "unable to retrieve service for id:%s %s", parentID, err) return } services = append(services, *svc) parentID = svc.ParentServiceID } } } else { var err error services, err = c.driver.GetServices() if err != nil { fmt.Fprintln(os.Stderr, err) return } else if services == nil || len(services) == 0 { fmt.Fprintln(os.Stderr, "no services found") return } } hosts, err := c.driver.GetHosts() if err != nil { fmt.Fprintln(os.Stderr, err) return } hostmap := make(map[string]host.Host) for _, host := range hosts { hostmap[host.ID] = host } lines := make(map[string]map[string]string) for _, svc := range services { glog.V(2).Infof("Getting service status for %s %s", svc.ID, svc.Name) statemap, err := c.driver.GetServiceStatus(svc.ID) if err != nil { fmt.Fprintln(os.Stderr, err) return } iid := svc.ID lines[iid] = map[string]string{ "ID": svc.ID, "ServiceID": svc.ID, "Name": svc.Name, "ParentID": svc.ParentServiceID, } if statemap == nil || len(statemap) == 0 { if svc.Instances > 0 { switch service.DesiredState(svc.DesiredState) { case service.SVCRun: lines[iid]["Status"] = dao.Scheduled.String() case service.SVCPause: lines[iid]["Status"] = dao.Paused.String() case service.SVCStop: lines[iid]["Status"] = dao.Stopped.String() } } } else { if svc.Instances > 1 { delete(lines, iid) } for _, svcstatus := range statemap { if svc.Instances > 1 { iid = fmt.Sprintf("%s/%d", svc.ID, svcstatus.State.InstanceID) lines[iid] = map[string]string{ "ID": iid, "ServiceID": svc.ID, "Name": fmt.Sprintf("%s/%d", svc.Name, svcstatus.State.InstanceID), "ParentID": svc.ParentServiceID, } } lines[iid]["Hostname"] = hostmap[svcstatus.State.HostID].Name lines[iid]["DockerID"] = fmt.Sprintf("%.12s", svcstatus.State.DockerID) lines[iid]["Uptime"] = svcstatus.State.Uptime().String() lines[iid]["Status"] = svcstatus.Status.String() insync := "Y" if !svcstatus.State.InSync { insync = "N" } lines[iid]["InSync"] = insync } } } childMap := make(map[string][]string) top := make([]string, 0) for _, line := range lines { children := make([]string, 0) for _, cline := range lines { if cline["ParentID"] == line["ID"] { children = append(children, cline["ID"]) } } if len(children) > 0 { childMap[line["ID"]] = children } if line["ParentID"] == "" { top = append(top, line["ID"]) } } cmdSetTreeCharset(ctx) childMap[""] = top t := NewTable("NAME,ID,STATUS,UPTIME,HOST,IN_SYNC,DOCKER_ID") var addRows func(string) addRows = func(root string) { rowids := childMap[root] if len(rowids) > 0 { sort.Strings(rowids) t.IndentRow() defer t.DedentRow() for _, rowid := range rowids { row := lines[rowid] t.AddRow(map[string]interface{}{ "NAME": row["Name"], "ID": row["ID"], "STATUS": row["Status"], "UPTIME": row["Uptime"], "HOST": row["Hostname"], "IN_SYNC": row["InSync"], "DOCKER_ID": row["DockerID"], }) addRows(row["ID"]) } } } addRows("") t.Padding = 6 t.Print() return }