// sync synchronizes the number of running instances for this service func (l *ServiceListener) sync(svc *service.Service, rss []dao.RunningService) bool { // sort running services by instance ID, so that you stop instances by the // lowest instance ID first and start instances with the greatest instance // ID last. sort.Sort(instances(rss)) // resume any paused running services for _, state := range rss { // resumeInstance updates the service state ONLY if it has a PAUSED DesiredState if err := resumeInstance(l.conn, state.HostID, state.ID); err != nil { glog.Warningf("Could not resume paused service instance %s (%s) for service %s on host %s: %s", state.ID, state.Name, state.ServiceID, state.HostID, err) } } // if the service has a change option for restart all on changed, stop all // instances and wait for the nodes to stop. Once all service instances // have been stopped (deleted), then go ahead and start the instances back // up. if count := len(rss); count > 0 && count != svc.Instances && utils.StringInSlice("restartAllOnInstanceChanged", svc.ChangeOptions) { svc.Instances = 0 // NOTE: this will not update the node in zk or elastic } // netInstances is the difference between the number of instances that // should be running, as described by the service from the number of // instances that are currently running netInstances := svc.Instances - len(rss) if netInstances > 0 { // If the service lock is enabled, do not try to start any service instances // This will prevent the retry restart from activating if locked, err := IsServiceLocked(l.conn); err != nil { glog.Errorf("Could not check service lock: %s", err) return true } else if locked { glog.Warningf("Could not start %d instances; service %s (%s) is locked", netInstances, svc.Name, svc.ID) return true } // the number of running instances is *less* than the number of // instances that need to be running, so schedule instances to start glog.V(2).Infof("Starting %d instances of service %s (%s)", netInstances, svc.Name, svc.ID) var ( last = 0 instanceIDs = make([]int, netInstances) ) // Find which instances IDs are being unused and add those instances // first. All SERVICES must have an instance ID of 0, if instance ID // zero dies for whatever reason, then the service must schedule // another 0-id instance to take its place. j := 0 for i := range instanceIDs { for j < len(rss) && last == rss[j].InstanceID { // if instance ID exists, then keep searching the list for // the next unique instance ID last += 1 j += 1 } instanceIDs[i] = last last += 1 } return netInstances == l.start(svc, instanceIDs) } else if netInstances = -netInstances; netInstances > 0 { // the number of running instances is *greater* than the number of // instances that need to be running, so schedule instances to stop of // the highest instance IDs. glog.V(2).Infof("Stopping %d of %d instances of service %s (%s)", netInstances, len(rss), svc.Name, svc.ID) l.stop(rss[svc.Instances:]) } return true }