// StopServiceInstance stops a host state instance
func StopServiceInstance(conn client.Connection, hostID, stateID string) error {
	// verify that the host is active
	var isActive bool
	hostIDs, err := GetActiveHosts(conn)
	if err != nil {
		glog.Warningf("Could not verify if host %s is active: %s", hostID, err)
		isActive = false
	} else {
		for _, hid := range hostIDs {
			if isActive = hid == hostID; isActive {
				break
			}
		}
	}

	if isActive {
		// try to stop the instance nicely
		return updateInstance(conn, hostID, stateID, func(hsdata *HostState, _ *ss.ServiceState) {
			glog.V(2).Infof("Stopping service instance via %s host %s", stateID, hostID)
			hsdata.DesiredState = int(service.SVCStop)
		})
	} else {
		// if the host isn't active, then remove the instance
		var hs HostState
		if err := conn.Get(hostpath(hostID, stateID), &hs); err != nil {
			glog.Errorf("Could not look up host instance %s on host %s: %s", stateID, hostID, err)
			return err
		}
		return removeInstance(conn, hs.ServiceID, hs.HostID, hs.ServiceStateID)
	}
}
// Set node to the key in registry. Returns the path of the node in the registry
func (r *registryType) setItem(conn client.Connection, key string, nodeID string, node client.Node) (string, error) {
	if err := r.ensureKey(conn, key); err != nil {
		return "", err
	}

	// TODO: make ephemeral
	path := r.getPath(key, nodeID)
	exists, err := zzk.PathExists(conn, path)
	if err != nil {
		return "", err
	}
	if exists {
		glog.V(3).Infof("Set to %s: %#v", path, node)
		epn := EndpointNode{}
		if err := conn.Get(path, &epn); err != nil {
			return "", err
		}
		node.SetVersion(epn.Version())
		if err := conn.Set(path, node); err != nil {
			return "", err
		}
	} else {
		if addPath, err := r.addItem(conn, key, nodeID, node); err != nil {
			return "", err
		} else {
			path = addPath
		}
		glog.V(3).Infof("Add to %s: %#v", path, node)
	}
	return path, nil
}
// LoadRunningServicesByHost returns a slice of RunningServices for the given host(s)
func LoadRunningServicesByHost(conn client.Connection, hostIDs ...string) ([]dao.RunningService, error) {
	rss := make([]dao.RunningService, 0)
	for _, hostID := range hostIDs {
		if exists, err := zzk.PathExists(conn, hostpath(hostID)); err != nil {
			return nil, err
		} else if !exists {
			continue
		}
		stateIDs, err := conn.Children(hostpath(hostID))
		if err != nil {
			return nil, err
		}
		for _, ssID := range stateIDs {
			var hs HostState
			if err := conn.Get(hostpath(hostID, ssID), &hs); err != nil {
				return nil, err
			}
			rs, err := LoadRunningService(conn, hs.ServiceID, hs.ServiceStateID)
			if err != nil {
				return nil, err
			}
			rss = append(rss, *rs)
		}
	}
	return rss, nil
}
// GetItem gets the EndpointNode at the given path.
func (ar *EndpointRegistry) GetItem(conn client.Connection, path string) (*EndpointNode, error) {
	var ep EndpointNode
	if err := conn.Get(path, &ep); err != nil {
		glog.Errorf("Could not get EndpointNode at %s: %s", path, err)
		return nil, err
	}
	return &ep, nil
}
// GetItem gets the VhostEndpoint at the given path.
func (vr *VhostRegistry) GetItem(conn client.Connection, path string) (*VhostEndpoint, error) {
	var vep VhostEndpoint
	if err := conn.Get(path, &vep); err != nil {
		glog.Errorf("Could not get vhost endpoint at %s: %s", path, err)
		return nil, err
	}
	return &vep, nil
}
// UpdateResourcePool updates the node of an existing resource pool
func UpdateResourcePool(conn client.Connection, pool *pool.ResourcePool) error {
	var node PoolNode
	if err := conn.Get(poolpath(pool.ID), &node); err != nil {
		return err
	}
	node.ResourcePool = pool
	return conn.Set(poolpath(pool.ID), &node)
}
// StopService schedules a service to stop
func StopService(conn client.Connection, serviceID string) error {
	glog.Infof("Scheduling service %s to stop", serviceID)
	var node ServiceNode
	path := servicepath(serviceID)
	if err := conn.Get(path, &node); err != nil {
		return err
	}
	node.Service.DesiredState = int(service.SVCStop)
	return conn.Set(path, &node)
}
// getStatus computes the status of a service state
func getStatus(conn client.Connection, state *servicestate.ServiceState) (dao.Status, error) {
	var status dao.Status

	// Set the state based on the service state object
	if !state.IsRunning() {
		status = dao.Stopped
	} else if state.IsPaused() {
		status = dao.Paused
	} else {
		status = dao.Running
	}

	// Set the state based on the host state object
	var hostState HostState
	if err := conn.Get(hostpath(state.HostID, state.ID), &hostState); err != nil && err != client.ErrNoNode {
		return dao.Status{}, err
	}

	if hostState.DesiredState == int(service.SVCStop) {
		switch status {
		case dao.Running, dao.Paused:
			status = dao.Stopping
		case dao.Stopped:
			// pass
		default:
			return dao.Status{}, ErrUnknownState
		}
	} else if hostState.DesiredState == int(service.SVCRun) {
		switch status {
		case dao.Stopped:
			status = dao.Starting
		case dao.Paused:
			status = dao.Resuming
		case dao.Running:
			// pass
		default:
			return dao.Status{}, ErrUnknownState
		}
	} else if hostState.DesiredState == int(service.SVCPause) {
		switch status {
		case dao.Running:
			status = dao.Pausing
		case dao.Paused, dao.Stopped:
			// pass
		default:
			return dao.Status{}, ErrUnknownState
		}
	} else {
		return dao.Status{}, ErrUnknownState
	}

	return status, nil
}
// LoadRunningService returns a RunningService object given a coordinator connection
func LoadRunningService(conn client.Connection, serviceID, ssID string) (*dao.RunningService, error) {
	var service ServiceNode
	if err := conn.Get(servicepath(serviceID), &service); err != nil {
		return nil, err
	}

	var state ServiceStateNode
	if err := conn.Get(servicepath(serviceID, ssID), &state); err != nil {
		return nil, err
	}

	return NewRunningService(service.Service, state.ServiceState)
}
// UpdateService updates a service node if it exists, otherwise creates it
func UpdateService(conn client.Connection, svc *service.Service) error {
	var node ServiceNode
	spath := servicepath(svc.ID)

	// For some reason you can't just create the node with the service data
	// already set. Trust me, I tried. It was very aggravating.
	if err := conn.Get(spath, &node); err != nil {
		if err := conn.Create(spath, &node); err != nil {
			glog.Errorf("Error trying to create node at %s: %s", spath, err)
		}
	}
	node.Service = svc
	return conn.Set(spath, &node)
}
// UpdateServiceVhost updates a service vhost node if it exists, otherwise creates it
func UpdateServiceVhost(conn client.Connection, serviceID, vhostname string) error {
	glog.V(2).Infof("UpdateServiceVhost serviceID:%s vhostname:%s", serviceID, vhostname)
	var node ServiceVhostNode
	spath := servicevhostpath(serviceID, vhostname)

	// For some reason you can't just create the node with the service data
	// already set. Trust me, I tried. It was very aggravating.
	if err := conn.Get(spath, &node); err != nil {
		if err := conn.Create(spath, &node); err != nil {
			glog.Errorf("Error trying to create node at %s: %s", spath, err)
		}
	}
	node.ServiceID = serviceID
	node.Vhost = vhostname
	glog.V(2).Infof("Adding service vhost at path:%s %+v", spath, node)
	return conn.Set(spath, &node)
}
// updateInstance updates the service state and host instances
func updateInstance(conn client.Connection, hostID, stateID string, mutate func(*HostState, *ss.ServiceState)) error {
	glog.V(2).Infof("Updating instance %s", stateID)

	// do not lock if the parent lock does not exist
	if exists, err := conn.Exists(path.Join(zkInstanceLock, stateID)); err != nil && err != client.ErrNoNode {
		glog.Errorf("Could not check for lock on instance %s: %s", stateID, err)
		return err
	} else if !exists {
		glog.Errorf("Lock not found for instance %s", stateID)
		return ErrLockNotFound
	}

	lock := newInstanceLock(conn, stateID)
	if err := lock.Lock(); err != nil {
		glog.Errorf("Could not set lock for service instance %s on host %s: %s", stateID, hostID, err)
		return err
	}
	defer lock.Unlock()
	glog.V(2).Infof("Acquired lock for instance %s", stateID)

	hpath := hostpath(hostID, stateID)
	var hsdata HostState
	if err := conn.Get(hpath, &hsdata); err != nil {
		glog.Errorf("Could not get instance %s for host %s: %s", stateID, hostID, err)
		return err
	}

	serviceID := hsdata.ServiceID
	spath := servicepath(serviceID, stateID)
	var ssnode ServiceStateNode
	if err := conn.Get(spath, &ssnode); err != nil {
		glog.Errorf("Could not get instance %s for service %s: %s", stateID, serviceID, err)
		return err
	}

	mutate(&hsdata, ssnode.ServiceState)

	if err := conn.Set(hpath, &hsdata); err != nil {
		glog.Errorf("Could not update instance %s for host %s: %s", stateID, hostID, err)
		return err
	}
	if err := conn.Set(spath, &ssnode); err != nil {
		glog.Errorf("Could not update instance %s for service %s: %s", stateID, serviceID, err)
		return err
	}

	glog.V(2).Infof("Releasing lock for instance %s", stateID)
	return nil
}
// GetVHostKeyChildren gets the ephemeral nodes of a vhost key (an example of a key is 'hbase')
func (vr *VhostRegistry) GetVHostKeyChildren(conn client.Connection, vhostKey string) ([]VhostEndpoint, error) {
	var vhostEphemeralNodes []VhostEndpoint

	vhostChildren, err := conn.Children(vhostPath(vhostKey))
	if err == client.ErrNoNode {
		return vhostEphemeralNodes, nil
	}
	if err != nil {
		return vhostEphemeralNodes, err
	}

	for _, vhostChild := range vhostChildren {
		var vep VhostEndpoint
		if err := conn.Get(vhostPath(vhostKey, vhostChild), &vep); err != nil {
			return vhostEphemeralNodes, err
		}
		vhostEphemeralNodes = append(vhostEphemeralNodes, vep)
	}
	return vhostEphemeralNodes, nil
}
// getActiveRemoteHosts returns a slice of activeClientIPs
func getActiveRemoteHosts(conn client.Connection, storageClientsPath string, monitorInterval time.Duration) []string {
	// clients is not the full list of remotes when called from server.go - retrieve our own list from zookeeper
	remoteIPs, err := getAllRemoteHostsFromZookeeper(conn, storageClientsPath)
	if err != nil {
		return []string{}
	}
	glog.V(2).Infof("DFS remote IPs: %+v", remoteIPs)

	// determine active hosts
	var activeClientIPs []string
	now := time.Now()
	for _, clnt := range remoteIPs {
		cp := path.Join(storageClientsPath, clnt)
		glog.V(2).Infof("retrieving info for DFS for remoteIP %s at zookeeper node %s", clnt, cp)

		hnode := StorageClientHostNode{}
		err := conn.Get(cp, &hnode)
		if err != nil && err != client.ErrEmptyNode {
			glog.Errorf("DFS could not get remote host zookeeper node %s: %s", cp, err)
			continue
		}

		if hnode.Host.UpdatedAt.IsZero() {
			glog.Infof("DFS not monitoring non-active host %+v: HostID:%v UpdatedAt:%+v", clnt, hnode.ID, hnode.UpdatedAt)
			continue
		}

		elapsed := now.Sub(hnode.Host.UpdatedAt)
		glog.V(2).Infof("retrieved info for DFS for remoteIP %s UpdatedAt:%s elapsed:%s monitorInterval:%s", clnt, hnode.Host.UpdatedAt, elapsed, monitorInterval)
		if elapsed > monitorInterval {
			glog.Infof("DFS not monitoring non-active host %+v: HostID:%v UpdatedAt:%+v lastseen:%s ago", clnt, hnode.ID, hnode.UpdatedAt, elapsed)
			continue
		}

		activeClientIPs = append(activeClientIPs, clnt)
	}
	glog.V(2).Infof("DFS remote active IPs: %+v", activeClientIPs)
	return activeClientIPs
}
// WaitService waits for a particular service's instances to reach a particular state
func WaitService(shutdown <-chan interface{}, conn client.Connection, serviceID string, desiredState service.DesiredState) error {
	for {
		// Get the list of service states
		stateIDs, event, err := conn.ChildrenW(servicepath(serviceID))
		if err != nil {
			return err
		}

		count := len(stateIDs)

		switch desiredState {
		case service.SVCStop:
			// if there are no instances, then the service is stopped
			if count == 0 {
				return nil
			}
		case service.SVCRun, service.SVCRestart:
			// figure out which service instances are actively running and decrement non-running instances
			for _, stateID := range stateIDs {
				var state ServiceStateNode
				if err := conn.Get(servicepath(serviceID, stateID), &state); err == client.ErrNoNode {
					// if the instance does not exist, then that instance is not running
					count--
				} else if err != nil {
					return err
				} else if !state.IsRunning() {
					count--
				}
			}

			// Get the service node and verify that the number of running instances meets or exceeds the number
			// of instances required by the service
			var service ServiceNode
			if err := conn.Get(servicepath(serviceID), &service); err != nil {
				return err
			} else if count >= service.Instances {
				return nil
			}
		case service.SVCPause:
			// figure out which services have stopped or paused
			for _, stateID := range stateIDs {
				var state ServiceStateNode
				if err := conn.Get(servicepath(serviceID, stateID), &state); err == client.ErrNoNode {
					// if the instance does not exist, then it is not running (so it is paused)
					count--
				} else if err != nil {
					return err
				} else if state.IsPaused() {
					count--
				}
			}

			// no instances should be running for all instances to be considered paused
			if count == 0 {
				return nil
			}
		default:
			return fmt.Errorf("invalid desired state")
		}

		if len(stateIDs) > 0 {
			// wait for each instance to reach the desired state
			for _, stateID := range stateIDs {
				if err := wait(shutdown, conn, serviceID, stateID, desiredState); err != nil {
					return err
				}
			}
			select {
			case <-shutdown:
				return zzk.ErrShutdown
			default:
			}
		} else {
			// otherwise, wait for a change in the number of children
			select {
			case <-event:
			case <-shutdown:
				return zzk.ErrShutdown
			}
		}
	}
}
// GetServiceState gets a service state
func GetServiceState(conn client.Connection, state *servicestate.ServiceState, serviceID string, stateID string) error {
	return conn.Get(servicepath(serviceID, stateID), &ServiceStateNode{ServiceState: state})
}