func MonitorResourcePool(shutdown <-chan interface{}, conn client.Connection, poolID string) <-chan *pool.ResourcePool { monitor := make(chan *pool.ResourcePool) go func() { defer close(monitor) if err := zzk.Ready(shutdown, conn, poolpath(poolID)); err != nil { glog.V(2).Infof("Could not watch pool %s: %s", poolID, err) return } for { var node PoolNode event, err := conn.GetW(poolpath(poolID), &node) if err != nil { glog.V(2).Infof("Could not get pool %s: %s", poolID, err) return } select { case monitor <- node.ResourcePool: case <-shutdown: return } select { case <-event: case <-shutdown: return } } }() return monitor }
// wait waits for an individual service state to reach its desired state func wait(shutdown <-chan interface{}, conn client.Connection, serviceID, stateID string, dstate service.DesiredState) error { for { var node ServiceStateNode event, err := conn.GetW(servicepath(serviceID, stateID), &node) if err == client.ErrNoNode { // if the node no longer exists, then there is nothing to watch, so we are done return nil } else if err != nil { glog.Errorf("Got an error while looking for %s (%s): %s", stateID, serviceID, err) return err } switch dstate { case service.SVCStop: // pass through, because the node needs to be deleted to be considered Stopped case service.SVCRun, service.SVCRestart: if node.IsRunning() { // instance reached the desired state return nil } case service.SVCPause: if node.IsPaused() { // instance reached the desired state return nil } } // wait for something to change on the node or shutdown select { case <-event: case <-shutdown: return zzk.ErrShutdown } } }
// Ready waits for a node to be available for watching func Ready(shutdown <-chan interface{}, conn client.Connection, p string) error { if exists, err := PathExists(conn, p); err != nil { return err } else if exists { return nil } else if p == "/" || p == "." { return fmt.Errorf("base path not found") } for { if err := Ready(shutdown, conn, path.Dir(p)); err != nil { return err } _, event, err := conn.ChildrenW(path.Dir(p)) if err != nil { return err } if exists, err := PathExists(conn, p); err != nil { return err } else if exists { return nil } select { case <-event: // pass case <-shutdown: return ErrShutdown } } }
func (r *registryType) watchItem(conn client.Connection, path string, nodeType client.Node, cancel <-chan bool, processNode func(conn client.Connection, node client.Node), errorHandler WatchError) error { exists, err := zzk.PathExists(conn, path) if err != nil { return err } if !exists { return client.ErrNoNode } for { event, err := conn.GetW(path, nodeType) if err != nil { glog.Errorf("Could not watch %s: %s", path, err) defer errorHandler(path, err) return err } processNode(conn, nodeType) //This blocks until a change happens under the key select { case ev := <-event: glog.V(2).Infof("watch event %+v at path: %s", ev, path) case <-cancel: return nil } } return nil }
// IsServiceLocked verifies whether services are locked func IsServiceLocked(conn client.Connection) (bool, error) { locks, err := conn.Children(zkServiceLock) if err == client.ErrNoNode { return false, nil } return len(locks) > 0, err }
// PathExists verifies if a path exists and does not raise an exception if the // path does not exist func PathExists(conn client.Connection, p string) (bool, error) { exists, err := conn.Exists(p) if err == client.ErrNoNode { return false, nil } return exists, err }
// WatchServiceLock waits for a service lock to be enabled/disabled func WaitServiceLock(shutdown <-chan interface{}, conn client.Connection, enabled bool) error { for { // make sure the parent exists if err := zzk.Ready(shutdown, conn, zkServiceLock); err != nil { return err } // check if the lock is enabled nodes, event, err := conn.ChildrenW(zkServiceLock) if err != nil { return err } // check that the states match if locked := len(nodes) > 0; locked == enabled { return nil } // wait to reach the desired state or shutdown select { case <-event: // pass case <-shutdown: return zzk.ErrTimeout } } }
// LoadRunningServicesByHost returns a slice of RunningServices given a host(s) func LoadRunningServicesByHost(conn client.Connection, hostIDs ...string) ([]dao.RunningService, error) { var rss []dao.RunningService = make([]dao.RunningService, 0) for _, hostID := range hostIDs { if exists, err := zzk.PathExists(conn, hostpath(hostID)); err != nil { return nil, err } else if !exists { continue } stateIDs, err := conn.Children(hostpath(hostID)) if err != nil { return nil, err } for _, ssID := range stateIDs { var hs HostState if err := conn.Get(hostpath(hostID, ssID), &hs); err != nil { return nil, err } rs, err := LoadRunningService(conn, hs.ServiceID, hs.ServiceStateID) if err != nil { return nil, err } rss = append(rss, *rs) } } return rss, nil }
// StopServiceInstance stops a host state instance func StopServiceInstance(conn client.Connection, hostID, stateID string) error { // verify that the host is active var isActive bool hostIDs, err := GetActiveHosts(conn) if err != nil { glog.Warningf("Could not verify if host %s is active: %s", hostID, err) isActive = false } else { for _, hid := range hostIDs { if isActive = hid == hostID; isActive { break } } } if isActive { // try to stop the instance nicely return updateInstance(conn, hostID, stateID, func(hsdata *HostState, _ *ss.ServiceState) { glog.V(2).Infof("Stopping service instance via %s host %s", stateID, hostID) hsdata.DesiredState = int(service.SVCStop) }) } else { // if the host isn't active, then remove the instance var hs HostState if err := conn.Get(hostpath(hostID, stateID), &hs); err != nil { glog.Errorf("Could not look up host instance %s on host %s: %s", stateID, hostID, err) return err } return removeInstance(conn, hs.ServiceID, hs.HostID, hs.ServiceStateID) } }
//Set node to the key in registry. Returns the path of the node in the registry func (r *registryType) setItem(conn client.Connection, key string, nodeID string, node client.Node) (string, error) { if err := r.ensureKey(conn, key); err != nil { return "", err } //TODO: make ephemeral path := r.getPath(key, nodeID) exists, err := zzk.PathExists(conn, path) if err != nil { return "", err } if exists { glog.V(3).Infof("Set to %s: %#v", path, node) epn := EndpointNode{} if err := conn.Get(path, &epn); err != nil { return "", err } node.SetVersion(epn.Version()) if err := conn.Set(path, node); err != nil { return "", err } } else { if addPath, err := r.addItem(conn, key, nodeID, node); err != nil { return "", err } else { path = addPath } glog.V(3).Infof("Add to %s: %#v", path, node) } return path, nil }
func watch(conn client.Connection, path string, cancel <-chan bool, processChildren ProcessChildrenFunc, errorHandler WatchError) error { exists, err := zzk.PathExists(conn, path) if err != nil { return err } if !exists { return client.ErrNoNode } for { glog.V(1).Infof("watching children at path: %s", path) nodeIDs, event, err := conn.ChildrenW(path) glog.V(1).Infof("child watch for path %s returned: %#v", path, nodeIDs) if err != nil { glog.Errorf("Could not watch %s: %s", path, err) defer errorHandler(path, err) return err } processChildren(conn, path, nodeIDs...) select { case ev := <-event: glog.V(1).Infof("watch event %+v at path: %s", ev, path) case <-cancel: glog.V(1).Infof("watch cancel at path: %s", path) return nil } } glog.V(1).Infof("no longer watching children at path: %s", path) return nil }
func UpdateResourcePool(conn client.Connection, pool *pool.ResourcePool) error { var node PoolNode if err := conn.Get(poolpath(pool.ID), &node); err != nil { return err } node.ResourcePool = pool return conn.Set(poolpath(pool.ID), &node) }
// GetItem gets EndpointNode at the given path. func (ar *EndpointRegistry) GetItem(conn client.Connection, path string) (*EndpointNode, error) { var ep EndpointNode if err := conn.Get(path, &ep); err != nil { glog.Errorf("Could not get EndpointNode at %s: %s", path, err) return nil, err } return &ep, nil }
func RemoveVirtualIP(conn client.Connection, ip string) error { glog.V(1).Infof("Removing virtual ip from zookeeper: %s", vippath(ip)) err := conn.Delete(vippath(ip)) if err == nil || err == client.ErrNoNode { return nil } return err }
//GetItem gets VhostEndpoint at the given path. func (vr *VhostRegistry) GetItem(conn client.Connection, path string) (*VhostEndpoint, error) { var vep VhostEndpoint if err := conn.Get(path, &vep); err != nil { glog.Infof("Could not get vhost endpoint at %s: %s", path, err) return nil, err } return &vep, nil }
// getAllRemoteHostsFromZookeeper retrieves a list of remote storage clients from zookeeper func getAllRemoteHostsFromZookeeper(conn client.Connection, storageClientsPath string) ([]string, error) { clients, err := conn.Children(storageClientsPath) if err != nil { glog.Errorf("unable to retrieve list of DFS remote hosts: %s", err) return []string{}, err } glog.V(4).Infof("DFS remote IPs: %+v", clients) return clients, nil }
func AddVirtualIP(conn client.Connection, virtualIP *pool.VirtualIP) error { var node VirtualIPNode path := vippath(virtualIP.IP) glog.V(1).Infof("Adding virtual ip to zookeeper: %s", path) if err := conn.Create(path, &node); err != nil { return err } node.VirtualIP = virtualIP return conn.Set(path, &node) }
// StopService schedules a service to stop func StopService(conn client.Connection, serviceID string) error { glog.Infof("Scheduling service %s to stop", serviceID) var node ServiceNode path := servicepath(serviceID) if err := conn.Get(path, &node); err != nil { return err } node.Service.DesiredState = int(service.SVCStop) return conn.Set(path, &node) }
// getStatus computes the status of a service state func getStatus(conn client.Connection, state *servicestate.ServiceState) (dao.Status, error) { var status dao.Status // Set the state based on the service state object if !state.IsRunning() { status = dao.Stopped } else if state.IsPaused() { status = dao.Paused } else { status = dao.Running } // Set the state based on the host state object var hostState HostState if err := conn.Get(hostpath(state.HostID, state.ID), &hostState); err != nil && err != client.ErrNoNode { return dao.Status{}, err } if hostState.DesiredState == int(service.SVCStop) { switch status { case dao.Running, dao.Paused: status = dao.Stopping case dao.Stopped: // pass default: return dao.Status{}, ErrUnknownState } } else if hostState.DesiredState == int(service.SVCRun) { switch status { case dao.Stopped: status = dao.Starting case dao.Paused: status = dao.Resuming case dao.Running: // pass default: return dao.Status{}, ErrUnknownState } } else if hostState.DesiredState == int(service.SVCPause) { switch status { case dao.Running: status = dao.Pausing case dao.Paused, dao.Stopped: // pass default: return dao.Status{}, ErrUnknownState } } else { return dao.Status{}, ErrUnknownState } return status, nil }
// CreateEndpointRegistry creates the endpoint registry and returns the EndpointRegistry type // This is created in the leader, most other calls will just get that one func CreateEndpointRegistry(conn client.Connection) (*EndpointRegistry, error) { path := zkEndpointsPath() if exists, err := zzk.PathExists(conn, path); err != nil { return nil, err } else if !exists { if err := conn.CreateDir(path); err != nil { glog.Errorf("error with CreateDir(%s) %+v", path, err) return nil, err } } return &EndpointRegistry{registryType{getPath: zkEndpointsPath, ephemeral: true}}, nil }
// LoadRunningService returns a RunningService object given a coordinator connection func LoadRunningService(conn client.Connection, serviceID, ssID string) (*dao.RunningService, error) { var service ServiceNode if err := conn.Get(servicepath(serviceID), &service); err != nil { return nil, err } var state ServiceStateNode if err := conn.Get(servicepath(serviceID, ssID), &state); err != nil { return nil, err } return NewRunningService(service.Service, state.ServiceState) }
// getChildren gets all child paths for the given nodeID func (r *registryType) getChildren(conn client.Connection, nodeID string) ([]string, error) { path := r.getPath(nodeID) glog.V(4).Infof("Getting children for %v", path) names, err := conn.Children(path) if err != nil { return []string{}, err } result := []string{} for _, name := range names { result = append(result, r.getPath(nodeID, name)) } return result, nil }
// SendAction sends an action request to a particular host func SendAction(conn client.Connection, action *Action) (string, error) { uuid, err := utils.NewUUID() if err != nil { return "", err } node := actionPath(action.HostID, uuid) if err := conn.Create(node, action); err != nil { return "", err } else if err := conn.Set(node, action); err != nil { return "", err } return uuid, nil }
// monitor checks for changes in a path-based connection func (zconn *zconn) monitor(path string) { var ( connC chan<- client.Connection conn client.Connection err error ) defer func() { if conn != nil { conn.Close() } }() for { // wait for someone to request a connection, or shutdown select { case connC = <-zconn.connC: case <-zconn.shutdownC: return } retry: // create a connection if it doesn't exist or ping the existing connection if conn == nil { conn, err = zconn.client.GetCustomConnection(path) if err != nil { glog.Warningf("Could not obtain a connection to %s: %s", path, err) } } else if _, err := conn.Children("/"); err == client.ErrConnectionClosed { glog.Warningf("Could not ping connection to %s: %s", path, err) conn = nil } // send the connection back if conn != nil { connC <- conn continue } // if conn is nil, try to create a new connection select { case <-time.After(time.Second): glog.Infof("Refreshing connection to zookeeper") goto retry case <-zconn.shutdownC: return } } }
// RemoveServiceVhost deletes a service vhost func RemoveServiceVhost(conn client.Connection, serviceID, vhostname string) error { glog.V(2).Infof("RemoveServiceVhost serviceID:%s vhostname:%s", serviceID, vhostname) // Check if the path exists spath := servicevhostpath(serviceID, vhostname) if exists, err := zzk.PathExists(conn, spath); err != nil { glog.Errorf("unable to determine whether removal path exists %s %s", spath, err) return err } else if !exists { glog.Errorf("service vhost removal path does not exist %s", spath) return nil } // Delete the service vhost glog.V(2).Infof("Deleting service vhost at path:%s", spath) return conn.Delete(spath) }
func (r *registryType) ensureDir(conn client.Connection, path string) error { timeout := time.After(time.Second * 60) var err error for { err = conn.CreateDir(path) if err == client.ErrNodeExists || err == nil { return nil } select { case <-timeout: break default: } } return fmt.Errorf("could not create dir: %s", path, err) }
// GetServiceStates gets all service states for a particular service func GetServiceStates(conn client.Connection, serviceIDs ...string) (states []servicestate.ServiceState, err error) { for _, serviceID := range serviceIDs { stateIDs, err := conn.Children(servicepath(serviceID)) if err != nil { return nil, err } for _, stateID := range stateIDs { var state servicestate.ServiceState if err := GetServiceState(conn, &state, serviceID, stateID); err != nil { return nil, err } states = append(states, state) } } return states, nil }
// updateInstance updates the service state and host instances func updateInstance(conn client.Connection, hostID, stateID string, mutate func(*HostState, *ss.ServiceState)) error { glog.V(2).Infof("Updating instance %s", stateID) // do not lock if parent lock does not exist if exists, err := conn.Exists(path.Join(zkInstanceLock, stateID)); err != nil && err != client.ErrNoNode { glog.Errorf("Could not check for lock on instance %s: %s", stateID, err) return err } else if !exists { glog.Errorf("Lock not found for instance %s", stateID) return ErrLockNotFound } lock := newInstanceLock(conn, stateID) if err := lock.Lock(); err != nil { glog.Errorf("Could not set lock for service instance %s on host %s: %s", stateID, hostID, err) return err } defer lock.Unlock() glog.V(2).Infof("Acquired lock for instance %s", stateID) hpath := hostpath(hostID, stateID) var hsdata HostState if err := conn.Get(hpath, &hsdata); err != nil { glog.Errorf("Could not get instance %s for host %s: %s", stateID, hostID, err) return err } serviceID := hsdata.ServiceID spath := servicepath(serviceID, stateID) var ssnode ServiceStateNode if err := conn.Get(spath, &ssnode); err != nil { glog.Errorf("Could not get instance %s for service %s: %s", stateID, serviceID, err) return err } mutate(&hsdata, ssnode.ServiceState) if err := conn.Set(hpath, &hsdata); err != nil { glog.Errorf("Could not update instance %s for host %s: %s", stateID, hostID, err) return err } if err := conn.Set(spath, &ssnode); err != nil { glog.Errorf("Could not update instance %s for service %s: %s", stateID, serviceID, err) return err } glog.V(2).Infof("Releasing lock for instance %s", stateID) return nil }
func removeNode(conn client.Connection, path string) error { exists, err := zzk.PathExists(conn, path) if err != nil { return err } if !exists { return nil } if err := conn.Delete(path); err != nil { glog.Errorf("Unable to delete path:%s error:%v", path, err) return err } return nil }
// RemoveService deletes a service func RemoveService(conn client.Connection, serviceID string) error { // Check if the path exists if exists, err := zzk.PathExists(conn, servicepath(serviceID)); err != nil { return err } else if !exists { return nil } // If the service has any children, do not delete if states, err := conn.Children(servicepath(serviceID)); err != nil { return err } else if instances := len(states); instances > 0 { return fmt.Errorf("service %s has %d running instances", serviceID, instances) } // Delete the service return conn.Delete(servicepath(serviceID)) }