func watch(conn client.Connection, path string, cancel <-chan bool, processChildren ProcessChildrenFunc, errorHandler WatchError) error { exists, err := zzk.PathExists(conn, path) if err != nil { return err } if !exists { return client.ErrNoNode } for { glog.V(1).Infof("watching children at path: %s", path) nodeIDs, event, err := conn.ChildrenW(path) glog.V(1).Infof("child watch for path %s returned: %#v", path, nodeIDs) if err != nil { glog.Errorf("Could not watch %s: %s", path, err) defer errorHandler(path, err) return err } processChildren(conn, path, nodeIDs...) select { case ev := <-event: glog.V(1).Infof("watch event %+v at path: %s", ev, path) case <-cancel: glog.V(1).Infof("watch cancel at path: %s", path) return nil } } glog.V(1).Infof("no longer watching children at path: %s", path) return nil }
// Ready waits for a node to be available for watching func Ready(shutdown <-chan interface{}, conn client.Connection, p string) error { if exists, err := PathExists(conn, p); err != nil { return err } else if exists { return nil } else if p == "/" || p == "." { return fmt.Errorf("base path not found") } for { if err := Ready(shutdown, conn, path.Dir(p)); err != nil { return err } _, event, err := conn.ChildrenW(path.Dir(p)) if err != nil { return err } if exists, err := PathExists(conn, p); err != nil { return err } else if exists { return nil } select { case <-event: // pass case <-shutdown: return ErrShutdown } } }
// WatchServiceLock waits for a service lock to be enabled/disabled func WaitServiceLock(shutdown <-chan interface{}, conn client.Connection, enabled bool) error { for { // make sure the parent exists if err := zzk.Ready(shutdown, conn, zkServiceLock); err != nil { return err } // check if the lock is enabled nodes, event, err := conn.ChildrenW(zkServiceLock) if err != nil { return err } // check that the states match if locked := len(nodes) > 0; locked == enabled { return nil } // wait to reach the desired state or shutdown select { case <-event: // pass case <-shutdown: return zzk.ErrTimeout } } }
func (s *Server) Run(shutdown <-chan interface{}, conn client.Connection) error { node := &Node{ Host: *s.host, ExportPath: fmt.Sprintf("%s:%s", s.host.IPAddr, s.driver.ExportPath()), ExportTime: strconv.FormatInt(time.Now().UnixNano(), 16), } // Create the storage leader and client nodes if exists, _ := conn.Exists("/storage/leader"); !exists { conn.CreateDir("/storage/leader") } storageClientsPath := "/storage/clients" if exists, _ := conn.Exists(storageClientsPath); !exists { conn.CreateDir(storageClientsPath) } leader := conn.NewLeader("/storage/leader", node) leaderW, err := leader.TakeLead() if err != zookeeper.ErrDeadlock && err != nil { glog.Errorf("Could not take storage lead: %s", err) return err } // monitor dfs; log warnings each cycle; restart dfs if needed go s.monitor.MonitorDFSVolume(path.Join("/exports", s.driver.ExportPath()), s.host.IPAddr, node.ExportTime, shutdown, s.monitor.DFSVolumeMonitorPollUpdateFunc) // loop until shutdown event defer leader.ReleaseLead() for { clients, clientW, err := conn.ChildrenW(storageClientsPath) if err != nil { glog.Errorf("Could not set up watch for storage clients: %s", err) return err } s.monitor.SetMonitorStorageClients(conn, storageClientsPath) s.driver.SetClients(clients...) if err := s.driver.Sync(); err != nil { glog.Errorf("Error syncing driver: %s", err) return err } select { case e := <-clientW: glog.Info("storage.server: receieved event: %s", e) case <-leaderW: err := fmt.Errorf("storage.server: lost lead") return err case <-shutdown: return nil } } }
func MonitorRealm(shutdown <-chan interface{}, conn client.Connection, path string) <-chan string { realmC := make(chan string) go func() { defer close(realmC) var realm string leader := conn.NewLeader(path, &HostLeader{}) for { // monitor path for changes _, event, err := conn.ChildrenW(path) if err != nil { return } // Get the current leader and check for changes in its realm var hl HostLeader if err := leader.Current(&hl); err == zookeeper.ErrNoLeaderFound { // pass } else if err != nil { return } else if hl.Realm != realm { realm = hl.Realm select { case realmC <- realm: case <-shutdown: return } } select { case <-event: case <-shutdown: return } } }() return realmC }
// WaitService waits for a particular service's instances to reach a particular state func WaitService(shutdown <-chan interface{}, conn client.Connection, serviceID string, desiredState service.DesiredState) error { for { // Get the list of service states stateIDs, event, err := conn.ChildrenW(servicepath(serviceID)) if err != nil { return err } count := len(stateIDs) switch desiredState { case service.SVCStop: // if there are no instances, then the service is stopped if count == 0 { return nil } case service.SVCRun, service.SVCRestart: // figure out which service instances are actively running and decrement non-running instances for _, stateID := range stateIDs { var state ServiceStateNode if err := conn.Get(servicepath(serviceID, stateID), &state); err == client.ErrNoNode { // if the instance does not exist, then that instance is no running count-- } else if err != nil { return err } else if !state.IsRunning() { count-- } } // Get the service node and verify that the number of running instances meets or exceeds the number // of instances required by the service var service ServiceNode if err := conn.Get(servicepath(serviceID), &service); err != nil { return err } else if count >= service.Instances { return nil } case service.SVCPause: // figure out which services have stopped or paused for _, stateID := range stateIDs { var state ServiceStateNode if err := conn.Get(servicepath(serviceID, stateID), &state); err == client.ErrNoNode { // if the instance does not exist, then it is not runng (so it is paused) count-- } else if err != nil { return err } else if state.IsPaused() { count-- } } // no instances should be running for all instances to be considered paused if count == 0 { return nil } default: return fmt.Errorf("invalid desired state") } if len(stateIDs) > 0 { // wait for each instance to reach the desired state for _, stateID := range stateIDs { if err := wait(shutdown, conn, serviceID, stateID, desiredState); err != nil { return err } } select { case <-shutdown: return zzk.ErrShutdown default: } } else { // otherwise, wait for a change in the number of children select { case <-event: case <-shutdown: return zzk.ErrShutdown } } } }
// Listen initializes a listener for a particular zookeeper node // shutdown: signal to shutdown the listener // ready: signal to indicate that the listener has started watching its // child nodes (must set buffer size >= 1) // l: object that manages the zk interface for a specific path func Listen(shutdown <-chan interface{}, ready chan<- error, conn client.Connection, l Listener) { var ( _shutdown = make(chan interface{}) done = make(chan string) processing = make(map[string]struct{}) ) l.SetConnection(conn) glog.Infof("Starting a listener at %s", l.GetPath()) if err := Ready(shutdown, conn, l.GetPath()); err != nil { glog.Errorf("Could not start listener at %s: %s", l.GetPath(), err) ready <- err return } else if err := l.Ready(); err != nil { glog.Errorf("Could not start listener at %s: %s", l.GetPath(), err) ready <- err return } close(ready) defer func() { glog.Infof("Listener at %s receieved interrupt", l.GetPath()) l.Done() close(_shutdown) for len(processing) > 0 { delete(processing, <-done) } }() glog.V(1).Infof("Listener %s started; waiting for data", l.GetPath()) for { nodes, event, err := conn.ChildrenW(l.GetPath()) if err != nil { glog.Errorf("Could not watch for nodes at %s: %s", l.GetPath(), err) return } for _, node := range nodes { if _, ok := processing[node]; !ok { glog.V(1).Infof("Spawning a goroutine for %s", l.GetPath(node)) processing[node] = struct{}{} go func(node string) { defer func() { glog.V(1).Infof("Goroutine at %s was shutdown", l.GetPath(node)) done <- node }() l.Spawn(_shutdown, node) }(node) } } l.PostProcess(processing) select { case e := <-event: if e.Type == client.EventNodeDeleted { glog.V(1).Infof("Node %s has been removed; shutting down listener", l.GetPath()) return } else if e.Type == client.EventSession || e.Type == client.EventNotWatching { glog.Warningf("Node %s had a reconnect; resetting listener", l.GetPath()) if err := l.Ready(); err != nil { glog.Errorf("Could not ready listener; shutting down") return } } glog.V(4).Infof("Node %s receieved event %v", l.GetPath(), e) case node := <-done: glog.V(3).Infof("Cleaning up %s", l.GetPath(node)) delete(processing, node) case <-shutdown: return } } }