func CreateOrUpdate(zconn Conn, zkPath, value string, flags int, aclv []zk.ACL, recursive bool) (pathCreated string, err error) { if recursive { pathCreated, err = CreateRecursive(zconn, zkPath, value, 0, zk.WorldACL(zk.PermAll)) } else { pathCreated, err = zconn.Create(zkPath, []byte(value), 0, zk.WorldACL(zk.PermAll)) } if err != nil && ZkErrorEqual(err, zk.ErrNodeExists) { pathCreated = "" _, err = zconn.Set(zkPath, []byte(value), -1) } return }
func DeleteRecursive(zconn Conn, zkPath string, version int) error { // version: -1 delete any version of the node at path - only applies to the top node err := zconn.Delete(zkPath, int32(version)) if err == nil { return nil } if !ZkErrorEqual(err, zk.ErrNotEmpty) { return err } // Remove the ability for other nodes to get created while we are trying to delete. // Otherwise, you can enter a race condition, or get starved out from deleting. _, err = zconn.SetACL(zkPath, zk.WorldACL(zk.PermAdmin|zk.PermDelete|zk.PermRead), int32(version)) if err != nil { return err } children, _, err := zconn.Children(zkPath) if err != nil { return err } for _, child := range children { err := DeleteRecursive(zconn, path.Join(zkPath, child), -1) if err != nil && !ZkErrorEqual(err, zk.ErrNoNode) { return fmt.Errorf("zkutil: recursive delete failed: %v", err) } } err = zconn.Delete(zkPath, int32(version)) if err != nil && !ZkErrorEqual(err, zk.ErrNotEmpty) { err = fmt.Errorf("zkutil: nodes getting recreated underneath delete (app race condition): %v", zkPath) } return err }
func TestChildren(t *testing.T) { conn := NewConn() defer conn.Close() nodes := []string{"/zk", "/zk/foo", "/zk/bar"} wantChildren := []string{"bar", "foo"} for _, path := range nodes { if _, err := conn.Create(path, nil, 0, zk.WorldACL(zk.PermAll)); err != nil { t.Fatalf("conn.Create: %v", err) } } children, _, err := conn.Children("/zk") if err != nil { t.Fatalf(`conn.Children("/zk"): %v`, err) } sort.Strings(children) if length := len(children); length != 2 { t.Errorf("children: got %v, wanted %v", children, wantChildren) } for i, path := range children { if wantChildren[i] != path { t.Errorf("children: got %v, wanted %v", children, wantChildren) break } } }
func TestSequence(t *testing.T) { conn := NewConn() defer conn.Close() if _, err := conn.Create("/zk", nil, 0, zk.WorldACL(zk.PermAll)); err != nil { t.Fatalf("conn.Create: %v", err) } newPath, err := conn.Create("/zk/", nil, zk.FlagSequence, zk.WorldACL(zk.PermAll)) if err != nil { t.Errorf("conn.Create: %v", err) } if wanted := "/zk/0000000001"; newPath != wanted { t.Errorf("new path: got %q, wanted %q", newPath, wanted) } newPath, err = conn.Create("/zk/", nil, zk.FlagSequence, zk.WorldACL(zk.PermAll)) if err != nil { t.Errorf("conn.Create: %v", err) } if wanted := "/zk/0000000002"; newPath != wanted { t.Errorf("new path: got %q, wanted %q", newPath, wanted) } if err := conn.Delete("/zk/0000000002", -1); err != nil { t.Fatalf("conn.Delete: %v", err) } newPath, err = conn.Create("/zk/", nil, zk.FlagSequence, zk.WorldACL(zk.PermAll)) if err != nil { t.Errorf("conn.Create: %v", err) } if wanted := "/zk/0000000003"; newPath != wanted { t.Errorf("new path: got %q, wanted %q", newPath, wanted) } newPath, err = conn.Create("/zk/action_", nil, zk.FlagSequence, zk.WorldACL(zk.PermAll)) if err != nil { t.Errorf("conn.Create: %v", err) } if wanted := "/zk/action_0000000004"; newPath != wanted { t.Errorf("new path: got %q, wanted %q", newPath, wanted) } }
// LockWithTimeout returns nil when the lock is acquired. A lock is // held if the file exists and you are the creator. Setting the wait // to zero makes this a nonblocking lock check. // // FIXME(msolo) Disallow non-super users from removing the lock? func (zm *zMutex) LockWithTimeout(wait time.Duration, desc string) (err error) { timer := time.NewTimer(wait) defer func() { if panicErr := recover(); panicErr != nil || err != nil { zm.deleteLock() } }() // Ensure the rendezvous node is here. // FIXME(msolo) Assuming locks are contended, it will be cheaper to assume this just // exists. _, err = zkhelper.CreateRecursive(zm.zconn, zm.path, "", 0, zk.WorldACL(zkhelper.PERM_DIRECTORY)) if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) { return err } lockPrefix := path.Join(zm.path, "lock-") zflags := zk.FlagSequence if zm.ephemeral { zflags = zflags | zk.FlagEphemeral } // update node content var lockContent map[string]interface{} err = json.Unmarshal([]byte(zm.contents), &lockContent) if err != nil { return err } lockContent["desc"] = desc newContent, err := json.Marshal(lockContent) if err != nil { return err } createlock: lockCreated, err := zm.zconn.Create(lockPrefix, newContent, int32(zflags), zk.WorldACL(zkhelper.PERM_FILE)) if err != nil { return err } name := path.Base(lockCreated) zm.mu.Lock() zm.name = name zm.mu.Unlock() trylock: children, _, err := zm.zconn.Children(zm.path) if err != nil { return fmt.Errorf("zkutil: trylock failed %v", err) } sort.Strings(children) if len(children) == 0 { return fmt.Errorf("zkutil: empty lock: %v", zm.path) } if children[0] == name { // We are the lock owner. return nil } if zm.onRetryLock != nil { zm.onRetryLock() } // This is the degenerate case of a nonblocking lock check. It's not optimal, but // also probably not worth optimizing. 
if wait == 0 { return zkhelper.ErrTimeout } prevLock := "" for i := 1; i < len(children); i++ { if children[i] == name { prevLock = children[i-1] break } } if prevLock == "" { // This is an interesting case. The node disappeared // underneath us, probably due to a session loss. We can // recreate the lock node (with a new sequence number) and // keep trying. log.Warnf("zkutil: no lock node found: %v/%v", zm.path, zm.name) goto createlock } zkPrevLock := path.Join(zm.path, prevLock) exist, stat, watch, err := zm.zconn.ExistsW(zkPrevLock) if err != nil { // FIXME(msolo) Should this be a retry? return fmt.Errorf("zkutil: unable to watch previous lock node %v %v", zkPrevLock, err) } if stat == nil || !exist { goto trylock } select { case <-timer.C: return zkhelper.ErrTimeout case <-zm.interrupted: return zkhelper.ErrInterrupted case event := <-watch: log.Infof("zkutil: lock event: %v", event) // The precise event doesn't matter - try to read again regardless. goto trylock } panic("unexpected") }
// RunTask returns nil when the underlyingtask ends or the error it // generated. func (ze *zElector) RunTask(task *electorTask) error { leaderPath := path.Join(ze.path, "leader") for { _, err := zkhelper.CreateRecursive(ze.zconn, leaderPath, "", 0, zk.WorldACL(zkhelper.PERM_FILE)) if err == nil || zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) { break } log.Warnf("election leader create failed: %v", err) time.Sleep(500 * time.Millisecond) } for { err := ze.Lock("RunTask") if err != nil { log.Warnf("election lock failed: %v", err) if err == zkhelper.ErrInterrupted { return zkhelper.ErrInterrupted } continue } // Confirm your win and deliver acceptance speech. This notifies // listeners who will have been watching the leader node for // changes. _, err = ze.zconn.Set(leaderPath, []byte(ze.contents), -1) if err != nil { log.Warnf("election promotion failed: %v", err) continue } log.Infof("election promote leader %v", leaderPath) taskErrChan := make(chan error) go func() { taskErrChan <- task.Run() }() watchLeader: // Watch the leader so we can get notified if something goes wrong. data, _, watch, err := ze.zconn.GetW(leaderPath) if err != nil { log.Warnf("election unable to watch leader node %v %v", leaderPath, err) // FIXME(msolo) Add delay goto watchLeader } if string(data) != ze.contents { log.Warnf("election unable to promote leader") task.Stop() // We won the election, but we didn't become the leader. How is that possible? // (see Bush v. Gore for some inspiration) // It means: // 1. Someone isn't playing by the election rules (a bad actor). // Hard to detect - let's assume we don't have this problem. :) // 2. We lost our connection somehow and the ephemeral lock was cleared, // allowing someone else to win the election. continue } // This is where we start our target process and watch for its failure. 
waitForEvent: select { case <-ze.interrupted: log.Warn("election interrupted - stop child process") task.Stop() // Once the process dies from the signal, this will all tear down. goto waitForEvent case taskErr := <-taskErrChan: // If our code fails, unlock to trigger an election. log.Infof("election child process ended: %v", taskErr) ze.Unlock() if task.Interrupted() { log.Warnf("election child process interrupted - stepping down") return zkhelper.ErrInterrupted } continue case zevent := <-watch: // We had a zk connection hiccup. We have a few choices, // but it depends on the constraints and the events. // // If we get SESSION_EXPIRED our connection loss triggered an // election that we won't have won and the thus the lock was // automatically freed. We have no choice but to start over. if zevent.State == zk.StateExpired { log.Warnf("election leader watch expired") task.Stop() continue } // Otherwise, we had an intermittent issue or something touched // the node. Either we lost our position or someone broke // protocol and touched the leader node. We just reconnect and // revalidate. In the meantime, assume we are still the leader // until we determine otherwise. // // On a reconnect we will be able to see the leader // information. If we still hold the position, great. If not, we // kill the associated process. // // On a leader node change, we need to perform the same // validation. It's possible an election completes without the // old leader realizing he is out of touch. log.Warnf("election leader watch event %v", zevent) goto watchLeader } } panic("unreachable") }
// DefaultFileACLs returns the ACL list built by zk.WorldACL from the
// PERM_FILE permission set.
func DefaultFileACLs() []zk.ACL {
	acls := zk.WorldACL(PERM_FILE)
	return acls
}
// DefaultDirACLs returns the ACL list built by zk.WorldACL from the
// PERM_DIRECTORY permission set.
func DefaultDirACLs() []zk.ACL {
	acls := zk.WorldACL(PERM_DIRECTORY)
	return acls
}
// DefaultACLs returns the ACL list built by zk.WorldACL from zk.PermAll.
func DefaultACLs() []zk.ACL {
	acls := zk.WorldACL(zk.PermAll)
	return acls
}
// Close the release channel when you want to clean up nicely. func CreatePidNode(zconn Conn, zkPath string, contents string, done chan struct{}) error { // On the first try, assume the cluster is up and running, that will // help hunt down any config issues present at startup if _, err := zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(PERM_FILE)); err != nil { if ZkErrorEqual(err, zk.ErrNodeExists) { err = zconn.Delete(zkPath, -1) } if err != nil { return fmt.Errorf("zkutil: failed deleting pid node: %v: %v", zkPath, err) } _, err = zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(PERM_FILE)) if err != nil { return fmt.Errorf("zkutil: failed creating pid node: %v: %v", zkPath, err) } } go func() { for { _, _, watch, err := zconn.GetW(zkPath) if err != nil { if ZkErrorEqual(err, zk.ErrNoNode) { _, err = zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(zk.PermAll)) if err != nil { log.Warnf("failed recreating pid node: %v: %v", zkPath, err) } else { log.Infof("recreated pid node: %v", zkPath) continue } } else { log.Warnf("failed reading pid node: %v", err) } } else { select { case event := <-watch: if ZkEventOk(event) && event.Type == zk.EventNodeDeleted { // Most likely another process has started up. However, // there is a chance that an ephemeral node is deleted by // the session expiring, yet that same session gets a watch // notification. This seems like buggy behavior, but rather // than race too hard on the node, just wait a bit and see // if the situation resolves itself. log.Warnf("pid deleted: %v", zkPath) } else { log.Infof("pid node event: %v", event) } // break here and wait for a bit before attempting case <-done: log.Infof("pid watcher stopped on done: %v", zkPath) return } } select { // No one likes a thundering herd, least of all zk. 
case <-time.After(5*time.Second + time.Duration(rand.Int63n(55e9))): case <-done: log.Infof("pid watcher stopped on done: %v", zkPath) return } } }() return nil }
func TestBasic(t *testing.T) { conn := NewConn() defer conn.Close() // Make sure Conn implements the interface. var _ Conn = conn if _, err := conn.Create("/zk", nil, 0, zk.WorldACL(zk.PermAll)); err != nil { t.Fatalf("conn.Create: %v", err) } if _, err := conn.Create("/zk/foo", []byte("foo"), 0, zk.WorldACL(zk.PermAll)); err != nil { t.Fatalf("conn.Create: %v", err) } data, _, err := conn.Get("/zk/foo") if err != nil { t.Fatalf("conn.Get: %v", err) } if string(data) != "foo" { t.Errorf("got %q, wanted %q", data, "foo") } if _, err := conn.Set("/zk/foo", []byte("bar"), -1); err != nil { t.Fatalf("conn.Set: %v", err) } data, _, err = conn.Get("/zk/foo") if err != nil { t.Fatalf("conn.Get: %v", err) } if string(data) != "bar" { t.Errorf("got %q, wanted %q", data, "bar") } // Try Set with the wrong version. if _, err := conn.Set("/zk/foo", []byte("bar"), 0); err == nil { t.Error("conn.Set with a wrong version: expected error") } // Try Get with a node that doesn't exist. if _, _, err := conn.Get("/zk/rabarbar"); err == nil { t.Error("conn.Get with a node that doesn't exist: expected error") } // Try Set with a node that doesn't exist. if _, err := conn.Set("/zk/barbarbar", []byte("bar"), -1); err == nil { t.Error("conn.Get with a node that doesn't exist: expected error") } // Try Create with a node that exists. if _, err := conn.Create("/zk/foo", []byte("foo"), 0, zk.WorldACL(zk.PermAll)); err == nil { t.Errorf("conn.Create with a node that exists: expected error") } // Try Create with a node whose parents don't exist. if _, err := conn.Create("/a/b/c", []byte("foo"), 0, zk.WorldACL(zk.PermAll)); err == nil { t.Errorf("conn.Create with a node whose parents don't exist: expected error") } if err := conn.Delete("/zk/foo", -1); err != nil { t.Errorf("conn.Delete: %v", err) } _, stat, err := conn.Exists("/zk/foo") if err != nil { println(conn.(*zconn).String()) t.Errorf("conn.Exists: %v", err) } if stat != nil { t.Errorf("/zk/foo should be deleted, got: %v", stat) } }
func TestWatches(t *testing.T) { conn := NewConn() defer conn.Close() // Creating sends an event to ExistsW. _, stat, watch, err := conn.ExistsW("/zk") if err != nil { t.Errorf("conn.ExistsW: %v", err) } if stat != nil { t.Errorf("stat is not nil: %v", stat) } if _, err := conn.Create("/zk", nil, 0, zk.WorldACL(zk.PermAll)); err != nil { t.Fatalf("conn.Create: %v", err) } fireWatch(t, watch) // Creating a child sends an event to ChildrenW. _, _, watch, err = conn.ChildrenW("/zk") if err != nil { t.Errorf(`conn.ChildrenW("/zk"): %v`, err) } if _, err := conn.Create("/zk/foo", nil, 0, zk.WorldACL(zk.PermAll)); err != nil { t.Fatalf("conn.Create: %v", err) } fireWatch(t, watch) // Updating sends an event to GetW. _, _, watch, err = conn.GetW("/zk") if err != nil { t.Errorf(`conn.GetW("/zk"): %v`, err) } if _, err := conn.Set("/zk", []byte("foo"), -1); err != nil { t.Errorf("conn.Set /zk: %v", err) } fireWatch(t, watch) // Deleting sends an event to ExistsW and to ChildrenW of the // parent. _, _, watch, err = conn.ExistsW("/zk/foo") if err != nil { t.Errorf("conn.ExistsW: %v", err) } _, _, parentWatch, err := conn.ChildrenW("/zk") if err != nil { t.Errorf(`conn.ChildrenW("/zk"): %v`, err) } if err := conn.Delete("/zk/foo", -1); err != nil { t.Errorf("conn.Delete: %v", err) } fireWatch(t, watch) fireWatch(t, parentWatch) }