// group: // /zk/myshard/groups/group_000001 // /zk/myshard/groups/group_000002 func (t *Topo) CreateGroup(groupID int) (string, error) { group := model.Group{groupID, model.GroupUpStatus, nil} zkhelper.CreateRecursive(t.conn, t.GetGroupBasePath(), "", 0, zkhelper.DefaultDirACLs()) data, _ := json.Marshal(group) return t.conn.Create(t.GetGroupPath(groupID), data, 0, zkhelper.DefaultDirACLs()) }
func (t *Topo) CreateProxy(p *model.Proxy) (string, error) { data, err := json.Marshal(p) if err != nil { return "", err } base := t.GetProxyBasePath() zkhelper.CreateRecursive(t.conn, base, "", 0, zkhelper.DefaultDirACLs()) return t.conn.Create(path.Join(base, p.Addr), data, zk.FlagEphemeral, zkhelper.DefaultDirACLs()) }
func (t *Topo) CreateSchema(db string, groupIDs []int, defaultGroupID int) (string, error) { s := model.Schema{Name: db, GroupIDs: groupIDs, DefaultGroupID: defaultGroupID, Status: ""} sort.Ints(groupIDs) if !checkGroupIDExist(defaultGroupID, groupIDs) { return "", fmt.Errorf("group %d not in schema groups", defaultGroupID) } zkhelper.CreateRecursive(t.conn, t.GetSchemaBasePath(), "", 0, zkhelper.DefaultDirACLs()) data, _ := json.Marshal(s) return t.conn.Create(t.GetSchemaPath(db), data, 0, zkhelper.DefaultDirACLs()) }
func (t *Topo) CreateAgent(nodeAddr string) (string, error) { zkhelper.CreateRecursive(t.conn, t.GetAgentBasePath(), "", 0, zkhelper.DefaultDirACLs()) return t.conn.Create(path.Join(t.GetAgentBasePath(), nodeAddr), nil, zk.FlagEphemeral, zkhelper.DefaultFileACLs()) }
// LockWithTimeout returns nil when the lock is acquired. A lock is // held if the file exists and you are the creator. Setting the wait // to zero makes this a nonblocking lock check. // // FIXME(msolo) Disallow non-super users from removing the lock? func (zm *zMutex) LockWithTimeout(wait time.Duration, desc string) (err error) { timer := time.NewTimer(wait) defer func() { if panicErr := recover(); panicErr != nil || err != nil { zm.deleteLock() } }() // Ensure the rendezvous node is here. // FIXME(msolo) Assuming locks are contended, it will be cheaper to assume this just // exists. _, err = zkhelper.CreateRecursive(zm.zconn, zm.path, "", 0, zk.WorldACL(zkhelper.PERM_DIRECTORY)) if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) { return err } lockPrefix := path.Join(zm.path, "lock-") zflags := zk.FlagSequence if zm.ephemeral { zflags = zflags | zk.FlagEphemeral } // update node content var lockContent map[string]interface{} err = json.Unmarshal([]byte(zm.contents), &lockContent) if err != nil { return err } lockContent["desc"] = desc newContent, err := json.Marshal(lockContent) if err != nil { return err } createlock: lockCreated, err := zm.zconn.Create(lockPrefix, newContent, int32(zflags), zk.WorldACL(zkhelper.PERM_FILE)) if err != nil { return err } name := path.Base(lockCreated) zm.mu.Lock() zm.name = name zm.mu.Unlock() trylock: children, _, err := zm.zconn.Children(zm.path) if err != nil { return fmt.Errorf("zkutil: trylock failed %v", err) } sort.Strings(children) if len(children) == 0 { return fmt.Errorf("zkutil: empty lock: %v", zm.path) } if children[0] == name { // We are the lock owner. return nil } if zm.onRetryLock != nil { zm.onRetryLock() } // This is the degenerate case of a nonblocking lock check. It's not optimal, but // also probably not worth optimizing. if wait == 0 { return zkhelper.ErrTimeout } prevLock := "" for i := 1; i < len(children); i++ { if children[i] == name { prevLock = children[i-1] break } } if prevLock == "" { // This is an interesting case. The node disappeared // underneath us, probably due to a session loss. We can // recreate the lock node (with a new sequence number) and // keep trying. log.Warnf("zkutil: no lock node found: %v/%v", zm.path, zm.name) goto createlock } zkPrevLock := path.Join(zm.path, prevLock) exist, stat, watch, err := zm.zconn.ExistsW(zkPrevLock) if err != nil { // FIXME(msolo) Should this be a retry? return fmt.Errorf("zkutil: unable to watch previous lock node %v %v", zkPrevLock, err) } if stat == nil || !exist { goto trylock } select { case <-timer.C: return zkhelper.ErrTimeout case <-zm.interrupted: return zkhelper.ErrInterrupted case event := <-watch: log.Infof("zkutil: lock event: %v", event) // The precise event doesn't matter - try to read again regardless. goto trylock } panic("unexpected") }
// RunTask returns nil when the underlyingtask ends or the error it // generated. func (ze *zElector) RunTask(task *electorTask) error { leaderPath := path.Join(ze.path, "leader") for { _, err := zkhelper.CreateRecursive(ze.zconn, leaderPath, "", 0, zk.WorldACL(zkhelper.PERM_FILE)) if err == nil || zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) { break } log.Warnf("election leader create failed: %v", err) time.Sleep(500 * time.Millisecond) } for { err := ze.Lock("RunTask") if err != nil { log.Warnf("election lock failed: %v", err) if err == zkhelper.ErrInterrupted { return zkhelper.ErrInterrupted } continue } // Confirm your win and deliver acceptance speech. This notifies // listeners who will have been watching the leader node for // changes. _, err = ze.zconn.Set(leaderPath, []byte(ze.contents), -1) if err != nil { log.Warnf("election promotion failed: %v", err) continue } log.Infof("election promote leader %v", leaderPath) taskErrChan := make(chan error) go func() { taskErrChan <- task.Run() }() watchLeader: // Watch the leader so we can get notified if something goes wrong. data, _, watch, err := ze.zconn.GetW(leaderPath) if err != nil { log.Warnf("election unable to watch leader node %v %v", leaderPath, err) // FIXME(msolo) Add delay goto watchLeader } if string(data) != ze.contents { log.Warnf("election unable to promote leader") task.Stop() // We won the election, but we didn't become the leader. How is that possible? // (see Bush v. Gore for some inspiration) // It means: // 1. Someone isn't playing by the election rules (a bad actor). // Hard to detect - let's assume we don't have this problem. :) // 2. We lost our connection somehow and the ephemeral lock was cleared, // allowing someone else to win the election. continue } // This is where we start our target process and watch for its failure. waitForEvent: select { case <-ze.interrupted: log.Warn("election interrupted - stop child process") task.Stop() // Once the process dies from the signal, this will all tear down. goto waitForEvent case taskErr := <-taskErrChan: // If our code fails, unlock to trigger an election. log.Infof("election child process ended: %v", taskErr) ze.Unlock() if task.Interrupted() { log.Warnf("election child process interrupted - stepping down") return zkhelper.ErrInterrupted } continue case zevent := <-watch: // We had a zk connection hiccup. We have a few choices, // but it depends on the constraints and the events. // // If we get SESSION_EXPIRED our connection loss triggered an // election that we won't have won and the thus the lock was // automatically freed. We have no choice but to start over. if zevent.State == zk.StateExpired { log.Warnf("election leader watch expired") task.Stop() continue } // Otherwise, we had an intermittent issue or something touched // the node. Either we lost our position or someone broke // protocol and touched the leader node. We just reconnect and // revalidate. In the meantime, assume we are still the leader // until we determine otherwise. // // On a reconnect we will be able to see the leader // information. If we still hold the position, great. If not, we // kill the associated process. // // On a leader node change, we need to perform the same // validation. It's possible an election completes without the // old leader realizing he is out of touch. log.Warnf("election leader watch event %v", zevent) goto watchLeader } } panic("unreachable") }