예제 #1
0
// RunTask returns nil when the underlyingtask ends or the error it
// generated.
func (ze *zElector) RunTask(task *electorTask) error {
	leaderPath := path.Join(ze.path, "leader")
	for {
		_, err := zkhelper.CreateRecursive(ze.zconn, leaderPath, "", 0, zk.WorldACL(zkhelper.PERM_FILE))
		if err == nil || zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) {
			break
		}
		log.Warnf("election leader create failed: %v", err)
		time.Sleep(500 * time.Millisecond)
	}

	for {
		err := ze.Lock("RunTask")
		if err != nil {
			log.Warnf("election lock failed: %v", err)
			if err == zkhelper.ErrInterrupted {
				return zkhelper.ErrInterrupted
			}
			continue
		}
		// Confirm your win and deliver acceptance speech. This notifies
		// listeners who will have been watching the leader node for
		// changes.
		_, err = ze.zconn.Set(leaderPath, []byte(ze.contents), -1)
		if err != nil {
			log.Warnf("election promotion failed: %v", err)
			continue
		}

		log.Infof("election promote leader %v", leaderPath)
		taskErrChan := make(chan error)
		go func() {
			taskErrChan <- task.Run()
		}()

	watchLeader:
		// Watch the leader so we can get notified if something goes wrong.
		data, _, watch, err := ze.zconn.GetW(leaderPath)
		if err != nil {
			log.Warnf("election unable to watch leader node %v %v", leaderPath, err)
			// FIXME(msolo) Add delay
			goto watchLeader
		}

		if string(data) != ze.contents {
			log.Warnf("election unable to promote leader")
			task.Stop()
			// We won the election, but we didn't become the leader. How is that possible?
			// (see Bush v. Gore for some inspiration)
			// It means:
			//   1. Someone isn't playing by the election rules (a bad actor).
			//      Hard to detect - let's assume we don't have this problem. :)
			//   2. We lost our connection somehow and the ephemeral lock was cleared,
			//      allowing someone else to win the election.
			continue
		}

		// This is where we start our target process and watch for its failure.
	waitForEvent:
		select {
		case <-ze.interrupted:
			log.Warn("election interrupted - stop child process")
			task.Stop()
			// Once the process dies from the signal, this will all tear down.
			goto waitForEvent
		case taskErr := <-taskErrChan:
			// If our code fails, unlock to trigger an election.
			log.Infof("election child process ended: %v", taskErr)
			ze.Unlock()
			if task.Interrupted() {
				log.Warnf("election child process interrupted - stepping down")
				return zkhelper.ErrInterrupted
			}
			continue
		case zevent := <-watch:
			// We had a zk connection hiccup.  We have a few choices,
			// but it depends on the constraints and the events.
			//
			// If we get SESSION_EXPIRED our connection loss triggered an
			// election that we won't have won and the thus the lock was
			// automatically freed. We have no choice but to start over.
			if zevent.State == zk.StateExpired {
				log.Warnf("election leader watch expired")
				task.Stop()
				continue
			}

			// Otherwise, we had an intermittent issue or something touched
			// the node. Either we lost our position or someone broke
			// protocol and touched the leader node.  We just reconnect and
			// revalidate. In the meantime, assume we are still the leader
			// until we determine otherwise.
			//
			// On a reconnect we will be able to see the leader
			// information. If we still hold the position, great. If not, we
			// kill the associated process.
			//
			// On a leader node change, we need to perform the same
			// validation. It's possible an election completes without the
			// old leader realizing he is out of touch.
			log.Warnf("election leader watch event %v", zevent)
			goto watchLeader
		}
	}
	panic("unreachable")
}
예제 #2
0
func newRaft(c *Config, fsm raft.FSM) (Cluster, error) {
	r := new(Raft)

	if len(c.Raft.Addr) == 0 {
		return nil, nil
	}

	peers := make([]net.Addr, 0, len(c.Raft.Cluster))

	r.raftAddr = c.Raft.Addr

	a, err := net.ResolveTCPAddr("tcp", r.raftAddr)
	if err != nil {
		return nil, fmt.Errorf("invalid raft addr format %s, must host:port, err:%v", r.raftAddr, err)
	}

	peers = raft.AddUniquePeer(peers, a)

	for _, cluster := range c.Raft.Cluster {
		a, err = net.ResolveTCPAddr("tcp", cluster)
		if err != nil {
			return nil, fmt.Errorf("invalid cluster format %s, must host:port, err:%v", cluster, err)
		}

		peers = raft.AddUniquePeer(peers, a)
	}

	os.MkdirAll(c.Raft.DataDir, 0755)

	cfg := raft.DefaultConfig()

	if len(c.Raft.LogDir) == 0 {
		r.log = os.Stdout
	} else {
		os.MkdirAll(c.Raft.LogDir, 0755)
		logFile := path.Join(c.Raft.LogDir, "raft.log")
		f, err := os.OpenFile(logFile, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0644)
		if err != nil {
			return nil, err
		}
		r.log = f

		cfg.LogOutput = r.log
	}

	raftDBPath := path.Join(c.Raft.DataDir, "raft_db")
	r.dbStore, err = raftboltdb.NewBoltStore(raftDBPath)
	if err != nil {
		return nil, err
	}

	fileStore, err := raft.NewFileSnapshotStore(c.Raft.DataDir, 1, r.log)
	if err != nil {
		return nil, err
	}

	r.trans, err = raft.NewTCPTransport(r.raftAddr, nil, 3, 5*time.Second, r.log)
	if err != nil {
		return nil, err
	}

	r.peerStore = raft.NewJSONPeers(c.Raft.DataDir, r.trans)

	if c.Raft.ClusterState == ClusterStateNew {
		log.Infof("cluster state is new, use new cluster config")
		r.peerStore.SetPeers(peers)
	} else {
		log.Infof("cluster state is existing, use previous + new cluster config")
		ps, err := r.peerStore.Peers()
		if err != nil {
			log.Errorf("get store peers error %v", err)
			return nil, err
		}

		for _, peer := range peers {
			ps = raft.AddUniquePeer(ps, peer)
		}

		r.peerStore.SetPeers(ps)
	}

	if peers, _ := r.peerStore.Peers(); len(peers) <= 1 {
		cfg.EnableSingleNode = true
		log.Warn("raft will run in single node mode, may only be used in test")
	}

	r.r, err = raft.NewRaft(cfg, fsm, r.dbStore, r.dbStore, fileStore, r.peerStore, r.trans)

	return r, err
}