Пример #1
0
// applyConfChange unmarshals the raft configuration change stored in entry e
// and applies it to the group's state machine.
//
// The change's Context is interpreted in one of three ways:
//   - empty: the change is applied together with a zero GroupNode;
//   - decodable by g.node.decReq into a request whose Data is a GroupNode: the
//     change is applied and the outcome (including the state machine's error)
//     is handed to g.node.line.call under the decoded request ID;
//   - anything else: the Context is decoded with bhgob directly into a
//     GroupNode whose Node must equal the change's NodeID.
//
// A decoding failure or a node mismatch in the last case is reported with
// glog.Fatalf. Every return path yields nil.
func (g *group) applyConfChange(e raftpb.Entry) error {
	var change raftpb.ConfChange
	pbutil.MustUnmarshal(&change, e.Data)
	glog.V(2).Infof("%v applies conf change %v: %#v", g, e.Index, change)

	if len(change.Context) == 0 {
		g.stateMachine.ApplyConfChange(change, GroupNode{})
		return nil
	}

	// Try the request encoding first; a decode error here is not fatal, it
	// simply means the raw GroupNode encoding is attempted below.
	id, req, err := g.node.decReq(change.Context)
	if err == nil {
		if node, isNode := req.Data.(GroupNode); isNode {
			g.node.line.call(Response{
				ID:  id,
				Err: g.stateMachine.ApplyConfChange(change, node),
			})
			return nil
		}
	}

	var node GroupNode
	if err := bhgob.Decode(&node, change.Context); err != nil {
		glog.Fatalf("%v cannot decode config change: %v", g, err)
	}
	if node.Node != change.NodeID {
		glog.Fatalf("invalid config change: %v != %v", node.Node, change.NodeID)
	}

	g.stateMachine.ApplyConfChange(change, node)
	return nil
}
Пример #2
0
// handleUnicastMsg enqueues the message in mh on the bee addressed by
// mh.msg.To().
//
// When this qee does not already hold that bee, its info is looked up in the
// hive registry. If the lookup fails the error is logged and the message is
// dropped. If q.isLocalBee reports the bee as local the situation is fatal;
// otherwise the bee registered under info.ID is used, creating a proxy bee when
// none exists yet. A failure to create the proxy is logged and the message is
// dropped.
//
// A nil handler is fatal unless the target bee is detached or a proxy.
func (q *qee) handleUnicastMsg(mh msgAndHandler) {
	glog.V(2).Infof("unicast msg: %v", mh.msg)
	b, ok := q.beeByID(mh.msg.To())
	if !ok {
		info, err := q.hive.registry.bee(mh.msg.To())
		if err != nil {
			glog.Errorf("cannot find bee %v", mh.msg.To())
			// Without registry info there is nothing valid to route to; going
			// on would operate on a zero-value BeeInfo.
			return
		}

		if q.isLocalBee(info) {
			glog.Fatalf("%v cannot find local bee %v", q, mh.msg.To())
		}

		if b, ok = q.beeByID(info.ID); !ok {
			if b, err = q.newProxyBee(info); err != nil {
				glog.Errorf("%v cannnot find remote bee %v", q, mh.msg.To())
				return
			}
		}
	}

	if mh.handler == nil && !b.detached && !b.proxy {
		glog.Fatalf("handler is nil for message %v", mh.msg)
	}

	b.enqueMsg(mh)
}
Пример #3
0
func hiveIDFromPeers(addr string, paddrs []string) uint64 {
	if len(paddrs) == 0 {
		return 1
	}

	ch := make(chan uint64, len(paddrs))
	for _, paddr := range paddrs {
		glog.Infof("requesting hive ID from %v", paddr)
		go func(paddr string) {
			c, err := newRPCClient(paddr)
			if err != nil {
				glog.Error(err)
				return
			}
			defer c.stop()

			id, err := c.sendCmd(cmd{Data: cmdNewHiveID{}})
			if err != nil {
				glog.Error(err)
				return
			}

			if id == Nil {
				glog.Fatalf("invalid ID from peer")
			}

			_, err = c.sendCmd(cmd{
				Data: cmdAddHive{
					Hive: HiveInfo{
						ID:   id.(uint64),
						Addr: addr,
					},
				},
			})
			if err != nil {
				glog.Error(err)
				return
			}
			ch <- id.(uint64)
		}(paddr)
		select {
		case id := <-ch:
			return id
		case <-time.After(1 * time.Second):
			glog.Infof("cannot get id from %v", paddr)
			continue
		}
	}

	glog.Fatalf("cannot get a new hive ID from peers")
	return 1
}
Пример #4
0
// saveMeta gob-encodes m into the file "meta" under cfg.StatePath, creating
// the file if needed and truncating any previous content.
//
// Failure to open, encode, or close the file is reported with glog.Fatalf.
func saveMeta(m hiveMeta, cfg HiveConfig) {
	metafile := path.Join(cfg.StatePath, "meta")
	f, err := os.OpenFile(metafile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0700)
	if err != nil {
		glog.Fatalf("cannot open meta file: %v", err)
	}

	enc := gob.NewEncoder(f)
	if err := enc.Encode(&m); err != nil {
		glog.Fatalf("cannot encode meta: %v", err)
	}

	// A failed Close on a file that was just written can mean the data never
	// reached disk, so it is treated like any other write failure.
	if err := f.Close(); err != nil {
		glog.Fatalf("cannot close meta file: %v", err)
	}
}
Пример #5
0
// ApplyConfChange updates the bee's colony according to the raft configuration
// change cc. The affected bee ID is taken from gn.Data, which must hold a
// uint64; a nil gn.Data makes the call a no-op.
//
// Adding a bee that the colony already contains returns ErrDuplicateBee, and
// removing one it does not contain returns ErrNoSuchBee. Removing this bee
// itself is reported with glog.Fatalf. Removing the current leader clears the
// Leader field; removing any other bee deletes it from the followers.
//
// The bee's lock is held for the duration of the update.
func (b *bee) ApplyConfChange(cc raftpb.ConfChange, gn raft.GroupNode) error {
	if gn.Data == nil {
		return nil
	}

	b.Lock()
	defer b.Unlock()

	colony := b.beeColony
	id := gn.Data.(uint64)

	switch cc.Type {
	case raftpb.ConfChangeAddNode:
		if colony.Contains(id) {
			return ErrDuplicateBee
		}
		colony.AddFollower(id)

	case raftpb.ConfChangeRemoveNode:
		if !colony.Contains(id) {
			return ErrNoSuchBee
		}
		if id == b.beeID {
			// TODO(soheil): should we stop the bee here?
			glog.Fatalf("bee is alive but removed from raft")
		}
		if colony.Leader != id {
			colony.DelFollower(id)
		} else {
			// TODO(soheil): should we launch a goroutine to campaign here?
			colony.Leader = 0
		}
	}

	b.beeColony = colony
	return nil
}
Пример #6
0
// StartDetached submits a cmdStartDetached carrying handler h to the bee's qee
// and returns the command's result as a uint64. An error from the command is
// reported with glog.Fatalf.
func (b *bee) StartDetached(h DetachedHandler) uint64 {
	res, err := b.qee.processCmd(cmdStartDetached{Handler: h})
	if err != nil {
		glog.Fatalf("Cannot start a detached bee: %v", err)
	}
	id := res.(uint64)
	return id
}
Пример #7
0
// meta builds the hive's metadata from the file "meta" under cfg.StatePath
// and writes the result back with saveMeta before returning it.
//
// If the file cannot be opened, fresh metadata is created from cfg: peers come
// from cfg.PeerAddrs, and the hive ID is 1 when there are no peers or is
// obtained through hiveIDFromPeers otherwise. If the file opens, its gob
// content is decoded (a decoding error is reported with glog.Fatalf). In both
// cases the hive address is set to cfg.Addr.
func meta(cfg HiveConfig) hiveMeta {
	var m hiveMeta

	f, err := os.Open(path.Join(cfg.StatePath, "meta"))
	if err != nil {
		// TODO(soheil): We should also update our peer addresses when we have an
		// existing meta.
		m.Peers = peersInfo(cfg.PeerAddrs)
		m.Hive.Addr = cfg.Addr
		if len(cfg.PeerAddrs) == 0 {
			// The initial ID is 1. There is no raft node up yet to allocate an
			// ID, so it must be assigned when the hive starts.
			m.Hive.ID = 1
		} else {
			m.Hive.ID = hiveIDFromPeers(cfg.Addr, cfg.PeerAddrs)
		}
	} else {
		if err = gob.NewDecoder(f).Decode(&m); err != nil {
			glog.Fatalf("Cannot decode meta: %v", err)
		}
		m.Hive.Addr = cfg.Addr
		f.Close()
	}

	saveMeta(m, cfg)
	return m
}
Пример #8
0
// ApplyConfChange updates the registry's hives according to the raft
// configuration change cc, holding the registry's write lock throughout.
//
// For ConfChangeAddNode, gn.Node must equal cc.NodeID (a mismatch is reported
// with glog.Fatalf); when gn.Data is non-nil it must be a string and is used
// as the address of the hive added under ID gn.Node. For ConfChangeRemoveNode
// the hive with ID cc.NodeID is deleted. Other change types are ignored.
//
// The returned error is always nil.
func (r *registry) ApplyConfChange(cc raftpb.ConfChange, gn raft.GroupNode) (
	err error) {

	r.m.Lock()
	defer r.m.Unlock()

	glog.V(2).Infof("%v applies conf change %#v for %v", r, cc, gn.Node)

	if cc.Type == raftpb.ConfChangeRemoveNode {
		r.delHive(cc.NodeID)
		glog.V(2).Infof("%v deletes hive %v", r, cc.NodeID)
		return nil
	}
	if cc.Type != raftpb.ConfChangeAddNode {
		return nil
	}

	if gn.Node != cc.NodeID {
		glog.Fatalf("invalid data in the config change: %v != %v", gn.Node,
			cc.NodeID)
	}
	if gn.Data == nil {
		return nil
	}

	hive := HiveInfo{
		ID:   gn.Node,
		Addr: gn.Data.(string),
	}
	r.addHive(hive)
	glog.V(2).Infof("%v adds hive %v@%v", r, hive.ID, hive.Addr)
	return nil
}
Пример #9
0
// addFlowEntriesForPath records a path and its flow entries in the dictPath
// dictionary and then emits one nom.AddFlowEntry per flow.
//
// The path receives a fresh ID: the value of reservePathID(ctx) formatted in
// base 16. The same ID is written into every element of flows, so the caller's
// slice is modified in place. The stored entry names sub as the subscriber,
// whereas the emitted messages use centralizedAppCellKey(ctx.App()).
//
// A failure to store the entry is reported with glog.Fatalf.
func addFlowEntriesForPath(sub bh.AppCellKey, path nom.Path,
	flows []nom.FlowEntry, ctx bh.RcvContext) {

	path.ID = strconv.FormatUint(reservePathID(ctx), 16)

	statuses := make([]flowAndStatus, len(flows))
	for i := range flows {
		flows[i].ID = path.ID
		statuses[i] = flowAndStatus{Flow: flows[i]}
	}

	entry := pathAndFlows{
		Subscriber: sub,
		Path:       path,
		Flows:      statuses,
		Timestamp:  time.Now(),
	}
	if err := ctx.Dict(dictPath).Put(path.ID, entry); err != nil {
		glog.Fatalf("error in storing path entry: %v", err)
	}

	subscriber := centralizedAppCellKey(ctx.App())
	for i := range flows {
		ctx.Emit(nom.AddFlowEntry{
			Flow:       flows[i],
			Subscriber: subscriber,
		})
	}
}
Пример #10
0
// Start marks the hive as started, registers signals, starts the raft node and
// the listener, waits on the raft barrier, starts the qees, reloads state, and
// then runs the message loop.
//
// If the listener cannot be started the error is logged, the hive is stopped,
// and the error is returned. An error from the raft barrier is reported with
// glog.Fatalf. The loop dispatches data messages and control commands until
// h.status is no longer hiveStarted, after which Start returns nil.
func (h *hive) Start() error {
	h.status = hiveStarted
	h.registerSignals()
	h.startRaftNode()

	err := h.listen()
	if err != nil {
		glog.Errorf("%v cannot start listener: %v", h, err)
		h.Stop()
		return err
	}

	if err = h.raftBarrier(); err != nil {
		glog.Fatalf("error when joining the cluster: %v", err)
	}
	glog.V(2).Infof("%v is in sync with the cluster", h)

	h.startQees()
	h.reloadState()

	glog.V(2).Infof("%v starts message loop", h)
	msgs := h.dataCh.out()
	for {
		// The status is re-checked before every blocking receive.
		if h.status != hiveStarted {
			return nil
		}

		select {
		case m := <-msgs:
			h.handleMsg(m.msg)

		case c := <-h.ctrlCh:
			h.handleCmd(c)
		}
	}
}
Пример #11
0
// mustFindBee returns the info stored in r.Bees under id. A missing entry is
// reported with glog.Fatalf.
func (r *registry) mustFindBee(id uint64) BeeInfo {
	if found, exists := r.Bees[id]; exists {
		return found
	}
	glog.Fatalf("cannot find bee %v", id)
	return BeeInfo{}
}
Пример #12
0
// MustEncode returns the bhgob encoding of the group node. An encoding error
// is reported with glog.Fatalf.
func (i GroupNode) MustEncode() []byte {
	encoded, err := bhgob.Encode(i)
	if err == nil {
		return encoded
	}
	glog.Fatalf("error in encoding peer: %v", err)
	return encoded
}
Пример #13
0
// beeForCells looks up the bee that leads the colonies owning the given cells
// of app. The registry's read lock is held for the duration of the call.
//
// info is the registry entry of the leader of the first cell that has a
// colony. hasAll is true only if every cell has a colony and all of those
// colonies share that leader. The search stops at the first cell with a
// colony once hasAll has become false.
//
// ErrNoSuchBee is returned when no leader was found (info.ID is zero). A
// registry entry whose ID differs from the colony's leader ID is reported with
// glog.Fatalf.
func (r *registry) beeForCells(app string, cells MappedCells) (info BeeInfo,
	hasAll bool, err error) {

	r.m.RLock()
	defer r.m.RUnlock()

	hasAll = true
	for _, k := range cells {
		col, ok := r.Store.colony(app, k)
		if !ok {
			hasAll = false
			continue
		}

		if info.ID == 0 {
			info = r.Bees[col.Leader]
			if info.ID != col.Leader {
				// %v rather than %b: the ID should read as a decimal number,
				// not as a binary string.
				glog.Fatalf("bee %v has an invalid info %#v", col.Leader, info)
			}
		} else if info.ID != col.Leader {
			// Inconsistencies should be handled by consensus.
			hasAll = false
		}

		if !hasAll {
			return info, hasAll, nil
		}
	}
	if info.ID == 0 {
		return info, hasAll, ErrNoSuchBee
	}
	return info, hasAll, nil
}
Пример #14
0
// parseBeeID parses str as a base-10 unsigned 64-bit integer and returns it.
// A parse error is reported with glog.Fatalf.
func parseBeeID(str string) uint64 {
	parsed, err := strconv.ParseUint(str, 10, 64)
	if err != nil {
		glog.Fatalf("error in parsing id: %v", err)
	}
	return parsed
}
Пример #15
0
// followerHandlers returns the handlers used while the bee is not the leader
// of its colony: the message handler obtained from b.proxyHandlers for the
// colony's leader, and b.handleCmdLocal for commands.
//
// It is fatal for the bee itself to be the leader, or for the leader to be
// missing from the hive registry.
func (b *bee) followerHandlers() (func(mhs []msgAndHandler),
	func(cc cmdAndChannel)) {

	col := b.colony()
	if col.Leader == b.ID() {
		glog.Fatalf("%v is the leader", b)
	}

	if _, err := b.hive.registry.bee(col.Leader); err != nil {
		glog.Fatalf("%v cannot find leader %v", b, col.Leader)
	}

	msgHandler, _ := b.proxyHandlers(col.Leader)
	return msgHandler, b.handleCmdLocal
}
Пример #16
0
// NewHelloListener returns a HelloListener backed by a TCP listener on port
// 6789. A failure to listen is reported with glog.Fatalf.
func NewHelloListener() *HelloListener {
	listener, err := net.Listen("tcp", ":6789")
	if err != nil {
		glog.Fatalf("cannot start listener: %v", err)
	}

	hl := &HelloListener{lis: listener}
	return hl
}
Пример #17
0
// peer returns the raft peer for bee bid in group gid. The peer's node is the
// hive recorded for bid in the hive registry, and bid itself is carried as the
// group node's data. A failed registry lookup is reported with glog.Fatalf.
func (b *bee) peer(gid, bid uint64) etcdraft.Peer {
	info, err := b.hive.registry.bee(bid)
	if err != nil {
		glog.Fatalf("%v cannot find peer bee %v: %v", b, bid, err)
	}
	// TODO(soheil): maybe include address.
	node := raft.GroupNode{Node: info.Hive, Group: gid, Data: bid}
	return node.Peer()
}
Пример #18
0
// SendToCell wraps msgData in a message addressed to the bee that the registry
// returns for the given cell of app, and passes it to b.bufferOrEmit. If the
// registry lookup returns an error it is reported with glog.Fatalf.
func (b *bee) SendToCell(msgData interface{}, app string, cell CellKey) {
	cells := MappedCells{cell}
	target, _, err := b.hive.registry.beeForCells(app, cells)
	if err != nil {
		glog.Fatalf("cannot find any bee in app %v for cell %v", app, cell)
	}
	b.bufferOrEmit(newMsgFromData(msgData, target.ID, 0))
}
Пример #19
0
// ProcessStatusChange reacts to raft status events. Only raft.LeaderChanged
// events are handled; any other value is ignored.
//
// An event whose New field is Nil is ignored, as is one whose new leader hive
// is the hive of the colony's current leader. Otherwise a copy of the colony
// is built in which the previous leader (if any) becomes a follower and the
// fellow bee on hive ev.New becomes the leader. The copy is installed with
// b.setColony and a cmdRefreshRole is processed asynchronously.
//
// When ev.New is this bee's own hive, the bee additionally records ev.Term and
// proposes an updateColony to the hive group in a separate goroutine; a failed
// proposal is only logged.
func (b *bee) ProcessStatusChange(sch interface{}) {
	ev, isLeaderChange := sch.(raft.LeaderChanged)
	if !isLeaderChange {
		return
	}

	glog.V(2).Infof("%v recevies leader changed event %#v", b, ev)
	if ev.New == Nil {
		// TODO(soheil): when we switch to nil during a campaign, shouldn't we
		// just change the colony?
		return
	}

	prev := b.colony()
	prevLeader, err := b.hive.bee(prev.Leader)
	if err != nil {
		glog.Fatalf("%v cannot find leader: %v", b, err)
	}
	if prevLeader.Hive == ev.New {
		glog.V(2).Infof("%v has no need to change %v", b, prev)
		return
	}

	next := prev.DeepCopy()
	if prev.Leader != Nil {
		// Demote the previous leader to a follower.
		next.Leader = Nil
		next.AddFollower(prev.Leader)
	}
	nextLeader := b.fellowBeeOnHive(ev.New)
	next.DelFollower(nextLeader.ID)
	next.Leader = nextLeader.ID
	b.setColony(next)

	go b.processCmd(cmdRefreshRole{})

	if ev.New != b.hive.ID() {
		return
	}

	b.setTerm(ev.Term)

	go func() {
		// FIXME(): add raft term to make sure it's versioned.
		glog.V(2).Infof("%v is the new leader of %v", b, prev)
		update := updateColony{
			Term: ev.Term,
			Old:  prev,
			New:  next,
		}

		// TODO(soheil): should we have a max retry?
		if _, err := b.hive.node.ProposeRetry(hiveGroup, update,
			b.hive.config.RaftElectTimeout(), -1); err != nil {

			glog.Errorf("%v cannot update its colony: %v", b, err)
		}
	}()
	// TODO(soheil): add health checks here and recruit if needed.
}
Пример #20
0
// Peer returns a raft peer whose ID is the group node's Node and whose Context
// is the node's binary encoding as produced by MustEncode. A zero Group or a
// zero Node is reported with glog.Fatalf.
func (i GroupNode) Peer() etcdraft.Peer {
	valid := i.Group != 0 && i.Node != 0
	if !valid {
		glog.Fatalf("zero group")
	}

	var p etcdraft.Peer
	p.ID = i.Node
	p.Context = i.MustEncode()
	return p
}
Пример #21
0
// addBee stores info in r.Bees under info.ID.
//
// If an entry with the same ID already exists and is not deeply equal to info,
// ErrDuplicateBee is returned and the registry is left unchanged. An ID equal
// to Nil, or an ID greater than the registry's BeeID, is reported with
// glog.Fatalf.
func (r *registry) addBee(info BeeInfo) error {
	glog.V(2).Infof("%v add bee %v (detached=%v) for %v with %v,", r, info.ID,
		info.Detached, info.App, info.Colony)

	if info.ID == Nil {
		glog.Fatalf("invalid bee info: %v", info)
	}

	if i, ok := r.Bees[info.ID]; ok {
		if !reflect.DeepEqual(info, i) {
			return ErrDuplicateBee
		}
	}
	if r.BeeID < info.ID {
		// Print the two values that are actually compared, in the order of the
		// comparison.
		glog.Fatalf("%v has invalid bee ID: %v < %v", r, r.BeeID, info.ID)
	}
	r.Bees[info.ID] = info
	return nil
}
Пример #22
0
// ProcessRaft steps the received raft batch through the hive's raft node and
// returns the error from StepBatch.
//
// The batch must be addressed to this hive; otherwise the mismatch is reported
// with glog.Fatalf. The step runs under a context limited to the configured
// raft heartbeat timeout, and twice that timeout is passed to StepBatch.
// dummy is not used.
func (s *rpcServer) ProcessRaft(batch raft.Batch, dummy *bool) (err error) {
	if batch.To != s.h.ID() {
		glog.Fatalf("%v recieves a raft message for %v", s.h, batch.To)
	}

	glog.V(3).Infof("%v handles a batch from %v", s.h, batch.From)
	ctx, cnl := context.WithTimeout(context.Background(),
		s.h.config.RaftHBTimeout())
	// Release the context's resources on every exit path.
	defer cnl()

	return s.h.node.StepBatch(ctx, batch, 2*s.h.config.RaftHBTimeout())
}
Пример #23
0
// ShortestPathCentralized computes the shortest paths from node "from" to node
// "to" using the links stored in GraphDict by the GraphBuilderCentralized.
// When from equals to it returns nil and 0.
//
// Every link counts as one hop. For each reached node the search records its
// distance and all links that arrive at it with that distance; the final
// result is assembled from those records by allPaths.
//
// This function is not goroutine safe. It must be called from a handler of the
// application that uses the GraphBuilderCentralized as a handler; otherwise
// the caller has to synchronize the two.
func ShortestPathCentralized(from, to nom.UID, ctx bh.RcvContext) (
	paths [][]nom.Link, length int) {

	if from == to {
		return nil, 0
	}

	seen := map[nom.UID]distAndLinks{from: {Dist: 0}}

	queue := nodeAndDistSlice{{Dist: 0, Node: from}}
	heap.Init(&queue)

	graph := ctx.Dict(GraphDict)
	for len(queue) > 0 {
		cur := heap.Pop(&queue).(nodeAndDist)
		if cur.Node == to {
			// The destination's outgoing links are not expanded.
			continue
		}

		neighbors := make(map[nom.UID][]nom.Link)
		if v, err := graph.Get(string(cur.Node)); err == nil {
			neighbors = v.(map[nom.UID][]nom.Link)
		}

		cur.Dist = seen[cur.Node].Dist
		next := cur.Dist + 1
		for _, links := range neighbors {
			for _, l := range links {
				nid, _ := nom.ParsePortUID(l.To)
				peer := nom.UID(nid)

				known, visited := seen[peer]
				if !visited {
					seen[peer] = distAndLinks{
						Dist:      next,
						BackLinks: []nom.Link{l},
					}
					heap.Push(&queue, nodeAndDist{
						Dist: next,
						Node: peer,
					})
					continue
				}

				if next < known.Dist {
					glog.Fatalf("invalid distance in BFS")
				} else if next == known.Dist {
					// Another link reaching the node with the same distance.
					known.BackLinks = append(known.BackLinks, l)
					seen[peer] = known
				}
			}
		}
	}
	return allPaths(from, to, seen)
}
Пример #24
0
// fellowBeeOnHive returns the info of the colony member whose Hive field
// equals hive. The colony's leader is checked first, then its followers in
// iteration order.
//
// A member that cannot be looked up through b.hive.bee, or the absence of any
// member on the given hive, is reported with glog.Fatalf.
func (b *bee) fellowBeeOnHive(hive uint64) (fellow BeeInfo) {
	col := b.colony()

	leader, err := b.hive.bee(col.Leader)
	if err != nil {
		glog.Fatalf("%v cannot find leader %v", b, col.Leader)
	}
	if leader.Hive == hive {
		return leader
	}

	for _, fid := range col.Followers {
		info, err := b.hive.bee(fid)
		if err != nil {
			glog.Fatalf("%v cannot find bee %v", b, fid)
		}
		if info.Hive == hive {
			return info
		}
	}

	glog.Fatalf("%v cannot find fellow on hive %v", b, hive)
	return
}
Пример #25
0
// snapshot serializes the state machine, records the applied index as the
// snapshot index, and then, in a separate goroutine, creates the raft
// snapshot, saves it to disk storage, and compacts the in-memory raft log.
//
// The log is compacted up to numberOfCatchUpEntries before the snapshot index
// (or to index 1 when the snapshot index is not larger than that), keeping
// some entries for slow followers.
//
// etcdraft.ErrSnapOutOfDate from snapshot creation and etcdraft.ErrCompacted
// from compaction end the goroutine quietly. Every other error, including a
// serialization failure, is reported with glog.Fatalf.
func (g *group) snapshot() {
	data, err := g.stateMachine.Save()
	if err != nil {
		glog.Fatalf("error in seralizing the state machine: %v", err)
	}
	g.snapped = g.applied

	index := g.snapped
	go func() {
		snap, err := g.raftStorage.CreateSnapshot(index, &g.confState, data)
		switch {
		case err == etcdraft.ErrSnapOutOfDate:
			// the snapshot was done asynchronously with the progress of raft.
			// raft might have already got a newer snapshot.
			return
		case err != nil:
			glog.Fatalf("unexpected create snapshot error %v", err)
		}

		if err := g.diskStorage.SaveSnap(snap); err != nil {
			glog.Fatalf("save snapshot error: %v", err)
		}
		glog.Infof("%v saved snapshot at index %d", g, snap.Metadata.Index)

		// keep some in memory log entries for slow followers.
		keepFrom := uint64(1)
		if index > numberOfCatchUpEntries {
			keepFrom = index - numberOfCatchUpEntries
		}

		err = g.raftStorage.Compact(keepFrom)
		if err == etcdraft.ErrCompacted {
			// the compaction was done asynchronously with the progress of raft.
			// the raft log might already have been compacted.
			return
		}
		if err != nil {
			glog.Fatalf("unexpected compaction error %v", err)
		}
		glog.Infof("%v compacted raft log at %d", g, keepFrom)
	}()
}
Пример #26
0
// handleGroupRequest serves a create, remove, or status request for a raft
// group and sends the outcome on req.ch.
//
//   - Create registers the group and calls CreateGroup on the underlying node.
//     On success the group's saver and applier goroutines are started. On
//     failure the registration is rolled back and the error is returned in the
//     response. Creating an existing group yields ErrGroupExists.
//   - Remove stops the group and unregisters it, or yields ErrNoSuchGroup when
//     the group is not registered.
//   - Status yields ErrNoSuchGroup when the group is not registered.
//
// Any other request type is reported with glog.Fatalf.
func (n *MultiNode) handleGroupRequest(req groupRequest) {
	g, ok := n.groups[req.group.id]
	res := groupResponse{
		group: req.group.id,
	}

	switch req.reqType {
	case groupRequestCreate:
		if ok {
			res.err = ErrGroupExists
			break
		}

		n.groups[req.group.id] = req.group
		err := n.node.CreateGroup(req.group.id, req.config, req.peers)
		if err != nil {
			delete(n.groups, req.group.id)
			// Report the failure; otherwise the requester would believe the
			// group exists.
			res.err = err
			break
		}
		go req.group.startSaver()
		go req.group.startApplier()

	case groupRequestRemove:
		if !ok {
			res.err = ErrNoSuchGroup
			break
		}

		g.stop()
		delete(n.groups, req.group.id)

	case groupRequestStatus:
		// TODO(soheil): add softstate to the response.
		if !ok {
			res.err = ErrNoSuchGroup
		}

	default:
		glog.Fatalf("invalid group request: %v", req.reqType)
	}

	req.ch <- res
}
Пример #27
0
// Handle registers h as the app's handler for the type of msg, and registers
// a syncHandler wrapping h for the type of a syncReq carrying msg. The message
// is also registered with the hive.
//
// The first registration error is returned. A nil qee on the app is reported
// with glog.Fatalf.
func (a *app) Handle(msg interface{}, h Handler) error {
	if a.qee == nil {
		glog.Fatalf("app's qee is nil!")
	}

	msgType := MsgType(msg)
	a.hive.RegisterMsg(msg)
	if err := a.registerHandler(msgType, h); err != nil {
		return err
	}

	wrapped := syncReq{Data: msg}
	return a.registerHandler(MsgType(wrapped), syncHandler{handler: h})
}
Пример #28
0
// All chains the given handlers into a pipeline that preserves their order: an
// incoming message reaches the i'th handler only after the (i-1)'th handler
// has processed it successfully.
//
// If any handler returns an error the whole transaction is aborted, so a
// message is processed either by all of the handlers or by none of them.
//
// A single handler is returned as is. Calling All without any handler is
// reported with glog.Fatalf.
func All(handlers ...bh.Handler) bh.Handler {
	switch len(handlers) {
	case 0:
		glog.Fatalf("no handler provided")
	case 1:
		return handlers[0]
	}

	composed := &ComposedHandler{
		Handlers: handlers,
		Composer: ComposeAll,
		Isolate:  true,
	}
	return composed
}
Пример #29
0
// processConfChange proposes the configuration change cc for the given group
// and waits for its result.
//
// The group node gn is encoded, together with a freshly generated request ID,
// into the change's Context. The marshalled change is then sent on n.propc as
// a MsgProp carrying a single EntryConfChange entry, and the call blocks until
// a response for the request ID arrives.
//
// It returns the response's error, ctx.Err() if ctx is done first (the pending
// request is then cancelled), or ErrStopped if the node is done. Encoding and
// marshalling errors are returned as is. A zero group, a zero gn.Node, or a
// gn.Group different from group is reported with glog.Fatalf.
func (n *MultiNode) processConfChange(ctx context.Context, group uint64,
	cc raftpb.ConfChange, gn GroupNode) error {

	if group == 0 || gn.Node == 0 || gn.Group != group {
		glog.Fatalf("invalid group node: %v", gn)
	}

	id := n.genID()
	req := Request{Data: gn}

	var err error
	cc.Context, err = n.encReq(id, req)
	if err != nil {
		return err
	}

	// Marshal before registering the waiter, so that a marshalling failure
	// cannot leave behind a pending request that nobody cancels.
	d, err := cc.Marshal()
	if err != nil {
		return err
	}

	ch := n.line.wait(id, req)

	select {
	case n.propc <- multiMessage{
		group: group,
		msg: raftpb.Message{
			Type:    raftpb.MsgProp,
			Entries: []raftpb.Entry{{Type: raftpb.EntryConfChange, Data: d}},
		},
	}:
	case <-ctx.Done():
		n.line.cancel(id)
		return ctx.Err()
	case <-n.done:
		return ErrStopped
	}

	select {
	case res := <-ch:
		return res.Err
	case <-ctx.Done():
		n.line.cancel(id)
		return ctx.Err()
	case <-n.done:
		return ErrStopped
	}
}
Пример #30
0
// updateMatrix counts one message from r.In.From() in the matrix stored for
// bee r.Bee in the dictLocalStat dictionary, and increments the entry's
// UpdateMsgCnt.
//
// If the dictionary lookup returns an error, a new entry is started with an
// empty matrix and the current time as its UpdateTime. A failure to store the
// entry is reported with glog.Fatalf.
func (c localCollector) updateMatrix(r beeRecord, ctx RcvContext) {
	stats := ctx.Dict(dictLocalStat)
	key := formatBeeID(r.Bee)

	var lm localBeeMatrix
	if v, err := stats.Get(key); err == nil {
		lm = v.(localBeeMatrix)
	} else {
		lm.BeeMatrix.Bee = r.Bee
		lm.BeeMatrix.Matrix = make(map[uint64]uint64)
		lm.UpdateTime = time.Now()
	}

	lm.BeeMatrix.Matrix[r.In.From()]++
	lm.UpdateMsgCnt++

	if err := stats.Put(key, lm); err != nil {
		glog.Fatalf("cannot store matrix: %v", err)
	}
}