func (h *httpHostListHandler) Rcv(msg bh.Msg, ctx bh.RcvContext) error { hrq := msg.Data().(http.HTTPRequest) if hrq.AppName == "host" && hrq.Verb == "list" { dict := ctx.Dict(hostDict) v, err := dict.Get("hsts") hsts := []nom.Host{} if err == nil { hsts = v.([]nom.Host) } data, err := json.Marshal(hsts) if err != nil { glog.Errorf("Host list JSON marshaling: %v", err) return err } fmt.Println(hsts) hrs := http.HTTPResponse{ AppName: "host", Data: data, } err = ctx.Reply(msg, hrs) if err != nil { glog.Errorf("Replay error: %v", err) return err } } return nil }
func (h *intentHandler) Rcv(msg bh.Msg, ctx bh.RcvContext) error { hrq := msg.Data().(http.HTTPRequest) if hrq.AppName == "intent" && hrq.Verb == "build" { spd := shortestPathData{} err := json.Unmarshal(hrq.Data, &spd) if err != nil { glog.Errorf("Host list JSON unmarshaling: %v", err) return err } fmt.Println(spd) fmt.Println(discovery.ShortestPathCentralized(spd.From, spd.To, ctx)) hrs := http.HTTPResponse{ AppName: "host", Data: []byte{'A'}, } err = ctx.Reply(msg, hrs) if err != nil { glog.Errorf("Replay error: %v", err) return err } } return nil }
func (h *hostConnectedHandler) Rcv(msg bh.Msg, ctx bh.RcvContext) error { host := msg.Data().(nom.HostConnected) dict := ctx.Dict(hostDict) _, err := dict.Get(host.MACAddr.String()) if err == nil { return nil } v, err := dict.Get("hsts") hsts := []nom.Host{} if err == nil { hsts = v.([]nom.Host) } hsts = append(hsts, nom.Host(host)) err = dict.Put(host.MACAddr.String(), host) if err != nil { glog.Errorf("Put %v in %s: %v", host, host.MACAddr.String(), err) return err } err = dict.Put("hsts", hsts) if err != nil { glog.Errorf("Put %v in hsts: %v", hsts, err) return err } ctx.Emit(nom.HostJoined(host)) return nil }
func (q *qee) handleUnicastMsg(mh msgAndHandler) { glog.V(2).Infof("unicast msg: %v", mh.msg) b, ok := q.beeByID(mh.msg.To()) if !ok { info, err := q.hive.registry.bee(mh.msg.To()) if err != nil { glog.Errorf("cannot find bee %v", mh.msg.To()) } if q.isLocalBee(info) { glog.Fatalf("%v cannot find local bee %v", q, mh.msg.To()) } if b, ok = q.beeByID(info.ID); !ok { if b, err = q.newProxyBee(info); err != nil { glog.Errorf("%v cannnot find remote bee %v", q, mh.msg.To()) return } } } if mh.handler == nil && !b.detached && !b.proxy { glog.Fatalf("handler is nil for message %v", mh.msg) } b.enqueMsg(mh) }
func (c *ofConn) doRead(done chan struct{}, stop chan struct{}) { defer close(done) pkts := make([]of.Header, c.readBufLen) for { select { case <-stop: return default: } n, err := c.ReadHeaders(pkts) if err != nil { if err == io.EOF { glog.Infof("connection %v closed", c.RemoteAddr()) } else { glog.Errorf("cannot read from the connection %v: %v", c.RemoteAddr(), err) } return } for _, pkt := range pkts[:n] { if err := c.driver.handlePkt(pkt, c); err != nil { glog.Errorf("%s", err) return } } pkts = pkts[n:] if len(pkts) == 0 { pkts = make([]of.Header, c.readBufLen) } } }
func (b *bee) recoverFromError(mh msgAndHandler, err interface{}, stack bool) { b.AbortTx() if d, ok := err.(time.Duration); ok { b.snooze(mh, d) return } glog.Errorf("error in %s for %s: %v", b.app.Name(), mh.msg.Type(), err) if stack { glog.Errorf("%s", debug.Stack()) } }
func (h *hive) Start() error { h.status = hiveStarted h.registerSignals() h.startRaftNode() if err := h.listen(); err != nil { glog.Errorf("%v cannot start listener: %v", h, err) h.Stop() return err } if err := h.raftBarrier(); err != nil { glog.Fatalf("error when joining the cluster: %v", err) } glog.V(2).Infof("%v is in sync with the cluster", h) h.startQees() h.reloadState() glog.V(2).Infof("%v starts message loop", h) dataCh := h.dataCh.out() for h.status == hiveStarted { select { case m := <-dataCh: h.handleMsg(m.msg) case cmd := <-h.ctrlCh: h.handleCmd(cmd) } } return nil }
func (h *hive) stopQees() { glog.Infof("%v is stopping qees...", h) qs := make(map[*qee]bool) for _, mhs := range h.qees { for _, mh := range mhs { qs[mh.q] = true } } stopCh := make(chan cmdResult) for q := range qs { q.ctrlCh <- newCmdAndChannel(cmdStop{}, h.ID(), q.app.Name(), 0, stopCh) glog.V(3).Infof("waiting on a qee: %v", q) stopped := false tries := 5 for !stopped { select { case res := <-stopCh: _, err := res.get() if err != nil { glog.Errorf("error in stopping a qee: %v", err) } stopped = true case <-time.After(1 * time.Second): if tries--; tries < 0 { glog.Infof("giving up on qee %v", q) stopped = true continue } glog.Infof("still waiting for a qee %v...", q) } } } }
func (h *arpPktInHandler) Rcv(msg bh.Msg, ctx bh.RcvContext) error { pin := msg.Data().(nom.PacketIn) p := gopacket.NewPacket([]byte(pin.Packet), layers.LayerTypeEthernet, gopacket.Default) etherlayer := p.Layer(layers.LayerTypeEthernet) if etherlayer == nil { return nil } e, _ := etherlayer.(*layers.Ethernet) if e.EthernetType != layers.EthernetTypeARP { return nil } host, _, err := decodeARP([]byte(pin.Packet)) host.Node = pin.Node if err != nil { glog.Errorf("ARP decoding error: %v", err) return err } glog.V(2).Infof("Host detected: %v", host) ctx.Emit(nom.HostConnected(host)) return nil }
func (b *bee) handoff(to uint64) error { if !b.app.persistent() { return b.handoffNonPersistent(to) } c := b.colony() if !c.IsFollower(to) { return fmt.Errorf("%v is not a follower of %v", to, b) } if _, err := b.qee.sendCmdToBee(to, cmdSync{}); err != nil { return err } ch := make(chan error) go func() { // TODO(soheil): use context with deadline here. _, err := b.qee.sendCmdToBee(to, cmdCampaign{}) ch <- err }() t := b.hive.config.RaftElectTimeout() time.Sleep(t) if _, err := b.hive.node.ProposeRetry(c.ID, noOp{}, t, 10); err != nil { glog.Errorf("%v cannot sync raft: %v", b, err) } if b.isFollower(b.ID()) { glog.V(2).Infof("%v successfully handed off leadership to %v", b, to) b.becomeFollower() } return <-ch }
// Map method of the composed handler. func (c *ComposedHandler) Map(msg bh.Msg, ctx bh.MapContext) bh.MappedCells { var cells bh.MappedCells var err error for i, h := range c.Handlers { var hc bh.MappedCells if c.Isolate { mctx := composedMapContext{MapContext: ctx, prefix: strconv.Itoa(i)} hc, err = c.callMap(h, msg, mctx) } else { hc, err = c.callMap(h, msg, ctx) } // TODO(soheil): Is there any better way to handle this? if err != nil { glog.Errorf("error in calling the map function of %#v: %v", h, err) return nil } if c.Isolate { for j := range hc { hc[j].Dict = strconv.Itoa(i) + hc[j].Dict } } for _, cell := range hc { cells = append(cells, cell) } } return cells }
func (r *registry) doApply(req interface{}) (interface{}, error) { glog.V(2).Infof("%v applies: %#v", r, req) switch req := req.(type) { case noOp: return nil, nil case newHiveID: return r.newHiveID(), nil case allocateBeeIDs: return r.allocBeeIDs(req) case addBee: return nil, r.addBee(BeeInfo(req)) case delBee: return nil, r.delBee(uint64(req)) case moveBee: return nil, r.moveBee(req) case updateColony: return nil, r.updateColony(req) case lockMappedCell: return r.lockCell(req) case transferCells: return nil, r.transfer(req) case batchReq: return r.handleBatch(req), nil } glog.Errorf("%v cannot handle %v", r, req) return nil, ErrUnsupportedRequest }
// RuntimeMap generates an automatic runtime map function based on the given // rcv function. // // If there was an error in the rcv function, it will return "nil" and the // message will be dropped. func RuntimeMap(rcv RcvFunc) MapFunc { return func(msg Msg, ctx MapContext) (cells MappedCells) { defer func() { if r := recover(); r != nil { glog.Errorf("runtime map cannot find the mapped cells") cells = nil } }() q := ctx.(*qee) rCtx := runtimeRcvContext{ qee: q, state: state.NewTransactional(q.app.newState()), } if err := rcv(msg, rCtx); err != nil { return nil } for _, d := range rCtx.state.Dicts() { d.ForEach(func(k string, v interface{}) bool { cells = append(cells, CellKey{Dict: d.Name(), Key: k}) return true }) } return } }
func peersInfo(addrs []string) map[uint64]HiveInfo { if len(addrs) == 0 { return nil } ch := make(chan []HiveInfo, len(addrs)) for _, a := range addrs { go func(a string) { s, err := getHiveState(a) if err != nil { glog.Errorf("cannot communicate with %v: %v", a, err) return } ch <- s.Peers }(a) } // Return the first one. hives := <-ch glog.V(2).Infof("found live hives: %v", hives) infos := make(map[uint64]HiveInfo) for _, h := range hives { infos[h.ID] = h } return infos }
func (q *qee) newRemoteBee(pc *pendingCells, hive uint64) { var col Colony cmd := cmd{ Hive: hive, App: q.app.Name(), Data: cmdCreateBee{}, } res, err := q.hive.client.sendCmd(cmd) if err != nil { q.placementCh <- placementRes{pCells: pc} goto fallback } col.Leader = res.(uint64) cmd.Bee = col.Leader cmd.Data = cmdJoinColony{ Colony: col, } if _, err = q.hive.client.sendCmd(cmd); err != nil { goto fallback } q.placementCh <- placementRes{ hive: hive, colony: col, pCells: pc, } return fallback: glog.Errorf("%v cannot create a new bee on %v. will place locally: %v", q, hive, err) q.placementCh <- placementRes{pCells: pc} }
func (b *bee) handleMsgLeader(mhs []msgAndHandler) { usetx := b.app.transactional() if usetx && len(mhs) > 1 { b.stateL2 = state.NewTransactional(b.stateL1) b.stateL1.BeginTx() } for i := range mhs { if usetx { b.BeginTx() } mh := mhs[i] if glog.V(2) { glog.Infof("%v handles message %v", b, mh.msg) } b.callRcv(mh) if usetx { var err error if b.stateL2 == nil { err = b.CommitTx() } else if len(b.msgBufL1) == 0 && b.stateL2.HasEmptyTx() { // If there is no pending L1 message and there is no state change, // emit the buffered messages in L2 as a shortcut. b.throttle(b.msgBufL2) b.resetTx(b.stateL2, &b.msgBufL2) } else { err = b.commitTxL2() } if err != nil && err != state.ErrNoTx { glog.Errorf("%v cannot commit a transaction: %v", b, err) } } } if !usetx || b.stateL2 == nil { return } b.stateL2 = nil if err := b.CommitTx(); err != nil && err != state.ErrNoTx { glog.Errorf("%v cannot commit a transaction: %v", b, err) } }
func (b *bee) Apply(req interface{}) (interface{}, error) { b.Lock() defer b.Unlock() switch r := req.(type) { case commitTx: if b.txTerm < r.Term { b.txTerm = r.Term } else if r.Term < b.txTerm { return nil, ErrOldTx } glog.V(2).Infof("%v commits %v", b, r) leader := b.isLeader() if b.stateL2 != nil { b.stateL2 = nil glog.Errorf("%v has an L2 transaction", b) } if b.stateL1.TxStatus() == state.TxOpen { if !leader { glog.Errorf("%v is a follower and has an open transaction", b) } b.resetTx(b.stateL1, &b.msgBufL1) } if err := b.stateL1.Apply(r.Tx.Ops); err != nil { return nil, err } if leader && b.emitInRaft { for _, msg := range r.Tx.Msgs { msg.MsgFrom = b.beeID glog.V(2).Infof("%v emits %#v", b, msg) } b.throttle(r.Tx.Msgs) } return nil, nil case noOp: return nil, nil } glog.Errorf("%v cannot handle %v", b, req) return nil, ErrUnsupportedRequest }
func (g *group) fsync() error { glog.V(2).Infof("%v syncing disk storage", g) if err := g.diskStorage.Sync(); err != nil { glog.Errorf("%v cannot sync disk storage: %v", g, err) return err } return nil }
// sendRaft sends a raft batch through the hive client in a new goroutine.
// Failures are logged unless isBackoffError reports true for them.
func (h *hive) sendRaft(batch *raft.Batch, r raft.Reporter) {
	go func() {
		err := h.client.sendRaft(batch, r)
		if err == nil || isBackoffError(err) {
			return
		}
		glog.Errorf("%v cannot send raft messages: %v", h, err)
	}()
}
func (b *bee) ProcessStatusChange(sch interface{}) { switch ev := sch.(type) { case raft.LeaderChanged: glog.V(2).Infof("%v recevies leader changed event %#v", b, ev) if ev.New == Nil { // TODO(soheil): when we switch to nil during a campaign, shouldn't we // just change the colony? return } oldc := b.colony() oldi, err := b.hive.bee(oldc.Leader) if err != nil { glog.Fatalf("%v cannot find leader: %v", b, err) } if oldi.Hive == ev.New { glog.V(2).Infof("%v has no need to change %v", b, oldc) return } newc := oldc.DeepCopy() if oldc.Leader != Nil { newc.Leader = Nil newc.AddFollower(oldc.Leader) } newi := b.fellowBeeOnHive(ev.New) newc.DelFollower(newi.ID) newc.Leader = newi.ID b.setColony(newc) go b.processCmd(cmdRefreshRole{}) if ev.New != b.hive.ID() { return } b.setTerm(ev.Term) go func() { // FIXME(): add raft term to make sure it's versioned. glog.V(2).Infof("%v is the new leader of %v", b, oldc) up := updateColony{ Term: ev.Term, Old: oldc, New: newc, } // TODO(soheil): should we have a max retry? _, err := b.hive.node.ProposeRetry(hiveGroup, up, b.hive.config.RaftElectTimeout(), -1) if err != nil { glog.Errorf("%v cannot update its colony: %v", b, err) } }() // TODO(soheil): add health checks here and recruit if needed. } }
func (n *MultiNode) handleBatch(bt batchTimeout) { ctx, cnl := context.WithTimeout(context.Background(), bt.timeout) for g, msgs := range bt.batch.Messages { if _, ok := n.groups[g]; !ok { glog.Errorf("group %v is not created on %v", g, n) continue } for _, m := range msgs { if err := n.node.Step(ctx, g, m); err != nil { glog.Errorf("%v cannot step group %v: %v", n, g, err) if err == context.DeadlineExceeded || err == context.Canceled { return } } } } cnl() }
func (h *hive) delBeeFromRegistry(id uint64) error { _, err := h.node.ProposeRetry(hiveGroup, delBee(id), h.config.RaftElectTimeout(), -1) if err == ErrNoSuchBee { err = nil } if err != nil { glog.Errorf("%v cannot delete bee %v from registory: %v", h, id, err) } return err }
func main() { flag.Parse() rand.Seed(time.Now().UnixNano()) h := beehive.NewHive() if err := server.RegisterTaskQ(h); err != nil { glog.Errorf("cannot register taskq: %v", err) os.Exit(-1) } h.Start() }
func (q *qee) invokeMap(mh msgAndHandler) (ms MappedCells) { defer func() { if r := recover(); r != nil { glog.Errorf("error in map of %s: %v\n%s", q.app.Name(), r, string(debug.Stack())) ms = nil } }() glog.V(2).Infof("%v invokes map for %v", q, mh.msg) return mh.handler.Map(mh.msg, q) }
func (h *hive) reloadState() { for _, b := range h.registry.beesOfHive(h.id) { if b.Detached || b.Colony.IsNil() { glog.V(1).Infof( "%v will not reload detached bee %v (detached=%v, colony=%#v)", h, b.ID, b.Detached, b.Colony) go h.delBeeFromRegistry(b.ID) continue } a, ok := h.app(b.App) if !ok { glog.Errorf("app %v is not registered but has a bee", b.App) continue } _, err := a.qee.processCmd(cmdReloadBee{ID: b.ID, Colony: b.Colony}) if err != nil { glog.Errorf("cannot reload bee %v on %v", b.ID, h.id) continue } } }
func (b *bee) commitTxL1() (err error) { if b.stateL2 != nil { glog.Errorf("%v has open L2 transaction while committing L1", b) b.commitTxL2() } if err = b.stateL1.CommitTx(); err == nil { b.throttle(b.msgBufL1) } b.resetTx(b.stateL1, &b.msgBufL1) return }
func (g *group) startSaver() { defer func() { if err := g.diskStorage.Close(); err != nil { glog.Errorf("%v cannot close disk storage: %v", g, err) } close(g.saverDone) }() var fsync <-chan time.Time for { select { case rdsv := <-g.savec: if err := g.save(rdsv); err != nil { if err != ErrStopped { glog.Errorf("%v cannot apply entries: %v", g, err) } return } if g.fsyncTime == 0 { if err := g.fsync(); err != nil { return } } else if fsync == nil { fsync = time.After(g.fsyncTime) } case <-fsync: if err := g.fsync(); err != nil { return } fsync = nil case <-g.node.done: return case <-g.stopc: return } } }
func (b *bee) replicate() error { glog.V(2).Infof("%v replicates transaction", b) b.Lock() if b.stateL2 != nil { err := b.commitTxL2() b.stateL2 = nil if err != nil && err != state.ErrNoTx { b.Unlock() return err } } if b.stateL1.TxStatus() != state.TxOpen { b.Unlock() return state.ErrNoTx } stx := b.stateL1.Tx() if len(stx.Ops) == 0 { err := b.commitTxL1() b.Unlock() return err } b.Unlock() if err := b.maybeRecruitFollowers(); err != nil { return err } msgs := make([]*msg, len(b.msgBufL1)) copy(msgs, b.msgBufL1) tx := tx{ Tx: stx, Msgs: msgs, } ctx, cnl := context.WithTimeout(context.Background(), 10*b.hive.config.RaftElectTimeout()) defer cnl() commit := commitTx{ Tx: tx, Term: b.term(), } if _, err := b.hive.node.Propose(ctx, b.group(), commit); err != nil { glog.Errorf("%v cannot replicate the transaction: %v", b, err) return err } glog.V(2).Infof("%v successfully replicates transaction", b) return nil }
func (b *bee) BeginTx() error { dicts, _ := b.currentState() if dicts.TxStatus() == state.TxOpen { return state.ErrOpenTx } if err := dicts.BeginTx(); err != nil { glog.Errorf("Cannot begin a transaction for %v: %v", b, err) return err } glog.V(2).Infof("%v begins a new transaction", b) return nil }
func (l *ofListener) Start(ctx bh.RcvContext) { nl, err := net.Listen(l.proto, l.addr) if err != nil { glog.Errorf("Cannot start the OF listener: %v", err) return } glog.Infof("OF listener started on %s:%s", l.proto, l.addr) defer func() { glog.Infof("OF listener closed") nl.Close() }() for { c, err := nl.Accept() if err != nil { glog.Errorf("Error in OF accept: %v", err) return } l.startOFConn(c, ctx) } }