func set(cl *doozer.Conn, id, iter int, path string, value []byte, f cb) { for i := 0; i < iter; i++ { s := time.Nanoseconds() rev, err := cl.Set(path, math.MaxInt64, value) e := time.Nanoseconds() f(id, i, rev, s, e, err) } }
func watch(conn *doozer.Conn, events chan doozer.Event, revs chan int64) { for { rev := <-revs event, err := conn.Wait("/*", rev) if err != nil { panic("error waiting for event, bailing") } revs <- event.Rev + 1 events <- event } }
func activate(st *store.Store, self string, c *doozer.Conn) int64 { rev, _ := st.Snap() entries := store.Getdir(st, calDir) base := entries[len(entries)-1] index, _ := strconv.ParseInt(base, 10, 32) index += 1 p := calDir + "/" + strconv.Itoa(int(index)) seqn, err := c.Set(p, rev, []byte(self)) if err != nil { panic(err) } // for _, base := range store.Getdir(st, calDir) { // p := calDir + "/" + base // v, rev := st.Get(p) // if rev != store.Dir && v[0] == "" { // seqn, err := c.Set(p, rev, []byte(self)) // if err != nil { // log.Println(err) // continue // } // return seqn // } // } // for { // ch, err := st.Wait(calGlob, rev+1) // if err != nil { // panic(err) // } // ev, ok := <-ch // if !ok { // panic(io.EOF) // } // rev = ev.Rev // // TODO ev.IsEmpty() // if ev.IsSet() && ev.Body == "" { // seqn, err := c.Set(ev.Path, ev.Rev, []byte(self)) // if err != nil { // log.Println(err) // continue // } // return seqn // } else if ev.IsSet() && ev.Body == self { // return ev.Seqn // } // } return seqn }
func waitFor(cl *doozer.Conn, path string) { var rev int64 for { ev, err := cl.Wait(path, rev) if err != nil { panic(err) } if ev.IsSet() && len(ev.Body) > 0 { break } rev = ev.Rev + 1 } }
// Find possible addresses for cluster named name. func lookup(b *doozer.Conn, name string) (as []string) { rev, err := b.Rev() if err != nil { panic(err) } info, err := b.Walk("/ctl/ns/"+name+"/*", rev, 0, -1) if err != nil { panic(err) } for _, e := range info { as = append(as, string(e.Body)) } return as }
func activate(st *store.Store, self string, c *doozer.Conn) int64 { rev, _ := st.Snap() for _, base := range store.Getdir(st, calDir) { p := calDir + "/" + base v, rev := st.Get(p) if rev != store.Dir && v[0] == "" { seqn, err := c.Set(p, rev, []byte(self)) if err != nil { log.Println(err) continue } return seqn } } for { ch, err := st.Wait(calGlob, rev+1) if err != nil { panic(err) } ev, ok := <-ch if !ok { panic(io.EOF) } rev = ev.Rev // TODO ev.IsEmpty() if ev.IsSet() && ev.Body == "" { seqn, err := c.Set(ev.Path, ev.Rev, []byte(self)) if err != nil { log.Println(err) continue } return seqn } else if ev.IsSet() && ev.Body == self { return ev.Seqn } } return 0 }
func follow(st *store.Store, cl *doozer.Conn, rev int64, stop chan bool) { for { ev, err := cl.Wait("/**", rev) if err != nil { panic(err) } // store.Clobber is okay here because the event // has already passed through another store mut := store.MustEncodeSet(ev.Path, string(ev.Body), store.Clobber) st.Ops <- store.Op{ev.Rev, mut} rev = ev.Rev + 1 select { case <-stop: return default: } } }
// wait waits on a changes for the fiven file starting at the given // revision from the given doozer connection. It sends updated peer // lists on the returned channel. func wait(d *doozer.Conn, file string, rev *int64) chan []string { c := make(chan []string, 1) cur := *rev go func() { for { // Wait for the change. e, err := d.Wait(file, cur+1) if err != nil { log.Println("waiting failed (no longer watching):", err) close(c) return } // Update the revision and send the change on the channel. atomic.CompareAndSwapInt64(rev, cur, e.Rev) cur = e.Rev c <- strings.Split(string(e.Body), " ") } }() return c }
// Elect chooses a seed node, and returns a connection to a cal. // If this process is the seed, returns nil. func elect(name, id, laddr string, b *doozer.Conn) *doozer.Conn { // advertise our presence, since we might become a cal nspath := "/ctl/ns/" + name + "/" + id r, err := b.Set(nspath, 0, []byte(laddr)) if err != nil { panic(err) } // fight to be the seed _, err = b.Set("/ctl/boot/"+name, 0, []byte(id)) if err, ok := err.(*doozer.Error); ok && err.Err == doozer.ErrOldRev { // we lost, lookup addresses again cl := lookupAndAttach(b, name) if cl == nil { panic("failed to attach after losing election") } // also delete our entry, since we're not officially a cal yet. // it gets set again in peer.Main when we become a cal. err := b.Del(nspath, r) if err != nil { panic(err) } return cl } else if err != nil { panic(err) } return nil // we are the seed node -- don't attach }
// Find possible addresses for cluster named name. func lookup(b *doozer.Conn, name string) (as []string) { rev, err := b.Rev() if err != nil { panic(err) } path := "/ctl/ns/" + name names, err := b.Getdir(path, rev, 0, -1) if err == doozer.ErrNoEnt { return nil } else if err, ok := err.(*doozer.Error); ok && err.Err == doozer.ErrNoEnt { return nil } else if err != nil { panic(err) } path += "/" for _, name := range names { body, _, err := b.Get(path+name, &rev) if err != nil { panic(err) } as = append(as, string(body)) } return as }
// watch updates the peer list of servers based on changes to the // doozer configuration or signals from the OS. func watch(d *doozer.Conn) { peerFile := "/peers" var peers []string var rev int64 // Run the initial get. data, rev, err := d.Get(peerFile, nil) if err != nil { log.Println("initial peer list get:", err) log.Println("using empty set to start") peers = []string{} } else { peers = strings.Split(string(data), " ") } // Add myself to the list. peers = append(peers, "http://"+addr) rev, err = d.Set(peerFile, rev, []byte(strings.Join(peers, " "))) if err != nil { log.Println("unable to add myself to the peer list (no longer watching).") return } pool.Set(peers...) log.Println("added myself to the peer list.") // Setup signal handling to deal with ^C and others. sigs := make(chan os.Signal, 1) signal.Notify(sigs, os.Interrupt, os.Kill) // Get the channel that's listening for changes. updates := wait(d, peerFile, &rev) for { select { case <-sigs: // Remove ourselves from the peer list and exit. for i, peer := range peers { if peer == "http://"+addr { peers = append(peers[:i], peers[i+1:]...) d.Set(peerFile, rev, []byte(strings.Join(peers, " "))) log.Println("removed myself from peer list before exiting.") } } os.Exit(1) case update, ok := <-updates: // If the channel was closed, we should stop selecting on it. if !ok { updates = nil continue } // Otherwise, update the peer list. peers = update log.Println("got new peer list:", strings.Join(peers, " ")) pool.Set(peers...) } } }
// Main runs this process as a node of the cluster named clusterName. It
// only returns when reading from udpConn yields os.EINVAL.
//
// Parameters, as used below:
//   - self: this node's identifier, used in the /ctl paths and handed to
//     the consensus manager, gc.Pulse and the liveness tracker.
//   - buri: when non-empty (and cl is non-nil), a second doozer cluster is
//     dialed after activation and this node's listen address is recorded
//     there under /ctl/ns/<clusterName>/<self>.
//   - rwsk, rosk: passed to server.ListenAndServe; the web listener is only
//     served when both are empty.
//   - cl: nil when this node is the only node of a new cluster; otherwise a
//     connection to the existing cluster.
//   - udpConn: socket used for consensus packets in both directions.
//   - listener, webListener: client and web listeners (webListener may be nil).
//   - pulseInterval, fillDelay, kickTimeout, hi: handed to gc.Pulse,
//     consensus.Manager.TFill, the liveness tracker and gc.Clean respectively.
func Main(clusterName, self, buri, rwsk, rosk string, cl *doozer.Conn, udpConn net.PacketConn, listener, webListener net.Listener, pulseInterval, fillDelay, kickTimeout int64, hi int64) {
	listenAddr := listener.Addr().String()

	// canWrite receives a single true once this node may accept writes.
	canWrite := make(chan bool, 1)
	// in/out carry consensus packets from and to the UDP socket.
	in := make(chan consensus.Packet, 50)
	out := make(chan consensus.Packet, 50)

	st := store.New()
	pr := &proposer{
		seqns: make(chan int64, alpha),
		props: make(chan *consensus.Prop),
		st:    st,
	}

	// calSrv starts the background services of a cal: the pulse and clean
	// goroutines and a consensus manager whose DefRev is start.
	calSrv := func(start int64) {
		go gc.Pulse(self, st.Seqns, pr, pulseInterval)
		go gc.Clean(st, hi, time.Tick(1e9)) // 1e9 ns: ticks once per second
		var m consensus.Manager
		m.Self = self
		m.DefRev = start
		m.Alpha = alpha
		m.In = in
		m.Out = out
		m.Ops = st.Ops
		m.PSeqn = pr.seqns
		m.Props = pr.props
		m.TFill = fillDelay
		m.Store = st
		m.Ticker = time.Tick(10e6) // 10e6 ns: ticks every 10ms
		go m.Run()
	}

	if cl == nil { // we are the only node in a new cluster
		// Seed the local store directly; there is no cluster to ask.
		set(st, "/ctl/name", clusterName, store.Missing)
		set(st, "/ctl/node/"+self+"/addr", listenAddr, store.Missing)
		set(st, "/ctl/node/"+self+"/hostname", os.Getenv("HOSTNAME"), store.Missing)
		set(st, "/ctl/node/"+self+"/version", Version, store.Missing)
		set(st, "/ctl/cal/0", self, store.Missing)
		calSrv(<-st.Seqns)
		// Skip ahead alpha steps so that the registrar can provide a
		// meaningful cluster.
		for i := 0; i < alpha; i++ {
			st.Ops <- store.Op{1 + <-st.Seqns, store.Nop}
		}
		canWrite <- true
	} else {
		// Joining an existing cluster: publish this node's details through cl.
		setC(cl, "/ctl/node/"+self+"/addr", listenAddr, store.Clobber)
		setC(cl, "/ctl/node/"+self+"/hostname", os.Getenv("HOSTNAME"), store.Clobber)
		setC(cl, "/ctl/node/"+self+"/version", Version, store.Clobber)

		rev, err := cl.Rev()
		if err != nil {
			panic(err)
		}

		// Start following events after rev before cloning the state at
		// rev, so that nothing between the two is missed.
		stop := make(chan bool, 1)
		go follow(st, cl, rev+1, stop)

		// Any error reported by the walk panics; closing errs afterwards
		// lets the watcher goroutine finish quietly.
		errs := make(chan os.Error)
		go func() {
			e, ok := <-errs
			if ok {
				panic(e)
			}
		}()
		doozer.Walk(cl, rev, "/", cloner{st.Ops, cl}, errs)
		close(errs)
		st.Flush()

		// Block for one event after rev, unless the wait cannot be set up.
		ch, err := st.Wait(store.Any, rev+1)
		if err == nil {
			<-ch
		}

		// Becoming a cal happens in the background so the packet loop
		// below can start right away.
		go func() {
			n := activate(st, self, cl)
			calSrv(n)
			advanceUntil(cl, st.Seqns, n+alpha)
			stop <- true
			canWrite <- true
			if buri != "" {
				b, err := doozer.DialUri(buri, "")
				if err != nil {
					panic(err)
				}
				setC(
					b,
					"/ctl/ns/"+clusterName+"/"+self,
					listenAddr,
					store.Missing,
				)
			}
		}()
	}

	shun := make(chan string, 3) // sufficient for a cluster of 7
	go member.Clean(shun, st, pr)
	go server.ListenAndServe(listener, canWrite, st, pr, rwsk, rosk)

	// The web interface is only served when no secret key is configured.
	if rwsk == "" && rosk == "" && webListener != nil {
		web.Store = st
		web.ClusterName = clusterName
		go web.Serve(webListener)
	}

	// Sender: resolve each outgoing packet's address and write it to the
	// socket. Failures are logged and the packet is dropped.
	go func() {
		for p := range out {
			addr, err := net.ResolveUDPAddr("udp", p.Addr)
			if err != nil {
				log.Println(err)
				continue
			}
			n, err := udpConn.WriteTo(p.Data, addr)
			if err != nil {
				log.Println(err)
				continue
			}
			if n != len(p.Data) {
				log.Println("packet len too long:", len(p.Data))
				continue
			}
		}
	}()

	lv := liveness{
		timeout: kickTimeout,
		ival:    kickTimeout / 2,
		times:   make(map[string]int64),
		self:    self,
		shun:    shun,
	}
	// Receiver: read packets, record when each sender was last heard from,
	// and hand the packet to the consensus manager via in.
	for {
		// NOTE(review): t is sampled before the blocking read, so it is
		// the time the read started, not the time the packet arrived —
		// confirm this is intended.
		t := time.Nanoseconds()

		// A fresh buffer per packet: the slice is handed off on in.
		buf := make([]byte, maxUDPLen)
		n, addr, err := udpConn.ReadFrom(buf)
		// presumably os.EINVAL signals that the socket was closed — TODO confirm
		if err == os.EINVAL {
			return
		}
		if err != nil {
			log.Println(err)
			continue
		}

		buf = buf[:n]

		// Update liveness time stamp for this addr
		lv.times[addr.String()] = t
		lv.check(t)

		in <- consensus.Packet{addr.String(), buf}
	}
}
func setC(cl *doozer.Conn, path, body string, rev int64) { _, err := cl.Set(path, rev, []byte(body)) if err != nil { panic(err) } }
// advanceUntil issues Nop calls on cl until a value received from ver is at
// least done.
//
// One value is received from ver before every Nop; the function returns as
// soon as a received value is not below done. The result of Nop is ignored.
func advanceUntil(cl *doozer.Conn, ver <-chan int64, done int64) {
	for {
		if seqn := <-ver; seqn >= done {
			return
		}
		cl.Nop()
	}
}
// Main runs this process as a node of the cluster named clusterName. It
// only returns when reading from udpConn fails with an error whose text
// contains "use of closed network connection".
//
// Parameters, as used below:
//   - self: this node's identifier, used in the /ctl paths and handed to
//     the consensus manager, gc.Pulse, setReady and server.ListenAndServe.
//   - buri: when empty and this node is the seed, the node records its own
//     listen address under /ctl/ns/<clusterName>/<self> in the local store;
//     when non-empty and cl is non-nil, that entry is written to the cluster
//     dialed at buri after activation.
//   - rwsk, rosk: passed to server.ListenAndServe; the web listener is only
//     served when both are empty.
//   - cl: nil when this node is the only node of a new cluster; otherwise a
//     connection to the existing cluster.
//   - udpConn: socket used for consensus packets in both directions.
//   - listener, webListener: client and web listeners (webListener may be nil).
//   - pulseInterval, fillDelay, kickTimeout, hi: handed to gc.Pulse,
//     consensus.Manager.TFill, the liveness tracker and gc.Clean respectively.
func Main(clusterName, self, buri, rwsk, rosk string, cl *doozer.Conn, udpConn *net.UDPConn, listener, webListener net.Listener, pulseInterval, fillDelay, kickTimeout int64, hi int64) {
	listenAddr := listener.Addr().String()

	// canWrite receives a single true once this node may accept writes.
	canWrite := make(chan bool, 1)
	// in/out carry consensus packets from and to the UDP socket.
	in := make(chan consensus.Packet, 50)
	out := make(chan consensus.Packet, 50)

	st := store.New()
	pr := &proposer{
		seqns: make(chan int64, alpha),
		props: make(chan *consensus.Prop),
		st:    st,
	}

	// calSrv starts the background services of a cal: the pulse and clean
	// goroutines and a consensus manager whose DefRev is start.
	calSrv := func(start int64) {
		go gc.Pulse(self, st.Seqns, pr, pulseInterval)
		go gc.Clean(st, hi, time.Tick(1e9)) // 1e9 ns: ticks once per second
		var m consensus.Manager
		m.Self = self
		m.DefRev = start
		m.Alpha = alpha
		m.In = in
		m.Out = out
		m.Ops = st.Ops
		m.PSeqn = pr.seqns
		m.Props = pr.props
		m.TFill = fillDelay
		m.Store = st
		m.Ticker = time.Tick(10e6) // 10e6 ns: ticks every 10ms
		go m.Run()
	}

	// Fall back to a placeholder when the hostname cannot be determined.
	hostname, err := os.Hostname()
	if err != nil {
		hostname = "unknown"
	}

	if cl == nil { // we are the only node in a new cluster
		// Seed the local store directly; there is no cluster to ask.
		set(st, "/ctl/name", clusterName, store.Missing)
		set(st, "/ctl/node/"+self+"/addr", listenAddr, store.Missing)
		set(st, "/ctl/node/"+self+"/hostname", hostname, store.Missing)
		set(st, "/ctl/node/"+self+"/version", Version, store.Missing)
		set(st, "/ctl/cal/0", self, store.Missing)
		if buri == "" {
			set(st, "/ctl/ns/"+clusterName+"/"+self, listenAddr, store.Missing)
		}
		calSrv(<-st.Seqns)
		// Skip ahead alpha steps so that the registrar can provide a
		// meaningful cluster.
		for i := 0; i < alpha; i++ {
			st.Ops <- store.Op{1 + <-st.Seqns, store.Nop}
		}
		canWrite <- true
		go setReady(pr, self)
	} else {
		// Joining an existing cluster: publish this node's details through cl.
		setC(cl, "/ctl/node/"+self+"/addr", listenAddr, store.Clobber)
		setC(cl, "/ctl/node/"+self+"/hostname", hostname, store.Clobber)
		setC(cl, "/ctl/node/"+self+"/version", Version, store.Clobber)

		rev, err := cl.Rev()
		if err != nil {
			panic(err)
		}

		// Start following events after rev before cloning the state at
		// rev, so that nothing between the two is missed.
		stop := make(chan bool, 1)
		go follow(st, cl, rev+1, stop)

		// Any error reported by the walk panics; closing errs afterwards
		// lets the watcher goroutine finish quietly.
		errs := make(chan error)
		go func() {
			e, ok := <-errs
			if ok {
				panic(e)
			}
		}()
		doozer.Walk(cl, rev, "/", cloner{st.Ops, cl, rev}, errs)
		close(errs)
		st.Flush()

		// Block for one event after rev, unless the wait cannot be set up.
		ch, err := st.Wait(store.Any, rev+1)
		if err == nil {
			<-ch
		}

		// Becoming a cal happens in the background so the packet loop
		// below can start right away.
		go func() {
			n := activate(st, self, cl)
			calSrv(n)
			advanceUntil(cl, st.Seqns, n+alpha)
			stop <- true
			canWrite <- true
			go setReady(pr, self)
			if buri != "" {
				b, err := doozer.DialUri(buri, "")
				if err != nil {
					panic(err)
				}
				setC(
					b,
					"/ctl/ns/"+clusterName+"/"+self,
					listenAddr,
					store.Missing,
				)
			}
		}()
	}

	shun := make(chan string, 3) // sufficient for a cluster of 7
	go member.Clean(shun, st, pr)
	go server.ListenAndServe(listener, canWrite, st, pr, rwsk, rosk, self)

	// The web interface is only served when no secret key is configured.
	if rwsk == "" && rosk == "" && webListener != nil {
		web.Store = st
		web.ClusterName = clusterName
		go web.Serve(webListener)
	}

	// Sender: write each outgoing packet to its address. Failures are
	// logged and the packet is dropped.
	go func() {
		for p := range out {
			n, err := udpConn.WriteTo(p.Data, p.Addr)
			if err != nil {
				log.Println(err)
				continue
			}
			if n != len(p.Data) {
				log.Println("packet len too long:", len(p.Data))
				continue
			}
		}
	}()

	selfAddr, ok := udpConn.LocalAddr().(*net.UDPAddr)
	if !ok {
		panic("no UDP addr")
	}
	lv := liveness{
		timeout: kickTimeout,
		ival:    kickTimeout / 2,
		self:    selfAddr,
		shun:    shun,
	}
	// Receiver: read packets, record when each sender was last heard from,
	// and hand the packet to the consensus manager via in.
	for {
		// NOTE(review): t is sampled before the blocking read, so it is
		// the time the read started, not the time the packet arrived —
		// confirm this is intended.
		t := time.Now().UnixNano()

		// A fresh buffer per packet: the slice is handed off on in.
		buf := make([]byte, maxUDPLen)
		n, addr, err := udpConn.ReadFromUDP(buf)
		// A closed socket is recognized by the error text and ends Main.
		if err != nil && strings.Contains(err.Error(), "use of closed network connection") {
			log.Printf("<<<< EXITING >>>>")
			return
		}
		if err != nil {
			log.Println(err)
			continue
		}

		buf = buf[:n]

		lv.mark(addr, t)
		lv.check(t)

		in <- consensus.Packet{addr, buf}
	}
}
// Main runs this process as a node of the cluster named clusterName. It
// only returns when reading from udpConn yields os.EINVAL.
//
// Parameters, as used below:
//   - self: this node's identifier, used in the /ctl paths and handed to
//     the consensus manager, gc.Pulse and the liveness tracker.
//   - buri: when non-empty (and cl is non-nil), a second doozer cluster is
//     dialed after activation and this node's listen address is recorded
//     there under /ctl/ns/<clusterName>/<self>.
//   - secret: passed to server.ListenAndServe; the web listener is only
//     served when it is empty.
//   - cl: nil when this node is the only node of a new cluster; otherwise a
//     connection to the existing cluster.
//   - udpConn: socket used for consensus packets in both directions.
//   - listener, webListener: client and web listeners (webListener may be nil).
//   - pulseInterval, fillDelay, kickTimeout: handed to gc.Pulse,
//     consensus.NewManager and the liveness tracker respectively.
func Main(clusterName, self, buri, secret string, cl *doozer.Conn, udpConn net.PacketConn, listener, webListener net.Listener, pulseInterval, fillDelay, kickTimeout int64) {
	listenAddr := listener.Addr().String()

	// canWrite receives a single true once this node may accept writes.
	canWrite := make(chan bool, 1)
	// in/out carry consensus packets from and to the UDP socket.
	in := make(chan consensus.Packet, 50)
	out := make(chan consensus.Packet, 50)

	st := store.New()
	pr := &proposer{
		seqns: make(chan int64, alpha),
		props: make(chan *consensus.Prop),
		st:    st,
	}

	// calSrv starts the background services of a cal: the pulse and clean
	// goroutines and a consensus manager starting at start.
	calSrv := func(start int64) {
		go gc.Pulse(self, st.Seqns, pr, pulseInterval)
		go gc.Clean(st, 360000, time.Tick(1e9)) // 1e9 ns: ticks once per second
		consensus.NewManager(self, start, alpha, in, out, st.Ops, pr.seqns, pr.props, fillDelay, st)
	}

	if cl == nil { // we are the only node in a new cluster
		// Seed the local store directly; there is no cluster to ask.
		set(st, "/ctl/name", clusterName, store.Missing)
		set(st, "/ctl/node/"+self+"/addr", listenAddr, store.Missing)
		set(st, "/ctl/node/"+self+"/hostname", os.Getenv("HOSTNAME"), store.Missing)
		set(st, "/ctl/node/"+self+"/version", Version, store.Missing)
		set(st, "/ctl/cal/0", self, store.Missing)
		calSrv(<-st.Seqns)
		// Skip ahead alpha steps so that the registrar can provide a
		// meaningful cluster.
		for i := 0; i < alpha; i++ {
			st.Ops <- store.Op{1 + <-st.Seqns, store.Nop}
		}
		canWrite <- true
	} else {
		// Joining an existing cluster: publish this node's details through cl.
		setC(cl, "/ctl/node/"+self+"/addr", listenAddr, store.Clobber)
		setC(cl, "/ctl/node/"+self+"/hostname", os.Getenv("HOSTNAME"), store.Clobber)
		setC(cl, "/ctl/node/"+self+"/version", Version, store.Clobber)

		rev, err := cl.Rev()
		if err != nil {
			panic(err)
		}

		// Start following events after rev before cloning the state at
		// rev, so that nothing between the two is missed.
		stop := make(chan bool, 1)
		go follow(st, cl, rev+1, stop)

		// Clone everything visible at rev into the local store.
		info, err := cl.Walk("/**", rev, 0, -1)
		if err != nil {
			panic(err)
		}
		for _, ev := range info {
			// store.Clobber is okay here because the event
			// has already passed through another store
			mut := store.MustEncodeSet(ev.Path, string(ev.Body), store.Clobber)
			st.Ops <- store.Op{ev.Rev, mut}
		}
		st.Flush()

		// Block for one event after rev, unless the wait cannot be set up.
		ch, err := st.Wait(store.Any, rev+1)
		if err == nil {
			<-ch
		}

		// Becoming a cal happens in the background so the packet loop
		// below can start right away.
		go func() {
			n := activate(st, self, cl)
			calSrv(n)
			advanceUntil(cl, st.Seqns, n+alpha)
			stop <- true
			canWrite <- true
			if buri != "" {
				b, err := doozer.DialUri(buri)
				if err != nil {
					panic(err)
				}
				setC(
					b,
					"/ctl/ns/"+clusterName+"/"+self,
					listenAddr,
					store.Missing,
				)
			}
		}()
	}

	shun := make(chan string, 3) // sufficient for a cluster of 7
	go member.Clean(shun, st, pr)
	go server.ListenAndServe(listener, canWrite, st, pr, secret)

	// The web interface is only served when no secret is configured.
	if secret == "" && webListener != nil {
		web.Store = st
		web.ClusterName = clusterName
		go web.Serve(webListener)
	}

	// Sender: resolve each outgoing packet's address and write it to the
	// socket. Failures are logged and the packet is dropped.
	go func() {
		for p := range out {
			addr, err := net.ResolveUDPAddr(p.Addr)
			if err != nil {
				log.Println(err)
				continue
			}
			n, err := udpConn.WriteTo(p.Data, addr)
			if err != nil {
				log.Println(err)
				continue
			}
			if n != len(p.Data) {
				log.Println("packet len too long:", len(p.Data))
				continue
			}
		}
	}()

	lv := liveness{
		timeout: kickTimeout,
		ival:    kickTimeout / 2,
		times:   make(map[string]int64),
		self:    self,
		shun:    shun,
	}
	// Receiver: read packets, record when each sender was last heard from,
	// and hand the packet to the consensus manager via in.
	for {
		// NOTE(review): t is sampled before the blocking read, so it is
		// the time the read started, not the time the packet arrived —
		// confirm this is intended.
		t := time.Nanoseconds()

		// A fresh buffer per packet: the slice is handed off on in.
		buf := make([]byte, maxUDPLen)
		n, addr, err := udpConn.ReadFrom(buf)
		// presumably os.EINVAL signals that the socket was closed — TODO confirm
		if err == os.EINVAL {
			return
		}
		if err != nil {
			log.Println(err)
			continue
		}

		buf = buf[:n]

		// Update liveness time stamp for this addr
		lv.times[addr.String()] = t
		lv.check(t)

		in <- consensus.Packet{addr.String(), buf}
	}
}