// InitWatch initialize watch module func InitWatch() error { // Initialize zookeeper connection Log.Info("Initializing zookeeper,zookeeper.Dial(\"%s\", \"%dms\")", Conf.ZKAddr, Conf.ZKTimeout/1000000) zkTmp, session, err := zookeeper.Dial(Conf.ZKAddr, Conf.ZKTimeout) if err != nil { return err } zk = zkTmp // Zookeeper client will reconnect automatically for { event := <-session if event.State < zookeeper.STATE_CONNECTING { return errors.New(fmt.Sprintf("connect zookeeper fail, event:\"%v\"", event)) } else if event.State == zookeeper.STATE_CONNECTING { Log.Warn("Zookeeper connecting!") time.Sleep(time.Second) continue } else { break } } Log.Info("Initialize zookeeper OK") // Zookeeper create Public message subnode if err := zkCreate(); err != nil { return err } // Init public message mid-creater PubMID = timeID.NewTimeID() return nil }
func MakeZookeeperClient(server_host string, debug bool) (*ZookeeperClient, error) { client := &ZookeeperClient{ debug: debug, } if server_host == "" { client.server_host = "localhost:2181" } else { client.server_host = server_host } conn, session, err := zookeeper.Dial(client.server_host, 5*time.Second) if err != nil { fmt.Printf("Couldn't connect: %s\n", err) return nil, err } client.client = conn client.session = session // Wait for connection. event := <-client.session fmt.Printf("Got event\n") if event.State != zookeeper.STATE_CONNECTED { fmt.Printf("Error with connect, %s!\n", event.State) return nil, errors.New("Error with connect") } return client, nil }
func DialZkTimeout(zkAddr string, baseTimeout time.Duration, connectTimeout time.Duration) (*ZkConn, <-chan zookeeper.Event, error) { resolvedZkAddr, err := resolveZkAddr(zkAddr) if err != nil { return nil, nil, err } sem.Acquire() defer sem.Release() zconn, session, err := zookeeper.Dial(resolvedZkAddr, baseTimeout) if err == nil { // Wait for connection, with a timeout timer := time.NewTimer(connectTimeout) select { case <-timer.C: err = fmt.Errorf("zk connect timed out") case event := <-session: if event.State != zookeeper.STATE_CONNECTED { err = fmt.Errorf("zk connect failed: %v", event.State) } } if err == nil { return &ZkConn{conn: zconn}, session, nil } else { zconn.Close() } } return nil, nil, err }
func Initzk() { ipport := utils.Cfg.GetString("zkServer") zk1, session, err := zookeeper.Dial(ipport, 5e9) zk = zk1 if err != nil { // log.Fatalf("Can't connect: %v", err) fmt.Println(err) } // defer zk.Close() // Wait for connection. event := <-session if event.State != zookeeper.STATE_CONNECTED { // log.Fatalf("Can't connect: %v", event) fmt.Println(event.String()) } fmt.Println(event) _, err = zk.Create(utils.Cfg.GetString("sendslavers"), "0", 0, zookeeper.WorldACL(zookeeper.PERM_ALL)) //zookeeper.EPHEMERAL|zookeeper.SEQUENCE if err != nil { // log.Fatalf("Can't create counter: %v", err) fmt.Println("Can't create counter: %v", err) } else { fmt.Println("Counter created!") } watchslavers() }
func Dial(serverAddrs []string, recvTimeout time.Duration) (*GlobalConn, <-chan zookeeper.Event, error) { zconns := make([]*zookeeper.Conn, len(serverAddrs)) zchans := make([]<-chan zookeeper.Event, len(serverAddrs)) for i, addr := range serverAddrs { conn, eventChan, err := zookeeper.Dial(addr, recvTimeout) if err != nil { // teardown for j := 0; j < i; j++ { zconns[j].Close() } return nil, nil, err } zconns[i] = conn zchans[i] = eventChan } eventChan := make(chan zookeeper.Event, 1) go func() { var e zookeeper.Event for _, c := range zchans { e = <-c } eventChan <- e close(eventChan) }() return &GlobalConn{serverAddrs, zconns, DEFAULT_MAX_RETRIES}, eventChan, nil }
func (zkd *Zkd) init(preserveData bool) error { log.Infof("zkd.Init") for _, path := range zkd.config.DirectoryList() { if err := os.MkdirAll(path, 0775); err != nil { log.Errorf(err.Error()) return err } // FIXME(msolomon) validate permissions? } configData, err := zkd.makeCfg() if err == nil { err = ioutil.WriteFile(zkd.config.ConfigFile(), []byte(configData), 0664) } if err != nil { log.Errorf("failed creating %v: %v", zkd.config.ConfigFile(), err) return err } err = zkd.config.WriteMyid() if err != nil { log.Errorf("failed creating %v: %v", zkd.config.MyidFile(), err) return err } if err = zkd.Start(); err != nil { log.Errorf("failed starting, check %v", zkd.config.LogDir()) return err } zkAddr := fmt.Sprintf("localhost:%v", zkd.config.ClientPort) zk, session, err := zookeeper.Dial(zkAddr, StartWaitTime*time.Second) if err != nil { return err } event := <-session if event.State != zookeeper.STATE_CONNECTED { return err } defer zk.Close() if !preserveData { _, err = zk.Create("/zk", "", 0, zookeeper.WorldACL(zookeeper.PERM_ALL)) if err != nil && !zookeeper.IsError(err, zookeeper.ZNODEEXISTS) { return err } if zkd.config.Global { _, err = zk.Create("/zk/global", "", 0, zookeeper.WorldACL(zookeeper.PERM_ALL)) if err != nil && !zookeeper.IsError(err, zookeeper.ZNODEEXISTS) { return err } } } return nil }
// Join joins the cluster *c is configured for. func (c *Cluster) Join() error { if c.config.NodeId == "" { return errors.New("config requires a NodeId") } if len(c.config.Servers) == 0 { return errors.New("config requires Servers") } servers := strings.Join(c.config.Servers, ",") // log.Println("Join...") switch atomic.LoadInt32(&c.state) { case NewState /*, ShutdownState */ : zk, session, err := zookeeper.Dial(servers, c.config.Timeout) if err != nil { return err } ev := <-session if ev.State != zookeeper.STATE_CONNECTED { return errors.New("Failed to connect to Zookeeper") } log.Printf("Node %s connected to ZooKeeper", c.config.NodeId) c.zk, c.zkSession = zkutil.NewZooKeeper(zk), session c.createPaths() c.joinCluster() c.listener.OnJoin(c.zk) c.setupWatchers() if !atomic.CompareAndSwapInt32(&c.state, NewState, StartedState) { log.Fatalf("Could not move from NewState to StartedState: State is not NewState") } case StartedState, DrainingState: return errors.New("Tried to join with state StartedState or DrainingState") case ShutdownState: // TODO default: panic("Unknown state") } c.balancer.Init(c) go func() { c.rebalanceKill = make(chan byte) for { select { case <-c.rebalanceKill: return case <-time.After(c.config.RebalanceInterval): c.rebalance() } } }() c.getTasks() return nil }
func (c *Ctl) connectToZK() error { servers := strings.Join(c.cfg.Servers, ",") zk, zkEv, err := zookeeper.Dial(servers, c.cfg.Timeout) if err != nil { return err } ev := <-zkEv if ev.State != zookeeper.STATE_CONNECTED { errors.New("Failed to connect to Zookeeper servers: " + servers) } c.zk = zk return nil }
func NewLockServer(endpoints string) (*LockServer, error) { zk, session, err := zookeeper.Dial(endpoints, 5e9) if err != nil { log.Println("Can't connect: %v", err) return nil, err } select { case event := <-session: log.Println(event) case <-time.After(time.Second * 5): return nil, errors.New("Connection timeout") } return &LockServer{zk, session, make(chan bool)}, nil }
// Dial a ZK server and waits for connection event. Returns a ZkConn // encapsulating the zookeeper.Conn, and the zookeeper session event // channel to monitor the connection // // The value for baseTimeout is used as a session timeout as well, and // will be used to negotiate a 'good' value with the server. From // reading the zookeeper source code, it has to be between 6 and 60 // seconds (2x and 20x the tickTime by default, with default tick time // being 3 seconds). min session time, max session time and ticktime // can all be overwritten on the zookeeper server side, so these // numbers may vary. // // Then this baseTimeout is used to compute other related timeouts: // - connect timeout is 1/3 of baseTimeout // - recv timeout is 2/3 of baseTimeout minus a ping time // - send timeout is 1/3 of baseTimeout // - we try to send a ping a least every baseTimeout / 3 // // Note the baseTimeout has *nothing* to do with the time between we // call Dial and the maximum time before we receive the event on the // session. The library will actually try to re-connect in the background // (after each timeout), and may *never* send an event if the TCP connections // always fail. Use DialZkTimeout to enforce a timeout for the initial connect. func DialZk(zkAddr string, baseTimeout time.Duration) (*ZkConn, <-chan zookeeper.Event, error) { sem.Acquire() defer sem.Release() zconn, session, err := zookeeper.Dial(zkAddr, baseTimeout) if err == nil { // Wait for connection, possibly forever event := <-session if event.State != zookeeper.STATE_CONNECTED { err = fmt.Errorf("zk connect failed: %v", event.State) } if err == nil { return &ZkConn{zconn}, session, nil } else { zconn.Close() } } return nil, nil, err }
// This error will be delivered via C errno, since ZK unfortunately // only provides the handler back from zookeeper_init(). func (s *S) TestInitErrorThroughErrno(c *C) { conn, watch, err := zk.Dial("bad-domain-without-port", 5e9) if conn != nil { conn.Close() } if watch != nil { go func() { for { _, ok := <-watch if !ok { break } } }() } c.Assert(conn, IsNil) c.Assert(watch, IsNil) c.Assert(err, ErrorMatches, "zookeeper: dial: invalid argument") }
func main() { zk, session, err := zookeeper.Dial("localhost:2181", 5e9) if err != nil { log.Fatalf("Can't connect: %v", err) } defer zk.Close() // Wait for connection. event := <-session if event.State != zookeeper.STATE_CONNECTED { log.Fatalf("Can't connect: %v", event) } _, err = zk.Create("/counter", "0", 0, zookeeper.WorldACL(zookeeper.PERM_ALL)) if err != nil { log.Fatalf("Can't create counter: %v", err) } else { fmt.Println("Counter created!") } }
// newZookeeper dial zookeeper cluster. func newZookeeper() (*ZK, error) { zk, session, err := zookeeper.Dial(Conf.ZookeeperAddr, Conf.ZookeeperTimeout) if err != nil { Log.Error("zookeeper.Dial(\"%s\", %d) error(%v)", Conf.ZookeeperAddr, Conf.ZookeeperTimeout, err) return nil, err } go func() { for { event := <-session if event.State < zookeeper.STATE_CONNECTING { Log.Error("can't connect zookeeper, event: %v", event) } else if event.State == zookeeper.STATE_CONNECTING { Log.Warn("retry connect zookeeper, event: %v", event) } else { Log.Debug("succeed connect zookeeper, event: %v", event) } } }() return &ZK{conn: zk}, nil }
func NewZookeeper(addr string, timeout int) (*ZK, error) { zk, session, err := zookeeper.Dial(addr, time.Duration(timeout)*1e9) if err != nil { return nil, err } go func() { for { event := <-session if event.State < zookeeper.STATE_CONNECTING { Log.Error("can't connect zookeeper, event: %v", event) } else if event.State == zookeeper.STATE_CONNECTING { Log.Warn("retry connect zookeeper, event: %v", event) } else { Log.Debug("succeed connect zookeeper, event: %v", event) } } }() return &ZK{conn: zk}, nil }
func (s *S) TestRecvTimeoutInitParameter(c *C) { conn, watch, err := zk.Dial(s.zkAddr, 0) c.Assert(err, IsNil) defer conn.Close() select { case <-watch: c.Fatal("Watch fired") default: } for i := 0; i != 1000; i++ { _, _, err := conn.Get("/zookeeper") if err != nil { c.Check(zk.IsError(err, zk.ZOPERATIONTIMEOUT), Equals, true, Commentf("%v", err)) c.SucceedNow() } } c.Fatal("Operation didn't timeout") }
func (s *S) init(c *C) (*zk.Conn, chan zk.Event) { c.Logf("init dialling %q", s.zkAddr) conn, watch, err := zk.Dial(s.zkAddr, 5e9) c.Assert(err, IsNil) s.handles = append(s.handles, conn) bufferedWatch := make(chan zk.Event, 256) select { case e, ok := <-watch: c.Assert(ok, Equals, true) c.Assert(e.Type, Equals, zk.EVENT_SESSION) c.Assert(e.State, Equals, zk.STATE_CONNECTED) bufferedWatch <- e case <-time.After(5e9): c.Fatalf("timeout dialling zookeeper addr %v", s.zkAddr) } s.liveWatches += 1 go func() { loop: for { select { case event, ok := <-watch: if !ok { close(bufferedWatch) break loop } select { case bufferedWatch <- event: default: panic("Too many events in buffered watch!") } } } s.deadWatches <- true }() return conn, bufferedWatch }
// InitWatch initialize watch module func InitWatch() error { // Initialize zookeeper connection zkTmp, session, err := zookeeper.Dial(Conf.Zookeeper.Addr, time.Duration(Conf.Zookeeper.Timeout)*1e9) if err != nil { return err } zk = zkTmp //defer zk.Close() for { event := <-session if event.State < zookeeper.STATE_CONNECTING { return errors.New(fmt.Sprintf("connect zookeeper fail, event: %v", event)) } else if event.State == zookeeper.STATE_CONNECTING { time.Sleep(time.Second) continue } else { break } } return nil }
func (zkd *Zkd) CheckProcess() error { pidFile := zkd.config.PidFile() data, err := ioutil.ReadFile(pidFile) if err != nil { return err } pid, err := strconv.Atoi(string(data)) // found a pid - if the process is burned, fast-fail // otherwise, try to connect and fail slowly if err == nil { _, err := os.FindProcess(pid) if err != nil { return err } } zkAddr := fmt.Sprintf("localhost:%v", zkd.config.ClientPort) zk, session, err := zookeeper.Dial(zkAddr, StartWaitTime*time.Second) if err != nil { return err } defer zk.Close() timer := time.NewTimer(StartWaitTime * 1e9) defer timer.Stop() select { case event := <-session: if event.State != zookeeper.STATE_CONNECTED { return err } case <-timer.C: return errors.New("zk deadline exceeded connecting to " + zkAddr) } _, _, err = zk.Get("/zk") return err }
// TestConcurrentClose checks, for every function in requestFuncs, that
// Close does not return while a request on the same connection is still
// outstanding, and that a request issued after Close fails with ZCLOSING.
func (s *S) TestConcurrentClose(c *C) {
	// make sure the server is ready to receive connections.
	s.init(c)

	// Close should wait until all outstanding requests have
	// completed before returning. The idea of this test is that
	// any request that requests or changes a zookeeper node must
	// make at least one round trip to the server, so we interpose a
	// proxy between the client and the server which can stop all
	// incoming traffic on demand, thus blocking the request until
	// we want it to unblock.
	//
	// We assume that all requests take less than 0.1s to complete,
	// thus when we wait below, neither of the goroutines started
	// below should complete within the allotted time (the request
	// because it's waiting for a reply from the server and the close
	// because it's waiting for the request to complete). If the
	// locking doesn't work, the Close will return early. If the
	// proxy blocking doesn't work, the request will return early.
	//
	// When we reenable incoming messages from the server, both
	// goroutines should complete. We can't tell which completes
	// first, but the fact that the close blocked is sufficient to
	// tell that the locking is working correctly.
	for i, f := range requestFuncs {
		c.Logf("iter %d", i)
		// Each iteration gets its own proxy and its own connection
		// dialed through that proxy.
		p := newProxy(c, s.zkAddr)
		conn, watch, err := zk.Dial(p.addr(), 5e9)
		c.Assert(err, IsNil)
		c.Assert((<-watch).Ok(), Equals, true)

		// sanity check that the connection is actually
		// up and running.
		_, err = conn.Exists("/nothing")
		c.Assert(err, IsNil)

		// From here on the request issued below should not be able to
		// complete until startIncoming is called.
		p.stopIncoming()
		reqDone := make(chan bool)
		closeDone := make(chan bool)
		go func() {
			f(conn, "/closetest")
			reqDone <- true
		}()
		go func() {
			// sleep for long enough for the request to be initiated and the read lock taken.
			time.Sleep(0.05e9)
			conn.Close()
			closeDone <- true
		}()
		// Neither goroutine may finish while incoming traffic is blocked;
		// the 0.1s timeout is the expected outcome of this select.
		select {
		case <-reqDone:
			c.Fatalf("request %d finished early", i)
		case <-closeDone:
			c.Fatalf("request %d close finished early", i)
		case <-time.After(0.1e9):
		}
		p.startIncoming()
		// Wait for both goroutines. A finished channel is set to nil so
		// its select case blocks forever and is not chosen again.
		for reqDone != nil || closeDone != nil {
			select {
			case <-reqDone:
				reqDone = nil
			case <-closeDone:
				closeDone = nil
			case <-time.After(0.4e9):
				c.Fatalf("request %d timed out waiting for req (%p) and close(%p)", i, reqDone, closeDone)
			}
		}
		p.close()
		// The connection has been closed above, so the same request is
		// now expected to fail with ZCLOSING.
		err = f(conn, "/closetest")
		c.Check(zk.IsError(err, zk.ZCLOSING), Equals, true, Commentf("%v", err))
	}
}