Ejemplo n.º 1
0
// InitWatch initializes the watch module.
//
// It dials zookeeper using Conf.ZKAddr and Conf.ZKTimeout, stores the
// connection in the package-level zk variable, and then reads session events
// until one reports a state other than STATE_CONNECTING. After that it calls
// zkCreate and assigns a new timeID generator to PubMID.
//
// An error is returned when dialing fails, when a session event reports a
// state below STATE_CONNECTING, or when zkCreate fails. Note that zk is
// assigned before the session state is known, so it stays set on those
// later failures.
func InitWatch() error {
	// Initialize zookeeper connection
	Log.Info("Initializing zookeeper,zookeeper.Dial(\"%s\", \"%dms\")", Conf.ZKAddr, Conf.ZKTimeout/1000000)
	zkTmp, session, err := zookeeper.Dial(Conf.ZKAddr, Conf.ZKTimeout)
	if err != nil {
		return err
	}
	zk = zkTmp

	// Zookeeper client will reconnect automatically, so keep consuming
	// session events while the client is still connecting.
	for {
		event := <-session
		if event.State < zookeeper.STATE_CONNECTING {
			return fmt.Errorf("connect zookeeper fail, event:\"%v\"", event)
		}
		if event.State != zookeeper.STATE_CONNECTING {
			// Any state above STATE_CONNECTING ends the wait.
			break
		}
		Log.Warn("Zookeeper connecting!")
		time.Sleep(time.Second)
	}

	Log.Info("Initialize zookeeper OK")

	// Zookeeper create Public message subnode
	if err := zkCreate(); err != nil {
		return err
	}

	// Init public message mid-creater
	PubMID = timeID.NewTimeID()

	return nil
}
Ejemplo n.º 2
0
// MakeZookeeperClient dials the zookeeper server at server_host and waits for
// the first session event.
//
// An empty server_host falls back to "localhost:2181". The client is returned
// only when the first session event reports STATE_CONNECTED; otherwise the
// dialed connection is closed (so its handle is not leaked) and an error is
// returned. Progress and failures are printed to stdout. The wait for the
// first event has no timeout of its own.
func MakeZookeeperClient(server_host string, debug bool) (*ZookeeperClient, error) {
	client := &ZookeeperClient{
		debug: debug,
	}
	if server_host == "" {
		client.server_host = "localhost:2181"
	} else {
		client.server_host = server_host
	}

	conn, session, err := zookeeper.Dial(client.server_host, 5*time.Second)
	if err != nil {
		fmt.Printf("Couldn't connect: %s\n", err)
		return nil, err
	}

	client.client = conn
	client.session = session

	// Wait for connection.
	event := <-client.session
	fmt.Printf("Got event\n")
	if event.State != zookeeper.STATE_CONNECTED {
		// %v formats the state correctly whether or not its type
		// implements fmt.Stringer; %s would garble a plain integer.
		fmt.Printf("Error with connect, %v!\n", event.State)
		// The client is discarded on this path, so release the connection.
		conn.Close()
		return nil, errors.New("Error with connect")
	}

	return client, nil
}
Ejemplo n.º 3
0
// DialZkTimeout resolves zkAddr, dials zookeeper with baseTimeout and waits at
// most connectTimeout for the first session event.
//
// On success it returns a ZkConn wrapping the connection together with the
// session event channel. It returns an error when address resolution or
// dialing fails, when no session event arrives within connectTimeout, or when
// the first event reports a state other than STATE_CONNECTED; in the latter
// two cases the dialed connection is closed before returning.
//
// The dial and the wait both happen while holding sem.
func DialZkTimeout(zkAddr string, baseTimeout time.Duration, connectTimeout time.Duration) (*ZkConn, <-chan zookeeper.Event, error) {
	resolvedZkAddr, err := resolveZkAddr(zkAddr)
	if err != nil {
		return nil, nil, err
	}

	sem.Acquire()
	defer sem.Release()
	zconn, session, err := zookeeper.Dial(resolvedZkAddr, baseTimeout)
	if err != nil {
		return nil, nil, err
	}

	// Wait for connection, with a timeout. Stop the timer on every exit path
	// so it does not linger until connectTimeout elapses.
	timer := time.NewTimer(connectTimeout)
	defer timer.Stop()
	select {
	case <-timer.C:
		err = fmt.Errorf("zk connect timed out")
	case event := <-session:
		if event.State != zookeeper.STATE_CONNECTED {
			err = fmt.Errorf("zk connect failed: %v", event.State)
		}
	}
	if err != nil {
		zconn.Close()
		return nil, nil, err
	}
	return &ZkConn{conn: zconn}, session, nil
}
Ejemplo n.º 4
0
// Initzk dials the zookeeper server named by the "zkServer" configuration key,
// stores the connection in the package-level zk variable, waits for the first
// session event, tries to create the node named by the "sendslavers"
// configuration key with the value "0", and finally calls watchslavers.
//
// Failures are reported on stdout only. A dial error aborts the
// initialization: there is no connection to work with, and receiving from the
// session channel of a failed dial could block forever.
func Initzk() {
	ipport := utils.Cfg.GetString("zkServer")
	zk1, session, err := zookeeper.Dial(ipport, 5e9)
	zk = zk1
	if err != nil {
		fmt.Println(err)
		return
	}

	// Wait for connection.
	event := <-session
	if event.State != zookeeper.STATE_CONNECTED {
		// Best effort: the state is reported but initialization continues.
		fmt.Println(event.String())
	}
	fmt.Println(event)

	_, err = zk.Create(utils.Cfg.GetString("sendslavers"), "0", 0, zookeeper.WorldACL(zookeeper.PERM_ALL)) //zookeeper.EPHEMERAL|zookeeper.SEQUENCE

	if err != nil {
		// Printf, not Println: the message carries a %v directive.
		fmt.Printf("Can't create counter: %v\n", err)
	} else {
		fmt.Println("Counter created!")
	}
	watchslavers()
}
Ejemplo n.º 5
0
// Dial opens one zookeeper connection per entry of serverAddrs, each with
// recvTimeout, and bundles them into a GlobalConn.
//
// If any dial fails, every connection opened so far is closed and the error
// is returned. The returned channel delivers a single event and is then
// closed: a background goroutine receives one event from each per-server
// session channel in order and forwards the last one it received.
func Dial(serverAddrs []string, recvTimeout time.Duration) (*GlobalConn, <-chan zookeeper.Event, error) {
	count := len(serverAddrs)
	conns := make([]*zookeeper.Conn, count)
	sessions := make([]<-chan zookeeper.Event, count)
	for idx := range serverAddrs {
		c, events, dialErr := zookeeper.Dial(serverAddrs[idx], recvTimeout)
		if dialErr != nil {
			// teardown of everything opened before the failing address
			for _, opened := range conns[:idx] {
				opened.Close()
			}
			return nil, nil, dialErr
		}
		conns[idx], sessions[idx] = c, events
	}

	merged := make(chan zookeeper.Event, 1)
	go func() {
		var last zookeeper.Event
		for _, ch := range sessions {
			last = <-ch
		}
		merged <- last
		close(merged)
	}()
	return &GlobalConn{serverAddrs, conns, DEFAULT_MAX_RETRIES}, merged, nil
}
Ejemplo n.º 6
0
// init prepares and starts the zookeeper daemon described by zkd.config.
//
// It creates every directory from DirectoryList, writes the generated config
// file and the myid file, starts the daemon, and then connects to it on
// localhost at the configured client port. Unless preserveData is true it
// creates the "/zk" node (and "/zk/global" when the config is marked Global),
// tolerating nodes that already exist.
//
// The first error encountered is returned. A first session event that does
// not report STATE_CONNECTED is treated as a failure. The connection opened
// here is always closed before returning.
func (zkd *Zkd) init(preserveData bool) error {
	log.Infof("zkd.Init")
	for _, path := range zkd.config.DirectoryList() {
		if err := os.MkdirAll(path, 0775); err != nil {
			// Pass the error as an argument so its text is never
			// interpreted as a format string.
			log.Errorf("%v", err)
			return err
		}
		// FIXME(msolomon) validate permissions?
	}

	configData, err := zkd.makeCfg()
	if err == nil {
		err = ioutil.WriteFile(zkd.config.ConfigFile(), []byte(configData), 0664)
	}
	if err != nil {
		log.Errorf("failed creating %v: %v", zkd.config.ConfigFile(), err)
		return err
	}

	err = zkd.config.WriteMyid()
	if err != nil {
		log.Errorf("failed creating %v: %v", zkd.config.MyidFile(), err)
		return err
	}

	if err = zkd.Start(); err != nil {
		log.Errorf("failed starting, check %v", zkd.config.LogDir())
		return err
	}

	zkAddr := fmt.Sprintf("localhost:%v", zkd.config.ClientPort)
	zk, session, err := zookeeper.Dial(zkAddr, StartWaitTime*time.Second)
	if err != nil {
		return err
	}
	// Registered before the state check so the connection is released on
	// every path below.
	defer zk.Close()

	event := <-session
	if event.State != zookeeper.STATE_CONNECTED {
		// err is nil at this point, so build an explicit error instead of
		// returning it.
		return fmt.Errorf("zk connect failed: %v", event.State)
	}

	if !preserveData {
		_, err = zk.Create("/zk", "", 0, zookeeper.WorldACL(zookeeper.PERM_ALL))
		if err != nil && !zookeeper.IsError(err, zookeeper.ZNODEEXISTS) {
			return err
		}

		if zkd.config.Global {
			_, err = zk.Create("/zk/global", "", 0, zookeeper.WorldACL(zookeeper.PERM_ALL))
			if err != nil && !zookeeper.IsError(err, zookeeper.ZNODEEXISTS) {
				return err
			}
		}
	}

	return nil
}
Ejemplo n.º 7
0
// Join joins the cluster *c is configured for.
//
// The config must carry a NodeId and at least one server. From NewState, Join
// dials zookeeper, waits for the first session event, creates the cluster
// paths, joins, notifies the listener, installs the watchers and moves the
// state to StartedState. Joining from StartedState or DrainingState is an
// error; ShutdownState is not handled yet. An unknown state panics.
//
// Afterwards the balancer is initialized, a background goroutine is started
// that calls rebalance every RebalanceInterval until something is received
// from (or closes) rebalanceKill, and the current tasks are fetched.
func (c *Cluster) Join() error {
	if c.config.NodeId == "" {
		return errors.New("config requires a NodeId")
	}
	if len(c.config.Servers) == 0 {
		return errors.New("config requires Servers")
	}

	servers := strings.Join(c.config.Servers, ",")

	// log.Println("Join...")
	switch atomic.LoadInt32(&c.state) {
	case NewState /*, ShutdownState */ :
		zk, session, err := zookeeper.Dial(servers, c.config.Timeout)
		if err != nil {
			return err
		}
		ev := <-session
		if ev.State != zookeeper.STATE_CONNECTED {
			// The connection is not handed to the cluster on this path,
			// so release it instead of leaking the handle.
			zk.Close()
			return errors.New("Failed to connect to Zookeeper")
		}
		log.Printf("Node %s connected to ZooKeeper", c.config.NodeId)
		c.zk, c.zkSession = zkutil.NewZooKeeper(zk), session
		c.createPaths()
		c.joinCluster()
		c.listener.OnJoin(c.zk)
		c.setupWatchers()
		if !atomic.CompareAndSwapInt32(&c.state, NewState, StartedState) {
			log.Fatalf("Could not move from NewState to StartedState: State is not NewState")
		}
	case StartedState, DrainingState:
		return errors.New("Tried to join with state StartedState or DrainingState")
	case ShutdownState:
		// TODO
	default:
		panic("Unknown state")
	}

	c.balancer.Init(c)
	// Create the kill channel before the goroutine starts, so that it is
	// already set when Join returns and is not written concurrently with
	// other users of the field.
	c.rebalanceKill = make(chan byte)
	go func() {
		for {
			select {
			case <-c.rebalanceKill:
				return
			case <-time.After(c.config.RebalanceInterval):
				c.rebalance()
			}
		}
	}()
	c.getTasks()
	return nil
}
Ejemplo n.º 8
0
// connectToZK dials the comma-joined list of configured servers and waits for
// the first session event.
//
// The connection is stored in c.zk only when that event reports
// STATE_CONNECTED. Otherwise the connection is closed and an error naming the
// servers is returned.
func (c *Ctl) connectToZK() error {
	servers := strings.Join(c.cfg.Servers, ",")
	zk, zkEv, err := zookeeper.Dial(servers, c.cfg.Timeout)
	if err != nil {
		return err
	}
	ev := <-zkEv
	if ev.State != zookeeper.STATE_CONNECTED {
		// Release the unused connection and actually report the failure.
		zk.Close()
		return errors.New("Failed to connect to Zookeeper servers: " + servers)
	}
	c.zk = zk
	return nil
}
Ejemplo n.º 9
0
// NewLockServer dials zookeeper at endpoints and waits up to five seconds for
// the first session event.
//
// It returns an error when dialing fails or when no event arrives in time; in
// the timeout case the dialed connection is closed before returning. The
// first event is only logged: its state is not inspected here.
func NewLockServer(endpoints string) (*LockServer, error) {
	zk, session, err := zookeeper.Dial(endpoints, 5e9)
	if err != nil {
		// Printf, not Println: the message carries a %v directive.
		log.Printf("Can't connect: %v", err)
		return nil, err
	}

	select {
	case event := <-session:
		log.Println(event)
	case <-time.After(time.Second * 5):
		// Nobody else holds the connection yet, so release it here.
		zk.Close()
		return nil, errors.New("Connection timeout")
	}
	return &LockServer{zk, session, make(chan bool)}, nil
}
Ejemplo n.º 10
0
// DialZk dials a ZK server and waits for the first connection event. It
// returns a ZkConn wrapping the zookeeper.Conn, plus the zookeeper session
// event channel that callers can use to monitor the connection.
//
// baseTimeout doubles as the session timeout and is used to negotiate a
// 'good' value with the server. Judging from the zookeeper source code, it
// has to lie between 6 and 60 seconds (2x and 20x the tickTime by default,
// the default tick time being 3 seconds). The minimum session time, maximum
// session time and tick time can all be overridden on the zookeeper server
// side, so these numbers may vary.
//
// Other related timeouts are derived from baseTimeout:
// - connect timeout is 1/3 of baseTimeout
// - recv timeout is 2/3 of baseTimeout minus a ping time
// - send timeout is 1/3 of baseTimeout
// - a ping is sent at least every baseTimeout / 3
//
// Note that baseTimeout has *nothing* to do with how long it may take between
// the call to Dial and the arrival of the first event on the session. The
// library keeps trying to re-connect in the background (after each timeout),
// and may *never* send an event if the TCP connections always fail. Use
// DialZkTimeout to enforce a timeout for the initial connect.
func DialZk(zkAddr string, baseTimeout time.Duration) (*ZkConn, <-chan zookeeper.Event, error) {
	sem.Acquire()
	defer sem.Release()

	zconn, session, dialErr := zookeeper.Dial(zkAddr, baseTimeout)
	if dialErr != nil {
		return nil, nil, dialErr
	}

	// Wait for connection, possibly forever
	if first := <-session; first.State != zookeeper.STATE_CONNECTED {
		zconn.Close()
		return nil, nil, fmt.Errorf("zk connect failed: %v", first.State)
	}
	return &ZkConn{zconn}, session, nil
}
Ejemplo n.º 11
0
// TestInitErrorThroughErrno checks that dialing an address without a port
// fails with "zookeeper: dial: invalid argument" and yields neither a
// connection nor a watch channel.
//
// The error is delivered via C errno, since ZK unfortunately only provides
// the handler back from zookeeper_init().
func (s *S) TestInitErrorThroughErrno(c *C) {
	conn, watch, err := zk.Dial("bad-domain-without-port", 5e9)

	// Clean up whatever was unexpectedly returned before asserting, so a
	// failing assertion does not leave resources behind.
	if conn != nil {
		conn.Close()
	}
	if watch != nil {
		// Drain events in the background until the channel is closed.
		go func() {
			for range watch {
			}
		}()
	}

	c.Assert(conn, IsNil)
	c.Assert(watch, IsNil)
	c.Assert(err, ErrorMatches, "zookeeper: dial: invalid argument")
}
Ejemplo n.º 12
0
// main connects to the zookeeper server at localhost:2181, waits for the
// first session event and creates the "/counter" node with the value "0".
// Any failure terminates the program through log.Fatalf.
func main() {
	zk, session, err := zookeeper.Dial("localhost:2181", 5e9)
	if err != nil {
		log.Fatalf("Can't connect: %v", err)
	}
	// Covers the normal return path. log.Fatalf exits the process without
	// running deferred calls, so the fatal paths below close explicitly.
	defer zk.Close()

	// Wait for connection.
	event := <-session
	if event.State != zookeeper.STATE_CONNECTED {
		zk.Close()
		log.Fatalf("Can't connect: %v", event)
	}

	_, err = zk.Create("/counter", "0", 0, zookeeper.WorldACL(zookeeper.PERM_ALL))
	if err != nil {
		zk.Close()
		log.Fatalf("Can't create counter: %v", err)
	}
	fmt.Println("Counter created!")
}
Ejemplo n.º 13
0
// newZookeeper dials the zookeeper cluster at Conf.ZookeeperAddr with
// Conf.ZookeeperTimeout and returns a ZK wrapping the connection.
//
// It does not wait for the session to become connected. Instead a background
// goroutine logs every session event: states below STATE_CONNECTING as
// errors, STATE_CONNECTING as warnings and anything above as debug messages.
func newZookeeper() (*ZK, error) {
	zk, session, err := zookeeper.Dial(Conf.ZookeeperAddr, Conf.ZookeeperTimeout)
	if err != nil {
		Log.Error("zookeeper.Dial(\"%s\", %d) error(%v)", Conf.ZookeeperAddr, Conf.ZookeeperTimeout, err)
		return nil, err
	}
	go func() {
		// Range instead of a bare receive: if the session channel is ever
		// closed, a bare receive would return zero-value events in a tight
		// loop forever, whereas range lets the goroutine exit.
		for event := range session {
			switch {
			case event.State < zookeeper.STATE_CONNECTING:
				Log.Error("can't connect zookeeper, event: %v", event)
			case event.State == zookeeper.STATE_CONNECTING:
				Log.Warn("retry connect zookeeper, event: %v", event)
			default:
				Log.Debug("succeed connect zookeeper, event: %v", event)
			}
		}
	}()
	return &ZK{conn: zk}, nil
}
Ejemplo n.º 14
0
// NewZookeeper dials the zookeeper cluster at addr, using timeout (in
// seconds) as the dial timeout, and returns a ZK wrapping the connection.
//
// It does not wait for the session to become connected. Instead a background
// goroutine logs every session event: states below STATE_CONNECTING as
// errors, STATE_CONNECTING as warnings and anything above as debug messages.
func NewZookeeper(addr string, timeout int) (*ZK, error) {
	zk, session, err := zookeeper.Dial(addr, time.Duration(timeout)*time.Second)
	if err != nil {
		return nil, err
	}

	go func() {
		// Range instead of a bare receive: if the session channel is ever
		// closed, a bare receive would return zero-value events in a tight
		// loop forever, whereas range lets the goroutine exit.
		for event := range session {
			switch {
			case event.State < zookeeper.STATE_CONNECTING:
				Log.Error("can't connect zookeeper, event: %v", event)
			case event.State == zookeeper.STATE_CONNECTING:
				Log.Warn("retry connect zookeeper, event: %v", event)
			default:
				Log.Debug("succeed connect zookeeper, event: %v", event)
			}
		}
	}()

	return &ZK{conn: zk}, nil
}
Ejemplo n.º 15
0
// TestRecvTimeoutInitParameter dials with a zero receive timeout and expects
// one of up to 1000 Get calls on "/zookeeper" to fail with
// ZOPERATIONTIMEOUT. The test fails if the watch fires right after dialing
// or if no Get call returns an error.
func (s *S) TestRecvTimeoutInitParameter(c *C) {
	conn, watch, err := zk.Dial(s.zkAddr, 0)
	c.Assert(err, IsNil)
	defer conn.Close()

	// Non-blocking probe: no session event may be pending yet.
	select {
	case <-watch:
		c.Fatal("Watch fired")
	default:
	}

	for attempt := 0; attempt < 1000; attempt++ {
		_, _, getErr := conn.Get("/zookeeper")
		if getErr == nil {
			continue
		}
		// The first error ends the test; it must be an operation timeout.
		timedOut := zk.IsError(getErr, zk.ZOPERATIONTIMEOUT)
		c.Check(timedOut, Equals, true, Commentf("%v", getErr))
		c.SucceedNow()
	}

	c.Fatal("Operation didn't timeout")
}
Ejemplo n.º 16
0
// init dials s.zkAddr, records the connection in s.handles and waits up to
// five seconds for the initial session event, which must be an EVENT_SESSION
// in STATE_CONNECTED.
//
// It returns the connection and a buffered channel (capacity 256) that
// mirrors the watch channel, starting with that initial event. A background
// goroutine forwards later events and panics if the buffer is full; once the
// watch channel is closed it closes the buffered channel and signals
// s.deadWatches.
func (s *S) init(c *C) (*zk.Conn, chan zk.Event) {
	c.Logf("init dialling %q", s.zkAddr)
	conn, watch, err := zk.Dial(s.zkAddr, 5e9)
	c.Assert(err, IsNil)
	s.handles = append(s.handles, conn)
	bufferedWatch := make(chan zk.Event, 256)

	select {
	case first, open := <-watch:
		c.Assert(open, Equals, true)
		c.Assert(first.Type, Equals, zk.EVENT_SESSION)
		c.Assert(first.State, Equals, zk.STATE_CONNECTED)
		bufferedWatch <- first
	case <-time.After(5e9):
		c.Fatalf("timeout dialling zookeeper addr %v", s.zkAddr)
	}

	s.liveWatches++
	go func() {
		for event := range watch {
			// Never block the forwarder: overflowing the buffer is a
			// test bug and is reported loudly.
			select {
			case bufferedWatch <- event:
			default:
				panic("Too many events in buffered watch!")
			}
		}
		close(bufferedWatch)
		s.deadWatches <- true
	}()

	return conn, bufferedWatch
}
Ejemplo n.º 17
0
// InitWatch initializes the watch module.
//
// It dials zookeeper using Conf.Zookeeper.Addr and Conf.Zookeeper.Timeout
// (in seconds), stores the connection in the package-level zk variable, and
// then reads session events until one reports a state other than
// STATE_CONNECTING.
//
// An error is returned when dialing fails or when a session event reports a
// state below STATE_CONNECTING. Note that zk is assigned before the session
// state is known, so it stays set on that later failure.
func InitWatch() error {
	// Initialize zookeeper connection
	zkTmp, session, err := zookeeper.Dial(Conf.Zookeeper.Addr, time.Duration(Conf.Zookeeper.Timeout)*time.Second)
	if err != nil {
		return err
	}
	zk = zkTmp

	// Keep consuming session events while the client is still connecting.
	for {
		event := <-session
		if event.State < zookeeper.STATE_CONNECTING {
			return fmt.Errorf("connect zookeeper fail, event: %v", event)
		}
		if event.State != zookeeper.STATE_CONNECTING {
			// Any state above STATE_CONNECTING ends the wait.
			break
		}
		time.Sleep(time.Second)
	}

	return nil
}
Ejemplo n.º 18
0
// CheckProcess verifies that the zookeeper daemon described by zkd.config is
// reachable.
//
// It reads the pid file and, when its content parses as an integer, looks the
// process up with os.FindProcess (a pid that does not parse skips this step).
// It then connects to localhost on the configured client port, waits up to
// StartWaitTime seconds for the first session event, and finally reads the
// "/zk" node.
//
// It returns nil only when all of that succeeds. A first session event that
// does not report STATE_CONNECTED is an error, as is a missing event within
// the deadline.
func (zkd *Zkd) CheckProcess() error {
	pidFile := zkd.config.PidFile()
	data, err := ioutil.ReadFile(pidFile)
	if err != nil {
		return err
	}

	pid, err := strconv.Atoi(string(data))
	// found a pid - if the process is burned, fast-fail
	// otherwise, try to connect and fail slowly
	if err == nil {
		_, err := os.FindProcess(pid)
		if err != nil {
			return err
		}
	}

	zkAddr := fmt.Sprintf("localhost:%v", zkd.config.ClientPort)
	zk, session, err := zookeeper.Dial(zkAddr, StartWaitTime*time.Second)
	if err != nil {
		return err
	}
	defer zk.Close()
	timer := time.NewTimer(StartWaitTime * time.Second)
	defer timer.Stop()
	select {
	case event := <-session:
		if event.State != zookeeper.STATE_CONNECTED {
			// err is nil at this point, so build an explicit error instead
			// of returning it and reporting success.
			return fmt.Errorf("zk connect failed: %v", event.State)
		}
	case <-timer.C:
		return errors.New("zk deadline exceeded connecting to " + zkAddr)
	}
	_, _, err = zk.Get("/zk")
	return err
}
Ejemplo n.º 19
0
// TestConcurrentClose checks, for every function in requestFuncs, that
// conn.Close does not return while a request issued on the same connection
// is still outstanding, and that a request issued after Close fails with
// ZCLOSING.
func (s *S) TestConcurrentClose(c *C) {
	// make sure the server is ready to receive connections.
	s.init(c)

	// Close should wait until all outstanding requests have
	// completed before returning.  The idea of this test is that
	// any request that requests or changes a zookeeper node must
	// make at least one round trip to the server, so we interpose a
	// proxy between the client and the server which can stop all
	// incoming traffic on demand, thus blocking the request until
	// we want it to unblock.
	//
	// We assume that all requests take less than 0.1s to complete,
	// thus when we wait below, neither of the above goroutines
	// should complete within the allotted time (the request because
	// it's waiting for a reply from the server and the close
	// because it's waiting for the request to complete).  If the
	// locking doesn't work, the Close will return early.  If the
	// proxy blocking doesn't work, the request will return early.
	//
	// When we reenable incoming messages from the server, both
	// goroutines should complete.  We can't tell which completes
	// first, but the fact that the close blocked is sufficient to
	// tell that the locking is working correctly.
	for i, f := range requestFuncs {
		c.Logf("iter %d", i)
		// Each iteration gets its own proxy and its own connection
		// dialed through that proxy.
		p := newProxy(c, s.zkAddr)
		conn, watch, err := zk.Dial(p.addr(), 5e9)
		c.Assert(err, IsNil)
		c.Assert((<-watch).Ok(), Equals, true)

		// sanity check that the connection is actually
		// up and running.
		_, err = conn.Exists("/nothing")
		c.Assert(err, IsNil)

		// From here on, replies from the server are held back by the proxy.
		p.stopIncoming()
		reqDone := make(chan bool)
		closeDone := make(chan bool)
		go func() {
			f(conn, "/closetest")
			reqDone <- true
		}()
		go func() {
			// sleep for long enough for the request to be initiated and the read lock taken.
			time.Sleep(0.05e9)
			conn.Close()
			closeDone <- true
		}()
		// Phase 1: with incoming traffic stopped, neither goroutine may
		// finish within 0.1s.
		select {
		case <-reqDone:
			c.Fatalf("request %d finished early", i)
		case <-closeDone:
			c.Fatalf("request %d close finished early", i)
		case <-time.After(0.1e9):
		}
		// Phase 2: let replies through again and wait for both goroutines.
		// A channel variable is set to nil once its signal has been
		// received; receiving from a nil channel blocks forever, so that
		// select case can no longer fire and the loop ends when both are nil.
		p.startIncoming()
		for reqDone != nil || closeDone != nil {
			select {
			case <-reqDone:
				reqDone = nil
			case <-closeDone:
				closeDone = nil
			case <-time.After(0.4e9):
				c.Fatalf("request %d timed out waiting for req (%p) and close(%p)", i, reqDone, closeDone)
			}
		}
		p.close()
		// The connection is closed by now, so the same request must be
		// rejected with ZCLOSING.
		err = f(conn, "/closetest")
		c.Check(zk.IsError(err, zk.ZCLOSING), Equals, true, Commentf("%v", err))
	}
}