Example #1
func (top *Topology) doWatch(evtch <-chan topo.Event, evtbus chan interface{}) {
	e := <-evtch
	log.Warningf("topo event %+v", e)

	switch e.Type {
	//case topo.EventNodeCreated:
	//case topo.EventNodeDataChanged:
	case topo.EventNodeChildrenChanged: // we only care about children-changed events
		// TODO: get changed node and decode event
	default:
		log.Warningf("%+v", e)
	}

	evtbus <- e
}
Example #2
func (c *Session) handleComStmtPrepare(sqlstmt string) error {
	stmt, err := parser.Parse(sqlstmt)
	if err != nil {
		log.Warningf(`parse sql "%s" error "%s"`, sqlstmt, err.Error())
		return c.handleMySQLError(
			mysql.NewDefaultError(mysql.ER_SYNTAX_ERROR, err.Error()))
	}

	// Only a few statement types are supported as prepared statements
	// http://dev.mysql.com/worklog/task/?id=2871
	switch v := stmt.(type) {
	case parser.ISelect, *parser.Insert, *parser.Update, *parser.Delete,
		*parser.Replace,
		parser.IDDLStatement,
		*parser.ShowTables,
		*parser.ShowColumns,
		*parser.ShowVariables,
		*parser.ShowIndex,
		*parser.Set,
		*parser.DescribeTable,
		*parser.Do:
		return c.prepare(v, sqlstmt)
	default:
		log.Warnf("statement %T[%s] not support prepare ops", stmt, sqlstmt)
		return c.handleMySQLError(
			mysql.NewDefaultError(mysql.ER_UNSUPPORTED_PS))
	}
}
Example #3
File: main.go Project: cuiwm/reborn
func (h *Handler) run() error {
	log.Infof("open listen address '%s' and start service", h.l.Addr())

	for {
		if nc, err := h.l.Accept(); err != nil {
			return errors.Trace(err)
		} else {
			h.counters.clientsAccepted.Add(1)
			go func() {
				h.counters.clients.Add(1)
				defer h.counters.clients.Sub(1)

				c := newConn(nc, h, h.config.ConnTimeout)

				log.Infof("new connection: %s", c)
				if err := c.serve(h); err != nil {
					if errors.Cause(err) == io.EOF {
						log.Infof("connection lost: %s [io.EOF]", c)
					} else {
						log.Warningf("connection lost: %s, err = %s", c, err)
					}
				} else {
					log.Infof("connection exit: %s", c)
				}
			}()
		}
	}
	return nil
}
Example #4
File: slots.go Project: CowLeo/qdb
// SLOTSRESTORE key ttlms value [key ttlms value ...]
func (s *Store) SlotsRestore(db uint32, args [][]byte) error {
	if len(args) == 0 || len(args)%3 != 0 {
		return errArguments("len(args) = %d, expect != 0 && mod 3 = 0", len(args))
	}

	objs := make([]*rdb.ObjEntry, len(args)/3)
	for i := 0; i < len(objs); i++ {
		key := args[i*3]
		ttlms, err := ParseInt(args[i*3+1])
		if err != nil {
			return errArguments("parse args failed - %s", err)
		}
		value := args[i*3+2]

		expireat := int64(0)
		if ttlms != 0 {
			if v, ok := TTLmsToExpireAt(ttlms); ok && v > 0 {
				expireat = v
			} else {
				return errArguments("parse args[%d] ttlms = %d", i*3+1, ttlms)
			}
		}

		obj, err := rdb.DecodeDump(value)
		if err != nil {
			return errArguments("decode args[%d] failed, %s", i*3+2, err)
		}

		objs[i] = &rdb.ObjEntry{
			DB:       db,
			Key:      key,
			ExpireAt: uint64(expireat),
			Value:    obj,
		}
	}

	if err := s.acquire(); err != nil {
		return errors.Trace(err)
	}
	defer s.release()

	ms := &markSet{}
	bt := engine.NewBatch()
	for i := len(objs) - 1; i >= 0; i-- {
		e := objs[i]
		if ms.Has(e.Key) {
			log.Debugf("[%d] restore batch, db = %d, key = %v, ignore", i, e.DB, e.Key)
			continue
		} else {
			log.Debugf("[%d] restore batch, db = %d, key = %v", i, e.DB, e.Key)
		}
		if err := s.restore(bt, e.DB, e.Key, int64(e.ExpireAt), e.Value); err != nil {
			log.Warningf("restore object failed, db = %d, key = %v, err = %s", e.DB, e.Key, err)
			return errors.Trace(err)
		}
		ms.Set(e.Key)
	}
	fw := &Forward{DB: db, Op: "SlotsRestore", Args: args}
	return s.commit(bt, fw)
}
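
SLOTSRESTORE may carry the same key several times; the loop above walks the decoded entries in reverse and consults the markSet so that only the most recent occurrence of each key is written. Below is a minimal, self-contained sketch of that last-write-wins pass, assuming a map-backed seen-set instead of the project's markSet and plain string keys instead of []byte (entry and lastWins are illustrative names):

package main

import "fmt"

// entry stands in for the decoded (key, value) pairs above; the type and the
// string keys are illustrative only.
type entry struct {
	Key   string
	Value string
}

// lastWins mirrors the markSet check in SlotsRestore: walk the slice from the
// end and skip any key that has already been restored, so the newest value
// for a duplicated key is the one that survives.
func lastWins(entries []entry) []entry {
	seen := make(map[string]bool)
	kept := make([]entry, 0, len(entries))
	for i := len(entries) - 1; i >= 0; i-- {
		e := entries[i]
		if seen[e.Key] {
			continue // an older duplicate; the newer value already won
		}
		seen[e.Key] = true
		kept = append(kept, e)
	}
	return kept
}

func main() {
	in := []entry{{"a", "1"}, {"b", "2"}, {"a", "3"}}
	fmt.Println(lastWins(in)) // [{a 3} {b 2}]
}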
Example #5
func (s *Store) travelPostCommitHandlers(f *Forward) {
	for _, h := range s.postCommitHandlers {
		if err := h(f); err != nil {
			log.Warningf("handle DidCommitHandler err - %s", err)
		}
	}
}
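
A hedged sketch of how such post-commit hooks are typically wired up: handlers are plain functions collected in a slice, and a failing handler is reported without stopping the remaining ones. The forward/store types and the handler bodies below are illustrative stand-ins, not the project's code:

package main

import (
	"errors"
	"fmt"
)

// forward is a stand-in for the project's *Forward payload.
type forward struct{ Op string }

type store struct {
	postCommitHandlers []func(*forward) error
}

// travel calls every registered handler; an error is reported (Println here in
// place of log.Warningf) but does not abort the remaining handlers.
func (s *store) travel(f *forward) {
	for _, h := range s.postCommitHandlers {
		if err := h(f); err != nil {
			fmt.Println("handle DidCommitHandler err -", err)
		}
	}
}

func main() {
	s := &store{}
	s.postCommitHandlers = append(s.postCommitHandlers,
		func(f *forward) error { fmt.Println("replicate", f.Op); return nil },
		func(f *forward) error { return errors.New("boom") },
	)
	s.travel(&forward{Op: "SlotsRestore"})
}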
Example #6
File: conn.go Project: cuiwm/reborn
func (c *conn) handleRequest(h *Handler) (redis.Resp, error) {
	if c.timeout > 0 {
		deadline := time.Now().Add(c.timeout)
		if err := c.nc.SetReadDeadline(deadline); err != nil {
			return nil, errors.Trace(err)
		}
	}
	request, err := redis.DecodeRequest(c.r)
	if err != nil {
		return nil, errors.Trace(err)
	}

	if request.Type() == redis.TypePing {
		return nil, nil
	}

	h.counters.commands.Add(1)
	response, err := c.dispatch(h, request)
	if err != nil {
		h.counters.commandsFailed.Add(1)
		b, _ := redis.EncodeToBytes(request)
		log.Warningf("handle commands failed, conn = %s, request = '%s', err = %s", c, base64.StdEncoding.EncodeToString(b), err)
	}

	return response, nil
}
Example #7
// Interrupt releases a lock that's held.
func (zm *zMutex) Interrupt() {
	select {
	case zm.interrupted <- struct{}{}:
	default:
		log.Warningf("zmutex interrupt blocked")
	}
}
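
Interrupt relies on the select/default idiom for a non-blocking send: if nobody is currently waiting on the channel, it gives up immediately instead of blocking the caller. A minimal sketch of the same idiom (notify and the buffered channel are illustrative):

package main

import "fmt"

// notify performs a non-blocking send: it reports false instead of blocking
// when no receiver is ready and the buffer is full.
func notify(ch chan struct{}) bool {
	select {
	case ch <- struct{}{}:
		return true
	default:
		return false
	}
}

func main() {
	ch := make(chan struct{}, 1)
	fmt.Println(notify(ch)) // true: the buffer has room
	fmt.Println(notify(ch)) // false: buffer full and nobody receiving
}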
Example #8
func (c *conn) Do(cmd *redis.Array, timeout time.Duration) (redis.Resp, error) {
	if c.err != nil {
		return nil, errors.Trace(c.err)
	}
	if err := c.encodeResp(cmd, timeout); err != nil {
		c.err = err
		log.Warningf("encode resp failed - %s", err)
		return nil, errors.Trace(c.err)
	}
	if rsp, err := c.decodeResp(timeout); err != nil {
		c.err = err
		log.Warningf("decode resp failed - %s", err)
		return nil, errors.Trace(c.err)
	} else {
		c.last = time.Now()
		return rsp, nil
	}
}
Example #9
func doMigrate(addr string, timeout time.Duration, db uint32, bins []*rdb.BinEntry) error {
	c, err := getSockConn(addr, timeout)
	if err != nil {
		log.Warningf("connect to %s failed, timeout = %d, err = %s", addr, timeout, err)
		return errors.Trace(err)
	}
	defer putSockConn(addr, c)

	cmd1 := redis.NewArray()
	cmd1.AppendBulkBytes([]byte("select"))
	cmd1.AppendBulkBytes([]byte(FormatUint(uint64(db))))

	if err := c.DoMustOK(cmd1, timeout); err != nil {
		log.Warningf("command select failed, addr = %s, db = %d, err = %s", addr, db, err)
		return errors.Trace(err)
	}
	log.Debugf("command select ok, addr = %s, db = %d, err = %s", addr, db, err)

	cmd2 := redis.NewArray()
	cmd2.AppendBulkBytes([]byte("slotsrestore"))
	for _, bin := range bins {
		cmd2.AppendBulkBytes(bin.Key)
		ttlms := int64(0)
		if bin.ExpireAt != 0 {
			if v, ok := ExpireAtToTTLms(int64(bin.ExpireAt)); ok && v > 0 {
				ttlms = v
			} else {
				ttlms = 1
			}
		}
		cmd2.AppendBulkBytes([]byte(FormatInt(ttlms)))
		cmd2.AppendBulkBytes(bin.Value)
	}

	if err := c.DoMustOK(cmd2, timeout); err != nil {
		log.Warningf("command restore failed, addr = %s, db = %d, len(bins) = %d, err = %s", addr, db, len(bins), err)
		return errors.Trace(err)
	} else {
		log.Debugf("command restore ok, addr = %s, db = %d, len(bins) = %d", addr, db, len(bins))
		return nil
	}
}
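
Note the ttlms handling when building the slotsrestore command: 0 still means "no expiration", a positive remaining TTL is forwarded, and a key whose ExpireAt already passed is sent with 1ms so the receiving node expires it immediately rather than keeping it forever. A self-contained sketch of that clamping, assuming expire times in Unix milliseconds (ttlmsForRestore is an illustrative helper, not the project's ExpireAtToTTLms):

package main

import (
	"fmt"
	"time"
)

// ttlmsForRestore clamps an absolute expire time to the TTL that should be
// sent with slotsrestore: 0 keeps the key persistent, a future expire time
// becomes the remaining TTL, and a past expire time becomes 1ms so the
// receiver drops the key right away.
func ttlmsForRestore(expireAtMs int64, now time.Time) int64 {
	if expireAtMs == 0 {
		return 0
	}
	ttlms := expireAtMs - now.UnixNano()/int64(time.Millisecond)
	if ttlms > 0 {
		return ttlms
	}
	return 1
}

func main() {
	now := time.Now()
	ms := func(t time.Time) int64 { return t.UnixNano() / int64(time.Millisecond) }
	fmt.Println(ttlmsForRestore(0, now))                         // 0: persistent key
	fmt.Println(ttlmsForRestore(ms(now.Add(time.Second)), now))  // ~1000: one second left
	fmt.Println(ttlmsForRestore(ms(now.Add(-time.Minute)), now)) // 1: already expired
}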
Example #10
func loadSavedProcs() error {
	files, err := ioutil.ReadDir(baseProcDataDir())
	if err != nil {
		return errors.Trace(err)
	}

	for _, f := range files {
		if !f.IsDir() {
			continue
		}

		baseName := path.Base(f.Name())
		tp, id, ok := getPathType(baseName)
		if !ok {
			continue
		}

		datFile := path.Join(baseProcDataDir(), baseName, fmt.Sprintf("%s.dat", tp))
		if p, err := loadProcess(datFile); err != nil {
			log.Warningf("load process data %s err %v, skip", dataDir, err)
			continue
		} else if p == nil {
			log.Infof("proc %s has no need to be reload, skip", id)
			continue
		} else {
			if id != p.ID {
				log.Warningf("we need proc %s, but got %s", id, p.ID)
				continue
			}

			// TODO: bind after start func for different type
			if err := bindProcHandler(p); err != nil {
				log.Errorf("bind proc %s err %v, skip", p.Cmd, err)
				continue
			}
			addCheckProc(p)
		}
	}

	return nil
}
Example #11
func putSockConn(addr string, c *conn) {
	if c.err != nil {
		c.sock.Close()
		log.Warningf("close error connection %s : %s - err = %s", addr, c, c.err)
	} else {
		poolmap.Lock()
		pool := poolmap.m[addr]
		if pool == nil {
			pool = list.New()
			poolmap.m[addr] = pool
		}
		c.last = time.Now()
		pool.PushFront(c)
		poolmap.Unlock()
	}
}
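
putSockConn returns healthy connections to a per-address free list guarded by a single mutex, and simply closes broken ones instead of recycling them. A minimal, self-contained sketch of that pool shape, using strings as stand-ins for *conn (the pool type below is illustrative, not the project's poolmap/getSockConn):

package main

import (
	"container/list"
	"fmt"
	"sync"
)

// pool keeps one free list per address, guarded by a single mutex, mirroring
// the poolmap layout used above.
type pool struct {
	mu sync.Mutex
	m  map[string]*list.List
}

func (p *pool) put(addr, c string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	l := p.m[addr]
	if l == nil {
		l = list.New()
		p.m[addr] = l
	}
	l.PushFront(c)
}

func (p *pool) get(addr string) (string, bool) {
	p.mu.Lock()
	defer p.mu.Unlock()
	l := p.m[addr]
	if l == nil || l.Len() == 0 {
		return "", false // caller would dial a fresh connection here
	}
	return l.Remove(l.Front()).(string), true
}

func main() {
	p := &pool{m: make(map[string]*list.List)}
	p.put("127.0.0.1:6379", "conn-1")
	fmt.Println(p.get("127.0.0.1:6379")) // conn-1 true
	fmt.Println(p.get("127.0.0.1:6379")) // "" false: pool empty, dial a new conn
}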
Example #12
File: proc.go Project: vebin/reborn
func (p *process) checkAlive() (bool, error) {
	proc, err := ps.FindProcess(p.Pid)
	if err != nil {
		return false, errors.Trace(err)
	} else if proc == nil {
		// proc is not alive
		return false, nil
	} else {
		if strings.Contains(proc.Executable(), p.Cmd) {
			return true, nil
		} else {
			log.Warningf("pid %d exits, but exeutable name is %s, not %s", p.Pid, proc.Executable(), p.Cmd)
			return false, nil
		}
	}
}
Example #13
func (s *Server) OnSlotRangeChange(param *models.SlotMultiSetParam) {
	log.Warningf("slotRangeChange %+v", param)
	if !validSlot(param.From) || !validSlot(param.To) {
		log.Errorf("invalid slot number, %+v", param)
		return
	}

	for i := param.From; i <= param.To; i++ {
		switch param.Status {
		case models.SLOT_STATUS_OFFLINE:
			s.clearSlot(i)
		case models.SLOT_STATUS_ONLINE:
			s.fillSlot(i, true)
		default:
			log.Errorf("can not handle status %v", param.Status)
		}
	}
}
Example #14
func checkProcs() {
	restartProcs := []*process{}

	m.Lock()

	for _, p := range procs {
		if b, err := p.checkAlive(); err != nil {
			log.Errorf("check %d (%s) alive err %v, retry later", p.Pid, p.Cmd, err)
		} else if !b {
			needRestart := p.needRestart()
			log.Warningf("%d (%s) is not alive, need restart: %v", p.Pid, p.Cmd, needRestart)
			if needRestart {
				restartProcs = append(restartProcs, p)
			}

			// clear old log
			p.clearLog()

			// remove from procs
			delete(procs, p.ID)
		}
	}

	m.Unlock()

	for _, p := range restartProcs {
		switch strings.ToLower(p.Type) {
		case proxyType:
			// for proxy type, we will use a new id to avoid zk node exists error
			args := new(proxyArgs)
			map2Args(args, p.Ctx)

			p.clearData()

			startProxy(args)
		default:
			if err := p.start(); err != nil {
				log.Errorf("restart %s err %v", p.Cmd, err)
			} else {
				addCheckProc(p)
			}
		}
	}
}
Example #15
func (c *Session) comQuery(sqlstmt string) error {

	stmt, err := parser.Parse(sqlstmt)
	if err != nil {
		log.Warningf(`parse sql "%s" error "%s"`, sqlstmt, err.Error())
		return c.handleMySQLError(
			NewDefaultError(ER_SYNTAX_ERROR, err.Error()))
	}

	switch v := stmt.(type) {
	case parser.ISelect:
		return c.handleQuery(v, sqlstmt)
	case *parser.Insert, *parser.Update, *parser.Delete, *parser.Replace:
		return c.handleExec(stmt, sqlstmt, false)
	case *parser.Set:
		return c.handleSet(v, sqlstmt)
	case *parser.Begin, *parser.StartTrans:
		return c.handleBegin()
	case *parser.Commit:
		return c.handleCommit()
	case *parser.Rollback:
		return c.handleRollback()
	case parser.IShow:
		return c.handleShow(sqlstmt, v)
	case parser.IDDLStatement:
		return c.handleDDL(v, sqlstmt)
	case *parser.Do, *parser.Call, *parser.FlushTables:
		return c.handleExec(stmt, sqlstmt, false)
	case *parser.Use:
		if err := c.useDB(hack.String(stmt.(*parser.Use).DB)); err != nil {
			return c.handleMySQLError(err)
		} else {
			return c.fc.WriteOK(nil)
		}
	default:
		log.Warnf("statement %T[%s] not support now", stmt, sqlstmt)
		return nil
	}

	return nil
}
Example #16
func (s *Store) commit(bt *engine.Batch, fw *Forward) error {
	if bt.Len() == 0 {
		return nil
	}

	s.travelPreCommitHandlers(fw)

	if err := s.db.Commit(bt); err != nil {
		log.Warningf("store commit failed - %s", err)
		return err
	}
	for i := s.itlist.Len(); i != 0; i-- {
		v := s.itlist.Remove(s.itlist.Front()).(*storeIterator)
		v.Close()
	}
	s.serial++

	s.travelPostCommitHandlers(fw)

	return nil
}
Example #17
func (s *Server) checkAndDoTopoChange(seq int) bool {
	act, err := s.top.GetActionWithSeq(int64(seq))
	if err != nil { // TODO: error is not "not exist"
		log.Fatal(errors.ErrorStack(err), "action seq", seq)
	}

	if !needResponse(act.Receivers, s.pi) { // no need to response
		return false
	}

	log.Warningf("action %v receivers %v", seq, act.Receivers)

	s.stopTaskRunners()

	switch act.Type {
	case models.ACTION_TYPE_SLOT_MIGRATE, models.ACTION_TYPE_SLOT_CHANGED,
		models.ACTION_TYPE_SLOT_PREMIGRATE:
		slot := &models.Slot{}
		s.getActionObject(seq, slot)
		s.fillSlot(slot.Id, true)
	case models.ACTION_TYPE_SERVER_GROUP_CHANGED:
		serverGroup := &models.ServerGroup{}
		s.getActionObject(seq, serverGroup)
		s.OnGroupChange(serverGroup.Id)
	case models.ACTION_TYPE_SERVER_GROUP_REMOVE:
		// do not care
	case models.ACTION_TYPE_MULTI_SLOT_CHANGED:
		param := &models.SlotMultiSetParam{}
		s.getActionObject(seq, param)
		s.OnSlotRangeChange(param)
	default:
		log.Fatalf("unknown action %+v", act)
	}

	s.createTaskRunners()

	return true
}
Example #18
func (s *Server) handleConn(c net.Conn) {
	log.Info("new connection", c.RemoteAddr())

	s.counter.Add("connections", 1)
	client := &session{
		Conn:          c,
		r:             bufio.NewReaderSize(c, DefaultReaderSize),
		w:             bufio.NewWriterSize(c, DefaultWiterSize),
		CreateAt:      time.Now(),
		backQ:         make(chan *PipelineResponse, PipelineResponseNum),
		closeSignal:   &sync.WaitGroup{},
		authenticated: false,
	}
	client.closeSignal.Add(1)

	go client.WritingLoop()

	var err error
	defer func() {
		client.closeSignal.Wait() // wait for the writer goroutine to finish
		if errors2.ErrorNotEqual(err, io.EOF) {
			log.Warningf("close connection %v, %v", client, errors.ErrorStack(err))
		} else {
			log.Infof("close connection %v", client)
		}

		s.counter.Add("connections", -1)
	}()

	for {
		err = s.redisTunnel(client)
		if err != nil {
			close(client.backQ)
			return
		}
		client.Ops++
	}
}
Example #19
func createDashboardNode(conn zkhelper.Conn) error {
	// make sure the root dir exists
	rootDir := fmt.Sprintf("/zk/reborn/db_%s", globalEnv.ProductName())
	zkhelper.CreateRecursive(conn, rootDir, "", 0, zkhelper.DefaultDirACLs())

	coordPath := fmt.Sprintf("%s/dashboard", rootDir)
	// make sure we're the only dashboard
	timeoutCh := time.After(60 * time.Second)

	for {
		if exists, _, ch, _ := conn.ExistsW(coordPath); exists {
			data, _, _ := conn.Get(coordPath)

			if checkDashboardAlive(data) {
				return errors.Errorf("dashboard already exists: %s", string(data))
			} else {
				log.Warningf("dashboard %s exists in zk, wait it removed", data)

				select {
				case <-ch:
				case <-timeoutCh:
					return errors.Errorf("wait existed dashboard %s removed timeout", string(data))
				}
			}
		} else {
			break
		}
	}

	content := fmt.Sprintf(`{"addr": "%v", "pid": %v}`, globalEnv.DashboardAddr(), os.Getpid())
	pathCreated, err := conn.Create(coordPath, []byte(content),
		zk.FlagEphemeral, zkhelper.DefaultFileACLs())

	log.Infof("dashboard node %s created, data %s, err %v", pathCreated, string(content), err)

	return errors.Trace(err)
}
Example #20
func (c *client) do(table, row []byte, action action, useCache bool, retries int) chan pb.Message {
	region := c.LocateRegion(table, row, useCache)
	if region == nil {
		return nil
	}
	conn := c.getRegionConn(region.Server)
	if conn == nil {
		return nil
	}

	regionSpecifier := &proto.RegionSpecifier{
		Type:  proto.RegionSpecifier_REGION_NAME.Enum(),
		Value: []byte(region.Name),
	}

	var cl *call
	switch a := action.(type) {
	case *Get:
		cl = newCall(&proto.GetRequest{
			Region: regionSpecifier,
			Get:    a.ToProto().(*proto.Get),
		})
	case *Put, *Delete:
		cl = newCall(&proto.MutateRequest{
			Region:   regionSpecifier,
			Mutation: a.ToProto().(*proto.MutationProto),
		})

	case *CoprocessorServiceCall:
		cl = newCall(&proto.CoprocessorServiceRequest{
			Region: regionSpecifier,
			Call:   a.ToProto().(*proto.CoprocessorServiceCall),
		})
	}

	if cl == nil {
		// none of the known action types matched; nothing to send
		return nil
	}

	result := make(chan pb.Message)

	go func() {
		r := <-cl.responseCh

		switch r.(type) {
		case *exception:
			if retries <= c.maxRetries {
				// retry action, and refresh region info
				log.Infof("Retrying action for the %d time", retries+1)
				newr := c.do(table, row, action, false, retries+1)
				result <- <-newr
			} else {
				result <- r
			}
			return
		default:
			result <- r
		}
	}()

	if cl != nil {
		err := conn.call(cl)

		if err != nil {
			log.Warningf("Error return while attempting call [err=%#v]", err)
			// purge dead server
			delete(c.cachedConns, region.Server)

			if retries <= c.maxRetries {
				// retry action
				log.Infof("Retrying action for the %d time", retries+1)
				c.do(table, row, action, false, retries+1)
			}
		}
	}

	return result
}
Example #21
File: sync.go Project: vebin/reborn
func (h *Handler) daemonSyncMaster() {
	var last *conn
	lost := make(chan int, 0)

	h.masterRunID = "?"
	h.syncOffset.Set(-1)
	h.masterConnState.Set(masterConnNone)

	retryTimer := time.NewTimer(infinityDelay)
	defer retryTimer.Stop()

	var err error
LOOP:
	for exists := false; !exists; {
		var c *conn
		needSlaveofReply := false
		select {
		case <-lost:
			h.masterConnState.Set(masterConnConnect)
			// the replication connection was broken; reconnect it after a short delay
			last = nil
			h.syncSince.Set(0)

			log.Infof("replication connection from master %s was broken, try reconnect 1s later", h.masterAddr.Get())
			retryTimer.Reset(time.Second)
			continue LOOP
		case <-h.signal:
			exists = true
		case c = <-h.master:
			needSlaveofReply = true
		case <-retryTimer.C:
			log.Infof("retry connect to master %s", h.masterAddr.Get())
			c, err = h.replicationConnectMaster(h.masterAddr.Get())
			if err != nil {
				log.Errorf("repliaction retry connect master %s err, try 1s later again - %s", h.masterAddr.Get(), err)
				retryTimer.Reset(time.Second)
				continue LOOP
			}
		}

		retryTimer.Reset(infinityDelay)

		if last != nil {
			last.Close()
			<-lost
		}
		last = c
		if c != nil {
			masterAddr := c.nc.RemoteAddr().String()

			syncOffset := h.syncOffset.Get()
			if masterAddr == h.masterAddr.Get() && h.masterRunID != "?" {
				// same master as the last synchronization; resume from the next offset
				syncOffset++
			} else {
				// the master changed since the last synchronization; force a full resync
				h.masterRunID = "?"
				h.syncOffset.Set(-1)
				syncOffset = -1
			}

			h.masterAddr.Set(masterAddr)

			go func(syncOffset int64) {
				defer func() {
					lost <- 0
				}()
				defer c.Close()
				err := h.psync(c, h.masterRunID, syncOffset)
				log.Warningf("slave %s do psync err - %s", c, err)
			}(syncOffset)

			h.syncSince.Set(time.Now().UnixNano() / int64(time.Millisecond))
			log.Infof("slaveof %s", h.masterAddr.Get())
		} else {
			h.masterAddr.Set("")
			h.syncOffset.Set(-1)
			h.masterRunID = "?"
			h.syncSince.Set(0)
			log.Infof("slaveof no one")
		}

		if needSlaveofReply {
			h.slaveofReply <- struct{}{}
		}
	}
}
Example #22
func errArguments(format string, v ...interface{}) error {
	err := errors.Errorf(format, v...)
	log.Warningf("call store function with invalid arguments - %s", err)
	return err
}
Example #23
File: proc.go Project: vebin/reborn
func (p *process) start() error {
	os.MkdirAll(p.procDataDir(), 0755)
	os.MkdirAll(p.procLogDir(), 0755)

	var c *exec.Cmd
	if p.Daemonize {
		c = exec.Command(p.Cmd, p.Args...)
	} else {
		args := append([]string{p.Cmd}, p.Args...)
		c = exec.Command("reborn-daemon", args...)
	}

	c.Stdout = os.Stdout
	c.Stderr = os.Stderr

	if err := c.Start(); err != nil {
		return errors.Trace(err)
	}

	go func() {
		// wait for the process in another goroutine; we don't handle
		// the result here because liveness is checked separately by
		// the process checker.
		c.Wait()
	}()

	log.Infof("wait 1s to let %s start ok", p.Type)
	time.Sleep(time.Second)

	var err error
	for i := 0; i < 5; i++ {
		// we must read pid from pid file
		if p.Pid, err = p.readPid(); err != nil {
			log.Warningf("read pid failed, err %v, wait 1s and retry", err)
			err = errors.Trace(err)
			time.Sleep(1 * time.Second)
		} else {
			break
		}
	}

	if err != nil {
		return errors.Trace(err)
	}

	if b, err := p.checkAlive(); err != nil {
		return errors.Trace(err)
	} else if !b {
		return errors.Errorf("start %d (%s) but it's not alive", p.Pid, p.Type)
	}

	if p.postStartFunc != nil {
		if err := p.postStartFunc(p); err != nil {
			log.Errorf("post start %d (%s) err %v", p.Pid, p.Type, err)
			return errors.Trace(err)
		}
	}

	log.Infof("%s start ok now", p.Type)
	return errors.Trace(p.save())
}
Example #24
// RunTask returns nil when the underlying task ends, or the error it
// generated.
func (ze *ZElector) RunTask(task ElectorTask) error {
	delay := newBackoffDelay(100*time.Millisecond, 1*time.Minute)
	leaderPath := path.Join(ze.path, "leader")
	for {
		_, err := CreateRecursive(ze.zconn, leaderPath, "", 0, zk.WorldACL(PERM_FILE))
		if err == nil || ZkErrorEqual(err, zk.ErrNodeExists) {
			break
		}
		log.Warningf("election leader create failed: %v", err)
		time.Sleep(delay.NextDelay())
	}

	for {
		err := ze.Lock("RunTask")
		if err != nil {
			log.Warningf("election lock failed: %v", err)
			if err == ErrInterrupted {
				return ErrInterrupted
			}
			continue
		}
		// Confirm your win and deliver acceptance speech. This notifies
		// listeners who will have been watching the leader node for
		// changes.
		_, err = ze.zconn.Set(leaderPath, []byte(ze.contents), -1)
		if err != nil {
			log.Warningf("election promotion failed: %v", err)
			continue
		}

		log.Infof("election promote leader %v", leaderPath)
		taskErrChan := make(chan error)
		go func() {
			taskErrChan <- task.Run()
		}()

	watchLeader:
		// Watch the leader so we can get notified if something goes wrong.
		data, _, watch, err := ze.zconn.GetW(leaderPath)
		if err != nil {
			log.Warningf("election unable to watch leader node %v %v", leaderPath, err)
			// FIXME(msolo) Add delay
			goto watchLeader
		}

		if string(data) != ze.contents {
			log.Warningf("election unable to promote leader")
			task.Stop()
			// We won the election, but we didn't become the leader. How is that possible?
			// (see Bush v. Gore for some inspiration)
			// It means:
			//   1. Someone isn't playing by the election rules (a bad actor).
			//      Hard to detect - let's assume we don't have this problem. :)
			//   2. We lost our connection somehow and the ephemeral lock was cleared,
			//      allowing someone else to win the election.
			continue
		}

		// This is where we start our target process and watch for its failure.
	waitForEvent:
		select {
		case <-ze.interrupted:
			log.Warning("election interrupted - stop child process")
			task.Stop()
			// Once the process dies from the signal, this will all tear down.
			goto waitForEvent
		case taskErr := <-taskErrChan:
			// If our code fails, unlock to trigger an election.
			log.Infof("election child process ended: %v", taskErr)
			ze.Unlock()
			if task.Interrupted() {
				log.Warningf("election child process interrupted - stepping down")
				return ErrInterrupted
			}
			continue
		case zevent := <-watch:
			// We had a zk connection hiccup.  We have a few choices,
			// but it depends on the constraints and the events.
			//
			// If we get SESSION_EXPIRED our connection loss triggered an
			// election that we won't have won and the thus the lock was
			// automatically freed. We have no choice but to start over.
			if zevent.State == zk.StateExpired {
				log.Warningf("election leader watch expired")
				task.Stop()
				continue
			}

			// Otherwise, we had an intermittent issue or something touched
			// the node. Either we lost our position or someone broke
			// protocol and touched the leader node.  We just reconnect and
			// revalidate. In the meantime, assume we are still the leader
			// until we determine otherwise.
			//
			// On a reconnect we will be able to see the leader
			// information. If we still hold the position, great. If not, we
			// kill the associated process.
			//
			// On a leader node change, we need to perform the same
			// validation. It's possible an election completes without the
			// old leader realizing he is out of touch.
			log.Warningf("election leader watch event %v", zevent)
			goto watchLeader
		}
	}
	panic("unreachable")
}
Example #25
// LockWithTimeout returns nil when the lock is acquired. A lock is
// held if the file exists and you are the creator. Setting the wait
// to zero makes this a nonblocking lock check.
//
// FIXME(msolo) Disallow non-super users from removing the lock?
func (zm *zMutex) LockWithTimeout(wait time.Duration, desc string) (err error) {
	timer := time.NewTimer(wait)
	defer func() {
		if panicErr := recover(); panicErr != nil || err != nil {
			zm.deleteLock()
		}
	}()
	// Ensure the rendezvous node is here.
	// FIXME(msolo) Assuming locks are contended, it will be cheaper to assume this just
	// exists.
	_, err = CreateRecursive(zm.zconn, zm.path, "", 0, zk.WorldACL(PERM_DIRECTORY))
	if err != nil && !ZkErrorEqual(err, zk.ErrNodeExists) {
		return err
	}

	lockPrefix := path.Join(zm.path, "lock-")
	zflags := zk.FlagSequence
	if zm.ephemeral {
		zflags = zflags | zk.FlagEphemeral
	}

	// update node content
	var lockContent map[string]interface{}
	err = json.Unmarshal([]byte(zm.contents), &lockContent)
	if err != nil {
		return err
	}
	lockContent["desc"] = desc
	newContent, err := json.Marshal(lockContent)
	if err != nil {
		return err
	}

createlock:
	lockCreated, err := zm.zconn.Create(lockPrefix, newContent, int32(zflags), zk.WorldACL(PERM_FILE))
	if err != nil {
		return err
	}
	name := path.Base(lockCreated)
	zm.mu.Lock()
	zm.name = name
	zm.mu.Unlock()

trylock:
	children, _, err := zm.zconn.Children(zm.path)
	if err != nil {
		return fmt.Errorf("zkutil: trylock failed %v", err)
	}
	sort.Strings(children)
	if len(children) == 0 {
		return fmt.Errorf("zkutil: empty lock: %v", zm.path)
	}

	if children[0] == name {
		// We are the lock owner.
		return nil
	}

	// This is the degenerate case of a nonblocking lock check. It's not optimal, but
	// also probably not worth optimizing.
	if wait == 0 {
		return ErrTimeout
	}
	prevLock := ""
	for i := 1; i < len(children); i++ {
		if children[i] == name {
			prevLock = children[i-1]
			break
		}
	}
	if prevLock == "" {
		// This is an interesting case. The node disappeared
		// underneath us, probably due to a session loss. We can
		// recreate the lock node (with a new sequence number) and
		// keep trying.
		log.Warningf("zkutil: no lock node found: %v/%v", zm.path, zm.name)
		goto createlock
	}

	zkPrevLock := path.Join(zm.path, prevLock)
	exist, stat, watch, err := zm.zconn.ExistsW(zkPrevLock)
	if err != nil {
		// FIXME(msolo) Should this be a retry?
		return fmt.Errorf("zkutil: unable to watch previous lock node %v %v", zkPrevLock, err)
	}
	if stat == nil || !exist {
		goto trylock
	}
	select {
	case <-timer.C:
		return ErrTimeout
	case <-zm.interrupted:
		return ErrInterrupted
	case event := <-watch:
		log.Infof("zkutil: lock event: %v", event)
		// The precise event doesn't matter - try to read again regardless.
		goto trylock
	}
	panic("unexpected")
}
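
The heart of LockWithTimeout is the sequence-node ordering: sort the children, the lowest name owns the lock, and everyone else watches only the node immediately before their own, which avoids a thundering herd when the owner releases. A small sketch of just that ordering step; predecessor is an illustrative helper operating on plain strings, not part of the zk utility package:

package main

import (
	"fmt"
	"sort"
)

// predecessor captures the ordering step of LockWithTimeout: after sorting the
// sequence children, the lowest name holds the lock and everyone else watches
// the node immediately before their own. It returns ("", true) when we own the
// lock, (prev, false) when we should watch prev, and ("", false) when our node
// is missing (the "recreate the lock node" case above).
func predecessor(children []string, name string) (prev string, owned bool) {
	sort.Strings(children)
	if len(children) > 0 && children[0] == name {
		return "", true
	}
	for i := 1; i < len(children); i++ {
		if children[i] == name {
			return children[i-1], false
		}
	}
	return "", false
}

func main() {
	kids := []string{"lock-0000000003", "lock-0000000001", "lock-0000000002"}
	fmt.Println(predecessor(kids, "lock-0000000001")) // "" true: we hold the lock
	fmt.Println(predecessor(kids, "lock-0000000003")) // lock-0000000002 false: watch it
	fmt.Println(predecessor(kids, "lock-0000000009")) // "" false: node vanished, recreate
}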
Example #26
// Close the done channel when you want to clean up nicely.
func CreatePidNode(zconn Conn, zkPath string, contents string, done chan struct{}) error {
	// On the first try, assume the cluster is up and running, that will
	// help hunt down any config issues present at startup
	if _, err := zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(PERM_FILE)); err != nil {
		if ZkErrorEqual(err, zk.ErrNodeExists) {
			err = zconn.Delete(zkPath, -1)
		}
		if err != nil {
			return fmt.Errorf("zkutil: failed deleting pid node: %v: %v", zkPath, err)
		}
		_, err = zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(PERM_FILE))
		if err != nil {
			return fmt.Errorf("zkutil: failed creating pid node: %v: %v", zkPath, err)
		}
	}

	go func() {
		for {
			_, _, watch, err := zconn.GetW(zkPath)
			if err != nil {
				if ZkErrorEqual(err, zk.ErrNoNode) {
					_, err = zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(zk.PermAll))
					if err != nil {
						log.Warningf("failed recreating pid node: %v: %v", zkPath, err)
					} else {
						log.Infof("recreated pid node: %v", zkPath)
						continue
					}
				} else {
					log.Warningf("failed reading pid node: %v", err)
				}
			} else {
				select {
				case event := <-watch:
					if ZkEventOk(event) && event.Type == zk.EventNodeDeleted {
						// Most likely another process has started up. However,
						// there is a chance that an ephemeral node is deleted by
						// the session expiring, yet that same session gets a watch
						// notification. This seems like buggy behavior, but rather
						// than race too hard on the node, just wait a bit and see
						// if the situation resolves itself.
						log.Warningf("pid deleted: %v", zkPath)
					} else {
						log.Infof("pid node event: %v", event)
					}
					// fall through and wait a bit before re-establishing the watch
				case <-done:
					log.Infof("pid watcher stopped on done: %v", zkPath)
					return
				}
			}
			select {
			// No one likes a thundering herd, least of all zk.
			case <-time.After(5*time.Second + time.Duration(rand.Int63n(55e9))):
			case <-done:
				log.Infof("pid watcher stopped on done: %v", zkPath)
				return
			}
		}
	}()

	return nil
}
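
Between watch attempts the pid watcher sleeps a randomized 5-60 seconds so that many restarting watchers do not hit zk at the same moment. A tiny sketch of that jittered delay (jitteredDelay is an illustrative name):

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// jitteredDelay reproduces the spread used above: a fixed 5s floor plus up to
// 55s of random jitter, so watchers re-check at different times.
func jitteredDelay() time.Duration {
	return 5*time.Second + time.Duration(rand.Int63n(55e9))
}

func main() {
	for i := 0; i < 3; i++ {
		fmt.Println(jitteredDelay())
	}
}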
Example #27
func (c *Session) comQuery(sqlstmt string) error {

	// TODO: accelerate the flow control module and the fingerprint module
	// err := c.intercept(sqlstmt)
	// if err != nil {
	// return err
	// }
	// c.updatefp(sqlstmt)
	log.Infof("session %d: %s", c.sessionId, sqlstmt)
	stmt, err := parser.Parse(sqlstmt)
	if err != nil {
		log.Warningf(`parse sql "%s" error "%s"`, sqlstmt, err.Error())
		return c.handleMySQLError(
			NewDefaultError(ER_SYNTAX_ERROR, err.Error()))
	}
	switch v := stmt.(type) {
	case parser.ISelect:
		return c.handleQuery(v, sqlstmt)
	case *parser.Insert, *parser.Update, *parser.Delete, *parser.Replace:
		return c.handleExec(stmt, sqlstmt, false)
	case *parser.Set:
		return c.handleSet(v, sqlstmt)
	case *parser.Begin, *parser.StartTrans:
		return c.handleBegin()
	case *parser.Commit:
		return c.handleCommit()
	case *parser.Rollback:
		// log.Debug(hack.String(stmt.(*parser.Rollback).Point))
		if len(stmt.(*parser.Rollback).Point) > 0 {
			return c.handleExec(stmt, sqlstmt, false)
		}
		return c.handleRollback()
	case parser.IShow:
		return c.handleShow(sqlstmt, v)
	case parser.IDDLStatement:
		return c.handleDDL(v, sqlstmt)
	case *parser.Do, *parser.Call, *parser.FlushTables:
		return c.handleExec(stmt, sqlstmt, false)
	// describe statements are handled via the query path
	case *parser.DescribeTable, *parser.DescribeStmt:
		return c.handleQuery(v, sqlstmt)
	case *parser.Use:

		if err := c.useDB(hack.String(stmt.(*parser.Use).DB)); err != nil {
			return c.handleMySQLError(err)
		} else {
			return c.fc.WriteOK(nil)
		}
	case *parser.SavePoint:
		return c.handleExec(stmt, sqlstmt, false)
		// return c.handleQuery(v, sqlstmt)
	case *parser.SetTrans:
		// log.Warnf("set tx iso level ")
		t_sl := hack.Slice(sqlstmt)
		tmp := make([]byte, len(t_sl))
		copy(tmp, t_sl)
		// log.Debug(sqlstmt, t_sl, tmp, len(t_sl))
		c.txIsolationInDef = false
		sql := hack.String(tmp)
		// log.Debug(sql, len(sql))
		c.txIsolationStmt = sql
		// log.Warnf("set tx iso level finish  ")
		if c.isInTransaction() {
			return c.handleExec(stmt, sqlstmt, false)
		}
		return c.fc.WriteOK(nil)
	default:
		log.Warnf("session %d : statement %T[%s] not support now", c.sessionId, stmt, sqlstmt)
		err := errors.New("statement not support now")
		return c.handleMySQLError(
			NewDefaultError(ER_SYNTAX_ERROR, err.Error()))
	}

	return nil
}