func (top *Topology) doWatch(evtch <-chan topo.Event, evtbus chan interface{}) {
	e := <-evtch
	log.Warningf("topo event %+v", e)

	switch e.Type {
	//case topo.EventNodeCreated:
	//case topo.EventNodeDataChanged:
	case topo.EventNodeChildrenChanged: // we only care about children-changed events
		// TODO: get changed node and decode event
	default:
		log.Warningf("%+v", e)
	}

	evtbus <- e
}
func (c *Session) handleComStmtPrepare(sqlstmt string) error {
	stmt, err := parser.Parse(sqlstmt)
	if err != nil {
		log.Warningf(`parse sql "%s" error "%s"`, sqlstmt, err.Error())
		return c.handleMySQLError(
			mysql.NewDefaultError(mysql.ER_SYNTAX_ERROR, err.Error()))
	}

	// Only a few statements are supported by prepared statements, see
	// http://dev.mysql.com/worklog/task/?id=2871
	switch v := stmt.(type) {
	case parser.ISelect, *parser.Insert, *parser.Update, *parser.Delete,
		*parser.Replace, parser.IDDLStatement, *parser.ShowTables,
		*parser.ShowColumns, *parser.ShowVariables, *parser.ShowIndex,
		*parser.Set, *parser.DescribeTable, *parser.Do:
		return c.prepare(v, sqlstmt)
	default:
		log.Warnf("statement %T[%s] does not support prepare ops", stmt, sqlstmt)
		return c.handleMySQLError(
			mysql.NewDefaultError(mysql.ER_UNSUPPORTED_PS))
	}
}
func (h *Handler) run() error {
	log.Infof("open listen address '%s' and start service", h.l.Addr())

	for {
		nc, err := h.l.Accept()
		if err != nil {
			return errors.Trace(err)
		}

		h.counters.clientsAccepted.Add(1)

		go func() {
			h.counters.clients.Add(1)
			defer h.counters.clients.Sub(1)

			c := newConn(nc, h, h.config.ConnTimeout)
			log.Infof("new connection: %s", c)
			if err := c.serve(h); err != nil {
				if errors.Cause(err) == io.EOF {
					log.Infof("connection lost: %s [io.EOF]", c)
				} else {
					log.Warningf("connection lost: %s, err = %s", c, err)
				}
			} else {
				log.Infof("connection exit: %s", c)
			}
		}()
	}
}
// SLOTSRESTORE key ttlms value [key ttlms value ...]
func (s *Store) SlotsRestore(db uint32, args [][]byte) error {
	if len(args) == 0 || len(args)%3 != 0 {
		return errArguments("len(args) = %d, expect != 0 && mod 3 = 0", len(args))
	}

	objs := make([]*rdb.ObjEntry, len(args)/3)
	for i := 0; i < len(objs); i++ {
		key := args[i*3]
		ttlms, err := ParseInt(args[i*3+1])
		if err != nil {
			return errArguments("parse args failed - %s", err)
		}
		value := args[i*3+2]

		expireat := int64(0)
		if ttlms != 0 {
			if v, ok := TTLmsToExpireAt(ttlms); ok && v > 0 {
				expireat = v
			} else {
				return errArguments("parse args[%d] ttlms = %d", i*3+1, ttlms)
			}
		}

		obj, err := rdb.DecodeDump(value)
		if err != nil {
			return errArguments("decode args[%d] failed, %s", i*3+2, err)
		}

		objs[i] = &rdb.ObjEntry{
			DB:       db,
			Key:      key,
			ExpireAt: uint64(expireat),
			Value:    obj,
		}
	}

	if err := s.acquire(); err != nil {
		return errors.Trace(err)
	}
	defer s.release()

	ms := &markSet{}
	bt := engine.NewBatch()
	for i := len(objs) - 1; i >= 0; i-- {
		e := objs[i]
		if ms.Has(e.Key) {
			log.Debugf("[%d] restore batch, db = %d, key = %v, ignore", i, e.DB, e.Key)
			continue
		} else {
			log.Debugf("[%d] restore batch, db = %d, key = %v", i, e.DB, e.Key)
		}
		if err := s.restore(bt, e.DB, e.Key, int64(e.ExpireAt), e.Value); err != nil {
			log.Warningf("restore object failed, db = %d, key = %v, err = %s", e.DB, e.Key, err)
			return errors.Trace(err)
		}
		ms.Set(e.Key)
	}

	fw := &Forward{DB: db, Op: "SlotsRestore", Args: args}
	return s.commit(bt, fw)
}
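// SlotsRestore walks the entries newest-first and uses markSet to skip older
// duplicates of a key that has already been restored. A minimal sketch of
// such a set, assuming a plain map keyed by the byte-string of the key; the
// repo's actual markSet may be implemented differently.
type markSet struct {
	set map[string]struct{}
}

// Set records that the key has been seen. The zero-value markSet is usable;
// the map is allocated lazily on first use.
func (ms *markSet) Set(key []byte) {
	if ms.set == nil {
		ms.set = make(map[string]struct{})
	}
	ms.set[string(key)] = struct{}{}
}

// Has reports whether the key was already recorded.
func (ms *markSet) Has(key []byte) bool {
	_, ok := ms.set[string(key)]
	return ok
}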
func (s *Store) travelPostCommitHandlers(f *Forward) {
	for _, h := range s.postCommitHandlers {
		if err := h(f); err != nil {
			log.Warningf("handle DidCommitHandler err - %s", err)
		}
	}
}
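// Each handler receives the Forward record describing the batch that just
// committed. A hypothetical registration helper, assuming handlers are
// simply appended to the slice walked above (the repo may wire this up
// differently):
func (s *Store) addPostCommitHandlerSketch(h func(*Forward) error) {
	s.postCommitHandlers = append(s.postCommitHandlers, h)
}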
func (c *conn) handleRequest(h *Handler) (redis.Resp, error) {
	if c.timeout > 0 {
		deadline := time.Now().Add(c.timeout)
		if err := c.nc.SetReadDeadline(deadline); err != nil {
			return nil, errors.Trace(err)
		}
	}
	request, err := redis.DecodeRequest(c.r)
	if err != nil {
		return nil, errors.Trace(err)
	}

	if request.Type() == redis.TypePing {
		return nil, nil
	}

	h.counters.commands.Add(1)
	response, err := c.dispatch(h, request)
	if err != nil {
		h.counters.commandsFailed.Add(1)
		b, _ := redis.EncodeToBytes(request)
		log.Warningf("handle commands failed, conn = %s, request = '%s', err = %s",
			c, base64.StdEncoding.EncodeToString(b), err)
	}
	return response, nil
}
// Interrupt releases a lock that's held.
func (zm *zMutex) Interrupt() {
	select {
	case zm.interrupted <- struct{}{}:
	default:
		log.Warningf("zmutex interrupt blocked")
	}
}
func (c *conn) Do(cmd *redis.Array, timeout time.Duration) (redis.Resp, error) {
	if c.err != nil {
		return nil, errors.Trace(c.err)
	}
	if err := c.encodeResp(cmd, timeout); err != nil {
		c.err = err
		log.Warningf("encode resp failed - %s", err)
		return nil, errors.Trace(c.err)
	}
	rsp, err := c.decodeResp(timeout)
	if err != nil {
		c.err = err
		log.Warningf("decode resp failed - %s", err)
		return nil, errors.Trace(c.err)
	}
	c.last = time.Now()
	return rsp, nil
}
func doMigrate(addr string, timeout time.Duration, db uint32, bins []*rdb.BinEntry) error {
	c, err := getSockConn(addr, timeout)
	if err != nil {
		log.Warningf("connect to %s failed, timeout = %d, err = %s", addr, timeout, err)
		return errors.Trace(err)
	}
	defer putSockConn(addr, c)

	cmd1 := redis.NewArray()
	cmd1.AppendBulkBytes([]byte("select"))
	cmd1.AppendBulkBytes([]byte(FormatUint(uint64(db))))
	if err := c.DoMustOK(cmd1, timeout); err != nil {
		log.Warningf("command select failed, addr = %s, db = %d, err = %s", addr, db, err)
		return errors.Trace(err)
	}
	log.Debugf("command select ok, addr = %s, db = %d", addr, db)

	cmd2 := redis.NewArray()
	cmd2.AppendBulkBytes([]byte("slotsrestore"))
	for _, bin := range bins {
		cmd2.AppendBulkBytes(bin.Key)
		ttlms := int64(0)
		if bin.ExpireAt != 0 {
			if v, ok := ExpireAtToTTLms(int64(bin.ExpireAt)); ok && v > 0 {
				ttlms = v
			} else {
				ttlms = 1
			}
		}
		cmd2.AppendBulkBytes([]byte(FormatInt(ttlms)))
		cmd2.AppendBulkBytes(bin.Value)
	}
	if err := c.DoMustOK(cmd2, timeout); err != nil {
		log.Warningf("command restore failed, addr = %s, db = %d, len(bins) = %d, err = %s",
			addr, db, len(bins), err)
		return errors.Trace(err)
	}
	log.Debugf("command restore ok, addr = %s, db = %d, len(bins) = %d", addr, db, len(bins))
	return nil
}
func loadSavedProcs() error {
	files, err := ioutil.ReadDir(baseProcDataDir())
	if err != nil {
		return errors.Trace(err)
	}

	for _, f := range files {
		if !f.IsDir() {
			continue
		}
		baseName := path.Base(f.Name())
		tp, id, ok := getPathType(baseName)
		if !ok {
			continue
		}
		datFile := path.Join(baseProcDataDir(), baseName, fmt.Sprintf("%s.dat", tp))
		p, err := loadProcess(datFile)
		if err != nil {
			log.Warningf("load process data %s err %v, skip", datFile, err)
			continue
		} else if p == nil {
			log.Infof("proc %s does not need to be reloaded, skip", id)
			continue
		}
		if id != p.ID {
			log.Warningf("we need proc %s, but got %s", id, p.ID)
			continue
		}
		// TODO: bind after-start func for different types
		if err := bindProcHandler(p); err != nil {
			log.Errorf("bind proc %s err %v, skip", p.Cmd, err)
			continue
		}
		addCheckProc(p)
	}
	return nil
}
func putSockConn(addr string, c *conn) {
	if c.err != nil {
		c.sock.Close()
		log.Warningf("close error connection %s : %s - err = %s", addr, c, c.err)
	} else {
		poolmap.Lock()
		pool := poolmap.m[addr]
		if pool == nil {
			pool = list.New()
			poolmap.m[addr] = pool
		}
		c.last = time.Now()
		pool.PushFront(c)
		poolmap.Unlock()
	}
}
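// putSockConn pushes healthy connections onto the front of a per-address
// free list, so the most recently used connection is reused first (LIFO).
// A hypothetical sketch of the matching getSockConn, assuming a
// dialSockConn helper and ignoring idle-timeout eviction; the repo's real
// getSockConn may differ:
func getSockConnSketch(addr string, timeout time.Duration) (*conn, error) {
	poolmap.Lock()
	if pool := poolmap.m[addr]; pool != nil && pool.Len() != 0 {
		// reuse a pooled connection if one is available
		c := pool.Remove(pool.Front()).(*conn)
		poolmap.Unlock()
		return c, nil
	}
	poolmap.Unlock()
	return dialSockConn(addr, timeout) // hypothetical dial helper
}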
func (p *process) checkAlive() (bool, error) {
	proc, err := ps.FindProcess(p.Pid)
	if err != nil {
		return false, errors.Trace(err)
	} else if proc == nil {
		// proc is not alive
		return false, nil
	}

	if strings.Contains(proc.Executable(), p.Cmd) {
		return true, nil
	}
	log.Warningf("pid %d exists, but executable name is %s, not %s",
		p.Pid, proc.Executable(), p.Cmd)
	return false, nil
}
func (s *Server) OnSlotRangeChange(param *models.SlotMultiSetParam) {
	log.Warningf("slotRangeChange %+v", param)
	if !validSlot(param.From) || !validSlot(param.To) {
		log.Errorf("invalid slot number, %+v", param)
		return
	}

	for i := param.From; i <= param.To; i++ {
		switch param.Status {
		case models.SLOT_STATUS_OFFLINE:
			s.clearSlot(i)
		case models.SLOT_STATUS_ONLINE:
			s.fillSlot(i, true)
		default:
			log.Errorf("can not handle status %v", param.Status)
		}
	}
}
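// validSlot only needs to range-check the slot id against the size of the
// slot table. A minimal sketch, assuming the conventional 1024-slot layout;
// the repo's real bound likely comes from a models constant:
const slotNumSketch = 1024

func validSlotSketch(i int) bool {
	return i >= 0 && i < slotNumSketch
}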
func checkProcs() {
	restartProcs := []*process{}

	m.Lock()
	for _, p := range procs {
		if b, err := p.checkAlive(); err != nil {
			log.Errorf("check %d (%s) alive err %v, retry later", p.Pid, p.Cmd, err)
		} else if !b {
			needRestart := p.needRestart()
			log.Warningf("%d (%s) is not alive, need restart: %v", p.Pid, p.Cmd, needRestart)
			if needRestart {
				restartProcs = append(restartProcs, p)
			}
			// clear old log
			p.clearLog()
			// remove from procs
			delete(procs, p.ID)
		}
	}
	m.Unlock()

	for _, p := range restartProcs {
		switch strings.ToLower(p.Type) {
		case proxyType:
			// for proxy type, we will use a new id to avoid zk node exists error
			args := new(proxyArgs)
			map2Args(args, p.Ctx)
			p.clearData()
			startProxy(args)
		default:
			if err := p.start(); err != nil {
				log.Errorf("restart %s err %v", p.Cmd, err)
			} else {
				addCheckProc(p)
			}
		}
	}
}
func (c *Session) comQuery(sqlstmt string) error {
	stmt, err := parser.Parse(sqlstmt)
	if err != nil {
		log.Warningf(`parse sql "%s" error "%s"`, sqlstmt, err.Error())
		return c.handleMySQLError(
			NewDefaultError(ER_SYNTAX_ERROR, err.Error()))
	}

	switch v := stmt.(type) {
	case parser.ISelect:
		return c.handleQuery(v, sqlstmt)
	case *parser.Insert, *parser.Update, *parser.Delete, *parser.Replace:
		return c.handleExec(stmt, sqlstmt, false)
	case *parser.Set:
		return c.handleSet(v, sqlstmt)
	case *parser.Begin, *parser.StartTrans:
		return c.handleBegin()
	case *parser.Commit:
		return c.handleCommit()
	case *parser.Rollback:
		return c.handleRollback()
	case parser.IShow:
		return c.handleShow(sqlstmt, v)
	case parser.IDDLStatement:
		return c.handleDDL(v, sqlstmt)
	case *parser.Do, *parser.Call, *parser.FlushTables:
		return c.handleExec(stmt, sqlstmt, false)
	case *parser.Use:
		if err := c.useDB(hack.String(stmt.(*parser.Use).DB)); err != nil {
			return c.handleMySQLError(err)
		}
		return c.fc.WriteOK(nil)
	default:
		log.Warnf("statement %T[%s] not supported yet", stmt, sqlstmt)
		return nil
	}
}
func (s *Store) commit(bt *engine.Batch, fw *Forward) error {
	if bt.Len() == 0 {
		return nil
	}

	s.travelPreCommitHandlers(fw)

	if err := s.db.Commit(bt); err != nil {
		log.Warningf("store commit failed - %s", err)
		return err
	}

	for i := s.itlist.Len(); i != 0; i-- {
		v := s.itlist.Remove(s.itlist.Front()).(*storeIterator)
		v.Close()
	}
	s.serial++

	s.travelPostCommitHandlers(fw)
	return nil
}
func (s *Server) checkAndDoTopoChange(seq int) bool {
	act, err := s.top.GetActionWithSeq(int64(seq))
	if err != nil {
		// TODO: error is not "not exist"
		log.Fatal(errors.ErrorStack(err), "action seq", seq)
	}

	if !needResponse(act.Receivers, s.pi) {
		// no need to respond
		return false
	}

	log.Warningf("action %v receivers %v", seq, act.Receivers)
	s.stopTaskRunners()

	switch act.Type {
	case models.ACTION_TYPE_SLOT_MIGRATE, models.ACTION_TYPE_SLOT_CHANGED,
		models.ACTION_TYPE_SLOT_PREMIGRATE:
		slot := &models.Slot{}
		s.getActionObject(seq, slot)
		s.fillSlot(slot.Id, true)
	case models.ACTION_TYPE_SERVER_GROUP_CHANGED:
		serverGroup := &models.ServerGroup{}
		s.getActionObject(seq, serverGroup)
		s.OnGroupChange(serverGroup.Id)
	case models.ACTION_TYPE_SERVER_GROUP_REMOVE:
		// do not care
	case models.ACTION_TYPE_MULTI_SLOT_CHANGED:
		param := &models.SlotMultiSetParam{}
		s.getActionObject(seq, param)
		s.OnSlotRangeChange(param)
	default:
		log.Fatalf("unknown action %+v", act)
	}

	s.createTaskRunners()
	return true
}
func (s *Server) handleConn(c net.Conn) {
	log.Info("new connection", c.RemoteAddr())

	s.counter.Add("connections", 1)
	client := &session{
		Conn:          c,
		r:             bufio.NewReaderSize(c, DefaultReaderSize),
		w:             bufio.NewWriterSize(c, DefaultWiterSize),
		CreateAt:      time.Now(),
		backQ:         make(chan *PipelineResponse, PipelineResponseNum),
		closeSignal:   &sync.WaitGroup{},
		authenticated: false,
	}
	client.closeSignal.Add(1)

	go client.WritingLoop()

	var err error
	defer func() {
		client.closeSignal.Wait() // waiting for writer goroutine
		if errors2.ErrorNotEqual(err, io.EOF) {
			log.Warningf("close connection %v, %v", client, errors.ErrorStack(err))
		} else {
			log.Infof("close connection %v", client)
		}
		s.counter.Add("connections", -1)
	}()

	for {
		err = s.redisTunnel(client)
		if err != nil {
			close(client.backQ)
			return
		}
		client.Ops++
	}
}
func createDashboardNode(conn zkhelper.Conn) error {
	// make sure the root dir exists
	rootDir := fmt.Sprintf("/zk/reborn/db_%s", globalEnv.ProductName())
	zkhelper.CreateRecursive(conn, rootDir, "", 0, zkhelper.DefaultDirACLs())

	coordPath := fmt.Sprintf("%s/dashboard", rootDir)
	// make sure we're the only dashboard
	timeoutCh := time.After(60 * time.Second)
	for {
		exists, _, ch, _ := conn.ExistsW(coordPath)
		if !exists {
			break
		}
		data, _, _ := conn.Get(coordPath)
		if checkDashboardAlive(data) {
			return errors.Errorf("dashboard already exists: %s", string(data))
		}
		log.Warningf("dashboard %s exists in zk, wait until it is removed", data)
		select {
		case <-ch:
		case <-timeoutCh:
			return errors.Errorf("wait existed dashboard %s removed timeout", string(data))
		}
	}

	content := fmt.Sprintf(`{"addr": "%v", "pid": %v}`, globalEnv.DashboardAddr(), os.Getpid())
	pathCreated, err := conn.Create(coordPath, []byte(content), zk.FlagEphemeral,
		zkhelper.DefaultFileACLs())
	log.Infof("dashboard node %s created, data %s, err %v", pathCreated, string(content), err)
	return errors.Trace(err)
}
func (c *client) do(table, row []byte, action action, useCache bool, retries int) chan pb.Message {
	region := c.LocateRegion(table, row, useCache)
	if region == nil {
		return nil
	}
	conn := c.getRegionConn(region.Server)
	if conn == nil {
		return nil
	}

	regionSpecifier := &proto.RegionSpecifier{
		Type:  proto.RegionSpecifier_REGION_NAME.Enum(),
		Value: []byte(region.Name),
	}

	var cl *call
	switch a := action.(type) {
	case *Get:
		cl = newCall(&proto.GetRequest{
			Region: regionSpecifier,
			Get:    a.ToProto().(*proto.Get),
		})
	case *Put, *Delete:
		cl = newCall(&proto.MutateRequest{
			Region:   regionSpecifier,
			Mutation: a.ToProto().(*proto.MutationProto),
		})
	case *CoprocessorServiceCall:
		cl = newCall(&proto.CoprocessorServiceRequest{
			Region: regionSpecifier,
			Call:   a.ToProto().(*proto.CoprocessorServiceCall),
		})
	}
	if cl == nil {
		// unknown action type; nothing to send (guarding here also keeps the
		// goroutine below from dereferencing a nil call)
		return nil
	}

	result := make(chan pb.Message)

	go func() {
		r := <-cl.responseCh
		switch r.(type) {
		case *exception:
			if retries <= c.maxRetries {
				// retry the action and refresh the region info
				log.Infof("Retrying action for the %d time", retries+1)
				newr := c.do(table, row, action, false, retries+1)
				result <- <-newr
			} else {
				result <- r
			}
			return
		default:
			result <- r
		}
	}()

	if err := conn.call(cl); err != nil {
		log.Warningf("Error return while attempting call [err=%#v]", err)
		// purge dead server
		delete(c.cachedConns, region.Server)
		if retries <= c.maxRetries {
			// retry the action
			log.Infof("Retrying action for the %d time", retries+1)
			c.do(table, row, action, false, retries+1)
		}
	}

	return result
}
func (h *Handler) daemonSyncMaster() {
	var last *conn
	lost := make(chan int)
	h.masterRunID = "?"
	h.syncOffset.Set(-1)
	h.masterConnState.Set(masterConnNone)
	retryTimer := time.NewTimer(infinityDelay)
	defer retryTimer.Stop()

	var err error
LOOP:
	for exists := false; !exists; {
		var c *conn
		needSlaveofReply := false
		select {
		case <-lost:
			h.masterConnState.Set(masterConnConnect)
			// the replication conn was broken, reconnect it later
			last = nil
			h.syncSince.Set(0)
			log.Infof("replication connection from master %s was broken, try reconnect 1s later",
				h.masterAddr.Get())
			retryTimer.Reset(time.Second)
			continue LOOP
		case <-h.signal:
			exists = true
		case c = <-h.master:
			needSlaveofReply = true
		case <-retryTimer.C:
			log.Infof("retry connect to master %s", h.masterAddr.Get())
			c, err = h.replicationConnectMaster(h.masterAddr.Get())
			if err != nil {
				log.Errorf("replication retry connect master %s err, try 1s later again - %s",
					h.masterAddr.Get(), err)
				retryTimer.Reset(time.Second)
				continue LOOP
			}
		}
		retryTimer.Reset(infinityDelay)

		if last != nil {
			last.Close()
			<-lost
		}
		last = c

		if c != nil {
			masterAddr := c.nc.RemoteAddr().String()
			syncOffset := h.syncOffset.Get()
			if masterAddr == h.masterAddr.Get() && h.masterRunID != "?" {
				// sync with the same master as the last synchronization
				syncOffset++
			} else {
				// the last sync master is not the same
				h.masterRunID = "?"
				h.syncOffset.Set(-1)
				syncOffset = -1
			}
			h.masterAddr.Set(masterAddr)

			go func(syncOffset int64) {
				defer func() {
					lost <- 0
				}()
				defer c.Close()
				err := h.psync(c, h.masterRunID, syncOffset)
				log.Warningf("slave %s do psync err - %s", c, err)
			}(syncOffset)

			h.syncSince.Set(time.Now().UnixNano() / int64(time.Millisecond))
			log.Infof("slaveof %s", h.masterAddr.Get())
		} else {
			h.masterAddr.Set("")
			h.syncOffset.Set(-1)
			h.masterRunID = "?"
			h.syncSince.Set(0)
			log.Infof("slaveof no one")
		}

		if needSlaveofReply {
			h.slaveofReply <- struct{}{}
		}
	}
}
func errArguments(format string, v ...interface{}) error {
	err := errors.Errorf(format, v...)
	log.Warningf("call store function with invalid arguments - %s", err)
	return err
}
func (p *process) start() error {
	os.MkdirAll(p.procDataDir(), 0755)
	os.MkdirAll(p.procLogDir(), 0755)

	var c *exec.Cmd
	if p.Daemonize {
		c = exec.Command(p.Cmd, p.Args...)
	} else {
		args := append([]string{p.Cmd}, p.Args...)
		c = exec.Command("reborn-daemon", args...)
	}

	c.Stdout = os.Stdout
	c.Stderr = os.Stderr

	if err := c.Start(); err != nil {
		return errors.Trace(err)
	}

	go func() {
		// wait for the process in another goroutine; we don't handle
		// anything here, because liveness is checked by the checker.
		c.Wait()
	}()

	log.Infof("wait 1s to let %s start ok", p.Type)
	time.Sleep(time.Second)

	var err error
	for i := 0; i < 5; i++ {
		// we must read the pid from the pid file
		if p.Pid, err = p.readPid(); err != nil {
			log.Warningf("read pid failed, err %v, wait 1s and retry", err)
			err = errors.Trace(err)
			time.Sleep(1 * time.Second)
		} else {
			break
		}
	}
	if err != nil {
		return errors.Trace(err)
	}

	if b, err := p.checkAlive(); err != nil {
		return errors.Trace(err)
	} else if !b {
		return errors.Errorf("start %d (%s) but it's not alive", p.Pid, p.Type)
	}

	if p.postStartFunc != nil {
		if err := p.postStartFunc(p); err != nil {
			log.Errorf("post start %d (%s) err %v", p.Pid, p.Type, err)
			return errors.Trace(err)
		}
	}

	log.Infof("%s start ok now", p.Type)
	return errors.Trace(p.save())
}
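// A hypothetical sketch of the readPid helper used above, assuming the
// child writes its pid as a decimal string to <procDataDir>/<Type>.pid;
// the repo's actual pid-file convention may differ:
func (p *process) readPidSketch() (int, error) {
	data, err := ioutil.ReadFile(path.Join(p.procDataDir(), fmt.Sprintf("%s.pid", p.Type)))
	if err != nil {
		return 0, errors.Trace(err)
	}
	return strconv.Atoi(strings.TrimSpace(string(data)))
}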
// RunTask returns nil when the underlying task ends, or the error it
// generated.
func (ze *ZElector) RunTask(task ElectorTask) error {
	delay := newBackoffDelay(100*time.Millisecond, 1*time.Minute)
	leaderPath := path.Join(ze.path, "leader")
	for {
		_, err := CreateRecursive(ze.zconn, leaderPath, "", 0, zk.WorldACL(PERM_FILE))
		if err == nil || ZkErrorEqual(err, zk.ErrNodeExists) {
			break
		}
		log.Warningf("election leader create failed: %v", err)
		time.Sleep(delay.NextDelay())
	}

	for {
		err := ze.Lock("RunTask")
		if err != nil {
			log.Warningf("election lock failed: %v", err)
			if err == ErrInterrupted {
				return ErrInterrupted
			}
			continue
		}

		// Confirm your win and deliver acceptance speech. This notifies
		// listeners who will have been watching the leader node for
		// changes.
		_, err = ze.zconn.Set(leaderPath, []byte(ze.contents), -1)
		if err != nil {
			log.Warningf("election promotion failed: %v", err)
			continue
		}

		log.Infof("election promote leader %v", leaderPath)
		taskErrChan := make(chan error)
		go func() {
			taskErrChan <- task.Run()
		}()

	watchLeader:
		// Watch the leader so we can get notified if something goes wrong.
		data, _, watch, err := ze.zconn.GetW(leaderPath)
		if err != nil {
			log.Warningf("election unable to watch leader node %v %v", leaderPath, err)
			// FIXME(msolo) Add delay
			goto watchLeader
		}

		if string(data) != ze.contents {
			log.Warningf("election unable to promote leader")
			task.Stop()
			// We won the election, but we didn't become the leader. How is that possible?
			// (see Bush v. Gore for some inspiration)
			// It means:
			//   1. Someone isn't playing by the election rules (a bad actor).
			//      Hard to detect - let's assume we don't have this problem. :)
			//   2. We lost our connection somehow and the ephemeral lock was cleared,
			//      allowing someone else to win the election.
			continue
		}

		// This is where we start our target process and watch for its failure.
	waitForEvent:
		select {
		case <-ze.interrupted:
			log.Warning("election interrupted - stop child process")
			task.Stop()
			// Once the process dies from the signal, this will all tear down.
			goto waitForEvent
		case taskErr := <-taskErrChan:
			// If our code fails, unlock to trigger an election.
			log.Infof("election child process ended: %v", taskErr)
			ze.Unlock()
			if task.Interrupted() {
				log.Warningf("election child process interrupted - stepping down")
				return ErrInterrupted
			}
			continue
		case zevent := <-watch:
			// We had a zk connection hiccup. We have a few choices,
			// but it depends on the constraints and the events.
			//
			// If we get SESSION_EXPIRED our connection loss triggered an
			// election that we won't have won and thus the lock was
			// automatically freed. We have no choice but to start over.
			if zevent.State == zk.StateExpired {
				log.Warningf("election leader watch expired")
				task.Stop()
				continue
			}

			// Otherwise, we had an intermittent issue or something touched
			// the node. Either we lost our position or someone broke
			// protocol and touched the leader node. We just reconnect and
			// revalidate. In the meantime, assume we are still the leader
			// until we determine otherwise.
			//
			// On a reconnect we will be able to see the leader
			// information. If we still hold the position, great. If not, we
			// kill the associated process.
			//
			// On a leader node change, we need to perform the same
			// validation. It's possible an election completes without the
			// old leader realizing he is out of touch.
			log.Warningf("election leader watch event %v", zevent)
			goto watchLeader
		}
	}
	panic("unreachable")
}
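// A minimal sketch of the backoff helper assumed above: newBackoffDelay
// doubles the delay on every NextDelay call, capped at the given maximum.
// The real implementation may also reset or add jitter.
type backoffDelaySketch struct {
	delay, max time.Duration
}

func newBackoffDelaySketch(min, max time.Duration) *backoffDelaySketch {
	return &backoffDelaySketch{delay: min, max: max}
}

// NextDelay returns the current delay and doubles it for the next call.
func (b *backoffDelaySketch) NextDelay() time.Duration {
	d := b.delay
	b.delay *= 2
	if b.delay > b.max {
		b.delay = b.max
	}
	return d
}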
// LockWithTimeout returns nil when the lock is acquired. A lock is
// held if the file exists and you are the creator. Setting the wait
// to zero makes this a nonblocking lock check.
//
// FIXME(msolo) Disallow non-super users from removing the lock?
func (zm *zMutex) LockWithTimeout(wait time.Duration, desc string) (err error) {
	timer := time.NewTimer(wait)
	defer func() {
		if panicErr := recover(); panicErr != nil || err != nil {
			zm.deleteLock()
		}
	}()
	// Ensure the rendezvous node is here.
	// FIXME(msolo) Assuming locks are contended, it will be cheaper to assume this just
	// exists.
	_, err = CreateRecursive(zm.zconn, zm.path, "", 0, zk.WorldACL(PERM_DIRECTORY))
	if err != nil && !ZkErrorEqual(err, zk.ErrNodeExists) {
		return err
	}

	lockPrefix := path.Join(zm.path, "lock-")
	zflags := zk.FlagSequence
	if zm.ephemeral {
		zflags = zflags | zk.FlagEphemeral
	}

	// update node content
	var lockContent map[string]interface{}
	err = json.Unmarshal([]byte(zm.contents), &lockContent)
	if err != nil {
		return err
	}
	lockContent["desc"] = desc
	newContent, err := json.Marshal(lockContent)
	if err != nil {
		return err
	}

createlock:
	lockCreated, err := zm.zconn.Create(lockPrefix, newContent, int32(zflags), zk.WorldACL(PERM_FILE))
	if err != nil {
		return err
	}
	name := path.Base(lockCreated)
	zm.mu.Lock()
	zm.name = name
	zm.mu.Unlock()

trylock:
	children, _, err := zm.zconn.Children(zm.path)
	if err != nil {
		return fmt.Errorf("zkutil: trylock failed %v", err)
	}
	sort.Strings(children)
	if len(children) == 0 {
		return fmt.Errorf("zkutil: empty lock: %v", zm.path)
	}

	if children[0] == name {
		// We are the lock owner.
		return nil
	}

	// This is the degenerate case of a nonblocking lock check. It's not optimal, but
	// also probably not worth optimizing.
	if wait == 0 {
		return ErrTimeout
	}
	prevLock := ""
	for i := 1; i < len(children); i++ {
		if children[i] == name {
			prevLock = children[i-1]
			break
		}
	}
	if prevLock == "" {
		// This is an interesting case. The node disappeared
		// underneath us, probably due to a session loss. We can
		// recreate the lock node (with a new sequence number) and
		// keep trying.
		log.Warningf("zkutil: no lock node found: %v/%v", zm.path, zm.name)
		goto createlock
	}

	zkPrevLock := path.Join(zm.path, prevLock)
	exist, stat, watch, err := zm.zconn.ExistsW(zkPrevLock)
	if err != nil {
		// FIXME(msolo) Should this be a retry?
		return fmt.Errorf("zkutil: unable to watch previous lock node %v %v", zkPrevLock, err)
	}
	if stat == nil || !exist {
		goto trylock
	}
	select {
	case <-timer.C:
		return ErrTimeout
	case <-zm.interrupted:
		return ErrInterrupted
	case event := <-watch:
		log.Infof("zkutil: lock event: %v", event)
		// The precise event doesn't matter - try to read again regardless.
		goto trylock
	}
	panic("unexpected")
}
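// RunTask above calls ze.Lock, which can be a thin blocking wrapper over
// LockWithTimeout. A hypothetical sketch, assuming an effectively
// unbounded wait stands in for "block forever":
func (zm *zMutex) lockSketch(desc string) error {
	return zm.LockWithTimeout(365*24*time.Hour, desc)
}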
// Close the done channel when you want to clean up nicely.
func CreatePidNode(zconn Conn, zkPath string, contents string, done chan struct{}) error {
	// On the first try, assume the cluster is up and running; that will
	// help hunt down any config issues present at startup.
	if _, err := zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(PERM_FILE)); err != nil {
		if ZkErrorEqual(err, zk.ErrNodeExists) {
			err = zconn.Delete(zkPath, -1)
		}
		if err != nil {
			return fmt.Errorf("zkutil: failed deleting pid node: %v: %v", zkPath, err)
		}
		_, err = zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral, zk.WorldACL(PERM_FILE))
		if err != nil {
			return fmt.Errorf("zkutil: failed creating pid node: %v: %v", zkPath, err)
		}
	}

	go func() {
		for {
			_, _, watch, err := zconn.GetW(zkPath)
			if err != nil {
				if ZkErrorEqual(err, zk.ErrNoNode) {
					_, err = zconn.Create(zkPath, []byte(contents), zk.FlagEphemeral,
						zk.WorldACL(zk.PermAll))
					if err != nil {
						log.Warningf("failed recreating pid node: %v: %v", zkPath, err)
					} else {
						log.Infof("recreated pid node: %v", zkPath)
						continue
					}
				} else {
					log.Warningf("failed reading pid node: %v", err)
				}
			} else {
				select {
				case event := <-watch:
					if ZkEventOk(event) && event.Type == zk.EventNodeDeleted {
						// Most likely another process has started up. However,
						// there is a chance that an ephemeral node is deleted by
						// the session expiring, yet that same session gets a watch
						// notification. This seems like buggy behavior, but rather
						// than race too hard on the node, just wait a bit and see
						// if the situation resolves itself.
						log.Warningf("pid deleted: %v", zkPath)
					} else {
						log.Infof("pid node event: %v", event)
					}
					// break here and wait for a bit before attempting
				case <-done:
					log.Infof("pid watcher stopped on done: %v", zkPath)
					return
				}
			}
			select {
			// No one likes a thundering herd, least of all zk.
			case <-time.After(5*time.Second + time.Duration(rand.Int63n(55e9))):
			case <-done:
				log.Infof("pid watcher stopped on done: %v", zkPath)
				return
			}
		}
	}()

	return nil
}
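// A brief usage sketch: register a pid node for this process and close the
// done channel at shutdown to stop the watcher goroutine. The zkPath here
// is illustrative; zconn and the path come from the caller's environment.
func runWithPidNodeSketch(zconn Conn) error {
	done := make(chan struct{})
	defer close(done)
	content := fmt.Sprintf(`{"pid": %v}`, os.Getpid())
	if err := CreatePidNode(zconn, "/zk/app/pid", content, done); err != nil {
		return err
	}
	// ... do the real work while the watcher keeps the node alive ...
	return nil
}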
func (c *Session) comQuery(sqlstmt string) error {
	// TODO: accelerate the flow-control module and the fingerprint module
	// err := c.intercept(sqlstmt)
	// if err != nil {
	// 	return err
	// }
	// c.updatefp(sqlstmt)

	log.Infof("session %d: %s", c.sessionId, sqlstmt)

	stmt, err := parser.Parse(sqlstmt)
	if err != nil {
		log.Warningf(`parse sql "%s" error "%s"`, sqlstmt, err.Error())
		return c.handleMySQLError(
			NewDefaultError(ER_SYNTAX_ERROR, err.Error()))
	}

	switch v := stmt.(type) {
	case parser.ISelect:
		return c.handleQuery(v, sqlstmt)
	case *parser.Insert, *parser.Update, *parser.Delete, *parser.Replace:
		return c.handleExec(stmt, sqlstmt, false)
	case *parser.Set:
		return c.handleSet(v, sqlstmt)
	case *parser.Begin, *parser.StartTrans:
		return c.handleBegin()
	case *parser.Commit:
		return c.handleCommit()
	case *parser.Rollback:
		// ROLLBACK TO SAVEPOINT is executed as a normal statement
		if len(stmt.(*parser.Rollback).Point) > 0 {
			return c.handleExec(stmt, sqlstmt, false)
		}
		return c.handleRollback()
	case parser.IShow:
		return c.handleShow(sqlstmt, v)
	case parser.IDDLStatement:
		return c.handleDDL(v, sqlstmt)
	case *parser.Do, *parser.Call, *parser.FlushTables:
		return c.handleExec(stmt, sqlstmt, false)
	// the describe-table module
	case *parser.DescribeTable, *parser.DescribeStmt:
		return c.handleQuery(v, sqlstmt)
	case *parser.Use:
		if err := c.useDB(hack.String(stmt.(*parser.Use).DB)); err != nil {
			return c.handleMySQLError(err)
		}
		return c.fc.WriteOK(nil)
	case *parser.SavePoint:
		return c.handleExec(stmt, sqlstmt, false)
	case *parser.SetTrans:
		// keep a private copy of the raw isolation statement so it can be
		// replayed on backend connections later
		t_sl := hack.Slice(sqlstmt)
		tmp := make([]byte, len(t_sl))
		copy(tmp, t_sl)
		c.txIsolationInDef = false
		c.txIsolationStmt = hack.String(tmp)
		if c.isInTransaction() {
			return c.handleExec(stmt, sqlstmt, false)
		}
		return c.fc.WriteOK(nil)
	default:
		log.Warnf("session %d: statement %T[%s] not supported yet", c.sessionId, stmt, sqlstmt)
		err := errors.New("statement not supported yet")
		return c.handleMySQLError(
			NewDefaultError(ER_SYNTAX_ERROR, err.Error()))
	}
}