func (c *Canal) tryDump() error {
	if len(c.master.Name) > 0 && c.master.Position > 0 {
		// we will sync with binlog name and position
		log.Infof("skip dump, use last binlog replication pos (%s, %d)", c.master.Name, c.master.Position)
		return nil
	}

	if c.dumper == nil {
		log.Info("skip dump, no mysqldump")
		return nil
	}

	h := &dumpParseHandler{c: c}

	start := time.Now()
	log.Info("try dump MySQL and parse")
	if err := c.dumper.DumpAndParse(h); err != nil {
		return err
	}

	log.Infof("dump MySQL and parse OK, use %0.2f seconds, start binlog replication at (%s, %d)",
		time.Now().Sub(start).Seconds(), h.name, h.pos)

	c.master.Update(h.name, uint32(h.pos))
	c.master.Save(true)

	return nil
}
func readIndexFile(configDir string, rule *config.Rule) ([]byte, error) {
	if rule.IndexFile != "" {
		// Index file explicitly specified. Fail if not found.
		path := rule.IndexFile
		if !strings.HasPrefix(rule.IndexFile, "/") {
			// indexFile is relative to the config dir
			path = configDir + "/" + rule.IndexFile
		}
		log.Infof("Using index settings from %s", path)
		return ioutil.ReadFile(path)
	} else {
		var path string
		// No index file specified. Read the default file (<configDir>/<index>.idx.json) if it exists.
		// Strip a trailing -[0-9]+ so indexes with version suffixes match a base settings file.
		if m := regexp.MustCompile("(.+)-[0-9]+").FindStringSubmatch(rule.Index); len(m) == 0 {
			path = configDir + "/" + rule.Index + ".idx.json"
		} else {
			path = configDir + "/" + m[1] + ".idx.json"
		}

		data, err := ioutil.ReadFile(path)
		if os.IsNotExist(err) {
			return nil, nil
		} else {
			log.Infof("Using index settings from %s", path)
			return data, err
		}
	}
}
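// Illustrative only (not part of the original code): how readIndexFile resolves
// the settings path, assuming a hypothetical config dir "./etc":
//
//   rule.IndexFile = "custom.idx.json" -> ./etc/custom.idx.json (error if missing)
//   rule.Index     = "posts"           -> ./etc/posts.idx.json  (nil, nil if missing)
//   rule.Index     = "posts-20160101"  -> ./etc/posts.idx.json  (version suffix stripped)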
// Elect the best slave, i.e. the one with the most up-to-date data from the master.
func (g *Group) Elect() (string, error) {
	g.m.Lock()
	defer g.m.Unlock()

	var addr string
	var checkOffset int64 = 0
	var checkPriority int = 0

	for _, slave := range g.Slaves {
		m, err := slave.doRelpInfo()
		if err != nil {
			log.Infof("slave %s get replication info err %v, skip it", slave.Addr, err)
			continue
		}

		if m["slave"] == MasterType {
			log.Errorf("server %s is not slave now, skip it", slave.Addr)
			continue
		}

		if m["master_link_status"] == "up" {
			log.Infof("slave %s master_link_status is up, master %s may not be down, skip failover", slave.Addr, g.Master.Addr)
			return "", ErrNodeAlive
		}

		priority, _ := strconv.Atoi(m["slave_priority"])
		replOffset, _ := strconv.ParseInt(m["slave_repl_offset"], 10, 64)

		used := false
		// Like redis-sentinel, first check priority, then the slave repl offset.
		if checkPriority < priority {
			used = true
		} else if checkPriority == priority {
			if checkOffset < replOffset {
				used = true
			}
		}

		if used {
			addr = slave.Addr
			checkPriority = priority
			checkOffset = replOffset
		}
	}

	if len(addr) == 0 {
		log.Errorf("no proper candidate to be promoted")
		return "", ErrNoCandidate
	}

	log.Infof("select slave %s as new master, priority:%d, repl_offset:%d", addr, checkPriority, checkOffset)

	return addr, nil
}
func (c *Canal) startSyncBinlog() error {
	pos := mysql.Position{c.master.Name, c.master.Position}

	log.Infof("start sync binlog at %v", pos)

	s, err := c.syncer.StartSync(pos)
	if err != nil {
		return errors.Errorf("start sync replication at %v error %v", pos, err)
	}

	timeout := time.Second
	forceSavePos := false
	for {
		ev, err := s.GetEventTimeout(timeout)
		if err != nil && err != replication.ErrGetEventTimeout {
			return errors.Trace(err)
		} else if err == replication.ErrGetEventTimeout {
			timeout = 2 * timeout
			continue
		}

		timeout = time.Second

		// next binlog pos
		pos.Pos = ev.Header.LogPos

		forceSavePos = false

		switch e := ev.Event.(type) {
		case *replication.RotateEvent:
			pos.Name = string(e.NextLogName)
			pos.Pos = uint32(e.Position)
			// r.ev <- pos
			forceSavePos = true
			log.Infof("rotate binlog to %v", pos)
		case *replication.RowsEvent:
			// we only focus on row-based events
			if err = c.handleRowsEvent(ev); err != nil {
				log.Errorf("handle rows event error %v", err)
				return errors.Trace(err)
			}
		case *replication.TableMapEvent:
			continue
		default:
		}

		c.master.Update(pos.Name, pos.Pos)
		c.master.Save(forceSavePos)
	}

	return nil
}
func (d *Dumper) parseMetadataFile(meta string, w io.Writer) error {
	log.Infof("Parsing: %s", meta)

	if file, err := os.Open(meta); err != nil {
		return err
	} else {
		defer file.Close()

		scanner := bufio.NewScanner(file)
		binLogExp := regexp.MustCompile("\\s+Log:\\s+(.+)")
		binLogPosExp := regexp.MustCompile("\\s+Pos:\\s+(\\d+)")
		binLog := ""
		binLogPos := ""

		for scanner.Scan() {
			line := scanner.Text()
			if m := binLogExp.FindStringSubmatch(line); len(m) > 0 {
				binLog = m[1]
			} else if m := binLogPosExp.FindStringSubmatch(line); len(m) > 0 {
				binLogPos = m[1]
			}
		}

		if err = scanner.Err(); err != nil {
			return err
		} else {
			stmnt := fmt.Sprintf("CHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%s;\n", binLog, binLogPos)
			log.Debug(stmnt)
			_, err = fmt.Fprintf(w, stmnt)
			return err
		}
	}
}
func (app *App) slaveof(masterAddr string, restart bool, readonly bool) error {
	app.m.Lock()
	defer app.m.Unlock()

	// In master mode and no slaveof, only set readonly.
	if len(app.cfg.SlaveOf) == 0 && len(masterAddr) == 0 {
		app.cfg.SetReadonly(readonly)
		return nil
	}

	if !app.ldb.ReplicationUsed() {
		return fmt.Errorf("slaveof must enable replication")
	}

	app.cfg.SlaveOf = masterAddr

	if len(masterAddr) == 0 {
		log.Infof("slaveof no one, stop replication")
		if err := app.m.stopReplication(); err != nil {
			return err
		}
		app.cfg.SetReadonly(readonly)
	} else {
		return app.m.startReplication(masterAddr, restart)
	}

	return nil
}
// Promote the slave to master, then let the other slaves replicate from it.
func (g *Group) Promote(addr string) error {
	g.m.Lock()
	defer g.m.Unlock()

	node := g.Slaves[addr]

	if err := node.slaveof("no", "one"); err != nil {
		return err
	}

	delete(g.Slaves, addr)

	g.Master = node

	host, port, _ := net.SplitHostPort(addr)
	for _, slave := range g.Slaves {
		if err := slave.slaveof(host, port); err != nil {
			// If we get here, the replication topology may be wrong,
			// so use fatal level; we have to fix it manually.
			log.Fatalf("slaveof %s to master %s err %v", slave.Addr, addr, err)
		} else {
			log.Infof("slaveof %s to master %s ok", slave.Addr, addr)
		}
	}

	return nil
}
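// Illustrative sketch (not part of the original code): how doRole, Elect and
// Promote above are typically combined into a single failover pass. The function
// name and the idea of calling it from a periodic check loop are assumptions.
func failoverOnce(g *Group) error {
	// Ask the master for its ROLE; anything other than ErrNodeDown means
	// either it is healthy or the problem is not a dead master.
	if err := g.doRole(); err != ErrNodeDown {
		return err
	}

	// The master looks down: pick the best slave, then repoint the others to it.
	newMaster, err := g.Elect()
	if err != nil {
		return err
	}
	return g.Promote(newMaster)
}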
func (c *Canal) Close() {
	log.Infof("close canal")

	c.m.Lock()
	defer c.m.Unlock()

	if c.isClosed() {
		return
	}

	c.closed.Set(true)

	close(c.quit)

	c.connLock.Lock()
	c.conn.Close()
	c.conn = nil
	c.connLock.Unlock()

	if c.syncer != nil {
		c.syncer.Close()
		c.syncer = nil
	}

	c.master.Close()

	c.wg.Wait()
}
func (r *River) Close() {
	log.Infof("closing river")

	close(r.quit)

	r.canal.Close()

	r.wg.Wait()
}
func (d *Dumper) mysqldump(w io.Writer) error {
	log.Trace("mysqldump")
	args := make([]string, 0, 16)

	// Common args
	seps := strings.Split(d.Addr, ":")
	args = append(args, fmt.Sprintf("--host=%s", seps[0]))
	if len(seps) > 1 {
		args = append(args, fmt.Sprintf("--port=%s", seps[1]))
	}

	args = append(args, fmt.Sprintf("--user=%s", d.User))
	args = append(args, fmt.Sprintf("--password=%s", d.Password))

	args = append(args, "--master-data")
	args = append(args, "--single-transaction")
	args = append(args, "--skip-lock-tables")

	// Disable unnecessary data
	args = append(args, "--compact")
	args = append(args, "--skip-opt")
	args = append(args, "--quick")

	// We only care about data
	args = append(args, "--no-create-info")

	// Multi-row inserts are easier for us to parse
	args = append(args, "--skip-extended-insert")

	for db, tables := range d.IgnoreTables {
		for _, table := range tables {
			args = append(args, fmt.Sprintf("--ignore-table=%s.%s", db, table))
		}
	}

	if len(d.Tables) == 0 && len(d.Databases) == 0 {
		args = append(args, "--all-databases")
	} else if len(d.Tables) == 0 {
		args = append(args, "--databases")
		args = append(args, d.Databases...)
	} else {
		args = append(args, d.TableDB)
		args = append(args, d.Tables...)

		// If we only dump some tables, the dump data will not contain the database name,
		// which makes it hard to parse, so here we add it manually.
		w.Write([]byte(fmt.Sprintf("USE `%s`;\n", d.TableDB)))
	}

	cmd := exec.Command(d.ExecutionPath, args...)

	cmd.Stderr = d.ErrOut
	cmd.Stdout = w

	log.Infof("Executing dump: %+v", cmd)
	return cmd.Run()
}
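// Illustrative only (not part of the original code): for a hypothetical Dumper
// configured as Addr "127.0.0.1:3306", User "root", TableDB "test" and
// Tables ["t1", "t2"], the code above assembles roughly this command line
// (password elided):
//
//   mysqldump --host=127.0.0.1 --port=3306 --user=root --password=...
//     --master-data --single-transaction --skip-lock-tables
//     --compact --skip-opt --quick --no-create-info --skip-extended-insert
//     test t1 t2
//
// and first writes "USE `test`;" to w so the parser knows which database the
// following INSERT statements belong to.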
func (g *Group) doRole() error {
	v, err := g.Master.doRole()
	if err != nil {
		return ErrNodeDown
	}

	// The first element is the server type.
	serverType, _ := redis.String(v[0], nil)
	if serverType != MasterType {
		log.Errorf("server %s is not master now", g.Master.Addr)
		return ErrNodeType
	}

	// The second is the master replication offset.
	g.Master.Offset, _ = redis.Int64(v[1], nil)

	// Then the slave list: [host, port, offset]
	slaves, _ := redis.Values(v[2], nil)

	nodes := make(map[string]*Node, len(slaves))

	for i := 0; i < len(slaves); i++ {
		ss, _ := redis.Strings(slaves[i], nil)

		var n Node
		n.Addr = fmt.Sprintf("%s:%s", ss[0], ss[1])
		n.Offset, _ = strconv.ParseInt(fmt.Sprintf("%s", ss[2]), 10, 64)

		nodes[n.Addr] = &n
	}

	// We don't care much about slaves being added or removed, so only log it.
	for addr := range nodes {
		if _, ok := g.Slaves[addr]; !ok {
			log.Infof("slave %s added", addr)
		}
	}

	for addr, slave := range g.Slaves {
		if _, ok := nodes[addr]; !ok {
			log.Infof("slave %s removed", addr)
			slave.close()
		}
	}

	g.Slaves = nodes

	return nil
}
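// For reference, an illustrative ROLE reply from a Redis master, which is the
// shape doRole consumes above (addresses and offsets are made up):
//
//   1) "master"
//   2) (integer) 3129659          // master replication offset
//   3) 1) 1) "127.0.0.1"          // slave host
//         2) "6380"               // slave port
//         3) "3129242"            // slave replication offset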
func (r *River) createIndex(idx string, settings map[string]interface{}) error {
	exists, err := r.es.IndexExists(idx).Do()
	if err != nil {
		return err
	}
	if exists {
		log.Warnf("Index '%s' already exists; settings and mappings not updated", idx)
		return nil
	}

	log.Infof("Creating index %s with settings: %v", idx, settings)
	_, err = r.es.CreateIndex(idx).BodyJson(settings).Do()
	return err
}
func (d *Dumper) parseDumpFile(dump string, w io.Writer) error {
	log.Infof("Parsing: %s", dump)

	lastSlash := strings.LastIndex(dump, "/") + 1
	database := strings.Split(dump[lastSlash:], ".")[0]

	stmnt := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS `%s`;\n\nUSE `%s`;\n", database, database)
	log.Debug(stmnt)
	if _, err := fmt.Fprintf(w, stmnt); err != nil {
		return err
	} else if file, err := os.Open(dump); err != nil {
		return err
	} else {
		defer file.Close()

		scanner := bufio.NewScanner(file)
		scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

		insertExp := regexp.MustCompile("^INSERT INTO `.+` VALUES$")
		valuesExp := regexp.MustCompile("^\\(.+\\)[;,]$")

		n := 0
		for scanner.Scan() {
			n = n + 1
			if n%10000 == 0 {
				log.Infof("%d lines parsed", n)
			}

			line := scanner.Text()
			if insertExp.FindString(line) != "" {
				stmnt := fmt.Sprintf("%s\n", line)
				_, err = w.Write([]byte(stmnt))
			} else if valuesExp.FindString(line) != "" {
				stmnt := fmt.Sprintf("%s\n", line)
				_, err = w.Write([]byte(stmnt))
			}

			if err != nil {
				log.Errorf("Failed after %d lines parsed due to %v: %v", n, err, line)
				return err
			}
		}

		log.Infof("Parsing completed with %d lines parsed", n)
		return scanner.Err()
	}
}
func (app *App) removeSlave(c *client, activeQuit bool) {
	addr := c.slaveListeningAddr

	app.slock.Lock()
	defer app.slock.Unlock()

	if _, ok := app.slaves[addr]; ok {
		delete(app.slaves, addr)
		log.Infof("remove slave %s", addr)
		asyncNotifyUint64(app.slaveSyncAck, c.lastLogID.Get())
	}
}
func (a *App) setMasters(addrs []string) error {
	if a.cluster != nil {
		if a.cluster.IsLeader() {
			return a.cluster.SetMasters(addrs, 10*time.Second)
		} else {
			log.Infof("%s is not leader, skip", a.c.Addr)
		}
	} else {
		a.masters.SetMasters(addrs)
	}
	return nil
}
// ResolveWildcards resolves paths like:
//   /zk/nyc/vt/tablets/*/action
//   /zk/global/vt/keyspaces/*/shards/*/action
//   /zk/*/vt/tablets/*/action
// into real existing paths.
//
// If you send paths that don't contain any wildcard and
// don't exist, this function will return an empty array.
func ResolveWildcards(zconn Conn, zkPaths []string) ([]string, error) {
	// Check all the paths start with /zk/ before doing anything time consuming.
	// Relax this in case we are not talking to a metaconn and
	// just want to talk to a specified instance.
	// for _, zkPath := range zkPaths {
	// 	if _, err := ZkCellFromZkPath(zkPath); err != nil {
	// 		return nil, err
	// 	}
	// }

	results := make([][]string, len(zkPaths))
	wg := &sync.WaitGroup{}
	mu := &sync.Mutex{}
	var firstError error

	for i, zkPath := range zkPaths {
		wg.Add(1)
		parts := strings.Split(zkPath, "/")
		go func(i int) {
			defer wg.Done()
			subResult, err := resolveRecursive(zconn, parts, true)
			if err != nil {
				mu.Lock()
				if firstError != nil {
					log.Infof("Multiple error: %v", err)
				} else {
					firstError = err
				}
				mu.Unlock()
			} else {
				results[i] = subResult
			}
		}(i)
	}

	wg.Wait()
	if firstError != nil {
		return nil, firstError
	}

	result := make([]string, 0, 32)
	for i := 0; i < len(zkPaths); i++ {
		subResult := results[i]
		if subResult != nil {
			result = append(result, subResult...)
		}
	}

	return result, nil
}
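// Illustrative usage (not part of the original code): expand wildcarded paths
// into the real, existing ones. The paths are the examples from the doc
// comment above; the caller supplies an established Conn.
func exampleResolveTablets(zconn Conn) ([]string, error) {
	return ResolveWildcards(zconn, []string{
		"/zk/*/vt/tablets/*/action",
		"/zk/global/vt/keyspaces/*/shards/*/action",
	})
}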
func (c *Canal) handleQueryEvent(e *replication.BinlogEvent) error {
	ev := e.Event.(*replication.QueryEvent)
	query, err := replication.ParseQuery(string(ev.Query))
	log.Debugf("query parsed: %v, %v", query, err)
	if err == replication.ErrIgnored {
		return nil
	} else if err != nil {
		log.Infof("failed to parse: %v, %v", string(ev.Query), err)
		return nil
	} else {
		schema := string(ev.Schema)
		if query.Schema != "" {
			// Schema overridden in query
			schema = query.Schema
		}

		table, err := c.GetTable(schema, query.Table)
		if err == errTableIgnored {
			// ignore
			return nil
		} else if err != nil {
			return errors.Trace(err)
		}

		switch query.Operation {
		case replication.ADD:
			// Flush everything before changing schema
			c.flushEventHandlers()
			table.AddColumn(query.Column, query.Type, query.Extra)
			log.Infof("Adding new column %v %v to %v.%v", query.Column, query.Type, schema, query.Table)
		case replication.MODIFY:
		case replication.DELETE:
		default:
		}
		return nil
	}
}
func (c *Canal) tryDump() error {
	if len(c.master.Name) > 0 {
		// we will sync with binlog name and position
		log.Infof("Skip dump, use last binlog replication pos (%s, %d)", c.master.Name, c.master.Position)
		return nil
	}

	if c.dumper == nil {
		log.Errorf("Skip dump, no dumper provided")
		return nil
	}

	h := &dumpParseHandler{c: c}

	start := time.Now()
	log.Info("Start dump")
	if err := c.dumper.DumpAndParse(h); err != nil {
		return errors.Trace(err)
	}

	log.Infof("Dump completed in %0.2f seconds", time.Now().Sub(start).Seconds())

	c.master.Update(h.name, uint32(h.pos))
	c.master.Save(true)

	return nil
}
func newZk(cfg *Config, fsm *masterFSM) (Cluster, error) {
	z := new(Zk)

	var err error

	if !strings.HasPrefix(cfg.Zk.BaseDir, "/zk") {
		return nil, fmt.Errorf("invalid zk base dir %s, must have prefix /zk", cfg.Zk.BaseDir)
	}

	addr := strings.Join(cfg.Zk.Addr, ",")
	if addr == "memory" {
		// only for test
		log.Infof("only for test, use memory")
		z.conn = zkhelper.NewConn()
	} else {
		z.conn, err = zkhelper.ConnectToZk(addr)
	}

	if err != nil {
		return nil, err
	}

	z.c = cfg
	z.fsm = fsm
	z.isLeader.Set(false)
	z.leaderCh = make(chan bool, 1)
	z.actionCh = make(chan *zkAction, 10)
	z.quit = make(chan struct{})

	if _, err = zkhelper.CreateOrUpdate(z.conn, cfg.Zk.BaseDir, "", 0, zkhelper.DefaultDirACLs(), true); err != nil {
		log.Errorf("create %s error: %v", cfg.Zk.BaseDir, err)
		return nil, err
	}

	onRetryLock := func() {
		z.noticeLeaderCh(false)
	}

	z.elector = createElection(z.conn, cfg.Zk.BaseDir, cfg.Addr, onRetryLock)

	z.checkLeader()

	return z, nil
}
func loadMasterInfo(name string) (*masterInfo, error) {
	var m masterInfo

	m.name = name

	f, err := os.Open(name)
	if err != nil && !os.IsNotExist(err) {
		return nil, errors.Trace(err)
	} else if os.IsNotExist(err) {
		log.Infof("No MasterInfo recorded at %v", name)
		return &m, nil
	}
	defer f.Close()

	_, err = toml.DecodeReader(f, &m)
	return &m, err
}
func (s *stat) Run(addr string) {
	if len(addr) == 0 {
		return
	}
	log.Infof("run status http server %s", addr)

	var err error
	s.l, err = net.Listen("tcp", addr)
	if err != nil {
		log.Errorf("listen stat addr %s err %v", addr, err)
		return
	}

	srv := http.Server{}

	mux := http.NewServeMux()
	mux.Handle("/stat", s)

	srv.Handler = mux

	srv.Serve(s.l)
}
func (z *Zk) handleAction(a *action) error {
	log.Infof("handle action %s, masters: %v", a.Cmd, a.Masters)

	m := z.fsm.Copy()
	m.handleAction(a)

	masters := m.GetMasters()
	data, _ := json.Marshal(masters)

	zkPath := fmt.Sprintf("%s/masters", z.c.Zk.BaseDir)
	_, err := z.conn.Set(zkPath, data, -1)
	if err != nil {
		return err
	}

	z.fsm.SetMasters(masters)

	return nil
}
func NewApp(c *Config) (*App, error) {
	var err error

	a := new(App)
	a.c = c
	a.quit = make(chan struct{})
	a.groups = make(map[string]*Group)
	a.masters = newMasterFSM()

	if c.MaxDownTime <= 0 {
		c.MaxDownTime = 3
	}

	if a.c.CheckInterval <= 0 {
		a.c.CheckInterval = 1000
	}

	if len(c.Addr) > 0 {
		a.l, err = net.Listen("tcp", c.Addr)
		if err != nil {
			return nil, err
		}
	}

	switch c.Broker {
	case "raft":
		a.cluster, err = newRaft(c, a.masters)
	case "zk":
		a.cluster, err = newZk(c, a.masters)
	default:
		log.Infof("unsupported broker %s, use no cluster", c.Broker)
		a.cluster = nil
	}

	if err != nil {
		return nil, err
	}

	return a, nil
}
func (t *electorTask) Run() error {
	t.z.wg.Add(1)
	defer t.z.wg.Done()

	log.Infof("begin leader %s, run", t.z.c.Addr)

	if err := t.z.getMasters(); err != nil {
		t.interrupted.Set(true)
		log.Errorf("get masters err %v", err)
		return err
	}

	t.z.noticeLeaderCh(true)

	for {
		select {
		case <-t.z.quit:
			log.Info("zk close, interrupt elector running task")
			t.z.noticeLeaderCh(false)
			t.interrupted.Set(true)
			return nil
		case <-t.stop:
			log.Info("stop elector running task")
			return nil
		case a := <-t.z.actionCh:
			if a.timeout.Get() {
				log.Warnf("wait action %s masters %v timeout, skip it", a.a.Cmd, a.a.Masters)
			} else {
				err := t.z.handleAction(a.a)
				a.ch <- err
			}
		}
	}

	return nil
}
func NewCanal(cfg *Config) (*Canal, error) {
	c := new(Canal)
	c.cfg = cfg
	c.closed.Set(false)
	c.quit = make(chan struct{})

	os.MkdirAll(cfg.DataDir, 0755)

	c.dumpDoneCh = make(chan struct{})
	c.rsHandlers = make([]RowsEventHandler, 0, 4)
	c.tables = make(map[string]*schema.Table)

	var err error
	if c.master, err = loadMasterInfo(c.masterInfoPath()); err != nil {
		return nil, err
	} else if len(c.master.Addr) != 0 && c.master.Addr != c.cfg.Addr {
		log.Infof("MySQL addr %s in old master.info, but new %s, reset", c.master.Addr, c.cfg.Addr)
		// may use another MySQL, reset
		c.master = &masterInfo{}
	}

	c.master.Addr = c.cfg.Addr

	if err := c.prepareDumper(); err != nil {
		return nil, err
	}

	if err = c.prepareSyncer(); err != nil {
		return nil, err
	}

	if err := c.checkBinlogRowFormat(); err != nil {
		return nil, err
	}

	return c, nil
}
func TestParseQuery(t *testing.T) {
	variations := [...]string{
		"ALTER TABLE t1 ADD c1 VARCHAR(256) DEFAULT",
		"alter table t1 add c1 varchar(256) default",
		"ALTER TABLE `t1` ADD `c1` VARCHAR(256) DEFAULT",
	}

	for _, v := range variations {
		q, err := ParseQuery(v)
		assert.NoError(t, err)
		log.Infof("query: %v", q)
		assert.Equal(t, "t1", q.Table)
		assert.Equal(t, AlterOp("ADD"), q.Operation)
		assert.Equal(t, "c1", q.Column)
		assert.Equal(t, "VARCHAR(256)", q.Type)
		assert.Equal(t, "DEFAULT", q.Extra)
	}

	_, err := ParseQuery("UPDATE TABLE t1 ADD c1 VARCHAR(256)")
	assert.Equal(t, ErrIgnored, err)

	q, err := ParseQuery("ALTER TABLE db1.t1 ADD c1 VARCHAR(256) DEFAULT")
	assert.NoError(t, err)
	assert.Equal(t, "db1", q.Schema)
	assert.Equal(t, "t1", q.Table)

	q, err = ParseQuery("ALTER TABLE `db1.t1` ADD c1 VARCHAR(256) DEFAULT")
	assert.NoError(t, err)
	assert.Equal(t, "", q.Schema)
	assert.Equal(t, "db1.t1", q.Table)

	// BUG: this doesn't work
	//q, err = ParseQuery("ALTER TABLE db1.`t1 2` ADD c1 VARCHAR(256) DEFAULT")
	//assert.NoError(t, err)
	//assert.Equal(t, "db1", q.Schema)
	//assert.Equal(t, "t1 2", q.Table)
}
func (h *testRowsEventHandler) Do(e *RowsEvent) error {
	log.Infof("%s %v\n", e.Action, e.Rows)
	return nil
}
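// Illustrative sketch (not part of the original code): a handler in the same
// shape as the test handler above, counting rows per action. It assumes
// e.Rows is a slice of row values; how the handler is registered with the
// Canal (the rsHandlers slice set up in NewCanal) is outside this snippet.
type countingRowsHandler struct {
	counts map[string]int
}

func (h *countingRowsHandler) Do(e *RowsEvent) error {
	if h.counts == nil {
		h.counts = make(map[string]int)
	}
	h.counts[e.Action] += len(e.Rows)
	return nil
}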
// RunTask returns nil when the underlying task ends, or the error it
// generated.
func (ze *zElector) RunTask(task *electorTask) error {
	leaderPath := path.Join(ze.path, "leader")
	for {
		_, err := zkhelper.CreateRecursive(ze.zconn, leaderPath, "", 0, zk.WorldACL(zkhelper.PERM_FILE))
		if err == nil || zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) {
			break
		}
		log.Warnf("election leader create failed: %v", err)
		time.Sleep(500 * time.Millisecond)
	}

	for {
		err := ze.Lock("RunTask")
		if err != nil {
			log.Warnf("election lock failed: %v", err)
			if err == zkhelper.ErrInterrupted {
				return zkhelper.ErrInterrupted
			}
			continue
		}

		// Confirm your win and deliver acceptance speech. This notifies
		// listeners who will have been watching the leader node for
		// changes.
		_, err = ze.zconn.Set(leaderPath, []byte(ze.contents), -1)
		if err != nil {
			log.Warnf("election promotion failed: %v", err)
			continue
		}

		log.Infof("election promote leader %v", leaderPath)
		taskErrChan := make(chan error)
		go func() {
			taskErrChan <- task.Run()
		}()

	watchLeader:
		// Watch the leader so we can get notified if something goes wrong.
		data, _, watch, err := ze.zconn.GetW(leaderPath)
		if err != nil {
			log.Warnf("election unable to watch leader node %v %v", leaderPath, err)
			// FIXME(msolo) Add delay
			goto watchLeader
		}

		if string(data) != ze.contents {
			log.Warnf("election unable to promote leader")
			task.Stop()
			// We won the election, but we didn't become the leader. How is that possible?
			// (see Bush v. Gore for some inspiration)
			// It means:
			//   1. Someone isn't playing by the election rules (a bad actor).
			//      Hard to detect - let's assume we don't have this problem. :)
			//   2. We lost our connection somehow and the ephemeral lock was cleared,
			//      allowing someone else to win the election.
			continue
		}

		// This is where we start our target process and watch for its failure.
	waitForEvent:
		select {
		case <-ze.interrupted:
			log.Warn("election interrupted - stop child process")
			task.Stop()
			// Once the process dies from the signal, this will all tear down.
			goto waitForEvent
		case taskErr := <-taskErrChan:
			// If our code fails, unlock to trigger an election.
			log.Infof("election child process ended: %v", taskErr)
			ze.Unlock()
			if task.Interrupted() {
				log.Warnf("election child process interrupted - stepping down")
				return zkhelper.ErrInterrupted
			}
			continue
		case zevent := <-watch:
			// We had a zk connection hiccup. We have a few choices,
			// but it depends on the constraints and the events.
			//
			// If we get SESSION_EXPIRED our connection loss triggered an
			// election that we won't have won and thus the lock was
			// automatically freed. We have no choice but to start over.
			if zevent.State == zk.StateExpired {
				log.Warnf("election leader watch expired")
				task.Stop()
				continue
			}

			// Otherwise, we had an intermittent issue or something touched
			// the node. Either we lost our position or someone broke
			// protocol and touched the leader node. We just reconnect and
			// revalidate. In the meantime, assume we are still the leader
			// until we determine otherwise.
			//
			// On a reconnect we will be able to see the leader
			// information. If we still hold the position, great. If not, we
			// kill the associated process.
			//
			// On a leader node change, we need to perform the same
			// validation. It's possible an election completes without the
			// old leader realizing he is out of touch.
			log.Warnf("election leader watch event %v", zevent)
			goto watchLeader
		}
	}
	panic("unreachable")
}
func (c *Canal) startSyncBinlog() error {
	pos := mysql.Position{c.master.Name, c.master.Position}

	log.Infof("Start sync'ing binlog from %v", pos)

	s, err := c.syncer.StartSync(pos)
	if err != nil {
		return errors.Errorf("Failed starting sync at %v: %v", pos, err)
	}

	originalTimeout := time.Second
	timeout := originalTimeout
	forceSavePos := false
	for {
		ev, err := s.GetEventTimeout(timeout)
		if err != nil && err != replication.ErrGetEventTimeout {
			return errors.Trace(err)
		} else if err == replication.ErrGetEventTimeout {
			if timeout == 2*originalTimeout {
				log.Debugf("Flushing event handlers since sync has gone idle")
				if err := c.flushEventHandlers(); err != nil {
					log.Warnf("Error occurred during flush: %v", err)
				}
			}
			timeout = 2 * timeout
			continue
		}

		timeout = time.Second

		// next binlog pos
		pos.Pos = ev.Header.LogPos

		forceSavePos = false

		log.Debugf("Syncing %v", ev)
		switch e := ev.Event.(type) {
		case *replication.RotateEvent:
			c.flushEventHandlers()
			pos.Name = string(e.NextLogName)
			pos.Pos = uint32(e.Position)
			// r.ev <- pos
			forceSavePos = true
			log.Infof("Rotate binlog to %v", pos)
		case *replication.RowsEvent:
			// we only focus on row-based events
			if err = c.handleRowsEvent(ev); err != nil {
				log.Errorf("Error handling rows event: %v", err)
				return errors.Trace(err)
			}
		case *replication.QueryEvent:
			if err = c.handleQueryEvent(ev); err != nil {
				log.Errorf("Error handling query event: %v", err)
				return errors.Trace(err)
			}
		default:
			log.Debugf("Ignored event: %+v", e)
		}

		c.master.Update(pos.Name, pos.Pos)
		c.master.Save(forceSavePos)
	}

	return nil
}
// LockWithTimeout returns nil when the lock is acquired. A lock is
// held if the file exists and you are the creator. Setting the wait
// to zero makes this a nonblocking lock check.
//
// FIXME(msolo) Disallow non-super users from removing the lock?
func (zm *zMutex) LockWithTimeout(wait time.Duration, desc string) (err error) {
	timer := time.NewTimer(wait)
	defer func() {
		if panicErr := recover(); panicErr != nil || err != nil {
			zm.deleteLock()
		}
	}()

	// Ensure the rendezvous node is here.
	// FIXME(msolo) Assuming locks are contended, it will be cheaper to assume this just
	// exists.
	_, err = zkhelper.CreateRecursive(zm.zconn, zm.path, "", 0, zk.WorldACL(zkhelper.PERM_DIRECTORY))
	if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) {
		return err
	}

	lockPrefix := path.Join(zm.path, "lock-")
	zflags := zk.FlagSequence
	if zm.ephemeral {
		zflags = zflags | zk.FlagEphemeral
	}

	// update node content
	var lockContent map[string]interface{}
	err = json.Unmarshal([]byte(zm.contents), &lockContent)
	if err != nil {
		return err
	}
	lockContent["desc"] = desc
	newContent, err := json.Marshal(lockContent)
	if err != nil {
		return err
	}

createlock:
	lockCreated, err := zm.zconn.Create(lockPrefix, newContent, int32(zflags), zk.WorldACL(zkhelper.PERM_FILE))
	if err != nil {
		return err
	}
	name := path.Base(lockCreated)
	zm.mu.Lock()
	zm.name = name
	zm.mu.Unlock()

trylock:
	children, _, err := zm.zconn.Children(zm.path)
	if err != nil {
		return fmt.Errorf("zkutil: trylock failed %v", err)
	}
	sort.Strings(children)
	if len(children) == 0 {
		return fmt.Errorf("zkutil: empty lock: %v", zm.path)
	}

	if children[0] == name {
		// We are the lock owner.
		return nil
	}

	if zm.onRetryLock != nil {
		zm.onRetryLock()
	}

	// This is the degenerate case of a nonblocking lock check. It's not optimal, but
	// also probably not worth optimizing.
	if wait == 0 {
		return zkhelper.ErrTimeout
	}
	prevLock := ""
	for i := 1; i < len(children); i++ {
		if children[i] == name {
			prevLock = children[i-1]
			break
		}
	}
	if prevLock == "" {
		// This is an interesting case. The node disappeared
		// underneath us, probably due to a session loss. We can
		// recreate the lock node (with a new sequence number) and
		// keep trying.
		log.Warnf("zkutil: no lock node found: %v/%v", zm.path, zm.name)
		goto createlock
	}

	zkPrevLock := path.Join(zm.path, prevLock)
	exist, stat, watch, err := zm.zconn.ExistsW(zkPrevLock)
	if err != nil {
		// FIXME(msolo) Should this be a retry?
		return fmt.Errorf("zkutil: unable to watch previous lock node %v %v", zkPrevLock, err)
	}
	if stat == nil || !exist {
		goto trylock
	}
	select {
	case <-timer.C:
		return zkhelper.ErrTimeout
	case <-zm.interrupted:
		return zkhelper.ErrInterrupted
	case event := <-watch:
		log.Infof("zkutil: lock event: %v", event)
		// The precise event doesn't matter - try to read again regardless.
		goto trylock
	}
	panic("unexpected")
}