func (c *Canal) tryDump() error {
	if len(c.master.Name) > 0 && c.master.Position > 0 {
		// we will sync with binlog name and position
		log.Infof("skip dump, use last binlog replication pos (%s, %d)", c.master.Name, c.master.Position)
		return nil
	}

	if c.dumper == nil {
		log.Info("skip dump, no mysqldump")
		return nil
	}

	h := &dumpParseHandler{c: c}

	start := time.Now()
	log.Info("try dump MySQL and parse")
	if err := c.dumper.DumpAndParse(h); err != nil {
		return err
	}

	log.Infof("dump MySQL and parse OK, use %0.2f seconds, start binlog replication at (%s, %d)",
		time.Now().Sub(start).Seconds(), h.name, h.pos)

	c.master.Update(h.name, uint32(h.pos))
	c.master.Save(true)

	return nil
}
func readIndexFile(configDir string, rule *config.Rule) ([]byte, error) {
	if rule.IndexFile != "" {
		// Index file explicitly specified. Fail if not found.
		path := rule.IndexFile
		if !strings.HasPrefix(rule.IndexFile, "/") {
			// indexFile is relative to config dir
			path = configDir + "/" + rule.IndexFile
		}
		log.Infof("Using index setting from %s", path)
		return ioutil.ReadFile(path)
	} else {
		var path string
		// No index file specified. Read the default file (<configDir>/<index>.idx.json) if it exists.
		// Strip a trailing -[0-9]+ so indexes with version suffixes match a base settings file.
		if m := regexp.MustCompile("(.+)-[0-9]+$").FindStringSubmatch(rule.Index); len(m) == 0 {
			path = configDir + "/" + rule.Index + ".idx.json"
		} else {
			path = configDir + "/" + m[1] + ".idx.json"
		}
		}
		data, err := ioutil.ReadFile(path)
		if os.IsNotExist(err) {
			return nil, nil
		} else {
			log.Infof("Using index settings from %s", path)
			return data, err
		}
	}
}
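A minimal sketch of how readIndexFile might be wired to createIndex (shown further down) when a rule is set up; the wrapper function, the assumed encoding/json import, and the skip-when-no-settings fallback are illustrative assumptions, not code from the project.

// prepareIndexSketch is hypothetical glue, assumed to live in the same
// package as readIndexFile and createIndex.
func (r *River) prepareIndexSketch(configDir string, rule *config.Rule) error {
	data, err := readIndexFile(configDir, rule)
	if err != nil {
		return err
	}
	if data == nil {
		// no explicit or default settings file found; skip explicit creation
		return nil
	}
	var settings map[string]interface{}
	if err := json.Unmarshal(data, &settings); err != nil {
		return err
	}
	return r.createIndex(rule.Index, settings)
}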
Example #3
// Elect the best slave, i.e. the one whose data is most up-to-date with the master
func (g *Group) Elect() (string, error) {
	g.m.Lock()
	defer g.m.Unlock()

	var addr string
	var checkOffset int64 = 0
	var checkPriority int = 0

	for _, slave := range g.Slaves {
		m, err := slave.doRelpInfo()
		if err != nil {
			log.Infof("slave %s get replication info err %v, skip it", slave.Addr, err)
			continue
		}

		if m["slave"] == MasterType {
			log.Errorf("server %s is not slave now, skip it", slave.Addr)
			continue
		}

		if m["master_link_status"] == "up" {
			log.Infof("slave %s master_link_status is up, master %s may be not down???",
				slave.Addr, g.Master.Addr)
			return "", ErrNodeAlive
		}

		priority, _ := strconv.Atoi(m["slave_priority"])
		replOffset, _ := strconv.ParseInt(m["slave_repl_offset"], 10, 64)

		used := false
		// like redis-sentinel, check priority first, then slave repl offset
		if checkPriority < priority {
			used = true
		} else if checkPriority == priority {
			if checkOffset < replOffset {
				used = true
			}
		}

		if used {
			addr = slave.Addr
			checkPriority = priority
			checkOffset = replOffset
		}
	}

	if len(addr) == 0 {
		log.Errorf("no proper candidate to be promoted")
		return "", ErrNoCandidate
	}

	log.Infof("select slave %s as new master, priority:%d, repl_offset:%d", addr, checkPriority, checkOffset)

	return addr, nil
}
Example #4
func (c *Canal) startSyncBinlog() error {
	pos := mysql.Position{Name: c.master.Name, Pos: c.master.Position}

	log.Infof("start sync binlog at %v", pos)

	s, err := c.syncer.StartSync(pos)
	if err != nil {
		return errors.Errorf("start sync replication at %v error %v", pos, err)
	}

	timeout := time.Second
	forceSavePos := false
	for {
		ev, err := s.GetEventTimeout(timeout)
		if err != nil && err != replication.ErrGetEventTimeout {
			return errors.Trace(err)
		} else if err == replication.ErrGetEventTimeout {
			timeout = 2 * timeout
			continue
		}

		timeout = time.Second

		//next binlog pos
		pos.Pos = ev.Header.LogPos

		forceSavePos = false

		switch e := ev.Event.(type) {
		case *replication.RotateEvent:
			pos.Name = string(e.NextLogName)
			pos.Pos = uint32(e.Position)
			// r.ev <- pos
			forceSavePos = true
			log.Infof("rotate binlog to %v", pos)
		case *replication.RowsEvent:
			// we only care about row-based events
			if err = c.handleRowsEvent(ev); err != nil {
				log.Errorf("handle rows event error %v", err)
				return errors.Trace(err)
			}
		case *replication.TableMapEvent:
			continue
		default:
		}

		c.master.Update(pos.Name, pos.Pos)
		c.master.Save(forceSavePos)
	}

	return nil
}
Example #5
func (d *Dumper) parseMetadataFile(meta string, w io.Writer) error {
	log.Infof("Parsing: %s", meta)
	if file, err := os.Open(meta); err != nil {
		return err
	} else {
		defer file.Close()

		scanner := bufio.NewScanner(file)

		binLogExp := regexp.MustCompile("\\s+Log:\\s+(.+)")
		binLogPosExp := regexp.MustCompile("\\s+Pos:\\s+(\\d+)")

		binLog := ""
		binLogPos := ""

		for scanner.Scan() {
			line := scanner.Text()
			if m := binLogExp.FindStringSubmatch(line); len(m) > 0 {
				binLog = m[1]
			} else if m := binLogPosExp.FindStringSubmatch(line); len(m) > 0 {
				binLogPos = m[1]
			}
		}

		if err = scanner.Err(); err != nil {
			return err
		} else {
			stmnt := fmt.Sprintf("CHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%s;\n", binLog, binLogPos)
			log.Debug(stmnt)
			_, err = fmt.Fprint(w, stmnt)
			return err
		}
	}
}
Example #6
func (app *App) slaveof(masterAddr string, restart bool, readonly bool) error {
	app.m.Lock()
	defer app.m.Unlock()

	//in master mode and no slaveof, only set readonly
	if len(app.cfg.SlaveOf) == 0 && len(masterAddr) == 0 {
		app.cfg.SetReadonly(readonly)
		return nil
	}

	if !app.ldb.ReplicationUsed() {
		return fmt.Errorf("slaveof must enable replication")
	}

	app.cfg.SlaveOf = masterAddr

	if len(masterAddr) == 0 {
		log.Infof("slaveof no one, stop replication")
		if err := app.m.stopReplication(); err != nil {
			return err
		}

		app.cfg.SetReadonly(readonly)
	} else {
		return app.m.startReplication(masterAddr, restart)
	}

	return nil
}
Example #7
// Promote the slave to master, then let other slaves replicate from it
func (g *Group) Promote(addr string) error {
	g.m.Lock()
	defer g.m.Unlock()

	node := g.Slaves[addr]

	if err := node.slaveof("no", "one"); err != nil {
		return err
	}

	delete(g.Slaves, addr)

	g.Master = node

	host, port, _ := net.SplitHostPort(addr)
	for _, slave := range g.Slaves {
		if err := slave.slaveof(host, port); err != nil {
			// if we go here, the replication topology may be wrong
			// so use fatal level and we should fix it manually
			log.Fatalf("slaveof %s to master %s err %v", slave.Addr, addr, err)
		} else {
			log.Infof("slaveof %s to master %s ok", slave.Addr, addr)
		}
	}

	return nil
}
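For orientation, a hedged sketch of how Elect and Promote above could be combined into a single failover step; the wrapper function itself is an assumption for illustration, not part of the project.

// failoverSketch is a hypothetical driver for the two methods above:
// pick the most up-to-date slave, then rewire the group around it.
func failoverSketch(g *Group) error {
	addr, err := g.Elect()
	if err != nil {
		// Elect returns ErrNodeAlive or ErrNoCandidate on failure (see above)
		return err
	}
	if err := g.Promote(addr); err != nil {
		return err
	}
	log.Infof("failover done, new master is %s", addr)
	return nil
}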
func (c *Canal) Close() {
	log.Infof("close canal")

	c.m.Lock()
	defer c.m.Unlock()

	if c.isClosed() {
		return
	}

	c.closed.Set(true)

	close(c.quit)

	c.connLock.Lock()
	c.conn.Close()
	c.conn = nil
	c.connLock.Unlock()

	if c.syncer != nil {
		c.syncer.Close()
		c.syncer = nil
	}

	c.master.Close()

	c.wg.Wait()
}
func (r *River) Close() {
	log.Infof("closing river")
	close(r.quit)

	r.canal.Close()

	r.wg.Wait()
}
Example #10
func (d *Dumper) mysqldump(w io.Writer) error {
	log.Trace("mysqldump")
	args := make([]string, 0, 16)

	// Common args
	seps := strings.Split(d.Addr, ":")
	args = append(args, fmt.Sprintf("--host=%s", seps[0]))
	if len(seps) > 1 {
		args = append(args, fmt.Sprintf("--port=%s", seps[1]))
	}

	args = append(args, fmt.Sprintf("--user=%s", d.User))
	args = append(args, fmt.Sprintf("--password=%s", d.Password))

	args = append(args, "--master-data")
	args = append(args, "--single-transaction")
	args = append(args, "--skip-lock-tables")

	// Disable unnecessary data
	args = append(args, "--compact")
	args = append(args, "--skip-opt")
	args = append(args, "--quick")

	// We only care about data
	args = append(args, "--no-create-info")

	// One row per INSERT makes the data easier for us to parse
	args = append(args, "--skip-extended-insert")

	for db, tables := range d.IgnoreTables {
		for _, table := range tables {
			args = append(args, fmt.Sprintf("--ignore-table=%s.%s", db, table))
		}
	}

	if len(d.Tables) == 0 && len(d.Databases) == 0 {
		args = append(args, "--all-databases")
	} else if len(d.Tables) == 0 {
		args = append(args, "--databases")
		args = append(args, d.Databases...)
	} else {
		args = append(args, d.TableDB)
		args = append(args, d.Tables...)

		// If we only dump some tables, the dump data will not contain the database name,
		// which makes it hard for us to parse, so we add it manually here.

		w.Write([]byte(fmt.Sprintf("USE `%s`;\n", d.TableDB)))
	}

	cmd := exec.Command(d.ExecutionPath, args...)

	cmd.Stderr = d.ErrOut
	cmd.Stdout = w
	log.Infof("Executing dump: %+v", cmd)

	return cmd.Run()
}
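A minimal sketch of driving mysqldump above from within the same package; constructing the Dumper field by field is an assumption based only on the fields the method reads (a real caller would more likely go through the package's constructor), and the bytes/os imports are assumed.

// dumpToBufferSketch is hypothetical: it sets only the fields that
// mysqldump itself reads and captures the dump output in memory.
func dumpToBufferSketch() ([]byte, error) {
	d := &Dumper{
		ExecutionPath: "mysqldump", // assumed to be on PATH
		Addr:          "127.0.0.1:3306",
		User:          "root",
		Password:      "",
		Databases:     []string{"test"},
		ErrOut:        os.Stderr,
	}

	var buf bytes.Buffer
	if err := d.mysqldump(&buf); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}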
Example #11
func (g *Group) doRole() error {
	v, err := g.Master.doRole()
	if err != nil {
		return ErrNodeDown
	}

	// the first line is server type
	serverType, _ := redis.String(v[0], nil)
	if serverType != MasterType {
		log.Errorf("server %s is not master now", g.Master.Addr)
		return ErrNodeType
	}

	// the second is the master replication offset
	g.Master.Offset, _ = redis.Int64(v[1], nil)

	// then slave list [host, port, offset]
	slaves, _ := redis.Values(v[2], nil)
	nodes := make(map[string]*Node, len(slaves))
	for i := 0; i < len(slaves); i++ {
		ss, _ := redis.Strings(slaves[i], nil)
		var n Node
		n.Addr = fmt.Sprintf("%s:%s", ss[0], ss[1])
		n.Offset, _ = strconv.ParseInt(fmt.Sprintf("%s", ss[2]), 10, 64)
		nodes[n.Addr] = &n
	}

	// we don't care much about slaves being added or removed, so just log it
	for addr := range nodes {
		if _, ok := g.Slaves[addr]; !ok {
			log.Infof("slave %s added", addr)
		}
	}

	for addr, slave := range g.Slaves {
		if _, ok := nodes[addr]; !ok {
			log.Infof("slave %s removed", addr)
			slave.close()
		}
	}

	g.Slaves = nodes
	return nil
}
func (r *River) createIndex(idx string, settings map[string]interface{}) error {
	exists, err := r.es.IndexExists(idx).Do()
	if err != nil {
		return err
	}
	if exists {
		log.Warnf("Index '%s' already exists; settings and mappings not updated", idx)
		return nil
	}
	log.Infof("Creating index with settings from %v: %v", idx, settings)
	_, err = r.es.CreateIndex(idx).BodyJson(settings).Do()
	return err
}
Example #13
func (d *Dumper) parseDumpFile(dump string, w io.Writer) error {
	log.Infof("Parsing: %s", dump)
	lastSlash := strings.LastIndex(dump, "/") + 1
	database := strings.Split(dump[lastSlash:], ".")[0]
	stmnt := fmt.Sprintf("CREATE DATABASE IF NOT EXISTS `%s`;\n\nUSE `%s`;\n", database, database)
	log.Debug(stmnt)
	if _, err := fmt.Fprint(w, stmnt); err != nil {
		return err
	} else if file, err := os.Open(dump); err != nil {
		return err
	} else {
		defer file.Close()
		scanner := bufio.NewScanner(file)
		scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
		insertExp := regexp.MustCompile("^INSERT INTO `.+` VALUES$")
		valuesExp := regexp.MustCompile("^\\(.+\\)[;,]$")

		n := 0

		for scanner.Scan() {
			n = n + 1
			if n%10000 == 0 {
				log.Infof("%d lines parsed ", n)
			}
			line := scanner.Text()
			if insertExp.FindString(line) != "" {
				stmnt := fmt.Sprintf("%s\n", line)
				_, err = w.Write([]byte(stmnt))
			} else if valuesExp.FindString(line) != "" {
				stmnt := fmt.Sprintf("%s\n", line)
				_, err = w.Write([]byte(stmnt))
			}
			if err != nil {
				log.Errorf("Failed after %d lines parsed due to %v: %v", n, err, line)
				return err
			}
		}
		log.Infof("Parsing completed with %d lines parsed", n)
		return scanner.Err()
	}
}
Example #14
func (app *App) removeSlave(c *client, activeQuit bool) {
	addr := c.slaveListeningAddr

	app.slock.Lock()
	defer app.slock.Unlock()

	if _, ok := app.slaves[addr]; ok {
		delete(app.slaves, addr)
		log.Infof("remove slave %s", addr)
		asyncNotifyUint64(app.slaveSyncAck, c.lastLogID.Get())
	}
}
Example #15
func (a *App) setMasters(addrs []string) error {
	if a.cluster != nil {
		if a.cluster.IsLeader() {
			return a.cluster.SetMasters(addrs, 10*time.Second)
		} else {
			log.Infof("%s is not leader, skip", a.c.Addr)
		}
	} else {
		a.masters.SetMasters(addrs)
	}
	return nil
}
Example #16
// resolve paths like:
// /zk/nyc/vt/tablets/*/action
// /zk/global/vt/keyspaces/*/shards/*/action
// /zk/*/vt/tablets/*/action
// into real existing paths
//
// If you send paths that don't contain any wildcard and
// don't exist, this function will return an empty array.
func ResolveWildcards(zconn Conn, zkPaths []string) ([]string, error) {
	// We could check that all the paths start with /zk/ before doing anything
	// time consuming, but that check is relaxed here in case we are not talking
	// to a metaconn and just want to talk to a specific instance.
	// for _, zkPath := range zkPaths {
	// 	if _, err := ZkCellFromZkPath(zkPath); err != nil {
	// 		return nil, err
	// 	}
	// }

	results := make([][]string, len(zkPaths))
	wg := &sync.WaitGroup{}
	mu := &sync.Mutex{}
	var firstError error

	for i, zkPath := range zkPaths {
		wg.Add(1)
		parts := strings.Split(zkPath, "/")
		go func(i int) {
			defer wg.Done()
			subResult, err := resolveRecursive(zconn, parts, true)
			if err != nil {
				mu.Lock()
				if firstError != nil {
					log.Infof("Multiple error: %v", err)
				} else {
					firstError = err
				}
				mu.Unlock()
			} else {
				results[i] = subResult
			}
		}(i)
	}

	wg.Wait()
	if firstError != nil {
		return nil, firstError
	}

	result := make([]string, 0, 32)
	for i := 0; i < len(zkPaths); i++ {
		subResult := results[i]
		if subResult != nil {
			result = append(result, subResult...)
		}
	}

	return result, nil
}
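A short, hedged usage sketch for ResolveWildcards; how the zk connection is obtained is outside this excerpt, so zconn is simply assumed to exist.

// listTabletActionsSketch is a hypothetical call site that expands one of the
// wildcard paths from the doc comment above into concrete zk paths.
func listTabletActionsSketch(zconn Conn) ([]string, error) {
	paths, err := ResolveWildcards(zconn, []string{
		"/zk/*/vt/tablets/*/action",
	})
	if err != nil {
		return nil, err
	}
	for _, p := range paths {
		log.Infof("resolved zk path: %v", p)
	}
	return paths, nil
}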
Example #17
func (c *Canal) handleQueryEvent(e *replication.BinlogEvent) error {
	ev := e.Event.(*replication.QueryEvent)
	query, err := replication.ParseQuery(string(ev.Query))
	log.Debugf("query parsed: %v, %v", query, err)
	if err == replication.ErrIgnored {
		return nil
	} else if err != nil {
		log.Infof("failed to parse: %v, %v", string(ev.Query), err)
		return nil
	} else {
		schema := string(ev.Schema)
		if query.Schema != "" {
			// Schema overridden in query
			schema = query.Schema
		}
		table, err := c.GetTable(schema, query.Table)
		if err == errTableIgnored {
			// ignore
			return nil
		} else if err != nil {
			return errors.Trace(err)
		}

		switch query.Operation {
		case replication.ADD:
			// Flush everything before changing schema
			c.flushEventHandlers()
			table.AddColumn(query.Column, query.Type, query.Extra)
			log.Infof("Adding new column %v %v to %v.%v", query.Column, query.Type, schema, query.Table)
			break
		case replication.MODIFY:
		case replication.DELETE:
		default:
		}
		return nil
	}
}
Example #18
func (c *Canal) tryDump() error {
	if len(c.master.Name) > 0 {
		// we will sync with binlog name and position
		log.Infof("Skip dump, use last binlog replication pos (%s, %d)", c.master.Name, c.master.Position)
		return nil
	}
	if c.dumper == nil {
		log.Errorf("Skip dump, no dumper provided")
		return nil
	}

	h := &dumpParseHandler{c: c}
	start := time.Now()
	log.Info("Start dump")
	if err := c.dumper.DumpAndParse(h); err != nil {
		return errors.Trace(err)
	}

	log.Infof("Dump completed in %0.2f seconds", time.Now().Sub(start).Seconds())

	c.master.Update(h.name, uint32(h.pos))
	c.master.Save(true)
	return nil
}
Example #19
func newZk(cfg *Config, fsm *masterFSM) (Cluster, error) {
	z := new(Zk)

	var err error

	if !strings.HasPrefix(cfg.Zk.BaseDir, "/zk") {
		return nil, fmt.Errorf("invalid zk base dir %s, must have prefix /zk", cfg.Zk.BaseDir)
	}

	addr := strings.Join(cfg.Zk.Addr, ",")
	if addr == "memory" {
		// only for test
		log.Infof("only for test, use memory")
		z.conn = zkhelper.NewConn()
	} else {
		z.conn, err = zkhelper.ConnectToZk(addr)
	}

	if err != nil {
		return nil, err
	}

	z.c = cfg
	z.fsm = fsm
	z.isLeader.Set(false)
	z.leaderCh = make(chan bool, 1)
	z.actionCh = make(chan *zkAction, 10)

	z.quit = make(chan struct{})

	if _, err = zkhelper.CreateOrUpdate(z.conn, cfg.Zk.BaseDir, "", 0, zkhelper.DefaultDirACLs(), true); err != nil {
		log.Errorf("create %s error: %v", cfg.Zk.BaseDir, err)
		return nil, err
	}

	onRetryLock := func() {
		z.noticeLeaderCh(false)
	}

	z.elector = createElection(z.conn, cfg.Zk.BaseDir, cfg.Addr, onRetryLock)

	z.checkLeader()

	return z, nil
}
Example #20
func loadMasterInfo(name string) (*masterInfo, error) {
	var m masterInfo

	m.name = name

	f, err := os.Open(name)
	if err != nil && !os.IsNotExist(err) {
		return nil, errors.Trace(err)
	} else if os.IsNotExist(err) {
		log.Infof("No MasterInfo recorded at %v", name)
		return &m, nil
	}
	defer f.Close()

	_, err = toml.DecodeReader(f, &m)

	return &m, err
}
func (s *stat) Run(addr string) {
	if len(addr) == 0 {
		return
	}
	log.Infof("run status http server %s", addr)
	var err error
	s.l, err = net.Listen("tcp", addr)
	if err != nil {
		log.Errorf("listen stat addr %s err %v", addr, err)
		return
	}

	srv := http.Server{}
	mux := http.NewServeMux()
	mux.Handle("/stat", s)
	srv.Handler = mux

	srv.Serve(s.l)
}
Example #22
func (z *Zk) handleAction(a *action) error {
	log.Infof("handle action %s, masters: %v", a.Cmd, a.Masters)

	m := z.fsm.Copy()

	m.handleAction(a)

	masters := m.GetMasters()
	data, _ := json.Marshal(masters)

	zkPath := fmt.Sprintf("%s/masters", z.c.Zk.BaseDir)

	_, err := z.conn.Set(zkPath, data, -1)
	if err != nil {
		return err
	}

	z.fsm.SetMasters(masters)
	return nil
}
Example #23
func NewApp(c *Config) (*App, error) {
	var err error

	a := new(App)
	a.c = c
	a.quit = make(chan struct{})
	a.groups = make(map[string]*Group)

	a.masters = newMasterFSM()

	if c.MaxDownTime <= 0 {
		c.MaxDownTime = 3
	}

	if a.c.CheckInterval <= 0 {
		a.c.CheckInterval = 1000
	}

	if len(c.Addr) > 0 {
		a.l, err = net.Listen("tcp", c.Addr)
		if err != nil {
			return nil, err
		}
	}

	switch c.Broker {
	case "raft":
		a.cluster, err = newRaft(c, a.masters)
	case "zk":
		a.cluster, err = newZk(c, a.masters)
	default:
		log.Infof("unsupported broker %s, use no cluster", c.Broker)
		a.cluster = nil
	}

	if err != nil {
		return nil, err
	}

	return a, nil
}
Example #24
func (t *electorTask) Run() error {
	t.z.wg.Add(1)
	defer t.z.wg.Done()

	log.Infof("begin leader %s, run", t.z.c.Addr)

	if err := t.z.getMasters(); err != nil {
		t.interrupted.Set(true)

		log.Errorf("get masters err %v", err)
		return err
	}

	t.z.noticeLeaderCh(true)

	for {
		select {
		case <-t.z.quit:
			log.Info("zk close, interrupt elector running task")

			t.z.noticeLeaderCh(false)

			t.interrupted.Set(true)
			return nil
		case <-t.stop:
			log.Info("stop elector running task")
			return nil
		case a := <-t.z.actionCh:
			if a.timeout.Get() {
				log.Warnf("wait action %s masters %v timeout, skip it", a.a.Cmd, a.a.Masters)
			} else {
				err := t.z.handleAction(a.a)

				a.ch <- err
			}
		}
	}

	return nil
}
func NewCanal(cfg *Config) (*Canal, error) {
	c := new(Canal)
	c.cfg = cfg
	c.closed.Set(false)
	c.quit = make(chan struct{})

	os.MkdirAll(cfg.DataDir, 0755)

	c.dumpDoneCh = make(chan struct{})
	c.rsHandlers = make([]RowsEventHandler, 0, 4)
	c.tables = make(map[string]*schema.Table)

	var err error
	if c.master, err = loadMasterInfo(c.masterInfoPath()); err != nil {
		return nil, err
	} else if len(c.master.Addr) != 0 && c.master.Addr != c.cfg.Addr {
		log.Infof("MySQL addr %s in old master.info, but new %s, reset", c.master.Addr, c.cfg.Addr)
		// may use another MySQL, reset
		c.master = &masterInfo{}
	}

	c.master.Addr = c.cfg.Addr

	if err := c.prepareDumper(); err != nil {
		return nil, err
	}

	if err = c.prepareSyncer(); err != nil {
		return nil, err
	}

	if err := c.checkBinlogRowFormat(); err != nil {
		return nil, err
	}

	return c, nil
}
Example #26
func TestParseQuery(t *testing.T) {
	variations := [...]string{
		"ALTER TABLE t1 ADD c1 VARCHAR(256) DEFAULT",
		"alter table t1 add c1 varchar(256) default",
		"ALTER TABLE `t1` ADD `c1` VARCHAR(256) DEFAULT",
	}
	for _, v := range variations {
		q, err := ParseQuery(v)
		assert.NoError(t, err)
		log.Infof("query: %v", q)
		assert.Equal(t, "t1", q.Table)
		assert.Equal(t, AlterOp("ADD"), q.Operation)
		assert.Equal(t, "c1", q.Column)
		assert.Equal(t, "VARCHAR(256)", q.Type)
		assert.Equal(t, "DEFAULT", q.Extra)
	}

	_, err := ParseQuery("UPDATE TABLE t1 ADD c1 VARCHAR(256)")
	assert.Equal(t, ErrIgnored, err)

	q, err := ParseQuery("ALTER TABLE db1.t1 ADD c1 VARCHAR(256) DEFAULT")
	assert.NoError(t, err)
	assert.Equal(t, "db1", q.Schema)
	assert.Equal(t, "t1", q.Table)

	q, err = ParseQuery("ALTER TABLE `db1.t1` ADD c1 VARCHAR(256) DEFAULT")
	assert.NoError(t, err)
	assert.Equal(t, "", q.Schema)
	assert.Equal(t, "db1.t1", q.Table)

	// BUG: this doesn't work
	//q, err = ParseQuery("ALTER TABLE db1.`t1 2` ADD c1 VARCHAR(256) DEFAULT")
	//assert.NoError(t, err)
	//assert.Equal(t, "db1", q.Schema)
	//assert.Equal(t, "t1 2", q.Table)
}
func (h *testRowsEventHandler) Do(e *RowsEvent) error {
	log.Infof("%s %v\n", e.Action, e.Rows)
	return nil
}
Example #28
// RunTask returns nil when the underlying task ends, or returns the error it
// generated.
func (ze *zElector) RunTask(task *electorTask) error {
	leaderPath := path.Join(ze.path, "leader")
	for {
		_, err := zkhelper.CreateRecursive(ze.zconn, leaderPath, "", 0, zk.WorldACL(zkhelper.PERM_FILE))
		if err == nil || zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) {
			break
		}
		log.Warnf("election leader create failed: %v", err)
		time.Sleep(500 * time.Millisecond)
	}

	for {
		err := ze.Lock("RunTask")
		if err != nil {
			log.Warnf("election lock failed: %v", err)
			if err == zkhelper.ErrInterrupted {
				return zkhelper.ErrInterrupted
			}
			continue
		}
		// Confirm your win and deliver acceptance speech. This notifies
		// listeners who will have been watching the leader node for
		// changes.
		_, err = ze.zconn.Set(leaderPath, []byte(ze.contents), -1)
		if err != nil {
			log.Warnf("election promotion failed: %v", err)
			continue
		}

		log.Infof("election promote leader %v", leaderPath)
		taskErrChan := make(chan error)
		go func() {
			taskErrChan <- task.Run()
		}()

	watchLeader:
		// Watch the leader so we can get notified if something goes wrong.
		data, _, watch, err := ze.zconn.GetW(leaderPath)
		if err != nil {
			log.Warnf("election unable to watch leader node %v %v", leaderPath, err)
			// FIXME(msolo) Add delay
			goto watchLeader
		}

		if string(data) != ze.contents {
			log.Warnf("election unable to promote leader")
			task.Stop()
			// We won the election, but we didn't become the leader. How is that possible?
			// (see Bush v. Gore for some inspiration)
			// It means:
			//   1. Someone isn't playing by the election rules (a bad actor).
			//      Hard to detect - let's assume we don't have this problem. :)
			//   2. We lost our connection somehow and the ephemeral lock was cleared,
			//      allowing someone else to win the election.
			continue
		}

		// This is where we start our target process and watch for its failure.
	waitForEvent:
		select {
		case <-ze.interrupted:
			log.Warn("election interrupted - stop child process")
			task.Stop()
			// Once the process dies from the signal, this will all tear down.
			goto waitForEvent
		case taskErr := <-taskErrChan:
			// If our code fails, unlock to trigger an election.
			log.Infof("election child process ended: %v", taskErr)
			ze.Unlock()
			if task.Interrupted() {
				log.Warnf("election child process interrupted - stepping down")
				return zkhelper.ErrInterrupted
			}
			continue
		case zevent := <-watch:
			// We had a zk connection hiccup.  We have a few choices,
			// but it depends on the constraints and the events.
			//
			// If we get SESSION_EXPIRED our connection loss triggered an
			// election that we won't have won and the thus the lock was
			// automatically freed. We have no choice but to start over.
			if zevent.State == zk.StateExpired {
				log.Warnf("election leader watch expired")
				task.Stop()
				continue
			}

			// Otherwise, we had an intermittent issue or something touched
			// the node. Either we lost our position or someone broke
			// protocol and touched the leader node.  We just reconnect and
			// revalidate. In the meantime, assume we are still the leader
			// until we determine otherwise.
			//
			// On a reconnect we will be able to see the leader
			// information. If we still hold the position, great. If not, we
			// kill the associated process.
			//
			// On a leader node change, we need to perform the same
			// validation. It's possible an election completes without the
			// old leader realizing he is out of touch.
			log.Warnf("election leader watch event %v", zevent)
			goto watchLeader
		}
	}
	panic("unreachable")
}
Example #29
func (c *Canal) startSyncBinlog() error {
	pos := mysql.Position{Name: c.master.Name, Pos: c.master.Position}
	log.Infof("Start sync'ing binlog from %v", pos)
	s, err := c.syncer.StartSync(pos)
	if err != nil {
		return errors.Errorf("Failed starting sync at %v: %v", pos, err)
	}

	originalTimeout := time.Second
	timeout := originalTimeout
	forceSavePos := false
	for {
		ev, err := s.GetEventTimeout(timeout)
		if err != nil && err != replication.ErrGetEventTimeout {
			return errors.Trace(err)
		} else if err == replication.ErrGetEventTimeout {
			if timeout == 2*originalTimeout {
				log.Debugf("Flushing event handlers since sync has gone idle")
				if err := c.flushEventHandlers(); err != nil {
					log.Warnf("Error occurred during flush: %v", err)
				}
			}
			timeout = 2 * timeout
			continue
		}

		timeout = time.Second

		//next binlog pos
		pos.Pos = ev.Header.LogPos

		forceSavePos = false

		log.Debugf("Syncing %v", ev)
		switch e := ev.Event.(type) {
		case *replication.RotateEvent:
			c.flushEventHandlers()
			pos.Name = string(e.NextLogName)
			pos.Pos = uint32(e.Position)
			// r.ev <- pos
			forceSavePos = true
			log.Infof("Rotate binlog to %v", pos)
		case *replication.RowsEvent:
			// we only care about row-based events
			if err = c.handleRowsEvent(ev); err != nil {
				log.Errorf("Error handling rows event: %v", err)
				return errors.Trace(err)
			}
		case *replication.QueryEvent:
			if err = c.handleQueryEvent(ev); err != nil {
				log.Errorf("Error handling rows event: %v", err)
				return errors.Trace(err)
			}
		default:
			log.Debugf("Ignored event: %+v", e)
		}
		c.master.Update(pos.Name, pos.Pos)
		c.master.Save(forceSavePos)
	}

	return nil
}
Example #30
// LockWithTimeout returns nil when the lock is acquired. A lock is
// held if the file exists and you are the creator. Setting the wait
// to zero makes this a nonblocking lock check.
//
// FIXME(msolo) Disallow non-super users from removing the lock?
func (zm *zMutex) LockWithTimeout(wait time.Duration, desc string) (err error) {
	timer := time.NewTimer(wait)
	defer func() {
		if panicErr := recover(); panicErr != nil || err != nil {
			zm.deleteLock()
		}
	}()
	// Ensure the rendezvous node is here.
	// FIXME(msolo) Assuming locks are contended, it will be cheaper to assume this just
	// exists.
	_, err = zkhelper.CreateRecursive(zm.zconn, zm.path, "", 0, zk.WorldACL(zkhelper.PERM_DIRECTORY))
	if err != nil && !zkhelper.ZkErrorEqual(err, zk.ErrNodeExists) {
		return err
	}

	lockPrefix := path.Join(zm.path, "lock-")
	zflags := zk.FlagSequence
	if zm.ephemeral {
		zflags = zflags | zk.FlagEphemeral
	}

	// update node content
	var lockContent map[string]interface{}
	err = json.Unmarshal([]byte(zm.contents), &lockContent)
	if err != nil {
		return err
	}
	lockContent["desc"] = desc
	newContent, err := json.Marshal(lockContent)
	if err != nil {
		return err
	}

createlock:
	lockCreated, err := zm.zconn.Create(lockPrefix, newContent, int32(zflags), zk.WorldACL(zkhelper.PERM_FILE))
	if err != nil {
		return err
	}
	name := path.Base(lockCreated)
	zm.mu.Lock()
	zm.name = name
	zm.mu.Unlock()

trylock:
	children, _, err := zm.zconn.Children(zm.path)
	if err != nil {
		return fmt.Errorf("zkutil: trylock failed %v", err)
	}
	sort.Strings(children)
	if len(children) == 0 {
		return fmt.Errorf("zkutil: empty lock: %v", zm.path)
	}

	if children[0] == name {
		// We are the lock owner.
		return nil
	}

	if zm.onRetryLock != nil {
		zm.onRetryLock()
	}

	// This is the degenerate case of a nonblocking lock check. It's not optimal, but
	// also probably not worth optimizing.
	if wait == 0 {
		return zkhelper.ErrTimeout
	}
	prevLock := ""
	for i := 1; i < len(children); i++ {
		if children[i] == name {
			prevLock = children[i-1]
			break
		}
	}
	if prevLock == "" {
		// This is an interesting case. The node disappeared
		// underneath us, probably due to a session loss. We can
		// recreate the lock node (with a new sequence number) and
		// keep trying.
		log.Warnf("zkutil: no lock node found: %v/%v", zm.path, zm.name)
		goto createlock
	}

	zkPrevLock := path.Join(zm.path, prevLock)
	exist, stat, watch, err := zm.zconn.ExistsW(zkPrevLock)
	if err != nil {
		// FIXME(msolo) Should this be a retry?
		return fmt.Errorf("zkutil: unable to watch previous lock node %v %v", zkPrevLock, err)
	}
	if stat == nil || !exist {
		goto trylock
	}
	select {
	case <-timer.C:
		return zkhelper.ErrTimeout
	case <-zm.interrupted:
		return zkhelper.ErrInterrupted
	case event := <-watch:
		log.Infof("zkutil: lock event: %v", event)
		// The precise event doesn't matter - try to read again regardless.
		goto trylock
	}
	panic("unexpected")
}
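To close, a hedged sketch of the nonblocking check mentioned in the doc comment above; it assumes zm already exists and that zMutex exposes an Unlock counterpart (the elector code above calls Unlock on its side), both of which are outside this excerpt.

// isLockHeldElsewhereSketch is hypothetical: a zero wait turns
// LockWithTimeout into a pure lock check, per the comment above.
func isLockHeldElsewhereSketch(zm *zMutex) (bool, error) {
	err := zm.LockWithTimeout(0, "nonblocking probe")
	switch err {
	case nil:
		// we acquired the lock; release it again since this was only a probe
		// (Unlock is assumed to exist, mirroring the elector's usage above)
		zm.Unlock()
		return false, nil
	case zkhelper.ErrTimeout:
		// someone else holds the lock
		return true, nil
	default:
		return false, err
	}
}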