Example #1
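// doCommand sends a command to the Redis node, dialing the connection on demand
// and retrying up to three times before giving up.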
func (n *Node) doCommand(cmd string, args ...interface{}) (interface{}, error) {
	var err error
	var v interface{}
	for i := 0; i < 3; i++ {
		if n.conn == nil {
			n.conn, err = redis.DialTimeout("tcp", n.Addr, 5*time.Second, 0, 0)
			if err != nil {
				log.Errorf("dial %s error: %v, try again", n.Addr, err)
				continue
			}
		}

		v, err = n.conn.Do(cmd, args...)
		if err != nil {
			log.Errorf("do %s command for %s error: %v, try again", cmd, n.Addr, err)
			n.conn.Close()
			n.conn = nil
			continue
		} else {
			return v, nil
		}
	}

	// reaching here means every attempt failed; Redis may be down.
	return nil, err
}
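A hedged usage sketch: assuming a Node constructed with the Addr field used above, the retrying helper could be driven like this (the command and key are illustrative):

n := &Node{Addr: "127.0.0.1:6379"}
if v, err := n.doCommand("GET", "mykey"); err != nil {
	// all three attempts failed; the node is likely down
	log.Errorf("node %s unavailable: %v", n.Addr, err)
} else {
	log.Infof("reply: %v", v)
}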
Example #2
func newTableReader(base string, index int64, useMmap bool) (*tableReader, error) {
	if index <= 0 {
		return nil, fmt.Errorf("invalid index %d", index)
	}
	t := new(tableReader)
	t.base = base
	t.index = index
	t.useMmap = useMmap

	var err error

	if err = t.check(); err != nil {
		log.Errorf("check %d error: %s, try to repair", t.index, err.Error())

		if err = t.repair(); err != nil {
			log.Errorf("repair %d error: %s", t.index, err.Error())
			return nil, err
		}
	}

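	// release the file handles opened during check/repair before returning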
	t.close()

	return t, nil
}
Example #3
func (s *snapshotStore) checkSnapshots() error {
	cfg := s.cfg
	snapshots, err := ioutil.ReadDir(cfg.Snapshot.Path)
	if err != nil {
		log.Errorf("read %s error: %s", cfg.Snapshot.Path, err.Error())
		return err
	}

	names := []string{}
	for _, info := range snapshots {
		if path.Ext(info.Name()) == ".tmp" {
			log.Errorf("temp snapshot file name %s, try remove", info.Name())
			if err := os.Remove(path.Join(cfg.Snapshot.Path, info.Name())); err != nil {
				log.Errorf("remove %s error: %s", info.Name(), err.Error())
			}
			continue
		}

		if _, err := parseSnapshotName(info.Name()); err != nil {
			log.Errorf("invalid snapshot file name %s, err: %s", info.Name(), err.Error())
			continue
		}

		names = append(names, info.Name())
	}

	// sorted from oldest to newest
	sort.Strings(names)

	s.names = names

	s.purge(false)

	return nil
}
Example #4
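// fullSync requests a full dump from the master, saves it to a temporary file,
// and loads it into the local store.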
func (m *master) fullSync() error {
	log.Info("begin full sync")

	if err := m.conn.Send("fullsync"); err != nil {
		return err
	}

	m.state.Set(replSyncState)

	dumpPath := path.Join(m.app.cfg.DataDir, "master.dump")
	f, err := os.OpenFile(dumpPath, os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}

	defer os.Remove(dumpPath)

	err = m.conn.ReceiveBulkTo(f)
	f.Close()
	if err != nil {
		log.Errorf("read dump data error %s", err.Error())
		return err
	}

	if _, err = m.app.ldb.LoadDumpFile(dumpPath); err != nil {
		log.Errorf("load dump file error %s", err.Error())
		return err
	}

	return nil
}
Example #5
// Submit submits the current batch of actions in bulk and resets Count to 0.
func (b *Bulker) Submit() error {
	size := b.bulker.NumberOfActions()
	if size == 0 {
		return nil
	}
	b.LastResponse, b.LastError = b.bulker.Do()
	if b.LastError != nil {
		log.Errorf("Bulk update %d/%d failed due to %v: %+v", size, b.MaxActions, b.LastError, b.LastResponse)
		return b.LastError
	}
	if b.LastResponse.Errors {
		var buffer bytes.Buffer
		failed := b.LastResponse.Failed()
		count := len(failed)
		buffer.WriteString(fmt.Sprintf("%v actions failed in bulk update:\n", count))
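		// log at most the first three failures; elide the rest with an ellipsis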
		for i, er := range failed {
			buffer.WriteString(fmt.Sprintf("\t%v:%v\n", er, er.Error))
			if i == 2 {
				if count > 3 {
					buffer.WriteString("\t...\n")
				}
				break
			}
		}
		log.Errorf("%s", buffer.String())
		// show bulk errors but continue
		//b.LastError = errors.Errorf("%v actions failed during bulk update", count)
	} else {
		log.Debugf("Bulk update %d/%d succeeded", size, b.MaxActions)
	}
	return b.LastError
}
Example #6
// Elect selects the best candidate slave: highest priority first, then the most up-to-date replication offset.
func (g *Group) Elect() (string, error) {
	g.m.Lock()
	defer g.m.Unlock()

	var addr string
	var checkOffset int64
	var checkPriority int

	for _, slave := range g.Slaves {
		m, err := slave.doRelpInfo()
		if err != nil {
			log.Infof("slave %s get replication info err %v, skip it", slave.Addr, err)
			continue
		}

		if m["slave"] == MasterType {
			log.Errorf("server %s is not slave now, skip it", slave.Addr)
			continue
		}

		if m["master_link_status"] == "up" {
			log.Infof("slave %s master_link_status is up, master %s may be not down???",
				slave.Addr, g.Master.Addr)
			return "", ErrNodeAlive
		}

		priority, _ := strconv.Atoi(m["slave_priority"])
		replOffset, _ := strconv.ParseInt(m["slave_repl_offset"], 10, 64)
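		// a failed parse leaves the zero value, which simply ranks this slave lowest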

		used := false
		// like redis-sentinel: check priority first, then the slave repl offset
		if checkPriority < priority {
			used = true
		} else if checkPriority == priority {
			if checkOffset < replOffset {
				used = true
			}
		}

		if used {
			addr = slave.Addr
			checkPriority = priority
			checkOffset = replOffset
		}
	}

	if len(addr) == 0 {
		log.Errorf("no proper candidate to be promoted")
		return "", ErrNoCandidate
	}

	log.Infof("select slave %s as new master, priority:%d, repl_offset:%d", addr, checkPriority, checkOffset)

	return addr, nil
}
Example #7
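// purgeTableReaders closes each reader and deletes its data and meta files,
// logging any removal errors.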
func (s *FileStore) purgeTableReaders(purges []*tableReader) {
	for _, r := range purges {
		dataName := fmtTableDataName(r.base, r.index)
		metaName := fmtTableMetaName(r.base, r.index)
		r.Close()
		if err := os.Remove(dataName); err != nil {
			log.Errorf("purge table data %s err: %s", dataName, err.Error())
		}
		if err := os.Remove(metaName); err != nil {
			log.Errorf("purge table meta %s err: %s", metaName, err.Error())
		}
	}
}
Example #8
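// flushEventHandlers invokes Complete on every handler; ordinary errors are
// only logged, but ErrHandleInterrupted aborts the loop.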
func (c *Canal) flushEventHandlers() error {
	c.rsLock.Lock()
	defer c.rsLock.Unlock()

	var err error
	for _, h := range c.rsHandlers {
		if err = h.Complete(); err != nil && err != ErrHandleInterrupted {
			log.Errorf("Complete %v err: %v", h, err)
		} else if err == ErrHandleInterrupted {
			log.Errorf("Complete %v err, interrupted", h)
			return ErrHandleInterrupted
		}
	}
	return nil
}
Example #9
func (s *FileStore) Close() error {
	close(s.quit)

	s.wm.Lock()
	s.rm.Lock()

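	// flush the in-progress table; on success, close the returned reader and the writer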
	if r, err := s.w.Flush(); err != nil {
		if err != errNilHandler {
			log.Errorf("close err: %s", err.Error())
		}
	} else {
		r.Close()
		s.w.Close()
	}

	for i := range s.rs {
		s.rs[i].Close()
	}

	s.rs = tableReaders{}

	s.rm.Unlock()
	s.wm.Unlock()

	return nil
}
Example #10
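// Do converts a rows event into Elasticsearch bulk requests per the matching
// rule and executes them in one bulk call.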
func (h *rowsEventHandler) Do(e *canal.RowsEvent) error {
	rule, ok := h.r.rules[ruleKey(e.Table.Schema, e.Table.Name)]
	if !ok {
		return nil
	}

	var reqs []*elastic.BulkRequest
	var err error
	switch e.Action {
	case canal.InsertAction:
		reqs, err = h.r.makeInsertRequest(rule, e.Rows)
	case canal.DeleteAction:
		reqs, err = h.r.makeDeleteRequest(rule, e.Rows)
	case canal.UpdateAction:
		reqs, err = h.r.makeUpdateRequest(rule, e.Rows)
	default:
		return errors.Errorf("invalid rows action %s", e.Action)
	}

	if err != nil {
		return errors.Errorf("make %s ES request err %v", e.Action, err)
	}

	if err := h.r.doBulk(reqs); err != nil {
		log.Errorf("do ES bulks err %v, stop", err)
		return canal.ErrHandleInterrupted
	}

	return nil
}
Example #11
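// load scans the base dir for NNNNNNNN.data files and opens a table reader for each.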
func (s *FileStore) load() error {
	fs, err := ioutil.ReadDir(s.base)
	if err != nil {
		return err
	}

	s.rs = make(tableReaders, 0, len(fs))

	var r *tableReader
	var index int64
	for _, f := range fs {
		if _, err := fmt.Sscanf(f.Name(), "%08d.data", &index); err == nil {
			if r, err = newTableReader(s.base, index, s.cfg.Replication.UseMmap); err != nil {
				log.Errorf("load table %s err: %s", f.Name(), err.Error())
			} else {
				s.rs = append(s.rs, r)
			}
		}
	}

	return s.rs.check()
}
Example #12
func (c *Canal) travelRowsEventHandler(e *RowsEvent) error {
	c.rsLock.Lock()
	defer c.rsLock.Unlock()

	var err error
	for _, h := range c.rsHandlers {
		if err = h.Do(e); err != nil && err != ErrHandleInterrupted {
			log.Errorf("handle %v err: %v", h, err)
		} else if err == ErrHandleInterrupted {
			log.Errorf("handle %v err, interrupted", h)
			return ErrHandleInterrupted
		}
	}
	return nil
}
Example #13
func (c *Canal) run() error {
	defer c.wg.Done()

	if err := c.tryDump(); err != nil {
		log.Errorf("Dump failed due to: %v", err)
		return err
	}
	close(c.dumpDoneCh)
	if err := c.startSyncBinlog(); err != nil {
		if !c.isClosed() {
			log.Errorf("Sync failed due to: %v", err)
		}
		return errors.Trace(err)
	}
	return nil
}
Example #14
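// Close unmaps the mapping and closes the file, logging (but not returning) any errors.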
func (m *mmapReadFile) Close() error {
	if m.m != nil {
		if err := m.m.Unmap(); err != nil {
			log.Errorf("unmap %s error %s", m.name, err.Error())
		}
		m.m = nil
	}

	if m.f != nil {
		if err := m.f.Close(); err != nil {
			log.Errorf("close %s error %s", m.name, err.Error())
		}
		m.f = nil
	}

	return nil
}
Example #15
func (r *River) Run() error {
	if err := r.canal.Start(); err != nil {
		log.Errorf("start canal err %v", err)
		return err
	}

	return nil
}
Example #16
func (c *Canal) run() error {
	defer c.wg.Done()

	if err := c.tryDump(); err != nil {
		log.Errorf("canal dump mysql err: %v", err)
		return err
	}

	close(c.dumpDoneCh)

	if err := c.startSyncBinlog(); err != nil {
		if !c.isClosed() {
			log.Errorf("canal start sync binlog err: %v", err)
		}
		return err
	}

	return nil
}
Example #17
func (h *dumpParseHandler) Data(db string, table string, values []string) error {
	if h.c.isClosed() {
		return errCanalClosed
	}

	tableInfo, err := h.c.GetTable(db, table)
	if err != nil {
		log.Errorf("get %s.%s information err: %v", db, table, err)
		return errors.Trace(err)
	}

	vs := make([]interface{}, len(values))
	log.Debugf("Handling Data: %v", values)
	for i, v := range values {
		if v == "NULL" {
			vs[i] = nil
		} else if firstChar := v[0]; firstChar == '\'' || firstChar == '"' {
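			// strip the surrounding quote characters from string literals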
			vs[i] = v[1 : len(v)-1]
		} else {
			if tableInfo.Columns[i].Type == schema.TYPE_NUMBER {
				n, err := strconv.ParseInt(v, 10, 64)
				if err != nil {
					log.Errorf("parse row %v at %d error %v, skip", values, i, err)
					return dump.ErrSkip
				}
				vs[i] = n
			} else if tableInfo.Columns[i].Type == schema.TYPE_FLOAT {
				f, err := strconv.ParseFloat(v, 64)
				if err != nil {
					log.Errorf("parse row %v at %d error %v, skip", values, i, err)
					return dump.ErrSkip
				}
				vs[i] = f
			} else {
				log.Errorf("parse row %v at %d err: invalid type %v for value %v, skip", values, i, tableInfo.Columns[i].Type, v)
				return dump.ErrSkip
			}
		}
	}

	events := newRowsEvent(tableInfo, InsertAction, [][]interface{}{vs})
	return h.c.travelRowsEventHandler(events)
}
Example #18
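// Data converts one row from the dump into typed values and routes it through
// the rows-event handlers as an insert.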
func (r *Replication) run() {
	defer r.wg.Done()

	syncTc := time.NewTicker(1 * time.Second)
	purgeTc := time.NewTicker(1 * time.Hour)

	for {
		select {
		case <-purgeTc.C:
			n := r.cfg.Replication.ExpiredLogDays * 24 * 3600
			r.m.Lock()
			err := r.s.PurgeExpired(int64(n))
			r.m.Unlock()
			if err != nil {
				log.Errorf("purge expired log error %s", err.Error())
			}
		case <-syncTc.C:
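			// SyncLog levels (assumed from ledisdb's config): 0 = no sync, 1 = sync every second, 2 = sync on every write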
			if r.cfg.Replication.SyncLog == 1 {
				r.m.Lock()
				err := r.s.Sync()
				r.m.Unlock()
				if err != nil {
					log.Errorf("sync store error %s", err.Error())
				}
			}
			if r.cfg.Replication.SyncLog != 2 {
				// sync the commit id every second
				r.m.Lock()
				err := r.updateCommitID(r.commitID, true)
				r.m.Unlock()

				if err != nil {
					log.Errorf("sync commitid error %s", err.Error())
				}
			}
		case <-r.quit:
			syncTc.Stop()
			purgeTc.Stop()
			return
		}
	}
}
Example #19
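// Apply decodes a raft log entry into an action and applies it to the master FSM.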
func (fsm *masterFSM) Apply(l *raft.Log) interface{} {
	var a action
	if err := json.Unmarshal(l.Data, &a); err != nil {
		log.Errorf("decode raft log err %v", err)
		return err
	}

	fsm.handleAction(&a)

	return nil
}
Example #20
func (r *River) doBulk(reqs []*elastic.BulkRequest) error {
	if len(reqs) == 0 {
		return nil
	}

	if resp, err := r.es.Bulk(reqs); err != nil {
		log.Errorf("sync docs err %v after binlog %s", err, r.canal.SyncedPosition())
		return errors.Trace(err)
	} else if resp.Errors {
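		// item-level failures are only logged; the bulk as a whole still succeeds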
		for i := 0; i < len(resp.Items); i++ {
			for action, item := range resp.Items[i] {
				if len(item.Error) > 0 {
					log.Errorf("%s index: %s, type: %s, id: %s, status: %d, error: %s",
						action, item.Index, item.Type, item.ID, item.Status, item.Error)
				}
			}
		}
	}

	return nil
}
Example #21
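// startSyncBinlog streams binlog events from the saved position, updating and
// persisting the position as events arrive.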
func (c *Canal) startSyncBinlog() error {
	pos := mysql.Position{Name: c.master.Name, Pos: c.master.Position}

	log.Infof("start sync binlog at %v", pos)

	s, err := c.syncer.StartSync(pos)
	if err != nil {
		return errors.Errorf("start sync replication at %v error %v", pos, err)
	}

	timeout := time.Second
	forceSavePos := false
	for {
		ev, err := s.GetEventTimeout(timeout)
		if err != nil && err != replication.ErrGetEventTimeout {
			return errors.Trace(err)
		} else if err == replication.ErrGetEventTimeout {
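			// no event yet: back off exponentially and poll again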
			timeout = 2 * timeout
			continue
		}

		timeout = time.Second

		// next binlog position
		pos.Pos = ev.Header.LogPos

		forceSavePos = false

		switch e := ev.Event.(type) {
		case *replication.RotateEvent:
			pos.Name = string(e.NextLogName)
			pos.Pos = uint32(e.Position)
			// r.ev <- pos
			forceSavePos = true
			log.Infof("rotate binlog to %v", pos)
		case *replication.RowsEvent:
			// we only care about row-based events
			if err = c.handleRowsEvent(ev); err != nil {
				log.Errorf("handle rows event error %v", err)
				return errors.Trace(err)
			}
		case *replication.TableMapEvent:
			continue
		default:
		}

		c.master.Update(pos.Name, pos.Pos)
		c.master.Save(forceSavePos)
	}
}
Example #22
func (s *syncer) Do(e *canal.RowsEvent) error {
	if !s.ignoreEvent(e) {
		actions, err := Convert(s.rules, e)
		if err == nil {
			err = s.bulker.Add(actions)
		}
		if err != nil {
			log.Errorf("Handler failing due to %v", err)
			return canal.ErrHandleInterrupted
		}
	}
	return nil
}
Example #23
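// checkLeader launches the leader-election task in the background, logging any elector error.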
func (z *Zk) checkLeader() {
	task := new(electorTask)
	task.z = z
	task.interrupted.Set(false)
	task.stop = make(chan struct{})

	go func() {
		err := z.elector.RunTask(task)
		if err != nil {
			log.Errorf("run elector task err: %v", err)
		}
	}()
}
Example #24
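// onAfterFailover runs all registered after-failover handlers; only
// ErrGiveupFailover aborts the chain.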
func (a *App) onAfterFailover(downMaster string, newMaster string) error {
	a.hMutex.Lock()
	defer a.hMutex.Unlock()

	for _, h := range a.afterHandlers {
		if err := h(downMaster, newMaster); err != nil {
			log.Errorf("do after failover handler for %s -> %s err: %v", downMaster, newMaster, err)
			if err == ErrGiveupFailover {
				return ErrGiveupFailover
			}
		}
	}

	return nil
}
Example #25
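// genericWrite encodes the command result in the negotiated content type.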
func (w *httpWriter) genericWrite(result interface{}) {
	m := map[string]interface{}{
		w.cmd: result,
	}
	switch w.contentType {
	case "json":
		writeJSON(&m, w.w)
	case "bson":
		writeBSON(&m, w.w)
	case "msgpack":
		writeMsgPack(&m, w.w)
	default:
		log.Errorf("invalid content type %s", w.contentType)
	}
}
Example #26
func (t *tableWriter) storeLog(l *Log) error {
	if l.ID == 0 {
		return ErrStoreLogID
	}

	if t.closed {
		return fmt.Errorf("table writer is closed")
	}

	if t.last > 0 && l.ID != t.last+1 {
		return ErrStoreLogID
	}

	if t.data != nil && t.data.Offset() > t.maxLogSize {
		return errTableNeedFlush
	}

	var err error
	if err = t.openFile(); err != nil {
		return err
	}

	offsetPos := t.data.Offset()
	if err = l.Encode(t.data); err != nil {
		return err
	}

	binary.BigEndian.PutUint32(t.posBuf, uint32(offsetPos))
	if _, err = t.meta.Write(t.posBuf); err != nil {
		return err
	}

	if t.first == 0 {
		t.first = l.ID
	}

	t.last = l.ID
	t.lastTime = l.CreateTime

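	// syncType 2 means fsync after every stored log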
	if t.syncType == 2 {
		if err := t.data.Sync(); err != nil {
			log.Errorf("sync table error %s", err.Error())
		}
	}

	return nil
}
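For context, errTableNeedFlush tells the caller that the current data file is full. A minimal sketch of the rotation a caller might perform, reusing the writer's Flush seen in Example #9 (rotateAndStore itself is a hypothetical helper):

func (s *FileStore) rotateAndStore(l *Log) error {
	err := s.w.storeLog(l)
	if err == errTableNeedFlush {
		// the table hit maxLogSize: flush it into a reader, keep the reader, retry once
		r, ferr := s.w.Flush()
		if ferr != nil {
			return ferr
		}
		s.rs = append(s.rs, r)
		err = s.w.storeLog(l)
	}
	return err
}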
Example #27
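// run re-checks the snapshot directory every hour until quit is closed.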
func (s *snapshotStore) run() {
	t := time.NewTicker(60 * time.Minute)
	defer t.Stop()

	for {
		select {
		case <-t.C:
			s.Lock()
			if err := s.checkSnapshots(); err != nil {
				log.Errorf("check snapshots error %s", err.Error())
			}
			s.Unlock()
		case <-s.quit:
			return
		}
	}
}
Example #28
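// newZk connects to ZooKeeper (or an in-memory stub for tests), ensures the
// base dir exists, and kicks off leader election.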
func newZk(cfg *Config, fsm *masterFSM) (Cluster, error) {
	z := new(Zk)

	var err error

	if !strings.HasPrefix(cfg.Zk.BaseDir, "/zk") {
		return nil, fmt.Errorf("invalid zk base dir %s, must have prefix /zk", cfg.Zk.BaseDir)
	}

	addr := strings.Join(cfg.Zk.Addr, ",")
	if addr == "memory" {
		// only for test
		log.Infof("only for test, use memory")
		z.conn = zkhelper.NewConn()
	} else {
		z.conn, err = zkhelper.ConnectToZk(addr)
	}

	if err != nil {
		return nil, err
	}

	z.c = cfg
	z.fsm = fsm
	z.isLeader.Set(false)
	z.leaderCh = make(chan bool, 1)
	z.actionCh = make(chan *zkAction, 10)

	z.quit = make(chan struct{})

	if _, err = zkhelper.CreateOrUpdate(z.conn, cfg.Zk.BaseDir, "", 0, zkhelper.DefaultDirACLs(), true); err != nil {
		log.Errorf("create %s error: %v", cfg.Zk.BaseDir, err)
		return nil, err
	}

	onRetryLock := func() {
		z.noticeLeaderCh(false)
	}

	z.elector = createElection(z.conn, cfg.Zk.BaseDir, cfg.Addr, onRetryLock)

	z.checkLeader()

	return z, nil
}
Example #29
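// doRole parses the master's ROLE reply (type, repl offset, slave list) and
// refreshes the slave set.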
func (g *Group) doRole() error {
	v, err := g.Master.doRole()
	if err != nil {
		return ErrNodeDown
	}

	// the first line is server type
	serverType, _ := redis.String(v[0], nil)
	if serverType != MasterType {
		log.Errorf("server %s is not master now", g.Master.Addr)
		return ErrNodeType
	}

	// the second is the master replication offset
	g.Master.Offset, _ = redis.Int64(v[1], nil)

	// then slave list [host, port, offset]
	slaves, _ := redis.Values(v[2], nil)
	nodes := make(map[string]*Node, len(slaves))
	for i := 0; i < len(slaves); i++ {
		ss, _ := redis.Strings(slaves[i], nil)
		var n Node
		n.Addr = fmt.Sprintf("%s:%s", ss[0], ss[1])
		n.Offset, _ = strconv.ParseInt(ss[2], 10, 64)
		nodes[n.Addr] = &n
	}

	// we don't care much about slaves being added or removed, so just log it
	for addr := range nodes {
		if _, ok := g.Slaves[addr]; !ok {
			log.Infof("slave %s added", addr)
		}
	}

	for addr, slave := range g.Slaves {
		if _, ok := nodes[addr]; !ok {
			log.Infof("slave %s removed", addr)
			slave.close()
		}
	}

	g.Slaves = nodes
	return nil
}
Example #30
func (s *stat) Run(addr string) {
	if len(addr) == 0 {
		return
	}
	log.Infof("run status http server %s", addr)
	var err error
	s.l, err = net.Listen("tcp", addr)
	if err != nil {
		log.Errorf("listen stat addr %s err %v", addr, err)
		return
	}

	srv := http.Server{}
	mux := http.NewServeMux()
	mux.Handle("/stat", s)
	srv.Handler = mux

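	// Serve blocks until the listener is closed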
	srv.Serve(s.l)
}