func (n *Node) doCommand(cmd string, args ...interface{}) (interface{}, error) {
	var err error
	var v interface{}
	for i := 0; i < 3; i++ {
		if n.conn == nil {
			n.conn, err = redis.DialTimeout("tcp", n.Addr, 5*time.Second, 0, 0)
			if err != nil {
				log.Errorf("dial %s error: %v, try again", n.Addr, err)
				continue
			}
		}

		v, err = n.conn.Do(cmd, args...)
		if err != nil {
			log.Errorf("do %s command for %s error: %v, try again", cmd, n.Addr, err)
			n.conn.Close()
			n.conn = nil
			continue
		}

		return v, nil
	}

	// Reaching here means the command still failed after all retries; redis may be down.
	return nil, err
}
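// A minimal usage sketch (not part of the source): issue a command through the
// retrying wrapper above and convert the reply with redigo's String helper; the
// PING example and the pingNode name are only illustrative.
func pingNode(n *Node) error {
	reply, err := redis.String(n.doCommand("PING"))
	if err != nil {
		return err
	}
	log.Infof("node %s replied %s", n.Addr, reply)
	return nil
}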
func newTableReader(base string, index int64, useMmap bool) (*tableReader, error) {
	if index <= 0 {
		return nil, fmt.Errorf("invalid index %d", index)
	}

	t := new(tableReader)
	t.base = base
	t.index = index
	t.useMmap = useMmap

	var err error
	if err = t.check(); err != nil {
		log.Errorf("check %d error: %s, try to repair", t.index, err.Error())

		if err = t.repair(); err != nil {
			log.Errorf("repair %d error: %s", t.index, err.Error())
			return nil, err
		}
	}

	t.close()

	return t, nil
}
func (s *snapshotStore) checkSnapshots() error {
	cfg := s.cfg
	snapshots, err := ioutil.ReadDir(cfg.Snapshot.Path)
	if err != nil {
		log.Errorf("read %s error: %s", cfg.Snapshot.Path, err.Error())
		return err
	}

	names := []string{}
	for _, info := range snapshots {
		if path.Ext(info.Name()) == ".tmp" {
			log.Errorf("temp snapshot file name %s, try remove", info.Name())
			os.Remove(path.Join(cfg.Snapshot.Path, info.Name()))
			continue
		}

		if _, err := parseSnapshotName(info.Name()); err != nil {
			log.Errorf("invalid snapshot file name %s, err: %s", info.Name(), err.Error())
			continue
		}

		names = append(names, info.Name())
	}

	// from old to new
	sort.Strings(names)

	s.names = names

	s.purge(false)

	return nil
}
func (m *master) fullSync() error {
	log.Info("begin full sync")

	if err := m.conn.Send("fullsync"); err != nil {
		return err
	}

	m.state.Set(replSyncState)

	dumpPath := path.Join(m.app.cfg.DataDir, "master.dump")
	f, err := os.OpenFile(dumpPath, os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}

	defer os.Remove(dumpPath)

	err = m.conn.ReceiveBulkTo(f)
	f.Close()
	if err != nil {
		log.Errorf("read dump data error %s", err.Error())
		return err
	}

	if _, err = m.app.ldb.LoadDumpFile(dumpPath); err != nil {
		log.Errorf("load dump file error %s", err.Error())
		return err
	}

	return nil
}
// Submit submits the current batch of actions in bulk and resets Count to 0.
func (b *Bulker) Submit() error {
	size := b.bulker.NumberOfActions()
	if size == 0 {
		return nil
	}

	b.LastResponse, b.LastError = b.bulker.Do()
	if b.LastError != nil {
		log.Errorf("Bulk update %d/%d failed due to %v: %+v", size, b.MaxActions, b.LastError, b.LastResponse)
		return b.LastError
	}

	if b.LastResponse.Errors {
		var buffer bytes.Buffer
		failed := b.LastResponse.Failed()
		count := len(failed)
		buffer.WriteString(fmt.Sprintf("%v actions failed in bulk update:\n", count))
		for i, er := range failed {
			buffer.WriteString(fmt.Sprintf("\t%v:%v\n", er, er.Error))
			// only show the first three failures
			if i == 2 {
				if count > 3 {
					buffer.WriteString("\t...\n")
				}
				break
			}
		}
		// show bulk errors but continue
		log.Errorf("%s", buffer.String())
		//b.LastError = errors.Errorf("%v actions failed during bulk update", count)
	} else {
		log.Debugf("Bulk update %d/%d succeeded", size, b.MaxActions)
	}

	return b.LastError
}
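// A minimal usage sketch (not part of the source): drain whatever has been
// queued on the Bulker and report per-item failures; queueing via Add is
// assumed to happen elsewhere (see syncer.Do below), and flushBulker is a
// hypothetical caller name.
func flushBulker(b *Bulker) error {
	if err := b.Submit(); err != nil {
		// transport-level failure, already logged by Submit
		return err
	}
	if b.LastResponse != nil && b.LastResponse.Errors {
		log.Errorf("last bulk finished with %d failed actions", len(b.LastResponse.Failed()))
	}
	return nil
}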
// Elect picks the best candidate slave: highest slave_priority first, then the
// most up-to-date replication offset relative to the master.
func (g *Group) Elect() (string, error) {
	g.m.Lock()
	defer g.m.Unlock()

	var addr string
	var checkOffset int64 = 0
	var checkPriority int = 0

	for _, slave := range g.Slaves {
		m, err := slave.doRelpInfo()
		if err != nil {
			log.Infof("slave %s get replication info err %v, skip it", slave.Addr, err)
			continue
		}

		if m["slave"] == MasterType {
			log.Errorf("server %s is not slave now, skip it", slave.Addr)
			continue
		}

		if m["master_link_status"] == "up" {
			log.Infof("slave %s master_link_status is up, master %s may not be down", slave.Addr, g.Master.Addr)
			return "", ErrNodeAlive
		}

		priority, _ := strconv.Atoi(m["slave_priority"])
		replOffset, _ := strconv.ParseInt(m["slave_repl_offset"], 10, 64)

		used := false
		// like redis-sentinel, first check priority, then slave repl offset
		if checkPriority < priority {
			used = true
		} else if checkPriority == priority {
			if checkOffset < replOffset {
				used = true
			}
		}

		if used {
			addr = slave.Addr
			checkPriority = priority
			checkOffset = replOffset
		}
	}

	if len(addr) == 0 {
		log.Errorf("no proper candidate to be promoted")
		return "", ErrNoCandidate
	}

	log.Infof("select slave %s as new master, priority:%d, repl_offset:%d", addr, checkPriority, checkOffset)

	return addr, nil
}
func (s *FileStore) purgeTableReaders(purges []*tableReader) {
	for _, r := range purges {
		dataName := fmtTableDataName(r.base, r.index)
		metaName := fmtTableMetaName(r.base, r.index)

		r.Close()

		if err := os.Remove(dataName); err != nil {
			log.Errorf("purge table data %s err: %s", dataName, err.Error())
		}

		if err := os.Remove(metaName); err != nil {
			log.Errorf("purge table meta %s err: %s", metaName, err.Error())
		}
	}
}
func (c *Canal) flushEventHandlers() error {
	c.rsLock.Lock()
	defer c.rsLock.Unlock()

	var err error
	for _, h := range c.rsHandlers {
		if err = h.Complete(); err != nil && err != ErrHandleInterrupted {
			log.Errorf("Complete %v err: %v", h, err)
		} else if err == ErrHandleInterrupted {
			log.Errorf("Complete %v err, interrupted", h)
			return ErrHandleInterrupted
		}
	}

	return nil
}
func (s *FileStore) Close() error {
	close(s.quit)

	s.wm.Lock()
	s.rm.Lock()

	if r, err := s.w.Flush(); err != nil {
		if err != errNilHandler {
			log.Errorf("close err: %s", err.Error())
		}
	} else {
		r.Close()
		s.w.Close()
	}

	for i := range s.rs {
		s.rs[i].Close()
	}

	s.rs = tableReaders{}

	s.rm.Unlock()
	s.wm.Unlock()

	return nil
}
func (h *rowsEventHandler) Do(e *canal.RowsEvent) error {
	rule, ok := h.r.rules[ruleKey(e.Table.Schema, e.Table.Name)]
	if !ok {
		return nil
	}

	var reqs []*elastic.BulkRequest
	var err error

	switch e.Action {
	case canal.InsertAction:
		reqs, err = h.r.makeInsertRequest(rule, e.Rows)
	case canal.DeleteAction:
		reqs, err = h.r.makeDeleteRequest(rule, e.Rows)
	case canal.UpdateAction:
		reqs, err = h.r.makeUpdateRequest(rule, e.Rows)
	default:
		return errors.Errorf("invalid rows action %s", e.Action)
	}

	if err != nil {
		return errors.Errorf("make %s ES request err %v", e.Action, err)
	}

	if err := h.r.doBulk(reqs); err != nil {
		log.Errorf("do ES bulks err %v, stop", err)
		return canal.ErrHandleInterrupted
	}

	return nil
}
func (s *FileStore) load() error {
	fs, err := ioutil.ReadDir(s.base)
	if err != nil {
		return err
	}

	s.rs = make(tableReaders, 0, len(fs))

	var r *tableReader
	var index int64

	for _, f := range fs {
		if _, err := fmt.Sscanf(f.Name(), "%08d.data", &index); err == nil {
			if r, err = newTableReader(s.base, index, s.cfg.Replication.UseMmap); err != nil {
				log.Errorf("load table %s err: %s", f.Name(), err.Error())
			} else {
				s.rs = append(s.rs, r)
			}
		}
	}

	if err := s.rs.check(); err != nil {
		return err
	}

	return nil
}
func (c *Canal) travelRowsEventHandler(e *RowsEvent) error {
	c.rsLock.Lock()
	defer c.rsLock.Unlock()

	var err error
	for _, h := range c.rsHandlers {
		if err = h.Do(e); err != nil && err != ErrHandleInterrupted {
			log.Errorf("handle %v err: %v", h, err)
		} else if err == ErrHandleInterrupted {
			log.Errorf("handle %v err, interrupted", h)
			return ErrHandleInterrupted
		}
	}

	return nil
}
func (c *Canal) run() error {
	defer c.wg.Done()

	if err := c.tryDump(); err != nil {
		log.Errorf("Dump failed due to: %v", err)
		return err
	}

	close(c.dumpDoneCh)

	if err := c.startSyncBinlog(); err != nil {
		if !c.isClosed() {
			log.Errorf("Sync failed due to: %v", err)
		}
		return errors.Trace(err)
	}

	return nil
}
func (m *mmapReadFile) Close() error {
	if m.m != nil {
		if err := m.m.Unmap(); err != nil {
			log.Errorf("unmap %s error %s", m.name, err.Error())
		}
		m.m = nil
	}

	if m.f != nil {
		if err := m.f.Close(); err != nil {
			log.Errorf("close %s error %s", m.name, err.Error())
		}
		m.f = nil
	}

	return nil
}
func (r *River) Run() error {
	if err := r.canal.Start(); err != nil {
		log.Errorf("start canal err %v", err)
		return err
	}

	return nil
}
func (c *Canal) run() error {
	defer c.wg.Done()

	if err := c.tryDump(); err != nil {
		log.Errorf("canal dump mysql err: %v", err)
		return err
	}

	close(c.dumpDoneCh)

	if err := c.startSyncBinlog(); err != nil {
		if !c.isClosed() {
			log.Errorf("canal start sync binlog err: %v", err)
		}
		return err
	}

	return nil
}
func (h *dumpParseHandler) Data(db string, table string, values []string) error {
	if h.c.isClosed() {
		return errCanalClosed
	}

	tableInfo, err := h.c.GetTable(db, table)
	if err != nil {
		log.Errorf("get %s.%s information err: %v", db, table, err)
		return errors.Trace(err)
	}

	vs := make([]interface{}, len(values))
	log.Debugf("Handling Data: %v", values)

	for i, v := range values {
		if v == "NULL" {
			vs[i] = nil
		} else if firstChar := v[0]; firstChar == '\'' || firstChar == '"' {
			// quoted string value, strip the surrounding quotes
			vs[i] = v[1 : len(v)-1]
		} else {
			if tableInfo.Columns[i].Type == schema.TYPE_NUMBER {
				n, err := strconv.ParseInt(v, 10, 64)
				if err != nil {
					log.Errorf("parse row %v at %d error %v, skip", values, i, err)
					return dump.ErrSkip
				}
				vs[i] = n
			} else if tableInfo.Columns[i].Type == schema.TYPE_FLOAT {
				f, err := strconv.ParseFloat(v, 64)
				if err != nil {
					log.Errorf("parse row %v at %d error %v, skip", values, i, err)
					return dump.ErrSkip
				}
				vs[i] = f
			} else {
				log.Errorf("parse row %v at %d err: invalid type %v for value %v, skip", values, i, tableInfo.Columns[i].Type, v)
				return dump.ErrSkip
			}
		}
	}

	events := newRowsEvent(tableInfo, InsertAction, [][]interface{}{vs})
	return h.c.travelRowsEventHandler(events)
}
func (r *Replication) run() {
	defer r.wg.Done()

	syncTc := time.NewTicker(1 * time.Second)
	purgeTc := time.NewTicker(1 * time.Hour)

	for {
		select {
		case <-purgeTc.C:
			n := (r.cfg.Replication.ExpiredLogDays * 24 * 3600)
			r.m.Lock()
			err := r.s.PurgeExpired(int64(n))
			r.m.Unlock()
			if err != nil {
				log.Errorf("purge expired log error %s", err.Error())
			}
		case <-syncTc.C:
			if r.cfg.Replication.SyncLog == 1 {
				// SyncLog 1: sync the log store every second
				r.m.Lock()
				err := r.s.Sync()
				r.m.Unlock()
				if err != nil {
					log.Errorf("sync store error %s", err.Error())
				}
			}
			if r.cfg.Replication.SyncLog != 2 {
				// sync the commit id every second
				r.m.Lock()
				err := r.updateCommitID(r.commitID, true)
				r.m.Unlock()
				if err != nil {
					log.Errorf("sync commitid error %s", err.Error())
				}
			}
		case <-r.quit:
			syncTc.Stop()
			purgeTc.Stop()
			return
		}
	}
}
func (fsm *masterFSM) Apply(l *raft.Log) interface{} {
	var a action
	if err := json.Unmarshal(l.Data, &a); err != nil {
		log.Errorf("decode raft log err %v", err)
		return err
	}

	fsm.handleAction(&a)

	return nil
}
func (r *River) doBulk(reqs []*elastic.BulkRequest) error {
	if len(reqs) == 0 {
		return nil
	}

	if resp, err := r.es.Bulk(reqs); err != nil {
		log.Errorf("sync docs err %v after binlog %s", err, r.canal.SyncedPosition())
		return errors.Trace(err)
	} else if resp.Errors {
		for i := 0; i < len(resp.Items); i++ {
			for action, item := range resp.Items[i] {
				if len(item.Error) > 0 {
					log.Errorf("%s index: %s, type: %s, id: %s, status: %d, error: %s",
						action, item.Index, item.Type, item.ID, item.Status, item.Error)
				}
			}
		}
	}

	return nil
}
func (c *Canal) startSyncBinlog() error {
	pos := mysql.Position{Name: c.master.Name, Pos: c.master.Position}
	log.Infof("start sync binlog at %v", pos)

	s, err := c.syncer.StartSync(pos)
	if err != nil {
		return errors.Errorf("start sync replication at %v error %v", pos, err)
	}

	timeout := time.Second
	forceSavePos := false

	for {
		ev, err := s.GetEventTimeout(timeout)
		if err != nil && err != replication.ErrGetEventTimeout {
			return errors.Trace(err)
		} else if err == replication.ErrGetEventTimeout {
			// back off and wait longer for the next event
			timeout = 2 * timeout
			continue
		}

		timeout = time.Second

		// next binlog pos
		pos.Pos = ev.Header.LogPos

		forceSavePos = false

		switch e := ev.Event.(type) {
		case *replication.RotateEvent:
			pos.Name = string(e.NextLogName)
			pos.Pos = uint32(e.Position)
			// r.ev <- pos
			forceSavePos = true
			log.Infof("rotate binlog to %v", pos)
		case *replication.RowsEvent:
			// we only focus row based event
			if err = c.handleRowsEvent(ev); err != nil {
				log.Errorf("handle rows event error %v", err)
				return errors.Trace(err)
			}
		case *replication.TableMapEvent:
			continue
		default:
		}

		c.master.Update(pos.Name, pos.Pos)
		c.master.Save(forceSavePos)
	}

	return nil
}
func (s *syncer) Do(e *canal.RowsEvent) error {
	if !s.ignoreEvent(e) {
		actions, err := Convert(s.rules, e)
		if err == nil {
			err = s.bulker.Add(actions)
		}
		if err != nil {
			log.Errorf("Handler failing due to %v", err)
			return canal.ErrHandleInterrupted
		}
	}

	return nil
}
func (z *Zk) checkLeader() {
	task := new(electorTask)
	task.z = z
	task.interrupted.Set(false)
	task.stop = make(chan struct{})

	go func() {
		err := z.elector.RunTask(task)
		if err != nil {
			log.Errorf("run elector task err: %v", err)
		}
	}()
}
func (a *App) onAfterFailover(downMaster string, newMaster string) error {
	a.hMutex.Lock()
	defer a.hMutex.Unlock()

	for _, h := range a.afterHandlers {
		if err := h(downMaster, newMaster); err != nil {
			log.Errorf("do after failover handler for %s -> %s err: %v", downMaster, newMaster, err)
			if err == ErrGiveupFailover {
				return ErrGiveupFailover
			}
		}
	}

	return nil
}
func (w *httpWriter) genericWrite(result interface{}) {
	m := map[string]interface{}{
		w.cmd: result,
	}

	switch w.contentType {
	case "json":
		writeJSON(&m, w.w)
	case "bson":
		writeBSON(&m, w.w)
	case "msgpack":
		writeMsgPack(&m, w.w)
	default:
		log.Errorf("invalid content type %s", w.contentType)
	}
}
func (t *tableWriter) storeLog(l *Log) error {
	if l.ID == 0 {
		return ErrStoreLogID
	}

	if t.closed {
		return fmt.Errorf("table writer is closed")
	}

	if t.last > 0 && l.ID != t.last+1 {
		return ErrStoreLogID
	}

	if t.data != nil && t.data.Offset() > t.maxLogSize {
		return errTableNeedFlush
	}

	var err error
	if err = t.openFile(); err != nil {
		return err
	}

	offsetPos := t.data.Offset()
	if err = l.Encode(t.data); err != nil {
		return err
	}

	binary.BigEndian.PutUint32(t.posBuf, uint32(offsetPos))
	if _, err = t.meta.Write(t.posBuf); err != nil {
		return err
	}

	if t.first == 0 {
		t.first = l.ID
	}
	t.last = l.ID
	t.lastTime = l.CreateTime

	if t.syncType == 2 {
		if err := t.data.Sync(); err != nil {
			log.Errorf("sync table error %s", err.Error())
		}
	}

	return nil
}
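// A minimal sketch (not from the source; the function name and reader interface
// are assumptions) of how the fixed-size meta records written in storeLog can
// be used later: the meta file holds one 4-byte big-endian data-file offset per
// log, appended in id order, so a log is indexed by its id relative to the
// table's first id.
func lookupDataOffset(meta io.ReaderAt, first, id uint64) (uint32, error) {
	var buf [4]byte
	if _, err := meta.ReadAt(buf[:], int64((id-first)*4)); err != nil {
		return 0, err
	}
	return binary.BigEndian.Uint32(buf[:]), nil
}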
func (s *snapshotStore) run() {
	t := time.NewTicker(60 * time.Minute)
	defer t.Stop()

	for {
		select {
		case <-t.C:
			s.Lock()
			if err := s.checkSnapshots(); err != nil {
				log.Errorf("check snapshots error %s", err.Error())
			}
			s.Unlock()
		case <-s.quit:
			return
		}
	}
}
func newZk(cfg *Config, fsm *masterFSM) (Cluster, error) {
	z := new(Zk)

	var err error

	if !strings.HasPrefix(cfg.Zk.BaseDir, "/zk") {
		return nil, fmt.Errorf("invalid zk base dir %s, must have prefix /zk", cfg.Zk.BaseDir)
	}

	addr := strings.Join(cfg.Zk.Addr, ",")
	if addr == "memory" {
		// only for test
		log.Infof("only for test, use memory")
		z.conn = zkhelper.NewConn()
	} else {
		z.conn, err = zkhelper.ConnectToZk(addr)
	}

	if err != nil {
		return nil, err
	}

	z.c = cfg
	z.fsm = fsm

	z.isLeader.Set(false)
	z.leaderCh = make(chan bool, 1)
	z.actionCh = make(chan *zkAction, 10)
	z.quit = make(chan struct{})

	if _, err = zkhelper.CreateOrUpdate(z.conn, cfg.Zk.BaseDir, "", 0, zkhelper.DefaultDirACLs(), true); err != nil {
		log.Errorf("create %s error: %v", cfg.Zk.BaseDir, err)
		return nil, err
	}

	onRetryLock := func() {
		z.noticeLeaderCh(false)
	}

	z.elector = createElection(z.conn, cfg.Zk.BaseDir, cfg.Addr, onRetryLock)

	z.checkLeader()

	return z, nil
}
func (g *Group) doRole() error {
	v, err := g.Master.doRole()
	if err != nil {
		return ErrNodeDown
	}

	// reply shape of the redis ROLE command on a master:
	// ["master", <replication offset>, [[host, port, acked offset], ...]]

	// the first line is server type
	serverType, _ := redis.String(v[0], nil)
	if serverType != MasterType {
		log.Errorf("server %s is not master now", g.Master.Addr)
		return ErrNodeType
	}

	// second is master replication offset
	g.Master.Offset, _ = redis.Int64(v[1], nil)

	// then slave list [host, port, offset]
	slaves, _ := redis.Values(v[2], nil)

	nodes := make(map[string]*Node, len(slaves))
	for i := 0; i < len(slaves); i++ {
		ss, _ := redis.Strings(slaves[i], nil)

		var n Node
		n.Addr = fmt.Sprintf("%s:%s", ss[0], ss[1])
		n.Offset, _ = strconv.ParseInt(ss[2], 10, 64)

		nodes[n.Addr] = &n
	}

	// we don't care much about slaves being added or removed, so just log it
	for addr := range nodes {
		if _, ok := g.Slaves[addr]; !ok {
			log.Infof("slave %s added", addr)
		}
	}

	for addr, slave := range g.Slaves {
		if _, ok := nodes[addr]; !ok {
			log.Infof("slave %s removed", addr)
			slave.close()
		}
	}

	g.Slaves = nodes

	return nil
}
func (s *stat) Run(addr string) {
	if len(addr) == 0 {
		return
	}

	log.Infof("run status http server %s", addr)

	var err error
	s.l, err = net.Listen("tcp", addr)
	if err != nil {
		log.Errorf("listen stat addr %s err %v", addr, err)
		return
	}

	srv := http.Server{}
	mux := http.NewServeMux()
	mux.Handle("/stat", s)
	srv.Handler = mux

	srv.Serve(s.l)
}