func newTableReader(base string, index int64, useMmap bool) (*tableReader, error) { if index <= 0 { return nil, fmt.Errorf("invalid index %d", index) } t := new(tableReader) t.base = base t.index = index t.useMmap = useMmap var err error if err = t.check(); err != nil { log.Errorf("check %d error: %s, try to repair", t.index, err.Error()) if err = t.repair(); err != nil { log.Errorf("repair %d error: %s", t.index, err.Error()) return nil, err } } t.close() return t, nil }
func (m *master) fullSync() error { log.Info("begin full sync") if err := m.conn.Send("fullsync"); err != nil { return err } m.state.Set(replSyncState) dumpPath := path.Join(m.app.cfg.DataDir, "master.dump") f, err := os.OpenFile(dumpPath, os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return err } defer os.Remove(dumpPath) err = m.conn.ReceiveBulkTo(f) f.Close() if err != nil { log.Errorf("read dump data error %s", err.Error()) return err } if _, err = m.app.ldb.LoadDumpFile(dumpPath); err != nil { log.Errorf("load dump file error %s", err.Error()) return err } return nil }
func (s *snapshotStore) checkSnapshots() error { cfg := s.cfg snapshots, err := ioutil.ReadDir(cfg.Snapshot.Path) if err != nil { log.Errorf("read %s error: %s", cfg.Snapshot.Path, err.Error()) return err } names := []string{} for _, info := range snapshots { if path.Ext(info.Name()) == ".tmp" { log.Errorf("temp snapshot file name %s, try remove", info.Name()) os.Remove(path.Join(cfg.Snapshot.Path, info.Name())) continue } if _, err := parseSnapshotName(info.Name()); err != nil { log.Errorf("invalid snapshot file name %s, err: %s", info.Name(), err.Error()) continue } names = append(names, info.Name()) } //from old to new sort.Strings(names) s.names = names s.purge(false) return nil }
func (m *mmapReadFile) Close() error { if m.m != nil { if err := m.m.Unmap(); err != nil { log.Errorf("unmap %s error %s", m.name, err.Error()) } m.m = nil } if m.f != nil { if err := m.f.Close(); err != nil { log.Errorf("close %s error %s", m.name, err.Error()) } m.f = nil } return nil }
// run is the replication background loop. Once per second it
// optionally syncs the log store and persists the current commit id;
// once per hour it purges logs older than the configured expiration.
// It exits when r.quit is closed.
func (r *Replication) run() {
	defer r.wg.Done()

	syncTc := time.NewTicker(1 * time.Second)
	purgeTc := time.NewTicker(1 * time.Hour)

	for {
		select {
		case <-purgeTc.C:
			// expiration window converted from days to seconds
			n := (r.cfg.Replication.ExpiredLogDays * 24 * 3600)
			r.m.Lock()
			err := r.s.PurgeExpired(int64(n))
			r.m.Unlock()
			if err != nil {
				log.Errorf("purge expired log error %s", err.Error())
			}
		case <-syncTc.C:
			// SyncLog mode 1: flush the log store once per second
			if r.cfg.Replication.SyncLog == 1 {
				r.m.Lock()
				err := r.s.Sync()
				r.m.Unlock()
				if err != nil {
					log.Errorf("sync store error %s", err.Error())
				}
			}
			if r.cfg.Replication.SyncLog != 2 {
				//we will sync commit id every 1 second
				// NOTE(review): mode 2 presumably persists the commit id on
				// every update elsewhere, so it is skipped here — confirm
				// against the SyncLog config documentation.
				r.m.Lock()
				err := r.updateCommitID(r.commitID, true)
				r.m.Unlock()
				if err != nil {
					log.Errorf("sync commitid error %s", err.Error())
				}
			}
		case <-r.quit:
			syncTc.Stop()
			purgeTc.Stop()
			return
		}
	}
}
func (w *httpWriter) genericWrite(result interface{}) { m := map[string]interface{}{ w.cmd: result, } switch w.contentType { case "json": writeJSON(&m, w.w) case "bson": writeBSON(&m, w.w) case "msgpack": writeMsgPack(&m, w.w) default: log.Errorf("invalid content type %s", w.contentType) } }
func (t *tableWriter) storeLog(l *Log) error { if l.ID == 0 { return ErrStoreLogID } if t.closed { return fmt.Errorf("table writer is closed") } if t.last > 0 && l.ID != t.last+1 { return ErrStoreLogID } if t.data != nil && t.data.Offset() > t.maxLogSize { return errTableNeedFlush } var err error if err = t.openFile(); err != nil { return err } offsetPos := t.data.Offset() if err = l.Encode(t.data); err != nil { return err } binary.BigEndian.PutUint32(t.posBuf, uint32(offsetPos)) if _, err = t.meta.Write(t.posBuf); err != nil { return err } if t.first == 0 { t.first = l.ID } t.last = l.ID t.lastTime = l.CreateTime if t.syncType == 2 { if err := t.data.Sync(); err != nil { log.Errorf("sync table error %s", err.Error()) } } return nil }
func (s *snapshotStore) run() { t := time.NewTicker(60 * time.Minute) defer t.Stop() for { select { case <-t.C: s.Lock() if err := s.checkSnapshots(); err != nil { log.Errorf("check snapshots error %s", err.Error()) } s.Unlock() case <-s.quit: return } } }
func (l *Ledis) handleReplication() error { l.wLock.Lock() defer l.wLock.Unlock() defer AsyncNotify(l.rDoneCh) rl := &rpl.Log{} var err error for { if err = l.r.NextNeedCommitLog(rl); err != nil { if err != rpl.ErrNoBehindLog { log.Errorf("get next commit log err, %s", err.Error) return err } else { return nil } } else { l.rbatch.Rollback() if rl.Compression == 1 { //todo optimize if rl.Data, err = snappy.Decode(nil, rl.Data); err != nil { log.Errorf("decode log error %s", err.Error()) return err } } if bd, err := store.NewBatchData(rl.Data); err != nil { log.Errorf("decode batch log error %s", err.Error()) return err } else if err = bd.Replay(l.rbatch); err != nil { log.Errorf("replay batch log error %s", err.Error()) } l.commitLock.Lock() if err = l.rbatch.Commit(); err != nil { log.Errorf("commit log error %s", err.Error()) } else if err = l.r.UpdateCommitID(rl.ID); err != nil { log.Errorf("update commit id error %s", err.Error()) } l.commitLock.Unlock() if err != nil { return err } } } }
func (s *snapshotStore) purge(create bool) { var names []string maxNum := s.cfg.Snapshot.MaxNum num := len(s.names) - maxNum if create { num++ if num > len(s.names) { num = len(s.names) } } if num > 0 { names = append([]string{}, s.names[0:num]...) n := copy(s.names, s.names[num:]) s.names = s.names[0:n] } for _, name := range names { if err := os.Remove(s.snapshotPath(name)); err != nil { log.Errorf("purge snapshot %s error %s", name, err.Error()) } } }
func (r *Replication) Close() error { close(r.quit) r.wg.Wait() r.m.Lock() defer r.m.Unlock() if r.s != nil { r.s.Close() r.s = nil } if err := r.updateCommitID(r.commitID, true); err != nil { log.Errorf("update commit id err %s", err.Error()) } if r.commitLog != nil { r.commitLog.Close() r.commitLog = nil } return nil }
func (t *tableReader) repair() error { t.close() var err error var data writeFile var meta writeFile //repair will use raw file mode data, err = newWriteFile(false, fmtTableDataName(t.base, t.index), 0) data.SetOffset(int64(data.Size())) meta, err = newWriteFile(false, fmtTableMetaName(t.base, t.index), int64(defaultLogNumInFile*4)) var l Log var pos int64 = 0 var nextPos int64 = 0 b := make([]byte, 4) t.first = 0 t.last = 0 for { nextPos, err = t.decodeLogHead(&l, data, pos) if err != nil { //if error, we may lost all logs from pos log.Errorf("%s may lost logs from %d", data.Name(), pos) break } if l.ID == 0 { log.Errorf("%s may lost logs from %d, invalid log 0", data.Name(), pos) break } if t.first == 0 { t.first = l.ID } if t.last == 0 { t.last = l.ID } else if l.ID <= t.last { log.Errorf("%s may lost logs from %d, invalid logid %d", t.data.Name(), pos, l.ID) break } t.last = l.ID t.lastTime = l.CreateTime binary.BigEndian.PutUint32(b, uint32(pos)) meta.Write(b) pos = nextPos t.lastTime = l.CreateTime } var e error if err := meta.Close(); err != nil { e = err } data.SetOffset(pos) if _, err = data.Write(magic); err != nil { log.Errorf("write magic error %s", err.Error()) } if err = data.Close(); err != nil { return err } return e }
// publishNewLog blocks (bounded by WaitSyncTime milliseconds) until
// enough slaves have acknowledged log l when synchronous replication
// is enabled. "Enough" is a majority of the connected slaves,
// optionally capped by WaitMaxSlaveAcks. With Sync disabled it
// returns immediately.
func (app *App) publishNewLog(l *rpl.Log) {
	if !app.cfg.Replication.Sync {
		//no sync replication, we will do async
		return
	}

	app.info.Replication.PubLogNum.Add(1)

	app.slock.Lock()

	slaveNum := len(app.slaves)

	// quorum: majority of slaves, optionally capped by WaitMaxSlaveAcks
	total := (slaveNum + 1) / 2
	if app.cfg.Replication.WaitMaxSlaveAcks > 0 {
		total = num.MinInt(total, app.cfg.Replication.WaitMaxSlaveAcks)
	}

	// count slaves that already have this log
	n := 0
	logId := l.ID
	for _, s := range app.slaves {
		lastLogID := s.lastLogID.Get()
		if lastLogID == logId {
			//slave has already owned this log
			n++
		} else if lastLogID > logId {
			log.Errorf("invalid slave %s, lastlogid %d > %d", s.slaveListeningAddr, lastLogID, logId)
		}
	}

	app.slock.Unlock()

	if n >= total {
		//at least total slaves have owned this log
		return
	}

	startTime := time.Now()

	// wait for acks in a helper goroutine so the timeout below can fire.
	// NOTE(review): on timeout this goroutine keeps receiving from
	// app.slaveSyncAck until slaveNum acks arrive — presumably the
	// channel is fed/buffered elsewhere so it cannot block forever;
	// confirm against the ack sender.
	done := make(chan struct{}, 1)
	go func() {
		n := 0
		for i := 0; i < slaveNum; i++ {
			id := <-app.slaveSyncAck
			if id < logId {
				log.Infof("some slave may close with last logid %d < %d", id, logId)
			} else {
				n++
				if n >= total {
					break
				}
			}
		}
		done <- struct{}{}
	}()

	select {
	case <-done:
	case <-time.After(time.Duration(app.cfg.Replication.WaitSyncTime) * time.Millisecond):
		log.Info("replication wait timeout")
	}

	stopTime := time.Now()

	app.info.Replication.PubLogAckNum.Add(1)
	app.info.Replication.PubLogTotalAckTime.Add(stopTime.Sub(startTime))
}
// runReplication is the slave-side replication loop: it (re)connects
// to the master, negotiates replconf, optionally performs a full
// resync (restart == true) and then pulls logs via sync() until told
// to quit. On any error it backs off and retries from the top.
func (m *master) runReplication(restart bool) {
	defer func() {
		m.state.Set(replConnectState)
		m.wg.Done()
	}()

	for {
		m.state.Set(replConnectState)

		if m.isQuited() {
			return
		}

		// ensure we have a live connection, retrying every 3 seconds
		if err := m.checkConn(); err != nil {
			log.Errorf("check master %s connection error %s, try 3s later", m.addr, err.Error())

			select {
			case <-time.After(3 * time.Second):
			case <-m.quit:
				return
			}
			continue
		}

		if m.isQuited() {
			return
		}

		m.state.Set(replConnectedState)

		if err := m.replConf(); err != nil {
			if strings.Contains(err.Error(), ledis.ErrRplNotSupport.Error()) {
				// NOTE(review): Fatalf looks suspicious here — if the log
				// package exits the process on Fatalf, the 10s retry select
				// below is unreachable; confirm the intended severity.
				log.Fatalf("master doesn't support replication, wait 10s and retry")
				select {
				case <-time.After(10 * time.Second):
				case <-m.quit:
					return
				}
			} else {
				log.Errorf("replconf error %s", err.Error())
			}

			continue
		}

		if restart {
			// a restarted replication always begins with a full dump
			if err := m.fullSync(); err != nil {
				log.Errorf("restart fullsync error %s", err.Error())
				continue
			}
			m.state.Set(replConnectedState)
		}

		// incremental sync loop; break back out to reconnect on error
		for {
			if err := m.sync(); err != nil {
				log.Errorf("sync error %s", err.Error())
				break
			}
			m.state.Set(replConnectedState)

			if m.isQuited() {
				return
			}
		}
	}

	return
}