Exemplo n.º 1
0
// newTableReader builds a tableReader for the table identified by base and
// index. index must be positive. The table is checked first; if the check
// fails a repair is attempted, and a failed repair is returned to the caller.
// The reader is closed before it is handed back.
func newTableReader(base string, index int64, useMmap bool) (*tableReader, error) {
	if index <= 0 {
		return nil, fmt.Errorf("invalid index %d", index)
	}

	reader := new(tableReader)
	reader.base, reader.index, reader.useMmap = base, index, useMmap

	if checkErr := reader.check(); checkErr != nil {
		log.Errorf("check %d error: %s, try to repair", reader.index, checkErr.Error())

		repairErr := reader.repair()
		if repairErr != nil {
			log.Errorf("repair %d error: %s", reader.index, repairErr.Error())
			return nil, repairErr
		}
	}

	reader.close()

	return reader, nil
}
Exemplo n.º 2
0
// fullSync sends "fullsync" on the master connection, streams the bulk reply
// into <DataDir>/master.dump and loads that dump into the local database.
// The dump file is removed when the function returns, whether or not the
// load succeeded. Any send, file, receive or load error is returned.
func (m *master) fullSync() error {
	log.Info("begin full sync")

	if err := m.conn.Send("fullsync"); err != nil {
		return err
	}

	m.state.Set(replSyncState)

	dumpPath := path.Join(m.app.cfg.DataDir, "master.dump")
	// O_TRUNC: a dump left behind by an earlier interrupted run must not
	// leave stale trailing bytes after the newly received data.
	f, err := os.OpenFile(dumpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}

	defer os.Remove(dumpPath)

	err = m.conn.ReceiveBulkTo(f)
	// Always close the file, but report the receive error first if both fail.
	closeErr := f.Close()
	if err != nil {
		log.Errorf("read dump data error %s", err.Error())
		return err
	}
	if closeErr != nil {
		// A failed close can mean the dump was not fully written; do not load it.
		log.Errorf("close dump file error %s", closeErr.Error())
		return closeErr
	}

	if _, err = m.app.ldb.LoadDumpFile(dumpPath); err != nil {
		log.Errorf("load dump file error %s", err.Error())
		return err
	}

	return nil
}
Exemplo n.º 3
0
// checkSnapshots rescans the snapshot directory. Files ending in ".tmp" are
// removed, files whose name parseSnapshotName rejects are skipped, and the
// remaining names are sorted and stored in s.names before purge(false) runs.
// Only a failure to read the directory is returned.
func (s *snapshotStore) checkSnapshots() error {
	dir := s.cfg.Snapshot.Path

	entries, err := ioutil.ReadDir(dir)
	if err != nil {
		log.Errorf("read %s error: %s", dir, err.Error())
		return err
	}

	valid := []string{}
	for _, entry := range entries {
		fileName := entry.Name()

		switch {
		case path.Ext(fileName) == ".tmp":
			log.Errorf("temp snapshot file name %s, try remove", fileName)
			os.Remove(path.Join(dir, fileName))
		default:
			if _, parseErr := parseSnapshotName(fileName); parseErr != nil {
				log.Errorf("invalid snapshot file name %s, err: %s", fileName, parseErr.Error())
			} else {
				valid = append(valid, fileName)
			}
		}
	}

	// sorted ascending by name, which the original comment describes as "from old to new"
	sort.Strings(valid)
	s.names = valid

	s.purge(false)

	return nil
}
Exemplo n.º 4
0
// Close unmaps the mapping and closes the underlying file, in that order.
// Either step is skipped when its field is already nil, and both fields are
// reset to nil afterwards. Failures are logged only; Close always returns nil.
func (m *mmapReadFile) Close() error {
	if m.m != nil {
		unmapErr := m.m.Unmap()
		if unmapErr != nil {
			log.Errorf("unmap %s error %s", m.name, unmapErr.Error())
		}
		m.m = nil
	}

	if m.f != nil {
		closeErr := m.f.Close()
		if closeErr != nil {
			log.Errorf("close %s error %s", m.name, closeErr.Error())
		}
		m.f = nil
	}

	return nil
}
Exemplo n.º 5
0
Arquivo: rpl.go Projeto: eswdd/bosun
// run is the background loop of Replication. Every second it syncs the store
// when SyncLog == 1 and writes the commit id when SyncLog != 2; every hour it
// purges logs older than ExpiredLogDays. Each operation runs under r.m and
// its error is only logged. The loop ends when r.quit fires, after which both
// tickers are stopped and the wait group is released.
func (r *Replication) run() {
	defer r.wg.Done()

	syncTicker := time.NewTicker(1 * time.Second)
	purgeTicker := time.NewTicker(1 * time.Hour)
	defer func() {
		syncTicker.Stop()
		purgeTicker.Stop()
	}()

	for {
		select {
		case <-r.quit:
			return

		case <-purgeTicker.C:
			expireSeconds := int64(r.cfg.Replication.ExpiredLogDays * 24 * 3600)

			r.m.Lock()
			purgeErr := r.s.PurgeExpired(expireSeconds)
			r.m.Unlock()

			if purgeErr != nil {
				log.Errorf("purge expired log error %s", purgeErr.Error())
			}

		case <-syncTicker.C:
			if r.cfg.Replication.SyncLog == 1 {
				r.m.Lock()
				syncErr := r.s.Sync()
				r.m.Unlock()

				if syncErr != nil {
					log.Errorf("sync store error %s", syncErr.Error())
				}
			}

			if r.cfg.Replication.SyncLog != 2 {
				// the commit id is written on every one-second tick
				r.m.Lock()
				commitErr := r.updateCommitID(r.commitID, true)
				r.m.Unlock()

				if commitErr != nil {
					log.Errorf("sync commitid error %s", commitErr.Error())
				}
			}
		}
	}
}
Exemplo n.º 6
0
// genericWrite wraps result in a single-entry map keyed by w.cmd and encodes
// it to w.w using the encoder that matches w.contentType ("json", "bson" or
// "msgpack"). Any other content type is logged and nothing is written.
func (w *httpWriter) genericWrite(result interface{}) {
	payload := map[string]interface{}{w.cmd: result}

	switch kind := w.contentType; kind {
	case "msgpack":
		writeMsgPack(&payload, w.w)
	case "bson":
		writeBSON(&payload, w.w)
	case "json":
		writeJSON(&payload, w.w)
	default:
		log.Errorf("invalid content type %s", kind)
	}
}
Exemplo n.º 7
0
// storeLog appends l to the table. It rejects a zero log id, a closed writer,
// an id that does not directly follow the last stored id, and a data file
// that has grown past maxLogSize (errTableNeedFlush). Otherwise the log is
// encoded into the data file and its start offset is written to the meta
// file as a big-endian uint32. When syncType is 2 the data file is synced
// after the write; a sync failure is logged but not returned.
func (t *tableWriter) storeLog(l *Log) error {
	switch {
	case l.ID == 0:
		return ErrStoreLogID
	case t.closed:
		return fmt.Errorf("table writer is closed")
	case t.last > 0 && l.ID != t.last+1:
		return ErrStoreLogID
	case t.data != nil && t.data.Offset() > t.maxLogSize:
		return errTableNeedFlush
	}

	if openErr := t.openFile(); openErr != nil {
		return openErr
	}

	// remember where this log starts before encoding moves the offset
	start := t.data.Offset()
	if encErr := l.Encode(t.data); encErr != nil {
		return encErr
	}

	binary.BigEndian.PutUint32(t.posBuf, uint32(start))
	if _, metaErr := t.meta.Write(t.posBuf); metaErr != nil {
		return metaErr
	}

	if t.first == 0 {
		t.first = l.ID
	}
	t.last, t.lastTime = l.ID, l.CreateTime

	if t.syncType == 2 {
		syncErr := t.data.Sync()
		if syncErr != nil {
			log.Errorf("sync table error %s", syncErr.Error())
		}
	}

	return nil
}
Exemplo n.º 8
0
// run re-checks the snapshot directory once every 60 minutes, holding the
// store lock for the duration of each check, until s.quit fires. A failed
// check is logged and the loop carries on.
func (s *snapshotStore) run() {
	ticker := time.NewTicker(time.Hour)
	defer ticker.Stop()

	for {
		select {
		case <-s.quit:
			return
		case <-ticker.C:
			s.Lock()
			checkErr := s.checkSnapshots()
			if checkErr != nil {
				log.Errorf("check snapshots error %s", checkErr.Error())
			}
			s.Unlock()
		}
	}
}
Exemplo n.º 9
0
// handleReplication applies every pending replication log under the write
// lock. For each log returned by NextNeedCommitLog it rolls back the replay
// batch, decompresses the payload when Compression == 1, replays it into the
// batch, commits the batch and records the commit id.
//
// It returns nil once rpl.ErrNoBehindLog is reported, and otherwise the first
// fetch, decode, commit or commit-id error. AsyncNotify(l.rDoneCh) is deferred
// and so runs on every return path.
func (l *Ledis) handleReplication() error {
	l.wLock.Lock()
	defer l.wLock.Unlock()

	defer AsyncNotify(l.rDoneCh)

	rl := &rpl.Log{}

	var err error
	for {
		if err = l.r.NextNeedCommitLog(rl); err != nil {
			if err == rpl.ErrNoBehindLog {
				// nothing left to apply
				return nil
			}
			// err.Error must be called; passing the method value prints a func address
			log.Errorf("get next commit log err, %s", err.Error())
			return err
		}

		l.rbatch.Rollback()

		if rl.Compression == 1 {
			//todo optimize
			if rl.Data, err = snappy.Decode(nil, rl.Data); err != nil {
				log.Errorf("decode log error %s", err.Error())
				return err
			}
		}

		bd, decodeErr := store.NewBatchData(rl.Data)
		if decodeErr != nil {
			log.Errorf("decode batch log error %s", decodeErr.Error())
			return decodeErr
		}

		// A replay error is only logged; the batch is still committed below.
		// NOTE(review): confirm that committing after a failed replay is intended.
		if replayErr := bd.Replay(l.rbatch); replayErr != nil {
			log.Errorf("replay batch log error %s", replayErr.Error())
		}

		l.commitLock.Lock()
		if err = l.rbatch.Commit(); err != nil {
			log.Errorf("commit log error %s", err.Error())
		} else if err = l.r.UpdateCommitID(rl.ID); err != nil {
			log.Errorf("update commit id error %s", err.Error())
		}
		l.commitLock.Unlock()

		if err != nil {
			return err
		}
	}
}
Exemplo n.º 10
0
// purge trims s.names down to cfg.Snapshot.MaxNum entries by dropping names
// from the front of the list and deleting the corresponding snapshot files.
// When create is true one additional entry is dropped, capped at the number
// of names present. File removal errors are logged and otherwise ignored.
func (s *snapshotStore) purge(create bool) {
	excess := len(s.names) - s.cfg.Snapshot.MaxNum

	if create {
		excess++
		if excess > len(s.names) {
			excess = len(s.names)
		}
	}

	if excess <= 0 {
		return
	}

	// copy the names to drop before shifting the survivors down in place
	doomed := append([]string{}, s.names[0:excess]...)
	kept := copy(s.names, s.names[excess:])
	s.names = s.names[0:kept]

	for _, name := range doomed {
		rmErr := os.Remove(s.snapshotPath(name))
		if rmErr != nil {
			log.Errorf("purge snapshot %s error %s", name, rmErr.Error())
		}
	}
}
Exemplo n.º 11
0
Arquivo: rpl.go Projeto: eswdd/bosun
// Close shuts the replication down: it closes r.quit, waits on r.wg, and then,
// under r.m, closes the store, writes the current commit id and closes the
// commit log. A commit id failure is logged; Close always returns nil.
func (r *Replication) Close() error {
	close(r.quit)
	r.wg.Wait()

	r.m.Lock()
	defer r.m.Unlock()

	if st := r.s; st != nil {
		st.Close()
		r.s = nil
	}

	commitErr := r.updateCommitID(r.commitID, true)
	if commitErr != nil {
		log.Errorf("update commit id err %s", commitErr.Error())
	}

	if cl := r.commitLog; cl != nil {
		cl.Close()
		r.commitLog = nil
	}

	return nil
}
Exemplo n.º 12
0
// repair rebuilds the meta (offset index) file of the table by scanning the
// data file log by log. Scanning stops at the first log head that fails to
// decode, has id 0, or has an id not greater than the previous one. Every
// accepted log's start offset is written to the meta file as a big-endian
// uint32, and t.first, t.last and t.lastTime are updated from the accepted
// logs. Afterwards magic is written to the data file at the position where
// scanning stopped.
//
// It returns an error if either file cannot be opened, if a meta write
// fails, or if closing the data or meta file fails.
func (t *tableReader) repair() error {
	t.close()

	var err error
	var data writeFile
	var meta writeFile

	//repair will use raw file mode
	data, err = newWriteFile(false, fmtTableDataName(t.base, t.index), 0)
	if err != nil {
		return err
	}
	data.SetOffset(int64(data.Size()))

	meta, err = newWriteFile(false, fmtTableMetaName(t.base, t.index), int64(defaultLogNumInFile*4))
	if err != nil {
		// do not leak the data file that was already opened
		data.Close()
		return err
	}

	var l Log
	var pos int64
	var nextPos int64
	b := make([]byte, 4)

	t.first = 0
	t.last = 0

	for {
		nextPos, err = t.decodeLogHead(&l, data, pos)
		if err != nil {
			//if error, we may lost all logs from pos
			log.Errorf("%s may lost logs from %d", data.Name(), pos)
			break
		}

		if l.ID == 0 {
			log.Errorf("%s may lost logs from %d, invalid log 0", data.Name(), pos)
			break
		}

		if t.first == 0 {
			t.first = l.ID
		}

		if t.last == 0 {
			t.last = l.ID
		} else if l.ID <= t.last {
			// use the local data handle here, matching the other log lines;
			// t.close() has already run on the reader's own handles
			log.Errorf("%s may lost logs from %d, invalid logid %d", data.Name(), pos, l.ID)
			break
		}

		t.last = l.ID
		t.lastTime = l.CreateTime

		binary.BigEndian.PutUint32(b, uint32(pos))
		if _, err = meta.Write(b); err != nil {
			// an index with a missing entry is unusable; stop without
			// touching the data file
			log.Errorf("write meta %s error %s", meta.Name(), err.Error())
			meta.Close()
			data.Close()
			return err
		}

		pos = nextPos
	}

	var e error
	if err := meta.Close(); err != nil {
		e = err
	}

	data.SetOffset(pos)

	if _, err = data.Write(magic); err != nil {
		log.Errorf("write magic error %s", err.Error())
	}

	if err = data.Close(); err != nil {
		return err
	}

	return e
}
Exemplo n.º 13
0
// publishNewLog waits for slaves to acknowledge log l when synchronous
// replication is enabled; otherwise it returns immediately. The number of
// acknowledgements needed is half the slaves rounded up, capped by
// WaitMaxSlaveAcks when that is positive. If enough slaves already report l's
// id as their last log id it returns at once. Otherwise it waits, for at most
// WaitSyncTime milliseconds, for acknowledgements on app.slaveSyncAck and then
// records the wait in the replication statistics.
func (app *App) publishNewLog(l *rpl.Log) {
	if !app.cfg.Replication.Sync {
		//no sync replication, we will do async
		return
	}

	app.info.Replication.PubLogNum.Add(1)

	app.slock.Lock()

	slaveCount := len(app.slaves)

	need := (slaveCount + 1) / 2
	if maxAcks := app.cfg.Replication.WaitMaxSlaveAcks; maxAcks > 0 {
		need = num.MinInt(need, maxAcks)
	}

	logID := l.ID
	have := 0
	for _, s := range app.slaves {
		switch last := s.lastLogID.Get(); {
		case last == logID:
			//slave has already owned this log
			have++
		case last > logID:
			log.Errorf("invalid slave %s, lastlogid %d > %d", s.slaveListeningAddr, last, logID)
		}
	}

	app.slock.Unlock()

	if have >= need {
		//at least need slaves have owned this log
		return
	}

	begin := time.Now()
	done := make(chan struct{}, 1)
	go func() {
		// read at most one ack per slave, stopping once enough have arrived
		acked := 0
		for i := 0; i < slaveCount && acked < need; i++ {
			id := <-app.slaveSyncAck
			if id < logID {
				log.Infof("some slave may close with last logid %d < %d", id, logID)
				continue
			}
			acked++
		}
		done <- struct{}{}
	}()

	select {
	case <-time.After(time.Duration(app.cfg.Replication.WaitSyncTime) * time.Millisecond):
		log.Info("replication wait timeout")
	case <-done:
	}

	elapsed := time.Since(begin)
	app.info.Replication.PubLogAckNum.Add(1)
	app.info.Replication.PubLogTotalAckTime.Add(elapsed)
}
Exemplo n.º 14
0
// runReplication is the replication loop run against the master. Each pass
// checks the connection (retrying after 3s on failure), sends the replication
// configuration, performs a full sync first when restart is true, and then
// calls sync repeatedly until it fails, at which point the outer loop starts
// over. The loop exits when the master is marked as quit. On exit the state is
// reset to replConnectState and the wait group is released.
func (m *master) runReplication(restart bool) {
	defer func() {
		m.state.Set(replConnectState)
		m.wg.Done()
	}()

	for {
		m.state.Set(replConnectState)

		if m.isQuited() {
			return
		}

		if err := m.checkConn(); err != nil {
			log.Errorf("check master %s connection error %s, try 3s later", m.addr, err.Error())

			select {
			case <-time.After(3 * time.Second):
			case <-m.quit:
				return
			}
			continue
		}

		if m.isQuited() {
			return
		}

		m.state.Set(replConnectedState)

		if err := m.replConf(); err != nil {
			if strings.Contains(err.Error(), ledis.ErrRplNotSupport.Error()) {
				// Logged as an error rather than fatal: the code below waits
				// and retries, so this condition must not end the process.
				log.Errorf("master doesn't support replication, wait 10s and retry")
				select {
				case <-time.After(10 * time.Second):
				case <-m.quit:
					return
				}
			} else {
				log.Errorf("replconf error %s", err.Error())
			}

			continue
		}

		if restart {
			if err := m.fullSync(); err != nil {
				log.Errorf("restart fullsync error %s", err.Error())
				continue
			}
			m.state.Set(replConnectedState)
		}

		for {
			if err := m.sync(); err != nil {
				log.Errorf("sync error %s", err.Error())
				// leave the inner loop so the connection is checked again
				break
			}
			m.state.Set(replConnectedState)

			if m.isQuited() {
				return
			}
		}
	}
}