Exemple #1
0
func (s *FileStore) storeLog(l *Log) error {
	err := s.w.StoreLog(l)
	if err == nil {
		return nil
	} else if err != errTableNeedFlush {
		return err
	}

	var r *tableReader
	r, err = s.w.Flush()

	if err != nil {
		log.Fatalf("write table flush error %s, can not store!!!", err.Error())

		s.w.Close()

		return err
	}

	s.rm.Lock()
	s.rs = append(s.rs, r)
	s.rm.Unlock()

	err = s.w.StoreLog(l)

	return err
}
Exemple #2
0
// Promote the slave to master, then let other slaves replicate from it
func (g *Group) Promote(addr string) error {
	g.m.Lock()
	defer g.m.Unlock()

	node := g.Slaves[addr]

	if err := node.slaveof("no", "one"); err != nil {
		return err
	}

	delete(g.Slaves, addr)

	g.Master = node

	host, port, _ := net.SplitHostPort(addr)
	for _, slave := range g.Slaves {
		if err := slave.slaveof(host, port); err != nil {
			// if we go here, the replication topology may be wrong
			// so use fatal level and we should fix it manually
			log.Fatalf("slaveof %s to master %s err %v", slave.Addr, addr, err)
		} else {
			log.Infof("slaveof %s to master %s ok", slave.Addr, addr)
		}
	}

	return nil
}
Exemple #3
0
func (t *tableWriter) close() {
	if t.meta != nil {
		if err := t.meta.Close(); err != nil {
			log.Fatalf("close log meta error %s", err.Error())
		}
		t.meta = nil
	}

	if t.data != nil {
		if _, err := t.data.Write(magic); err != nil {
			log.Fatalf("write magic error %s", err.Error())
		}

		if err := t.data.Close(); err != nil {
			log.Fatalf("close log data error %s", err.Error())
		}
		t.data = nil
	}
}
Exemple #4
0
func (c *respClient) run() {
	defer func() {
		if e := recover(); e != nil {
			buf := make([]byte, 4096)
			n := runtime.Stack(buf, false)
			buf = buf[0:n]

			log.Fatalf("client run panic %s:%v", buf, e)
		}

		c.client.close()

		c.conn.Close()

		// if c.tx != nil {
		// 	c.tx.Rollback()
		// 	c.tx = nil
		// }

		c.app.removeSlave(c.client, c.activeQuit)

		c.app.delRespClient(c)

		c.app.connWait.Done()
	}()

	select {
	case <-c.app.quit:
		//check app closed
		return
	default:
		break
	}

	kc := time.Duration(c.app.cfg.ConnKeepaliveInterval) * time.Second
	for {
		if kc > 0 {
			c.conn.SetReadDeadline(time.Now().Add(kc))
		}

		c.cmd = ""
		c.args = nil

		reqData, err := c.respReader.ParseRequest()
		if err == nil {
			err = c.handleRequest(reqData)
		}

		if err != nil {
			return
		}
	}
}
Exemple #5
0
func (l *Ledis) handleCommit(g commitDataGetter, c commiter) error {
	l.commitLock.Lock()

	var err error
	if l.r != nil {
		var rl *rpl.Log
		if rl, err = l.r.Log(g.Data()); err != nil {
			l.commitLock.Unlock()

			log.Fatalf("write wal error %s", err.Error())
			return err
		}

		l.propagate(rl)

		if err = c.Commit(); err != nil {
			l.commitLock.Unlock()

			log.Fatalf("commit error %s", err.Error())
			l.noticeReplication()
			return err
		}

		if err = l.r.UpdateCommitID(rl.ID); err != nil {
			l.commitLock.Unlock()

			log.Fatalf("update commit id error %s", err.Error())
			l.noticeReplication()
			return err
		}
	} else {
		err = c.Commit()
	}

	l.commitLock.Unlock()

	return err
}
Exemple #6
0
func (l *Ledis) flushAll() error {
	it := l.ldb.NewIterator()
	defer it.Close()

	it.SeekToFirst()

	w := l.ldb.NewWriteBatch()
	defer w.Rollback()

	n := 0
	for ; it.Valid(); it.Next() {
		n++
		if n == 10000 {
			if err := w.Commit(); err != nil {
				log.Fatalf("flush all commit error: %s", err.Error())
				return err
			}
			n = 0
		}
		w.Delete(it.RawKey())
	}

	if err := w.Commit(); err != nil {
		log.Fatalf("flush all commit error: %s", err.Error())
		return err
	}

	if l.r != nil {
		if err := l.r.Clear(); err != nil {
			log.Fatalf("flush all replication clear error: %s", err.Error())
			return err
		}
	}

	return nil
}
Exemple #7
0
func (db *DB) lSetMeta(ek []byte, headSeq int32, tailSeq int32) int32 {
	t := db.listBatch

	var size int32 = tailSeq - headSeq + 1
	if size < 0 {
		//	todo : log error + panic
		log.Fatalf("invalid meta sequence range [%d, %d]", headSeq, tailSeq)
	} else if size == 0 {
		t.Delete(ek)
	} else {
		buf := make([]byte, 8)

		binary.LittleEndian.PutUint32(buf[0:4], uint32(headSeq))
		binary.LittleEndian.PutUint32(buf[4:8], uint32(tailSeq))

		t.Put(ek, buf)
	}

	return size
}
Exemple #8
0
func (m *master) runReplication(restart bool) {
	defer func() {
		m.state.Set(replConnectState)
		m.wg.Done()
	}()

	for {
		m.state.Set(replConnectState)

		if m.isQuited() {
			return
		}

		if err := m.checkConn(); err != nil {
			log.Errorf("check master %s connection error %s, try 3s later", m.addr, err.Error())

			select {
			case <-time.After(3 * time.Second):
			case <-m.quit:
				return
			}
			continue
		}

		if m.isQuited() {
			return
		}

		m.state.Set(replConnectedState)

		if err := m.replConf(); err != nil {
			if strings.Contains(err.Error(), ledis.ErrRplNotSupport.Error()) {
				log.Fatalf("master doesn't support replication, wait 10s and retry")
				select {
				case <-time.After(10 * time.Second):
				case <-m.quit:
					return
				}
			} else {
				log.Errorf("replconf error %s", err.Error())
			}

			continue
		}

		if restart {
			if err := m.fullSync(); err != nil {
				log.Errorf("restart fullsync error %s", err.Error())
				continue
			}
			m.state.Set(replConnectedState)
		}

		for {
			if err := m.sync(); err != nil {
				log.Errorf("sync error %s", err.Error())
				break
			}
			m.state.Set(replConnectedState)

			if m.isQuited() {
				return
			}
		}
	}

	return
}
Exemple #9
0
func (a *App) checkMaster(wg *sync.WaitGroup, g *Group) {
	defer wg.Done()

	// later, add check strategy, like check failed n numbers in n seconds and do failover, etc.
	// now only check once.
	err := g.Check()
	if err == nil {
		return
	}

	oldMaster := g.Master.Addr

	if err == ErrNodeType {
		log.Errorf("server %s is not master now, we will skip it", oldMaster)

		// server is not master, we will not check it.
		a.delMasters([]string{oldMaster})
		return
	}

	errNum := time.Duration(g.CheckErrNum.Get())
	downTime := errNum * time.Duration(a.c.CheckInterval) * time.Millisecond
	if downTime < time.Duration(a.c.MaxDownTime)*time.Second {
		log.Warnf("check master %s err %v, down time: %0.2fs, retry check", oldMaster, err, downTime.Seconds())
		return
	}

	// If check error, we will remove it from saved masters and not check.
	// I just want to avoid some errors if below failover failed, at that time,
	// handling it manually seems a better way.
	// If you want to recheck it, please add it again.
	a.delMasters([]string{oldMaster})

	log.Errorf("check master %s err %v, do failover", oldMaster, err)

	if err := a.onBeforeFailover(oldMaster); err != nil {
		//give up failover
		return
	}

	// first elect a candidate
	newMaster, err := g.Elect()
	if err != nil {
		// elect error
		return
	}

	log.Errorf("master is down, elect %s as new master, do failover", newMaster)

	// promote the candiate to master
	err = g.Promote(newMaster)

	if err != nil {
		log.Fatalf("do master %s failover err: %v", oldMaster, err)
		return
	}

	a.addMasters([]string{newMaster})

	a.onAfterFailover(oldMaster, newMaster)
}