Example #1
func newEtcdStore(
	proposalc chan<- []byte,
	snapshotc <-chan raftpb.Snapshot,
	entryc <-chan raftpb.Entry,
	confentryc chan<- raftpb.Entry,
	logger mesh.Logger,
) *etcdStore {
	// It would be much better if we could have a proper in-memory backend. Alas:
	// backend.Backend is tightly coupled to bolt.DB, and both are tightly coupled
	// to os.Open &c. So we'd need to fork both Bolt and backend. A task for
	// another day.
	f, err := ioutil.TempFile(os.TempDir(), "mesh_etcd_backend_")
	if err != nil {
		panic(err)
	}
	dbPath := f.Name()
	f.Close()
	logger.Printf("etcd store: using %s", dbPath)

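	// Assemble the v3 storage stack on the temporary bolt file: a default
	// backend, a lessor for leases, and an mvcc store that persists its
	// progress through the consistent index.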
	b := backend.NewDefaultBackend(dbPath)
	lessor := lease.NewLessor(b)
	index := &consistentIndex{0}
	kv := mvcc.New(b, lessor, index)

	s := &etcdStore{
		proposalc:   proposalc,
		snapshotc:   snapshotc,
		entryc:      entryc,
		confentryc:  confentryc,
		actionc:     make(chan func()),
		quitc:       make(chan struct{}),
		terminatedc: make(chan struct{}),
		logger:      logger,

		dbPath: dbPath,
		kv:     kv,
		lessor: lessor,
		index:  index,

		idgen:   makeIDGen(),
		pending: map[uint64]responseChans{},
	}
	go s.loop()
	return s
}
Example #2
// makeDB copies the database snapshot to the snapshot directory
func makeDB(snapdir, dbfile string) {
	f, ferr := os.OpenFile(dbfile, os.O_RDONLY, 0600)
	if ferr != nil {
		ExitWithError(ExitInvalidInput, ferr)
	}
	defer f.Close()

	if err := os.MkdirAll(snapdir, 0755); err != nil {
		ExitWithError(ExitIO, err)
	}

	dbpath := path.Join(snapdir, "db")
	db, dberr := os.OpenFile(dbpath, os.O_WRONLY|os.O_CREATE, 0600)
	if dberr != nil {
		ExitWithError(ExitIO, dberr)
	}
	if _, err := io.Copy(db, f); err != nil {
		ExitWithError(ExitIO, err)
	}
	db.Close()

	// update consistentIndex so applies go through on etcdserver despite
	// having a new raft instance
	be := backend.NewDefaultBackend(dbpath)
	s := mvcc.NewStore(be, nil, &initIndex{})
	id := s.TxnBegin()
	btx := be.BatchTx()
	del := func(k, v []byte) error {
		_, _, err := s.TxnDeleteRange(id, k, nil)
		return err
	}
	// delete stored members from old cluster since using new members
	btx.UnsafeForEach([]byte("members"), del)
	btx.UnsafeForEach([]byte("members_removed"), del)
	// trigger write-out of new consistent index
	s.TxnEnd(id)
	s.Commit()
	s.Close()
}
Example #3
func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
	if raft.IsEmptySnap(apply.snapshot) {
		return
	}

	if apply.snapshot.Metadata.Index <= ep.appliedi {
		plog.Panicf("snapshot index [%d] should > appliedi[%d] + 1",
			apply.snapshot.Metadata.Index, ep.appliedi)
	}

	snapfn, err := s.r.storage.DBFilePath(apply.snapshot.Metadata.Index)
	if err != nil {
		plog.Panicf("get database snapshot file path error: %v", err)
	}

	fn := path.Join(s.Cfg.SnapDir(), databaseFilename)
	if err := os.Rename(snapfn, fn); err != nil {
		plog.Panicf("rename snapshot file error: %v", err)
	}

	newbe := backend.NewDefaultBackend(fn)
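	// kv.Restore switches the mvcc store over to the snapshot's backend and
	// rebuilds its state from it.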
	if err := s.kv.Restore(newbe); err != nil {
		plog.Panicf("restore KV error: %v", err)
	}
	s.consistIndex.setConsistentIndex(s.kv.ConsistentIndex())

	// Closing old backend might block until all the txns
	// on the backend are finished.
	// We do not want to wait on closing the old backend.
	s.bemu.Lock()
	oldbe := s.be
	go func() {
		if err := oldbe.Close(); err != nil {
			plog.Panicf("close backend error: %v", err)
		}
	}()

	s.be = newbe
	s.bemu.Unlock()

	if s.lessor != nil {
		s.lessor.Recover(newbe, s.kv)
	}

	if err := s.restoreAlarms(); err != nil {
		plog.Panicf("restore alarms error: %v", err)
	}

	if s.authStore != nil {
		s.authStore.Recover(newbe)
	}

	if err := s.store.Recovery(apply.snapshot.Data); err != nil {
		plog.Panicf("recovery store error: %v", err)
	}
	s.cluster.SetBackend(s.be)
	s.cluster.Recover()

	// recover raft transport
	s.r.transport.RemoveAllPeers()
	for _, m := range s.cluster.Members() {
		if m.ID == s.ID() {
			continue
		}
		s.r.transport.AddPeer(m.ID, m.PeerURLs)
	}

	ep.appliedi = apply.snapshot.Metadata.Index
	ep.snapi = ep.appliedi
	ep.confState = apply.snapshot.Metadata.ConfState
	plog.Infof("recovered from incoming snapshot at index %d", ep.snapi)
}
Example #4
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)

	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *membership.RaftCluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	// Run the migrations.
	dataVer, err := version.DetectDataDir(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if err = upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
		return nil, err
	}

	haveWAL := wal.Exist(cfg.WALDir())

	if err = os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil && !os.IsExist(err) {
		plog.Fatalf("create snapshot directory error: %v", err)
	}
	ss := snap.New(cfg.SnapDir())
	be := backend.NewDefaultBackend(path.Join(cfg.SnapDir(), databaseFilename))
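	// Close the backend again if server construction fails further down.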
	defer func() {
		if err != nil {
			be.Close()
		}
	}()

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}
	var remotes []*membership.Member
	switch {
	case !haveWAL && !cfg.NewCluster:
		if err = cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if gerr != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
		}
		if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incomptible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.ID())
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err = cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			var urlsmap types.URLsMap
			urlsmap, err = types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		var snapshot *raftpb.Snapshot
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err = st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cl.Recover()
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	srv = &EtcdServer{
		readych:   make(chan struct{}),
		Cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			Node:        n,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:            id,
		attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
		msgSnapC:      make(chan raftpb.Message, maxInFlightMsgSnap),
	}

	srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

	srv.be = be
	srv.lessor = lease.NewLessor(srv.be)
	srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
	srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())
	srv.authStore = auth.NewAuthStore(srv.be)
	if h := cfg.AutoCompactionRetention; h != 0 {
		srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
		srv.compactor.Run()
	}

	if err = srv.restoreAlarms(); err != nil {
		return nil, err
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
	}
	if err = tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}
Example #5
func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
	if raft.IsEmptySnap(apply.snapshot) {
		return
	}

	plog.Infof("applying snapshot at index %d...", ep.snapi)
	defer plog.Infof("finished applying incoming snapshot at index %d", ep.snapi)

	if apply.snapshot.Metadata.Index <= ep.appliedi {
		plog.Panicf("snapshot index [%d] should > appliedi[%d] + 1",
			apply.snapshot.Metadata.Index, ep.appliedi)
	}

	snapfn, err := s.r.storage.DBFilePath(apply.snapshot.Metadata.Index)
	if err != nil {
		plog.Panicf("get database snapshot file path error: %v", err)
	}

	fn := path.Join(s.Cfg.SnapDir(), databaseFilename)
	if err := os.Rename(snapfn, fn); err != nil {
		plog.Panicf("rename snapshot file error: %v", err)
	}

	newbe := backend.NewDefaultBackend(fn)

	// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
	// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
	if s.lessor != nil {
		plog.Info("recovering lessor...")
		s.lessor.Recover(newbe, s.kv)
		plog.Info("finished recovering lessor")
	}

	plog.Info("restoring mvcc store...")

	if err := s.kv.Restore(newbe); err != nil {
		plog.Panicf("restore KV error: %v", err)
	}
	s.consistIndex.setConsistentIndex(s.kv.ConsistentIndex())

	plog.Info("finished restoring mvcc store")

	// Closing old backend might block until all the txns
	// on the backend are finished.
	// We do not want to wait on closing the old backend.
	s.bemu.Lock()
	oldbe := s.be
	go func() {
		plog.Info("closing old backend...")
		defer plog.Info("finished closing old backend")

		if err := oldbe.Close(); err != nil {
			plog.Panicf("close backend error: %v", err)
		}
	}()

	s.be = newbe
	s.bemu.Unlock()

	plog.Info("recovering alarms...")
	if err := s.restoreAlarms(); err != nil {
		plog.Panicf("restore alarms error: %v", err)
	}
	plog.Info("finished recovering alarms")

	if s.authStore != nil {
		plog.Info("recovering auth store...")
		s.authStore.Recover(newbe)
		plog.Info("finished recovering auth store")
	}

	plog.Info("recovering store v2...")
	if err := s.store.Recovery(apply.snapshot.Data); err != nil {
		plog.Panicf("recovery store error: %v", err)
	}
	plog.Info("finished recovering store v2")

	s.cluster.SetBackend(s.be)
	plog.Info("recovering cluster configuration...")
	s.cluster.Recover(api.UpdateCapability)
	plog.Info("finished recovering cluster configuration")

	plog.Info("removing old peers from network...")
	// recover raft transport
	s.r.transport.RemoveAllPeers()
	plog.Info("finished removing old peers from network")

	plog.Info("adding peers from new cluster configuration into network...")
	for _, m := range s.cluster.Members() {
		if m.ID == s.ID() {
			continue
		}
		s.r.transport.AddPeer(m.ID, m.PeerURLs)
	}
	plog.Info("finished adding peers from new cluster configuration into network...")

	ep.appliedi = apply.snapshot.Metadata.Index
	ep.snapi = ep.appliedi
	ep.confState = apply.snapshot.Metadata.ConfState
}
Example #6
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)

	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *membership.RaftCluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	haveWAL := wal.Exist(cfg.WALDir())

	if err = fileutil.TouchDirAll(cfg.SnapDir()); err != nil {
		plog.Fatalf("create snapshot directory error: %v", err)
	}
	ss := snap.New(cfg.SnapDir())

	bepath := path.Join(cfg.SnapDir(), databaseFilename)
	beExist := fileutil.Exist(bepath)

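	// Open the backend in a goroutine so we can warn, rather than block
	// silently, if bolt is waiting on the file lock held by another etcd
	// process using the same data dir.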
	var be backend.Backend
	beOpened := make(chan struct{})
	go func() {
		be = backend.NewDefaultBackend(bepath)
		beOpened <- struct{}{}
	}()

	select {
	case <-beOpened:
	case <-time.After(time.Second):
		plog.Warningf("another etcd process is running with the same data dir and holding the file lock.")
		plog.Warningf("waiting for it to exit before starting...")
		<-beOpened
	}

	defer func() {
		if err != nil {
			be.Close()
		}
	}()

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}
	var (
		remotes  []*membership.Member
		snapshot *raftpb.Snapshot
	)

	switch {
	case !haveWAL && !cfg.NewCluster:
		if err = cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if gerr != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
		}
		if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incompatible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.ID())
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err = cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			var urlsmap types.URLsMap
			urlsmap, err = types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err = st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cl.Recover(api.UpdateCapability)
		if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !beExist {
			os.RemoveAll(bepath)
			return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath)
		}
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	heartbeat := time.Duration(cfg.TickMs) * time.Millisecond
	srv = &EtcdServer{
		readych:   make(chan struct{}),
		Cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
			Node:        n,
			ticker:      time.Tick(heartbeat),
			// set up contention detectors for raft heartbeat message.
			// expect to send a heartbeat within 2 heartbeat intervals.
			td:          contention.NewTimeoutDetector(2 * heartbeat),
			heartbeat:   heartbeat,
			raftStorage: s,
			storage:     NewStorage(w, ss),
			msgSnapC:    make(chan raftpb.Message, maxInFlightMsgSnap),
			readStateC:  make(chan raft.ReadState, 1),
		},
		id:            id,
		attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
	}

	srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

	srv.be = be
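	// Use a minimum lease TTL of 1.5 election timeouts so that leases do not
	// expire before a new leader can be elected.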
	minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat

	// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
	// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
	srv.lessor = lease.NewLessor(srv.be, int64(math.Ceil(minTTL.Seconds())))
	srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
	if beExist {
		kvindex := srv.kv.ConsistentIndex()
		// TODO: remove kvindex != 0 checking when we do not expect users to upgrade
		// etcd from pre-3.0 release.
		if snapshot != nil && kvindex < snapshot.Metadata.Index {
			if kvindex != 0 {
				return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d).", bepath, kvindex, snapshot.Metadata.Index)
			}
			plog.Warningf("consistent index never saved (snapshot index=%d)", snapshot.Metadata.Index)
		}
	}
	srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())

	srv.authStore = auth.NewAuthStore(srv.be)
	if h := cfg.AutoCompactionRetention; h != 0 {
		srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
		srv.compactor.Run()
	}

	srv.applyV3Base = &applierV3backend{srv}
	if err = srv.restoreAlarms(); err != nil {
		return nil, err
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
	}
	if err = tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}
Example #7
func main() {
	flag.Parse()
	if len(*migrateDatadir) == 0 {
		glog.Fatal("need to set '--data-dir'")
	}
	dbpath := path.Join(*migrateDatadir, "member", "snap", "db")

	// etcd3 store backend. We will use it to parse v3 data files and extract information.
	be := backend.NewDefaultBackend(dbpath)
	tx := be.BatchTx()

	// etcd2 store backend. We will use the v3 data to update it and then save a snapshot to disk.
	st := store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix)
	expireTime := time.Now().Add(*ttl)

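	// Walk every key-value pair in the v3 "key" bucket and replay it into the
	// v2 store.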
	tx.Lock()
	err := tx.UnsafeForEach([]byte("key"), func(k, v []byte) error {
		kv := &mvccpb.KeyValue{}
		kv.Unmarshal(v)

		// This is a compact key; skip it.
		if !strings.HasPrefix(string(kv.Key), "/") {
			return nil
		}

		ttlOpt := store.TTLOptionSet{}
		if kv.Lease != 0 {
			ttlOpt = store.TTLOptionSet{ExpireTime: expireTime}
		}

		if !isTombstone(k) {
			sk := path.Join(strings.Trim(etcdserver.StoreKeysPrefix, "/"), string(kv.Key))
			_, err := st.Set(sk, false, string(kv.Value), ttlOpt)
			if err != nil {
				return err
			}
		} else {
			st.Delete(string(kv.Key), false, false)
		}

		return nil
	})
	if err != nil {
		glog.Fatal(err)
	}
	tx.Unlock()

	if err := traverseAndDeleteEmptyDir(st, "/"); err != nil {
		glog.Fatal(err)
	}

	// rebuild cluster state.
	metadata, hardstate, oldSt, err := rebuild(*migrateDatadir)
	if err != nil {
		glog.Fatal(err)
	}

	// What follows is low-level logic that saves the metadata and data into a v2 snapshot.
	backupPath := *migrateDatadir + ".rollback.backup"
	if err := os.Rename(*migrateDatadir, backupPath); err != nil {
		glog.Fatal(err)
	}
	if err := os.MkdirAll(path.Join(*migrateDatadir, "member", "snap"), 0700); err != nil {
		glog.Fatal(err)
	}
	walDir := path.Join(*migrateDatadir, "member", "wal")

	w, err := wal.Create(walDir, metadata)
	if err != nil {
		glog.Fatal(err)
	}
	err = w.SaveSnapshot(walpb.Snapshot{Index: hardstate.Commit, Term: hardstate.Term})
	if err != nil {
		glog.Fatal(err)
	}
	w.Close()

	event, err := oldSt.Get(etcdserver.StoreClusterPrefix, true, false)
	if err != nil {
		glog.Fatal(err)
	}
	// nodes (members info) for ConfState
	nodes := []uint64{}
	traverseMetadata(event.Node, func(n *store.NodeExtern) {
		if n.Key != etcdserver.StoreClusterPrefix {
			// update store metadata
			v := ""
			if !n.Dir {
				v = *n.Value
			}
			if n.Key == path.Join(etcdserver.StoreClusterPrefix, "version") {
				v = rollbackVersion
			}
			if _, err := st.Set(n.Key, n.Dir, v, store.TTLOptionSet{}); err != nil {
				glog.Fatal(err)
			}

			// update nodes
			fields := strings.Split(n.Key, "/")
			if len(fields) == 4 && fields[2] == "members" {
				nodeID, err := strconv.ParseUint(fields[3], 16, 64)
				if err != nil {
					glog.Fatalf("failed to parse member ID (%s): %v", fields[3], err)
				}
				nodes = append(nodes, nodeID)
			}
		}
	})

	data, err := st.Save()
	if err != nil {
		glog.Fatal(err)
	}
	raftSnap := raftpb.Snapshot{
		Data: data,
		Metadata: raftpb.SnapshotMetadata{
			Index: hardstate.Commit,
			Term:  hardstate.Term,
			ConfState: raftpb.ConfState{
				Nodes: nodes,
			},
		},
	}
	snapshotter := snap.New(path.Join(*migrateDatadir, "member", "snap"))
	if err := snapshotter.SaveSnap(raftSnap); err != nil {
		glog.Fatal(err)
	}
	fmt.Println("Finished successfully")
}
Example #8
// makeDB copies the database snapshot to the snapshot directory
func makeDB(snapdir, dbfile string) {
	f, ferr := os.OpenFile(dbfile, os.O_RDONLY, 0600)
	if ferr != nil {
		ExitWithError(ExitInvalidInput, ferr)
	}
	defer f.Close()

	// get snapshot integrity hash
	if _, err := f.Seek(-sha256.Size, os.SEEK_END); err != nil {
		ExitWithError(ExitIO, err)
	}
	sha := make([]byte, sha256.Size)
	if _, err := f.Read(sha); err != nil {
		ExitWithError(ExitIO, err)
	}
	if _, err := f.Seek(0, os.SEEK_SET); err != nil {
		ExitWithError(ExitIO, err)
	}

	if err := os.MkdirAll(snapdir, 0755); err != nil {
		ExitWithError(ExitIO, err)
	}

	dbpath := path.Join(snapdir, "db")
	db, dberr := os.OpenFile(dbpath, os.O_RDWR|os.O_CREATE, 0600)
	if dberr != nil {
		ExitWithError(ExitIO, dberr)
	}
	if _, err := io.Copy(db, f); err != nil {
		ExitWithError(ExitIO, err)
	}

	// truncate away integrity hash, if any.
	off, serr := db.Seek(0, os.SEEK_END)
	if serr != nil {
		ExitWithError(ExitIO, serr)
	}
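	// bolt databases are sized in whole pages (a multiple of 512 bytes), so a
	// trailing 32-byte sha256 appended to the snapshot shows up as
	// size % 512 == sha256.Size.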
	hasHash := (off % 512) == sha256.Size
	if hasHash {
		if err := db.Truncate(off - sha256.Size); err != nil {
			ExitWithError(ExitIO, err)
		}
	}

	if !hasHash && !skipHashCheck {
		err := fmt.Errorf("snapshot missing hash but --skip-hash-check=false")
		ExitWithError(ExitBadArgs, err)
	}

	if hasHash && !skipHashCheck {
		// check for match
		if _, err := db.Seek(0, os.SEEK_SET); err != nil {
			ExitWithError(ExitIO, err)
		}
		h := sha256.New()
		if _, err := io.Copy(h, db); err != nil {
			ExitWithError(ExitIO, err)
		}
		dbsha := h.Sum(nil)
		if !reflect.DeepEqual(sha, dbsha) {
			err := fmt.Errorf("expected sha256 %v, got %v", sha, dbsha)
			ExitWithError(ExitInvalidInput, err)
		}
	}

	// db hash is OK, can now modify DB so it can be part of a new cluster
	db.Close()

	// update consistentIndex so applies go through on etcdserver despite
	// having a new raft instance
	be := backend.NewDefaultBackend(dbpath)
	s := mvcc.NewStore(be, nil, &initIndex{})
	id := s.TxnBegin()
	btx := be.BatchTx()
	del := func(k, v []byte) error {
		_, _, err := s.TxnDeleteRange(id, k, nil)
		return err
	}

	// delete stored members from old cluster since using new members
	btx.UnsafeForEach([]byte("members"), del)
	btx.UnsafeForEach([]byte("members_removed"), del)
	// trigger write-out of new consistent index
	s.TxnEnd(id)
	s.Commit()
	s.Close()
}
Example #9
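// getHash opens a backend on dbPath and hashes its contents, skipping the
// keys listed in mvcc.DefaultIgnores. Note that the backend is not closed.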
func getHash(dbPath string) (hash uint32, err error) {
	b := backend.NewDefaultBackend(dbPath)
	return b.Hash(mvcc.DefaultIgnores)
}