func newEtcdStore(
	proposalc chan<- []byte,
	snapshotc <-chan raftpb.Snapshot,
	entryc <-chan raftpb.Entry,
	confentryc chan<- raftpb.Entry,
	logger mesh.Logger,
) *etcdStore {
	// It would be much better if we could have a proper in-memory backend. Alas:
	// backend.Backend is tightly coupled to bolt.DB, and both are tightly coupled
	// to os.Open &c. So we'd need to fork both Bolt and backend. A task for
	// another day.
	f, err := ioutil.TempFile(os.TempDir(), "mesh_etcd_backend_")
	if err != nil {
		panic(err)
	}
	dbPath := f.Name()
	f.Close()
	logger.Printf("etcd store: using %s", dbPath)

	b := backend.NewDefaultBackend(dbPath)
	lessor := lease.NewLessor(b)
	index := &consistentIndex{0}
	kv := mvcc.New(b, lessor, index)

	s := &etcdStore{
		proposalc:   proposalc,
		snapshotc:   snapshotc,
		entryc:      entryc,
		confentryc:  confentryc,
		actionc:     make(chan func()),
		quitc:       make(chan struct{}),
		terminatedc: make(chan struct{}),
		logger:      logger,

		dbPath: dbPath,
		kv:     kv,
		lessor: lessor,
		index:  index,

		idgen:   makeIDGen(),
		pending: map[uint64]responseChans{},
	}
	go s.loop()
	return s
}
// makeDB copies the database snapshot to the snapshot directory
func makeDB(snapdir, dbfile string) {
	f, ferr := os.OpenFile(dbfile, os.O_RDONLY, 0600)
	if ferr != nil {
		ExitWithError(ExitInvalidInput, ferr)
	}
	defer f.Close()

	if err := os.MkdirAll(snapdir, 0755); err != nil {
		ExitWithError(ExitIO, err)
	}

	dbpath := path.Join(snapdir, "db")
	db, dberr := os.OpenFile(dbpath, os.O_WRONLY|os.O_CREATE, 0600)
	if dberr != nil {
		ExitWithError(ExitIO, dberr)
	}
	if _, err := io.Copy(db, f); err != nil {
		ExitWithError(ExitIO, err)
	}
	db.Close()

	// update consistentIndex so applies go through on etcdserver despite
	// having a new raft instance
	be := backend.NewDefaultBackend(dbpath)
	s := mvcc.NewStore(be, nil, &initIndex{})
	id := s.TxnBegin()
	btx := be.BatchTx()
	del := func(k, v []byte) error {
		_, _, err := s.TxnDeleteRange(id, k, nil)
		return err
	}

	// delete stored members from old cluster since using new members
	btx.UnsafeForEach([]byte("members"), del)
	btx.UnsafeForEach([]byte("members_removed"), del)

	// trigger write-out of new consistent index
	s.TxnEnd(id)
	s.Commit()
	s.Close()
}
func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
	if raft.IsEmptySnap(apply.snapshot) {
		return
	}

	if apply.snapshot.Metadata.Index <= ep.appliedi {
		plog.Panicf("snapshot index [%d] should > appliedi[%d] + 1",
			apply.snapshot.Metadata.Index, ep.appliedi)
	}

	snapfn, err := s.r.storage.DBFilePath(apply.snapshot.Metadata.Index)
	if err != nil {
		plog.Panicf("get database snapshot file path error: %v", err)
	}

	fn := path.Join(s.Cfg.SnapDir(), databaseFilename)
	if err := os.Rename(snapfn, fn); err != nil {
		plog.Panicf("rename snapshot file error: %v", err)
	}

	newbe := backend.NewDefaultBackend(fn)
	if err := s.kv.Restore(newbe); err != nil {
		plog.Panicf("restore KV error: %v", err)
	}
	s.consistIndex.setConsistentIndex(s.kv.ConsistentIndex())

	// Closing old backend might block until all the txns
	// on the backend are finished.
	// We do not want to wait on closing the old backend.
	s.bemu.Lock()
	oldbe := s.be
	go func() {
		if err := oldbe.Close(); err != nil {
			plog.Panicf("close backend error: %v", err)
		}
	}()
	s.be = newbe
	s.bemu.Unlock()

	if s.lessor != nil {
		s.lessor.Recover(newbe, s.kv)
	}

	if err := s.restoreAlarms(); err != nil {
		plog.Panicf("restore alarms error: %v", err)
	}

	if s.authStore != nil {
		s.authStore.Recover(newbe)
	}

	if err := s.store.Recovery(apply.snapshot.Data); err != nil {
		plog.Panicf("recovery store error: %v", err)
	}
	s.cluster.SetBackend(s.be)
	s.cluster.Recover()

	// recover raft transport
	s.r.transport.RemoveAllPeers()
	for _, m := range s.cluster.Members() {
		if m.ID == s.ID() {
			continue
		}
		s.r.transport.AddPeer(m.ID, m.PeerURLs)
	}

	ep.appliedi = apply.snapshot.Metadata.Index
	ep.snapi = ep.appliedi
	ep.confState = apply.snapshot.Metadata.ConfState
	plog.Infof("recovered from incoming snapshot at index %d", ep.snapi)
}
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)

	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *membership.RaftCluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	// Run the migrations.
	dataVer, err := version.DetectDataDir(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if err = upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
		return nil, err
	}

	haveWAL := wal.Exist(cfg.WALDir())

	if err = os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil && !os.IsExist(err) {
		plog.Fatalf("create snapshot directory error: %v", err)
	}
	ss := snap.New(cfg.SnapDir())
	be := backend.NewDefaultBackend(path.Join(cfg.SnapDir(), databaseFilename))
	defer func() {
		if err != nil {
			be.Close()
		}
	}()

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}

	var remotes []*membership.Member

	switch {
	case !haveWAL && !cfg.NewCluster:
		if err = cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if gerr != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
		}
		if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incompatible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.ID())
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err = cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			var urlsmap types.URLsMap
			urlsmap, err = types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}
		if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}
		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		var snapshot *raftpb.Snapshot
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err = st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cl.Recover()
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	srv = &EtcdServer{
		readych:   make(chan struct{}),
		Cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			Node:        n,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:            id,
		attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
		msgSnapC:      make(chan raftpb.Message, maxInFlightMsgSnap),
	}

	srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

	srv.be = be
	srv.lessor = lease.NewLessor(srv.be)
	srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
	srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())
	srv.authStore = auth.NewAuthStore(srv.be)
	if h := cfg.AutoCompactionRetention; h != 0 {
		srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
		srv.compactor.Run()
	}

	if err = srv.restoreAlarms(); err != nil {
		return nil, err
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
	}
	if err = tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}
func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
	if raft.IsEmptySnap(apply.snapshot) {
		return
	}

	plog.Infof("applying snapshot at index %d...", ep.snapi)
	defer plog.Infof("finished applying incoming snapshot at index %d", ep.snapi)

	if apply.snapshot.Metadata.Index <= ep.appliedi {
		plog.Panicf("snapshot index [%d] should > appliedi[%d] + 1",
			apply.snapshot.Metadata.Index, ep.appliedi)
	}

	snapfn, err := s.r.storage.DBFilePath(apply.snapshot.Metadata.Index)
	if err != nil {
		plog.Panicf("get database snapshot file path error: %v", err)
	}

	fn := path.Join(s.Cfg.SnapDir(), databaseFilename)
	if err := os.Rename(snapfn, fn); err != nil {
		plog.Panicf("rename snapshot file error: %v", err)
	}

	newbe := backend.NewDefaultBackend(fn)

	// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
	// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
	if s.lessor != nil {
		plog.Info("recovering lessor...")
		s.lessor.Recover(newbe, s.kv)
		plog.Info("finished recovering lessor")
	}

	plog.Info("restoring mvcc store...")
	if err := s.kv.Restore(newbe); err != nil {
		plog.Panicf("restore KV error: %v", err)
	}
	s.consistIndex.setConsistentIndex(s.kv.ConsistentIndex())
	plog.Info("finished restoring mvcc store")

	// Closing old backend might block until all the txns
	// on the backend are finished.
	// We do not want to wait on closing the old backend.
	s.bemu.Lock()
	oldbe := s.be
	go func() {
		plog.Info("closing old backend...")
		defer plog.Info("finished closing old backend")
		if err := oldbe.Close(); err != nil {
			plog.Panicf("close backend error: %v", err)
		}
	}()

	s.be = newbe
	s.bemu.Unlock()

	plog.Info("recovering alarms...")
	if err := s.restoreAlarms(); err != nil {
		plog.Panicf("restore alarms error: %v", err)
	}
	plog.Info("finished recovering alarms")

	if s.authStore != nil {
		plog.Info("recovering auth store...")
		s.authStore.Recover(newbe)
		plog.Info("finished recovering auth store")
	}

	plog.Info("recovering store v2...")
	if err := s.store.Recovery(apply.snapshot.Data); err != nil {
		plog.Panicf("recovery store error: %v", err)
	}
	plog.Info("finished recovering store v2")

	s.cluster.SetBackend(s.be)
	plog.Info("recovering cluster configuration...")
	s.cluster.Recover(api.UpdateCapability)
	plog.Info("finished recovering cluster configuration")

	plog.Info("removing old peers from network...")
	// recover raft transport
	s.r.transport.RemoveAllPeers()
	plog.Info("finished removing old peers from network")

	plog.Info("adding peers from new cluster configuration into network...")
	for _, m := range s.cluster.Members() {
		if m.ID == s.ID() {
			continue
		}
		s.r.transport.AddPeer(m.ID, m.PeerURLs)
	}
	plog.Info("finished adding peers from new cluster configuration into network...")

	ep.appliedi = apply.snapshot.Metadata.Index
	ep.snapi = ep.appliedi
	ep.confState = apply.snapshot.Metadata.ConfState
}
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)

	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *membership.RaftCluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	haveWAL := wal.Exist(cfg.WALDir())

	if err = fileutil.TouchDirAll(cfg.SnapDir()); err != nil {
		plog.Fatalf("create snapshot directory error: %v", err)
	}
	ss := snap.New(cfg.SnapDir())

	bepath := path.Join(cfg.SnapDir(), databaseFilename)
	beExist := fileutil.Exist(bepath)

	var be backend.Backend
	beOpened := make(chan struct{})
	go func() {
		be = backend.NewDefaultBackend(bepath)
		beOpened <- struct{}{}
	}()

	select {
	case <-beOpened:
	case <-time.After(time.Second):
		plog.Warningf("another etcd process is running with the same data dir and holding the file lock.")
		plog.Warningf("waiting for it to exit before starting...")
		<-beOpened
	}

	defer func() {
		if err != nil {
			be.Close()
		}
	}()

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}

	var (
		remotes  []*membership.Member
		snapshot *raftpb.Snapshot
	)

	switch {
	case !haveWAL && !cfg.NewCluster:
		if err = cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if gerr != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
		}
		if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incompatible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.ID())
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err = cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			var urlsmap types.URLsMap
			urlsmap, err = types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}
		if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}
		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err = st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cl.Recover(api.UpdateCapability)
		if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !beExist {
			os.RemoveAll(bepath)
			return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath)
		}
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	heartbeat := time.Duration(cfg.TickMs) * time.Millisecond
	srv = &EtcdServer{
		readych:   make(chan struct{}),
		Cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
			Node:        n,
			ticker:      time.Tick(heartbeat),
			// set up contention detectors for raft heartbeat message.
			// expect to send a heartbeat within 2 heartbeat intervals.
			td:          contention.NewTimeoutDetector(2 * heartbeat),
			heartbeat:   heartbeat,
			raftStorage: s,
			storage:     NewStorage(w, ss),
			msgSnapC:    make(chan raftpb.Message, maxInFlightMsgSnap),
			readStateC:  make(chan raft.ReadState, 1),
		},
		id:            id,
		attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
	}

	srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

	srv.be = be
	minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat

	// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
	// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
	srv.lessor = lease.NewLessor(srv.be, int64(math.Ceil(minTTL.Seconds())))
	srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
	if beExist {
		kvindex := srv.kv.ConsistentIndex()
		// TODO: remove kvindex != 0 checking when we do not expect users to upgrade
		// etcd from pre-3.0 release.
		if snapshot != nil && kvindex < snapshot.Metadata.Index {
			if kvindex != 0 {
				return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d).", bepath, kvindex, snapshot.Metadata.Index)
			}
			plog.Warningf("consistent index never saved (snapshot index=%d)", snapshot.Metadata.Index)
		}
	}
	srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())
	srv.authStore = auth.NewAuthStore(srv.be)
	if h := cfg.AutoCompactionRetention; h != 0 {
		srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
		srv.compactor.Run()
	}

	srv.applyV3Base = &applierV3backend{srv}

	if err = srv.restoreAlarms(); err != nil {
		return nil, err
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
	}
	if err = tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}
func main() {
	flag.Parse()

	if len(*migrateDatadir) == 0 {
		glog.Fatal("need to set '--data-dir'")
	}
	dbpath := path.Join(*migrateDatadir, "member", "snap", "db")

	// etcd3 store backend. We will use it to parse v3 data files and extract information.
	be := backend.NewDefaultBackend(dbpath)
	tx := be.BatchTx()

	// etcd2 store backend. We will use v3 data to update this and then save snapshot to disk.
	st := store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix)
	expireTime := time.Now().Add(*ttl)

	tx.Lock()
	err := tx.UnsafeForEach([]byte("key"), func(k, v []byte) error {
		kv := &mvccpb.KeyValue{}
		kv.Unmarshal(v)

		// This is compact key.
		if !strings.HasPrefix(string(kv.Key), "/") {
			return nil
		}

		ttlOpt := store.TTLOptionSet{}
		if kv.Lease != 0 {
			ttlOpt = store.TTLOptionSet{ExpireTime: expireTime}
		}

		if !isTombstone(k) {
			sk := path.Join(strings.Trim(etcdserver.StoreKeysPrefix, "/"), string(kv.Key))
			_, err := st.Set(sk, false, string(kv.Value), ttlOpt)
			if err != nil {
				return err
			}
		} else {
			st.Delete(string(kv.Key), false, false)
		}

		return nil
	})
	if err != nil {
		glog.Fatal(err)
	}
	tx.Unlock()

	if err := traverseAndDeleteEmptyDir(st, "/"); err != nil {
		glog.Fatal(err)
	}

	// rebuild cluster state.
	metadata, hardstate, oldSt, err := rebuild(*migrateDatadir)
	if err != nil {
		glog.Fatal(err)
	}

	// In the following, it's low level logic that saves metadata and data into v2 snapshot.
	backupPath := *migrateDatadir + ".rollback.backup"
	if err := os.Rename(*migrateDatadir, backupPath); err != nil {
		glog.Fatal(err)
	}
	if err := os.MkdirAll(path.Join(*migrateDatadir, "member", "snap"), 0700); err != nil {
		glog.Fatal(err)
	}
	walDir := path.Join(*migrateDatadir, "member", "wal")

	w, err := wal.Create(walDir, metadata)
	if err != nil {
		glog.Fatal(err)
	}
	err = w.SaveSnapshot(walpb.Snapshot{Index: hardstate.Commit, Term: hardstate.Term})
	if err != nil {
		glog.Fatal(err)
	}
	w.Close()

	event, err := oldSt.Get(etcdserver.StoreClusterPrefix, true, false)
	if err != nil {
		glog.Fatal(err)
	}

	// nodes (members info) for ConfState
	nodes := []uint64{}
	traverseMetadata(event.Node, func(n *store.NodeExtern) {
		if n.Key != etcdserver.StoreClusterPrefix {
			// update store metadata
			v := ""
			if !n.Dir {
				v = *n.Value
			}
			if n.Key == path.Join(etcdserver.StoreClusterPrefix, "version") {
				v = rollbackVersion
			}
			if _, err := st.Set(n.Key, n.Dir, v, store.TTLOptionSet{}); err != nil {
				glog.Fatal(err)
			}

			// update nodes
			fields := strings.Split(n.Key, "/")
			if len(fields) == 4 && fields[2] == "members" {
				nodeID, err := strconv.ParseUint(fields[3], 16, 64)
				if err != nil {
					glog.Fatalf("failed to parse member ID (%s): %v", fields[3], err)
				}
				nodes = append(nodes, nodeID)
			}
		}
	})

	data, err := st.Save()
	if err != nil {
		glog.Fatal(err)
	}
	raftSnap := raftpb.Snapshot{
		Data: data,
		Metadata: raftpb.SnapshotMetadata{
			Index: hardstate.Commit,
			Term:  hardstate.Term,
			ConfState: raftpb.ConfState{
				Nodes: nodes,
			},
		},
	}
	snapshotter := snap.New(path.Join(*migrateDatadir, "member", "snap"))
	if err := snapshotter.SaveSnap(raftSnap); err != nil {
		glog.Fatal(err)
	}

	fmt.Println("Finished successfully")
}
// makeDB copies the database snapshot to the snapshot directory
func makeDB(snapdir, dbfile string) {
	f, ferr := os.OpenFile(dbfile, os.O_RDONLY, 0600)
	if ferr != nil {
		ExitWithError(ExitInvalidInput, ferr)
	}
	defer f.Close()

	// get snapshot integrity hash
	if _, err := f.Seek(-sha256.Size, os.SEEK_END); err != nil {
		ExitWithError(ExitIO, err)
	}
	sha := make([]byte, sha256.Size)
	if _, err := f.Read(sha); err != nil {
		ExitWithError(ExitIO, err)
	}
	if _, err := f.Seek(0, os.SEEK_SET); err != nil {
		ExitWithError(ExitIO, err)
	}

	if err := os.MkdirAll(snapdir, 0755); err != nil {
		ExitWithError(ExitIO, err)
	}

	dbpath := path.Join(snapdir, "db")
	db, dberr := os.OpenFile(dbpath, os.O_RDWR|os.O_CREATE, 0600)
	if dberr != nil {
		ExitWithError(ExitIO, dberr)
	}
	if _, err := io.Copy(db, f); err != nil {
		ExitWithError(ExitIO, err)
	}

	// truncate away integrity hash, if any.
	off, serr := db.Seek(0, os.SEEK_END)
	if serr != nil {
		ExitWithError(ExitIO, serr)
	}
	hasHash := (off % 512) == sha256.Size
	if hasHash {
		if err := db.Truncate(off - sha256.Size); err != nil {
			ExitWithError(ExitIO, err)
		}
	}

	if !hasHash && !skipHashCheck {
		err := fmt.Errorf("snapshot missing hash but --skip-hash-check=false")
		ExitWithError(ExitBadArgs, err)
	}

	if hasHash && !skipHashCheck {
		// check for match
		if _, err := db.Seek(0, os.SEEK_SET); err != nil {
			ExitWithError(ExitIO, err)
		}
		h := sha256.New()
		if _, err := io.Copy(h, db); err != nil {
			ExitWithError(ExitIO, err)
		}
		dbsha := h.Sum(nil)
		if !reflect.DeepEqual(sha, dbsha) {
			err := fmt.Errorf("expected sha256 %v, got %v", sha, dbsha)
			ExitWithError(ExitInvalidInput, err)
		}
	}

	// db hash is OK, can now modify DB so it can be part of a new cluster
	db.Close()

	// update consistentIndex so applies go through on etcdserver despite
	// having a new raft instance
	be := backend.NewDefaultBackend(dbpath)
	s := mvcc.NewStore(be, nil, &initIndex{})
	id := s.TxnBegin()
	btx := be.BatchTx()
	del := func(k, v []byte) error {
		_, _, err := s.TxnDeleteRange(id, k, nil)
		return err
	}

	// delete stored members from old cluster since using new members
	btx.UnsafeForEach([]byte("members"), del)
	btx.UnsafeForEach([]byte("members_removed"), del)

	// trigger write-out of new consistent index
	s.TxnEnd(id)
	s.Commit()
	s.Close()
}
func getHash(dbPath string) (hash uint32, err error) {
	b := backend.NewDefaultBackend(dbPath)
	return b.Hash(mvcc.DefaultIgnores)
}
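A minimal usage sketch of getHash above, assuming it is reachable from a small helper in the same package: it opens each snapshot db file through the default backend and compares the resulting hashes. The helper name and file paths are hypothetical.

// verifySameSnapshot is a hypothetical helper built on getHash above. It
// reports whether two snapshot db files produce the same backend hash,
// e.g. to sanity-check a copied snapshot against its source.
func verifySameSnapshot(pathA, pathB string) (bool, error) {
	ha, err := getHash(pathA)
	if err != nil {
		return false, err
	}
	hb, err := getHash(pathB)
	if err != nil {
		return false, err
	}
	return ha == hb, nil
}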