func (l Lease) removeFrom(b backend.Backend) {
    key := int64ToBytes(int64(l.ID))

    b.BatchTx().Lock()
    b.BatchTx().UnsafeDelete(leaseBucketName, key)
    b.BatchTx().Unlock()
}
func mustSaveClusterVersionToBackend(be backend.Backend, ver *semver.Version) {
    ckey := backendClusterVersionKey()

    tx := be.BatchTx()
    tx.Lock()
    defer tx.Unlock()
    tx.UnsafePut(clusterBucketName, ckey, []byte(ver.String()))
}
func mustCreateBackendBuckets(be backend.Backend) {
    tx := be.BatchTx()
    tx.Lock()
    defer tx.Unlock()
    tx.UnsafeCreateBucket(membersBucketName)
    tx.UnsafeCreateBucket(membersRemovedBuckedName)
    tx.UnsafeCreateBucket(clusterBucketName)
}
func mustDeleteMemberFromBackend(be backend.Backend, id types.ID) {
    mkey := backendMemberKey(id)

    tx := be.BatchTx()
    tx.Lock()
    tx.UnsafeDelete(membersBucketName, mkey)
    tx.UnsafePut(membersRemovedBuckedName, mkey, []byte("removed"))
    tx.Unlock()
}
func mustSaveMemberToBackend(be backend.Backend, m *Member) {
    mkey := backendMemberKey(m.ID)
    mvalue, err := json.Marshal(m)
    if err != nil {
        plog.Panicf("marshal raftAttributes should never fail: %v", err)
    }

    tx := be.BatchTx()
    tx.Lock()
    tx.UnsafePut(membersBucketName, mkey, mvalue)
    tx.Unlock()
}
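A minimal sketch, not taken from the original source, of how the three membership helpers above might be exercised together from within the membership package; the backend value and the member ID are assumptions for illustration.

// Hypothetical usage sketch, assuming a freshly opened backend `be`.
func exampleMembershipBackend(be backend.Backend) {
    mustCreateBackendBuckets(be)

    // Assumed member ID; only the exported ID field is populated here.
    m := &Member{ID: types.ID(0x1234)}
    mustSaveMemberToBackend(be, m)

    // Removing the member deletes its key and records the removal marker.
    mustDeleteMemberFromBackend(be, m.ID)
}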
func (l Lease) persistTo(b backend.Backend) {
    key := int64ToBytes(int64(l.ID))

    lpb := leasepb.Lease{ID: int64(l.ID), TTL: int64(l.TTL)}
    val, err := lpb.Marshal()
    if err != nil {
        panic("failed to marshal lease proto item")
    }

    b.BatchTx().Lock()
    b.BatchTx().UnsafePut(leaseBucketName, key, val)
    b.BatchTx().Unlock()
}
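A small illustrative sketch, assumed rather than taken from the source, pairing persistTo with removeFrom inside the lease package; the Lease value is built only from the ID and TTL fields these methods actually read.

// Hypothetical sketch: persist a lease, then delete it again.
func exampleLeaseBackend(b backend.Backend) {
    l := Lease{ID: 1, TTL: 60}
    l.persistTo(b)  // writes the marshaled leasepb.Lease under the lease ID key
    l.removeFrom(b) // deletes the same key from leaseBucketName
}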
// WriteKV marshals a single key-value pair and stores it in the key bucket
// under the revision bytes derived from its ModRevision.
func WriteKV(be backend.Backend, kv mvccpb.KeyValue) {
    ibytes := newRevBytes()
    revToBytes(revision{main: kv.ModRevision}, ibytes)

    d, err := kv.Marshal()
    if err != nil {
        plog.Fatalf("cannot marshal event: %v", err)
    }

    be.BatchTx().Lock()
    be.BatchTx().UnsafePut(keyBucketName, ibytes, d)
    be.BatchTx().Unlock()
}
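A hedged usage sketch, assumed for illustration, of calling WriteKV; the key, value, and revision are made up, and only fields that WriteKV reads (plus the obvious payload) are set.

// Hypothetical sketch: write one key-value pair at revision 5.
func exampleWriteKV(be backend.Backend) {
    kv := mvccpb.KeyValue{
        Key:         []byte("foo"),
        Value:       []byte("bar"),
        ModRevision: 5,
    }
    WriteKV(be, kv) // stored under the revision bytes derived from ModRevision
}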
func NewAuthStore(be backend.Backend) *authStore {
    tx := be.BatchTx()
    tx.Lock()

    tx.UnsafeCreateBucket(authBucketName)
    tx.UnsafeCreateBucket(authUsersBucketName)
    tx.UnsafeCreateBucket(authRolesBucketName)

    tx.Unlock()
    be.ForceCommit()

    return &authStore{
        be: be,
    }
}
// Variant of NewAuthStore that additionally initializes the simpleTokens map.
func NewAuthStore(be backend.Backend) *authStore {
    tx := be.BatchTx()
    tx.Lock()

    tx.UnsafeCreateBucket(authBucketName)
    tx.UnsafeCreateBucket(authUsersBucketName)
    tx.UnsafeCreateBucket(authRolesBucketName)

    tx.Unlock()
    be.ForceCommit()

    return &authStore{
        be:           be,
        simpleTokens: make(map[string]string),
    }
}
func (as *authStore) Recover(be backend.Backend) {
    enabled := false
    as.be = be
    tx := be.BatchTx()
    tx.Lock()
    _, vs := tx.UnsafeRange(authBucketName, enableFlagKey, nil, 0)
    if len(vs) == 1 {
        if bytes.Equal(vs[0], authEnabled) {
            enabled = true
        }
    }
    tx.Unlock()

    as.enabledMu.Lock()
    as.enabled = enabled
    as.enabledMu.Unlock()
}
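A speculative sketch, not from the excerpt, that combines NewAuthStore with Recover, e.g. when a restore swaps in a replacement backend; both calls use only the signatures shown above.

// Hypothetical sketch: build the auth store, then re-point it at a new backend.
func exampleAuthStore(oldBE, newBE backend.Backend) {
    as := NewAuthStore(oldBE) // creates the auth buckets and force-commits them
    as.Recover(newBE)         // re-reads the enable flag from the replacement backend
}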
func (s *store) Restore(b backend.Backend) error {
    s.mu.Lock()
    defer s.mu.Unlock()

    close(s.stopc)
    s.fifoSched.Stop()

    s.b = b
    s.kvindex = newTreeIndex()
    s.currentRev = revision{main: 1}
    s.compactMainRev = -1
    s.tx = b.BatchTx()
    s.txnID = -1
    s.fifoSched = schedule.NewFIFOScheduler()
    s.stopc = make(chan struct{})

    return s.restore()
}
// UpdateConsistentIndex stores index in the meta bucket, but only if it is
// larger than the value already persisted there.
func UpdateConsistentIndex(be backend.Backend, index uint64) {
    tx := be.BatchTx()
    tx.Lock()
    defer tx.Unlock()

    var oldi uint64
    _, vs := tx.UnsafeRange(metaBucketName, consistentIndexKeyName, nil, 0)
    if len(vs) != 0 {
        oldi = binary.BigEndian.Uint64(vs[0])
    }

    if index <= oldi {
        return
    }

    bs := make([]byte, 8)
    binary.BigEndian.PutUint64(bs, index)
    tx.UnsafePut(metaBucketName, consistentIndexKeyName, bs)
}
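A minimal sketch, assumed for illustration, of the monotonic guard in UpdateConsistentIndex: a smaller index is silently ignored, so replayed or out-of-order updates cannot move the stored index backwards.

// Hypothetical sketch: only the larger index survives.
func exampleConsistentIndex(be backend.Backend) {
    UpdateConsistentIndex(be, 10) // stored: big-endian 10
    UpdateConsistentIndex(be, 7)  // ignored, since 7 <= 10
    UpdateConsistentIndex(be, 12) // stored: big-endian 12
}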
func prepareBackend() backend.Backend {
    var be backend.Backend
    bch := make(chan struct{})
    dbpath := path.Join(migrateDatadir, "member", "snap", "db")
    go func() {
        defer close(bch)
        be = backend.New(dbpath, time.Second, 10000)
    }()
    select {
    case <-bch:
    case <-time.After(time.Second):
        fmt.Fprintf(os.Stderr, "waiting for etcd to close and release its lock on %q\n", dbpath)
        <-bch
    }

    tx := be.BatchTx()
    tx.Lock()
    tx.UnsafeCreateBucket([]byte("key"))
    tx.UnsafeCreateBucket([]byte("meta"))
    tx.Unlock()
    return be
}
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
    st := store.New(StoreClusterPrefix, StoreKeysPrefix)

    var (
        w  *wal.WAL
        n  raft.Node
        s  *raft.MemoryStorage
        id types.ID
        cl *membership.RaftCluster
    )

    if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
        return nil, fmt.Errorf("cannot access data directory: %v", terr)
    }

    haveWAL := wal.Exist(cfg.WALDir())

    if err = fileutil.TouchDirAll(cfg.SnapDir()); err != nil {
        plog.Fatalf("create snapshot directory error: %v", err)
    }
    ss := snap.New(cfg.SnapDir())

    bepath := path.Join(cfg.SnapDir(), databaseFilename)
    beExist := fileutil.Exist(bepath)

    var be backend.Backend
    beOpened := make(chan struct{})
    go func() {
        be = backend.NewDefaultBackend(bepath)
        beOpened <- struct{}{}
    }()

    select {
    case <-beOpened:
    case <-time.After(time.Second):
        plog.Warningf("another etcd process is running with the same data dir and holding the file lock.")
        plog.Warningf("waiting for it to exit before starting...")
        <-beOpened
    }

    defer func() {
        if err != nil {
            be.Close()
        }
    }()

    prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
    if err != nil {
        return nil, err
    }

    var (
        remotes  []*membership.Member
        snapshot *raftpb.Snapshot
    )

    switch {
    case !haveWAL && !cfg.NewCluster:
        if err = cfg.VerifyJoinExisting(); err != nil {
            return nil, err
        }
        cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
        if err != nil {
            return nil, err
        }
        existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
        if gerr != nil {
            return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
        }
        if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
            return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
        }
        if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
            return nil, fmt.Errorf("incompatible with current running cluster")
        }

        remotes = existingCluster.Members()
        cl.SetID(existingCluster.ID())
        cl.SetStore(st)
        cl.SetBackend(be)
        cfg.Print()
        id, n, s, w = startNode(cfg, cl, nil)
    case !haveWAL && cfg.NewCluster:
        if err = cfg.VerifyBootstrap(); err != nil {
            return nil, err
        }
        cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
        if err != nil {
            return nil, err
        }
        m := cl.MemberByName(cfg.Name)
        if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
            return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
        }
        if cfg.ShouldDiscover() {
            var str string
            str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
            if err != nil {
                return nil, &DiscoveryError{Op: "join", Err: err}
            }
            var urlsmap types.URLsMap
            urlsmap, err = types.NewURLsMap(str)
            if err != nil {
                return nil, err
            }
            if checkDuplicateURL(urlsmap) {
                return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
            }
            if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
                return nil, err
            }
        }
        cl.SetStore(st)
        cl.SetBackend(be)
        cfg.PrintWithInitial()
        id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
    case haveWAL:
        if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
            return nil, fmt.Errorf("cannot write to member directory: %v", err)
        }
        if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
            return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
        }

        if cfg.ShouldDiscover() {
            plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
        }
        snapshot, err = ss.Load()
        if err != nil && err != snap.ErrNoSnapshot {
            return nil, err
        }
        if snapshot != nil {
            if err = st.Recovery(snapshot.Data); err != nil {
                plog.Panicf("recovered store from snapshot error: %v", err)
            }
            plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
        }
        cfg.Print()
        if !cfg.ForceNewCluster {
            id, cl, n, s, w = restartNode(cfg, snapshot)
        } else {
            id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
        }
        cl.SetStore(st)
        cl.SetBackend(be)
        cl.Recover(api.UpdateCapability)
        if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !beExist {
            os.RemoveAll(bepath)
            return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath)
        }
    default:
        return nil, fmt.Errorf("unsupported bootstrap config")
    }

    if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
        return nil, fmt.Errorf("cannot access member directory: %v", terr)
    }

    sstats := &stats.ServerStats{
        Name: cfg.Name,
        ID:   id.String(),
    }
    sstats.Initialize()
    lstats := stats.NewLeaderStats(id.String())

    heartbeat := time.Duration(cfg.TickMs) * time.Millisecond
    srv = &EtcdServer{
        readych:   make(chan struct{}),
        Cfg:       cfg,
        snapCount: cfg.SnapCount,
        errorc:    make(chan error, 1),
        store:     st,
        r: raftNode{
            isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
            Node:        n,
            ticker:      time.Tick(heartbeat),
            // set up contention detectors for raft heartbeat message.
            // expect to send a heartbeat within 2 heartbeat intervals.
            td:          contention.NewTimeoutDetector(2 * heartbeat),
            heartbeat:   heartbeat,
            raftStorage: s,
            storage:     NewStorage(w, ss),
            msgSnapC:    make(chan raftpb.Message, maxInFlightMsgSnap),
            readStateC:  make(chan raft.ReadState, 1),
        },
        id:            id,
        attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
        cluster:       cl,
        stats:         sstats,
        lstats:        lstats,
        SyncTicker:    time.Tick(500 * time.Millisecond),
        peerRt:        prt,
        reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
        forceVersionC: make(chan struct{}),
    }

    srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

    srv.be = be
    minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat

    // always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
    // If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
    srv.lessor = lease.NewLessor(srv.be, int64(math.Ceil(minTTL.Seconds())))
    srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
    if beExist {
        kvindex := srv.kv.ConsistentIndex()
        // TODO: remove kvindex != 0 checking when we do not expect users to upgrade
        // etcd from pre-3.0 release.
        if snapshot != nil && kvindex < snapshot.Metadata.Index {
            if kvindex != 0 {
                return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d).", bepath, kvindex, snapshot.Metadata.Index)
            }
            plog.Warningf("consistent index never saved (snapshot index=%d)", snapshot.Metadata.Index)
        }
    }
    srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())

    srv.authStore = auth.NewAuthStore(srv.be)
    if h := cfg.AutoCompactionRetention; h != 0 {
        srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
        srv.compactor.Run()
    }

    srv.applyV3Base = &applierV3backend{srv}
    if err = srv.restoreAlarms(); err != nil {
        return nil, err
    }

    // TODO: move transport initialization near the definition of remote
    tr := &rafthttp.Transport{
        TLSInfo:     cfg.PeerTLSInfo,
        DialTimeout: cfg.peerDialTimeout(),
        ID:          id,
        URLs:        cfg.PeerURLs,
        ClusterID:   cl.ID(),
        Raft:        srv,
        Snapshotter: ss,
        ServerStats: sstats,
        LeaderStats: lstats,
        ErrorC:      srv.errorc,
    }
    if err = tr.Start(); err != nil {
        return nil, err
    }
    // add all remotes into transport
    for _, m := range remotes {
        if m.ID != id {
            tr.AddRemote(m.ID, m.PeerURLs)
        }
    }
    for _, m := range cl.Members() {
        if m.ID != id {
            tr.AddPeer(m.ID, m.PeerURLs)
        }
    }
    srv.r.transport = tr

    return srv, nil
}
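A rough, assumed sketch of driving NewServer from a prepared configuration; the Start and Stop lifecycle calls are assumptions about the surrounding etcdserver package, not taken from the excerpt above.

// Hypothetical sketch: construct and run a server from an already-built config.
func exampleNewServer(cfg *ServerConfig) error {
    srv, err := NewServer(cfg)
    if err != nil {
        return err
    }
    srv.Start() // assumed lifecycle method: begins serving raft and client traffic
    // ... serve until shutdown is requested ...
    srv.Stop() // assumed lifecycle method: stops the server and releases the backend
    return nil
}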
func cleanup(s KV, b backend.Backend, path string) {
    s.Close()
    b.Close()
    os.Remove(path)
}
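All of the snippets above follow the same discipline: take the batch-transaction lock, perform Unsafe* operations, unlock, and let the backend flush in batches (or call ForceCommit when the write must be durable immediately). A hedged, generic sketch of that pattern, with the bucket, key, and value parameters invented for illustration:

// Hypothetical sketch of the common pattern: lock, mutate, unlock, commit.
func examplePutAndCommit(be backend.Backend, bucket, key, val []byte) {
    tx := be.BatchTx()
    tx.Lock()
    tx.UnsafeCreateBucket(bucket)
    tx.UnsafePut(bucket, key, val)
    tx.Unlock()

    // Force the pending batch to disk instead of waiting for the batch interval.
    be.ForceCommit()
}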