Example #1
File: kvstore.go  Project: zhuleilei/etcd
func (s *store) Restore() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	min, max := newRevBytes(), newRevBytes()
	revToBytes(revision{}, min)
	revToBytes(revision{main: math.MaxInt64, sub: math.MaxInt64}, max)

	// restore index
	tx := s.b.BatchTx()
	tx.Lock()
	_, finishedCompactBytes := tx.UnsafeRange(metaBucketName, finishedCompactKeyName, nil, 0)
	if len(finishedCompactBytes) != 0 {
		s.compactMainRev = bytesToRev(finishedCompactBytes[0]).main
		log.Printf("storage: restore compact to %d", s.compactMainRev)
	}

	// TODO: limit N to reduce max memory usage
	keys, vals := tx.UnsafeRange(keyBucketName, min, max, 0)
	for i, key := range keys {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(vals[i]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}

		rev := bytesToRev(key[:revBytesLen])

		// restore index
		switch {
		case isTombstone(key):
			s.kvindex.Tombstone(kv.Key, rev)
		default:
			s.kvindex.Restore(kv.Key, revision{kv.CreateRevision, 0}, rev, kv.Version)
		}

		// update revision
		s.currentRev = rev
	}

	_, scheduledCompactBytes := tx.UnsafeRange(metaBucketName, scheduledCompactKeyName, nil, 0)
	if len(scheduledCompactBytes) != 0 {
		scheduledCompact := bytesToRev(scheduledCompactBytes[0]).main
		if scheduledCompact > s.compactMainRev {
			log.Printf("storage: resume scheduled compaction at %d", scheduledCompact)
			go s.Compact(scheduledCompact)
		}
	}

	tx.Unlock()

	return nil
}
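
The whole-bucket scan in Restore relies on one property of the revision keys: they are encoded big-endian, so boltdb's lexicographic key order matches numeric (main, sub) order and a single UnsafeRange over [min, max] walks the history in revision order. Below is a minimal, self-contained sketch of that idea; the 8+1+8 layout, the '_' separator and the helper names are assumptions modeled on this snippet, not the verified etcd encoding.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// revision mirrors the (main, sub) pair used by the store above.
type revision struct {
	main int64
	sub  int64
}

// revBytesLen assumes an 8-byte main revision, a 1-byte separator and an
// 8-byte sub revision, as suggested by key[:revBytesLen] in Restore.
const revBytesLen = 8 + 1 + 8

func newRevBytes() []byte { return make([]byte, revBytesLen) }

// revToBytes packs a revision big-endian so that byte comparison (the order
// boltdb keeps its keys in) agrees with numeric comparison.
func revToBytes(rev revision, b []byte) {
	binary.BigEndian.PutUint64(b[0:8], uint64(rev.main))
	b[8] = '_' // assumed separator byte
	binary.BigEndian.PutUint64(b[9:17], uint64(rev.sub))
}

func main() {
	a, b := newRevBytes(), newRevBytes()
	revToBytes(revision{main: 2, sub: 5}, a)
	revToBytes(revision{main: 10, sub: 0}, b)
	// true: revision 2 sorts before revision 10 in byte order as well,
	// which is why UnsafeRange(min, max) returns history in revision order.
	fmt.Println(bytes.Compare(a, b) < 0)
}
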
Example #2
// range is a keyword in Go, so the method gets a Keys suffix.
func (s *store) rangeKeys(key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, curRev int64, err error) {
	curRev = int64(s.currentRev.main)
	if s.currentRev.sub > 0 {
		curRev += 1
	}

	if rangeRev > curRev {
		return nil, s.currentRev.main, ErrFutureRev
	}
	var rev int64
	if rangeRev <= 0 {
		rev = curRev
	} else {
		rev = rangeRev
	}
	if rev <= s.compactMainRev {
		return nil, 0, ErrCompacted
	}

	_, revpairs := s.kvindex.Range(key, end, int64(rev))
	if len(revpairs) == 0 {
		return nil, curRev, nil
	}

	for _, revpair := range revpairs {
		start, end := revBytesRange(revpair)

		_, vs := s.tx.UnsafeRange(keyBucketName, start, end, 0)
		if len(vs) != 1 {
			log.Fatalf("storage: range cannot find rev (%d,%d)", revpair.main, revpair.sub)
		}

		var kv storagepb.KeyValue
		if err := kv.Unmarshal(vs[0]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}
		kvs = append(kvs, kv)
		if limit > 0 && len(kvs) >= int(limit) {
			break
		}
	}
	return kvs, curRev, nil
}
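
The first part of rangeKeys only decides which revision to read at: a non-positive rangeRev means "the current revision", a revision beyond curRev is a future revision, and anything at or below the compacted revision is gone. The sketch below isolates that decision as a standalone helper so the error cases are easy to exercise; pickRev itself and its error values are hypothetical stand-ins that mirror the snippet.

package main

import (
	"errors"
	"fmt"
)

// Hypothetical stand-ins for the package-level errors used by rangeKeys.
var (
	ErrCompacted = errors.New("storage: required revision has been compacted")
	ErrFutureRev = errors.New("storage: required revision is a future revision")
)

// pickRev reproduces the revision selection at the top of rangeKeys:
// a non-positive rangeRev defaults to the current revision, a revision
// beyond curRev is a future revision, and anything at or below the
// compacted revision can no longer be served.
func pickRev(rangeRev, curRev, compactRev int64) (int64, error) {
	if rangeRev > curRev {
		return 0, ErrFutureRev
	}
	rev := rangeRev
	if rangeRev <= 0 {
		rev = curRev
	}
	if rev <= compactRev {
		return 0, ErrCompacted
	}
	return rev, nil
}

func main() {
	fmt.Println(pickRev(0, 7, 3)) // 7 <nil>: default to the current revision
	fmt.Println(pickRev(9, 7, 3)) // 0 plus a future-revision error
	fmt.Println(pickRev(2, 7, 3)) // 0 plus a compacted error
}
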
Example #3
// kvsToEvents gets all events for the watchers from all key-value pairs
func kvsToEvents(wg *watcherGroup, revs, vals [][]byte) (evs []storagepb.Event) {
	for i, v := range vals {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		if !wg.contains(string(kv.Key)) {
			continue
		}

		ty := storagepb.PUT
		if isTombstone(revs[i]) {
			ty = storagepb.DELETE
			// patch in mod revision so watchers won't skip
			kv.ModRevision = bytesToRev(revs[i]).main
		}
		evs = append(evs, storagepb.Event{Kv: &kv, Type: ty})
	}
	return evs
}
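
kvsToEvents tells PUT apart from DELETE purely by looking at the backend key: a delete is stored under a revision key carrying an extra tombstone marker, which is what isTombstone checks. The sketch below shows one plausible way such a marker could work; the 't' suffix and the length constants are assumptions consistent with appendMarkTombstone and isTombstone as used in these snippets, not the verified etcd layout.

package main

import "fmt"

const (
	revBytesLen       = 8 + 1 + 8       // assumed (main, '_', sub) layout
	markedRevBytesLen = revBytesLen + 1 // room for one marker byte
	markTombstone     = 't'             // hypothetical tombstone marker
)

// appendMarkTombstone appends the tombstone marker to a plain revision key.
func appendMarkTombstone(b []byte) []byte {
	if len(b) != revBytesLen {
		panic(fmt.Sprintf("cannot append tombstone mark to non-normal revision bytes (len %d)", len(b)))
	}
	return append(b, markTombstone)
}

// isTombstone reports whether the backend key carries the tombstone marker,
// which is what kvsToEvents uses to emit a DELETE instead of a PUT event.
func isTombstone(b []byte) bool {
	return len(b) == markedRevBytesLen && b[markedRevBytesLen-1] == markTombstone
}

func main() {
	key := make([]byte, revBytesLen)
	fmt.Println(isTombstone(key))                      // false: a normal put
	fmt.Println(isTombstone(appendMarkTombstone(key))) // true: a delete
}
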
Example #4
func (s *store) delete(key []byte, rev revision) {
	mainrev := s.currentRev.main + 1

	ibytes := newRevBytes()
	revToBytes(revision{main: mainrev, sub: s.currentRev.sub}, ibytes)
	ibytes = appendMarkTombstone(ibytes)

	kv := storagepb.KeyValue{
		Key: key,
	}

	d, err := kv.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	s.tx.UnsafePut(keyBucketName, ibytes, d)
	err = s.kvindex.Tombstone(key, revision{main: mainrev, sub: s.currentRev.sub})
	if err != nil {
		log.Fatalf("storage: cannot tombstone an existing key (%s): %v", string(key), err)
	}
	s.changes = append(s.changes, kv)
	s.currentRev.sub += 1

	ibytes = newRevBytes()
	revToBytes(rev, ibytes)
	_, vs := s.tx.UnsafeRange(keyBucketName, ibytes, nil, 0)

	kv.Reset()
	if err = kv.Unmarshal(vs[0]); err != nil {
		log.Fatalf("storage: cannot unmarshal value: %v", err)
	}

	if lease.LeaseID(kv.Lease) != lease.NoLease {
		err = s.le.Detach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
		if err != nil {
			log.Fatalf("storage: cannot detach %v", err)
		}
	}
}
Example #5
File: kvstore.go  Project: kimsh92/etcd
// RangeHistory ranges the history from key to end starting from startRev.
// If `end` is nil, the request only observes the events on key.
// If `end` is not nil, it observes the events on key range [key, range_end).
// Limit limits the number of events returned.
// If startRev <= 0, RangeHistory returns events from the beginning of uncompacted history.
//
// If the required start rev is compacted, ErrCompacted will be returned.
// If the required start rev has not happened, ErrFutureRev will be returned.
//
// RangeHistory returns a slice of revision bytes and the key-values that satisfy the request (0 <= n <= limit).
// If the history in the revision range has not all happened yet, it returns immediately
// with what is available.
// It also returns nextRev, which indicates the start revision to use for the following
// RangeHistory call. The nextRev could be smaller than the given endRev if the store
// has not progressed so far or it hits the event limit.
//
// TODO: return byte slices instead of keyValues to avoid meaningless encode and decode.
// This also helps to return raw (key, val) pair directly to make API consistent.
func (s *store) RangeHistory(key, end []byte, limit, startRev int64) (revbs [][]byte, kvs []storagepb.KeyValue, nextRev int64, err error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if startRev > 0 && startRev <= s.compactMainRev {
		return nil, nil, 0, ErrCompacted
	}
	if startRev > s.currentRev.main {
		return nil, nil, 0, ErrFutureRev
	}

	revs := s.kvindex.RangeSince(key, end, startRev)
	if len(revs) == 0 {
		return nil, nil, s.currentRev.main + 1, nil
	}

	tx := s.b.BatchTx()
	tx.Lock()
	defer tx.Unlock()
	// fetch events from the backend using revisions
	for _, rev := range revs {
		start, end := revBytesRange(rev)

		ks, vs := tx.UnsafeRange(keyBucketName, start, end, 0)
		if len(vs) != 1 {
			log.Fatalf("storage: range cannot find rev (%d,%d)", rev.main, rev.sub)
		}

		var kv storagepb.KeyValue
		if err := kv.Unmarshal(vs[0]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}
		revbs = append(revbs, ks[0])
		kvs = append(kvs, kv)
		if limit > 0 && len(kvs) >= int(limit) {
			return revbs, kvs, rev.main + 1, nil
		}
	}
	return revbs, kvs, s.currentRev.main + 1, nil
}
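
The doc comment above defines a paging contract: feed the returned nextRev back in as the next startRev until the history is exhausted. The sketch below shows how a caller might drive that loop; KeyValue, historyStore and ErrFutureRev are local stand-ins introduced only so the example compiles on its own, and the termination conditions are assumptions about the contract rather than guaranteed behavior.

package main

import (
	"errors"
	"fmt"
)

// KeyValue and historyStore are local stand-ins for storagepb.KeyValue and
// the store; they exist only so this paging sketch compiles on its own.
type KeyValue struct {
	Key   []byte
	Value []byte
}

var ErrFutureRev = errors.New("storage: required revision is a future revision")

type historyStore interface {
	RangeHistory(key, end []byte, limit, startRev int64) (revbs [][]byte, kvs []KeyValue, nextRev int64, err error)
}

// drainHistory pages through the uncompacted history of [key, end) in chunks
// of pageSize (> 0), feeding the returned nextRev back in as the next startRev.
func drainHistory(s historyStore, key, end []byte, pageSize int64) ([]KeyValue, error) {
	var all []KeyValue
	startRev := int64(0) // <= 0 starts from the beginning of uncompacted history
	for {
		_, kvs, nextRev, err := s.RangeHistory(key, end, pageSize, startRev)
		if errors.Is(err, ErrFutureRev) {
			// we paged exactly past the newest revision; nothing left to read
			return all, nil
		}
		if err != nil {
			return nil, err
		}
		all = append(all, kvs...)
		if int64(len(kvs)) < pageSize {
			// fewer events than the limit: history is exhausted for now
			return all, nil
		}
		startRev = nextRev
	}
}

func main() {
	fmt.Println("drainHistory is a sketch; wire it to a real store to use it")
}
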
Example #6
// kvsToEvents gets all events for the watchers from all key-value pairs
func kvsToEvents(revs, vals [][]byte, wsk watcherSetByKey, pfxs map[string]struct{}) (evs []storagepb.Event) {
	for i, v := range vals {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		k := string(kv.Key)
		if _, ok := wsk.getSetByKey(k); !ok && !matchPrefix(k, pfxs) {
			continue
		}

		ty := storagepb.PUT
		if isTombstone(revs[i]) {
			ty = storagepb.DELETE
			// patch in mod revision so watchers won't skip
			kv.ModRevision = bytesToRev(revs[i]).main
		}
		evs = append(evs, storagepb.Event{Kv: &kv, Type: ty})
	}
	return evs
}
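
matchPrefix is used above but not shown. A plausible stand-alone version, assuming it only asks whether the key starts with any registered watch prefix (the real helper in the storage package may be implemented differently):

package main

import (
	"fmt"
	"strings"
)

// matchPrefix reports whether key falls under any of the watched prefixes.
// This is a guess at the helper used by kvsToEvents above; the actual
// implementation in the storage package may differ.
func matchPrefix(key string, prefixes map[string]struct{}) bool {
	for p := range prefixes {
		if strings.HasPrefix(key, p) {
			return true
		}
	}
	return false
}

func main() {
	pfxs := map[string]struct{}{"foo/": {}}
	fmt.Println(matchPrefix("foo/bar", pfxs)) // true
	fmt.Println(matchPrefix("baz", pfxs))     // false
}
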
Example #7
// syncWatchers periodically syncs unsynced watchers: it iterates all unsynced
// watchers to find the minimum revision within their ranges, skipping any
// watcher whose current revision is behind the compact revision of the
// store. It then uses this minimum revision to fetch all key-value pairs and
// sends the resulting events to the watchers.
func (s *watchableStore) syncWatchers() {
	s.store.mu.Lock()
	defer s.store.mu.Unlock()

	if len(s.unsynced) == 0 {
		return
	}

	// in order to find key-value pairs for the unsynced watchers, we need to
	// find the minimum revision index; these revisions can then be used to
	// query the backend store for key-value pairs
	minRev := int64(math.MaxInt64)

	curRev := s.store.currentRev.main
	compactionRev := s.store.compactMainRev

	prefixes := make(map[string]struct{})
	for _, set := range s.unsynced {
		for w := range set {
			k := string(w.key)

			if w.cur > curRev {
				panic("watcher current revision should not exceed current revision")
			}

			if w.cur < compactionRev {
				select {
				case w.ch <- WatchResponse{WatchID: w.id, CompactRevision: compactionRev}:
					s.unsynced.delete(w)
				default:
					// retry next time
				}
				continue
			}

			if minRev >= w.cur {
				minRev = w.cur
			}

			if w.prefix {
				prefixes[k] = struct{}{}
			}
		}
	}

	minBytes, maxBytes := newRevBytes(), newRevBytes()
	revToBytes(revision{main: minRev}, minBytes)
	revToBytes(revision{main: curRev + 1}, maxBytes)

	// UnsafeRange returns keys and values. In boltdb, the keys are revisions
	// and the values are the actual key-value pairs stored in the backend.
	tx := s.store.b.BatchTx()
	tx.Lock()
	ks, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)

	evs := []storagepb.Event{}

	// get the list of all events from all key-value pairs
	for i, v := range vs {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		k := string(kv.Key)
		if _, ok := s.unsynced.getSetByKey(k); !ok && !matchPrefix(k, prefixes) {
			continue
		}

		var ev storagepb.Event
		switch {
		case isTombstone(ks[i]):
			ev.Type = storagepb.DELETE
		default:
			ev.Type = storagepb.PUT
		}
		ev.Kv = &kv

		evs = append(evs, ev)
	}
	tx.Unlock()

	for w, es := range newWatcherToEventMap(s.unsynced, evs) {
		select {
		// use currentRev.main directly; calling s.store.Rev would try to take the store lock we already hold
		case w.ch <- WatchResponse{WatchID: w.id, Events: es, Revision: s.store.currentRev.main}:
			pendingEventsGauge.Add(float64(len(es)))
		default:
			// TODO: handle watchers whose channels are full.
			// continue to process other watchers for now; the full ones
			// will be retried next time and hopefully will no longer be full.
			continue
		}
		w.cur = curRev
		s.synced.add(w)
		s.unsynced.delete(w)
	}

	slowWatcherGauge.Set(float64(len(s.unsynced)))
}
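
The heart of syncWatchers is choosing a single backend range that covers every unsynced watcher: the smallest current revision among watchers that are not behind the compaction point, up to the store's current revision. The sketch below pulls that selection out into a standalone function; the trimmed-down watcher struct and minSyncRev are hypothetical and only mirror the fields this loop actually touches.

package main

import (
	"fmt"
	"math"
)

// watcher is a hypothetical reduction of the watcher in the snippet: only the
// fields used to pick the sync range are kept.
type watcher struct {
	key []byte
	cur int64 // revision the watcher has caught up to
}

// minSyncRev returns the smallest revision to read from the backend so that
// one UnsafeRange call covers every live unsynced watcher, plus the watchers
// that fell behind the compaction point and need a CompactRevision response.
func minSyncRev(unsynced []*watcher, curRev, compactRev int64) (minRev int64, compacted []*watcher) {
	minRev = int64(math.MaxInt64)
	for _, w := range unsynced {
		if w.cur > curRev {
			panic("watcher current revision should not exceed current revision")
		}
		if w.cur < compactRev {
			compacted = append(compacted, w)
			continue
		}
		if w.cur < minRev {
			minRev = w.cur
		}
	}
	return minRev, compacted
}

func main() {
	ws := []*watcher{{key: []byte("a"), cur: 5}, {key: []byte("b"), cur: 2}, {key: []byte("c"), cur: 9}}
	min, gone := minSyncRev(ws, 10, 3)
	fmt.Println(min, len(gone)) // 5 1: watcher "b" is behind the compaction point
}
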
Example #8
func (s *store) put(key, value []byte, leaseID lease.LeaseID) {
	rev := s.currentRev.main + 1
	c := rev
	oldLease := lease.NoLease

	// if the key existed before, use its previous create revision and
	// fetch its previous leaseID
	grev, created, ver, err := s.kvindex.Get(key, rev)
	if err == nil {
		c = created.main
		ibytes := newRevBytes()
		revToBytes(grev, ibytes)
		_, vs := s.tx.UnsafeRange(keyBucketName, ibytes, nil, 0)
		var kv storagepb.KeyValue
		if err = kv.Unmarshal(vs[0]); err != nil {
			log.Fatalf("storage: cannot unmarshal value: %v", err)
		}
		oldLease = lease.LeaseID(kv.Lease)
	}

	ibytes := newRevBytes()
	revToBytes(revision{main: rev, sub: s.currentRev.sub}, ibytes)

	ver = ver + 1
	kv := storagepb.KeyValue{
		Key:            key,
		Value:          value,
		CreateRevision: c,
		ModRevision:    rev,
		Version:        ver,
		Lease:          int64(leaseID),
	}

	d, err := kv.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	s.tx.UnsafePut(keyBucketName, ibytes, d)
	s.kvindex.Put(key, revision{main: rev, sub: s.currentRev.sub})
	s.changes = append(s.changes, kv)
	s.currentRev.sub += 1

	if oldLease != lease.NoLease {
		if s.le == nil {
			panic("no lessor to detach lease")
		}

		err = s.le.Detach(oldLease, []lease.LeaseItem{{Key: string(key)}})
		if err != nil {
			panic("unexpected error from lease detach")
		}
	}

	if leaseID != lease.NoLease {
		if s.le == nil {
			panic("no lessor to attach lease")
		}

		err = s.le.Attach(leaseID, []lease.LeaseItem{{Key: string(key)}})
		if err != nil {
			panic("unexpected error from lease Attach")
		}
	}
}
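
The tail of put keeps the lessor consistent with the new value: detach the key from the lease held by the previous version, then attach it to the lease of the new write, panicking if a lease is requested but no lessor is configured. The sketch below captures just that transition behind a hypothetical, trimmed-down Lessor interface; the real lessor API in etcd is richer than this, and unlike the snippet the sketch returns errors instead of panicking.

package main

import "fmt"

type LeaseID int64

const NoLease LeaseID = 0

// Lessor is a hypothetical interface with only the two calls this path needs.
type Lessor interface {
	Attach(id LeaseID, key string) error
	Detach(id LeaseID, key string) error
}

// switchLease mirrors the tail of put: drop the old binding (if any), then
// create the new one (if any). A NoLease value makes either step a no-op.
func switchLease(le Lessor, key string, oldLease, newLease LeaseID) error {
	if oldLease != NoLease {
		if err := le.Detach(oldLease, key); err != nil {
			return fmt.Errorf("detach lease %d from %q: %w", oldLease, key, err)
		}
	}
	if newLease != NoLease {
		if err := le.Attach(newLease, key); err != nil {
			return fmt.Errorf("attach lease %d to %q: %w", newLease, key, err)
		}
	}
	return nil
}

// loggingLessor is a trivial Lessor used only to demonstrate the call order.
type loggingLessor struct{}

func (loggingLessor) Attach(id LeaseID, key string) error { fmt.Println("attach", id, key); return nil }
func (loggingLessor) Detach(id LeaseID, key string) error { fmt.Println("detach", id, key); return nil }

func main() {
	_ = switchLease(loggingLessor{}, "foo", LeaseID(7), LeaseID(8)) // prints detach, then attach
}
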
Example #9
func (s *store) restore() error {
	min, max := newRevBytes(), newRevBytes()
	revToBytes(revision{main: 1}, min)
	revToBytes(revision{main: math.MaxInt64, sub: math.MaxInt64}, max)

	// restore index
	tx := s.b.BatchTx()
	tx.Lock()
	_, finishedCompactBytes := tx.UnsafeRange(metaBucketName, finishedCompactKeyName, nil, 0)
	if len(finishedCompactBytes) != 0 {
		s.compactMainRev = bytesToRev(finishedCompactBytes[0]).main
		log.Printf("storage: restore compact to %d", s.compactMainRev)
	}

	// TODO: limit N to reduce max memory usage
	keys, vals := tx.UnsafeRange(keyBucketName, min, max, 0)
	for i, key := range keys {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(vals[i]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}

		rev := bytesToRev(key[:revBytesLen])

		// restore index
		switch {
		case isTombstone(key):
			s.kvindex.Tombstone(kv.Key, rev)
			if lease.LeaseID(kv.Lease) != lease.NoLease {
				err := s.le.Detach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
				if err != nil && err != lease.ErrLeaseNotFound {
					log.Fatalf("storage: unexpected Detach error %v", err)
				}
			}
		default:
			s.kvindex.Restore(kv.Key, revision{kv.CreateRevision, 0}, rev, kv.Version)
			if lease.LeaseID(kv.Lease) != lease.NoLease {
				if s.le == nil {
					panic("no lessor to attach lease")
				}
				err := s.le.Attach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
				// We are walking through the kv history here. It is possible that we attached a key to
				// the lease and the lease was revoked later.
				// Thus attaching an old version of a key to a non-existent lease is possible here, and
				// we should just ignore the error.
				if err != nil && err != lease.ErrLeaseNotFound {
					panic("unexpected Attach error")
				}
			}
		}

		// update revision
		s.currentRev = rev
	}

	_, scheduledCompactBytes := tx.UnsafeRange(metaBucketName, scheduledCompactKeyName, nil, 0)
	if len(scheduledCompactBytes) != 0 {
		scheduledCompact := bytesToRev(scheduledCompactBytes[0]).main
		if scheduledCompact > s.compactMainRev {
			log.Printf("storage: resume scheduled compaction at %d", scheduledCompact)
			go s.Compact(scheduledCompact)
		}
	}

	tx.Unlock()

	return nil
}
Example #10
// syncWatchers periodically syncs unsynced watchers: it iterates all unsynced
// watchers to find the minimum revision within their ranges, skipping any
// watcher whose current revision is behind the compact revision of the
// store. It then uses this minimum revision to fetch all key-value pairs and
// sends the resulting events to the watchers.
func (s *watchableStore) syncWatchers() {
	s.store.mu.Lock()
	defer s.store.mu.Unlock()

	if len(s.unsynced) == 0 {
		return
	}

	// in order to find key-value pairs for the unsynced watchers, we need to
	// find the minimum revision index; these revisions can then be used to
	// query the backend store for key-value pairs
	minRev := int64(math.MaxInt64)

	curRev := s.store.currentRev.main
	compactionRev := s.store.compactMainRev

	// TODO: change the unsynced struct type to match this one
	keyToUnsynced := make(map[string]map[*watcher]struct{})

	for w := range s.unsynced {
		k := string(w.key)

		if w.cur > curRev {
			panic("watcher current revision should not exceed current revision")
		}

		if w.cur < compactionRev {
			// TODO: return a compacted error to that watcher instead of
			// just silently removing it from unsynced.
			delete(s.unsynced, w)
			continue
		}

		if minRev >= w.cur {
			minRev = w.cur
		}

		if _, ok := keyToUnsynced[k]; !ok {
			keyToUnsynced[k] = make(map[*watcher]struct{})
		}
		keyToUnsynced[k][w] = struct{}{}
	}

	minBytes, maxBytes := newRevBytes(), newRevBytes()
	revToBytes(revision{main: minRev}, minBytes)
	revToBytes(revision{main: curRev + 1}, maxBytes)

	// UnsafeRange returns keys and values. In boltdb, the keys are revisions
	// and the values are the actual key-value pairs stored in the backend.
	tx := s.store.b.BatchTx()
	tx.Lock()
	ks, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)
	tx.Unlock()

	evs := []storagepb.Event{}

	// get the list of all events from all key-value pairs
	for i, v := range vs {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		k := string(kv.Key)
		if _, ok := keyToUnsynced[k]; !ok {
			continue
		}

		var ev storagepb.Event
		switch {
		case isTombstone(ks[i]):
			ev.Type = storagepb.DELETE
		default:
			ev.Type = storagepb.PUT
		}
		ev.Kv = &kv

		evs = append(evs, ev)
	}

	for w, es := range newWatcherToEventMap(keyToUnsynced, evs) {
		wr := WatchResponse{WatchID: w.id, Events: es}
		select {
		case w.ch <- wr:
			pendingEventsGauge.Add(float64(len(es)))
		default:
			// TODO: handle watchers whose channels are full.
			// continue to process other watchers for now; the full ones
			// will be retried next time and hopefully will no longer be full.
			continue
		}
		k := string(w.key)
		if err := unsafeAddWatcher(&s.synced, k, w); err != nil {
			log.Panicf("error unsafeAddWatcher (%v) for key %s", err, k)
		}
		delete(s.unsynced, w)
	}

	slowWatcherGauge.Set(float64(len(s.unsynced)))
}