Example #1
File: kvstore.go Project: zhuleilei/etcd
func (s *store) put(key, value []byte) {
	rev := s.currentRev.main + 1
	c := rev

	// if the key existed before, reuse its previous CreateRevision
	_, created, ver, err := s.kvindex.Get(key, rev)
	if err == nil {
		c = created.main
	}

	ibytes := newRevBytes()
	revToBytes(revision{main: rev, sub: s.currentRev.sub}, ibytes)

	ver = ver + 1
	kv := storagepb.KeyValue{
		Key:            key,
		Value:          value,
		CreateRevision: c,
		ModRevision:    rev,
		Version:        ver,
	}

	d, err := kv.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	s.tx.UnsafePut(keyBucketName, ibytes, d)
	s.kvindex.Put(key, revision{main: rev, sub: s.currentRev.sub})
	s.currentRev.sub += 1
}
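
The revision keys written above come from newRevBytes and revToBytes, which are not shown in this listing. The sketch below follows the encoding used by upstream etcd's storage package of this era (an assumption for this particular fork): an 8-byte big-endian main revision, a '_' separator byte, and an 8-byte big-endian sub revision, so that byte-wise ordering in boltdb matches revision order. It assumes `encoding/binary` is imported.

// revision layout in the backend: 8-byte main, '_', 8-byte sub.
const (
	revBytesLen       = 8 + 1 + 8
	markedRevBytesLen = revBytesLen + 1 // extra capacity for a tombstone mark
)

func newRevBytes() []byte {
	return make([]byte, revBytesLen, markedRevBytesLen)
}

func revToBytes(rev revision, bytes []byte) {
	binary.BigEndian.PutUint64(bytes, uint64(rev.main))
	bytes[8] = '_'
	binary.BigEndian.PutUint64(bytes[9:], uint64(rev.sub))
}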
Example #2
func (s *store) delete(key []byte) {
	mainrev := s.currentRev.main + 1

	ibytes := newRevBytes()
	revToBytes(revision{main: mainrev, sub: s.currentRev.sub}, ibytes)
	ibytes = appendMarkTombstone(ibytes)

	kv := storagepb.KeyValue{
		Key: key,
	}

	d, err := kv.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	s.tx.UnsafePut(keyBucketName, ibytes, d)
	err = s.kvindex.Tombstone(key, revision{main: mainrev, sub: s.currentRev.sub})
	if err != nil {
		log.Fatalf("storage: cannot tombstone an existing key (%s): %v", string(key), err)
	}
	s.currentRev.sub += 1

	// TODO: detach keys from lease if necessary
}
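
delete never removes old revisions; it appends a new revision key carrying a tombstone mark, which isTombstone later recognizes during restore and watch sync. A plausible sketch of appendMarkTombstone and isTombstone, modeled on upstream etcd (an assumption for this fork) and building on the constants sketched above:

const markTombstone byte = 't' // trailing byte that flags a delete

func appendMarkTombstone(b []byte) []byte {
	if len(b) != revBytesLen {
		log.Panicf("cannot append tombstone mark to non-normal revision bytes")
	}
	return append(b, markTombstone)
}

func isTombstone(b []byte) bool {
	return len(b) == markedRevBytesLen && b[markedRevBytesLen-1] == markTombstone
}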
Example #3
File: kvstore.go Project: zhuleilei/etcd
func (s *store) Restore() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	min, max := newRevBytes(), newRevBytes()
	revToBytes(revision{}, min)
	revToBytes(revision{main: math.MaxInt64, sub: math.MaxInt64}, max)

	// restore index
	tx := s.b.BatchTx()
	tx.Lock()
	_, finishedCompactBytes := tx.UnsafeRange(metaBucketName, finishedCompactKeyName, nil, 0)
	if len(finishedCompactBytes) != 0 {
		s.compactMainRev = bytesToRev(finishedCompactBytes[0]).main
		log.Printf("storage: restore compact to %d", s.compactMainRev)
	}

	// TODO: limit N to reduce max memory usage
	keys, vals := tx.UnsafeRange(keyBucketName, min, max, 0)
	for i, key := range keys {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(vals[i]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}

		rev := bytesToRev(key[:revBytesLen])

		// restore index
		switch {
		case isTombstone(key):
			s.kvindex.Tombstone(kv.Key, rev)
		default:
			s.kvindex.Restore(kv.Key, revision{kv.CreateRevision, 0}, rev, kv.Version)
		}

		// update revision
		s.currentRev = rev
	}

	_, scheduledCompactBytes := tx.UnsafeRange(metaBucketName, scheduledCompactKeyName, nil, 0)
	if len(scheduledCompactBytes) != 0 {
		scheduledCompact := bytesToRev(scheduledCompactBytes[0]).main
		if scheduledCompact > s.compactMainRev {
			log.Printf("storage: resume scheduled compaction at %d", scheduledCompact)
			go s.Compact(scheduledCompact)
		}
	}

	tx.Unlock()

	return nil
}
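
Restore decodes each backend key back into a revision via bytesToRev; note that it slices the key to revBytesLen first, which drops the tombstone mark when present. A minimal sketch of the inverse of revToBytes, under the same layout assumption as above:

func bytesToRev(bytes []byte) revision {
	return revision{
		main: int64(binary.BigEndian.Uint64(bytes[0:8])),
		sub:  int64(binary.BigEndian.Uint64(bytes[9:])), // skip the '_' at bytes[8]
	}
}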
Example #4
func (s *store) put(key, value []byte, leaseID lease.LeaseID) {
	rev := s.currentRev.main + 1
	c := rev

	// if the key existed before, reuse its previous CreateRevision
	_, created, ver, err := s.kvindex.Get(key, rev)
	if err == nil {
		c = created.main
	}

	ibytes := newRevBytes()
	revToBytes(revision{main: rev, sub: s.currentRev.sub}, ibytes)

	ver = ver + 1
	kv := storagepb.KeyValue{
		Key:            key,
		Value:          value,
		CreateRevision: c,
		ModRevision:    rev,
		Version:        ver,
		Lease:          int64(leaseID),
	}

	d, err := kv.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	s.tx.UnsafePut(keyBucketName, ibytes, d)
	s.kvindex.Put(key, revision{main: rev, sub: s.currentRev.sub})
	s.changes = append(s.changes, kv)
	s.currentRev.sub += 1

	if leaseID != lease.NoLease {
		if s.le == nil {
			panic("no lessor to attach lease")
		}

		// TODO: validate the existence of the lease before calling Attach.
		// We need to ensure put always succeeds, since we do not want
		// to handle aborts for txn requests. We need to ensure all requests
		// inside a txn can execute without error before executing them.
		err = s.le.Attach(leaseID, []lease.LeaseItem{{Key: string(key)}})
		if err != nil {
			panic("unexpected error from lease Attach")
		}
	}
}
Example #5
// range is a keyword in Go, so we add the Keys suffix.
func (s *store) rangeKeys(key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, curRev int64, err error) {
	curRev = int64(s.currentRev.main)
	if s.currentRev.sub > 0 {
		curRev += 1
	}

	if rangeRev > curRev {
		return nil, s.currentRev.main, ErrFutureRev
	}
	var rev int64
	if rangeRev <= 0 {
		rev = curRev
	} else {
		rev = rangeRev
	}
	if rev <= s.compactMainRev {
		return nil, 0, ErrCompacted
	}

	_, revpairs := s.kvindex.Range(key, end, int64(rev))
	if len(revpairs) == 0 {
		return nil, curRev, nil
	}

	for _, revpair := range revpairs {
		start, end := revBytesRange(revpair)

		_, vs := s.tx.UnsafeRange(keyBucketName, start, end, 0)
		if len(vs) != 1 {
			log.Fatalf("storage: range cannot find rev (%d,%d)", revpair.main, revpair.sub)
		}

		var kv storagepb.KeyValue
		if err := kv.Unmarshal(vs[0]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}
		kvs = append(kvs, kv)
		if limit > 0 && len(kvs) >= int(limit) {
			break
		}
	}
	return kvs, curRev, nil
}
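
Each index revision is fetched from the backend through revBytesRange, which widens a single revision into the half-open byte range [rev, rev+1 sub), so UnsafeRange should return exactly one value; that is why the len(vs) != 1 check above is fatal. A sketch consistent with the encoding assumed earlier:

func revBytesRange(rev revision) (start, end []byte) {
	start = newRevBytes()
	revToBytes(rev, start)

	end = newRevBytes()
	revToBytes(revision{main: rev.main, sub: rev.sub + 1}, end)

	return start, end
}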
Example #6
// kvsToEvents gets all events for the watchers from all key-value pairs
func kvsToEvents(wg *watcherGroup, revs, vals [][]byte) (evs []storagepb.Event) {
	for i, v := range vals {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		if !wg.contains(string(kv.Key)) {
			continue
		}

		ty := storagepb.PUT
		if isTombstone(revs[i]) {
			ty = storagepb.DELETE
			// patch in mod revision so watchers won't skip
			kv.ModRevision = bytesToRev(revs[i]).main
		}
		evs = append(evs, storagepb.Event{Kv: &kv, Type: ty})
	}
	return evs
}
Example #7
File: kvstore.go Project: kimsh92/etcd
// RangeHistory ranges the history from key to end starting from startRev.
// If `end` is nil, the request only observes the events on key.
// If `end` is not nil, it observes the events on the key range [key, range_end).
// Limit limits the number of events returned.
// If startRev <= 0, RangeHistory returns events from the beginning of uncompacted history.
//
// If the required start rev is compacted, ErrCompacted will be returned.
// If the required start rev has not happened, ErrFutureRev will be returned.
//
// RangeHistory returns a slice of revision bytes and the key-values that satisfy the requirement (0 <= n <= limit).
// If not all history in the revision range has happened yet, it returns immediately
// with what is available.
// It also returns nextRev, which indicates the start revision to use for the following
// RangeHistory call. The nextRev can be smaller than the store's current revision if the
// store has not progressed that far or the call hits the event limit.
//
// TODO: return byte slices instead of keyValues to avoid meaningless encode and decode.
// This also helps to return raw (key, val) pairs directly to make the API consistent.
func (s *store) RangeHistory(key, end []byte, limit, startRev int64) (revbs [][]byte, kvs []storagepb.KeyValue, nextRev int64, err error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if startRev > 0 && startRev <= s.compactMainRev {
		return nil, nil, 0, ErrCompacted
	}
	if startRev > s.currentRev.main {
		return nil, nil, 0, ErrFutureRev
	}

	revs := s.kvindex.RangeSince(key, end, startRev)
	if len(revs) == 0 {
		return nil, nil, s.currentRev.main + 1, nil
	}

	tx := s.b.BatchTx()
	tx.Lock()
	defer tx.Unlock()
	// fetch events from the backend using revisions
	for _, rev := range revs {
		start, end := revBytesRange(rev)

		ks, vs := tx.UnsafeRange(keyBucketName, start, end, 0)
		if len(vs) != 1 {
			log.Fatalf("storage: range cannot find rev (%d,%d)", rev.main, rev.sub)
		}

		var kv storagepb.KeyValue
		if err := kv.Unmarshal(vs[0]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}
		revbs = append(revbs, ks[0])
		kvs = append(kvs, kv)
		if limit > 0 && len(kvs) >= int(limit) {
			return revbs, kvs, rev.main + 1, nil
		}
	}
	return revbs, kvs, s.currentRev.main + 1, nil
}
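
A hypothetical caller (dumpHistory and its key are made up for illustration) could replay the full uncompacted history of one key like this; passing end = nil restricts the range to exactly key, and limit <= 0 disables the limit per the code above. It assumes `fmt` is imported.

func dumpHistory(s *store) error {
	revbs, kvs, nextRev, err := s.RangeHistory([]byte("foo"), nil, 0, 0)
	if err != nil {
		return err // ErrCompacted or ErrFutureRev, depending on the start revision
	}
	for i := range kvs {
		rev := bytesToRev(revbs[i][:revBytesLen]) // slice off a tombstone mark, if any
		fmt.Printf("rev=(%d,%d) value=%q\n", rev.main, rev.sub, kvs[i].Value)
	}
	_ = nextRev // would seed the next RangeHistory call when paging
	return nil
}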
Example #8
// kvsToEvents gets all events for the watchers from all key-value pairs
func kvsToEvents(revs, vals [][]byte, wsk watcherSetByKey, pfxs map[string]struct{}) (evs []storagepb.Event) {
	for i, v := range vals {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		k := string(kv.Key)
		if _, ok := wsk.getSetByKey(k); !ok && !matchPrefix(k, pfxs) {
			continue
		}

		ty := storagepb.PUT
		if isTombstone(revs[i]) {
			ty = storagepb.DELETE
			// patch in mod revision so watchers won't skip
			kv.ModRevision = bytesToRev(revs[i]).main
		}
		evs = append(evs, storagepb.Event{Kv: &kv, Type: ty})
	}
	return evs
}
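
matchPrefix is not part of this listing; one plausible implementation consistent with how it is called here (assuming `strings` is imported):

// matchPrefix reports whether key starts with any registered prefix.
func matchPrefix(key string, prefixes map[string]struct{}) bool {
	for p := range prefixes {
		if strings.HasPrefix(key, p) {
			return true
		}
	}
	return false
}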
Example #9
func TestStoreRestore(t *testing.T) {
	s := newFakeStore()
	b := s.b.(*fakeBackend)
	fi := s.kvindex.(*fakeIndex)

	putkey := newTestKeyBytes(revision{3, 0}, false)
	putkv := storagepb.KeyValue{
		Key:            []byte("foo"),
		Value:          []byte("bar"),
		CreateRevision: 3,
		ModRevision:    3,
		Version:        1,
	}
	putkvb, err := putkv.Marshal()
	if err != nil {
		t.Fatal(err)
	}
	delkey := newTestKeyBytes(revision{4, 0}, true)
	delkv := storagepb.KeyValue{
		Key: []byte("foo"),
	}
	delkvb, err := delkv.Marshal()
	if err != nil {
		t.Fatal(err)
	}
	b.tx.rangeRespc <- rangeResp{[][]byte{finishedCompactKeyName}, [][]byte{newTestRevBytes(revision{2, 0})}}
	b.tx.rangeRespc <- rangeResp{[][]byte{putkey, delkey}, [][]byte{putkvb, delkvb}}
	b.tx.rangeRespc <- rangeResp{[][]byte{scheduledCompactKeyName}, [][]byte{newTestRevBytes(revision{2, 0})}}

	s.Restore()

	if s.compactMainRev != 2 {
		t.Errorf("compact rev = %d, want 4", s.compactMainRev)
	}
	wrev := revision{4, 0}
	if !reflect.DeepEqual(s.currentRev, wrev) {
		t.Errorf("current rev = %v, want %v", s.currentRev, wrev)
	}
	wact := []testutil.Action{
		{"range", []interface{}{metaBucketName, finishedCompactKeyName, []byte(nil), int64(0)}},
		{"range", []interface{}{keyBucketName, newTestRevBytes(revision{}), newTestRevBytes(revision{math.MaxInt64, math.MaxInt64}), int64(0)}},
		{"range", []interface{}{metaBucketName, scheduledCompactKeyName, []byte(nil), int64(0)}},
	}
	if g := b.tx.Action(); !reflect.DeepEqual(g, wact) {
		t.Errorf("tx actions = %+v, want %+v", g, wact)
	}
	wact = []testutil.Action{
		{"restore", []interface{}{[]byte("foo"), revision{3, 0}, revision{3, 0}, int64(1)}},
		{"tombstone", []interface{}{[]byte("foo"), revision{4, 0}}},
	}
	if g := fi.Action(); !reflect.DeepEqual(g, wact) {
		t.Errorf("index action = %+v, want %+v", g, wact)
	}
}
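
The helpers newTestRevBytes and newTestKeyBytes used by these tests wrap the encoding helpers; a sketch consistent with their call sites (the exact test scaffolding is an assumption):

func newTestRevBytes(rev revision) []byte {
	bytes := newRevBytes()
	revToBytes(rev, bytes)
	return bytes
}

func newTestKeyBytes(rev revision, tombstone bool) []byte {
	bytes := newTestRevBytes(rev)
	if tombstone {
		bytes = appendMarkTombstone(bytes)
	}
	return bytes
}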
Example #10
func (s *store) delete(key []byte, rev revision) {
	mainrev := s.currentRev.main + 1

	ibytes := newRevBytes()
	revToBytes(revision{main: mainrev, sub: s.currentRev.sub}, ibytes)
	ibytes = appendMarkTombstone(ibytes)

	kv := storagepb.KeyValue{
		Key: key,
	}

	d, err := kv.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	s.tx.UnsafePut(keyBucketName, ibytes, d)
	err = s.kvindex.Tombstone(key, revision{main: mainrev, sub: s.currentRev.sub})
	if err != nil {
		log.Fatalf("storage: cannot tombstone an existing key (%s): %v", string(key), err)
	}
	s.changes = append(s.changes, kv)
	s.currentRev.sub += 1

	ibytes = newRevBytes()
	revToBytes(rev, ibytes)
	_, vs := s.tx.UnsafeRange(keyBucketName, ibytes, nil, 0)

	kv.Reset()
	if err = kv.Unmarshal(vs[0]); err != nil {
		log.Fatalf("storage: cannot unmarshal value: %v", err)
	}

	if lease.LeaseID(kv.Lease) != lease.NoLease {
		err = s.le.Detach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
		if err != nil {
			log.Fatalf("storage: cannot detach %v", err)
		}
	}
}
Example #11
func TestStoreRange(t *testing.T) {
	key := newTestKeyBytes(revision{2, 0}, false)
	kv := storagepb.KeyValue{
		Key:            []byte("foo"),
		Value:          []byte("bar"),
		CreateRevision: 1,
		ModRevision:    2,
		Version:        1,
	}
	kvb, err := kv.Marshal()
	if err != nil {
		t.Fatal(err)
	}
	currev := revision{1, 1}
	wrev := int64(2)

	tests := []struct {
		idxr indexRangeResp
		r    rangeResp
	}{
		{
			indexRangeResp{[][]byte{[]byte("foo")}, []revision{{2, 0}}},
			rangeResp{[][]byte{key}, [][]byte{kvb}},
		},
		{
			indexRangeResp{[][]byte{[]byte("foo"), []byte("foo1")}, []revision{{2, 0}, {3, 0}}},
			rangeResp{[][]byte{key}, [][]byte{kvb}},
		},
	}
	for i, tt := range tests {
		s := newFakeStore()
		b := s.b.(*fakeBackend)
		fi := s.kvindex.(*fakeIndex)

		s.currentRev = currev
		s.tx = b.BatchTx()
		b.tx.rangeRespc <- tt.r
		fi.indexRangeRespc <- tt.idxr

		kvs, rev, err := s.rangeKeys([]byte("foo"), []byte("goo"), 1, 0)
		if err != nil {
			t.Errorf("#%d: err = %v, want nil", i, err)
		}
		if w := []storagepb.KeyValue{kv}; !reflect.DeepEqual(kvs, w) {
			t.Errorf("#%d: kvs = %+v, want %+v", i, kvs, w)
		}
		if rev != wrev {
			t.Errorf("#%d: rev = %d, want %d", i, rev, wrev)
		}

		wstart, wend := revBytesRange(tt.idxr.revs[0])
		wact := []testutil.Action{
			{"range", []interface{}{keyBucketName, wstart, wend, int64(0)}},
		}
		if g := b.tx.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: tx action = %+v, want %+v", i, g, wact)
		}
		wact = []testutil.Action{
			{"range", []interface{}{[]byte("foo"), []byte("goo"), wrev}},
		}
		if g := fi.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: index action = %+v, want %+v", i, g, wact)
		}
		if s.currentRev != currev {
			t.Errorf("#%d: current rev = %+v, want %+v", i, s.currentRev, currev)
		}
	}
}
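
These tests drive a channel-based fake: each expected backend call is answered by a response queued on rangeRespc beforehand, and every call is recorded for the action assertions. The fake itself is not shown; a plausible shape, purely an assumption inferred from the call sites:

type rangeResp struct {
	keys [][]byte
	vals [][]byte
}

type fakeBatchTx struct {
	testutil.Recorder                // records actions for later DeepEqual checks
	rangeRespc        chan rangeResp // pre-queued answers for UnsafeRange
}

func (b *fakeBatchTx) UnsafeRange(bucketName, key, endKey []byte, limit int64) ([][]byte, [][]byte) {
	b.Record(testutil.Action{Name: "range",
		Params: []interface{}{bucketName, key, endKey, limit}})
	r := <-b.rangeRespc
	return r.keys, r.vals
}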
Example #12
// syncWatchers periodically syncs unsynced watchers: it iterates all unsynced
// watchers to find the minimum revision within their ranges, skipping any
// watcher whose current revision is behind the compact revision of the
// store. It then uses this minimum revision to fetch all key-value pairs and
// sends the resulting events to the watchers.
func (s *watchableStore) syncWatchers() {
	s.store.mu.Lock()
	defer s.store.mu.Unlock()

	if len(s.unsynced) == 0 {
		return
	}

	// in order to find the key-value pairs for the unsynced watchers, we need
	// to find the minimum revision among them; that revision is then used to
	// query the backend store for key-value pairs
	minRev := int64(math.MaxInt64)

	curRev := s.store.currentRev.main
	compactionRev := s.store.compactMainRev

	prefixes := make(map[string]struct{})
	for _, set := range s.unsynced {
		for w := range set {
			k := string(w.key)

			if w.cur > curRev {
				panic("watcher current revision should not exceed current revision")
			}

			if w.cur < compactionRev {
				select {
				case w.ch <- WatchResponse{WatchID: w.id, CompactRevision: compactionRev}:
					s.unsynced.delete(w)
				default:
					// retry next time
				}
				continue
			}

			if minRev >= w.cur {
				minRev = w.cur
			}

			if w.prefix {
				prefixes[k] = struct{}{}
			}
		}
	}

	minBytes, maxBytes := newRevBytes(), newRevBytes()
	revToBytes(revision{main: minRev}, minBytes)
	revToBytes(revision{main: curRev + 1}, maxBytes)

	// UnsafeRange returns keys and values. In boltdb, keys are revisions and
	// values are the actual key-value pairs in the backend.
	tx := s.store.b.BatchTx()
	tx.Lock()
	ks, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)

	evs := []storagepb.Event{}

	// get the list of all events from all key-value pairs
	for i, v := range vs {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		k := string(kv.Key)
		if _, ok := s.unsynced.getSetByKey(k); !ok && !matchPrefix(k, prefixes) {
			continue
		}

		var ev storagepb.Event
		switch {
		case isTombstone(ks[i]):
			ev.Type = storagepb.DELETE
		default:
			ev.Type = storagepb.PUT
		}
		ev.Kv = &kv

		evs = append(evs, ev)
	}
	tx.Unlock()

	for w, es := range newWatcherToEventMap(s.unsynced, evs) {
		select {
		// s.store.Rev also uses Lock, so just return directly
		case w.ch <- WatchResponse{WatchID: w.id, Events: es, Revision: s.store.currentRev.main}:
			pendingEventsGauge.Add(float64(len(es)))
		default:
			// TODO: handle the full unsynced watchers.
			// continue to process other watchers for now, the full ones
			// will be processed next time and hopefully it will not be full.
			continue
		}
		w.cur = curRev
		s.synced.add(w)
		s.unsynced.delete(w)
	}

	slowWatcherGauge.Set(float64(len(s.unsynced)))
}
Example #13
func (s *store) put(key, value []byte, leaseID lease.LeaseID) {
	rev := s.currentRev.main + 1
	c := rev
	oldLease := lease.NoLease

	// if the key existed before, reuse its previous CreateRevision and
	// fetch its previous leaseID
	grev, created, ver, err := s.kvindex.Get(key, rev)
	if err == nil {
		c = created.main
		ibytes := newRevBytes()
		revToBytes(grev, ibytes)
		_, vs := s.tx.UnsafeRange(keyBucketName, ibytes, nil, 0)
		var kv storagepb.KeyValue
		if err = kv.Unmarshal(vs[0]); err != nil {
			log.Fatalf("storage: cannot unmarshal value: %v", err)
		}
		oldLease = lease.LeaseID(kv.Lease)
	}

	ibytes := newRevBytes()
	revToBytes(revision{main: rev, sub: s.currentRev.sub}, ibytes)

	ver = ver + 1
	kv := storagepb.KeyValue{
		Key:            key,
		Value:          value,
		CreateRevision: c,
		ModRevision:    rev,
		Version:        ver,
		Lease:          int64(leaseID),
	}

	d, err := kv.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	s.tx.UnsafePut(keyBucketName, ibytes, d)
	s.kvindex.Put(key, revision{main: rev, sub: s.currentRev.sub})
	s.changes = append(s.changes, kv)
	s.currentRev.sub += 1

	if oldLease != lease.NoLease {
		if s.le == nil {
			panic("no lessor to detach lease")
		}

		err = s.le.Detach(oldLease, []lease.LeaseItem{{Key: string(key)}})
		if err != nil {
			panic("unexpected error from lease detach")
		}
	}

	if leaseID != lease.NoLease {
		if s.le == nil {
			panic("no lessor to attach lease")
		}

		err = s.le.Attach(leaseID, []lease.LeaseItem{{Key: string(key)}})
		if err != nil {
			panic("unexpected error from lease Attach")
		}
	}
}
Example #14
func TestStorePut(t *testing.T) {
	kv := storagepb.KeyValue{
		Key:            []byte("foo"),
		Value:          []byte("bar"),
		CreateRevision: 1,
		ModRevision:    2,
		Version:        1,
	}
	kvb, err := kv.Marshal()
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		rev revision
		r   indexGetResp
		rr  *rangeResp

		wrev    revision
		wkey    []byte
		wkv     storagepb.KeyValue
		wputrev revision
	}{
		{
			revision{1, 0},
			indexGetResp{revision{}, revision{}, 0, ErrRevisionNotFound},
			nil,

			revision{1, 1},
			newTestKeyBytes(revision{2, 0}, false),
			storagepb.KeyValue{
				Key:            []byte("foo"),
				Value:          []byte("bar"),
				CreateRevision: 2,
				ModRevision:    2,
				Version:        1,
				Lease:          1,
			},
			revision{2, 0},
		},
		{
			revision{1, 1},
			indexGetResp{revision{2, 0}, revision{2, 0}, 1, nil},
			&rangeResp{[][]byte{newTestKeyBytes(revision{2, 1}, false)}, [][]byte{kvb}},

			revision{1, 2},
			newTestKeyBytes(revision{2, 1}, false),
			storagepb.KeyValue{
				Key:            []byte("foo"),
				Value:          []byte("bar"),
				CreateRevision: 2,
				ModRevision:    2,
				Version:        2,
				Lease:          2,
			},
			revision{2, 1},
		},
		{
			revision{2, 0},
			indexGetResp{revision{2, 1}, revision{2, 0}, 2, nil},
			&rangeResp{[][]byte{newTestKeyBytes(revision{2, 1}, false)}, [][]byte{kvb}},

			revision{2, 1},
			newTestKeyBytes(revision{3, 0}, false),
			storagepb.KeyValue{
				Key:            []byte("foo"),
				Value:          []byte("bar"),
				CreateRevision: 2,
				ModRevision:    3,
				Version:        3,
				Lease:          3,
			},
			revision{3, 0},
		},
	}
	for i, tt := range tests {
		s := newFakeStore()
		b := s.b.(*fakeBackend)
		fi := s.kvindex.(*fakeIndex)

		s.currentRev = tt.rev
		s.tx = b.BatchTx()
		fi.indexGetRespc <- tt.r
		if tt.rr != nil {
			b.tx.rangeRespc <- *tt.rr
		}

		s.put([]byte("foo"), []byte("bar"), lease.LeaseID(i+1))

		data, err := tt.wkv.Marshal()
		if err != nil {
			t.Errorf("#%d: marshal err = %v, want nil", i, err)
		}

		wact := []testutil.Action{
			{"seqput", []interface{}{keyBucketName, tt.wkey, data}},
		}

		if tt.rr != nil {
			wact = []testutil.Action{
				{"range", []interface{}{keyBucketName, newTestKeyBytes(tt.r.rev, false), []byte(nil), int64(0)}},
				{"seqput", []interface{}{keyBucketName, tt.wkey, data}},
			}
		}

		if g := b.tx.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: tx action = %+v, want %+v", i, g, wact)
		}
		wact = []testutil.Action{
			{"get", []interface{}{[]byte("foo"), tt.wputrev.main}},
			{"put", []interface{}{[]byte("foo"), tt.wputrev}},
		}
		if g := fi.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: index action = %+v, want %+v", i, g, wact)
		}
		if s.currentRev != tt.wrev {
			t.Errorf("#%d: rev = %+v, want %+v", i, s.currentRev, tt.wrev)
		}

		s.Close()
	}
}
Example #15
func TestStoreDeleteRange(t *testing.T) {
	key := newTestKeyBytes(revision{2, 0}, false)
	kv := storagepb.KeyValue{
		Key:            []byte("foo"),
		Value:          []byte("bar"),
		CreateRevision: 1,
		ModRevision:    2,
		Version:        1,
	}
	kvb, err := kv.Marshal()
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		rev revision
		r   indexRangeResp
		rr  rangeResp

		wkey    []byte
		wrev    revision
		wrrev   int64
		wdelrev revision
	}{
		{
			revision{2, 0},
			indexRangeResp{[][]byte{[]byte("foo")}, []revision{{2, 0}}},
			rangeResp{[][]byte{key}, [][]byte{kvb}},

			newTestKeyBytes(revision{3, 0}, true),
			revision{2, 1},
			2,
			revision{3, 0},
		},
		{
			revision{2, 1},
			indexRangeResp{[][]byte{[]byte("foo")}, []revision{{2, 0}}},
			rangeResp{[][]byte{key}, [][]byte{kvb}},

			newTestKeyBytes(revision{3, 1}, true),
			revision{2, 2},
			3,
			revision{3, 1},
		},
	}
	for i, tt := range tests {
		s := newFakeStore()
		b := s.b.(*fakeBackend)
		fi := s.kvindex.(*fakeIndex)

		s.currentRev = tt.rev
		s.tx = b.BatchTx()
		fi.indexRangeRespc <- tt.r
		b.tx.rangeRespc <- tt.rr

		n := s.deleteRange([]byte("foo"), []byte("goo"))
		if n != 1 {
			t.Errorf("#%d: n = %d, want 1", i, n)
		}

		data, err := (&storagepb.KeyValue{
			Key: []byte("foo"),
		}).Marshal()
		if err != nil {
			t.Errorf("#%d: marshal err = %v, want nil", i, err)
		}
		wact := []testutil.Action{
			{"seqput", []interface{}{keyBucketName, tt.wkey, data}},
			{"range", []interface{}{keyBucketName, newTestKeyBytes(revision{2, 0}, false), []byte(nil), int64(0)}},
		}
		if g := b.tx.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: tx action = %+v, want %+v", i, g, wact)
		}
		wact = []testutil.Action{
			{"range", []interface{}{[]byte("foo"), []byte("goo"), tt.wrrev}},
			{"tombstone", []interface{}{[]byte("foo"), tt.wdelrev}},
		}
		if g := fi.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: index action = %+v, want %+v", i, g, wact)
		}
		if s.currentRev != tt.wrev {
			t.Errorf("#%d: rev = %+v, want %+v", i, s.currentRev, tt.wrev)
		}
	}
}
Example #16
// syncWatchers periodically syncs unsynced watchers: it iterates all unsynced
// watchers to find the minimum revision within their ranges, skipping any
// watcher whose current revision is behind the compact revision of the
// store. It then uses this minimum revision to fetch all key-value pairs and
// sends the resulting events to the watchers.
func (s *watchableStore) syncWatchers() {
	s.store.mu.Lock()
	defer s.store.mu.Unlock()

	if len(s.unsynced) == 0 {
		return
	}

	// in order to find the key-value pairs for the unsynced watchers, we need
	// to find the minimum revision among them; that revision is then used to
	// query the backend store for key-value pairs
	minRev := int64(math.MaxInt64)

	curRev := s.store.currentRev.main
	compactionRev := s.store.compactMainRev

	// TODO: change the unsynced field to this map type
	keyToUnsynced := make(map[string]map[*watcher]struct{})

	for w := range s.unsynced {
		k := string(w.key)

		if w.cur > curRev {
			panic("watcher current revision should not exceed current revision")
		}

		if w.cur < compactionRev {
			// TODO: return a compacted error to that watcher instead of
			// just removing it silently from unsynced.
			delete(s.unsynced, w)
			continue
		}

		if minRev >= w.cur {
			minRev = w.cur
		}

		if _, ok := keyToUnsynced[k]; !ok {
			keyToUnsynced[k] = make(map[*watcher]struct{})
		}
		keyToUnsynced[k][w] = struct{}{}
	}

	minBytes, maxBytes := newRevBytes(), newRevBytes()
	revToBytes(revision{main: minRev}, minBytes)
	revToBytes(revision{main: curRev + 1}, maxBytes)

	// UnsafeRange returns keys and values. In boltdb, keys are revisions and
	// values are the actual key-value pairs in the backend.
	tx := s.store.b.BatchTx()
	tx.Lock()
	ks, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)
	tx.Unlock()

	evs := []storagepb.Event{}

	// get the list of all events from all key-value pairs
	for i, v := range vs {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		k := string(kv.Key)
		if _, ok := keyToUnsynced[k]; !ok {
			continue
		}

		var ev storagepb.Event
		switch {
		case isTombstone(ks[i]):
			ev.Type = storagepb.DELETE
		default:
			ev.Type = storagepb.PUT
		}
		ev.Kv = &kv

		evs = append(evs, ev)
	}

	for w, es := range newWatcherToEventMap(keyToUnsynced, evs) {
		wr := WatchResponse{WatchID: w.id, Events: es}
		select {
		case w.ch <- wr:
			pendingEventsGauge.Add(float64(len(es)))
		default:
			// TODO: handle the full unsynced watchers.
			// continue to process other watchers for now, the full ones
			// will be processed next time and hopefully it will not be full.
			continue
		}
		k := string(w.key)
		if err := unsafeAddWatcher(&s.synced, k, w); err != nil {
			log.Panicf("error unsafeAddWatcher (%v) for key %s", err, k)
		}
		delete(s.unsynced, w)
	}

	slowWatcherGauge.Set(float64(len(s.unsynced)))
}
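
newWatcherToEventMap groups the collected events by interested watcher. A minimal sketch against the keyToUnsynced map type built above; a version that also serves prefix watchers, as in the earlier variant of syncWatchers, would need extra matching:

func newWatcherToEventMap(keyToWatchers map[string]map[*watcher]struct{}, evs []storagepb.Event) map[*watcher][]storagepb.Event {
	m := make(map[*watcher][]storagepb.Event)
	for _, ev := range evs {
		for w := range keyToWatchers[string(ev.Kv.Key)] {
			m[w] = append(m[w], ev)
		}
	}
	return m
}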
Example #17
func (s *store) restore() error {
	min, max := newRevBytes(), newRevBytes()
	revToBytes(revision{main: 1}, min)
	revToBytes(revision{main: math.MaxInt64, sub: math.MaxInt64}, max)

	// restore index
	tx := s.b.BatchTx()
	tx.Lock()
	_, finishedCompactBytes := tx.UnsafeRange(metaBucketName, finishedCompactKeyName, nil, 0)
	if len(finishedCompactBytes) != 0 {
		s.compactMainRev = bytesToRev(finishedCompactBytes[0]).main
		log.Printf("storage: restore compact to %d", s.compactMainRev)
	}

	// TODO: limit N to reduce max memory usage
	keys, vals := tx.UnsafeRange(keyBucketName, min, max, 0)
	for i, key := range keys {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(vals[i]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}

		rev := bytesToRev(key[:revBytesLen])

		// restore index
		switch {
		case isTombstone(key):
			s.kvindex.Tombstone(kv.Key, rev)
			if lease.LeaseID(kv.Lease) != lease.NoLease {
				err := s.le.Detach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
				if err != nil && err != lease.ErrLeaseNotFound {
					log.Fatalf("storage: unexpected Detach error %v", err)
				}
			}
		default:
			s.kvindex.Restore(kv.Key, revision{kv.CreateRevision, 0}, rev, kv.Version)
			if lease.LeaseID(kv.Lease) != lease.NoLease {
				if s.le == nil {
					panic("no lessor to attach lease")
				}
				err := s.le.Attach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
				// We are walking through the kv history here. It is possible that we attached a key to
				// the lease and the lease was revoked later.
				// Thus attaching an old version of a key to a non-existent lease is possible here, and
				// we should just ignore the error.
				if err != nil && err != lease.ErrLeaseNotFound {
					panic("unexpected Attach error")
				}
			}
		}

		// update revision
		s.currentRev = rev
	}

	_, scheduledCompactBytes := tx.UnsafeRange(metaBucketName, scheduledCompactKeyName, nil, 0)
	if len(scheduledCompactBytes) != 0 {
		scheduledCompact := bytesToRev(scheduledCompactBytes[0]).main
		if scheduledCompact > s.compactMainRev {
			log.Printf("storage: resume scheduled compaction at %d", scheduledCompact)
			go s.Compact(scheduledCompact)
		}
	}

	tx.Unlock()

	return nil
}
Example #18
func TestStoreRangeHistory(t *testing.T) {
	key := newTestKeyBytes(revision{2, 0}, false)
	kv := storagepb.KeyValue{
		Key:            []byte("foo"),
		Value:          []byte("bar"),
		CreateRevision: 1,
		ModRevision:    2,
		Version:        1,
	}
	kvb, err := kv.Marshal()
	if err != nil {
		t.Fatal(err)
	}
	currev := revision{2, 0}

	tests := []struct {
		idxr indexRangeEventsResp
		r    rangeResp
	}{
		{
			indexRangeEventsResp{[]revision{{2, 0}}},
			rangeResp{[][]byte{key}, [][]byte{kvb}},
		},
		{
			indexRangeEventsResp{[]revision{{2, 0}, {3, 0}}},
			rangeResp{[][]byte{key}, [][]byte{kvb}},
		},
	}
	for i, tt := range tests {
		s, b, index := newFakeStore()
		s.currentRev = currev
		index.indexRangeEventsRespc <- tt.idxr
		b.tx.rangeRespc <- tt.r

		keys, kvs, _, err := s.RangeHistory([]byte("foo"), []byte("goo"), 1, 1)
		if err != nil {
			t.Errorf("#%d: err = %v, want nil", i, err)
		}
		if w := [][]byte{key}; !reflect.DeepEqual(keys, w) {
			t.Errorf("#%d: keys = %+v, want %+v", i, keys, w)
		}
		if w := []storagepb.KeyValue{kv}; !reflect.DeepEqual(kvs, w) {
			t.Errorf("#%d: kvs = %+v, want %+v", i, kvs, w)
		}

		wact := []testutil.Action{
			{"rangeEvents", []interface{}{[]byte("foo"), []byte("goo"), int64(1)}},
		}
		if g := index.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: index action = %+v, want %+v", i, g, wact)
		}
		wstart, wend := revBytesRange(tt.idxr.revs[0])
		wact = []testutil.Action{
			{"range", []interface{}{keyBucketName, wstart, wend, int64(0)}},
		}
		if g := b.tx.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: tx action = %+v, want %+v", i, g, wact)
		}
		if s.currentRev != currev {
			t.Errorf("#%d: current rev = %+v, want %+v", i, s.currentRev, currev)
		}
	}
}