Esempio n. 1
0
func (s *store) delete(key []byte) {
	mainrev := s.currentRev.main + 1

	tx := s.b.BatchTx()
	tx.Lock()
	defer tx.Unlock()

	ibytes := newRevBytes()
	revToBytes(revision{main: mainrev, sub: s.currentRev.sub}, ibytes)

	event := storagepb.Event{
		Type: storagepb.DELETE,
		Kv: &storagepb.KeyValue{
			Key: key,
		},
	}

	d, err := event.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	tx.UnsafePut(keyBucketName, ibytes, d)
	err = s.kvindex.Tombstone(key, revision{main: mainrev, sub: s.currentRev.sub})
	if err != nil {
		log.Fatalf("storage: cannot tombstone an existing key (%s): %v", string(key), err)
	}
	s.currentRev.sub += 1
}
Esempio n. 2
0
func (s *store) put(key, value []byte) {
	rev := s.currentRev.main + 1
	c := rev

	// if the key exists before, use its previous created
	_, created, ver, err := s.kvindex.Get(key, rev)
	if err == nil {
		c = created.main
	}

	ibytes := newRevBytes()
	revToBytes(revision{main: rev, sub: s.currentRev.sub}, ibytes)

	ver = ver + 1
	event := storagepb.Event{
		Type: storagepb.PUT,
		Kv: &storagepb.KeyValue{
			Key:            key,
			Value:          value,
			CreateRevision: c,
			ModRevision:    rev,
			Version:        ver,
		},
	}

	d, err := event.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	s.tx.UnsafePut(keyBucketName, ibytes, d)
	s.kvindex.Put(key, revision{main: rev, sub: s.currentRev.sub})
	s.currentRev.sub += 1
}
Esempio n. 3
0
func (s *store) put(key, value []byte, rev int64) {
	c := rev

	// if the key exists before, use its previous created
	_, created, err := s.kvindex.Get(key, rev)
	if err == nil {
		c = created.main
	}

	ibytes := newRevBytes()
	revToBytes(reversion{main: rev, sub: s.currentRev.sub}, ibytes)

	event := storagepb.Event{
		Type: storagepb.PUT,
		Kv: storagepb.KeyValue{
			Key:         key,
			Value:       value,
			CreateIndex: c,
			ModIndex:    rev,
		},
	}

	d, err := event.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	tx := s.b.BatchTx()
	tx.Lock()
	defer tx.Unlock()
	tx.UnsafePut(keyBucketName, ibytes, d)
	s.kvindex.Put(key, reversion{main: rev, sub: s.currentRev.sub})
	s.currentRev.sub += 1
}
Esempio n. 4
0
func (s *store) delete(key []byte, mainrev int64) bool {
	grev := mainrev
	if s.currentRev.sub > 0 {
		grev += 1
	}
	rev, _, _, err := s.kvindex.Get(key, grev)
	if err != nil {
		// key not exist
		return false
	}

	tx := s.b.BatchTx()
	tx.Lock()
	defer tx.Unlock()

	revbytes := newRevBytes()
	revToBytes(rev, revbytes)

	_, vs := tx.UnsafeRange(keyBucketName, revbytes, nil, 0)
	if len(vs) != 1 {
		log.Fatalf("storage: delete cannot find rev (%d,%d)", rev.main, rev.sub)
	}

	e := &storagepb.Event{}
	if err := e.Unmarshal(vs[0]); err != nil {
		log.Fatalf("storage: cannot unmarshal event: %v", err)
	}
	if e.Type == storagepb.DELETE {
		return false
	}

	ibytes := newRevBytes()
	revToBytes(reversion{main: mainrev, sub: s.currentRev.sub}, ibytes)

	event := storagepb.Event{
		Type: storagepb.DELETE,
		Kv: &storagepb.KeyValue{
			Key: key,
		},
	}

	d, err := event.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	tx.UnsafePut(keyBucketName, ibytes, d)
	err = s.kvindex.Tombstone(key, reversion{main: mainrev, sub: s.currentRev.sub})
	if err != nil {
		log.Fatalf("storage: cannot tombstone an existing key (%s): %v", string(key), err)
	}
	s.currentRev.sub += 1
	return true
}
Esempio n. 5
0
// notify notifies the fact that given event at the given rev just happened to
// watchings that watch on the key of the event.
func (s *watchableStore) notify(rev int64, ev storagepb.Event) {
	// check all prefixes of the key to notify all corresponded watchings
	for i := 0; i <= len(ev.Kv.Key); i++ {
		k := string(ev.Kv.Key[:i])
		if wm, ok := s.synced[k]; ok {
			for w := range wm {
				// the watching needs to be notified when either it watches prefix or
				// the key is exactly matched.
				if !w.prefix && i != len(ev.Kv.Key) {
					continue
				}
				ev.WatchID = w.id
				select {
				case w.ch <- ev:
					pendingEventsGauge.Inc()
				default:
					w.cur = rev
					s.unsynced[w] = struct{}{}
					delete(wm, w)
					slowWatchingGauge.Inc()
				}
			}
		}
	}
}
Esempio n. 6
0
// RangeEvents gets the events from key to end in [startRev, endRev).
// If `end` is nil, the request only observes the events on key.
// If `end` is not nil, it observes the events on key range [key, range_end).
// Limit limits the number of events returned.
// If startRev <=0, rangeEvents returns events from the beginning of uncompacted history.
// If endRev <=0, it indicates there is no end revision.
//
// If the required start rev is compacted, ErrCompacted will be returned.
// If the required start rev has not happened, ErrFutureRev will be returned.
//
// RangeEvents returns events that satisfy the requirement (0 <= n <= limit).
// If events in the revision range have not all happened, it returns immeidately
// what is available.
// It also returns nextRev which indicates the start revision used for the following
// RangeEvents call. The nextRev could be smaller than the given endRev if the store
// has not progressed so far or it hits the event limit.
//
// TODO: return byte slices instead of events to avoid meaningless encode and decode.
func (s *store) RangeEvents(key, end []byte, limit, startRev, endRev int64) (evs []storagepb.Event, nextRev int64, err error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if startRev > 0 && startRev <= s.compactMainRev {
		return nil, 0, ErrCompacted
	}
	if startRev > s.currentRev.main {
		return nil, 0, ErrFutureRev
	}

	revs := s.kvindex.RangeEvents(key, end, startRev)
	if len(revs) == 0 {
		return nil, s.currentRev.main + 1, nil
	}

	tx := s.b.BatchTx()
	tx.Lock()
	defer tx.Unlock()
	// fetch events from the backend using revisions
	for _, rev := range revs {
		if endRev > 0 && rev.main >= endRev {
			return evs, rev.main, nil
		}
		revbytes := newRevBytes()
		revToBytes(rev, revbytes)

		_, vs := tx.UnsafeRange(keyBucketName, revbytes, nil, 0)
		if len(vs) != 1 {
			log.Fatalf("storage: range cannot find rev (%d,%d)", rev.main, rev.sub)
		}

		e := storagepb.Event{}
		if err := e.Unmarshal(vs[0]); err != nil {
			log.Fatalf("storage: cannot unmarshal event: %v", err)
		}
		evs = append(evs, e)
		if limit > 0 && len(evs) >= int(limit) {
			return evs, rev.main + 1, nil
		}
	}
	return evs, s.currentRev.main + 1, nil
}
Esempio n. 7
0
func TestStoreRestore(t *testing.T) {
	s, b, index := newFakeStore()

	putev := storagepb.Event{
		Type: storagepb.PUT,
		Kv: &storagepb.KeyValue{
			Key:            []byte("foo"),
			Value:          []byte("bar"),
			CreateRevision: 3,
			ModRevision:    3,
			Version:        1,
		},
	}
	putevb, err := putev.Marshal()
	if err != nil {
		t.Fatal(err)
	}
	delev := storagepb.Event{
		Type: storagepb.DELETE,
		Kv: &storagepb.KeyValue{
			Key: []byte("foo"),
		},
	}
	delevb, err := delev.Marshal()
	if err != nil {
		t.Fatal(err)
	}
	b.tx.rangeRespc <- rangeResp{[][]byte{finishedCompactKeyName}, [][]byte{newTestBytes(revision{2, 0})}}
	b.tx.rangeRespc <- rangeResp{[][]byte{newTestBytes(revision{3, 0}), newTestBytes(revision{4, 0})}, [][]byte{putevb, delevb}}
	b.tx.rangeRespc <- rangeResp{[][]byte{scheduledCompactKeyName}, [][]byte{newTestBytes(revision{2, 0})}}

	s.Restore()

	if s.compactMainRev != 2 {
		t.Errorf("compact rev = %d, want 4", s.compactMainRev)
	}
	wrev := revision{4, 0}
	if !reflect.DeepEqual(s.currentRev, wrev) {
		t.Errorf("current rev = %v, want %v", s.currentRev, wrev)
	}
	wact := []testutil.Action{
		{"range", []interface{}{metaBucketName, finishedCompactKeyName, []byte(nil), int64(0)}},
		{"range", []interface{}{keyBucketName, newTestBytes(revision{}), newTestBytes(revision{math.MaxInt64, math.MaxInt64}), int64(0)}},
		{"range", []interface{}{metaBucketName, scheduledCompactKeyName, []byte(nil), int64(0)}},
	}
	if g := b.tx.Action(); !reflect.DeepEqual(g, wact) {
		t.Errorf("tx actions = %+v, want %+v", g, wact)
	}
	wact = []testutil.Action{
		{"restore", []interface{}{[]byte("foo"), revision{3, 0}, revision{3, 0}, int64(1)}},
		{"tombstone", []interface{}{[]byte("foo"), revision{4, 0}}},
	}
	if g := index.Action(); !reflect.DeepEqual(g, wact) {
		t.Errorf("index action = %+v, want %+v", g, wact)
	}
}
Esempio n. 8
0
func (s *store) put(key, value []byte, rev int64) {
	ibytes := newRevBytes()
	revToBytes(reversion{main: rev, sub: s.currentRev.sub}, ibytes)

	event := storagepb.Event{
		Type: storagepb.PUT,
		Kv: storagepb.KeyValue{
			Key:   key,
			Value: value,
		},
	}

	d, err := event.Marshal()
	if err != nil {
		log.Fatalf("storage: cannot marshal event: %v", err)
	}

	tx := s.b.BatchTx()
	tx.Lock()
	defer tx.Unlock()
	tx.UnsafePut(keyBucketName, ibytes, d)
	s.kvindex.Put(key, reversion{main: rev, sub: s.currentRev.sub})
	s.currentRev.sub += 1
}
Esempio n. 9
0
// syncWatchers periodically syncs unsynced watchers by: Iterate all unsynced
// watchers to get the minimum revision within its range, skipping the
// watcher if its current revision is behind the compact revision of the
// store. And use this minimum revision to get all key-value pairs. Then send
// those events to watchers.
func (s *watchableStore) syncWatchers() {
	s.store.mu.Lock()
	defer s.store.mu.Unlock()

	if len(s.unsynced) == 0 {
		return
	}

	// in order to find key-value pairs from unsynced watchers, we need to
	// find min revision index, and these revisions can be used to
	// query the backend store of key-value pairs
	minRev := int64(math.MaxInt64)

	curRev := s.store.currentRev.main
	compactionRev := s.store.compactMainRev

	prefixes := make(map[string]struct{})
	for _, set := range s.unsynced {
		for w := range set {
			k := string(w.key)

			if w.cur > curRev {
				panic("watcher current revision should not exceed current revision")
			}

			if w.cur < compactionRev {
				select {
				case w.ch <- WatchResponse{WatchID: w.id, CompactRevision: compactionRev}:
					s.unsynced.delete(w)
				default:
					// retry next time
				}
				continue
			}

			if minRev >= w.cur {
				minRev = w.cur
			}

			if w.prefix {
				prefixes[k] = struct{}{}
			}
		}
	}

	minBytes, maxBytes := newRevBytes(), newRevBytes()
	revToBytes(revision{main: minRev}, minBytes)
	revToBytes(revision{main: curRev + 1}, maxBytes)

	// UnsafeRange returns keys and values. And in boltdb, keys are revisions.
	// values are actual key-value pairs in backend.
	tx := s.store.b.BatchTx()
	tx.Lock()
	ks, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)

	evs := []storagepb.Event{}

	// get the list of all events from all key-value pairs
	for i, v := range vs {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		k := string(kv.Key)
		if _, ok := s.unsynced.getSetByKey(k); !ok && !matchPrefix(k, prefixes) {
			continue
		}

		var ev storagepb.Event
		switch {
		case isTombstone(ks[i]):
			ev.Type = storagepb.DELETE
		default:
			ev.Type = storagepb.PUT
		}
		ev.Kv = &kv

		evs = append(evs, ev)
	}
	tx.Unlock()

	for w, es := range newWatcherToEventMap(s.unsynced, evs) {
		select {
		// s.store.Rev also uses Lock, so just return directly
		case w.ch <- WatchResponse{WatchID: w.id, Events: es, Revision: s.store.currentRev.main}:
			pendingEventsGauge.Add(float64(len(es)))
		default:
			// TODO: handle the full unsynced watchers.
			// continue to process other watchers for now, the full ones
			// will be processed next time and hopefully it will not be full.
			continue
		}
		w.cur = curRev
		s.synced.add(w)
		s.unsynced.delete(w)
	}

	slowWatcherGauge.Set(float64(len(s.unsynced)))
}
Esempio n. 10
0
func TestStoreRangeEvents(t *testing.T) {
	ev := storagepb.Event{
		Type: storagepb.PUT,
		Kv: &storagepb.KeyValue{
			Key:            []byte("foo"),
			Value:          []byte("bar"),
			CreateRevision: 1,
			ModRevision:    2,
			Version:        1,
		},
	}
	evb, err := ev.Marshal()
	if err != nil {
		t.Fatal(err)
	}
	currev := revision{2, 0}

	tests := []struct {
		idxr indexRangeEventsResp
		r    rangeResp
	}{
		{
			indexRangeEventsResp{[]revision{{2, 0}}},
			rangeResp{[][]byte{newTestBytes(revision{2, 0})}, [][]byte{evb}},
		},
		{
			indexRangeEventsResp{[]revision{{2, 0}, {3, 0}}},
			rangeResp{[][]byte{newTestBytes(revision{2, 0})}, [][]byte{evb}},
		},
	}
	for i, tt := range tests {
		s, b, index := newFakeStore()
		s.currentRev = currev
		index.indexRangeEventsRespc <- tt.idxr
		b.tx.rangeRespc <- tt.r

		evs, _, err := s.RangeEvents([]byte("foo"), []byte("goo"), 1, 1, 4)
		if err != nil {
			t.Errorf("#%d: err = %v, want nil", i, err)
		}
		if w := []storagepb.Event{ev}; !reflect.DeepEqual(evs, w) {
			t.Errorf("#%d: evs = %+v, want %+v", i, evs, w)
		}

		wact := []testutil.Action{
			{"rangeEvents", []interface{}{[]byte("foo"), []byte("goo"), int64(1)}},
		}
		if g := index.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: index action = %+v, want %+v", i, g, wact)
		}
		wact = []testutil.Action{
			{"range", []interface{}{keyBucketName, newTestBytes(tt.idxr.revs[0]), []byte(nil), int64(0)}},
		}
		if g := b.tx.Action(); !reflect.DeepEqual(g, wact) {
			t.Errorf("#%d: tx action = %+v, want %+v", i, g, wact)
		}
		if s.currentRev != currev {
			t.Errorf("#%d: current rev = %+v, want %+v", i, s.currentRev, currev)
		}
	}
}
Esempio n. 11
0
// syncWatchers periodically syncs unsynced watchers by: Iterate all unsynced
// watchers to get the minimum revision within its range, skipping the
// watcher if its current revision is behind the compact revision of the
// store. And use this minimum revision to get all key-value pairs. Then send
// those events to watchers.
func (s *watchableStore) syncWatchers() {
	s.store.mu.Lock()
	defer s.store.mu.Unlock()

	if len(s.unsynced) == 0 {
		return
	}

	// in order to find key-value pairs from unsynced watchers, we need to
	// find min revision index, and these revisions can be used to
	// query the backend store of key-value pairs
	minRev := int64(math.MaxInt64)

	curRev := s.store.currentRev.main
	compactionRev := s.store.compactMainRev

	// TODO: change unsynced struct type same to this
	keyToUnsynced := make(map[string]map[*watcher]struct{})

	for w := range s.unsynced {
		k := string(w.key)

		if w.cur > curRev {
			panic("watcher current revision should not exceed current revision")
		}

		if w.cur < compactionRev {
			// TODO: return error compacted to that watcher instead of
			// just removing it sliently from unsynced.
			delete(s.unsynced, w)
			continue
		}

		if minRev >= w.cur {
			minRev = w.cur
		}

		if _, ok := keyToUnsynced[k]; !ok {
			keyToUnsynced[k] = make(map[*watcher]struct{})
		}
		keyToUnsynced[k][w] = struct{}{}
	}

	minBytes, maxBytes := newRevBytes(), newRevBytes()
	revToBytes(revision{main: minRev}, minBytes)
	revToBytes(revision{main: curRev + 1}, maxBytes)

	// UnsafeRange returns keys and values. And in boltdb, keys are revisions.
	// values are actual key-value pairs in backend.
	tx := s.store.b.BatchTx()
	tx.Lock()
	ks, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)
	tx.Unlock()

	evs := []storagepb.Event{}

	// get the list of all events from all key-value pairs
	for i, v := range vs {
		var kv storagepb.KeyValue
		if err := kv.Unmarshal(v); err != nil {
			log.Panicf("storage: cannot unmarshal event: %v", err)
		}

		k := string(kv.Key)
		if _, ok := keyToUnsynced[k]; !ok {
			continue
		}

		var ev storagepb.Event
		switch {
		case isTombstone(ks[i]):
			ev.Type = storagepb.DELETE
		default:
			ev.Type = storagepb.PUT
		}
		ev.Kv = &kv

		evs = append(evs, ev)
	}

	for w, es := range newWatcherToEventMap(keyToUnsynced, evs) {
		wr := WatchResponse{WatchID: w.id, Events: es}
		select {
		case w.ch <- wr:
			pendingEventsGauge.Add(float64(len(es)))
		default:
			// TODO: handle the full unsynced watchers.
			// continue to process other watchers for now, the full ones
			// will be processed next time and hopefully it will not be full.
			continue
		}
		k := string(w.key)
		if err := unsafeAddWatcher(&s.synced, k, w); err != nil {
			log.Panicf("error unsafeAddWatcher (%v) for key %s", err, k)
		}
		delete(s.unsynced, w)
	}

	slowWatcherGauge.Set(float64(len(s.unsynced)))
}