Beispiel #1
0
// ExternalId returns the external id (xid) stored for the given uid.
//
// The xid is read from the single posting of the (uid, "_xid_") posting list.
// An error is returned when the list is empty, when the posting cannot be
// retrieved, when its value cannot be parsed, or when the parsed value is not
// a string. Finding more than one posting, or a posting whose uid is not
// math.MaxUint64, is reported through the logger at Fatal level.
func ExternalId(uid uint64) (xid string, rerr error) {
	key := posting.Key(uid, "_xid_") // uid -> "_xid_" -> xid
	pl := posting.GetOrCreate(key, uidStore)
	if pl.Length() == 0 {
		return "", errors.New("NO external id")
	}

	if pl.Length() > 1 {
		glog.WithField("uid", uid).Fatal("This shouldn't be happening.")
		return "", errors.New("Multiple external ids for this uid.")
	}

	var p types.Posting
	if ok := pl.Get(&p, 0); !ok {
		glog.WithField("uid", uid).Error("While retrieving posting")
		return "", errors.New("While retrieving posting")
	}

	if p.Uid() != math.MaxUint64 {
		glog.WithField("uid", uid).Fatal("Value uid must be MaxUint64.")
	}

	// Check the parse error before touching the value: on failure t may be
	// nil, and an unchecked type assertion would panic.
	var t interface{}
	if err := posting.ParseValue(&t, p.ValueBytes()); err != nil {
		return "", err
	}
	s, ok := t.(string)
	if !ok {
		return "", errors.New("External id is not a string")
	}
	return s, nil
}
Beispiel #2
0
// lePostingIndex binary-searches the stored posting list for maxUid.
// It returns the index and uid of the posting whose uid equals maxUid or,
// failing that, of the last posting probed whose uid was below maxUid
// (presumably the greatest such uid, assuming postings are sorted by
// ascending uid). When no probed posting has a uid <= maxUid it returns
// (-1, 0).
//
// Caller must hold at least a read lock.
func (l *List) lePostingIndex(maxUid uint64) (int, uint64) {
	plist := l.getPostingList()
	lo, hi := 0, plist.PostingsLength()-1
	best := -1
	cur := new(types.Posting)

	for lo <= hi {
		mid := (lo + hi) / 2
		if !plist.Postings(cur, mid) {
			glog.WithField("idx", mid).Fatal("Unable to parse posting from list.")
		}
		switch uid := cur.Uid(); {
		case uid > maxUid:
			// Too large: keep looking in the lower half.
			hi = mid - 1
		case uid == maxUid:
			// Exact hit.
			return mid, uid
		default:
			// Candidate found; try to find a larger one in the upper half.
			best = mid
			lo = mid + 1
		}
	}
	if best == -1 {
		return -1, 0
	}
	// Re-read the best candidate, since cur may hold a later probe.
	if !plist.Postings(cur, best) {
		glog.WithField("idx", best).Fatal("Unable to parse posting from list.")
	}
	return best, cur.Uid()
}
Beispiel #3
0
// assignNew returns the uid for xid, allocating one if pl holds none.
//
// The lock entry obtained from lmgr for xid is held for the whole call and
// its ts field is refreshed. If pl already holds one posting, that posting's
// uid is returned. Otherwise a uid is obtained from allocateUniqueUid and
// recorded in pl through a Set mutation whose source is "_assigner_"; any
// error from that mutation is returned together with the new uid.
func assignNew(pl *posting.List, xid string, instanceIdx uint64,
	numInstances uint64) (uint64, error) {

	entry := lmgr.newOrExisting(xid)
	entry.Lock()
	defer entry.Unlock()
	entry.ts = time.Now()

	switch {
	case pl.Length() > 1:
		glog.Fatalf("We shouldn't have more than 1 uid for xid: %v\n", xid)

	case pl.Length() > 0:
		var existing types.Posting
		if !pl.Get(&existing, 0) {
			return 0, errors.New("While retrieving entry from posting list.")
		}
		return existing.Uid(), nil
	}

	// Nothing stored yet for this xid, so allocate a fresh uid.
	uid, err := allocateUniqueUid(xid, instanceIdx, numInstances)
	if err != nil {
		return 0, err
	}

	edge := x.DirectedEdge{
		ValueId:   uid,
		Source:    "_assigner_",
		Timestamp: time.Now(),
	}
	return uid, pl.AddMutation(edge, posting.Set)
}
Beispiel #4
0
// AddMutation applies a single edge mutation (op) to this posting list.
//
// It waits on l.wg and takes the write lock, then:
//   - returns E_TMP_ERROR if the list is flagged deleteMe,
//   - rejects edges whose timestamp is lower than l.maxMutationTs,
//   - forces ValueId to math.MaxUint64 for edges that carry a Value,
//   - rejects edges whose ValueId is zero.
//
// The edge is then encoded with newPosting, merged via mergeMutation, and,
// when a commit logger is attached, appended to it with AddLog (whose error
// is returned).
//
// In benchmarks, the time taken per AddMutation before was
// plateauing at 2.5 ms with sync per 10 log entries, and increasing
// for sync per 100 log entries (to 3 ms per AddMutation), largely because
// of how index generation was being done.
//
// With this change, the benchmarks perform as good as benchmarks for
// commit.Logger, where the less frequently file sync happens, the faster
// AddMutations run.
//
// PASS
// BenchmarkAddMutations_SyncEveryLogEntry-6    	     100	  24712455 ns/op
// BenchmarkAddMutations_SyncEvery10LogEntry-6  	     500	   2485961 ns/op
// BenchmarkAddMutations_SyncEvery100LogEntry-6 	   10000	    298352 ns/op
// BenchmarkAddMutations_SyncEvery1000LogEntry-6	   30000	     63544 ns/op
// ok  	github.com/dgraph-io/dgraph/posting	10.291s
func (l *List) AddMutation(t x.DirectedEdge, op byte) error {
	l.wg.Wait()
	l.Lock()
	defer l.Unlock()
	if l.deleteMe {
		return E_TMP_ERROR
	}

	// Refuse mutations older than the latest timestamp recorded so far.
	if t.Timestamp.UnixNano() < l.maxMutationTs {
		return fmt.Errorf("Mutation ts lower than committed ts.")
	}

	// Mutation arrives:
	// - Check if we had any(SET/DEL) before this, stored in the mutation list.
	//		- If yes, then replace that mutation. Jump to a)
	// a)		check if the entity exists in main posting list.
	// 				- If yes, store the mutation.
	// 				- If no, disregard this mutation.

	// All edges with a value set, have the same uid. In other words,
	// an (entity, attribute) can only have one interface{} value.
	if t.Value != nil {
		t.ValueId = math.MaxUint64
	}
	if t.ValueId == 0 {
		return fmt.Errorf("ValueId cannot be zero.")
	}

	// Encode the edge as a flatbuffer posting and wrap the buffer so it can
	// be read back through the types.Posting accessors.
	mbuf := newPosting(t, op)
	uo := flatbuffers.GetUOffsetT(mbuf)
	mpost := new(types.Posting)
	mpost.Init(mbuf, uo)

	glog.WithFields(logrus.Fields{
		"uid":    mpost.Uid(),
		"source": string(mpost.Source()),
		"ts":     mpost.Ts(),
	}).Debug("Add mutation")

	l.mergeMutation(mpost)
	l.maxMutationTs = t.Timestamp.UnixNano()
	// Record the list as dirty only while it holds pending mutations.
	if len(l.mindex)+len(l.mlayer) > 0 {
		atomic.StoreInt64(&l.dirtyTs, time.Now().UnixNano())
		if dirtymap != nil {
			dirtymap.Put(l.ghash, true)
		}
	}
	// Without a commit logger there is nothing more to do.
	if l.clog == nil {
		return nil
	}
	return l.clog.AddLog(t.Timestamp.UnixNano(), l.hash, mbuf)
}
Beispiel #5
0
// samePosting reports whether a and b agree on uid, value (length and bytes)
// and source. Other fields are not compared.
func samePosting(a *types.Posting, b *types.Posting) bool {
	return a.Uid() == b.Uid() &&
		a.ValueLength() == b.ValueLength() &&
		bytes.Equal(a.ValueBytes(), b.ValueBytes()) &&
		bytes.Equal(a.Source(), b.Source())
}
Beispiel #6
0
// checkUids verifies that l contains exactly the given uids, in order.
// It returns a descriptive error on the first length mismatch, retrieval
// failure or uid mismatch, and nil when everything matches.
func checkUids(t *testing.T, l *List, uids ...uint64) error {
	if want := len(uids); l.Length() != want {
		return fmt.Errorf("Expected: %d. Length: %d", want, l.Length())
	}
	for idx, want := range uids {
		var got types.Posting
		if !l.Get(&got, idx) {
			return fmt.Errorf("Unable to retrieve posting.")
		}
		if got.Uid() != want {
			return fmt.Errorf("Expected: %v. Got: %v", want, got.Uid())
		}
	}
	return nil
}
Beispiel #7
0
// GetUids returns the uids of the postings in the list, in list order.
// Collection stops at the first posting that cannot be retrieved or whose
// uid is math.MaxUint64; that posting is not included. The call waits on
// l.wg and holds the read lock while iterating.
func (l *List) GetUids() []uint64 {
	l.wg.Wait()
	l.RLock()
	defer l.RUnlock()

	uids := make([]uint64, 0, l.length())
	var cur types.Posting
	for idx := 0; idx < l.length(); idx++ {
		if !l.get(&cur, idx) {
			break
		}
		uid := cur.Uid()
		if uid == math.MaxUint64 {
			break
		}
		uids = append(uids, uid)
	}
	return uids
}
Beispiel #8
0
// addPosting writes a copy of p into the builder b and returns the offset
// of the newly written posting. The value field is written only when p has
// a non-empty value.
func addPosting(b *flatbuffers.Builder, p types.Posting) flatbuffers.UOffsetT {
	// The source string and value vector are created before PostingStart.
	srcOff := b.CreateByteString(p.Source())
	valOff := flatbuffers.UOffsetT(0)
	if p.ValueLength() > 0 {
		valOff = b.CreateByteVector(p.ValueBytes())
	}

	types.PostingStart(b)
	types.PostingAddUid(b, p.Uid())
	if valOff != 0 {
		types.PostingAddValue(b, valOff)
	}
	types.PostingAddSource(b, srcOff)
	types.PostingAddTs(b, p.Ts())
	types.PostingAddOp(b, p.Op())
	return types.PostingEnd(b)
}
Beispiel #9
0
// Value returns the value bytes of the last posting in the list, provided
// that posting's uid is math.MaxUint64. An error is returned when the list
// is empty, when the last posting cannot be retrieved, or when its uid is
// anything else. The call waits on l.wg and holds the read lock.
func (l *List) Value() (result []byte, rerr error) {
	l.wg.Wait()
	l.RLock()
	defer l.RUnlock()

	n := l.length()
	if n == 0 {
		return nil, fmt.Errorf("No value found")
	}

	var last types.Posting
	if !l.get(&last, n-1) {
		return nil, fmt.Errorf("Unable to get last posting")
	}
	if last.Uid() != math.MaxUint64 {
		return nil, fmt.Errorf("No value found")
	}
	return last.ValueBytes(), nil
}
Beispiel #10
0
// init prepares the list for use with the given key, backing store and
// commit logger.
//
// It holds the write lock for the whole call and calls l.wg.Done() on
// return. After recording key, pstore and clog it seeds l.maxMutationTs from
// the stored posting list's CommitTs, computes the 32-bit and 64-bit key
// hashes, and creates an empty mlayer. When clog is non-nil, it streams log
// entries for this list's hash (passing CommitTs()+1 to StreamEntries) and
// merges each one via mergeMutation. A streaming error is logged, not
// returned.
func (l *List) init(key []byte, pstore *store.Store, clog *commit.Logger) {
	l.Lock()
	defer l.Unlock()
	// Deferred last, so it runs first: wg.Done() fires before the unlock.
	defer l.wg.Done()

	if len(empty) == 0 {
		glog.Fatal("empty should have some bytes.")
	}
	l.key = key
	l.pstore = pstore
	l.clog = clog

	posting := l.getPostingList()
	l.maxMutationTs = posting.CommitTs()
	l.hash = farm.Fingerprint32(key)
	l.ghash = gotomic.IntKey(farm.Fingerprint64(key))
	l.mlayer = make(map[int]types.Posting)

	// Without a commit logger there are no logged mutations to replay.
	if clog == nil {
		return
	}
	glog.Debug("Starting stream entries...")

	err := clog.StreamEntries(posting.CommitTs()+1, l.hash,
		func(hdr commit.Header, buffer []byte) {

			// Each log entry is a flatbuffer-encoded posting.
			uo := flatbuffers.GetUOffsetT(buffer)
			m := new(types.Posting)
			m.Init(buffer, uo)
			// Track the highest mutation timestamp seen in the log.
			if m.Ts() > l.maxMutationTs {
				l.maxMutationTs = m.Ts()
			}
			glog.WithFields(logrus.Fields{
				"uid":    m.Uid(),
				"source": string(m.Source()),
				"ts":     m.Ts(),
			}).Debug("Got entry from log")
			l.mergeMutation(m)
		})
	if err != nil {
		glog.WithError(err).Error("While streaming entries.")
	}
	glog.Debug("Done streaming entries.")
}
Beispiel #11
0
func GetOrAssign(xid string, instanceIdx uint64, numInstances uint64) (uid uint64, rerr error) {
	key := stringKey(xid)
	pl := posting.GetOrCreate(key)
	if pl.Length() == 0 {
		return assignNew(pl, xid, instanceIdx, numInstances)

	} else if pl.Length() > 1 {
		glog.Fatalf("We shouldn't have more than 1 uid for xid: %v\n", xid)

	} else {
		// We found one posting.
		var p types.Posting
		if ok := pl.Get(&p, 0); !ok {
			return 0, errors.New("While retrieving entry from posting list")
		}
		return p.Uid(), nil
	}
	return 0, errors.New("Some unhandled route lead me here." +
		" Wake the stupid developer up.")
}
Beispiel #12
0
// mergeMutation folds a single mutation posting (mp) into the list's
// mutation structures: l.mindex (add/delete instructions), l.mlayer
// (replacements keyed by posting-list index) and the l.mdelta counter.
//
// mutationIndex (mindex) is useful to avoid having to parse the entire
// posting list up to idx, for every Get(*types.Posting, idx), which has a
// complexity of O(idx). Iteration over an N size posting list would thus push
// us into O(N^2) territory, without this technique.
//
// Using this technique,
// we can overlay mutation layers over immutable posting list, to allow for
// O(m) lookups, where m = size of mutation list. Obviously, the size of
// mutation list should be much smaller than the size of posting list, except
// in tiny posting lists, where performance wouldn't be such a concern anyways.
//
// Say we have this data:
// Posting List (plist, immutable):
// idx:   0  1  2  3  4  5
// value: 2  5  9 10 13 15
//
// Mutation List (mlist):
// idx:          0   1   2
// value:        7  10  13' // posting uid is 13 but other values vary.
// Op:         SET DEL SET
// Effective:  ADD DEL REP  (REP = replace)
//
// ----------------------------------------------------------------------------
// regenerateIndex would generate these:
// mlayer (layer just above posting list contains only replace instructions)
// idx:          4
// value:       13'
// Op:         SET
// Effective:  REP  (REP = replace)
//
// mindex:
// idx:          2   4
// value:        7  10
// moveidx:     -1  +1
// Effective:  ADD DEL
//
// Now, let's see how the access would work:
// idx: get --> calculation [idx, served from, value]
// idx: 0 --> 0   [0, plist, 2]
// idx: 1 --> 1   [1, plist, 5]
// idx: 2 --> ADD from mindex
//        -->     [2, mindex, 7] // also has moveidx = -1
// idx: 3 --> 3 + moveidx=-1 = 2 [2, plist, 9]
// idx: 4 --> DEL from mindex
//        --> 4 + moveidx=-1 + moveidx=+1 = 4 [4, mlayer, 13']
// idx: 5 --> 5 + moveidx=-1 + moveidx=+1 = 5 [5, plist, 15]
//
// Thus we can provide mutation layers over immutable posting list, while
// still ensuring fast lookup access.
//
// NOTE: This function expects the caller to hold a RW Lock.
// Update: With mergeMutation function, we're adding mutations with a cost
// of O(log M + log N), where M = number of previous mutations, and N =
// number of postings in the immutable posting list.
func (l *List) mergeMutation(mp *types.Posting) {
	curUid := mp.Uid()
	pi, puid := l.lePostingIndex(curUid)  // O(log N)
	mi, muid := l.leMutationIndex(curUid) // O(log M)
	inPlist := puid == curUid

	// O(1) follows, but any additions or deletions from mindex would
	// be O(M) due to element shifting. In terms of benchmarks, this performs
	// a LOT better than when I was running O(N + M), re-generating mutation
	// flatbuffers, linked lists etc.
	mlink := new(MutationLink)
	mlink.posting = mp

	if mp.Op() == Del {
		if muid == curUid { // curUid found in mindex.
			if inPlist { // In plist, so replace previous instruction in mindex.
				mlink.moveidx = 1
				mlink.idx = pi + mi
				l.mindex[mi] = mlink

			} else { // Not in plist, so delete previous instruction in mindex.
				l.mdelta -= 1
				l.mindexDeleteAt(mi)
			}

		} else { // curUid not found in mindex.
			if inPlist { // In plist, so insert in mindex.
				mlink.moveidx = 1
				l.mdelta -= 1
				mlink.idx = pi + mi + 1
				l.mindexInsertAt(mlink, mi+1)

			} else {
				// Not found in plist, and not found in mindex. So, ignore.
			}
		}

	} else if mp.Op() == Set {
		if muid == curUid { // curUid found in mindex.
			if inPlist { // In plist, so delete previous instruction, set in mlayer.
				l.mindexDeleteAt(mi)
				l.mlayer[pi] = *mp

			} else { // Not in plist, so replace previous set instruction in mindex.
				// NOTE: This prev instruction couldn't have been a Del instruction.
				mlink.idx = pi + 1 + mi
				mlink.moveidx = -1
				l.mindex[mi] = mlink
			}

		} else { // curUid not found in mindex.
			if inPlist { // In plist, so just set it in mlayer.
				// If this posting matches what we already have in posting list,
				// we don't need to `dirty` this by adding to mlayer.
				plist := l.getPostingList()
				var cp types.Posting
				if ok := plist.Postings(&cp, pi); ok {
					if samePosting(&cp, mp) {
						return // do nothing.
					}
				}
				l.mlayer[pi] = *mp

			} else { // not in plist, not in mindex, so insert in mindex.
				mlink.moveidx = -1
				l.mdelta += 1
				mlink.idx = pi + 1 + mi + 1 // right of pi, and right of mi.
				l.mindexInsertAt(mlink, mi+1)
			}
		}

	} else {
		glog.WithField("op", mp.Op()).Fatal("Invalid operation.")
	}
}
Beispiel #13
0
// TestAddMutation exercises AddMutation on a uid posting list backed by a
// temporary store and commit log. It adds, inserts, deletes and replaces
// edges, checking the resulting uid order after each step, and finally
// verifies that a second list initialised from the same store and log sees
// the same uids both before and after MergeIfDirty.
func TestAddMutation(t *testing.T) {
	// logrus.SetLevel(logrus.DebugLevel)
	l := NewList()
	key := Key(1, "name")
	dir, err := ioutil.TempDir("", "storetest_")
	if err != nil {
		t.Fatal(err)
	}

	defer os.RemoveAll(dir)
	ps := new(store.Store)
	ps.Init(dir)

	clog := commit.NewLogger(dir, "mutations", 50<<20)
	clog.Init()
	defer clog.Close()

	l.init(key, ps, clog)

	edge := x.DirectedEdge{
		ValueId:   9,
		Source:    "testing",
		Timestamp: time.Now(),
	}
	if err := l.AddMutation(edge, Set); err != nil {
		t.Error(err)
	}
	/*
		if err := l.CommitIfDirty(); err != nil {
			t.Error(err)
		}
	*/

	if l.Length() != 1 {
		t.Error("Unable to find added elements in posting list")
	}
	var p types.Posting
	if ok := l.Get(&p, 0); !ok {
		// p is unusable if retrieval failed, so stop here.
		t.Fatal("Unable to retrieve posting at 1st iter")
	}
	if p.Uid() != 9 {
		t.Errorf("Expected 9. Got: %v", p.Uid())
	}
	if string(p.Source()) != "testing" {
		t.Errorf("Expected testing. Got: %v", string(p.Source()))
	}
	// return // Test 1.

	// Add another edge now.
	edge.ValueId = 81
	if err := l.AddMutation(edge, Set); err != nil {
		t.Error(err)
	}
	// l.CommitIfDirty()
	if l.Length() != 2 {
		t.Errorf("Length: %d", l.Length())
	}

	// The list should now hold 9 and 81, i.e. successive powers of 9.
	uid := uint64(1)
	for i := 0; i < l.Length(); i++ {
		if ok := l.Get(&p, i); !ok {
			t.Error("Unable to retrieve posting at 2nd iter")
		}
		uid *= 9
		if p.Uid() != uid {
			t.Errorf("Expected: %v. Got: %v", uid, p.Uid())
		}
	}
	// return // Test 2.

	// Add another edge, in between the two above.
	uids := []uint64{
		9, 49, 81,
	}
	edge.ValueId = 49
	if err := l.AddMutation(edge, Set); err != nil {
		t.Error(err)
	}
	/*
		if err := l.CommitIfDirty(); err != nil {
			t.Error(err)
		}
	*/
	if err := checkUids(t, l, uids...); err != nil {
		t.Error(err)
	}
	// return // Test 3.

	// Delete an edge, add an edge, replace an edge
	edge.ValueId = 49
	if err := l.AddMutation(edge, Del); err != nil {
		t.Error(err)
	}

	edge.ValueId = 69
	if err := l.AddMutation(edge, Set); err != nil {
		t.Error(err)
	}

	edge.ValueId = 9
	edge.Source = "anti-testing"
	if err := l.AddMutation(edge, Set); err != nil {
		t.Error(err)
	}
	/*
		if err := l.CommitIfDirty(); err != nil {
			t.Error(err)
		}
	*/

	uids = []uint64{9, 69, 81}
	if err := checkUids(t, l, uids...); err != nil {
		t.Error(err)
	}

	if ok := l.Get(&p, 0); !ok {
		t.Fatal("Unable to retrieve replaced posting")
	}
	if string(p.Source()) != "anti-testing" {
		t.Errorf("Expected: anti-testing. Got: %v", string(p.Source()))
	}

	/*
		if err := l.CommitIfDirty(); err != nil {
			t.Error(err)
		}
	*/
	// Try reading the same data in another PostingList.
	dl := NewList()
	dl.init(key, ps, clog)
	if err := checkUids(t, dl, uids...); err != nil {
		t.Error(err)
	}

	if _, err := dl.MergeIfDirty(); err != nil {
		t.Error(err)
	}
	if err := checkUids(t, dl, uids...); err != nil {
		t.Error(err)
	}
}
Beispiel #14
0
// TestAddMutation_Value checks value edges: a value mutation must be stored
// under uid math.MaxUint64, must read back unchanged both before and after
// MergeIfDirty, and a later value mutation must replace the earlier one
// instead of adding a second posting.
func TestAddMutation_Value(t *testing.T) {
	// logrus.SetLevel(logrus.DebugLevel)
	glog.Debug("Running init...")
	ol := NewList()
	key := Key(10, "value")
	dir, err := ioutil.TempDir("", "storetest_")
	if err != nil {
		t.Fatal(err)
	}

	defer os.RemoveAll(dir)
	ps := new(store.Store)
	ps.Init(dir)

	clog := commit.NewLogger(dir, "mutations", 50<<20)
	clog.Init()
	defer clog.Close()

	ol.init(key, ps, clog)
	glog.Debug("Init successful.")

	edge := x.DirectedEdge{
		Value:     "oh hey there",
		Source:    "new-testing",
		Timestamp: time.Now(),
	}
	if err := ol.AddMutation(edge, Set); err != nil {
		t.Error(err)
	}
	var p types.Posting
	if ok := ol.Get(&p, 0); !ok {
		// p is unusable if retrieval failed, so stop here.
		t.Fatal("While retrieving posting")
	}
	if p.Uid() != math.MaxUint64 {
		t.Errorf("All value uids should go to MaxUint64. Got: %v", p.Uid())
	}
	var iout interface{}
	if err := ParseValue(&iout, p.ValueBytes()); err != nil {
		t.Error(err)
	}
	// Use checked assertions so a wrong type fails the test, not panics.
	out, ok := iout.(string)
	if !ok {
		t.Fatalf("Expected a string value. Got: %T", iout)
	}
	if out != "oh hey there" {
		t.Errorf("Expected a value. Got: [%q]", out)
	}

	// Run the same check after committing.
	if _, err := ol.MergeIfDirty(); err != nil {
		t.Error(err)
	}
	{
		var tp types.Posting
		if ok := ol.Get(&tp, 0); !ok {
			t.Fatal("While retrieving posting")
		}
		if err := ParseValue(&iout, tp.ValueBytes()); err != nil {
			t.Error(err)
		}
		merged, ok := iout.(string)
		if !ok {
			t.Fatalf("Expected a string value. Got: %T", iout)
		}
		if merged != "oh hey there" {
			t.Errorf("Expected a value. Got: [%q]", merged)
		}
	}

	// The value made it to the posting list. Changing it now.
	edge.Value = 119
	if err := ol.AddMutation(edge, Set); err != nil {
		t.Error(err)
	}
	if ol.Length() != 1 {
		t.Errorf("Length should be one. Got: %v", ol.Length())
	}
	if ok := ol.Get(&p, 0); !ok {
		t.Fatal("While retrieving posting")
	}
	if err := ParseValue(&iout, p.ValueBytes()); err != nil {
		t.Error(err)
	}
	// The test expects the stored int 119 to read back as a float64.
	intout, ok := iout.(float64)
	if !ok {
		t.Fatalf("Expected a float64 value. Got: %T", iout)
	}
	if intout != 119 {
		t.Errorf("Expected 119. Got: %v", intout)
	}
}