func addPosting(b *flatbuffers.Builder, p types.Posting) flatbuffers.UOffsetT { so := b.CreateByteString(p.Source()) // Do this before posting start. var bo flatbuffers.UOffsetT if p.ValueLength() > 0 { bo = b.CreateByteVector(p.ValueBytes()) } types.PostingStart(b) types.PostingAddUid(b, p.Uid()) if bo > 0 { types.PostingAddValue(b, bo) } types.PostingAddSource(b, so) types.PostingAddTs(b, p.Ts()) types.PostingAddOp(b, p.Op()) return types.PostingEnd(b) }
// mutationIndex (mindex) is useful to avoid having to parse the entire
// posting list up to idx, for every Get(*types.Posting, idx), which has a
// complexity of O(idx). Iteration over an N-sized posting list would thus
// push us into O(N^2) territory without this technique.
//
// Using this technique, we can overlay mutation layers over the immutable
// posting list, to allow for O(m) lookups, where m = size of mutation list.
// Obviously, the size of the mutation list should be much smaller than the
// size of the posting list, except in tiny posting lists, where performance
// wouldn't be such a concern anyway.
//
// Say we have this data:
// Posting List (plist, immutable):
// idx:   0  1  2  3  4  5
// value: 2  5  9 10 13 15
//
// Mutation List (mlist):
// idx:          0   1   2
// value:        7  10  13' // posting uid is 13 but other values vary.
// Op:         SET DEL SET
// Effective:  ADD DEL REP  (REP = replace)
//
// ----------------------------------------------------------------------------
// regenerateIndex would generate these:
// mlayer (layer just above posting list contains only replace instructions)
// idx:          4
// value:       13'
// Op:         SET
// Effective:  REP  (REP = replace)
//
// mindex:
// idx:          2   4
// value:        7  10
// moveidx:     -1  +1
// Effective:  ADD DEL
//
// Now, let's see how the access would work:
// idx: get --> calculation                     [idx, served from, value]
// idx: 0 --> 0                                 [0, plist, 2]
// idx: 1 --> 1                                 [1, plist, 5]
// idx: 2 --> ADD from mindex
//        -->                                   [2, mindex, 7] // also has moveidx = -1
// idx: 3 --> 3 + moveidx=-1 = 2                [2, plist, 9]
// idx: 4 --> DEL from mindex
//        --> 4 + moveidx=-1 + moveidx=+1 = 4   [4, mlayer, 13']
// idx: 5 --> 5 + moveidx=-1 + moveidx=+1 = 5   [5, plist, 15]
//
// Thus we can provide mutation layers over immutable posting list, while
// still ensuring fast lookup access.
//
// NOTE: This function expects the caller to hold a RW Lock.
// Update: With mergeMutation function, we're adding mutations with a cost // of O(log M + log N), where M = number of previous mutations, and N = // number of postings in the immutable posting list. func (l *List) mergeMutation(mp *types.Posting) { curUid := mp.Uid() pi, puid := l.lePostingIndex(curUid) // O(log N) mi, muid := l.leMutationIndex(curUid) // O(log M) inPlist := puid == curUid // O(1) follows, but any additions or deletions from mindex would // be O(M) due to element shifting. In terms of benchmarks, this performs // a LOT better than when I was running O(N + M), re-generating mutation // flatbuffers, linked lists etc. mlink := new(MutationLink) mlink.posting = mp if mp.Op() == Del { if muid == curUid { // curUid found in mindex. if inPlist { // In plist, so replace previous instruction in mindex. mlink.moveidx = 1 mlink.idx = pi + mi l.mindex[mi] = mlink } else { // Not in plist, so delete previous instruction in mindex. l.mdelta -= 1 l.mindexDeleteAt(mi) } } else { // curUid not found in mindex. if inPlist { // In plist, so insert in mindex. mlink.moveidx = 1 l.mdelta -= 1 mlink.idx = pi + mi + 1 l.mindexInsertAt(mlink, mi+1) } else { // Not found in plist, and not found in mindex. So, ignore. } } } else if mp.Op() == Set { if muid == curUid { // curUid found in mindex. if inPlist { // In plist, so delete previous instruction, set in mlayer. l.mindexDeleteAt(mi) l.mlayer[pi] = *mp } else { // Not in plist, so replace previous set instruction in mindex. // NOTE: This prev instruction couldn't have been a Del instruction. mlink.idx = pi + 1 + mi mlink.moveidx = -1 l.mindex[mi] = mlink } } else { // curUid not found in mindex. if inPlist { // In plist, so just set it in mlayer. // If this posting matches what we already have in posting list, // we don't need to `dirty` this by adding to mlayer. plist := l.getPostingList() var cp types.Posting if ok := plist.Postings(&cp, pi); ok { if samePosting(&cp, mp) { return // do nothing. 
} } l.mlayer[pi] = *mp } else { // not in plist, not in mindex, so insert in mindex. mlink.moveidx = -1 l.mdelta += 1 mlink.idx = pi + 1 + mi + 1 // right of pi, and right of mi. l.mindexInsertAt(mlink, mi+1) } } } else { glog.WithField("op", mp.Op()).Fatal("Invalid operation.") } }
func (l *List) updateMutationLayer(mpost *types.Posting) bool { l.AssertLock() x.AssertTrue(mpost.Op == Set || mpost.Op == Del) // First check the mutable layer. midx := sort.Search(len(l.mlayer), func(idx int) bool { mp := l.mlayer[idx] return mpost.Uid <= mp.Uid }) // This block handles the case where mpost.UID is found in mutation layer. if midx < len(l.mlayer) && l.mlayer[midx].Uid == mpost.Uid { // mp is the posting found in mlayer. oldPost := l.mlayer[midx] // Note that mpost.Op is either Set or Del, whereas oldPost.Op can be // either Set or Del or Add. msame := samePosting(oldPost, mpost) if msame && ((mpost.Op == Del) == (oldPost.Op == Del)) { // This posting has similar content as what is found in mlayer. If the // ops are similar, then we do nothing. Note that Add and Set are // considered similar, and the second clause is true also when // mpost.Op==Add and oldPost.Op==Set. return false } if !msame && mpost.Op == Del { // Invalid Del as contents do not match. return false } // Here are the remaining cases. // Del, Set: Replace with new post. // Del, Del: Replace with new post. // Set, Del: Replace with new post. // Set, Set: Replace with new post. // Add, Del: Undo by removing oldPost. // Add, Set: Replace with new post. Need to set mpost.Op to Add. if oldPost.Op == Add { if mpost.Op == Del { // Undo old post. copy(l.mlayer[midx:], l.mlayer[midx+1:]) l.mlayer[len(l.mlayer)-1] = nil l.mlayer = l.mlayer[:len(l.mlayer)-1] return true } // Add followed by Set is considered an Add. Hence, mutate mpost.Op. mpost.Op = Add } l.mlayer[midx] = mpost return true } // Didn't find it in mutable layer. Now check the immutable layer. 
pl := l.getPostingList(0) pidx := sort.Search(len(pl.Postings), func(idx int) bool { p := pl.Postings[idx] return mpost.Uid <= p.Uid }) var uidFound, psame bool if pidx < len(pl.Postings) { p := pl.Postings[pidx] uidFound = mpost.Uid == p.Uid if uidFound { psame = samePosting(p, mpost) } } if mpost.Op == Set { if psame { return false } if !uidFound { // Posting not found in PL. This is considered an Add operation. mpost.Op = Add } } else if !psame { // mpost.Op==Del // Either we fail to find UID in immutable PL or contents don't match. return false } // Doesn't match what we already have in immutable layer. So, add to mutable layer. if midx >= len(l.mlayer) { // Add it at the end. l.mlayer = append(l.mlayer, mpost) return true } // Otherwise, add it where midx is pointing to. l.mlayer = append(l.mlayer, nil) copy(l.mlayer[midx+1:], l.mlayer[midx:]) l.mlayer[midx] = mpost return true }