// In benchmarks, the time taken per AddMutation before was
// plateauing at 2.5 ms with sync per 10 log entries, and increasing
// for sync per 100 log entries (to 3 ms per AddMutation), largely because
// of how index generation was being done.
//
// With this change, the benchmarks perform as good as benchmarks for
// commit.Logger, where the less frequently file sync happens, the faster
// AddMutations run.
//
// PASS
// BenchmarkAddMutations_SyncEveryLogEntry-6    	     100	  24712455 ns/op
// BenchmarkAddMutations_SyncEvery10LogEntry-6  	     500	   2485961 ns/op
// BenchmarkAddMutations_SyncEvery100LogEntry-6 	   10000	    298352 ns/op
// BenchmarkAddMutations_SyncEvery1000LogEntry-6	   30000	     63544 ns/op
// ok  	github.com/dgraph-io/dgraph/posting	10.291s

// AddMutation records a single SET/DEL mutation (op) for edge t in this
// posting list's mutation layer, marks the list dirty, and appends the
// mutation to the commit log (if one is configured). It returns
// E_TMP_ERROR when the list is being evicted, or an error when the
// mutation timestamp is older than the last committed one or ValueId
// resolves to zero.
func (l *List) AddMutation(t x.DirectedEdge, op byte) error {
	// Wait for any outstanding background work (e.g. init) to finish
	// before taking the lock. NOTE(review): wg.Wait outside the lock —
	// presumably init holds the lock while wg is pending; confirm.
	l.wg.Wait()
	l.Lock()
	defer l.Unlock()

	// List is scheduled for deletion/eviction; caller should retry on a
	// fresh List.
	if l.deleteMe {
		return E_TMP_ERROR
	}

	// Reject out-of-order mutations: timestamps must be monotonically
	// non-decreasing relative to what has already been applied.
	if t.Timestamp.UnixNano() < l.maxMutationTs {
		return fmt.Errorf("Mutation ts lower than committed ts.")
	}

	// Mutation arrives:
	// - Check if we had any(SET/DEL) before this, stored in the mutation list.
	//		- If yes, then replace that mutation. Jump to a)
	// a)		check if the entity exists in main posting list.
	// 		- If yes, store the mutation.
	// 		- If no, disregard this mutation.

	// All edges with a value set, have the same uid. In other words,
	// an (entity, attribute) can only have one interface{} value.
	if t.Value != nil {
		t.ValueId = math.MaxUint64
	}
	if t.ValueId == 0 {
		return fmt.Errorf("ValueId cannot be zero.")
	}

	// Serialize the mutation into a flatbuffer and view it as a Posting.
	mbuf := newPosting(t, op)
	uo := flatbuffers.GetUOffsetT(mbuf)
	mpost := new(types.Posting)
	mpost.Init(mbuf, uo)

	glog.WithFields(logrus.Fields{
		"uid":    mpost.Uid(),
		"source": string(mpost.Source()),
		"ts":     mpost.Ts(),
	}).Debug("Add mutation")

	// Fold the mutation into the in-memory mutation index/layer.
	l.mergeMutation(mpost)
	l.maxMutationTs = t.Timestamp.UnixNano()

	// If the mutation layer is non-empty, mark the list dirty so a
	// background merger can pick it up. dirtyTs is stored atomically
	// because readers may check it without holding l's lock.
	if len(l.mindex)+len(l.mlayer) > 0 {
		atomic.StoreInt64(&l.dirtyTs, time.Now().UnixNano())
		if dirtymap != nil {
			dirtymap.Put(l.ghash, true)
		}
	}

	// No commit log configured (e.g. in some tests): done.
	if l.clog == nil {
		return nil
	}
	// Persist the mutation to the commit log for durability/replay.
	return l.clog.AddLog(t.Timestamp.UnixNano(), l.hash, mbuf)
}
func samePosting(a *types.Posting, b *types.Posting) bool { if a.Uid() != b.Uid() { return false } if a.ValueLength() != b.ValueLength() { return false } if !bytes.Equal(a.ValueBytes(), b.ValueBytes()) { return false } if !bytes.Equal(a.Source(), b.Source()) { return false } return true }
func addPosting(b *flatbuffers.Builder, p types.Posting) flatbuffers.UOffsetT { so := b.CreateByteString(p.Source()) // Do this before posting start. var bo flatbuffers.UOffsetT if p.ValueLength() > 0 { bo = b.CreateByteVector(p.ValueBytes()) } types.PostingStart(b) types.PostingAddUid(b, p.Uid()) if bo > 0 { types.PostingAddValue(b, bo) } types.PostingAddSource(b, so) types.PostingAddTs(b, p.Ts()) types.PostingAddOp(b, p.Op()) return types.PostingEnd(b) }
// init initializes the posting list for key: loads the persisted posting
// list from pstore, then replays any newer mutations from the commit log
// clog (entries with ts > the persisted CommitTs). Must be paired with a
// prior l.wg.Add; it signals completion via l.wg.Done.
func (l *List) init(key []byte, pstore *store.Store, clog *commit.Logger) {
	l.Lock()
	defer l.Unlock()
	// Release waiters (e.g. AddMutation's wg.Wait) once init finishes.
	defer l.wg.Done()

	// Sanity check on the package-level empty buffer.
	if len(empty) == 0 {
		glog.Fatal("empty should have some bytes.")
	}
	l.key = key
	l.pstore = pstore
	l.clog = clog

	// Read the persisted posting list first; its CommitTs is the replay
	// watermark for the commit log below.
	posting := l.getPostingList()
	l.maxMutationTs = posting.CommitTs()
	l.hash = farm.Fingerprint32(key)
	l.ghash = gotomic.IntKey(farm.Fingerprint64(key))
	l.mlayer = make(map[int]types.Posting)

	// No commit log configured: nothing to replay.
	if clog == nil {
		return
	}
	glog.Debug("Starting stream entries...")

	// Replay mutations logged after the persisted commit ts, rebuilding
	// the in-memory mutation layer and advancing maxMutationTs.
	err := clog.StreamEntries(posting.CommitTs()+1, l.hash,
		func(hdr commit.Header, buffer []byte) {
			uo := flatbuffers.GetUOffsetT(buffer)
			m := new(types.Posting)
			m.Init(buffer, uo)
			if m.Ts() > l.maxMutationTs {
				l.maxMutationTs = m.Ts()
			}
			glog.WithFields(logrus.Fields{
				"uid":    m.Uid(),
				"source": string(m.Source()),
				"ts":     m.Ts(),
			}).Debug("Got entry from log")
			l.mergeMutation(m)
		})
	if err != nil {
		glog.WithError(err).Error("While streaming entries.")
	}
	glog.Debug("Done streaming entries.")
}
func TestAddMutation(t *testing.T) { // logrus.SetLevel(logrus.DebugLevel) l := NewList() key := Key(1, "name") dir, err := ioutil.TempDir("", "storetest_") if err != nil { t.Error(err) return } defer os.RemoveAll(dir) ps := new(store.Store) ps.Init(dir) clog := commit.NewLogger(dir, "mutations", 50<<20) clog.Init() defer clog.Close() l.init(key, ps, clog) edge := x.DirectedEdge{ ValueId: 9, Source: "testing", Timestamp: time.Now(), } if err := l.AddMutation(edge, Set); err != nil { t.Error(err) } /* if err := l.CommitIfDirty(); err != nil { t.Error(err) } */ if l.Length() != 1 { t.Error("Unable to find added elements in posting list") } var p types.Posting if ok := l.Get(&p, 0); !ok { t.Error("Unable to retrieve posting at 1st iter") t.Fail() } if p.Uid() != 9 { t.Errorf("Expected 9. Got: %v", p.Uid) } if string(p.Source()) != "testing" { t.Errorf("Expected testing. Got: %v", string(p.Source())) } // return // Test 1. // Add another edge now. edge.ValueId = 81 l.AddMutation(edge, Set) // l.CommitIfDirty() if l.Length() != 2 { t.Errorf("Length: %d", l.Length()) t.Fail() } var uid uint64 uid = 1 for i := 0; i < l.Length(); i++ { if ok := l.Get(&p, i); !ok { t.Error("Unable to retrieve posting at 2nd iter") } uid *= 9 if p.Uid() != uid { t.Logf("Expected: %v. Got: %v", uid, p.Uid()) } } // return // Test 2. // Add another edge, in between the two above. uids := []uint64{ 9, 49, 81, } edge.ValueId = 49 if err := l.AddMutation(edge, Set); err != nil { t.Error(err) } /* if err := l.CommitIfDirty(); err != nil { t.Error(err) } */ if err := checkUids(t, l, uids...); err != nil { t.Error(err) } // return // Test 3. 
// Delete an edge, add an edge, replace an edge edge.ValueId = 49 if err := l.AddMutation(edge, Del); err != nil { t.Error(err) } edge.ValueId = 69 if err := l.AddMutation(edge, Set); err != nil { t.Error(err) } edge.ValueId = 9 edge.Source = "anti-testing" if err := l.AddMutation(edge, Set); err != nil { t.Error(err) } /* if err := l.CommitIfDirty(); err != nil { t.Error(err) } */ uids = []uint64{9, 69, 81} if err := checkUids(t, l, uids...); err != nil { t.Error(err) } l.Get(&p, 0) if string(p.Source()) != "anti-testing" { t.Errorf("Expected: anti-testing. Got: %v", string(p.Source())) } /* if err := l.CommitIfDirty(); err != nil { t.Error(err) } */ // Try reading the same data in another PostingList. dl := NewList() dl.init(key, ps, clog) if err := checkUids(t, dl, uids...); err != nil { t.Error(err) } if _, err := dl.MergeIfDirty(); err != nil { t.Error(err) } if err := checkUids(t, dl, uids...); err != nil { t.Error(err) } }