func (s *state) handleNQuads(wg *sync.WaitGroup) {
	for nq := range s.cnq {
		if farm.Fingerprint64([]byte(nq.Subject))%s.mod != 0 {
			// Ignore due to mod sampling.
			atomic.AddUint64(&s.ctr.ignored, 1)
			continue
		}

		edge, err := nq.ToEdge()
		for err != nil {
			// Just put in a retry loop to tackle temporary errors.
			if err == posting.E_TMP_ERROR {
				time.Sleep(time.Microsecond)
			} else {
				glog.WithError(err).WithField("nq", nq).
					Error("While converting to edge")
				return
			}
			edge, err = nq.ToEdge()
		}

		key := posting.Key(edge.Entity, edge.Attribute)
		plist := posting.GetOrCreate(key)
		plist.AddMutation(edge, posting.Set)
		atomic.AddUint64(&s.ctr.processed, 1)
	}
	wg.Done()
}
// allocateUniqueUid returns an integer in the range [minIdx, minIdx+mod)
// owned by this instance, derived from numInstances and instanceIdx,
// which hasn't already been allocated. It does this by fingerprinting
// random bytes until the obtained integer is unused.
func allocateUniqueUid(instanceIdx uint64, numInstances uint64) uint64 {
	mod := math.MaxUint64 / numInstances
	minIdx := instanceIdx * mod

	buf := make([]byte, 128)
	for {
		_, err := rand.Read(buf)
		x.Checkf(err, "rand.Read shouldn't throw an error")

		uidb := farm.Fingerprint64(buf) // Generate from hash.
		uid := (uidb % mod) + minIdx
		if uid == math.MaxUint64 || !lmgr.isNew(uid) {
			continue
		}

		// Check if this uid has already been allocated.
		key := x.DataKey("_uid_", uid)
		pl, decr := posting.GetOrCreate(key)
		empty := pl.Length(0) == 0
		decr() // Don't defer; this loop may run many times.
		if empty {
			return uid
		}
	}
	log.Fatalf("This shouldn't be reached.")
	return 0
}
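// Illustration only: how allocateUniqueUid above shards the UID space
// across instances. Each instance owns a contiguous range of size
// math.MaxUint64/numInstances, and hashes are folded into that range.
// This is a standalone sketch; uidRange is not part of the codebase.
package main

import (
	"fmt"
	"math"
)

func uidRange(instanceIdx, numInstances uint64) (lo, hi uint64) {
	mod := math.MaxUint64 / numInstances
	lo = instanceIdx * mod
	return lo, lo + mod - 1
}

func main() {
	// With 4 instances, instance 2 owns roughly the third quarter of uint64.
	lo, hi := uidRange(2, 4)
	fmt.Printf("instance 2 of 4 owns [%d, %d]\n", lo, hi)
}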
func (s *state) handleNQuads(wg *sync.WaitGroup) {
	for nq := range s.cnq {
		edge, err := nq.ToEdge(s.instanceIdx, s.numInstances)
		for err != nil {
			// Just put in a retry loop to tackle temporary errors.
			if err == posting.E_TMP_ERROR {
				time.Sleep(time.Microsecond)
			} else {
				glog.WithError(err).WithField("nq", nq).
					Error("While converting to edge")
				return
			}
			edge, err = nq.ToEdge(s.instanceIdx, s.numInstances)
		}

		// Only handle this edge if the attribute satisfies the modulo rule.
		if farm.Fingerprint64([]byte(edge.Attribute))%s.numInstances == s.instanceIdx {
			key := posting.Key(edge.Entity, edge.Attribute)
			plist := posting.GetOrCreate(key, dataStore)
			plist.AddMutation(edge, posting.Set)
			atomic.AddUint64(&s.ctr.processed, 1)
		} else {
			atomic.AddUint64(&s.ctr.ignored, 1)
		}
	}
	wg.Done()
}
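// Illustration only: the predicate modulo rule used in handleNQuads above.
// A predicate is handled by exactly one instance: the one whose index equals
// the predicate fingerprint modulo the number of instances. Standalone
// sketch; owner is a hypothetical helper, not part of the codebase.
package main

import (
	"fmt"

	farm "github.com/dgryski/go-farm"
)

func owner(pred string, numInstances uint64) uint64 {
	return farm.Fingerprint64([]byte(pred)) % numInstances
}

func main() {
	for _, pred := range []string{"friend", "name", "gender"} {
		fmt.Printf("%q -> instance %d of 3\n", pred, owner(pred, 3))
	}
}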
func ExternalId(uid uint64) (xid string, rerr error) {
	key := posting.Key(uid, "_xid_") // uid -> "_xid_" -> xid
	pl := posting.GetOrCreate(key, uidStore)
	if pl.Length() == 0 {
		return "", errors.New("NO external id")
	}

	if pl.Length() > 1 {
		glog.WithField("uid", uid).Fatal("This shouldn't be happening.")
		return "", errors.New("Multiple external ids for this uid.")
	}

	var p types.Posting
	if ok := pl.Get(&p, 0); !ok {
		glog.WithField("uid", uid).Error("While retrieving posting")
		return "", errors.New("While retrieving posting")
	}

	if p.Uid() != math.MaxUint64 {
		glog.WithField("uid", uid).Fatal("Value uid must be MaxUint64.")
	}
	var t interface{}
	rerr = posting.ParseValue(&t, p.ValueBytes())
	xid = t.(string)
	return xid, rerr
}
func addEdgeToUID(t *testing.T, ps *store.Store, attr string, src uint64,
	dst uint64) {
	edge := &task.DirectedEdge{
		ValueId: dst,
		Label:   "testing",
		Attr:    attr,
		Entity:  src,
	}
	l, _ := posting.GetOrCreate(x.DataKey(attr, src))
	require.NoError(t,
		l.AddMutationWithIndex(context.Background(), edge, posting.Set))
}
func markTaken(ctx context.Context, uid uint64) {
	mu := &task.DirectedEdge{
		Entity: uid,
		Attr:   "_uid_",
		Value:  []byte("_"), // not txid
		Label:  "_loader_",
	}
	key := x.DataKey("_uid_", uid)
	plist, decr := posting.GetOrCreate(key)
	plist.AddMutation(ctx, mu, posting.Set)
	decr()
}
func addEdgeToTypedValue(t *testing.T, ps *store.Store, attr string,
	src uint64, typ types.TypeID, value []byte) {
	edge := &task.DirectedEdge{
		Value:     value,
		ValueType: uint32(typ),
		Label:     "testing",
		Attr:      attr,
		Entity:    src,
	}
	l, _ := posting.GetOrCreate(x.DataKey(attr, src))
	require.NoError(t,
		l.AddMutationWithIndex(context.Background(), edge, posting.Set))
}
// handleNQuads converts the nQuads that satisfy the modulo
// rule into posting lists.
func (s *state) handleNQuads(wg *sync.WaitGroup) {
	defer wg.Done()
	// Check if we need to mark used UIDs.
	markUids := s.groupsMap[group.BelongsTo("_uid_")]
	ctx := context.Background()
	for nq := range s.cnq {
		if s.Error() != nil {
			return
		}
		// Only handle this edge if the attribute satisfies the modulo rule.
		if !s.groupsMap[group.BelongsTo(nq.Predicate)] {
			atomic.AddUint64(&s.ctr.ignored, 1)
			continue
		}

		edge, err := nq.ToEdge()
		for err != nil {
			// Just put in a retry loop to tackle temporary errors.
			if err == posting.ErrRetry {
				time.Sleep(time.Microsecond)
			} else {
				s.SetError(err)
				glog.WithError(err).WithField("nq", nq).
					Error("While converting to edge")
				return
			}
			edge, err = nq.ToEdge()
		}

		key := x.DataKey(edge.Attr, edge.Entity)
		plist, decr := posting.GetOrCreate(key)
		plist.AddMutationWithIndex(ctx, edge, posting.Set)
		decr() // Don't defer, just call because we're in a channel loop.

		// Mark UIDs and XIDs as taken.
		if markUids {
			// Mark entity UID.
			markTaken(ctx, edge.Entity)
			// Mark the Value UID.
			if edge.ValueId != 0 {
				markTaken(ctx, edge.ValueId)
			}
		}
		atomic.AddUint64(&s.ctr.processed, 1)
	}
}
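// Side note on the "Don't defer, just call" comment above: defer runs at
// function exit, not loop-iteration exit, so deferring decr() inside the
// range-over-channel loop would pin every posting list until handleNQuads
// returns. A minimal standalone sketch of the difference:
package main

import "fmt"

func leaky(n int, release func()) {
	for i := 0; i < n; i++ {
		defer release() // All n releases run only when leaky returns.
	}
	fmt.Println("leaky: loop done, nothing released yet")
}

func tight(n int, release func()) {
	for i := 0; i < n; i++ {
		release() // Released immediately, once per iteration.
	}
}

func main() {
	count := 0
	leaky(3, func() { count++ })
	fmt.Println("after leaky:", count) // 3, but only after it returned.
	count = 0
	tight(3, func() { count++ })
	fmt.Println("after tight:", count) // 3, released during the loop.
}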
func ProcessTask(query []byte) (result []byte, rerr error) {
	uo := flatbuffers.GetUOffsetT(query)
	q := new(task.Query)
	q.Init(query, uo)

	b := flatbuffers.NewBuilder(0)
	voffsets := make([]flatbuffers.UOffsetT, q.UidsLength())
	uoffsets := make([]flatbuffers.UOffsetT, q.UidsLength())

	attr := string(q.Attr())
	for i := 0; i < q.UidsLength(); i++ {
		uid := q.Uids(i)
		key := posting.Key(uid, attr)
		pl := posting.GetOrCreate(key, dataStore)

		var valoffset flatbuffers.UOffsetT
		if val, err := pl.Value(); err != nil {
			valoffset = b.CreateByteVector(x.Nilbyte)
		} else {
			valoffset = b.CreateByteVector(val)
		}
		task.ValueStart(b)
		task.ValueAddVal(b, valoffset)
		voffsets[i] = task.ValueEnd(b)

		ulist := pl.GetUids()
		uoffsets[i] = x.UidlistOffset(b, ulist)
	}
	task.ResultStartValuesVector(b, len(voffsets))
	for i := len(voffsets) - 1; i >= 0; i-- {
		b.PrependUOffsetT(voffsets[i])
	}
	valuesVent := b.EndVector(len(voffsets))

	task.ResultStartUidmatrixVector(b, len(uoffsets))
	for i := len(uoffsets) - 1; i >= 0; i-- {
		b.PrependUOffsetT(uoffsets[i])
	}
	matrixVent := b.EndVector(len(uoffsets))

	task.ResultStart(b)
	task.ResultAddValues(b, valuesVent)
	task.ResultAddUidmatrix(b, matrixVent)
	rend := task.ResultEnd(b)
	b.Finish(rend)
	return b.Bytes[b.Head():], nil
}
func allocateUniqueUid(xid string, instanceIdx uint64,
	numInstances uint64) (uid uint64, rerr error) {

	mod := math.MaxUint64 / numInstances
	minIdx := instanceIdx * mod
	for sp := ""; ; sp += " " {
		txid := xid + sp

		uid1 := farm.Fingerprint64([]byte(txid)) // Generate from hash.
		uid = (uid1 % mod) + minIdx
		glog.WithField("txid", txid).WithField("uid", uid).Debug("Generated")

		if uid == math.MaxUint64 {
			glog.Debug("Hit uint64max while generating fingerprint. Ignoring...")
			continue
		}

		// Check if this uid has already been allocated.
		key := posting.Key(uid, "_xid_") // uid -> "_xid_" -> xid
		pl := posting.GetOrCreate(key, uidStore)
		if pl.Length() > 0 {
			// Something already present here.
			var p types.Posting
			pl.Get(&p, 0)

			var tmp interface{}
			posting.ParseValue(&tmp, p.ValueBytes())
			glog.Debugf("Found existing xid: [%q]. Continuing...", tmp.(string))
			continue
		}

		// Uid hasn't been assigned yet.
		t := x.DirectedEdge{
			Value:     xid, // not txid
			Source:    "_assigner_",
			Timestamp: time.Now(),
		}
		rerr = pl.AddMutation(t, posting.Set)
		if rerr != nil {
			glog.WithError(rerr).Error("While adding mutation")
		}
		return uid, rerr
	}
	return 0, errors.New("Some unhandled route led me here." +
		" Wake the stupid developer up.")
}
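// Illustration only: the space-padding retry in allocateUniqueUid above.
// Appending a space changes the farmhash fingerprint entirely, yielding a
// fresh candidate UID within the same instance range on each collision.
// Standalone sketch, assuming the github.com/dgryski/go-farm package.
package main

import (
	"fmt"

	farm "github.com/dgryski/go-farm"
)

func main() {
	xid := "alice"
	for sp := ""; len(sp) < 3; sp += " " {
		txid := xid + sp
		fmt.Printf("%q -> %d\n", txid, farm.Fingerprint64([]byte(txid)))
	}
}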
// runMutations goes through all the edges and applies them. It returns an
// error as soon as an edge fails to apply; the remaining edges are skipped.
func runMutations(ctx context.Context, edges []*task.DirectedEdge,
	op uint32) error {
	for _, edge := range edges {
		if !groups().ServesGroup(group.BelongsTo(edge.Attr)) {
			return x.Errorf("Predicate fingerprint doesn't match this instance")
		}
		key := x.DataKey(edge.Attr, edge.Entity)
		plist, decr := posting.GetOrCreate(key)
		defer decr()

		if err := plist.AddMutationWithIndex(ctx, edge, op); err != nil {
			x.Printf("Error while adding mutation: %v %v", edge, err)
			return err // abort applying the rest of them.
		}
	}
	return nil
}
func writePLs(t *testing.T, pred string, count int, vid uint64, ps *store.Store) {
	for i := 0; i < count; i++ {
		k := x.DataKey(pred, uint64(i))
		list, _ := posting.GetOrCreate(k)

		de := &task.DirectedEdge{
			ValueId: vid,
			Label:   "test",
		}
		list.AddMutation(context.TODO(), de, posting.Set)
		if merged, err := list.CommitIfDirty(context.TODO()); err != nil {
			t.Errorf("While merging: %v", err)
		} else if !merged {
			t.Errorf("No merge happened")
		}
	}
}
func GetOrAssign(xid string, instanceIdx uint64,
	numInstances uint64) (uid uint64, rerr error) {

	key := stringKey(xid)
	pl := posting.GetOrCreate(key)
	if pl.Length() == 0 {
		return assignNew(pl, xid, instanceIdx, numInstances)

	} else if pl.Length() > 1 {
		glog.Fatalf("We shouldn't have more than 1 uid for xid: %v\n", xid)

	} else {
		// We found one posting.
		var p types.Posting
		if ok := pl.Get(&p, 0); !ok {
			return 0, errors.New("While retrieving entry from posting list")
		}
		return p.Uid(), nil
	}
	return 0, errors.New("Some unhandled route led me here." +
		" Wake the stupid developer up.")
}
// getPostingValue looks up key, gets the value, converts it. If any error is
// encountered, we return nil. This is used in some filtering where we do not
// want to waste time creating errors.
func getPostingValue(key []byte, scalarType types.Scalar) *types.Value {
	pl, decr := posting.GetOrCreate(key)
	defer decr()

	valBytes, vType, err := pl.Value()
	if bytes.Equal(valBytes, nil) {
		return nil
	}
	val := types.ValueForType(types.TypeID(vType))
	if val == nil {
		return nil
	}
	if err := val.UnmarshalBinary(valBytes); err != nil {
		return nil
	}
	// Convert to schema type.
	sv, err := scalarType.Convert(val)
	if err != nil {
		return nil
	}
	return &sv
}
// fetchValue gets the value for a given UID.
func fetchValue(uid uint64, attr string, scalar types.Scalar) (types.Value, error) {
	pl, decr := posting.GetOrCreate(x.DataKey(attr, uid))
	defer decr()

	valBytes, vType, err := pl.Value()
	if err != nil {
		return nil, err
	}
	val := types.ValueForType(types.TypeID(vType))
	if val == nil {
		return nil, x.Errorf("Invalid type: %v", vType)
	}
	err = val.UnmarshalBinary(valBytes)
	if err != nil {
		return nil, err
	}
	schemaVal, err := scalar.Convert(val)
	if err != nil {
		return nil, err
	}
	return schemaVal, nil
}
func populateGraph(t *testing.T) (string, *store.Store) {
	// logrus.SetLevel(logrus.DebugLevel)
	dir, err := ioutil.TempDir("", "storetest_")
	if err != nil {
		t.Error(err)
		return "", nil
	}

	ps := new(store.Store)
	ps.Init(dir)
	worker.Init(ps)

	clog := commit.NewLogger(dir, "mutations", 50<<20)
	clog.Init()
	posting.Init(clog)

	// So, the user we're interested in has uid: 1.
	// She has 5 friends: 23, 24, 25, 31, and 101.
	edge := x.DirectedEdge{
		ValueId:   23,
		Source:    "testing",
		Timestamp: time.Now(),
	}
	addEdge(t, edge, posting.GetOrCreate(posting.Key(1, "friend"), ps))

	edge.ValueId = 24
	addEdge(t, edge, posting.GetOrCreate(posting.Key(1, "friend"), ps))

	edge.ValueId = 25
	addEdge(t, edge, posting.GetOrCreate(posting.Key(1, "friend"), ps))

	edge.ValueId = 31
	addEdge(t, edge, posting.GetOrCreate(posting.Key(1, "friend"), ps))

	edge.ValueId = 101
	addEdge(t, edge, posting.GetOrCreate(posting.Key(1, "friend"), ps))

	// Now let's add a few properties for the main user.
	edge.Value = "Michonne"
	addEdge(t, edge, posting.GetOrCreate(posting.Key(1, "name"), ps))

	edge.Value = "female"
	addEdge(t, edge, posting.GetOrCreate(posting.Key(1, "gender"), ps))

	edge.Value = "alive"
	addEdge(t, edge, posting.GetOrCreate(posting.Key(1, "status"), ps))

	// Now let's add a name for each of the friends, except 101.
	edge.Value = "Rick Grimes"
	addEdge(t, edge, posting.GetOrCreate(posting.Key(23, "name"), ps))

	edge.Value = "Glenn Rhee"
	addEdge(t, edge, posting.GetOrCreate(posting.Key(24, "name"), ps))

	edge.Value = "Daryl Dixon"
	addEdge(t, edge, posting.GetOrCreate(posting.Key(25, "name"), ps))

	edge.Value = "Andrea"
	addEdge(t, edge, posting.GetOrCreate(posting.Key(31, "name"), ps))

	return dir, ps
}
// processTask processes the query, accumulates and returns the result.
func processTask(q *task.Query) (*task.Result, error) {
	attr := q.Attr

	useFunc := len(q.SrcFunc) != 0
	var n int
	var tokens []string
	var geoQuery *geo.QueryData
	var err error
	var intersectDest bool
	var ineqValue types.Value
	var ineqValueToken string
	var isGeq, isLeq bool

	if useFunc {
		f := q.SrcFunc[0]
		isGeq = f == "geq"
		isLeq = f == "leq"
		switch {
		case isGeq:
			fallthrough
		case isLeq:
			if len(q.SrcFunc) != 2 {
				return nil, x.Errorf("Function requires 2 arguments, but got %d %v",
					len(q.SrcFunc), q.SrcFunc)
			}
			ineqValue, err = getValue(attr, q.SrcFunc[1])
			if err != nil {
				return nil, err
			}
			// Tokenizing RHS value of inequality.
			ineqTokens, err := posting.IndexTokens(attr, ineqValue)
			if err != nil {
				return nil, err
			}
			if len(ineqTokens) != 1 {
				return nil, x.Errorf("Expected only 1 token but got: %v", ineqTokens)
			}
			ineqValueToken = ineqTokens[0]
			// Get tokens geq / leq ineqValueToken.
			tokens, err = getInequalityTokens(attr, ineqValueToken, isGeq)
			if err != nil {
				return nil, err
			}

		case geo.IsGeoFunc(q.SrcFunc[0]):
			// For geo functions, we get extra information used for filtering.
			tokens, geoQuery, err = geo.GetTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}

		default:
			tokens, err = getTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}
			intersectDest = (strings.ToLower(q.SrcFunc[0]) == "allof")
		}
		n = len(tokens)
	} else {
		n = len(q.Uids)
	}

	var out task.Result
	for i := 0; i < n; i++ {
		var key []byte
		if useFunc {
			key = x.IndexKey(attr, tokens[i])
		} else {
			key = x.DataKey(attr, q.Uids[i])
		}
		// Get or create the posting list for an entity, attribute combination.
		pl, decr := posting.GetOrCreate(key)
		defer decr()

		// If a posting list contains a value, we store that or else we store a nil
		// byte so that processing is consistent later.
		vbytes, vtype, err := pl.Value()
		newValue := &task.Value{ValType: uint32(vtype)}
		if err == nil {
			newValue.Val = vbytes
		} else {
			newValue.Val = x.Nilbyte
		}
		out.Values = append(out.Values, newValue)

		if q.DoCount {
			out.Counts = append(out.Counts, uint32(pl.Length(0)))
			// Add an empty UID list to make later processing consistent.
			out.UidMatrix = append(out.UidMatrix, &emptyUIDList)
			continue
		}

		// The more usual case: Getting the UIDs.
		opts := posting.ListOptions{
			AfterUID: uint64(q.AfterUid),
		}
		// If we have srcFunc and Uids, it means it's a filter. So we intersect.
		if useFunc && len(q.Uids) > 0 {
			opts.Intersect = &task.List{Uids: q.Uids}
		}
		out.UidMatrix = append(out.UidMatrix, pl.Uids(opts))
	}

	if (isGeq || isLeq) && len(tokens) > 0 && ineqValueToken == tokens[0] {
		// Need to evaluate inequality for entries in the first bucket.
		typ := schema.TypeOf(attr)
		if typ == nil || !typ.IsScalar() {
			return nil, x.Errorf("Attribute not scalar: %s %v", attr, typ)
		}
		scalarType := typ.(types.Scalar)

		x.AssertTrue(len(out.UidMatrix) > 0)
		// Filter the first row of UidMatrix. Since ineqValue != nil, we may
		// assume that ineqValue is equal to the first token found in TokensTable.
		algo.ApplyFilter(out.UidMatrix[0], func(uid uint64, i int) bool {
			key := x.DataKey(attr, uid)
			sv := getPostingValue(key, scalarType)
			if sv == nil {
				return false
			}
			if isGeq {
				return !scalarType.Less(*sv, ineqValue)
			}
			return !scalarType.Less(ineqValue, *sv)
		})
	}

	// If geo filter, do value check for correctness.
	var values []*task.Value
	if geoQuery != nil {
		uids := algo.MergeSorted(out.UidMatrix)
		for _, uid := range uids.Uids {
			key := x.DataKey(attr, uid)
			pl, decr := posting.GetOrCreate(key)

			vbytes, vtype, err := pl.Value()
			newValue := &task.Value{ValType: uint32(vtype)}
			if err == nil {
				newValue.Val = vbytes
			} else {
				newValue.Val = x.Nilbyte
			}
			values = append(values, newValue)
			decr() // Decrement the reference count of the pl.
		}

		filtered := geo.FilterUids(uids, values, geoQuery)
		for i := 0; i < len(out.UidMatrix); i++ {
			out.UidMatrix[i] = algo.IntersectSorted(
				[]*task.List{out.UidMatrix[i], filtered})
		}
	}
	out.IntersectDest = intersectDest
	return &out, nil
}
func TestProcessTask(t *testing.T) {
	// logrus.SetLevel(logrus.DebugLevel)
	dir, err := ioutil.TempDir("", "storetest_")
	if err != nil {
		t.Error(err)
		return
	}
	defer os.RemoveAll(dir)

	ps := new(store.Store)
	ps.Init(dir)

	clog := commit.NewLogger(dir, "mutations", 50<<20)
	clog.Init()
	defer clog.Close()
	posting.Init(clog)
	Init(ps)

	edge := x.DirectedEdge{
		ValueId:   23,
		Source:    "author0",
		Timestamp: time.Now(),
	}
	addEdge(t, edge, posting.GetOrCreate(posting.Key(10, "friend"), ps))
	addEdge(t, edge, posting.GetOrCreate(posting.Key(11, "friend"), ps))
	addEdge(t, edge, posting.GetOrCreate(posting.Key(12, "friend"), ps))

	edge.ValueId = 25
	addEdge(t, edge, posting.GetOrCreate(posting.Key(12, "friend"), ps))

	edge.ValueId = 26
	addEdge(t, edge, posting.GetOrCreate(posting.Key(12, "friend"), ps))

	edge.ValueId = 31
	addEdge(t, edge, posting.GetOrCreate(posting.Key(10, "friend"), ps))
	addEdge(t, edge, posting.GetOrCreate(posting.Key(12, "friend"), ps))

	edge.Value = "photon"
	addEdge(t, edge, posting.GetOrCreate(posting.Key(12, "friend"), ps))

	query := NewQuery("friend", []uint64{10, 11, 12})
	result, err := ProcessTask(query)
	if err != nil {
		t.Error(err)
	}

	ro := flatbuffers.GetUOffsetT(result)
	r := new(task.Result)
	r.Init(result, ro)

	if r.UidmatrixLength() != 3 {
		t.Errorf("Expected 3. Got uidmatrix length: %v", r.UidmatrixLength())
	}
	if err := check(r, 0, []uint64{23, 31}); err != nil {
		t.Error(err)
	}
	if err := check(r, 1, []uint64{23}); err != nil {
		t.Error(err)
	}
	if err := check(r, 2, []uint64{23, 25, 26, 31}); err != nil {
		t.Error(err)
	}

	if r.ValuesLength() != 3 {
		t.Errorf("Expected 3. Got values length: %v", r.ValuesLength())
	}
	var tval task.Value
	if ok := r.Values(&tval, 0); !ok {
		t.Errorf("Unable to retrieve value")
	}
	if tval.ValLength() != 1 || tval.ValBytes()[0] != 0x00 {
		t.Errorf("Invalid byte value at index 0")
	}

	if ok := r.Values(&tval, 1); !ok {
		t.Errorf("Unable to retrieve value")
	}
	if tval.ValLength() != 1 || tval.ValBytes()[0] != 0x00 {
		t.Errorf("Invalid byte value at index 1")
	}

	if ok := r.Values(&tval, 2); !ok {
		t.Errorf("Unable to retrieve value")
	}
	var iout interface{}
	if err := posting.ParseValue(&iout, tval.ValBytes()); err != nil {
		t.Error(err)
	}
	v := iout.(string)
	if v != "photon" {
		t.Errorf("Expected photon. Got: %q", v)
	}
}
func getOrCreate(key []byte) *posting.List {
	l, _ := posting.GetOrCreate(key)
	return l
}
func intersectBucket(ts *task.Sort, attr, token string, out []intersectedList) error {
	count := int(ts.Count)
	sType := schema.TypeOf(attr)
	if !sType.IsScalar() {
		return x.Errorf("Cannot sort attribute %s of type object.", attr)
	}
	scalar := sType.(types.Scalar)

	key := x.IndexKey(attr, token)
	pl, decr := posting.GetOrCreate(key)
	defer decr()

	for i, ul := range ts.UidMatrix {
		il := &out[i]
		if count > 0 && len(il.ulist.Uids) >= count {
			continue
		}

		// Intersect index with i-th input UID list.
		listOpt := posting.ListOptions{Intersect: ul}
		result := pl.Uids(listOpt)
		n := len(result.Uids)

		// Check offsets[i].
		if il.offset >= n {
			// We are going to skip the whole intersection. No need to do actual
			// sorting. Just update offsets[i].
			il.offset -= n
			continue
		}

		// Sort results by value before applying offset.
		sortByValue(attr, result, scalar, ts.Desc)

		if il.offset > 0 {
			result.Uids = result.Uids[il.offset:n]
			il.offset = 0
			n = len(result.Uids)
		}

		// n is the number of elements to copy from result to out.
		if count > 0 {
			slack := count - len(il.ulist.Uids)
			if slack < n {
				n = slack
			}
		}

		// Copy from result to out.
		for j := 0; j < n; j++ {
			il.ulist.Uids = append(il.ulist.Uids, result.Uids[j])
		}
	} // end for loop

	// Check out[i] sizes for all i.
	for i := 0; i < len(ts.UidMatrix); i++ { // Iterate over UID lists.
		if len(out[i].ulist.Uids) < count {
			return errContinue
		}
		x.AssertTrue(len(out[i].ulist.Uids) == count)
	}
	return errDone
}