// validateTypes checks for predicate types present in the schema and validates if the // input value is of the correct type func validateTypes(nquads []rdf.NQuad) error { for i := range nquads { nquad := &nquads[i] if t := schema.TypeOf(nquad.Predicate); t != nil && t.IsScalar() { schemaType := t.(types.Scalar) typeID := types.TypeID(nquad.ObjectType) if typeID == types.BytesID { // Storage type was unspecified in the RDF, so we convert the data to the schema // type. v := types.ValueForType(schemaType.ID()) err := v.UnmarshalText(nquad.ObjectValue) if err != nil { return err } nquad.ObjectValue, err = v.MarshalBinary() if err != nil { return err } nquad.ObjectType = byte(schemaType.ID()) } else if typeID != schemaType.ID() { v := types.ValueForType(typeID) err := v.UnmarshalBinary(nquad.ObjectValue) if err != nil { return err } if _, err := schemaType.Convert(v); err != nil { return err } } } } return nil }
// IndexTokens return tokens, without the predicate prefix and index rune. func IndexTokens(attr string, p types.Value) ([]string, error) { schemaType := schema.TypeOf(attr) if !schemaType.IsScalar() { return nil, x.Errorf("Cannot index attribute %s of type object.", attr) } s := schemaType.(types.Scalar) schemaVal, err := s.Convert(p) if err != nil { return nil, err } switch v := schemaVal.(type) { case *types.Geo: return geo.IndexTokens(v) case *types.Int32: return types.IntIndex(attr, v) case *types.Float: return types.FloatIndex(attr, v) case *types.Date: return types.DateIndex(attr, v) case *types.Time: return types.TimeIndex(attr, v) case *types.String: return types.DefaultIndexKeys(attr, v), nil } return nil, nil }
func getValue(attr, data string) (types.Value, error) { // Parse given value and get token. There should be only one token. t := schema.TypeOf(attr) if t == nil || !t.IsScalar() { return nil, x.Errorf("Attribute %s is not valid scalar type", attr) } schemaType := t.(types.Scalar) v := types.ValueForType(schemaType.ID()) err := v.UnmarshalText([]byte(data)) if err != nil { return nil, err } return v, nil }
// newGraph returns the SubGraph and its task query. func newGraph(ctx context.Context, gq *gql.GraphQuery) (*SubGraph, error) { euid, exid := gq.UID, gq.XID // This would set the Result field in SubGraph, // and populate the children for attributes. if len(exid) > 0 { x.AssertTruef(!strings.HasPrefix(exid, "_new_:"), "Query shouldn't contain _new_") euid = farm.Fingerprint64([]byte(exid)) x.Trace(ctx, "Xid: %v Uid: %v", exid, euid) } if euid == 0 && gq.Func == nil { err := x.Errorf("Invalid query, query internal id is zero and generator is nil") x.TraceError(ctx, err) return nil, err } // For the root, the name to be used in result is stored in Alias, not Attr. // The attr at root (if present) would stand for the source functions attr. args := params{ AttrType: schema.TypeOf(gq.Alias), isDebug: gq.Alias == "debug", Alias: gq.Alias, } sg := &SubGraph{ Params: args, } if gq.Func != nil { sg.Attr = gq.Func.Attr sg.SrcFunc = append(sg.SrcFunc, gq.Func.Name) sg.SrcFunc = append(sg.SrcFunc, gq.Func.Args...) } if euid > 0 { // euid is the root UID. sg.SrcUIDs = &task.List{Uids: []uint64{euid}} sg.uidMatrix = []*task.List{&task.List{Uids: []uint64{euid}}} } sg.values = createNilValuesList(1) return sg, nil }
// intersectBucket intersects the index posting list for (attr, token) with
// every UID list in ts.UidMatrix, sorting each intersection by value and
// applying the per-list offset/count bookkeeping held in out. It returns
// errDone once every output list has exactly ts.Count entries, errContinue
// otherwise, or a real error for a non-scalar attribute.
func intersectBucket(ts *task.Sort, attr, token string, out []intersectedList) error {
	count := int(ts.Count)
	sType := schema.TypeOf(attr)
	// NOTE(review): no nil check before IsScalar here, unlike getValue — a
	// predicate missing from the schema would panic. TODO confirm callers
	// guarantee attr is in the schema.
	if !sType.IsScalar() {
		return x.Errorf("Cannot sort attribute %s of type object.", attr)
	}
	scalar := sType.(types.Scalar)
	key := x.IndexKey(attr, token)
	pl, decr := posting.GetOrCreate(key)
	defer decr()
	for i, ul := range ts.UidMatrix {
		il := &out[i]
		// This output list already has enough results; skip it.
		if count > 0 && len(il.ulist.Uids) >= count {
			continue
		}
		// Intersect index with i-th input UID list.
		listOpt := posting.ListOptions{Intersect: ul}
		result := pl.Uids(listOpt)
		n := len(result.Uids)
		// Check offsets[i].
		if il.offset >= n {
			// We are going to skip the whole intersection. No need to do
			// actual sorting. Just update offsets[i].
			il.offset -= n
			continue
		}
		// Sort results by value before applying offset.
		sortByValue(attr, result, scalar, ts.Desc)
		if il.offset > 0 {
			// Consume the remaining offset from the front of this bucket.
			result.Uids = result.Uids[il.offset:n]
			il.offset = 0
			n = len(result.Uids)
		}
		// n is number of elements to copy from result to out; clamp to the
		// remaining capacity when a count limit is set.
		if count > 0 {
			slack := count - len(il.ulist.Uids)
			if slack < n {
				n = slack
			}
		}
		// Copy from result to out.
		for j := 0; j < n; j++ {
			il.ulist.Uids = append(il.ulist.Uids, result.Uids[j])
		}
	} // end for loop
	// Check out[i] sizes for all i: if any list is still short, the caller
	// must continue with the next bucket.
	for i := 0; i < len(ts.UidMatrix); i++ { // Iterate over UID lists.
		if len(out[i].ulist.Uids) < count {
			return errContinue
		}
		x.AssertTrue(len(out[i].ulist.Uids) == count)
	}
	return errDone
}
// processTask processes the query, accumulates and returns the result. It
// resolves the source function (if any) into index tokens, walks one posting
// list per token (or per source UID when there is no function), collects
// values / counts / UID lists, then applies post-filters: an exact inequality
// check for the boundary token of geq/leq, and a geo containment check for
// geo functions.
func processTask(q *task.Query) (*task.Result, error) {
	attr := q.Attr
	useFunc := len(q.SrcFunc) != 0
	var n int
	var tokens []string
	var geoQuery *geo.QueryData
	var err error
	var intersectDest bool
	var ineqValue types.Value
	var ineqValueToken string
	var isGeq, isLeq bool
	if useFunc {
		f := q.SrcFunc[0]
		isGeq = f == "geq"
		isLeq = f == "leq"
		switch {
		case isGeq:
			fallthrough
		case isLeq:
			if len(q.SrcFunc) != 2 {
				return nil, x.Errorf("Function requires 2 arguments, but got %d %v",
					len(q.SrcFunc), q.SrcFunc)
			}
			ineqValue, err = getValue(attr, q.SrcFunc[1])
			if err != nil {
				return nil, err
			}
			// Tokenizing RHS value of inequality.
			ineqTokens, err := posting.IndexTokens(attr, ineqValue)
			if err != nil {
				return nil, err
			}
			if len(ineqTokens) != 1 {
				return nil, x.Errorf("Expected only 1 token but got: %v", ineqTokens)
			}
			ineqValueToken = ineqTokens[0]
			// Get tokens geq / leq ineqValueToken.
			tokens, err = getInequalityTokens(attr, ineqValueToken, isGeq)
			if err != nil {
				return nil, err
			}
		case geo.IsGeoFunc(q.SrcFunc[0]):
			// For geo functions, we get extra information used for filtering.
			tokens, geoQuery, err = geo.GetTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}
		default:
			tokens, err = getTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}
			intersectDest = (strings.ToLower(q.SrcFunc[0]) == "allof")
		}
		n = len(tokens)
	} else {
		n = len(q.Uids)
	}
	var out task.Result
	for i := 0; i < n; i++ {
		var key []byte
		if useFunc {
			key = x.IndexKey(attr, tokens[i])
		} else {
			key = x.DataKey(attr, q.Uids[i])
		}
		// Get or create the posting list for an entity, attribute combination.
		pl, decr := posting.GetOrCreate(key)
		// NOTE(review): defer in a loop — every posting list stays referenced
		// until processTask returns rather than per-iteration. Presumably
		// acceptable for query-sized n; confirm before reusing this pattern.
		defer decr()
		// If a posting list contains a value, we store that or else we store
		// a nil byte so that processing is consistent later.
		vbytes, vtype, err := pl.Value()
		newValue := &task.Value{ValType: uint32(vtype)}
		if err == nil {
			newValue.Val = vbytes
		} else {
			newValue.Val = x.Nilbyte
		}
		out.Values = append(out.Values, newValue)
		if q.DoCount {
			out.Counts = append(out.Counts, uint32(pl.Length(0)))
			// Add an empty UID list to make later processing consistent.
			out.UidMatrix = append(out.UidMatrix, &emptyUIDList)
			continue
		}
		// The more usual case: Getting the UIDs.
		opts := posting.ListOptions{
			AfterUID: uint64(q.AfterUid),
		}
		// If we have srcFunc and Uids, it means its a filter. So we intersect.
		if useFunc && len(q.Uids) > 0 {
			opts.Intersect = &task.List{Uids: q.Uids}
		}
		out.UidMatrix = append(out.UidMatrix, pl.Uids(opts))
	}
	if (isGeq || isLeq) && len(tokens) > 0 && ineqValueToken == tokens[0] {
		// Need to evaluate inequality for entries in the first bucket: the
		// boundary token may contain values on either side of ineqValue.
		typ := schema.TypeOf(attr)
		if typ == nil || !typ.IsScalar() {
			return nil, x.Errorf("Attribute not scalar: %s %v", attr, typ)
		}
		scalarType := typ.(types.Scalar)
		x.AssertTrue(len(out.UidMatrix) > 0)
		// Filter the first row of UidMatrix. Since ineqValue != nil, we may
		// assume that ineqValue is equal to the first token found in
		// TokensTable.
		algo.ApplyFilter(out.UidMatrix[0], func(uid uint64, i int) bool {
			key := x.DataKey(attr, uid)
			sv := getPostingValue(key, scalarType)
			if sv == nil {
				return false
			}
			if isGeq {
				// *sv >= ineqValue, expressed via !(*sv < ineqValue).
				return !scalarType.Less(*sv, ineqValue)
			}
			// *sv <= ineqValue, expressed via !(ineqValue < *sv).
			return !scalarType.Less(ineqValue, *sv)
		})
	}
	// If geo filter, do value check for correctness.
	var values []*task.Value
	if geoQuery != nil {
		uids := algo.MergeSorted(out.UidMatrix)
		for _, uid := range uids.Uids {
			// Fetch the stored value for each candidate UID so geo.FilterUids
			// can verify actual containment, not just token overlap.
			key := x.DataKey(attr, uid)
			pl, decr := posting.GetOrCreate(key)
			vbytes, vtype, err := pl.Value()
			newValue := &task.Value{ValType: uint32(vtype)}
			if err == nil {
				newValue.Val = vbytes
			} else {
				newValue.Val = x.Nilbyte
			}
			values = append(values, newValue)
			decr() // Decrement the reference count of the pl.
		}
		filtered := geo.FilterUids(uids, values, geoQuery)
		// Restrict every row of the matrix to the geo-verified UIDs.
		for i := 0; i < len(out.UidMatrix); i++ {
			out.UidMatrix[i] = algo.IntersectSorted([]*task.List{out.UidMatrix[i], filtered})
		}
	}
	out.IntersectDest = intersectDest
	return &out, nil
}
// treeCopy copies the parsed GraphQuery tree gq into the SubGraph tree rooted
// at sg. Typically you act on the current node and leave recursion to deal
// with children; but here we don't want to muck with the current node because
// of the way the root node is handled — so we work on the children and then
// recurse for grandchildren. Scalar children implied by the schema are added
// first, and explicit query children duplicate-checked against them.
func treeCopy(ctx context.Context, gq *gql.GraphQuery, sg *SubGraph) error {
	var scalars []string
	// Add scalar children nodes based on schema.
	if obj, ok := sg.Params.AttrType.(types.Object); ok {
		// Add scalar fields in the level to children.
		list := schema.ScalarList(obj.Name)
		for _, it := range list {
			args := params{
				AttrType: it.Typ,
				isDebug:  sg.Params.isDebug,
			}
			dst := &SubGraph{
				Attr:   it.Field,
				Params: args,
			}
			sg.Children = append(sg.Children, dst)
			scalars = append(scalars, it.Field)
		}
	}
	for _, gchild := range gq.Children {
		// Skip attrs already added as schema scalars above.
		if isPresent(scalars, gchild.Attr) {
			continue
		}
		if gchild.Attr == "_count_" {
			if len(gq.Children) > 1 {
				return errors.New("Cannot have other attributes with count")
			}
			if gchild.Children != nil {
				return errors.New("Count cannot have other attributes")
			}
			sg.Params.DoCount = true
			break
		}
		if gchild.Attr == "_uid_" {
			sg.Params.GetUID = true
		}
		// Determine the type of current node.
		var attrType types.Type
		if sg.Params.AttrType != nil {
			if objType, ok := sg.Params.AttrType.(types.Object); ok {
				// Parent is an object: child's type comes from its field map.
				attrType = schema.TypeOf(objType.Fields[gchild.Attr])
			}
		} else {
			// Child is explicitly specified as some type.
			if objType := schema.TypeOf(gchild.Attr); objType != nil {
				if o, ok := objType.(types.Object); ok && o.Name == gchild.Attr {
					attrType = objType
				}
			}
		}
		args := params{
			AttrType: attrType,
			Alias:    gchild.Alias,
			isDebug:  sg.Params.isDebug,
		}
		dst := &SubGraph{
			Attr:   gchild.Attr,
			Params: args,
		}
		if gchild.Filter != nil {
			dstf := &SubGraph{}
			filterCopy(dstf, gchild.Filter)
			dst.Filters = append(dst.Filters, dstf)
		}
		// Pagination / ordering arguments carried on the query child.
		if v, ok := gchild.Args["offset"]; ok {
			offset, err := strconv.ParseInt(v, 0, 32)
			if err != nil {
				return err
			}
			dst.Params.Offset = int(offset)
		}
		if v, ok := gchild.Args["after"]; ok {
			after, err := strconv.ParseUint(v, 0, 64)
			if err != nil {
				return err
			}
			dst.Params.AfterUID = uint64(after)
		}
		if v, ok := gchild.Args["first"]; ok {
			first, err := strconv.ParseInt(v, 0, 32)
			if err != nil {
				return err
			}
			dst.Params.Count = int(first)
		}
		if v, ok := gchild.Args["order"]; ok {
			dst.Params.Order = v
		} else if v, ok := gchild.Args["orderdesc"]; ok {
			dst.Params.Order = v
			dst.Params.OrderDesc = true
		}
		sg.Children = append(sg.Children, dst)
		// Recurse for grandchildren.
		err := treeCopy(ctx, gchild, dst)
		if err != nil {
			return err
		}
	}
	return nil
}
// preTraverse gets the values and children for a subgraph, materializing them
// into dst for the entity identified by uid. Children that fail scalar
// conversion propagate the sentinel error "_INV_", which marks the child UID
// invalid at this level and skips it for subsequent predicates.
func (sg *SubGraph) preTraverse(uid uint64, dst outputNode) error {
	invalidUids := make(map[uint64]bool)
	// We go through all predicate children of the subgraph.
	for _, pc := range sg.Children {
		idx := algo.IndexOf(pc.SrcUIDs, uid)
		if idx < 0 {
			// This predicate has no entry for uid.
			continue
		}
		ul := pc.uidMatrix[idx]
		fieldName := pc.Attr
		if pc.Params.Alias != "" {
			fieldName = pc.Params.Alias
		}
		if sg.Params.GetUID || sg.Params.isDebug {
			dst.SetUID(uid)
		}
		if len(pc.counts) > 0 {
			// Count query: emit a single _count_ child.
			c := types.Int32(pc.counts[idx])
			uc := dst.New(fieldName)
			uc.AddValue("_count_", &c)
			dst.AddChild(fieldName, uc)
		} else if len(ul.Uids) > 0 || len(pc.Children) > 0 {
			// We create as many predicate entity children as the length of
			// uids for this predicate.
			for _, childUID := range ul.Uids {
				if invalidUids[childUID] {
					continue
				}
				uc := dst.New(fieldName)
				// Doing check for UID here is no good because some of these
				// might be invalid nodes.
				// if pc.Params.GetUID || pc.Params.isDebug {
				//	dst.SetUID(uid)
				// }
				if rerr := pc.preTraverse(childUID, uc); rerr != nil {
					if rerr.Error() == "_INV_" {
						// Sentinel from a failed conversion below: drop this
						// child UID and move on.
						invalidUids[childUID] = true
						continue // next UID.
					}
					// Some other error.
					log.Printf("Error while traversal: %v", rerr)
					return rerr
				}
				if !uc.IsEmpty() {
					dst.AddChild(fieldName, uc)
				}
			}
		} else {
			// Leaf value for this predicate.
			tv := pc.values[idx]
			v, err := getValue(tv)
			if err != nil {
				return err
			}
			if pc.Attr == "_xid_" {
				txt, err := v.MarshalText()
				if err != nil {
					return err
				}
				dst.SetXID(string(txt))
			} else {
				globalType := schema.TypeOf(pc.Attr)
				schemaType := pc.Params.AttrType
				sv := v
				if schemaType != nil {
					// Do type checking on response values.
					if !schemaType.IsScalar() {
						return x.Errorf("Unknown Scalar:%v. Leaf predicate:'%v' must be"+
							" one of the scalar types defined in the schema.", pc.Params.AttrType, pc.Attr)
					}
					st := schemaType.(types.Scalar)
					// Convert to schema type. A nil value or failed
					// conversion invalidates the whole node via "_INV_".
					sv, err = st.Convert(v)
					if bytes.Equal(tv.Val, nil) || err != nil {
						// Skip values that don't convert.
						return x.Errorf("_INV_")
					}
				} else if globalType != nil {
					// Try to coerce types if this is an optional scalar
					// outside an object definition.
					if !globalType.IsScalar() {
						return x.Errorf("Leaf predicate:'%v' must be a scalar.", pc.Attr)
					}
					gt := globalType.(types.Scalar)
					// Convert to schema type. NOTE(review): unlike the
					// schemaType branch above, failure here only skips this
					// value rather than invalidating the node — presumably
					// intentional for optional scalars; confirm.
					sv, err = gt.Convert(v)
					if bytes.Equal(tv.Val, nil) || err != nil {
						continue
					}
				}
				if bytes.Equal(tv.Val, nil) {
					// Placeholder nil value; nothing to emit.
					continue
				}
				dst.AddValue(fieldName, sv)
			}
		}
	}
	return nil
}