// processTask processes the query, accumulates and returns the result.
func processTask(q *task.Query) (*task.Result, error) {
	attr := q.Attr

	useFunc := len(q.SrcFunc) != 0
	var n int
	var tokens []string
	var geoQuery *geo.QueryData
	var err error
	var intersectDest bool
	var ineqValue types.Value
	var ineqValueToken string
	var isGeq, isLeq bool

	if useFunc {
		f := q.SrcFunc[0]
		isGeq = f == "geq"
		isLeq = f == "leq"
		switch {
		case isGeq, isLeq:
			if len(q.SrcFunc) != 2 {
				return nil, x.Errorf("Function requires 2 arguments, but got %d: %v",
					len(q.SrcFunc), q.SrcFunc)
			}
			ineqValue, err = getValue(attr, q.SrcFunc[1])
			if err != nil {
				return nil, err
			}
			// Tokenize the RHS value of the inequality.
			ineqTokens, err := posting.IndexTokens(attr, ineqValue)
			if err != nil {
				return nil, err
			}
			if len(ineqTokens) != 1 {
				return nil, x.Errorf("Expected exactly 1 token but got: %v", ineqTokens)
			}
			ineqValueToken = ineqTokens[0]
			// Get all tokens geq / leq ineqValueToken.
			tokens, err = getInequalityTokens(attr, ineqValueToken, isGeq)
			if err != nil {
				return nil, err
			}

		case geo.IsGeoFunc(q.SrcFunc[0]):
			// For geo functions, we get extra information used for filtering.
			tokens, geoQuery, err = geo.GetTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}

		default:
			tokens, err = getTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}
			intersectDest = strings.ToLower(q.SrcFunc[0]) == "allof"
		}
		n = len(tokens)
	} else {
		n = len(q.Uids)
	}

	var out task.Result
	for i := 0; i < n; i++ {
		var key []byte
		if useFunc {
			key = x.IndexKey(attr, tokens[i])
		} else {
			key = x.DataKey(attr, q.Uids[i])
		}
		// Get or create the posting list for this entity-attribute combination.
		pl, decr := posting.GetOrCreate(key)
		defer decr()

		// If the posting list contains a value, we store that; otherwise we
		// store a nil byte so that later processing is consistent.
		vbytes, vtype, err := pl.Value()
		newValue := &task.Value{ValType: uint32(vtype)}
		if err == nil {
			newValue.Val = vbytes
		} else {
			newValue.Val = x.Nilbyte
		}
		out.Values = append(out.Values, newValue)

		if q.DoCount {
			out.Counts = append(out.Counts, uint32(pl.Length(0)))
			// Add an empty UID list to keep later processing consistent.
			out.UidMatrix = append(out.UidMatrix, &emptyUIDList)
			continue
		}

		// The more usual case: get the UIDs.
		opts := posting.ListOptions{
			AfterUID: uint64(q.AfterUid),
		}
		// If we have both a srcFunc and Uids, this is a filter, so intersect.
		if useFunc && len(q.Uids) > 0 {
			opts.Intersect = &task.List{Uids: q.Uids}
		}
		out.UidMatrix = append(out.UidMatrix, pl.Uids(opts))
	}

	if (isGeq || isLeq) && len(tokens) > 0 && ineqValueToken == tokens[0] {
		// The first bucket's token equals the inequality token, so entries in
		// it may or may not satisfy the inequality; evaluate each one.
		typ := schema.TypeOf(attr)
		if typ == nil || !typ.IsScalar() {
			return nil, x.Errorf("Attribute not scalar: %s %v", attr, typ)
		}
		scalarType := typ.(types.Scalar)

		x.AssertTrue(len(out.UidMatrix) > 0)
		// Filter the first row of UidMatrix.
		algo.ApplyFilter(out.UidMatrix[0], func(uid uint64, i int) bool {
			key := x.DataKey(attr, uid)
			sv := getPostingValue(key, scalarType)
			if sv == nil {
				return false
			}
			if isGeq {
				return !scalarType.Less(*sv, ineqValue)
			}
			return !scalarType.Less(ineqValue, *sv)
		})
	}
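	// To make the predicate above concrete: with only Less available, both
	// inequalities are derived by negation. A minimal sketch (illustrative
	// only; sv is a fetched scalar value, ineqValue the RHS of the query):
	//
	//	geqHolds := !scalarType.Less(sv, ineqValue) // sv >= ineqValue
	//	leqHolds := !scalarType.Less(ineqValue, sv) // sv <= ineqValue
	//
	// So for "geq 10" over first-bucket values {8, 10, 12}, the filter keeps
	// 10 and 12 and drops 8.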
	// If this is a geo query, fetch the values and check them for correctness.
	var values []*task.Value
	if geoQuery != nil {
		uids := algo.MergeSorted(out.UidMatrix)
		for _, uid := range uids.Uids {
			key := x.DataKey(attr, uid)
			pl, decr := posting.GetOrCreate(key)

			vbytes, vtype, err := pl.Value()
			newValue := &task.Value{ValType: uint32(vtype)}
			if err == nil {
				newValue.Val = vbytes
			} else {
				newValue.Val = x.Nilbyte
			}
			values = append(values, newValue)
			decr() // Decrement the reference count of the posting list.
		}

		filtered := geo.FilterUids(uids, values, geoQuery)
		for i := 0; i < len(out.UidMatrix); i++ {
			out.UidMatrix[i] = algo.IntersectSorted([]*task.List{out.UidMatrix[i], filtered})
		}
	}
	out.IntersectDest = intersectDest
	return &out, nil
}
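// For illustration, a hypothetical direct call to processTask (the real
// caller dispatches task.Query instances over the network; the attribute and
// function arguments here are made up):
//
//	q := &task.Query{
//		Attr:    "name",
//		SrcFunc: []string{"allof", "alice"},
//	}
//	result, err := processTask(q)
//	// result.UidMatrix then holds one row of UIDs per generated token.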
// ProcessGraph processes the SubGraph instance, accumulating results for the
// query from different instances. Note: parent is nil for the root node.
func ProcessGraph(ctx context.Context, sg, parent *SubGraph, rch chan error) {
	var err error
	if len(sg.Attr) == 0 {
		// A filter SubGraph that only contains an operator has no attribute
		// to work on. Pass SrcUIDs through so they reach the filter children.
		sg.DestUIDs = sg.SrcUIDs
	} else if parent == nil && len(sg.SrcFunc) == 0 {
		// I am root. I don't have any function to execute, and my result has
		// been prepared for me already.
		sg.DestUIDs = algo.MergeSorted(sg.uidMatrix) // Could also be = sg.SrcUIDs.
	} else {
		taskQuery := createTaskQuery(sg)
		result, err := worker.ProcessTaskOverNetwork(ctx, taskQuery)
		if err != nil {
			x.TraceError(ctx, x.Wrapf(err, "Error while processing task"))
			rch <- err
			return
		}

		sg.uidMatrix = result.UidMatrix
		sg.values = result.Values
		if len(sg.values) > 0 {
			v := sg.values[0]
			x.Trace(ctx, "Sample value for attr: %v Val: %v", sg.Attr, string(v.Val))
		}
		sg.counts = result.Counts

		if sg.Params.DoCount && len(sg.Filters) == 0 {
			// If there is a filter, we need to do more work to get the actual
			// count; without one, the counts from the result are final.
			x.Trace(ctx, "Zero uids. Only count requested")
			rch <- nil
			return
		}

		if result.IntersectDest {
			sg.DestUIDs = algo.IntersectSorted(result.UidMatrix)
		} else {
			sg.DestUIDs = algo.MergeSorted(result.UidMatrix)
		}
	}

	if len(sg.DestUIDs.Uids) == 0 {
		// Looks like we're done here. Be careful with nil SrcUIDs!
		x.Trace(ctx, "Zero uids for %q. Num attr children: %v", sg.Attr, len(sg.Children))
		rch <- nil
		return
	}

	// Apply filters if any.
	if len(sg.Filters) > 0 {
		// Run all filters in parallel.
		filterChan := make(chan error, len(sg.Filters))
		for _, filter := range sg.Filters {
			filter.SrcUIDs = sg.DestUIDs
			go ProcessGraph(ctx, filter, sg, filterChan)
		}
		for range sg.Filters {
			select {
			case err = <-filterChan:
				if err != nil {
					x.TraceError(ctx, x.Wrapf(err, "Error while processing filter task"))
					rch <- err
					return
				}
			case <-ctx.Done():
				x.TraceError(ctx, x.Wrapf(ctx.Err(), "Context done before full execution"))
				rch <- ctx.Err()
				return
			}
		}

		// Now apply the results from the filters.
		var lists []*task.List
		for _, filter := range sg.Filters {
			lists = append(lists, filter.DestUIDs)
		}
		if sg.FilterOp == "|" {
			sg.DestUIDs = algo.MergeSorted(lists)
		} else {
			sg.DestUIDs = algo.IntersectSorted(lists)
		}
	}

	if len(sg.Params.Order) == 0 {
		// There is no ordering. Just apply pagination and return.
		if err = sg.applyPagination(ctx); err != nil {
			rch <- err
			return
		}
	} else {
		// We need to sort before paginating.
		if err = sg.applyOrderAndPagination(ctx); err != nil {
			rch <- err
			return
		}
	}

	// Handle _count_ with filtering here, after pagination, so that the count
	// reflects pagination. For example, a posting list might have only 50
	// entries, but if the user wants to skip 100 entries and return 10, the
	// count should be 0, not 10.
	if sg.Params.DoCount {
		x.AssertTrue(len(sg.Filters) > 0)
		sg.counts = make([]uint32, len(sg.uidMatrix))
		for i, ul := range sg.uidMatrix {
			// A possible optimization is to return the size of the
			// intersection without materializing it.
			algo.IntersectWith(ul, sg.DestUIDs)
			sg.counts[i] = uint32(len(ul.Uids))
		}
		rch <- nil
		return
	}

	childChan := make(chan error, len(sg.Children))
	for i := 0; i < len(sg.Children); i++ {
		child := sg.Children[i]
		child.SrcUIDs = sg.DestUIDs // Make the connection.
		go ProcessGraph(ctx, child, sg, childChan)
	}

	// Now get all the results back.
	for range sg.Children {
		select {
		case err = <-childChan:
			if err != nil {
				x.TraceError(ctx, x.Wrapf(err, "Error while processing child task"))
				rch <- err
				return
			}
		case <-ctx.Done():
			x.TraceError(ctx, x.Wrapf(ctx.Err(), "Context done before full execution"))
			rch <- ctx.Err()
			return
		}
	}
	rch <- nil
}
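// For illustration, a hypothetical way to drive ProcessGraph from the root
// (the real entry point builds sg from a parsed query; rch is buffered so an
// abandoned send cannot block the goroutine):
//
//	rch := make(chan error, 1)
//	go ProcessGraph(ctx, sg, nil, rch)
//	select {
//	case err := <-rch:
//		// err is nil on success.
//	case <-ctx.Done():
//		// Deadline exceeded or query cancelled.
//	}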