func numComponents(fq *ds.FinalizedQuery) int { numComponents := len(fq.Orders()) if p, _, _ := fq.IneqFilterLow(); p != "" { numComponents++ } if p, _, _ := fq.IneqFilterHigh(); p != "" { numComponents++ } for _, v := range fq.EqFilters() { numComponents += v.Len() } return numComponents }
// adjustQuery applies various mutations to the query to make it suitable for // merging. In general, this removes limits and offsets the 'distinct' modifier, // and it ensures that if there are sort orders which won't appear in the // result data that the query is transformed into a projection query which // contains all of the data. A non-projection query will never be transformed // in this way. func adjustQuery(fq *ds.FinalizedQuery) (*ds.FinalizedQuery, error) { q := fq.Original() // The limit and offset must be done in-memory because otherwise we may // request too few entities from the underlying store if many matching // entities have been deleted in the buffered transaction. q = q.Limit(-1) q = q.Offset(-1) // distinction must be done in-memory, because otherwise there's no way // to merge in the effect of the in-flight changes (because there's no way // to push back to the datastore "yeah, I know you told me that the (1, 2) // result came from `/Bob,1`, but would you mind pretending that it didn't // and tell me next the one instead? q = q.Distinct(false) // since we need to merge results, we must have all order-related fields // in each result. The only time we wouldn't have all the data available would // be for a keys-only or projection query. To fix this, we convert all // Projection and KeysOnly queries to project on /all/ Orders. // // FinalizedQuery already guarantees that all projected fields show up in // the Orders, but the projected fields could be a subset of the orders. // // Additionally on a keys-only query, any orders other than __key__ require // conversion of this query to a projection query including those orders in // order to merge the results correctly. // // In both cases, the resulting objects returned to the higher layers of the // stack will only include the information requested by the user; keys-only // queries will discard all PropertyMap data, and projection queries will // discard any field data that the user didn't ask for. orders := fq.Orders() if len(fq.Project()) > 0 || (fq.KeysOnly() && len(orders) > 1) { q = q.KeysOnly(false) for _, o := range orders { if o.Property == "__key__" { continue } q = q.Project(o.Property) } } return q.Finalize() }
// GetBinaryBounds gets the binary encoding of the upper and lower bounds of // the inequality filter on fq, if any is defined. If a bound does not exist, // it is nil. // // NOTE: if fq specifies a descending sort order for the inequality, the bounds // will be inverted, incremented, and flipped. func GetBinaryBounds(fq *ds.FinalizedQuery) (lower, upper []byte) { // Pick up the start/end range from the inequalities, if any. // // start and end in the reducedQuery are normalized so that `start >= // X < end`. Because of that, we need to tweak the inequality filters // contained in the query if they use the > or <= operators. if ineqProp := fq.IneqFilterProp(); ineqProp != "" { _, startOp, startV := fq.IneqFilterLow() if startOp != "" { lower = serialize.ToBytes(startV) if startOp == ">" { lower = increment(lower) } } _, endOp, endV := fq.IneqFilterHigh() if endOp != "" { upper = serialize.ToBytes(endV) if endOp == "<=" { upper = increment(upper) } } // The inequality is specified in natural (ascending) order in the query's // Filter syntax, but the order information may indicate to use a descending // index column for it. If that's the case, then we must invert, swap and // increment the inequality endpoints. // // Invert so that the desired numbers are represented correctly in the index. // Swap so that our iterators still go from >= start to < end. // Increment so that >= and < get correctly bounded (since the iterator is // still using natrual bytes ordering) if fq.Orders()[0].Descending { hi, lo := []byte(nil), []byte(nil) if len(lower) > 0 { lo = increment(serialize.Invert(lower)) } if len(upper) > 0 { hi = increment(serialize.Invert(upper)) } upper, lower = lo, hi } } return }
func (d rdsImpl) fixQuery(fq *ds.FinalizedQuery) (*datastore.Query, error) { ret := datastore.NewQuery(fq.Kind()) start, end := fq.Bounds() if start != nil { ret = ret.Start(start.(datastore.Cursor)) } if end != nil { ret = ret.End(end.(datastore.Cursor)) } for prop, vals := range fq.EqFilters() { if prop == "__ancestor__" { p, err := dsF2RProp(d.aeCtx, vals[0]) if err != nil { return nil, err } ret = ret.Ancestor(p.Value.(*datastore.Key)) } else { filt := prop + "=" for _, v := range vals { p, err := dsF2RProp(d.aeCtx, v) if err != nil { return nil, err } ret = ret.Filter(filt, p.Value) } } } if lnam, lop, lprop := fq.IneqFilterLow(); lnam != "" { p, err := dsF2RProp(d.aeCtx, lprop) if err != nil { return nil, err } ret = ret.Filter(lnam+" "+lop, p.Value) } if hnam, hop, hprop := fq.IneqFilterHigh(); hnam != "" { p, err := dsF2RProp(d.aeCtx, hprop) if err != nil { return nil, err } ret = ret.Filter(hnam+" "+hop, p.Value) } if fq.EventuallyConsistent() { ret = ret.EventualConsistency() } if fq.KeysOnly() { ret = ret.KeysOnly() } if lim, ok := fq.Limit(); ok { ret = ret.Limit(int(lim)) } if off, ok := fq.Offset(); ok { ret = ret.Offset(int(off)) } for _, o := range fq.Orders() { ret = ret.Order(o.String()) } ret = ret.Project(fq.Project()...) if fq.Distinct() { ret = ret.Distinct() } return ret, nil }
func reduce(fq *ds.FinalizedQuery, aid, ns string, isTxn bool) (*reducedQuery, error) { if err := fq.Valid(aid, ns); err != nil { return nil, err } if isTxn && fq.Ancestor() == nil { return nil, fmt.Errorf("queries within a transaction must include an Ancestor filter") } if num := numComponents(fq); num > MaxQueryComponents { return nil, fmt.Errorf( "gae/memory: query is too large. may not have more than "+ "%d filters + sort orders + ancestor total: had %d", MaxQueryComponents, num) } ret := &reducedQuery{ aid: aid, ns: ns, kind: fq.Kind(), suffixFormat: fq.Orders(), } eqFilts := fq.EqFilters() ret.eqFilters = make(map[string]stringset.Set, len(eqFilts)) for prop, vals := range eqFilts { sVals := stringset.New(len(vals)) for _, v := range vals { sVals.Add(string(serialize.ToBytes(v))) } ret.eqFilters[prop] = sVals } startD, endD := GetBinaryBounds(fq) // Now we check the start and end cursors. // // Cursors are composed of a list of IndexColumns at the beginning, followed // by the raw bytes to use for the suffix. The cursor is only valid if all of // its IndexColumns match our proposed suffixFormat, as calculated above. // // Cursors are mutually exclusive with the start/end we picked up from the // inequality. In a well formed query, they indicate a subset of results // bounded by the inequality. Technically if the start cursor is not >= the // low bound, or the end cursor is < the high bound, it's an error, but for // simplicity we just cap to the narrowest intersection of the inequality and // cursors. ret.start = startD ret.end = endD if start, end := fq.Bounds(); start != nil || end != nil { if start != nil { if c, ok := start.(queryCursor); ok { startCols, startD, err := c.decode() if err != nil { return nil, err } if !sortOrdersEqual(startCols, ret.suffixFormat) { return nil, errors.New("gae/memory: start cursor is invalid for this query") } if ret.start == nil || bytes.Compare(ret.start, startD) < 0 { ret.start = startD } } else { return nil, errors.New("gae/memory: bad cursor type") } } if end != nil { if c, ok := end.(queryCursor); ok { endCols, endD, err := c.decode() if err != nil { return nil, err } if !sortOrdersEqual(endCols, ret.suffixFormat) { return nil, errors.New("gae/memory: end cursor is invalid for this query") } if ret.end == nil || bytes.Compare(endD, ret.end) < 0 { ret.end = endD } } else { return nil, errors.New("gae/memory: bad cursor type") } } } // Finally, verify that we could even /potentially/ do work. If we have // overlapping range ends, then we don't have anything to do. if ret.end != nil && bytes.Compare(ret.start, ret.end) >= 0 { return nil, ds.ErrNullQuery } ret.numCols = len(ret.suffixFormat) for prop, vals := range ret.eqFilters { if len(ret.suffixFormat) == 1 && prop == "__ancestor__" { continue } ret.numCols += vals.Len() } return ret, nil }
// runMergedQueries executes a user query `fq` against the parent datastore as // well as the in-memory datastore, calling `cb` with the merged result set. // // It's expected that the caller of this function will apply limit and offset // if the query contains those restrictions. This may convert the query to // an expanded projection query with more data than the user asked for. It's the // caller's responsibility to prune away the extra data. // // See also `dsTxnBuf.Run()`. func runMergedQueries(fq *ds.FinalizedQuery, sizes *sizeTracker, memDS, parentDS ds.RawInterface, cb func(k *ds.Key, data ds.PropertyMap) error) error { toRun, err := adjustQuery(fq) if err != nil { return err } cmpLower, cmpUpper := memory.GetBinaryBounds(fq) cmpOrder := fq.Orders() cmpFn := func(i *item) string { return i.getCmpRow(cmpLower, cmpUpper, cmpOrder) } dedup := stringset.Set(nil) distinct := stringset.Set(nil) distinctOrder := []ds.IndexColumn(nil) if len(fq.Project()) > 0 { // the original query was a projection query if fq.Distinct() { // it was a distinct projection query, so we need to dedup by distinct // options. distinct = stringset.New(0) proj := fq.Project() distinctOrder = make([]ds.IndexColumn, len(proj)) for i, p := range proj { distinctOrder[i].Property = p } } } else { // the original was a normal or keys-only query, so we need to dedup by keys. dedup = stringset.New(0) } stopChan := make(chan struct{}) parIter := queryToIter(stopChan, toRun, parentDS) memIter := queryToIter(stopChan, toRun, memDS) parItemGet := func() (*item, error) { for { itm, err := parIter() if itm == nil || err != nil { return nil, err } encKey := itm.getEncKey() if sizes.has(encKey) || (dedup != nil && dedup.Has(encKey)) { continue } return itm, nil } } memItemGet := func() (*item, error) { for { itm, err := memIter() if itm == nil || err != nil { return nil, err } if dedup != nil && dedup.Has(itm.getEncKey()) { continue } return itm, nil } } defer func() { close(stopChan) parItemGet() memItemGet() }() pitm, err := parItemGet() if err != nil { return err } mitm, err := memItemGet() if err != nil { return err } for { // the err can be set during the loop below. If we come around the bend and // it's set, then we need to return it. We don't check it immediately // because it's set after we already have a good result to return to the // user. if err != nil { return err } usePitm := pitm != nil if pitm != nil && mitm != nil { usePitm = cmpFn(pitm) < cmpFn(mitm) } else if pitm == nil && mitm == nil { break } toUse := (*item)(nil) // we check the error at the beginning of the loop. if usePitm { toUse = pitm pitm, err = parItemGet() } else { toUse = mitm mitm, err = memItemGet() } if dedup != nil { if !dedup.Add(toUse.getEncKey()) { continue } } if distinct != nil { // NOTE: We know that toUse will not be used after this point for // comparison purposes, so re-use its cmpRow property for our distinct // filter here. toUse.cmpRow = "" if !distinct.Add(toUse.getCmpRow(nil, nil, distinctOrder)) { continue } } if err := cb(toUse.key, toUse.data); err != nil { if err == ds.Stop { return nil } return err } } return nil }