func inExpr(target interface{}, list ...interface{}) *tipb.Expr { targetDatum := types.NewDatum(target) var listDatums []types.Datum for _, v := range list { listDatums = append(listDatums, types.NewDatum(v)) } types.SortDatums(listDatums) targetExpr := datumExpr(targetDatum) val, _ := codec.EncodeValue(nil, listDatums...) listExpr := &tipb.Expr{Tp: tipb.ExprType_ValueList, Val: val} return &tipb.Expr{Tp: tipb.ExprType_In, Children: []*tipb.Expr{targetExpr, listExpr}} }
// buildColumn builds column statistics from samples. func (t *Table) buildColumn(offset int, samples []types.Datum, bucketCount int64) error { err := types.SortDatums(samples) if err != nil { return errors.Trace(err) } estimatedNDV, err := estimateNDV(t.Count, samples) if err != nil { return errors.Trace(err) } ci := t.info.Columns[offset] col := &Column{ ID: ci.ID, NDV: estimatedNDV, Numbers: make([]int64, 1, bucketCount), Values: make([]types.Datum, 1, bucketCount), Repeats: make([]int64, 1, bucketCount), } valuesPerBucket := t.Count/bucketCount + 1 // As we use samples to build the histogram, the bucket number and repeat should multiply a factor. sampleFactor := t.Count / int64(len(samples)) bucketIdx := 0 var lastNumber int64 for i := int64(0); i < int64(len(samples)); i++ { cmp, err := col.Values[bucketIdx].CompareDatum(samples[i]) if err != nil { return errors.Trace(err) } if cmp == 0 { // The new item has the same value as current bucket value, to ensure that // a same value only stored in a single bucket, we do not increase bucketIdx even if it exceeds // valuesPerBucket. col.Numbers[bucketIdx] = i * sampleFactor col.Repeats[bucketIdx] += sampleFactor } else if i*sampleFactor-lastNumber <= valuesPerBucket { // The bucket still have room to store a new item, update the bucket. col.Numbers[bucketIdx] = i * sampleFactor col.Values[bucketIdx] = samples[i] col.Repeats[bucketIdx] = 0 } else { // The bucket is full, store the item in the next bucket. lastNumber = col.Numbers[bucketIdx] bucketIdx++ col.Numbers = append(col.Numbers, i*sampleFactor) col.Values = append(col.Values, samples[i]) col.Repeats = append(col.Repeats, 0) } } t.Columns[offset] = col return nil }
func (s *testStatisticsSuite) SetUpSuite(c *C) { s.count = 100000 samples := make([]types.Datum, 10000) start := 1000 // 1000 values is null for i := start; i < len(samples); i++ { samples[i].SetInt64(int64(i)) } for i := start; i < len(samples); i += 3 { samples[i].SetInt64(samples[i].GetInt64() + 1) } for i := start; i < len(samples); i += 5 { samples[i].SetInt64(samples[i].GetInt64() + 2) } err := types.SortDatums(samples) c.Check(err, IsNil) s.samples = samples }
func (b *executorBuilder) datumsToValueList(datums []types.Datum) *tipb.Expr { // Don't push value list that has different datum kind. prevKind := types.KindNull for _, d := range datums { if prevKind == types.KindNull { prevKind = d.Kind() } if !d.IsNull() && d.Kind() != prevKind { return nil } } err := types.SortDatums(datums) if err != nil { b.err = errors.Trace(err) return nil } val, err := codec.EncodeValue(nil, datums...) if err != nil { b.err = errors.Trace(err) return nil } return &tipb.Expr{Tp: tipb.ExprType_ValueList.Enum(), Val: val} }
func (pc pbConverter) datumsToValueList(datums []types.Datum) *tipb.Expr { // Don't push value list that has different datum kind. prevKind := types.KindNull for _, d := range datums { if prevKind == types.KindNull { prevKind = d.Kind() } if !d.IsNull() && d.Kind() != prevKind { return nil } } err := types.SortDatums(pc.sc, datums) if err != nil { log.Error(err.Error()) return nil } val, err := codec.EncodeValue(nil, datums...) if err != nil { log.Error(err.Error()) return nil } return &tipb.Expr{Tp: tipb.ExprType_ValueList, Val: val} }