func TestCompareSelectivityWithStatsAndQuantiles(t *testing.T) { sixtyValue := 60.0 numberSixty := NewLiteralNumber(sixtyValue) fortyValue := 40.0 numberForty := NewLiteralNumber(fortyValue) abvStats := stats.DefaultPathStats(0, 100) abvStats.Rows = 50 abvStats.DistinctValues = 10 abvStats.MostFrequentValues.Consider(sixtyValue, 6) abvStats.Quantiles = []stats.QuantileRange{ stats.QuantileRange{Start: 0.0, End: 20.0, Count: 10}, stats.QuantileRange{Start: 21.0, End: 40.0, Count: 10}, stats.QuantileRange{Start: 41.0, End: 60.0, Count: 10}, stats.QuantileRange{Start: 61.0, End: 80.0, Count: 10}, stats.QuantileRange{Start: 81.0, End: 100.0, Count: 10}, } pathStatistics := map[string]stats.PathStatistics{ "doc.abv": abvStats, } tests := []struct { input BooleanExpression output interface{} }{ // when value is in freq vals, we should get precise estimate {NewEqualToOperator(NewProperty("doc.abv"), numberSixty), 6.0 / 50.0}, {NewNotEqualToOperator(NewProperty("doc.abv"), numberSixty), 44.0 / 50.0}, // this time it should fall back to the quantiles {NewEqualToOperator(NewProperty("doc.abv"), numberForty), 10.0 / 50.0}, {NewNotEqualToOperator(NewProperty("doc.abv"), numberForty), 40.0 / 50.0}, // now test the ranges {NewLessThanOperator(NewProperty("doc.abv"), numberSixty), 30.0 / 50.0}, {NewGreaterThanOperator(NewProperty("doc.abv"), numberSixty), 30.0 / 50.0}, // note these two find exact values for the equals portion {NewLessThanOrEqualOperator(NewProperty("doc.abv"), numberSixty), 36.0 / 50.0}, {NewGreaterThanOrEqualOperator(NewProperty("doc.abv"), numberSixty), 36.0 / 50.0}, // these two should not {NewLessThanOrEqualOperator(NewProperty("doc.abv"), numberForty), 30.0 / 50.0}, {NewGreaterThanOrEqualOperator(NewProperty("doc.abv"), numberForty), 50.0 / 50.0}, } for _, x := range tests { result := x.input.GetSelectivity(pathStatistics) if !reflect.DeepEqual(result, x.output) { t.Errorf("Expected %v for %v, got %v", x.output, x.input, result) } } }
func TestCompareSelectivityWithStatsNoQuantiles(t *testing.T) { sixtyValue := 60.0 numberSixty := NewLiteralNumber(sixtyValue) fortyValue := 40.0 numberForty := NewLiteralNumber(fortyValue) abvStats := stats.DefaultPathStats(0, 100) abvStats.Rows = 50 abvStats.DistinctValues = 10 abvStats.MostFrequentValues.Consider(sixtyValue, 6) pathStatistics := map[string]stats.PathStatistics{ "doc.abv": abvStats, } tests := []struct { input BooleanExpression output interface{} }{ // when value is in freq vals, we should get precise estimate {NewEqualToOperator(NewProperty("doc.abv"), numberSixty), 6.0 / 50.0}, {NewNotEqualToOperator(NewProperty("doc.abv"), numberSixty), 44.0 / 50.0}, // if value is not in freq vals, and we dont have quantiles // it should fall back to defaults {NewEqualToOperator(NewProperty("doc.abv"), numberForty), 1.0 / 10.0}, {NewNotEqualToOperator(NewProperty("doc.abv"), numberForty), 9.0 / 10.0}, } for _, x := range tests { result := x.input.GetSelectivity(pathStatistics) if !reflect.DeepEqual(result, x.output) { t.Errorf("Expected %v for %v, got %v", x.output, x.input, result) } } }
func (this *CouchbaseViewAccessPath) UpdateStats() { log.Printf("Starting UpdateStats for %v", this) // only try to address single column stats here if len(this.keys) == 1 { pathStat := stats.DefaultPathStats(MIN_KEY, MAX_KEY) vres, err := this.dataSource.bucket.View(this.ddoc, this.view, map[string]interface{}{"reduce": false, "limit": 0}) if err != nil { log.Printf("Unable to determine cardinality of view, defaulting to MAX") } else { pathStat.Rows = vres.TotalRows pathStat.DistinctValues = vres.TotalRows } // try to gather deeper stats targetCountPerQuantile := pathStat.Rows / pathStat.NumQuantiles() options := map[string]interface{}{"group_level": 1} viewRowsChannel := make(chan couchbase.ViewRow) go WalkViewInBatches(viewRowsChannel, this.dataSource.bucket, this.ddoc, this.view, options, BATCH_SIZE) distinctRows := 0 currentQuantile := stats.QuantileRange{} runningCount := 0 numQuantilesBuilt := 0 for row := range viewRowsChannel { if distinctRows == 0 { pathStat.MinValue = row.Key } if currentQuantile.Count == 0 { currentQuantile.Start = row.Key } pathStat.MaxValue = row.Key currentQuantile.End = row.Key distinctRows++ // expect result to be _stats reduce switch stats_reduce := row.Value.(type) { case map[string]interface{}: switch stats_count := stats_reduce["count"].(type) { case float64: pathStat.MostFrequentValues.Consider(row.Key, stats_count) currentQuantile.Count = currentQuantile.Count + int(stats_count) runningCount = runningCount + int(stats_count) } } if currentQuantile.Count > targetCountPerQuantile { //close out the quantile pathStat.Quantiles = append(pathStat.Quantiles, currentQuantile) numQuantilesBuilt = numQuantilesBuilt + 1 // update the target counts (we may have overshot because of a large bin) targetCountPerQuantile = (pathStat.Rows - runningCount) / (pathStat.NumQuantiles() - numQuantilesBuilt) //empty out a new quantile currentQuantile = stats.QuantileRange{} } } // close out the last quantile pathStat.Quantiles = append(pathStat.Quantiles, currentQuantile) numQuantilesBuilt = numQuantilesBuilt + 1 pathStat.DistinctValues = distinctRows this.dataSource.pathStats[this.keys[0]] = pathStat log.Printf("%v", pathStat) } log.Printf("Finished UpdateStats for %v", this) }