Ejemplo n.º 1
0
// TestCompareSelectivityWithStatsAndQuantiles checks the selectivity reported
// by the comparison operators for a path whose statistics carry both a
// most-frequent-value entry and quantile ranges.
func TestCompareSelectivityWithStatsAndQuantiles(t *testing.T) {
	const abvPath = "doc.abv"

	sixty := 60.0
	sixtyLiteral := NewLiteralNumber(sixty)
	forty := 40.0
	fortyLiteral := NewLiteralNumber(forty)

	// 50 rows over 10 distinct values; 60 is registered as occurring 6 times.
	pathStat := stats.DefaultPathStats(0, 100)
	pathStat.Rows = 50
	pathStat.DistinctValues = 10
	pathStat.MostFrequentValues.Consider(sixty, 6)
	// five ranges of 10 rows each
	pathStat.Quantiles = []stats.QuantileRange{
		{Start: 0.0, End: 20.0, Count: 10},
		{Start: 21.0, End: 40.0, Count: 10},
		{Start: 41.0, End: 60.0, Count: 10},
		{Start: 61.0, End: 80.0, Count: 10},
		{Start: 81.0, End: 100.0, Count: 10},
	}

	statsByPath := map[string]stats.PathStatistics{abvPath: pathStat}

	cases := []struct {
		expr BooleanExpression
		want interface{}
	}{
		// 60 is a tracked frequent value, so the estimate is exact
		{NewEqualToOperator(NewProperty(abvPath), sixtyLiteral), 6.0 / 50.0},
		{NewNotEqualToOperator(NewProperty(abvPath), sixtyLiteral), 44.0 / 50.0},
		// 40 is not tracked, so the quantiles are consulted instead
		{NewEqualToOperator(NewProperty(abvPath), fortyLiteral), 10.0 / 50.0},
		{NewNotEqualToOperator(NewProperty(abvPath), fortyLiteral), 40.0 / 50.0},
		// range comparisons
		{NewLessThanOperator(NewProperty(abvPath), sixtyLiteral), 30.0 / 50.0},
		{NewGreaterThanOperator(NewProperty(abvPath), sixtyLiteral), 30.0 / 50.0},
		// the "or equal" part is exact for the tracked value...
		{NewLessThanOrEqualOperator(NewProperty(abvPath), sixtyLiteral), 36.0 / 50.0},
		{NewGreaterThanOrEqualOperator(NewProperty(abvPath), sixtyLiteral), 36.0 / 50.0},
		// ...but not for the untracked one
		{NewLessThanOrEqualOperator(NewProperty(abvPath), fortyLiteral), 30.0 / 50.0},
		{NewGreaterThanOrEqualOperator(NewProperty(abvPath), fortyLiteral), 50.0 / 50.0},
	}

	for _, tc := range cases {
		got := tc.expr.GetSelectivity(statsByPath)
		if reflect.DeepEqual(got, tc.want) {
			continue
		}
		t.Errorf("Expected %v for %v, got %v", tc.want, tc.expr, got)
	}
}
Ejemplo n.º 2
0
// TestCompareSelectivityWithStatsNoQuantiles checks equality and inequality
// selectivity for a path whose statistics carry a most-frequent-value entry
// but no quantile ranges.
func TestCompareSelectivityWithStatsNoQuantiles(t *testing.T) {
	const abvPath = "doc.abv"

	sixty := 60.0
	sixtyLiteral := NewLiteralNumber(sixty)
	forty := 40.0
	fortyLiteral := NewLiteralNumber(forty)

	// 50 rows over 10 distinct values; 60 is registered as occurring 6 times.
	pathStat := stats.DefaultPathStats(0, 100)
	pathStat.Rows = 50
	pathStat.DistinctValues = 10
	pathStat.MostFrequentValues.Consider(sixty, 6)

	statsByPath := map[string]stats.PathStatistics{abvPath: pathStat}

	cases := []struct {
		expr BooleanExpression
		want interface{}
	}{
		// 60 is a tracked frequent value, so the estimate is exact
		{NewEqualToOperator(NewProperty(abvPath), sixtyLiteral), 6.0 / 50.0},
		{NewNotEqualToOperator(NewProperty(abvPath), sixtyLiteral), 44.0 / 50.0},
		// 40 is not tracked and there are no quantiles to consult,
		// so the default estimate is expected
		{NewEqualToOperator(NewProperty(abvPath), fortyLiteral), 1.0 / 10.0},
		{NewNotEqualToOperator(NewProperty(abvPath), fortyLiteral), 9.0 / 10.0},
	}

	for _, tc := range cases {
		got := tc.expr.GetSelectivity(statsByPath)
		if reflect.DeepEqual(got, tc.want) {
			continue
		}
		t.Errorf("Expected %v for %v, got %v", tc.want, tc.expr, got)
	}
}
Ejemplo n.º 3
0
// UpdateStats refreshes the statistics kept for this view's key path.
//
// Only views with exactly one key are processed; for any other key count the
// method just logs its start and finish. For a single-key view it:
//
//   - queries the view with reduce disabled and a limit of 0 and, on success,
//     uses the reported total row count as the row count and as the initial
//     distinct-value count;
//   - walks the view at group_level 1, counting every row as one distinct key
//     and expecting each row's value to be a _stats reduce result (a map with
//     a numeric "count");
//   - records the first and last keys seen as the min and max values
//     (NOTE(review): this presumes the view returns keys in sorted order),
//     offers every key/count pair to the most-frequent-values tracker, and
//     groups consecutive keys into quantiles of roughly equal row count;
//   - stores the result in this.dataSource.pathStats under the key and logs it.
//
// Quantiles are closed inside the loop only while at least one more quantile
// may still be opened, so the last one absorbs whatever is left. This keeps
// the number of quantiles within pathStat.NumQuantiles() and keeps the target
// re-computation from ever dividing by zero when the per-key counts add up to
// more than the row estimate. A trailing quantile that received no counts is
// not recorded.
func (this *CouchbaseViewAccessPath) UpdateStats() {

	log.Printf("Starting UpdateStats for %v", this)

	// only try to address single column stats here
	if len(this.keys) == 1 {

		pathStat := stats.DefaultPathStats(MIN_KEY, MAX_KEY)

		vres, err := this.dataSource.bucket.View(this.ddoc, this.view, map[string]interface{}{"reduce": false, "limit": 0})
		if err != nil {
			// best effort: keep the defaults, but say why the lookup failed
			log.Printf("Unable to determine cardinality of view, defaulting to MAX: %v", err)
		} else {
			pathStat.Rows = vres.TotalRows
			pathStat.DistinctValues = vres.TotalRows
		}

		// try to gather deeper stats
		// With no usable quantile count everything lands in a single quantile,
		// so the target only needs to be a value that is safe to compute.
		targetCountPerQuantile := pathStat.Rows
		if numQuantiles := pathStat.NumQuantiles(); numQuantiles > 0 {
			targetCountPerQuantile = pathStat.Rows / numQuantiles
		}
		options := map[string]interface{}{"group_level": 1}
		viewRowsChannel := make(chan couchbase.ViewRow)
		go WalkViewInBatches(viewRowsChannel, this.dataSource.bucket, this.ddoc, this.view, options, BATCH_SIZE)
		distinctRows := 0
		currentQuantile := stats.QuantileRange{}
		runningCount := 0
		numQuantilesBuilt := 0
		for row := range viewRowsChannel {
			if distinctRows == 0 {
				pathStat.MinValue = row.Key
			}
			// a quantile starts at the first key that lands in it
			if currentQuantile.Count == 0 {
				currentQuantile.Start = row.Key
			}
			pathStat.MaxValue = row.Key
			currentQuantile.End = row.Key
			distinctRows++
			// expect result to be _stats reduce; rows of any other shape
			// still count as distinct keys but add nothing to the counts
			if statsReduce, ok := row.Value.(map[string]interface{}); ok {
				if statsCount, ok := statsReduce["count"].(float64); ok {
					pathStat.MostFrequentValues.Consider(row.Key, statsCount)
					currentQuantile.Count += int(statsCount)
					runningCount += int(statsCount)
				}
			}

			// Close the quantile once it is over target, but never the last
			// permitted one: it is closed after the loop instead.
			if currentQuantile.Count > targetCountPerQuantile && numQuantilesBuilt < pathStat.NumQuantiles()-1 {
				pathStat.Quantiles = append(pathStat.Quantiles, currentQuantile)
				numQuantilesBuilt++
				// update the target counts (we may have overshot because of a large bin);
				// the guard above keeps this divisor at 1 or more
				targetCountPerQuantile = (pathStat.Rows - runningCount) / (pathStat.NumQuantiles() - numQuantilesBuilt)
				// empty out a new quantile
				currentQuantile = stats.QuantileRange{}
			}
		}
		// close out the last quantile, unless nothing was counted into it
		if currentQuantile.Count > 0 {
			pathStat.Quantiles = append(pathStat.Quantiles, currentQuantile)
		}
		pathStat.DistinctValues = distinctRows

		this.dataSource.pathStats[this.keys[0]] = pathStat
		log.Printf("%v", pathStat)
	}

	log.Printf("Finished UpdateStats for %v", this)

}