Example #1
// avgVar returns the mean of the per-row variances of val; JackKnife uses it
// to estimate the average variance of the in-bag counts across trees.
func avgVar(val [][]float64) float64 {
	vars := make([]float64, len(val))
	for i := range val {
		vars[i] = govector.Vector(val[i]).Variance()
	}
	return meanSlice(vars)
}
Example #2
// maxSlice returns the largest value in x.
func maxSlice(x []float64) float64 {
	return govector.Vector(x).Max()
}
Example #3
// minSlice returns the smallest value in x.
func minSlice(x []float64) float64 {
	return govector.Vector(x).Min()
}
Example #4
// meanSlice returns the arithmetic mean of x.
func meanSlice(x []float64) float64 {
	return govector.Vector(x).Mean()
}
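
These four helpers are thin wrappers over the govector package. Below is a minimal usage sketch, assuming the snippets above live in the same package with "fmt" imported; demoHelpers is a hypothetical function name and the values are illustrative:

func demoHelpers() {
	x := []float64{1, 2, 3, 4}
	rows := [][]float64{
		{1, 2, 3, 4},
		{2, 4, 6, 8},
	}

	fmt.Println(maxSlice(x))  // 4
	fmt.Println(minSlice(x))  // 1
	fmt.Println(meanSlice(x)) // 2.5
	// mean of the per-row variances; the exact value depends on whether
	// govector's Variance is the sample or the population variance
	fmt.Println(avgVar(rows))
}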
Example #5
// JackKnife estimates the variance of the values predicted by a RandomForest
// using an infinitesimal jackknife estimator, given the in-bag samples used
// by the RandomForest and the predicted values from each tree.
//
// The underlying algorithm is described in detail in the paper:
// "Confidence Intervals for Random Forests: The Jackknife and the Infinitesimal Jackknife"
// by Wager S., et al. http://arxiv.org/pdf/1311.4555.pdf
//
// The complete R implementation is available here: https://github.com/swager/randomForestCI
func JackKnife(predictionSlice, inbag [][]float64) ([]*Prediction, error) {
	if len(predictionSlice) == 0 || len(inbag) == 0 {
		return nil, fmt.Errorf("prediction and inbag slices must not be empty")
	}

	var (
		m    = len(predictionSlice)             // number of observations being predicted
		n    = len(inbag)                       // number of training samples
		B    = float64(len(predictionSlice[0])) // number of trees in the forest
		B2   = B * B
		nvar = avgVar(inbag) // average variance of the in-bag counts
	)

	var (
		avgPreds = make([]float64, m)
		sums     = make([]float64, m)
		output   = make([]*Prediction, m)
		errs     = make(chan error, m)
		wg       sync.WaitGroup
	)

	// center each prediction slice around its mean; the covariances
	// below are computed against these centered values
	for i, predictions := range predictionSlice {
		wg.Add(1)
		go func(idx int, p []float64) {
			defer wg.Done()

			preds := govector.Vector(p)
			avgPred := preds.Mean()
			predictionSlice[idx] = preds.Apply(func(f float64) float64 { return f - avgPred })
			avgPreds[idx] = avgPred
		}(i, predictions)
	}
	wg.Wait()

	// calculate the raw infinitesimal jackknife variance estimate:
	// V_IJ = sum_{i=1}^{n} Cov(inbag[i], preds)^2
	for i, preds := range predictionSlice {
		wg.Add(1)
		go func(idx int, p []float64) {
			defer wg.Done()
			sum := 0.0
			for j := 0; j < n; j++ {
				// since p has been centered, DotProduct(inbag[j], p)/B is the
				// covariance between the in-bag counts and the predictions
				val, err := govector.DotProduct(govector.Vector(inbag[j]), p)
				if err != nil {
					errs <- err
					return
				}

				val *= val
				val /= B2
				sum += val
			}
			sums[idx] = sum
		}(i, preds)
	}
	wg.Wait()

	// report the first error raised by the covariance goroutines, if any
	if len(errs) > 0 {
		return nil, <-errs
	}

	// apply the Monte Carlo bias correction to the raw IJ estimate:
	// V_IJ-U = V_IJ - n * nvar * bootVar / B
	for i, preds := range predictionSlice {
		wg.Add(1)
		go func(idx int, p []float64) {
			defer wg.Done()
			// bv/B is the bootstrap variance of the (centered) predictions
			bv := 0.0
			for j := 0; j < len(p); j++ {
				bv += p[j] * p[j]
			}

			variance := sums[idx] - float64(n)*nvar*(bv/B)/B

			output[idx] = &Prediction{
				Value:    avgPreds[idx],
				Variance: variance,
			}
		}(i, preds)
	}
	wg.Wait()

	return output, nil
}
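
The Prediction type itself is not shown in these examples. Below is a minimal sketch of a definition consistent with how JackKnife populates it, together with a hypothetical call; examplePrediction and the data are illustrative, "fmt" and "log" are assumed to be imported, and the shapes matter: every row of predictionSlice and of inbag must have one entry per tree (B in total).

// Prediction pairs a point estimate with its estimated variance
// (assumed minimal definition; the real type may carry more fields).
type Prediction struct {
	Value    float64
	Variance float64
}

func examplePrediction() {
	// one row per predicted observation, one column per tree (B = 4)
	predictionSlice := [][]float64{
		{1.1, 0.9, 1.0, 1.2},
		{2.0, 2.2, 1.8, 2.1},
	}
	// one row per training sample; inbag[i][b] counts how many times
	// sample i appeared in the bootstrap sample of tree b
	inbag := [][]float64{
		{1, 0, 2, 1},
		{0, 2, 1, 1},
		{2, 1, 0, 1},
	}

	preds, err := JackKnife(predictionSlice, inbag)
	if err != nil {
		log.Fatal(err)
	}
	for i, p := range preds {
		fmt.Printf("obs %d: value=%.3f variance=%.3f\n", i, p.Value, p.Variance)
	}
}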