func avgVar(val [][]float64) float64 { vars := make([]float64, len(val)) for i := 0; i < len(val); i++ { vars[i] = govector.Vector(val[i]).Variance() } return meanSlice(vars) }
// maxSlice returns the largest value in x, as computed by govector.
func maxSlice(x []float64) float64 {
	v := govector.Vector(x)
	return v.Max()
}
// minSlice returns the smallest value in x, as computed by govector.
func minSlice(x []float64) float64 {
	v := govector.Vector(x)
	return v.Min()
}
// meanSlice returns the arithmetic mean of x, as computed by govector.
func meanSlice(x []float64) float64 {
	v := govector.Vector(x)
	return v.Mean()
}
// JackKnife estimates the variance of the predicted values from the RandomForest // using an infinitesimal jackknife estimator for the variance, given the // in-bag samples used by the RandomForest, and the predicted values from each tree. // // The underlying algorithm is described in detail in the paper: // "Confidence Intervals for Random Forests: The JackKnife and the Infinitesimal JackKnife" // by Wager S., et al. http://arxiv.org/pdf/1311.4555.pdf // // The complete R implementation is available here: https://github.com/swager/randomForestCI // func JackKnife(predictionSlice, inbag [][]float64) ([]*Prediction, error) { if len(predictionSlice) == 0 || len(inbag) == 0 { return nil, fmt.Errorf("prediction and inbag size must be equal") } var ( m = len(predictionSlice) n = len(inbag) B = float64(len(predictionSlice[0])) B2 = B * B nvar = avgVar(inbag) ) var ( avgPreds = make([]float64, m) sums = make([]float64, m) output = make([]*Prediction, m) errs = make(chan error, m) wg sync.WaitGroup ) // normalize the prediction slices for i, predictions := range predictionSlice { wg.Add(1) go func(idx int, p []float64) { defer wg.Done() preds := govector.Vector(p) avgPred := preds.Mean() predictionSlice[idx] = preds.Apply(func(f float64) float64 { return f - avgPred }) avgPreds[idx] = avgPred }(i, predictions) } wg.Wait() // calculate the raw infinitesimal jackknife values // V_{i, j} = \sum_{i, n} Cov( inbag[i], pred ) ^ 2 for i, preds := range predictionSlice { wg.Add(1) go func(idx int, p []float64) { defer wg.Done() sum := 0.0 for i := 0; i < n; i++ { val, err := govector.DotProduct(govector.Vector(inbag[i]), p) if err != nil { errs <- err return } val *= val val /= B2 sum += val } sums[idx] = sum }(i, preds) } wg.Wait() // check for errors if len(errs) > 0 { return nil, <-errs } // normalize the IJ value with Monte-Carlo bias correction for i, preds := range predictionSlice { wg.Add(1) go func(idx int, p []float64) { defer wg.Done() bv := 0.0 for i := 0; i < len(p); 
i++ { bv += p[i] * p[i] } variance := sums[idx] - float64(n)*nvar*(bv/B)/B output[idx] = &Prediction{ Value: avgPreds[idx], Variance: variance, } }(i, preds) } wg.Wait() return output, nil }