func TestCancer(t *testing.T) { X, y := datasets.Load("cancer") datasets.RandomShuffle(X, y) XTrain, XTest := X[:67], X[67:] yTrain, yTest := y[:67], y[67:] beta := LinearRegression(XTrain, yTrain) // validate on held out data yPred := matrix.VecMult(XTest, beta) fmt.Println("cancer error", metrics.MeanSquaredError(yPred, yTest)) }
func testDataset(X [][]float64, y []float64) float64 { runtime.GOMAXPROCS(runtime.NumCPU()) datasets.RandomShuffle(X, y) kFolds := 5 foldSize := int(float64(len(X)) / float64(kFolds)) foldSum := 0.0 for k := 0; k < kFolds; k++ { start := k * foldSize for j := 0; j < foldSize; j++ { X[j], X[start+j] = X[start+j], X[j] y[j], y[start+j] = y[start+j], y[j] } tree, _ := DecisionTree(1, GINI) tree.Fit(X[foldSize:], y[foldSize:]) yPred := tree.Classify(X[:foldSize]) foldSum += metrics.Accuracy(yPred, y[:foldSize]) } return foldSum / float64(kFolds) }