// SetupCostGradFunc returns a minimize.CostGradientFunc bound to the network n
// and the training set data.
//
// Each call of the returned function takes a single parameter matrix t,
// converts it with ReshapeParams(t, n.LayerSizes) into the form accepted by
// nn.CostFunction and nn.BackProp, and returns the cost together with the
// back-propagated gradients passed through UnrollParams. n.LayerSizes and
// n.Lambda are read on every call rather than captured when the function is
// created.
func SetupCostGradFunc(n *NN, data []nn.TrainingExample) minimize.CostGradientFunc {
	return func(t *matrix.Matrix) (float64, *matrix.Matrix) {
		weights := ReshapeParams(t, n.LayerSizes)

		// Cost first, then gradients, both for the same weights and lambda.
		j := nn.CostFunction(data, weights, n.Lambda)
		grads := nn.BackProp(weights, data, n.Lambda)

		// The gradients go back out in the same single-matrix form as t came in.
		return j, UnrollParams(grads)
	}
}
// TrainNN runs minimize.Fmincg on the parameters of n for the training set
// data and stores the result back into n.Thetas.
//
// iter is forwarded unchanged to minimize.Fmincg (presumably the iteration
// limit; confirm against that package). The cost on data is printed to
// standard output before and after the optimisation.
func TrainNN(n *NN, data []nn.TrainingExample, iter int) {
	costGrad := SetupCostGradFunc(n, data)

	fmt.Println("Cost before:", nn.CostFunction(data, n.Thetas, n.Lambda))

	// Fmincg works on one unrolled parameter matrix, so convert on the way in
	// and reshape its result on the way out.
	start := UnrollParams(n.Thetas)
	optimized := minimize.Fmincg(costGrad, start, iter, true)
	n.Thetas = ReshapeParams(optimized, n.LayerSizes)

	fmt.Println("Cost after:", nn.CostFunction(data, n.Thetas, n.Lambda))
}