// FeaturizeTrainable creates the features from the inputs. featurizedInputs must be
// nSamples x nFeatures or nil; if nil, a new matrix is allocated.
func FeaturizeTrainable(t Trainable, inputs common.RowMatrix, featurizedInputs *mat64.Dense) *mat64.Dense {
	nSamples, nDim := inputs.Dims()
	if featurizedInputs == nil {
		nFeatures := t.NumFeatures()
		featurizedInputs = mat64.NewDense(nSamples, nFeatures, nil)
	}

	rowViewer, isRowViewer := inputs.(mat64.RawRowViewer)
	var f func(start, end int)
	if isRowViewer {
		f = func(start, end int) {
			featurizer := t.NewFeaturizer()
			for i := start; i < end; i++ {
				featurizer.Featurize(rowViewer.RawRowView(i), featurizedInputs.RawRowView(i))
			}
		}
	} else {
		f = func(start, end int) {
			featurizer := t.NewFeaturizer()
			input := make([]float64, nDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				featurizer.Featurize(input, featurizedInputs.RawRowView(i))
			}
		}
	}

	common.ParallelFor(nSamples, common.GetGrainSize(nSamples, minGrain, maxGrain), f)
	return featurizedInputs
}
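// Usage sketch for FeaturizeTrainable (not part of the original source; "myTrainable"
// and "rawInputs" are hypothetical stand-ins for a concrete Trainable and its input
// matrix). Passing nil asks the function to allocate an nSamples x NumFeatures()
// matrix; passing a correctly sized matrix reuses that storage on later calls.
func exampleFeaturize(myTrainable Trainable, rawInputs *mat64.Dense) *mat64.Dense {
	// First call: let FeaturizeTrainable allocate the feature matrix.
	feats := FeaturizeTrainable(myTrainable, rawInputs, nil)
	// Subsequent calls can reuse the same matrix to avoid reallocation.
	feats = FeaturizeTrainable(myTrainable, rawInputs, feats)
	return feats
}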
// Train initializes the centroids from random data points and computes the distance
// from every sample to every centroid.
// TODO: Finish the k-means iteration (assign samples to clusters and update centroids).
func (k *KMeans) Train(inputs common.RowMatrix) {
	nSamples, inputDim := inputs.Dims()

	centroids := make([][]float64, k.Clusters)
	// Assign the centroids to random data points to start
	perm := rand.Perm(nSamples)
	for i := 0; i < k.Clusters; i++ {
		data := make([]float64, inputDim)
		idx := perm[i]
		inputs.Row(data, idx)
		centroids[i] = data
	}

	distances := mat64.NewDense(nSamples, k.Clusters, nil)
	row := make([]float64, inputDim)
	for i := 0; i < nSamples; i++ {
		inputs.Row(row, i)
		for j := 0; j < k.Clusters; j++ {
			d := floats.Distance(row, centroids[j])
			distances.Set(i, j, d)
		}
	}
}
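// Sketch of the assignment step that would typically follow the distance computation
// in Train above (not part of the original source; shown only to illustrate how the
// distances matrix is usually consumed in k-means): each sample is assigned to the
// centroid with the smallest distance.
func assignClusters(distances *mat64.Dense, nSamples, nClusters int) []int {
	assignments := make([]int, nSamples)
	for i := 0; i < nSamples; i++ {
		best := 0
		bestDist := distances.At(i, 0)
		for j := 1; j < nClusters; j++ {
			if d := distances.At(i, j); d < bestDist {
				best = j
				bestDist = d
			}
		}
		assignments[i] = best
	}
	return assignments
}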
// TestLinearsolveAndDeriv compares the optimal weights found from gradient-based
// optimization with those found from computing a linear solve.
func TestLinearsolveAndDeriv(t *testing.T, linear train.LinearTrainable, inputs, trueOutputs common.RowMatrix, name string) {
	// Compare with no weights
	rows, cols := trueOutputs.Dims()
	predOutLinear := mat64.NewDense(rows, cols, nil)
	parametersLinearSolve := train.LinearSolve(linear, nil, inputs, trueOutputs, nil, nil)
	linear.SetParameters(parametersLinearSolve)
	linear.Predictor().PredictBatch(inputs, predOutLinear)

	// First gradient-based optimization from a random starting point.
	linear.RandomizeParameters()
	parameters := linear.Parameters(nil)
	batch := train.NewBatchGradBased(linear, true, inputs, trueOutputs, nil, loss.SquaredDistance{}, regularize.None{})
	settings := multivariate.DefaultSettings()
	settings.GradAbsTol = 1e-11
	result, err := multivariate.OptimizeGrad(batch, parameters, settings, nil)
	if err != nil {
		t.Errorf("Error training: %v", err)
	}
	parametersDeriv := result.Loc

	deriv := make([]float64, linear.NumParameters())
	loss1 := batch.ObjGrad(parametersDeriv, deriv)
	linear.SetParameters(parametersDeriv)
	predOutDeriv := mat64.NewDense(rows, cols, nil)
	linear.Predictor().PredictBatch(inputs, predOutDeriv)

	// Second gradient-based optimization from a different random starting point.
	linear.RandomizeParameters()
	init2 := linear.Parameters(nil)
	batch2 := train.NewBatchGradBased(linear, true, inputs, trueOutputs, nil, loss.SquaredDistance{}, regularize.None{})
	result2, err := multivariate.OptimizeGrad(batch2, init2, settings, nil)
	if err != nil {
		t.Errorf("Error training: %v", err)
	}
	parametersDeriv2 := result2.Loc

	// Evaluate the loss and gradient at each of the three solutions.
	deriv2 := make([]float64, linear.NumParameters())
	loss2 := batch2.ObjGrad(parametersDeriv2, deriv2)
	derivlinear := make([]float64, linear.NumParameters())
	lossLin := batch2.ObjGrad(parametersLinearSolve, derivlinear)
	_, _, _ = loss1, loss2, lossLin

	if !floats.EqualApprox(parametersLinearSolve, parametersDeriv, 1e-8) {
		t.Errorf("%v: parameters don't match for gradient based and linear solve", name)
	}
}
// TestPredictAndBatch tests that Predict returns the expected value, and that calling
// predict in parallel (PredictBatch) also works.
func TestPredictAndBatch(t *testing.T, p Predictor, inputs, trueOutputs common.RowMatrix, name string) {
	nSamples, inputDim := inputs.Dims()
	if inputDim != p.InputDim() {
		panic("input dim doesn't match predictor input dim")
	}
	nOutSamples, outputDim := trueOutputs.Dims()
	if outputDim != p.OutputDim() {
		panic("output dim doesn't match predictor output dim")
	}
	if nOutSamples != nSamples {
		panic("inputs and outputs have different number of rows")
	}

	// First, test sequentially
	for i := 0; i < nSamples; i++ {
		trueOut := make([]float64, outputDim)
		for j := 0; j < outputDim; j++ {
			trueOut[j] = trueOutputs.At(i, j)
		}
		// Predict with nil output
		input := make([]float64, inputDim)
		inputCpy := make([]float64, inputDim)
		for j := 0; j < inputDim; j++ {
			input[j] = inputs.At(i, j)
			inputCpy[j] = inputs.At(i, j)
		}

		out1, err := p.Predict(input, nil)
		if err != nil {
			t.Errorf("%v: error predicting with nil output", name)
			return
		}
		if !floats.Equal(input, inputCpy) {
			t.Errorf("%v: input changed with nil output for row %v", name, i)
			break
		}

		// Predict with a preallocated output
		out2 := make([]float64, outputDim)
		for j := 0; j < outputDim; j++ {
			out2[j] = rand.NormFloat64()
		}
		_, err = p.Predict(input, out2)
		if err != nil {
			t.Errorf("%v: error predicting with non-nil output for row %v", name, i)
			break
		}
		if !floats.Equal(input, inputCpy) {
			t.Errorf("%v: input changed with non-nil output for row %v", name, i)
			break
		}
		if !floats.Equal(out1, out2) {
			t.Errorf("%v: different answers with nil and non-nil predict", name)
			break
		}
		if !floats.EqualApprox(out1, trueOut, 1e-14) {
			t.Errorf("%v: predicted output doesn't match for row %v. Expected %v, found %v", name, i, trueOut, out1)
			break
		}
	}

	// Check that Predict errors with badly sized arguments
	badOutput := make([]float64, outputDim+1)
	input := make([]float64, inputDim)
	for i := 0; i < inputDim; i++ {
		input[i] = inputs.At(0, i)
	}
	output := make([]float64, outputDim)
	for i := 0; i < outputDim; i++ {
		output[i] = trueOutputs.At(0, i)
	}
	_, err := p.Predict(input, badOutput)
	if err == nil {
		t.Errorf("Predict did not return an error with an output too large")
	}
	if outputDim > 1 {
		badOutput := make([]float64, outputDim-1)
		_, err := p.Predict(input, badOutput)
		if err == nil {
			t.Errorf("Predict did not return an error with an output too small")
		}
	}

	badInput := make([]float64, inputDim+1)
	_, err = p.Predict(badInput, output)
	if err == nil {
		t.Errorf("Predict did not err when input is too large")
	}
	if inputDim > 1 {
		badInput := make([]float64, inputDim-1)
		_, err = p.Predict(badInput, output)
		if err == nil {
			t.Errorf("Predict did not err when input is too small")
		}
	}

	// Now, test batch
	// With non-nil output
	inputCpy := &mat64.Dense{}
	inputCpy.Clone(inputs)
	predOutput, err := p.PredictBatch(inputs, nil)
	if err != nil {
		t.Errorf("Error batch predicting: %v", err)
	}
	if !inputCpy.Equals(inputs) {
		t.Errorf("Inputs changed during call to PredictBatch")
	}
	predOutputRows, predOutputCols := predOutput.Dims()
	if predOutputRows != nSamples || predOutputCols != outputDim {
		t.Errorf("Dimension mismatch after PredictBatch with nil output")
	}

	outputs := mat64.NewDense(nSamples, outputDim, nil)
	_, err = p.PredictBatch(inputs, outputs)
	if err != nil {
		t.Errorf("Error batch predicting with non-nil output: %v", err)
	}
	pd := predOutput.(*mat64.Dense)
	if !pd.Equals(outputs) {
		t.Errorf("Different outputs from PredictBatch with nil and non-nil")
	}

	badInputs := mat64.NewDense(nSamples, inputDim+1, nil)
	_, err = p.PredictBatch(badInputs, outputs)
	if err == nil {
		t.Error("PredictBatch did not err when input dim too large")
	}
	badInputs = mat64.NewDense(nSamples+1, inputDim, nil)
	_, err = p.PredictBatch(badInputs, outputs)
	if err == nil {
		t.Errorf("PredictBatch did not err with row mismatch")
	}
	badOutputs := mat64.NewDense(nSamples, outputDim+1, nil)
	_, err = p.PredictBatch(inputs, badOutputs)
	if err == nil {
		t.Errorf("PredictBatch did not err with output dim too large")
	}
}
// NewBatchGradBased creates a new batch grad based with the given inputs.
func NewBatchGradBased(trainable Trainable, cacheFeatures bool, inputs, outputs common.RowMatrix, weights []float64, losser loss.DerivLosser, regularizer regularize.Regularizer) *BatchGradBased {
	var features *mat64.Dense
	if cacheFeatures {
		features = FeaturizeTrainable(trainable, inputs, nil)
	}

	// TODO: Add in error checking
	if losser == nil {
		losser = loss.SquaredDistance{}
	}
	if regularizer == nil {
		regularizer = regularize.None{}
	}
	if weights != nil {
		// TODO: Fix weights
		panic("non-nil weights")
	}

	nTrain, outputDim := outputs.Dims()
	_, inputDim := inputs.Dims()

	g := &BatchGradBased{
		t:           trainable,
		inputs:      inputs,
		outputs:     outputs,
		features:    features,
		losser:      losser,
		regularizer: regularizer,
		nTrain:      nTrain,
		outputDim:   outputDim,
		inputDim:    inputDim,
		nParameters: trainable.NumParameters(),
		grainSize:   trainable.GrainSize(),
	}

	// TODO: Add in row viewer stuff
	// TODO: Create a different function for computing just the loss
	//inputRowViewer, ok := inputs.(mat64.RowViewer)
	//outputRowViewer, ok := outputs.(mat64.RowViewer)

	// TODO: Move this to its own function
	var f func(start, end int, c chan lossDerivStruct, parameters []float64)
	switch {
	default:
		panic("Shouldn't be here")
	case cacheFeatures:
		f = func(start, end int, c chan lossDerivStruct, parameters []float64) {
			lossDeriver := g.t.NewLossDeriver()
			prediction := make([]float64, g.outputDim)
			dLossDPred := make([]float64, g.outputDim)
			dLossDWeight := make([]float64, g.nParameters)
			totalDLossDWeight := make([]float64, g.nParameters)
			var loss float64
			output := make([]float64, g.outputDim)
			for i := start; i < end; i++ {
				// Compute the prediction
				lossDeriver.Predict(parameters, g.features.RawRowView(i), prediction)
				// Compute the loss
				g.outputs.Row(output, i)
				loss += g.losser.LossDeriv(prediction, output, dLossDPred)
				// Compute the derivative
				lossDeriver.Deriv(parameters, g.features.RawRowView(i), prediction, dLossDPred, dLossDWeight)
				floats.Add(totalDLossDWeight, dLossDWeight)
			}
			// Send the value back on the channel
			c <- lossDerivStruct{
				loss:  loss,
				deriv: totalDLossDWeight,
			}
		}
	case !cacheFeatures:
		f = func(start, end int, c chan lossDerivStruct, parameters []float64) {
			lossDeriver := g.t.NewLossDeriver()
			prediction := make([]float64, g.outputDim)
			dLossDPred := make([]float64, g.outputDim)
			dLossDWeight := make([]float64, g.nParameters)
			totalDLossDWeight := make([]float64, g.nParameters)
			var loss float64
			output := make([]float64, g.outputDim)
			input := make([]float64, g.inputDim)
			features := make([]float64, g.t.NumFeatures())
			featurizer := g.t.NewFeaturizer()
			for i := start; i < end; i++ {
				g.inputs.Row(input, i)
				featurizer.Featurize(input, features)
				// Compute the prediction
				lossDeriver.Predict(parameters, features, prediction)
				// Compute the loss
				g.outputs.Row(output, i)
				loss += g.losser.LossDeriv(prediction, output, dLossDPred)
				// Compute the derivative
				lossDeriver.Deriv(parameters, features, prediction, dLossDPred, dLossDWeight)
				// Add to the total derivative
				floats.Add(totalDLossDWeight, dLossDWeight)
			}
			// Send the value back on the channel once per chunk, after the loop,
			// so the accumulated loss and derivative are reported exactly once
			// (matching the cached-features case above)
			c <- lossDerivStruct{
				loss:  loss,
				deriv: totalDLossDWeight,
			}
		}
	}
	g.lossDerivFunc = f
	return g
}
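// Usage sketch mirroring how the tests wire a BatchGradBased into the optimizer
// (see TestLinearsolveAndDeriv). "trainable", "inputs", "trueOutputs", and "params"
// are hypothetical stand-ins. Passing nil for the losser and regularizer falls back
// to loss.SquaredDistance{} and regularize.None{}.
func exampleBatchGradBased(trainable Trainable, inputs, trueOutputs common.RowMatrix, params []float64) (float64, []float64) {
	batch := NewBatchGradBased(trainable, true, inputs, trueOutputs, nil, nil, nil)
	deriv := make([]float64, trainable.NumParameters())
	// ObjGrad evaluates the total loss at params and stores the gradient in deriv.
	obj := batch.ObjGrad(params, deriv)
	return obj, deriv
}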
// BatchPredict predicts every row of inputs in parallel, writing the results into
// outputs (allocating a new matrix if outputs is nil).
func BatchPredict(batch BatchPredictor, inputs common.RowMatrix, outputs common.MutableRowMatrix, inputDim, outputDim int, grainSize int) (common.MutableRowMatrix, error) {
	// TODO: Add in something about error

	// Check that the inputs and outputs are the right sizes
	nSamples, dimInputs := inputs.Dims()
	if inputDim != dimInputs {
		return outputs, errors.New("predict batch: input dimension mismatch")
	}
	if outputs == nil {
		outputs = mat64.NewDense(nSamples, outputDim, nil)
	} else {
		nOutputSamples, dimOutputs := outputs.Dims()
		if dimOutputs != outputDim {
			return outputs, errors.New("predict batch: output dimension mismatch")
		}
		if nSamples != nOutputSamples {
			return outputs, errors.New("predict batch: rows mismatch")
		}
	}

	// Perform predictions in parallel. For each parallel call, form a new predictor so that
	// memory allocations are saved and no race condition happens.
	// If the input and/or output is a RawRowViewer, save time by avoiding a copy.
	inputRVer, inputIsRowViewer := inputs.(mat64.RawRowViewer)
	outputRVer, outputIsRowViewer := outputs.(mat64.RawRowViewer)

	var f func(start, end int)
	// wrapper function to allow parallel prediction. Uses RawRowView if the type has it
	switch {
	default:
		panic("Shouldn't be here")
	case inputIsRowViewer && outputIsRowViewer:
		f = func(start, end int) {
			p := batch.NewPredictor()
			for i := start; i < end; i++ {
				p.Predict(inputRVer.RawRowView(i), outputRVer.RawRowView(i))
			}
		}
	case inputIsRowViewer && !outputIsRowViewer:
		f = func(start, end int) {
			p := batch.NewPredictor()
			output := make([]float64, outputDim)
			for i := start; i < end; i++ {
				outputs.Row(output, i)
				p.Predict(inputRVer.RawRowView(i), output)
				outputs.SetRow(i, output)
			}
		}
	case !inputIsRowViewer && outputIsRowViewer:
		f = func(start, end int) {
			p := batch.NewPredictor()
			input := make([]float64, inputDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				p.Predict(input, outputRVer.RawRowView(i))
			}
		}
	case !inputIsRowViewer && !outputIsRowViewer:
		f = func(start, end int) {
			p := batch.NewPredictor()
			input := make([]float64, inputDim)
			output := make([]float64, outputDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				outputs.Row(output, i)
				p.Predict(input, output)
				outputs.SetRow(i, output)
			}
		}
	}
	common.ParallelFor(nSamples, grainSize, f)
	return outputs, nil
}
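// Usage sketch for BatchPredict (not part of the original source; "bp" and "data" are
// hypothetical stand-ins, and the grain size of 100 is an arbitrary choice). Passing
// nil for outputs asks BatchPredict to allocate the result matrix; the grain size
// controls how many rows each parallel worker handles per chunk.
func exampleBatchPredict(bp BatchPredictor, data *mat64.Dense, inputDim, outputDim int) (common.MutableRowMatrix, error) {
	return BatchPredict(bp, data, nil, inputDim, outputDim, 100)
}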
// LinearSolve trains a Linear algorithm.
// Assumes inputs and outputs are already scaled.
// If features is nil will call featurize.
// Will return nil if regularizer is not a linear regularizer.
// Is destructive if any of the weights are zero.
// Losser is always the two-norm.
// Does not set the value of the parameters (in case this is called in parallel with a different routine).
func LinearSolve(linearTrainable LinearTrainable, features *mat64.Dense, inputs, trueOutputs common.RowMatrix, weights []float64, regularizer regularize.Regularizer) (parameters []float64) {
	// TODO: Allow tikhonov regularization
	// TODO: Add test for weights
	// TODO: Need to do something about returning a []float64
	if !IsLinearSolveRegularizer(regularizer) {
		return nil
	}

	if features == nil {
		features = FeaturizeTrainable(linearTrainable, inputs, features)
	}
	_, nFeatures := features.Dims()

	var weightedFeatures, weightedOutput *mat64.Dense
	if weights != nil {
		panic("Need functionality to be better. Either banded special case in matrix or do the multiplication by hand")

		scaledWeight := make([]float64, len(weights))
		for i, weight := range weights {
			scaledWeight[i] = math.Sqrt(weight)
		}
		diagWeight := diagonal.NewDiagonal(len(scaledWeight), scaledWeight)

		nSamples, outputDim := trueOutputs.Dims()
		weightedOutput = mat64.NewDense(nSamples, outputDim, nil)
		weightedFeatures = mat64.NewDense(nSamples, nFeatures, nil)

		weightedOutput.Copy(trueOutputs)
		weightedFeatures.Copy(features)

		// TODO: Replace this with better than mat multiply
		weightedOutput.Mul(diagWeight, weightedOutput)
		weightedFeatures.Mul(diagWeight, weightedFeatures)
	}

	switch regularizer.(type) {
	case nil:
	case regularize.None:
	default:
		panic("Shouldn't be here. Must be error in IsLinearRegularizer")
	}

	if weights == nil {
		parameterMat, err := mat64.Solve(features, trueOutputs)
		if err != nil {
			panic(err)
		}
		return parameterMat.RawMatrix().Data
	}
	parameterMat, err := mat64.Solve(weightedFeatures, weightedOutput)
	if err != nil {
		panic(err)
	}
	return parameterMat.RawMatrix().Data
}
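// Usage sketch mirroring the call in TestLinearsolveAndDeriv (not part of the original
// source; "linear", "inputs", and "trueOutputs" are hypothetical stand-ins). LinearSolve
// does not set the parameters itself, so the caller sets them afterwards; a nil return
// signals that the regularizer is not compatible with a linear solve.
func exampleLinearSolve(linear LinearTrainable, inputs, trueOutputs common.RowMatrix) bool {
	params := LinearSolve(linear, nil, inputs, trueOutputs, nil, nil)
	if params == nil {
		return false
	}
	linear.SetParameters(params)
	return true
}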