// FeaturizeTrainable creates the features from the inputs. featurizedInputs
// must be nSamples x nFeatures or nil; if nil, a new matrix is allocated.
func FeaturizeTrainable(t Trainable, inputs common.RowMatrix, featurizedInputs *mat64.Dense) *mat64.Dense {
	nSamples, nDim := inputs.Dims()
	if featurizedInputs == nil {
		nFeatures := t.NumFeatures()
		featurizedInputs = mat64.NewDense(nSamples, nFeatures, nil)
	}

	rowViewer, isRowViewer := inputs.(mat64.RowViewer)
	var f func(start, end int)
	if isRowViewer {
		// The input matrix exposes its rows directly, so featurize in place
		// without copying each row.
		f = func(start, end int) {
			featurizer := t.NewFeaturizer()
			for i := start; i < end; i++ {
				featurizer.Featurize(rowViewer.RowView(i), featurizedInputs.RowView(i))
			}
		}
	} else {
		// No RowView available: copy each row into a scratch slice first.
		f = func(start, end int) {
			featurizer := t.NewFeaturizer()
			input := make([]float64, nDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				featurizer.Featurize(input, featurizedInputs.RowView(i))
			}
		}
	}

	common.ParallelFor(nSamples, common.GetGrainSize(nSamples, minGrain, maxGrain), f)
	return featurizedInputs
}
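// A minimal usage sketch for FeaturizeTrainable. The Trainable value t is a
// hypothetical stand-in for any type satisfying the interface; the data
// variables are assumed to be in scope.
//
//	inputs := mat64.NewDense(nSamples, nDim, data)
//	// Pass nil to have the feature matrix allocated internally...
//	features := FeaturizeTrainable(t, inputs, nil)
//	// ...or reuse a preallocated nSamples x t.NumFeatures() matrix.
//	features = FeaturizeTrainable(t, inputs, features)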
// LinearSolve trains a Linear algorithm.
// Assumes inputs and outputs are already scaled.
// If features is nil, will call FeaturizeTrainable.
// Will return nil if regularizer is not a linear regularizer.
// Is destructive if any of the weights are zero.
// Losser is always the two-norm.
// Does not set the value of the parameters (in case this is called in parallel with a different routine).
func LinearSolve(linearTrainable LinearTrainable, features *mat64.Dense, inputs, trueOutputs common.RowMatrix,
	weights []float64, regularizer regularize.Regularizer) (parameters []float64) {
	// TODO: Allow Tikhonov regularization
	// TODO: Add test for weights
	// TODO: Need to do something about returning a []float64

	if !IsLinearSolveRegularizer(regularizer) {
		return nil
	}

	if features == nil {
		features = FeaturizeTrainable(linearTrainable, inputs, features)
	}

	_, nFeatures := features.Dims()

	var weightedFeatures, weightedOutput *mat64.Dense

	if weights != nil {
		// Weighted least squares: scale each sample (row) by sqrt(weight) so
		// that the unweighted solve below minimizes the weighted two-norm.
		scaledWeight := make([]float64, len(weights))
		for i, weight := range weights {
			scaledWeight[i] = math.Sqrt(weight)
		}
		nSamples, outputDim := trueOutputs.Dims()
		diagWeight := diagonal.NewDiagonal(nSamples, scaledWeight)

		weightedOutput = mat64.NewDense(nSamples, outputDim, nil)
		weightedFeatures = mat64.NewDense(nSamples, nFeatures, nil)

		weightedOutput.Mul(diagWeight, trueOutputs)
		weightedFeatures.Mul(diagWeight, features)
	}

	switch regularizer.(type) {
	case nil:
	case regularize.None:
	default:
		panic("Shouldn't be here. Must be error in IsLinearSolveRegularizer")
	}

	if weights == nil {
		parameterMat := mat64.Solve(features, trueOutputs)
		return parameterMat.RawMatrix().Data
	}
	parameterMat := mat64.Solve(weightedFeatures, weightedOutput)
	return parameterMat.RawMatrix().Data
}
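// A minimal usage sketch for LinearSolve. The LinearTrainable value lt is a
// hypothetical stand-in, and regularize.None{} assumes None is a zero-field
// struct type (as the type switch above suggests). Passing nil weights gives
// an ordinary (unweighted) least-squares solve.
//
//	params := LinearSolve(lt, nil, inputs, trueOutputs, nil, regularize.None{})
//	if params == nil {
//		// the regularizer was not solvable in closed form
//	}
//
//	// Weighted solve: one nonnegative weight per training sample.
//	weights := make([]float64, nSamples)
//	for i := range weights {
//		weights[i] = 1
//	}
//	params = LinearSolve(lt, nil, inputs, trueOutputs, weights, regularize.None{})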
// TestPredictAndBatch tests that Predict returns the expected value, and that
// calling predict in parallel (PredictBatch) also works.
func TestPredictAndBatch(t *testing.T, p Predictor, inputs, trueOutputs common.RowMatrix, name string) {
	nSamples, inputDim := inputs.Dims()
	if inputDim != p.InputDim() {
		panic("input dim doesn't match predictor input dim")
	}
	nOutSamples, outputDim := trueOutputs.Dims()
	if outputDim != p.OutputDim() {
		panic("output dim doesn't match predictor output dim")
	}
	if nOutSamples != nSamples {
		panic("inputs and outputs have different number of rows")
	}

	// First, test sequentially.
	for i := 0; i < nSamples; i++ {
		trueOut := make([]float64, outputDim)
		for j := 0; j < outputDim; j++ {
			trueOut[j] = trueOutputs.At(i, j)
		}
		// Predict with a nil output slice.
		input := make([]float64, inputDim)
		inputCpy := make([]float64, inputDim)
		for j := 0; j < inputDim; j++ {
			input[j] = inputs.At(i, j)
			inputCpy[j] = inputs.At(i, j)
		}

		out1, err := p.Predict(input, nil)
		if err != nil {
			t.Errorf("%v: error predicting with nil output", name)
			return
		}
		if !floats.Equal(input, inputCpy) {
			t.Errorf("%v: input changed with nil output for row %v", name, i)
			break
		}
		out2 := make([]float64, outputDim)
		for j := 0; j < outputDim; j++ {
			out2[j] = rand.NormFloat64()
		}

		_, err = p.Predict(input, out2)
		if err != nil {
			t.Errorf("%v: error predicting with non-nil output for row %v", name, i)
			break
		}
		if !floats.Equal(input, inputCpy) {
			t.Errorf("%v: input changed with non-nil output for row %v", name, i)
			break
		}

		if !floats.Equal(out1, out2) {
			t.Errorf("%v: different answers with nil and non-nil predict", name)
			break
		}
		if !floats.EqualApprox(out1, trueOut, 1e-14) {
			t.Errorf("%v: predicted output doesn't match for row %v. Expected %v, found %v", name, i, trueOut, out1)
			break
		}
	}

	// Check that Predict errors with badly sized arguments.
	badOutput := make([]float64, outputDim+1)
	input := make([]float64, inputDim)
	for i := 0; i < inputDim; i++ {
		input[i] = inputs.At(0, i)
	}
	output := make([]float64, outputDim)
	for i := 0; i < outputDim; i++ {
		output[i] = trueOutputs.At(0, i)
	}

	_, err := p.Predict(input, badOutput)
	if err == nil {
		t.Errorf("%v: Predict did not err when output is too large", name)
	}
	if outputDim > 1 {
		badOutput := make([]float64, outputDim-1)
		_, err := p.Predict(input, badOutput)
		if err == nil {
			t.Errorf("%v: Predict did not err when output is too small", name)
		}
	}

	badInput := make([]float64, inputDim+1)
	_, err = p.Predict(badInput, output)
	if err == nil {
		t.Errorf("%v: Predict did not err when input is too large", name)
	}
	if inputDim > 1 {
		badInput := make([]float64, inputDim-1)
		_, err = p.Predict(badInput, output)
		if err == nil {
			t.Errorf("%v: Predict did not err when input is too small", name)
		}
	}

	// Now, test batch prediction.
	// With a nil output matrix.
	inputCpy := &mat64.Dense{}
	inputCpy.Clone(inputs)
	predOutput, err := p.PredictBatch(inputs, nil)
	if err != nil {
		t.Errorf("%v: error batch predicting: %v", name, err)
	}
	if !inputCpy.Equals(inputs) {
		t.Errorf("%v: inputs changed during call to PredictBatch", name)
	}
	predOutputRows, predOutputCols := predOutput.Dims()
	if predOutputRows != nSamples || predOutputCols != outputDim {
		t.Errorf("%v: dimension mismatch after PredictBatch with nil output", name)
	}

	// With a non-nil output matrix.
	outputs := mat64.NewDense(nSamples, outputDim, nil)
	_, err = p.PredictBatch(inputs, outputs)
	if err != nil {
		t.Errorf("%v: error batch predicting with non-nil output: %v", name, err)
	}
	pd := predOutput.(*mat64.Dense)
	if !pd.Equals(outputs) {
		t.Errorf("%v: different outputs from PredictBatch with nil and non-nil", name)
	}

	badInputs := mat64.NewDense(nSamples, inputDim+1, nil)
	_, err = p.PredictBatch(badInputs, outputs)
	if err == nil {
		t.Errorf("%v: PredictBatch did not err when input dim too large", name)
	}
	badInputs = mat64.NewDense(nSamples+1, inputDim, nil)
	_, err = p.PredictBatch(badInputs, outputs)
	if err == nil {
		t.Errorf("%v: PredictBatch did not err with row mismatch", name)
	}
	badOutputs := mat64.NewDense(nSamples, outputDim+1, nil)
	_, err = p.PredictBatch(inputs, badOutputs)
	if err == nil {
		t.Errorf("%v: PredictBatch did not err with output dim too large", name)
	}
}
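// A minimal sketch of how a concrete predictor's test might invoke
// TestPredictAndBatch. The constructor newMyPredictor and the data slices
// are hypothetical; any type satisfying Predictor works the same way.
//
//	func TestMyPredictor(t *testing.T) {
//		p := newMyPredictor() // hypothetical, satisfies Predictor
//		inputs := mat64.NewDense(nSamples, inputDim, inData)
//		trueOutputs := mat64.NewDense(nSamples, outputDim, outData)
//		TestPredictAndBatch(t, p, inputs, trueOutputs, "myPredictor")
//	}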
// BatchPredict predicts the output for each row of inputs in parallel,
// writing the results into outputs (allocated if nil).
func BatchPredict(batch BatchPredictor, inputs common.RowMatrix, outputs common.MutableRowMatrix, inputDim, outputDim int, grainSize int) (common.MutableRowMatrix, error) {
	// TODO: Add in something about error

	// Check that the inputs and outputs are the right sizes.
	nSamples, dimInputs := inputs.Dims()
	if inputDim != dimInputs {
		return outputs, errors.New("predict batch: input dimension mismatch")
	}
	if outputs == nil {
		outputs = mat64.NewDense(nSamples, outputDim, nil)
	} else {
		nOutputSamples, dimOutputs := outputs.Dims()
		if dimOutputs != outputDim {
			return outputs, errors.New("predict batch: output dimension mismatch")
		}
		if nSamples != nOutputSamples {
			return outputs, errors.New("predict batch: rows mismatch")
		}
	}

	// Perform predictions in parallel. For each parallel call, form a new predictor so that
	// memory allocations are saved and no race condition happens.
	// If the input and/or output is a RowViewer, save time by avoiding a copy.
	inputRVer, inputIsRowViewer := inputs.(mat64.RowViewer)
	outputRVer, outputIsRowViewer := outputs.(mat64.RowViewer)

	// Wrapper function to allow parallel prediction. Uses RowView where the type has it.
	var f func(start, end int)
	switch {
	default:
		panic("Shouldn't be here")
	case inputIsRowViewer && outputIsRowViewer:
		f = func(start, end int) {
			p := batch.NewPredictor()
			for i := start; i < end; i++ {
				p.Predict(inputRVer.RowView(i), outputRVer.RowView(i))
			}
		}
	case inputIsRowViewer && !outputIsRowViewer:
		f = func(start, end int) {
			p := batch.NewPredictor()
			output := make([]float64, outputDim)
			for i := start; i < end; i++ {
				outputs.Row(output, i)
				p.Predict(inputRVer.RowView(i), output)
				outputs.SetRow(i, output)
			}
		}
	case !inputIsRowViewer && outputIsRowViewer:
		f = func(start, end int) {
			p := batch.NewPredictor()
			input := make([]float64, inputDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				p.Predict(input, outputRVer.RowView(i))
			}
		}
	case !inputIsRowViewer && !outputIsRowViewer:
		f = func(start, end int) {
			p := batch.NewPredictor()
			input := make([]float64, inputDim)
			output := make([]float64, outputDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				outputs.Row(output, i)
				p.Predict(input, output)
				outputs.SetRow(i, output)
			}
		}
	}
	common.ParallelFor(nSamples, grainSize, f)
	return outputs, nil
}
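// A minimal usage sketch for BatchPredict, assuming bp is some value
// satisfying BatchPredictor (hypothetical). A nil outputs matrix is
// allocated internally; grainSize controls how many rows each goroutine
// handles per chunk.
//
//	outputs, err := BatchPredict(bp, inputs, nil, inputDim, outputDim, grainSize)
//	if err != nil {
//		// the stated dims did not match the supplied matrices
//	}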
// NewBatchGradBased creates a new BatchGradBased with the given inputs.
func NewBatchGradBased(trainable Trainable, cacheFeatures bool, inputs, outputs common.RowMatrix, losser loss.DerivLosser, regularizer regularize.Regularizer) *BatchGradBased {
	var features *mat64.Dense
	if cacheFeatures {
		features = FeaturizeTrainable(trainable, inputs, nil)
	}

	// TODO: Add in error checking
	nTrain, outputDim := outputs.Dims()
	_, inputDim := inputs.Dims()

	g := &BatchGradBased{
		t:           trainable,
		inputs:      inputs,
		outputs:     outputs,
		features:    features,
		losser:      losser,
		regularizer: regularizer,
		nTrain:      nTrain,
		outputDim:   outputDim,
		inputDim:    inputDim,
		nParameters: trainable.NumParameters(),
		grainSize:   trainable.GrainSize(),
	}

	// TODO: Add in row viewer stuff
	// TODO: Create a different function for computing just the loss
	//inputRowViewer, ok := inputs.(mat64.RowViewer)
	//outputRowViewer, ok := outputs.(mat64.RowViewer)

	// TODO: Move this to its own function
	var f func(start, end int, c chan lossDerivStruct, parameters []float64)
	switch {
	default:
		panic("Shouldn't be here")
	case cacheFeatures:
		f = func(start, end int, c chan lossDerivStruct, parameters []float64) {
			lossDeriver := g.t.NewLossDeriver()
			prediction := make([]float64, g.outputDim)
			dLossDPred := make([]float64, g.outputDim)
			dLossDWeight := make([]float64, g.nParameters)
			totalDLossDWeight := make([]float64, g.nParameters)
			var loss float64
			output := make([]float64, g.outputDim)
			for i := start; i < end; i++ {
				// Compute the prediction
				lossDeriver.Predict(parameters, g.features.RowView(i), prediction)
				// Compute the loss
				g.outputs.Row(output, i)
				loss += g.losser.LossDeriv(prediction, output, dLossDPred)
				// Compute the derivative
				lossDeriver.Deriv(parameters, g.features.RowView(i), prediction, dLossDPred, dLossDWeight)
				// Add to the total derivative
				floats.Add(totalDLossDWeight, dLossDWeight)
			}
			// Send the value back on the channel
			c <- lossDerivStruct{
				loss:  loss,
				deriv: totalDLossDWeight,
			}
		}
	case !cacheFeatures:
		f = func(start, end int, c chan lossDerivStruct, parameters []float64) {
			lossDeriver := g.t.NewLossDeriver()
			prediction := make([]float64, g.outputDim)
			dLossDPred := make([]float64, g.outputDim)
			dLossDWeight := make([]float64, g.nParameters)
			totalDLossDWeight := make([]float64, g.nParameters)
			var loss float64
			output := make([]float64, g.outputDim)
			input := make([]float64, g.inputDim)
			features := make([]float64, g.t.NumFeatures())
			featurizer := g.t.NewFeaturizer()
			for i := start; i < end; i++ {
				// Features are not cached, so featurize this row on the fly.
				g.inputs.Row(input, i)
				featurizer.Featurize(input, features)
				// Compute the prediction
				lossDeriver.Predict(parameters, features, prediction)
				// Compute the loss
				g.outputs.Row(output, i)
				loss += g.losser.LossDeriv(prediction, output, dLossDPred)
				// Compute the derivative
				lossDeriver.Deriv(parameters, features, prediction, dLossDPred, dLossDWeight)
				// Add to the total derivative
				floats.Add(totalDLossDWeight, dLossDWeight)
			}
			// Send the value back on the channel (once per chunk, after the loop)
			c <- lossDerivStruct{
				loss:  loss,
				deriv: totalDLossDWeight,
			}
		}
	}
	g.lossDerivFunc = f
	return g
}
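// A minimal construction sketch for BatchGradBased. The Trainable value t,
// the training matrices, and the concrete losser and regularizer literals
// (loss.SquaredDistance{}, regularize.None{}) are assumptions about those
// packages' exported types, not guaranteed names. Setting cacheFeatures to
// true trades an nTrain x NumFeatures matrix of memory for skipping
// re-featurization on every objective evaluation.
//
//	g := NewBatchGradBased(t, true, inputs, outputs, loss.SquaredDistance{}, regularize.None{})
//	// g.lossDerivFunc is now ready to be driven in parallel chunks by an optimizer.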