// FeaturizeTrainable creates the features from the inputs. featurizedInputs
// must be nSamples x nFeatures or nil; if nil, a new matrix is allocated.
func FeaturizeTrainable(t Trainable, inputs common.RowMatrix, featurizedInputs *mat64.Dense) *mat64.Dense {
	nSamples, nDim := inputs.Dims()
	if featurizedInputs == nil {
		nFeatures := t.NumFeatures()
		featurizedInputs = mat64.NewDense(nSamples, nFeatures, nil)
	}

	// If the input matrix supports row views, featurize directly from them;
	// otherwise copy each row into a scratch slice first.
	rowViewer, isRowViewer := inputs.(mat64.RowViewer)
	var f func(start, end int)
	if isRowViewer {
		f = func(start, end int) {
			featurizer := t.NewFeaturizer()
			for i := start; i < end; i++ {
				featurizer.Featurize(rowViewer.RowView(i), featurizedInputs.RowView(i))
			}
		}
	} else {
		f = func(start, end int) {
			featurizer := t.NewFeaturizer()
			input := make([]float64, nDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				featurizer.Featurize(input, featurizedInputs.RowView(i))
			}
		}
	}

	common.ParallelFor(nSamples, common.GetGrainSize(nSamples, minGrain, maxGrain), f)
	return featurizedInputs
}
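// A call-only sketch (not from the original source): t stands for some
// existing Trainable implementation, and inputs is assumed to satisfy
// common.RowMatrix. Passing nil for the output matrix lets
// FeaturizeTrainable allocate the nSamples × t.NumFeatures() result itself.
func exampleFeaturizeTrainable(t Trainable, inputs *mat64.Dense) *mat64.Dense {
	return FeaturizeTrainable(t, inputs, nil)
}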
// LinearLeastSquares computes the least squares fit for the function
//
//   f(x) = β₀terms₀(x) + β₁terms₁(x) + ...
//
// to the data (xs[i], ys[i]). It returns the parameters β₀, β₁, ...
// that minimize the sum of the squares of the residuals of f:
//
//   ∑ (ys[i] - f(xs[i]))²
//
// If weights is non-nil, it is used to weight these residuals:
//
//   ∑ weights[i] × (ys[i] - f(xs[i]))²
//
// The function f is specified by one Go function for each linear
// term. For efficiency, the Go function is vectorized: it will be
// passed a slice of x values in xs and must fill the slice termOut
// with the value of the term for each value in xs.
func LinearLeastSquares(xs, ys, weights []float64, terms ...func(xs, termOut []float64)) (params []float64) {
	// The optimal parameters are found by solving for β̂ in the
	// "normal equations":
	//
	//   (𝐗ᵀ𝐖𝐗)β̂ = 𝐗ᵀ𝐖𝐲
	//
	// where 𝐖 is a diagonal weight matrix (or the identity matrix
	// for the unweighted case).

	// TODO: Consider using orthogonal decomposition.

	if len(xs) != len(ys) {
		panic("len(xs) != len(ys)")
	}
	if weights != nil && len(xs) != len(weights) {
		panic("len(xs) != len(weights)")
	}

	// Construct 𝐗ᵀ. This is the more convenient representation
	// for efficiently calling the term functions.
	xTVals := make([]float64, len(terms)*len(xs))
	for i, term := range terms {
		term(xs, xTVals[i*len(xs):i*len(xs)+len(xs)])
	}
	XT := mat64.NewDense(len(terms), len(xs), xTVals)
	X := XT.T()

	// Construct 𝐗ᵀ𝐖.
	var XTW *mat64.Dense
	if weights == nil {
		// 𝐖 is the identity matrix.
		XTW = XT
	} else {
		// Since 𝐖 is a diagonal matrix, we do this directly.
		XTW = mat64.DenseCopyOf(XT)
		WDiag := mat64.NewVector(len(weights), weights)
		for row := 0; row < len(terms); row++ {
			rowView := XTW.RowView(row)
			rowView.MulElemVec(rowView, WDiag)
		}
	}

	// Construct 𝐲.
	y := mat64.NewVector(len(ys), ys)

	// Compute β̂.
	lhs := mat64.NewDense(len(terms), len(terms), nil)
	lhs.Mul(XTW, X)
	rhs := mat64.NewVector(len(terms), nil)
	rhs.MulVec(XTW, y)
	BVals := make([]float64, len(terms))
	B := mat64.NewVector(len(terms), BVals)
	B.SolveVec(lhs, rhs)
	return BVals
}
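// A minimal usage sketch (not from the original source, and assuming fmt is
// imported): fitting the line f(x) = β₀ + β₁x with LinearLeastSquares. Each
// term is one vectorized Go function; the function name below is illustrative.
func exampleLinearLeastSquares() {
	xs := []float64{0, 1, 2, 3}
	ys := []float64{1, 3, 5, 7} // exactly y = 1 + 2x

	params := LinearLeastSquares(xs, ys, nil,
		// Constant term: terms₀(x) = 1.
		func(xs, termOut []float64) {
			for i := range termOut {
				termOut[i] = 1
			}
		},
		// Linear term: terms₁(x) = x.
		func(xs, termOut []float64) {
			copy(termOut, xs)
		},
	)
	fmt.Println(params) // expect approximately [1 2]
}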
// predictFeaturized computes the prediction for an already-featurized input:
// output = featurizedInput · featureWeights. It assumes all dimensions match.
func predictFeaturized(featurizedInput []float64, featureWeights *mat64.Dense, output []float64) {
	for i := range output {
		output[i] = 0
	}
	for j, zval := range featurizedInput {
		for i, weight := range featureWeights.RowView(j) {
			output[i] += weight * zval
		}
	}
}
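// A small illustration (not from the original source, assuming fmt is
// imported) of the weight-matrix orientation: rows of featureWeights index
// features, columns index outputs, so with z = [1 1] the prediction is the
// column sums of the weights.
func examplePredictFeaturized() {
	weights := mat64.NewDense(2, 2, []float64{
		1, 2,
		3, 4,
	})
	output := make([]float64, 2)
	predictFeaturized([]float64{1, 1}, weights, output)
	fmt.Println(output) // expect [4 6]
}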
// StochasticGradientDescent updates the parameters of theta using one row of
// the dataset at a time, rather than computing the cost function over the
// entire dataset on every update, so each step is much cheaper. The trade-off
// is accuracy: this is mitigated by running several SGD workers in parallel
// (the number of goroutines spawned is specified by procs) and averaging
// their results.
func StochasticGradientDescent(x, y, theta *mat64.Dense, alpha float64, epoch, procs int) *mat64.Dense {
	m, _ := y.Dims()
	resultPipe := make(chan *mat64.Dense)
	results := make([]*mat64.Dense, 0, procs)

	for p := 0; p < procs; p++ {
		go func() {
			// DenseCopyOf copies theta, so each worker updates its own
			// parameters rather than sharing a pointer.
			thetaCopy := mat64.DenseCopyOf(theta)
			for i := 0; i < epoch; i++ {
				for k := 0; k < m; k++ {
					datXtemp := x.RowView(k)
					datYtemp := y.RowView(k)
					datX := mat64.NewDense(1, len(datXtemp), datXtemp)
					datY := mat64.NewDense(1, 1, datYtemp)
					datXFlat := mat64.DenseCopyOf(datX)
					datXFlat.TCopy(datXFlat)

					// Error for this row: xθ - y.
					datX.Mul(datX, thetaCopy)
					datX.Sub(datX, datY)

					// Gradient for this row: xᵀ(xθ - y).
					datXFlat.Mul(datXFlat, datX)

					// Workaround for mat64 having no element-wise division:
					// scale each gradient entry by alpha/m manually.
					xFlatRow, _ := datXFlat.Dims()
					gradient := make([]float64, 0, xFlatRow)
					for r := 0; r < xFlatRow; r++ {
						for _, v := range datXFlat.RowView(r) {
							gradient = append(gradient, v/float64(m)*alpha)
						}
					}
					grad := mat64.NewDense(len(gradient), 1, gradient)
					thetaCopy.Sub(thetaCopy, grad)
				}
			}
			resultPipe <- thetaCopy
		}()
	}

	// Collect one result per worker and return their average.
	for len(results) < procs {
		results = append(results, <-resultPipe)
	}
	return averageTheta(results)
}
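// A hypothetical usage sketch (not from the original source, assuming fmt is
// imported): fitting θ for y = Xθ on a toy dataset with a bias column, four
// workers, and a small learning rate. All values here are illustrative only.
func exampleStochasticGradientDescent() {
	x := mat64.NewDense(4, 2, []float64{
		1, 0,
		1, 1,
		1, 2,
		1, 3,
	})
	y := mat64.NewDense(4, 1, []float64{1, 3, 5, 7}) // y = 1 + 2x₁
	theta := mat64.NewDense(2, 1, nil)               // start from θ = 0

	fitted := StochasticGradientDescent(x, y, theta, 0.1, 1000, 4)
	fmt.Println(mat64.Formatted(fitted)) // expect θ near [1; 2]
}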
// predict computes the prediction for input into output; it assumes all
// inputs are correctly sized.
func predict(input []float64, features *mat64.Dense, b []float64, featureWeights *mat64.Dense, output []float64) {
	for i := range output {
		output[i] = 0
	}
	nFeatures, _ := features.Dims()
	_, outputDim := featureWeights.Dims()
	sqrt2OverD := math.Sqrt(2.0 / float64(nFeatures))
	for i := 0; i < nFeatures; i++ {
		z := computeZ(input, features.RowView(i), b[i], sqrt2OverD)
		for j := 0; j < outputDim; j++ {
			output[j] += z * featureWeights.At(i, j)
		}
	}
}
// UnscaleData is a wrapper for unscaling data in parallel.
// TODO: Make this work better so that if there is an error somewhere the data isn't changed.
func UnscaleData(scaler Scaler, data *mat64.Dense) error {
	m := &sync.Mutex{}
	var e ErrorList
	f := func(start, end int) {
		for r := start; r < end; r++ {
			errTmp := scaler.Unscale(data.RowView(r))
			if errTmp != nil {
				m.Lock()
				e = append(e, &SliceError{Header: "scale", Idx: r, Err: errTmp})
				m.Unlock()
			}
		}
	}
	nSamples, _ := data.Dims()
	grain := common.GetGrainSize(nSamples, 1, 500)
	common.ParallelFor(nSamples, grain, f)
	if len(e) != 0 {
		return e
	}
	return nil
}
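// A hypothetical usage sketch (not from the original source, assuming errors
// and fmt are imported). The full Scaler interface is not shown here; this
// assumes only the Unscale(row []float64) error method implied by the call
// above, and the factorScaler type is illustrative.
type factorScaler struct{ factor float64 }

// Unscale multiplies each value in the row by the scaling factor in place.
func (s factorScaler) Unscale(row []float64) error {
	if s.factor == 0 {
		return errors.New("factorScaler: zero factor")
	}
	for i := range row {
		row[i] *= s.factor
	}
	return nil
}

func exampleUnscaleData() {
	data := mat64.NewDense(2, 2, []float64{0.1, 0.2, 0.3, 0.4})
	if err := UnscaleData(factorScaler{factor: 10}, data); err != nil {
		fmt.Println("unscale failed:", err)
	}
}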
// MultiHypothesis computes the hypothesis θᵀx for every example in x at once,
// where each column of x is one example. It returns a vector with one entry
// per column of x.
func MultiHypothesis(x *mat64.Dense, theta *mat64.Vector) *mat64.Vector {
	var res mat64.Dense
	res.Mul(theta.T(), x)
	return res.RowView(0)
}
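// A brief usage sketch (not from the original source, assuming fmt is
// imported): with θ = [1 2] and three examples stored as the columns of x,
// MultiHypothesis returns θᵀx = [1 3 5] in a single multiplication.
func exampleMultiHypothesis() {
	theta := mat64.NewVector(2, []float64{1, 2})
	x := mat64.NewDense(2, 3, []float64{
		1, 1, 1, // bias row
		0, 1, 2, // feature row
	})
	h := MultiHypothesis(x, theta)
	fmt.Println(mat64.Formatted(h)) // expect [1 3 5]
}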