// Cov returns the covariance between a set of data points based on the current
// GP fit.
func (g *GP) Cov(m *mat64.SymDense, x mat64.Matrix) *mat64.SymDense {
	if m != nil {
		// TODO(btracey): Make this k**
		panic("reusing m not coded")
	}
	// The joint covariance matrix is
	//  k(x_*, x_*) - k(x_*, x) k(x, x)^-1 k(x, x_*)
	nSamp, nDim := x.Dims()
	if nDim != g.inputDim {
		panic(badInputLength)
	}

	// Compute k(x_*, x) k(x, x)^-1 k(x, x_*).
	kstar := g.formKStar(x)
	var tmp mat64.Dense
	tmp.SolveCholesky(g.cholK, kstar)
	var tmp2 mat64.Dense
	tmp2.Mul(kstar.T(), &tmp)

	// Compute k(x_*, x_*) and perform the subtraction.
	kstarstar := mat64.NewSymDense(nSamp, nil)
	for i := 0; i < nSamp; i++ {
		for j := i; j < nSamp; j++ {
			v := g.kernel.Distance(mat64.Row(nil, i, x), mat64.Row(nil, j, x))
			if i == j {
				v += g.noise
			}
			kstarstar.SetSym(i, j, v-tmp2.At(i, j))
		}
	}
	return kstarstar
}
// toFeatureNodes converts each row of X into a sparse slice of C feature_node
// structs (skipping zero values, using 1-based indices) terminated by a
// sentinel node with index -1, and returns a pointer to the first node of
// each row.
func toFeatureNodes(X *mat64.Dense) []*C.struct_feature_node {
	featureNodes := []*C.struct_feature_node{}

	nRows, nCols := X.Dims()
	for i := 0; i < nRows; i++ {
		row := []C.struct_feature_node{}
		for j := 0; j < nCols; j++ {
			val := X.At(i, j)
			if val != 0 {
				row = append(row, C.struct_feature_node{
					index: C.int(j + 1),
					value: C.double(val),
				})
			}
		}
		row = append(row, C.struct_feature_node{
			index: C.int(-1),
			value: C.double(0),
		})
		featureNodes = append(featureNodes, &row[0])
	}

	return featureNodes
}
func rowSum(matrix *mat64.Dense, rowId int) float64 {
	_, col := matrix.Dims()
	sum := float64(0)
	for c := 0; c < col; c++ {
		sum += matrix.At(rowId, c)
	}
	return sum
}
func colSum(matrix *mat64.Dense, colId int) float64 {
	row, _ := matrix.Dims()
	sum := float64(0)
	for r := 0; r < row; r++ {
		sum += matrix.At(r, colId)
	}
	return sum
}
// SetScale sets a linear scale between 0 and 1. If there are fewer than two
// data points, an error is returned. If the minimum and maximum values are
// identical in a dimension, the minimum and maximum values will be set to
// that value +/- 0.5 and a UniformDimension error is returned.
func (l *Linear) SetScale(data *mat64.Dense) error {
	rows, dim := data.Dims()
	if rows < 2 {
		return errors.New("scale: less than two inputs")
	}

	// Generate data for min and max if they don't already exist.
	if len(l.Min) < dim {
		l.Min = make([]float64, dim)
	} else {
		l.Min = l.Min[0:dim]
	}
	if len(l.Max) < dim {
		l.Max = make([]float64, dim)
	} else {
		l.Max = l.Max[0:dim]
	}
	for i := range l.Min {
		l.Min[i] = math.Inf(1)
	}
	for i := range l.Max {
		l.Max[i] = math.Inf(-1)
	}

	// Find the minimum and maximum in each dimension.
	for i := 0; i < rows; i++ {
		for j := 0; j < dim; j++ {
			val := data.At(i, j)
			if val < l.Min[j] {
				l.Min[j] = val
			}
			if val > l.Max[j] {
				l.Max[j] = val
			}
		}
	}
	l.Scaled = true
	l.Dim = dim

	var unifError *UniformDimension
	// Check that the maximum and minimum values are not identical.
	for i := range l.Min {
		if l.Min[i] == l.Max[i] {
			if unifError == nil {
				unifError = &UniformDimension{}
			}
			unifError.Dims = append(unifError.Dims, i)
			l.Min[i] -= 0.5
			l.Max[i] += 0.5
		}
	}
	if unifError != nil {
		return unifError
	}
	return nil
}
// UpdateBias computes B = B + l.E and updates the bias weights
// from a size * 1 back-propagated error vector.
func (n *Network) UpdateBias(err *mat64.Dense, learnRate float64) {
	for i, b := range n.biases {
		if i < n.input {
			continue
		}
		n.biases[i] = b + err.At(i, 0)*learnRate
	}
}
// regionQuery marks, as set bits in ret, every point that lies within the eps
// neighbourhood of point p according to the pairwise distance matrix dist.
func regionQuery(p int, ret *big.Int, dist *mat64.Dense, eps float64) *big.Int {
	rows, _ := dist.Dims()
	// Return any points within the eps neighbourhood.
	for i := 0; i < rows; i++ {
		if dist.At(p, i) <= eps {
			ret = ret.SetBit(ret, i, 1) // Mark as a neighbour.
		}
	}
	return ret
}
// Mat64ToGcvMat converts a *mat64.Dense to a GcvMat.
func Mat64ToGcvMat(mat *mat64.Dense) GcvMat {
	row, col := mat.Dims()

	rawData := NewGcvFloat64Vector(int64(row * col))
	for i := 0; i < row; i++ {
		for j := 0; j < col; j++ {
			rawData.Set(i*col+j, mat.At(i, j))
		}
	}

	return Mat64ToGcvMat_(row, col, rawData)
}
// SetScale finds the appropriate scaling of the data such that the dataset has
// a mean of 0 and a variance of 1. If the standard deviation of any of the
// data is zero (all of the entries have the same value), the standard
// deviation is set to 1.0 and a UniformDimension error is returned.
func (n *Normal) SetScale(data *mat64.Dense) error {
	rows, dim := data.Dims()
	if rows < 2 {
		return errors.New("scale: less than two inputs")
	}

	// Need to find the mean input and the std of the input.
	mean := make([]float64, dim)
	for i := 0; i < rows; i++ {
		for j := 0; j < dim; j++ {
			mean[j] += data.At(i, j)
		}
	}
	for i := range mean {
		mean[i] /= float64(rows)
	}

	// TODO: Replace this with something that has better numerical properties.
	std := make([]float64, dim)
	for i := 0; i < rows; i++ {
		for j := 0; j < dim; j++ {
			diff := data.At(i, j) - mean[j]
			std[j] += diff * diff
		}
	}
	for i := range std {
		std[i] /= float64(rows)
		std[i] = math.Sqrt(std[i])
	}
	n.Scaled = true
	n.Dim = dim

	var unifError *UniformDimension
	for i := range std {
		if std[i] == 0 {
			if unifError == nil {
				unifError = &UniformDimension{}
			}
			unifError.Dims = append(unifError.Dims, i)
			std[i] = 1.0
		}
	}

	n.Mu = mean
	n.Sigma = std
	if unifError != nil {
		return unifError
	}
	return nil
}
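// A minimal hedged usage sketch of SetScale, assuming the Normal scaler type
// above is in the same package and that "fmt" and
// "github.com/gonum/matrix/mat64" are imported; exampleNormalSetScale is a
// hypothetical helper name. The single column {1, 3} has mean 2 and
// population standard deviation 1.
func exampleNormalSetScale() {
	data := mat64.NewDense(2, 1, []float64{1, 3})
	var n Normal
	if err := n.SetScale(data); err != nil {
		fmt.Println(err)
	}
	fmt.Println(n.Mu, n.Sigma) // [2] [1]
}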
// Fit accumulates per-language token counts from the sample matrix X and the
// language labels in the first column of y, and stores them in nb.params.
func (nb *NaiveBayes) Fit(X, y *mat64.Dense) {
	nSamples, nFeatures := X.Dims()

	tokensTotal := 0
	langsTotal, _ := y.Dims()

	langsCount := histogram(y.Col(nil, 0))

	tokensTotalPerLang := map[int]int{}
	tokenCountPerLang := map[int](map[int]int){}
	for i := 0; i < nSamples; i++ {
		langIdx := int(y.At(i, 0))
		for j := 0; j < nFeatures; j++ {
			tokensTotal += int(X.At(i, j))
			tokensTotalPerLang[langIdx] += int(X.At(i, j))
			if _, ok := tokenCountPerLang[langIdx]; !ok {
				tokenCountPerLang[langIdx] = map[int]int{}
			}
			tokenCountPerLang[langIdx][j] += int(X.At(i, j))
		}
	}

	params := nbParams{
		TokensTotal:        tokensTotal,
		LangsTotal:         langsTotal,
		LangsCount:         langsCount,
		TokensTotalPerLang: tokensTotalPerLang,
		TokenCountPerLang:  tokenCountPerLang,
	}

	nb.params = params
}
// StackConstr converts the two-sided constraints "low <= Ax <= up" into the
// one-sided form "stackA x <= b" by stacking A on top of -A and up on top of
// -low.
func StackConstr(low, A, up *mat64.Dense) (stackA, b *mat64.Dense, ranges []float64) {
	neglow := &mat64.Dense{}
	neglow.Scale(-1, low)
	b = &mat64.Dense{}
	b.Stack(up, neglow)

	negA := &mat64.Dense{}
	negA.Scale(-1, A)
	stackA = &mat64.Dense{}
	stackA.Stack(A, negA)

	// Capture the range of each constraint from A because this information is
	// lost when converting from "low <= Ax <= up" via stacking to "Ax <= up".
	m, _ := A.Dims()
	ranges = make([]float64, m, 2*m)
	for i := 0; i < m; i++ {
		ranges[i] = up.At(i, 0) - low.At(i, 0)
		if ranges[i] == 0 {
			if up.At(i, 0) == 0 {
				ranges[i] = 1
			} else {
				ranges[i] = up.At(i, 0)
			}
		}
	}
	ranges = append(ranges, ranges...)

	return stackA, b, ranges
}
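// A minimal hedged usage sketch of StackConstr, assuming it is in scope and
// that "fmt" and "github.com/gonum/matrix/mat64" are imported;
// exampleStackConstr is a hypothetical helper name. The constraints
// -1 <= x1 <= 3 and -2 <= x2 <= 4 (A = I) become [I; -I] x <= [3 4 1 2]^T,
// with the per-constraint ranges 4 and 6 repeated for the stacked rows.
func exampleStackConstr() {
	A := mat64.NewDense(2, 2, []float64{
		1, 0,
		0, 1,
	})
	low := mat64.NewDense(2, 1, []float64{-1, -2})
	up := mat64.NewDense(2, 1, []float64{3, 4})

	stackA, b, ranges := StackConstr(low, A, up)
	fmt.Println(mat64.Formatted(stackA))
	fmt.Println(mat64.Formatted(b)) // [3 4 1 2]^T
	fmt.Println(ranges)             // [4 6 4 6]
}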
func (lr *LinearRegression) Fit(inst *base.Instances) error {
	if inst.Rows < inst.GetAttributeCount() {
		return NotEnoughDataError
	}

	// Split into two matrices, observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(inst.Rows, 1, nil)
	explVariables := mat64.NewDense(inst.Rows, inst.GetAttributeCount(), nil)

	for i := 0; i < inst.Rows; i++ {
		observed.Set(i, 0, inst.Get(i, inst.ClassIndex)) // Set observed data

		for j := 0; j < inst.GetAttributeCount(); j++ {
			if j == 0 {
				// Set intercepts to 1.0
				// Could / should be done better: http://www.theanalysisfactor.com/interpret-the-intercept/
				explVariables.Set(i, 0, 1.0)
			} else {
				explVariables.Set(i, j, inst.Get(i, j-1))
			}
		}
	}

	n := inst.GetAttributeCount()
	qr := mat64.QR(explVariables)
	q := qr.Q()
	reg := qr.R()

	var transposed, qty mat64.Dense
	transposed.TCopy(q)
	qty.Mul(&transposed, observed)

	// Back-substitution: solve the upper-triangular system R * beta = Q^T * y
	// for the coefficients, starting from the last row.
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true

	return nil
}
// Distance computes the Manhattan distance, also known as the L1 distance:
// the sum of the absolute differences of the corresponding elements.
func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
	r1, c1 := vectorX.Dims()
	r2, c2 := vectorY.Dims()
	if r1 != r2 || c1 != c2 {
		panic(mat64.ErrShape)
	}

	result := .0
	for i := 0; i < r1; i++ {
		for j := 0; j < c1; j++ {
			result += math.Abs(vectorX.At(i, j) - vectorY.At(i, j))
		}
	}
	return result
}
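// A minimal hedged usage sketch of the Manhattan metric above, assuming the
// Manhattan type is in scope and that "fmt" and
// "github.com/gonum/matrix/mat64" are imported; exampleManhattanDistance is a
// hypothetical helper name. The vectors are represented as 1xN matrices.
func exampleManhattanDistance() {
	x := mat64.NewDense(1, 3, []float64{1, 2, 3})
	y := mat64.NewDense(1, 3, []float64{4, 0, 3})
	m := &Manhattan{}
	fmt.Println(m.Distance(x, y)) // |1-4| + |2-0| + |3-3| = 5
}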
// predict is a wrapper for prediction; it assumes all inputs are correct.
func predict(input []float64, features *mat64.Dense, b []float64, featureWeights *mat64.Dense, output []float64) {
	for i := range output {
		output[i] = 0
	}
	nFeatures, _ := features.Dims()
	_, outputDim := featureWeights.Dims()
	sqrt2OverD := math.Sqrt(2.0 / float64(nFeatures))
	for i := 0; i < nFeatures; i++ {
		z := computeZ(input, features.RowView(i), b[i], sqrt2OverD)
		for j := 0; j < outputDim; j++ {
			output[j] += z * featureWeights.At(i, j)
		}
	}
}
// Distance computes the Chebyshev distance: the maximum absolute difference
// between corresponding elements of the two matrices.
func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
	r1, c1 := vectorX.Dims()
	r2, c2 := vectorY.Dims()
	if r1 != r2 || c1 != c2 {
		panic(mat64.ErrShape)
	}

	max := float64(0)
	for i := 0; i < r1; i++ {
		for j := 0; j < c1; j++ {
			max = math.Max(max, math.Abs(vectorX.At(i, j)-vectorY.At(i, j)))
		}
	}
	return max
}
// cost returns the half sum-of-squared-errors between y and yHat.
func cost(y, yHat *mat64.Dense) float64 {
	// TODO add runtime check that y and yHat have the same dimensions
	rows, cols := y.Dims()
	errorSquares := []float64{}

	for row := 0; row < rows; row++ {
		for col := 0; col < cols; col++ {
			yValue := y.At(row, col)
			yHatValue := yHat.At(row, col)

			error := 0.5 * math.Pow((yValue-yHatValue), 2)
			errorSquares = append(errorSquares, error)
		}
	}

	var errorSum float64
	for _, error := range errorSquares {
		errorSum += error
	}
	return errorSum
}
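// A minimal hedged worked example of cost, assuming it is in scope and that
// "fmt" and "github.com/gonum/matrix/mat64" are imported; exampleCost is a
// hypothetical helper name. For y = [1 2; 3 4] and yHat = [1 1; 2 2] the
// squared differences are 0, 1, 1, 4, so cost = 0.5 * (0 + 1 + 1 + 4) = 3.
func exampleCost() {
	y := mat64.NewDense(2, 2, []float64{1, 2, 3, 4})
	yHat := mat64.NewDense(2, 2, []float64{1, 1, 2, 2})
	fmt.Println(cost(y, yHat)) // 3
}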
// UpdateWeights takes an output size * 1 output vector and a size * 1
// back-propagated error vector, as well as a learnRate, and updates
// the internal weights matrix.
func (n *Network) UpdateWeights(out, err *mat64.Dense, learnRate float64) {
	if n.origWeights == nil {
		n.origWeights = mat64.DenseCopyOf(n.weights)
	}

	// Multiply that by the learning rate
	mulFunc := func(target, source int, v float64) float64 {
		if target == source {
			return v
		}
		if math.Abs(n.origWeights.At(target, source)) > 0.005 {
			return v + learnRate*out.At(source, 0)*err.At(target, 0)
		}
		return 0.00
	}

	// Add that to the weights
	n.weights.Apply(mulFunc, n.weights)
}
// covToCorr converts a covariance matrix to a correlation matrix.
func covToCorr(c *mat64.Dense) {
	// TODO(jonlawlor): use a *mat64.Symmetric as input.
	r, _ := c.Dims()
	s := make([]float64, r)
	for i := 0; i < r; i++ {
		s[i] = 1 / math.Sqrt(c.At(i, i))
	}
	for i, sx := range s {
		row := c.RawRowView(i)
		for j, sy := range s {
			if i == j {
				// Ensure that the diagonal has exactly ones.
				row[j] = 1
				continue
			}
			row[j] *= sx
			row[j] *= sy
		}
	}
}
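// A minimal hedged usage sketch of covToCorr, assuming it is in scope and
// that "fmt" and "github.com/gonum/matrix/mat64" are imported;
// exampleCovToCorr is a hypothetical helper name. The covariance matrix
// [4 1; 1 1] has standard deviations 2 and 1, so the off-diagonal correlation
// is 1 / (2 * 1) = 0.5.
func exampleCovToCorr() {
	c := mat64.NewDense(2, 2, []float64{4, 1, 1, 1})
	covToCorr(c)
	fmt.Println(mat64.Formatted(c)) // [1 0.5; 0.5 1]
}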
// Distance computes the Canberra distance: the sum over all elements of
// |x - y| / (|x| + |y|), with each term accumulated via cranberraDistanceStep.
func (c *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
	r1, c1 := vectorX.Dims()
	r2, c2 := vectorY.Dims()
	if r1 != r2 || c1 != c2 {
		panic(mat64.ErrShape)
	}

	sum := .0
	for i := 0; i < r1; i++ {
		for j := 0; j < c1; j++ {
			p1 := vectorX.At(i, j)
			p2 := vectorY.At(i, j)
			num := math.Abs(p1 - p2)
			denom := math.Abs(p1) + math.Abs(p2)
			sum += cranberraDistanceStep(num, denom)
		}
	}
	return sum
}
// testScaling checks that ScaleData transforms data into scaledData and that
// UnscaleData recovers the original values.
func testScaling(t *testing.T, u Scaler, data *mat64.Dense, scaledData *mat64.Dense, name string) {
	// Copy the original data so the round trip can be verified.
	r, c := data.Dims()
	origData := mat64.NewDense(r, c, nil)
	for i := 0; i < r; i++ {
		for j := 0; j < c; j++ {
			origData.Set(i, j, data.At(i, j))
		}
	}

	err := ScaleData(u, data)
	if err != nil {
		t.Errorf("Error found in ScaleData for case " + name + ": " + err.Error())
	}
	if !data.EqualsApprox(scaledData, 1e-14) {
		t.Errorf("Improper scaling for case "+name+". Expected: %v, Found: %v", scaledData, data)
	}

	err = UnscaleData(u, data)
	if err != nil {
		t.Errorf("Error found in UnscaleData for case " + name + ": " + err.Error())
	}
	if !data.EqualsApprox(origData, 1e-14) {
		t.Errorf("Improper unscaling for case "+name+". Expected: %v, Found: %v", origData, data)
	}
}
/* Calculates the appropriate power of the next-even transformation matrix
 * and returns the coefficients in the linear equation it represents. */
func calcTMat(n int) (aa, ab, ba, bb int) {
	t1Mat := mat64.NewDense(2, 2, []float64{3, 2, 2, 1})
	tMat := new(mat64.Dense)
	tMat.Pow(t1Mat, n)
	return round(tMat.At(0, 0)), round(tMat.At(0, 1)),
		round(tMat.At(1, 0)), round(tMat.At(1, 1))
}
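// A small hedged check of calcTMat, assuming it and its round helper are in
// scope and that "fmt" is imported; exampleCalcTMat is a hypothetical helper
// name. Squaring the base matrix gives [3 2; 2 1]^2 = [13 8; 8 5], so
// calcTMat(2) should return aa=13, ab=8, ba=8, bb=5.
func exampleCalcTMat() {
	aa, ab, ba, bb := calcTMat(2)
	fmt.Println(aa, ab, ba, bb) // 13 8 8 5
}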
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {
	// Retrieve row size
	_, rows := inst.Size()

	// Validate class Attribute count
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("Only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	cols := len(attrs) + 1

	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices, observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix
	inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	})

	// Build the explanatory variables
	inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set intercepts to 1.0
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	})

	n := cols
	qr := new(mat64.QR)
	qr.Factorize(explVariables)
	var q, reg mat64.Dense
	q.QFromQR(qr)
	reg.RFromQR(qr)

	var transposed, qty mat64.Dense
	transposed.Clone(q.T())
	qty.Mul(&transposed, observed)

	// Back-substitution: solve the upper-triangular system R * beta = Q^T * y
	// for the coefficients, starting from the last row.
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]
	return nil
}
// ConditionNormal returns the Normal distribution that is the receiver conditioned
// on the input evidence. The returned multivariate normal has dimension
// n - len(observed), where n is the dimension of the original receiver. The updated
// mean and covariance are
//  mu = mu_un + sigma_{ob,un}^T * sigma_{ob,ob}^-1 (v - mu_ob)
//  sigma = sigma_{un,un} - sigma_{ob,un}^T * sigma_{ob,ob}^-1 * sigma_{ob,un}
// where mu_un and mu_ob are the original means of the unobserved and observed
// variables respectively, sigma_{un,un} is the unobserved subset of the covariance
// matrix, sigma_{ob,ob} is the observed subset of the covariance matrix, and
// sigma_{un,ob} are the cross terms. The elements of x_2 have been observed with
// values v. The dimension order is preserved during conditioning, so if the value
// of dimension 1 is observed, the returned normal represents dimensions {0, 2, ...}
// of the original Normal distribution.
//
// ConditionNormal returns {nil, false} if there is a failure during the update.
// Mathematically this is impossible, but can occur with finite precision arithmetic.
func (n *Normal) ConditionNormal(observed []int, values []float64, src *rand.Rand) (*Normal, bool) {
	if len(observed) == 0 {
		panic("normal: no observed value")
	}
	if len(observed) != len(values) {
		panic("normal: input slice length mismatch")
	}
	for _, v := range observed {
		if v < 0 || v >= n.Dim() {
			panic("normal: observed value out of bounds")
		}
	}

	ob := len(observed)
	unob := n.Dim() - ob
	obMap := make(map[int]struct{})
	for _, v := range observed {
		if _, ok := obMap[v]; ok {
			panic("normal: observed dimension occurs twice")
		}
		obMap[v] = struct{}{}
	}
	if len(observed) == n.Dim() {
		panic("normal: all dimensions observed")
	}
	unobserved := make([]int, 0, unob)
	for i := 0; i < n.Dim(); i++ {
		if _, ok := obMap[i]; !ok {
			unobserved = append(unobserved, i)
		}
	}
	mu1 := make([]float64, unob)
	for i, v := range unobserved {
		mu1[i] = n.mu[v]
	}
	mu2 := make([]float64, ob) // really v - mu2
	for i, v := range observed {
		mu2[i] = values[i] - n.mu[v]
	}

	n.setSigma()

	var sigma11, sigma22 mat64.SymDense
	sigma11.SubsetSym(n.sigma, unobserved)
	sigma22.SubsetSym(n.sigma, observed)

	sigma21 := mat64.NewDense(ob, unob, nil)
	for i, r := range observed {
		for j, c := range unobserved {
			v := n.sigma.At(r, c)
			sigma21.Set(i, j, v)
		}
	}

	var chol mat64.Cholesky
	ok := chol.Factorize(&sigma22)
	if !ok {
		return nil, ok
	}

	// Compute sigma_{2,1}^T * sigma_{2,2}^-1 (v - mu_2).
	v := mat64.NewVector(ob, mu2)
	var tmp, tmp2 mat64.Vector
	err := tmp.SolveCholeskyVec(&chol, v)
	if err != nil {
		return nil, false
	}
	tmp2.MulVec(sigma21.T(), &tmp)

	// Compute sigma_{2,1}^T * sigma_{2,2}^-1 * sigma_{2,1}.
	// TODO(btracey): Should this be a method of SymDense?
	var tmp3, tmp4 mat64.Dense
	err = tmp3.SolveCholesky(&chol, sigma21)
	if err != nil {
		return nil, false
	}
	tmp4.Mul(sigma21.T(), &tmp3)

	for i := range mu1 {
		mu1[i] += tmp2.At(i, 0)
	}

	// TODO(btracey): If tmp2 can be constructed with a method, then this can be
	// replaced with SubSym.
	for i := 0; i < len(unobserved); i++ {
		for j := i; j < len(unobserved); j++ {
			v := sigma11.At(i, j)
			sigma11.SetSym(i, j, v-tmp4.At(i, j))
		}
	}
	return NewNormal(mu1, &sigma11, src)
}