// MeanBatch predicts the mean at the set of locations specified by x. Stores in-place into yPred // If yPred is nil new memory is allocated. func (g *GP) MeanBatch(yPred []float64, x mat64.Matrix) []float64 { rx, cx := x.Dims() if cx != g.inputDim { panic(badInputLength) } if yPred == nil { yPred = make([]float64, rx) } ry := len(yPred) if rx != ry { panic(badOutputLength) } nSamples, _ := g.inputs.Dims() covariance := mat64.NewDense(nSamples, rx, nil) row := make([]float64, g.inputDim) for j := 0; j < rx; j++ { for k := 0; k < g.inputDim; k++ { row[k] = x.At(j, k) } for i := 0; i < nSamples; i++ { v := g.kernel.Distance(g.inputs.RawRowView(i), row) covariance.Set(i, j, v) } } yPredVec := mat64.NewVector(len(yPred), yPred) yPredVec.MulVec(covariance.T(), g.sigInvY) // Rescale the outputs for i, v := range yPred { yPred[i] = v*g.std + g.mean } return yPred }
// findLinearlyIndependnt finds a set of linearly independent columns of A, and // returns the column indexes of the linearly independent columns. func findLinearlyIndependent(A mat64.Matrix) []int { m, n := A.Dims() idxs := make([]int, 0, m) columns := mat64.NewDense(m, m, nil) newCol := make([]float64, m) // Walk in reverse order because slack variables are typically the last columns // of A. for i := n - 1; i >= 0; i-- { if len(idxs) == m { break } mat64.Col(newCol, i, A) if len(idxs) == 0 { // A column is linearly independent from the null set. // This is what needs to be changed if zero columns are allowed, as // a column of all zeros is not linearly independent from itself. columns.SetCol(len(idxs), newCol) idxs = append(idxs, i) continue } if linearlyDependent(mat64.NewVector(m, newCol), columns.View(0, 0, m, len(idxs))) { continue } columns.SetCol(len(idxs), newCol) idxs = append(idxs, i) } return idxs }
func columnMean(M mat.Matrix) mat.Matrix { r, c := M.Dims() SumMatrix := columnSum(M) switch t := SumMatrix.(type) { case *mat.Dense: M := mat.NewDense(1, c, nil) M.Scale(1/float64(r), SumMatrix) return M case mat.Mutable: _ = t V := SumMatrix.(mat.Mutable) _, cols := V.Dims() for i := 0; i < cols; i++ { V.Set(0, i, SumMatrix.At(0, i)/float64(r)) } return V default: panic("M is of an unknown type") } }
// Cov returns the covariance between a set of data points based on the current // GP fit. func (g *GP) Cov(m *mat64.SymDense, x mat64.Matrix) *mat64.SymDense { if m != nil { // TODO(btracey): Make this k** panic("resuing m not coded") } // The joint covariance matrix is // K(x_*, k_*) - k(x_*, x) k(x,x)^-1 k(x, x*) nSamp, nDim := x.Dims() if nDim != g.inputDim { panic(badInputLength) } // Compute K(x_*, x) K(x, x)^-1 K(x, x_*) kstar := g.formKStar(x) var tmp mat64.Dense tmp.SolveCholesky(g.cholK, kstar) var tmp2 mat64.Dense tmp2.Mul(kstar.T(), &tmp) // Compute k(x_*, x_*) and perform the subtraction. kstarstar := mat64.NewSymDense(nSamp, nil) for i := 0; i < nSamp; i++ { for j := i; j < nSamp; j++ { v := g.kernel.Distance(mat64.Row(nil, i, x), mat64.Row(nil, j, x)) if i == j { v += g.noise } kstarstar.SetSym(i, j, v-tmp2.At(i, j)) } } return kstarstar }
func benchmarkCovarianceMatrixInPlace(b *testing.B, m mat64.Matrix) { _, c := m.Dims() res := mat64.NewDense(c, c, nil) b.ResetTimer() for i := 0; i < b.N; i++ { CovarianceMatrix(res, m, nil) } }
// extractColumns creates a new matrix out of the columns of A specified by cols. // TODO(btracey): Allow this to take a receiver. func extractColumns(A mat64.Matrix, cols []int) *mat64.Dense { r, _ := A.Dims() sub := mat64.NewDense(r, len(cols), nil) col := make([]float64, r) for j, idx := range cols { mat64.Col(col, idx, A) sub.SetCol(j, col) } return sub }
// linearlyDependent returns whether the vector is linearly dependent // with the columns of A. It assumes that A is a full-rank matrix. func linearlyDependent(vec *mat64.Vector, A mat64.Matrix) bool { // Add vec to the columns of A, and see if the condition number is reasonable. m, n := A.Dims() aNew := mat64.NewDense(m, n+1, nil) aNew.Copy(A) col := mat64.Col(nil, 0, vec) aNew.SetCol(n, col) cond := mat64.Cond(aNew, 1) return cond > 1e12 }
func rowSum(M mat.Matrix) mat.Matrix { rows, _ := M.Dims() floatRes := make([]float64, rows) for i := 0; i < rows; i++ { floatRes[i] = mat.Sum(getRowVector(i, M)) } return mat.NewDense(rows, 1, floatRes) }
func columnSum(M mat.Matrix) mat.Matrix { _, cols := M.Dims() floatRes := make([]float64, cols) for i := 0; i < cols; i++ { floatRes[i] = mat.Sum(getColumnVector(i, M)) } return mat.NewDense(1, cols, floatRes) }
func MatrixToImage(src mat64.Matrix) image.Image { width, height := src.Dims() img := image.NewRGBA(image.Rect(0, 0, width, height)) for x := 0; x < width; x++ { for y := 0; y < height; y++ { img.Set(x, y, Float64ToColor(src.At(x, y))) } } return img }
func J(idx *mat.Vector, X, Mu mat.Matrix) float64 { Mux := ConstructXCentroidMatrix(idx, Mu) xRows, xCols := X.Dims() Diff := mat.NewDense(xRows, xCols, nil) Diff.Sub(X, Mux) Diff.MulElem(Diff, Diff) Diff = rowSum(Diff).(*mat.Dense) return columnSum(Diff).At(0, 0) / float64(xRows) }
// AssignCentroid assigns all of the examples in X to one of the groups // in Mu // X -> (m*n), Mu -> (K*n) // returns (m*1) func AssignCentroid(X, Mu mat.Matrix) *mat.Vector { m, _ := X.Dims() idx := mat.NewVector(m, nil) for i := 0; i < m; i++ { x := getRowVector(i, X) idx.SetVec(i, float64(NearestCentroid(x, Mu))) } return idx }
func getRowVector(index int, M mat.Matrix) *mat.Vector { _, cols := M.Dims() var rowData []float64 if cols == 0 { rowData = []float64{} } else { rowData = mat.Row(nil, index, M) } return mat.NewVector(cols, rowData) }
func benchmarkCovarianceMatrixWeighted(b *testing.B, m mat64.Matrix) { r, _ := m.Dims() wts := make([]float64, r) for i := range wts { wts[i] = 0.5 } b.ResetTimer() for i := 0; i < b.N; i++ { CovarianceMatrix(nil, m, wts) } }
// MoveCentroid computes the averages for all the points inside each of the cluster // centroid groups, then move the cluster centroid points to those averages. // It then returns the new Centroids func MoveCentroids(idx *mat.Vector, X, Mu mat.Matrix) mat.Matrix { muRows, muCols := Mu.Dims() NewMu := mat.NewDense(muRows, muCols, nil) for k := 0; k < muRows; k++ { CentroidKMean := columnMean(rowIndexIn(findIn(float64(k), idx), X)) NewMu.SetRow(k, mat.Row(nil, 0, CentroidKMean)) } return NewMu }
func getColumnVector(index int, M mat.Matrix) *mat.Vector { rows, _ := M.Dims() var colData []float64 if rows == 0 { colData = []float64{} } else { colData = mat.Col(nil, index, M) } return mat.NewVector(rows, colData) }
//Just a wrapper for the mat64.Dense.TCopy method func (F *Matrix) TCopy(A mat64.Matrix) { //NOTE: This function has been removed from gonum, hence I should remove it from here too. ******************* //Somehow the mat64.TCopy method seems to misbehave if I give it a mat64.Matrix. //Although I can't see a bug in the mat64.Dense.TCopy function, it seems that if I //call it with an A which is not a mat64.Dense, it doesn't work. That is why this wrapper //has not been deleted. This seems to be a bug in gochem somehow, not in gonum. if A, ok := A.(*Matrix); ok { F.Dense.Copy(A.Dense.T()) } else { F.Dense.Copy(A.T()) } }
func CropMatrix(src mat64.Matrix, x0, y0, x1, y1 int) (*mat64.Dense, error) { rows := y1 - y0 + 1 cols := x1 - x0 + 1 mtx := make([]float64, rows*cols) for x := x0; x <= x1; x++ { for y := y0; y <= y1; y++ { mtx[(y-y0)*cols+(x-x0)] = src.At(x, y) } } return mat64.NewDense(rows, cols, mtx), nil }
// NewComplex returns a complex matrix constructed from r and i. At least one of // r or i must be non-nil otherwise NewComplex will panic. If one of the inputs // is nil, that part of the complex number will be zero when returned by At. // If both are non-nil but differ in their sizes, NewComplex will panic. func NewComplex(r, i mat64.Matrix) Complex { if r == nil && i == nil { panic("conv: no matrix") } else if r != nil && i != nil { rr, rc := r.Dims() ir, ic := i.Dims() if rr != ir || rc != ic { panic(matrix.ErrShape) } } return Complex{r: r, i: i, imagSign: 1} }
func transpose(A mat64.Matrix) mat64.Matrix { r, s := A.Dims() var data []float64 for j := 0; j < s; j++ { for i := 0; i < r; i++ { data = append(data, A.At(i, j)) } } // Está medio chafa que regrese Dense, cómo hacemos para que regrese // el mismo tipo de A? return mat64.NewDense(s, r, data) }
// rowIndexIn returns a matrix contains the rows in indexes vector func rowIndexIn(indexes *mat.Vector, M mat.Matrix) mat.Matrix { m := indexes.Len() _, n := M.Dims() Res := mat.NewDense(m, n, nil) for i := 0; i < m; i++ { Res.SetRow(i, mat.Row( nil, int(indexes.At(i, 0)), M)) } return Res }
// CovarianceMatrix calculates a covariance matrix (also known as a // variance-covariance matrix) from a matrix of data, using a two-pass // algorithm. // // The weights must have length equal to the number of rows in // input data matrix x. If cov is nil, then a new matrix with appropriate size will // be constructed. If cov is not nil, it should have the same number of columns as the // input data matrix x, and it will be used as the destination for the covariance // data. Weights must not be negative. func CovarianceMatrix(cov *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense { // This is the matrix version of the two-pass algorithm. It doesn't use the // additional floating point error correction that the Covariance function uses // to reduce the impact of rounding during centering. r, c := x.Dims() if cov == nil { cov = mat64.NewSymDense(c, nil) } else if n := cov.Symmetric(); n != c { panic(matrix.ErrShape) } var xt mat64.Dense xt.Clone(x.T()) // Subtract the mean of each of the columns. for i := 0; i < c; i++ { v := xt.RawRowView(i) // This will panic with ErrShape if len(weights) != len(v), so // we don't have to check the size later. mean := Mean(v, weights) floats.AddConst(-mean, v) } if weights == nil { // Calculate the normalization factor // scaled by the sample size. cov.SymOuterK(1/(float64(r)-1), &xt) return cov } // Multiply by the sqrt of the weights, so that multiplication is symmetric. sqrtwts := make([]float64, r) for i, w := range weights { if w < 0 { panic("stat: negative covariance matrix weights") } sqrtwts[i] = math.Sqrt(w) } // Weight the rows. for i := 0; i < c; i++ { v := xt.RawRowView(i) floats.Mul(v, sqrtwts) } // Calculate the normalization factor // scaled by the weighted sample size. cov.SymOuterK(1/(floats.Sum(weights)-1), &xt) return cov }
// NearestCentroid returns the index of the row in Mu for which its // vector magnitude with x is the least. // x should be (n*1) space and M (K*n) space func NearestCentroid(x *mat.Vector, Mu mat.Matrix) (rowIndex int) { k, _ := Mu.Dims() rowIndex = 0 leastDistance := vectorDistance(x, getRowVector(rowIndex, Mu)) for i := 1; i < k; i++ { distance := vectorDistance(x, getRowVector(i, Mu)) if distance < leastDistance { leastDistance = distance rowIndex = i } } return }
// PrintMat imprime una matriz con un formato entendible func PrintMat(A mat64.Matrix) { r, s := A.Dims() fmt.Printf("(") for i := 0; i < r; i++ { fmt.Printf("\t") for j := 0; j < s; j++ { fmt.Printf("%0.1f\t", A.At(i, j)) } if i != r-1 { fmt.Printf("\n") } } fmt.Printf(")\n") }
//ScaleByCol scales each column of matrix A by Col, putting the result //in the received. func (F *Matrix) ScaleByCol(A, Col mat64.Matrix) { ar, ac := A.Dims() cr, cc := Col.Dims() fr, fc := F.Dims() if ar != cr || cc > 1 || ar != fr || ac != fc { panic(ErrShape) } if F != A { F.Copy(A) } for i := 0; i < ac; i++ { temp := F.ColView(i) temp.MulElem(temp, Col) } }
// VerifyInputs returns true if the number of rows in inputs is not the same // as the number of rows in outputs and the length of weights. As a special case, // the length of weights is allowed to be zero. func VerifyInputs(inputs, outputs mat64.Matrix, weights []float64) error { if inputs == nil || outputs == nil { return NoData } nSamples, _ := inputs.Dims() nOutputSamples, _ := outputs.Dims() nWeights := len(weights) if nSamples != nOutputSamples || (nWeights != 0 && nSamples != nWeights) { return DataMismatch{ Input: nSamples, Output: nOutputSamples, Weight: nWeights, } } return nil }
// PrincipalComponents returns the principal component direction vectors and // the column variances of the principal component scores, vecs * a, computed // using the singular value decomposition of the input. The input a is an n×d // matrix where each row is an observation and each column represents a variable. // // PrincipalComponents centers the variables but does not scale the variance. // // The slice weights is used to weight the observations. If weights is nil, // each weight is considered to have a value of one, otherwise the length of // weights must match the number of observations or PrincipalComponents will // panic. // // On successful completion, the principal component direction vectors are // returned in vecs as a d×min(n, d) matrix, and the variances are returned in // vars as a min(n, d)-long slice in descending sort order. // // If no singular value decomposition is possible, vecs and vars are returned // nil and ok is returned false. func PrincipalComponents(a mat64.Matrix, weights []float64) (vecs *mat64.Dense, vars []float64, ok bool) { n, d := a.Dims() if weights != nil && len(weights) != n { panic("stat: len(weights) != observations") } centered := mat64.NewDense(n, d, nil) col := make([]float64, n) for j := 0; j < d; j++ { mat64.Col(col, j, a) floats.AddConst(-Mean(col, weights), col) centered.SetCol(j, col) } for i, w := range weights { floats.Scale(math.Sqrt(w), centered.RawRowView(i)) } kind := matrix.SVDFull if n > d { kind = matrix.SVDThin } var svd mat64.SVD ok = svd.Factorize(centered, kind) if !ok { return nil, nil, false } vecs = &mat64.Dense{} vecs.VFromSVD(&svd) if n < d { // Don't retain columns that are not valid direction vectors. vecs.Clone(vecs.View(0, 0, d, n)) } vars = svd.Values(nil) var f float64 if weights == nil { f = 1 / float64(n-1) } else { f = 1 / (floats.Sum(weights) - 1) } for i, v := range vars { vars[i] = f * v * v } return vecs, vars, true }
// Given number of clusters K and the training set X of dimension (m*n) // InitializeCentroids returns matrix Mu of dimension (K*n) // The steps to initialize the centroids are as follows // 1. Randomly pick K training examples (Make sure the selected examples are unique) // 2. Set Mu to the K examples func InitializeCentroids(K int, X mat.Matrix) (Mu mat.Matrix) { m, n := X.Dims() // panic if K >= m if K >= m { panic("K should be less than the size of the training set") } randomIndexes := randArray(0, m, K, true) // 1. pick K training examples // 2. set Mu Mu = mat.NewDense(K, n, nil) for i := 0; i < K; i++ { Mu.(*mat.Dense).SetRow(i, mat.Row(nil, randomIndexes[i], X)) } return }
// formKStar forms the covariance matrix between the inputs and new points. func (g *GP) formKStar(x mat64.Matrix) *mat64.Dense { // TODO(btracey): Parallelize r, c := x.Dims() n := len(g.outputs) kStar := mat64.NewDense(n, r, nil) data := make([]float64, c) for j := 0; j < r; j++ { for k := 0; k < c; k++ { data[k] = x.At(j, k) } for i := 0; i < n; i++ { row := g.inputs.RawRowView(i) v := g.kernel.Distance(row, data) kStar.Set(i, j, v) } } return kStar }
// StdDevBatch predicts the standard deviation at a set of locations of x. func (g *GP) StdDevBatch(std []float64, x mat64.Matrix) []float64 { r, c := x.Dims() if c != g.inputDim { panic(badInputLength) } if std == nil { std = make([]float64, r) } if len(std) != r { panic(badStorage) } // For a single point, the stddev is // sigma = k(x,x) - k_*^T * K^-1 * k_* // where k is the vector of kernels between the input points and the output points // For many points, the formula is: // nu_* = k(x_*, k_*) - k_*^T * K^-1 * k_* // This creates the full covariance matrix which is an rxr matrix. However, // the standard deviations are just the diagonal of this matrix. Instead, be // smart about it and compute the diagonal terms one at a time. kStar := g.formKStar(x) var tmp mat64.Dense tmp.SolveCholesky(g.cholK, kStar) // set k(x_*, x_*) into std then subtract k_*^T K^-1 k_* , computed one row at a time var tmp2 mat64.Vector row := make([]float64, c) for i := range std { for k := 0; k < c; k++ { row[k] = x.At(i, k) } std[i] = g.kernel.Distance(row, row) tmp2.MulVec(kStar.ColView(i).T(), tmp.ColView(i)) rt, ct := tmp2.Dims() if rt != 1 && ct != 1 { panic("bad size") } std[i] -= tmp2.At(0, 0) std[i] = math.Sqrt(std[i]) } // Need to scale the standard deviation to be in the same units as y. floats.Scale(g.std, std) return std }