func TestWeightedTimeSeeded(t *testing.T) {
	if !*prob {
		t.Skip("probabilistic testing not requested")
	}
	t.Log("Note: This test is stochastic and is expected to fail with probability ≈ 0.05.")
	rand.Seed(time.Now().Unix())
	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		item, ok := newTestWeighted().Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}
	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}

	// Check that our obtained values are within statistical expectations for p = 0.05.
	// This will not be true approximately 1 in 20 tests.
	X := chi2(f, exp)
	if X >= sigChi2 {
		t.Errorf("H₀: d(Sample) = d(Expect), H₁: d(S) ≠ d(Expect). df = %d, p = 0.05, X² threshold = %.2f, X² = %f", len(f)-1, sigChi2, X)
	}
}
func TestWeightIncrease(t *testing.T) {
	rand.Seed(0)
	want := Weighted{
		weights: []float64{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 9 * 2, 1 << 7, 1 << 8, 1 << 9},
		heap: []float64{
			exp[0] + exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9] + exp[2] + exp[5] + exp[9]*2,
			exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9],
			exp[2] + exp[5] + exp[9]*2,
			exp[3] + exp[7] + exp[8],
			exp[4] + exp[9],
			exp[5],
			exp[9] * 2,
			exp[7],
			exp[8],
			exp[9],
		},
	}
	ts := newTestWeighted()
	ts.Reweight(6, ts.weights[len(ts.weights)-1]*2)
	if !reflect.DeepEqual(ts, want) {
		t.Fatalf("unexpected new Weighted value:\ngot: %#v\nwant:%#v", ts, want)
	}
	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		ts := newTestWeighted()
		ts.Reweight(6, ts.weights[len(ts.weights)-1]*2)
		item, ok := ts.Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}
	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}
	if f[6] < f[9] {
		t.Errorf("unexpected selection rate for re-weighted item: got: %v want: %v", f[6], f[9])
	}
	if reflect.DeepEqual(f[:6], obt[:6]) {
		t.Fatalf("unexpected selection: too many elements chosen in range:\ngot: %v\nwant:%v", f[:6], obt[:6])
	}
	if reflect.DeepEqual(f[7:], obt[7:]) {
		t.Fatalf("unexpected selection: too many elements chosen in range:\ngot: %v\nwant:%v", f[7:], obt[7:])
	}
}
func TestWeightedUnseeded(t *testing.T) {
	rand.Seed(0)
	want := Weighted{
		weights: []float64{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, 1 << 8, 1 << 9},
		heap: []float64{
			exp[0] + exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9] + exp[2] + exp[5] + exp[6],
			exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9],
			exp[2] + exp[5] + exp[6],
			exp[3] + exp[7] + exp[8],
			exp[4] + exp[9],
			exp[5],
			exp[6],
			exp[7],
			exp[8],
			exp[9],
		},
	}
	ts := newTestWeighted()
	if !reflect.DeepEqual(ts, want) {
		t.Fatalf("unexpected new Weighted value:\ngot: %#v\nwant:%#v", ts, want)
	}
	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		item, ok := newTestWeighted().Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}
	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}
	if !reflect.DeepEqual(f, obt) {
		t.Fatalf("unexpected selection:\ngot: %#v\nwant:%#v", f, obt)
	}

	// Check that this is within statistical expectations - we know this is true for this set.
	X := chi2(f, exp)
	if X >= sigChi2 {
		t.Errorf("H₀: d(Sample) = d(Expect), H₁: d(S) ≠ d(Expect). df = %d, p = 0.05, X² threshold = %.2f, X² = %f", len(f)-1, sigChi2, X)
	}
}
func TestCategoricalCDF(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0, 4},
	} {
		c := make([]float64, len(test))
		copy(c, test)
		floats.Scale(1/floats.Sum(c), c)
		sum := make([]float64, len(test))
		floats.CumSum(sum, c)

		dist := NewCategorical(test, nil)
		cdf := dist.CDF(-0.5)
		if cdf != 0 {
			t.Errorf("CDF of negative number not zero")
		}
		for i := range c {
			cdf := dist.CDF(float64(i))
			if math.Abs(cdf-sum[i]) > 1e-14 {
				t.Errorf("CDF mismatch %v. Want %v, got %v.", float64(i), sum[i], cdf)
			}
			cdfp := dist.CDF(float64(i) + 0.5)
			if cdfp != cdf {
				t.Errorf("CDF mismatch for non-integer input")
			}
		}
	}
}
func TestCategoricalProb(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0},
	} {
		dist := NewCategorical(test, nil)
		norm := make([]float64, len(test))
		copy(norm, test)
		floats.Scale(1/floats.Sum(norm), norm)
		for i, v := range norm {
			p := dist.Prob(float64(i))
			if math.Abs(p-v) > 1e-14 {
				t.Errorf("Probability mismatch element %d", i)
			}
			p = dist.Prob(float64(i) + 0.5)
			if p != 0 {
				t.Errorf("Non-zero probability for non-integer x")
			}
		}
		p := dist.Prob(-1)
		if p != 0 {
			t.Errorf("Non-zero probability for -1")
		}
		p = dist.Prob(float64(len(test)))
		if p != 0 {
			t.Errorf("Non-zero probability for len(test)")
		}
	}
}
// locationsAsy returns the node locations and weights of a Hermite quadrature rule
// with len(x) points.
func (h Hermite) locationsAsy(x, w []float64) {
	// A. Townsend, T. Trogdon, and S. Olver, Fast computation of Gauss quadrature
	// nodes and weights on the whole real line, IMA J. Numer. Anal.,
	// 36: 337–358, 2016. http://arxiv.org/abs/1410.5286

	// Find the positive locations and weights.
	n := len(x)
	l := n / 2
	xa := x[l:]
	wa := w[l:]
	for i := range xa {
		xa[i], wa[i] = h.locationsAsy0(i, n)
	}
	// Flip around zero -- copy the negative x locations with the corresponding
	// weights.
	if n%2 == 0 {
		l--
	}
	for i, v := range xa {
		x[l-i] = -v
	}
	for i, v := range wa {
		w[l-i] = v
	}
	sumW := floats.Sum(w)
	c := math.SqrtPi / sumW
	floats.Scale(c, w)
}
// Estimate computes model parameters using sufficient statistics.
func (g *Model) Estimate() error {
	if g.NSamples > minNumSamples {
		/* Estimate the mean. */
		floatx.Apply(floatx.ScaleFunc(1.0/g.NSamples), g.Sumx, g.Mean)
		/*
		 * Estimate the variance. sigma_sq = 1/n (sumxsq - 1/n sumx^2) or
		 * 1/n sumxsq - mean^2.
		 */
		tmp := g.variance // borrow as an intermediate array.
		// floatx.Apply(sq, g.Mean, g.tmpArray)
		floatx.Sq(g.tmpArray, g.Mean)
		floatx.Apply(floatx.ScaleFunc(1.0/g.NSamples), g.Sumxsq, tmp)
		floats.SubTo(g.variance, tmp, g.tmpArray)
		floatx.Apply(floatx.Floorv(smallVar), g.variance, nil)
	} else {
		/* Not enough training samples. */
		glog.Warningf("not enough training samples, name [%s], num samples [%e]",
			g.ModelName, g.NSamples)
		floatx.Apply(floatx.SetValueFunc(smallVar), g.variance, nil)
		floatx.Apply(floatx.SetValueFunc(0), g.Mean, nil)
	}
	g.setVariance(g.variance) // to update varInv and stddev.

	/* Update log Gaussian constant. */
	floatx.Log(g.tmpArray, g.variance)
	g.const2 = g.const1 - floats.Sum(g.tmpArray)/2.0

	glog.V(6).Infof("gaussian reest, name:%s, mean:%v, sd:%v", g.ModelName, g.Mean, g.StdDev)
	return nil
}
func fitnessRMSE(ind, targ *imgut.Image) float64 {
	// Images to vector
	dataInd := imgut.ToSlice(ind)
	dataTarg := imgut.ToSlice(targ)
	// (root mean square) error
	floats.Sub(dataInd, dataTarg)
	// (root mean) square error
	floats.Mul(dataInd, dataInd)
	// (root) mean square error
	totErr := floats.Sum(dataInd)
	return math.Sqrt(totErr / float64(len(dataInd)))
}
func ExampleStdErr() {
	x := []float64{8, 2, -9, 15, 4}
	weights := []float64{2, 2, 6, 7, 1}
	mean := Mean(x, weights)
	stdev := StdDev(x, weights)
	nSamples := floats.Sum(weights)
	stdErr := StdErr(stdev, nSamples)
	fmt.Printf("The standard deviation is %.4f and there are %g samples, so the mean\nis likely %.4f ± %.4f.", stdev, nSamples, mean, stdErr)
	// Output:
	// The standard deviation is 10.5733 and there are 18 samples, so the mean
	// is likely 4.1667 ± 2.4921.
}
func main() {
	runtime.GOMAXPROCS(runtime.NumCPU() - 2)
	gopath := os.Getenv("GOPATH")
	path := filepath.Join(gopath, "prof", "github.com", "reggo", "reggo", "nnet")

	nInputs := 10
	nOutputs := 3
	nLayers := 2
	nNeurons := 50
	nSamples := 1000000
	nRuns := 50

	config := &profile.Config{
		CPUProfile:  true,
		ProfilePath: path,
	}
	defer profile.Start(config).Stop()

	net, err := nnet.NewSimpleTrainer(nInputs, nOutputs, nLayers, nNeurons, nnet.Linear{})
	if err != nil {
		log.Fatal(err)
	}

	// Generate some random data
	inputs := mat64.NewDense(nSamples, nInputs, nil)
	outputs := mat64.NewDense(nSamples, nOutputs, nil)
	for i := 0; i < nSamples; i++ {
		for j := 0; j < nInputs; j++ {
			inputs.Set(i, j, rand.Float64())
		}
		for j := 0; j < nOutputs; j++ {
			outputs.Set(i, j, rand.Float64())
		}
	}

	// Create trainer
	prob := train.NewBatchGradBased(net, true, inputs, outputs, nil, nil, nil)
	nParameters := net.NumParameters()

	parameters := make([]float64, nParameters)
	derivative := make([]float64, nParameters)

	for i := 0; i < nRuns; i++ {
		net.RandomizeParameters()
		net.Parameters(parameters)
		prob.ObjGrad(parameters, derivative)
		fmt.Println(floats.Sum(derivative))
	}
}
func MakeFitLinScale(targetImage *imgut.Image) func(*imgut.Image) float64 {
	// Pre-compute image to slice of floats
	dataTarg := imgut.ToSlice(targetImage)
	// Pre-compute average
	avgt := floats.Sum(dataTarg) / float64(len(dataTarg))
	return func(indImage *imgut.Image) float64 {
		// Images to vector
		dataInd := imgut.ToSlice(indImage)
		// Compute average pixels
		avgy := floats.Sum(dataInd) / float64(len(dataInd))
		// Difference y - avgy
		y_avgy := make([]float64, len(dataInd))
		copy(y_avgy, dataInd)
		floats.AddConst(-avgy, y_avgy)
		// Difference t - avgt
		t_avgt := make([]float64, len(dataTarg))
		copy(t_avgt, dataTarg)
		floats.AddConst(-avgt, t_avgt)
		// Multiplication (t - avgt)(y - avgy)
		floats.Mul(t_avgt, y_avgy)
		// Summation
		numerator := floats.Sum(t_avgt)
		// Square (y - avgy)^2
		floats.Mul(y_avgy, y_avgy)
		denomin := floats.Sum(y_avgy)
		// Compute b-value
		b := numerator / denomin
		// Compute a-value
		a := avgt - b*avgy
		// Compute now the scaled RMSE, using y' = a + b*y
		floats.Scale(b, dataInd)      // b*y
		floats.AddConst(a, dataInd)   // a + b*y
		floats.Sub(dataInd, dataTarg) // (a + b*y - t)
		floats.Mul(dataInd, dataInd)  // (a + b*y - t)^2
		total := floats.Sum(dataInd)  // Sum(...)
		return math.Sqrt(total / float64(len(dataInd)))
	}
}
func MakeFitMSE(targetImage *imgut.Image) func(*imgut.Image) float64 {
	dataTarg := imgut.ToSliceChans(targetImage, "R")
	return func(indImage *imgut.Image) float64 {
		// Get data
		dataImg := imgut.ToSliceChans(indImage, "R")
		// Difference (X - Y)
		floats.Sub(dataImg, dataTarg)
		// Squared (X - Y)^2
		floats.Mul(dataImg, dataImg)
		// Summation
		return floats.Sum(dataImg) / float64(len(dataImg))
	}
}
func sampleCategorical(t *testing.T, dist Categorical, nSamples int) []float64 {
	counts := make([]float64, dist.Len())
	for i := 0; i < nSamples; i++ {
		v := dist.Rand()
		if float64(int(v)) != v {
			t.Fatalf("Random number is not an integer")
		}
		counts[int(v)]++
	}
	sum := floats.Sum(counts)
	floats.Scale(1/sum, counts)
	return counts
}
// CovarianceMatrix calculates a covariance matrix (also known as a
// variance-covariance matrix) from a matrix of data, using a two-pass
// algorithm.
//
// The weights must have length equal to the number of rows in
// input data matrix x. If cov is nil, then a new matrix with appropriate size will
// be constructed. If cov is not nil, it should have the same number of columns as the
// input data matrix x, and it will be used as the destination for the covariance
// data. Weights must not be negative.
func CovarianceMatrix(cov *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense {
	// This is the matrix version of the two-pass algorithm. It doesn't use the
	// additional floating point error correction that the Covariance function uses
	// to reduce the impact of rounding during centering.

	r, c := x.Dims()

	if cov == nil {
		cov = mat64.NewSymDense(c, nil)
	} else if n := cov.Symmetric(); n != c {
		panic(matrix.ErrShape)
	}

	var xt mat64.Dense
	xt.Clone(x.T())
	// Subtract the mean of each of the columns.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		// This will panic with ErrShape if len(weights) != len(v), so
		// we don't have to check the size later.
		mean := Mean(v, weights)
		floats.AddConst(-mean, v)
	}

	if weights == nil {
		// Calculate the normalization factor
		// scaled by the sample size.
		cov.SymOuterK(1/(float64(r)-1), &xt)
		return cov
	}

	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
	sqrtwts := make([]float64, r)
	for i, w := range weights {
		if w < 0 {
			panic("stat: negative covariance matrix weights")
		}
		sqrtwts[i] = math.Sqrt(w)
	}
	// Weight the rows.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		floats.Mul(v, sqrtwts)
	}

	// Calculate the normalization factor
	// scaled by the weighted sample size.
	cov.SymOuterK(1/(floats.Sum(weights)-1), &xt)
	return cov
}
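// The following is a minimal usage sketch for CovarianceMatrix, not taken from the
// source repository. It assumes it sits alongside CovarianceMatrix in the same
// package, that mat64 is github.com/gonum/matrix/mat64, that fmt is imported, and
// that the data values are made up purely for illustration.
func ExampleCovarianceMatrix() {
	// Three observations of two variables; the second column is twice the first.
	x := mat64.NewDense(3, 2, []float64{
		1, 2,
		2, 4,
		3, 6,
	})
	// nil weights means every observation counts equally; nil cov allocates a new matrix.
	cov := CovarianceMatrix(nil, x, nil)
	fmt.Printf("cov(0,0)=%v cov(0,1)=%v cov(1,1)=%v\n", cov.At(0, 0), cov.At(0, 1), cov.At(1, 1))
	// For this data the sample variances are 1 and 4, and the covariance is 2,
	// so the expected print is: cov(0,0)=1 cov(0,1)=2 cov(1,1)=4
}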
// LogDet returns the log of the determinant and the sign of the determinant
// for the matrix that has been factorized. Numerical stability in product and
// division expressions is generally improved by working in log space.
func (lu *LU) LogDet() (det float64, sign float64) {
	_, n := lu.lu.Dims()
	logDiag := make([]float64, n)
	sign = 1.0
	for i := 0; i < n; i++ {
		v := lu.lu.at(i, i)
		if v < 0 {
			sign *= -1
		}
		if lu.pivot[i] != i {
			sign *= -1
		}
		logDiag[i] = math.Log(math.Abs(v))
	}
	return floats.Sum(logDiag), sign
}
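// The following is an illustrative sketch of how LogDet is typically consumed, not
// taken from the source repository: factorize a matrix, then recover the determinant
// as sign * exp(logdet). It assumes it lives in the same mat64 package (so NewDense
// and LU are unqualified), that fmt and math are imported, and that the matrix values
// are made up.
func ExampleLU_LogDet() {
	a := NewDense(2, 2, []float64{
		2, 1,
		1, 3,
	})
	var lu LU
	lu.Factorize(a)
	logdet, sign := lu.LogDet()
	det := sign * math.Exp(logdet)
	fmt.Printf("det ≈ %.1f\n", det) // determinant of the matrix above is 2*3 - 1*1 = 5
}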
// Mean computes the weighted mean of the data set.
//  sum_i {w_i * x_i} / sum_i {w_i}
// If weights is nil then all of the weights are 1. If weights is not nil, then
// len(x) must equal len(weights).
func Mean(x, weights []float64) float64 {
	if weights == nil {
		return floats.Sum(x) / float64(len(x))
	}
	if len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	var (
		sumValues  float64
		sumWeights float64
	)
	for i, w := range weights {
		sumValues += w * x[i]
		sumWeights += w
	}
	return sumValues / sumWeights
}
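// The following is a minimal usage sketch for Mean (illustrative, not from the
// source repository). It assumes it sits in the same package as Mean above, that
// fmt is imported, and that the numbers are made up.
func ExampleMean() {
	x := []float64{2, 4, 6}
	w := []float64{1, 1, 2}

	fmt.Println(Mean(x, nil)) // unweighted: (2+4+6)/3 = 4
	fmt.Println(Mean(x, w))   // weighted:   (1*2+1*4+2*6)/(1+1+2) = 4.5
	// Output:
	// 4
	// 4.5
}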
// PrincipalComponents returns the principal component direction vectors and
// the column variances of the principal component scores, vecs * a, computed
// using the singular value decomposition of the input. The input a is an n×d
// matrix where each row is an observation and each column represents a variable.
//
// PrincipalComponents centers the variables but does not scale the variance.
//
// The slice weights is used to weight the observations. If weights is nil,
// each weight is considered to have a value of one, otherwise the length of
// weights must match the number of observations or PrincipalComponents will
// panic.
//
// On successful completion, the principal component direction vectors are
// returned in vecs as a d×min(n, d) matrix, and the variances are returned in
// vars as a min(n, d)-long slice in descending sort order.
//
// If no singular value decomposition is possible, vecs and vars are returned
// nil and ok is returned false.
func PrincipalComponents(a mat64.Matrix, weights []float64) (vecs *mat64.Dense, vars []float64, ok bool) {
	n, d := a.Dims()
	if weights != nil && len(weights) != n {
		panic("stat: len(weights) != observations")
	}

	centered := mat64.NewDense(n, d, nil)
	col := make([]float64, n)
	for j := 0; j < d; j++ {
		mat64.Col(col, j, a)
		floats.AddConst(-Mean(col, weights), col)
		centered.SetCol(j, col)
	}
	for i, w := range weights {
		floats.Scale(math.Sqrt(w), centered.RawRowView(i))
	}

	kind := matrix.SVDFull
	if n > d {
		kind = matrix.SVDThin
	}
	var svd mat64.SVD
	ok = svd.Factorize(centered, kind)
	if !ok {
		return nil, nil, false
	}

	vecs = &mat64.Dense{}
	vecs.VFromSVD(&svd)
	if n < d {
		// Don't retain columns that are not valid direction vectors.
		vecs.Clone(vecs.View(0, 0, d, n))
	}
	vars = svd.Values(nil)
	var f float64
	if weights == nil {
		f = 1 / float64(n-1)
	} else {
		f = 1 / (floats.Sum(weights) - 1)
	}
	for i, v := range vars {
		vars[i] = f * v * v
	}
	return vecs, vars, true
}
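// The following is an illustrative sketch of calling PrincipalComponents, not from
// the source repository. It assumes it sits in the same package, that mat64 is the
// gonum mat64 package, that fmt is imported, and that the data values are made up.
func ExamplePrincipalComponents() {
	// Five observations of three variables.
	a := mat64.NewDense(5, 3, []float64{
		1.0, 2.0, 3.0,
		2.1, 3.9, 6.2,
		2.9, 6.1, 8.8,
		4.2, 7.8, 12.1,
		5.0, 10.0, 15.0,
	})
	vecs, vars, ok := PrincipalComponents(a, nil)
	if !ok {
		fmt.Println("SVD failed")
		return
	}
	r, c := vecs.Dims()
	fmt.Printf("direction vectors: %d×%d, variances: %d\n", r, c, len(vars))
	// For a 5×3 input the direction vectors form a 3×3 matrix and there are
	// 3 variances, sorted in descending order.
}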
// CDF returns the empirical cumulative distribution function value of x at q, that
// is, the fraction of the samples less than or equal to q. The
// exact behavior is determined by the CumulantKind. CDF is theoretically
// the inverse of the Quantile function, though it may not be the actual inverse
// for all values q and CumulantKinds.
//
// The x data must be sorted in increasing order. If weights is nil then all
// of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
//
// CumulantKind behaviors:
//  - Empirical: Returns the lowest fraction for which q is greater than or equal
//    to that fraction of samples
func CDF(q float64, c CumulantKind, x, weights []float64) float64 {
	if weights != nil && len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	if floats.HasNaN(x) {
		return math.NaN()
	}
	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}
	if q < x[0] {
		return 0
	}
	if q >= x[len(x)-1] {
		return 1
	}

	var sumWeights float64
	if weights == nil {
		sumWeights = float64(len(x))
	} else {
		sumWeights = floats.Sum(weights)
	}

	// Calculate the index
	switch c {
	case Empirical:
		// Find the smallest value that is greater than that percent of the samples
		var w float64
		for i, v := range x {
			if v > q {
				return w / sumWeights
			}
			if weights == nil {
				w++
			} else {
				w += weights[i]
			}
		}
		panic("impossible")
	default:
		panic("stat: bad cumulant kind")
	}
}
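// The following is a minimal usage sketch for CDF (illustrative, not from the source
// repository). It assumes the same package as CDF above, that Empirical is the
// CumulantKind defined there, that fmt is imported, and that the data are made up
// and already sorted as required.
func ExampleCDF() {
	x := []float64{1, 2, 3, 4}

	fmt.Println(CDF(2.5, Empirical, x, nil)) // 2 of the 4 samples are <= 2.5
	fmt.Println(CDF(4, Empirical, x, nil))   // all samples are <= 4
	// Output:
	// 0.5
	// 1
}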
// Fit sets the parameters of the probability distribution from the
// data samples x with relative weights w.
// If weights is nil, then all the weights are 1.
// If weights is not nil, then the len(weights) must equal len(samples).
//
// Note: Laplace distribution has no FitPrior because it has no sufficient
// statistics.
func (l *Laplace) Fit(samples, weights []float64) {
	if len(samples) != len(weights) {
		panic(badLength)
	}
	if len(samples) == 0 {
		panic(badNoSamples)
	}
	if len(samples) == 1 {
		l.Mu = samples[0]
		l.Scale = 0
		return
	}

	var (
		sortedSamples []float64
		sortedWeights []float64
	)
	if sort.Float64sAreSorted(samples) {
		sortedSamples = samples
		sortedWeights = weights
	} else {
		// Need to copy the input so it isn't affected by the sorting.
		sortedSamples = make([]float64, len(samples))
		copy(sortedSamples, samples)
		sortedWeights = make([]float64, len(samples))
		copy(sortedWeights, weights)

		stat.SortWeighted(sortedSamples, sortedWeights)
	}

	// The (weighted) median of the samples is the maximum likelihood estimate
	// of the mean parameter
	// TODO: Rethink quantile type when stat has more options
	l.Mu = stat.Quantile(0.5, stat.Empirical, sortedSamples, sortedWeights)

	sumWeights := floats.Sum(weights)

	// The scale parameter is the average absolute distance
	// between the sample and the mean
	absError := stat.MomentAbout(1, samples, l.Mu, weights)
	l.Scale = absError / sumWeights
}
// SuffStat computes the sufficient statistics of a set of samples to update
// the distribution. The sufficient statistics are stored in place, and the
// effective number of samples are returned.
//
// The exponential distribution has one sufficient statistic, the average rate
// of the samples.
//
// If weights is nil, the weights are assumed to be 1, otherwise panics if
// len(samples) != len(weights). Panics if len(suffStat) != 1.
func (Exponential) SuffStat(samples, weights, suffStat []float64) (nSamples float64) {
	if len(weights) != 0 && len(samples) != len(weights) {
		panic("dist: slice size mismatch")
	}
	if len(suffStat) != 1 {
		panic("exponential: wrong suffStat length")
	}
	if len(weights) == 0 {
		nSamples = float64(len(samples))
	} else {
		nSamples = floats.Sum(weights)
	}
	mean := stat.Mean(samples, weights)
	suffStat[0] = 1 / mean
	return nSamples
}
// SuffStat computes the sufficient statistics of a set of samples to update
// the distribution. The sufficient statistics are stored in place, and the
// effective number of samples are returned.
//
// The exponential distribution has one sufficient statistic, the average rate
// of the samples.
//
// If weights is nil, the weights are assumed to be 1, otherwise panics if
// len(samples) != len(weights). Panics if len(suffStat) != 1.
func (Exponential) SuffStat(samples, weights, suffStat []float64) (nSamples float64) {
	if len(weights) != 0 && len(samples) != len(weights) {
		panic(badLength)
	}
	if len(suffStat) != 1 {
		panic(badSuffStat)
	}
	if len(weights) == 0 {
		nSamples = float64(len(samples))
	} else {
		nSamples = floats.Sum(weights)
	}
	mean := stat.Mean(samples, weights)
	suffStat[0] = 1 / mean
	return nSamples
}
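// The following is an illustrative sketch of using the Exponential SuffStat above
// (not from the source repository). It assumes the same package as the Exponential
// type, that fmt is imported, and that the sample values are made up.
func ExampleExponential_SuffStat() {
	samples := []float64{1, 2, 3}
	suffStat := make([]float64, 1)

	// nil weights means each sample has weight 1.
	n := Exponential{}.SuffStat(samples, nil, suffStat)
	fmt.Println(n, suffStat[0]) // 3 samples, rate = 1/mean = 1/2
	// Output:
	// 3 0.5
}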
// NewModel creates a new Gaussian model.
func NewModel(dim int, options ...Option) *Model {
	g := &Model{
		ModelName:   "Gaussian",
		ModelDim:    dim,
		Diag:        true,
		variance:    make([]float64, dim),
		varianceInv: make([]float64, dim),
		tmpArray:    make([]float64, dim),
	}
	g.Type = reflect.TypeOf(*g).String()

	// Set options.
	for _, option := range options {
		option(g)
	}
	if len(g.Sumx) == 0 {
		g.Sumx = make([]float64, dim)
	}
	if len(g.Sumxsq) == 0 {
		g.Sumxsq = make([]float64, dim)
	}
	if g.Mean == nil {
		g.Mean = make([]float64, dim)
	}
	if g.StdDev == nil {
		g.StdDev = make([]float64, dim)
		floatx.Apply(floatx.SetValueFunc(smallSD), g.StdDev, nil)
	}
	floatx.Sq(g.variance, g.StdDev)

	// Initializes variance, varianceInv, and StdDev.
	g.setVariance(g.variance)

	floatx.Log(g.tmpArray, g.variance)
	g.const1 = -float64(g.ModelDim) * math.Log(2.0*math.Pi) / 2.0
	g.const2 = g.const1 - floats.Sum(g.tmpArray)/2.0
	return g
}
// Quantile returns the sample of x such that x is greater than or
// equal to the fraction p of samples. The exact behavior is determined by the
// CumulantKind, and p should be a number between 0 and 1. Quantile is theoretically
// the inverse of the CDF function, though it may not be the actual inverse
// for all values p and CumulantKinds.
//
// The x data must be sorted in increasing order. If weights is nil then all
// of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
//
// CumulantKind behaviors:
//  - Empirical: Returns the lowest value q for which q is greater than or equal
//    to the fraction p of samples
func Quantile(p float64, c CumulantKind, x, weights []float64) float64 {
	if !(p >= 0 && p <= 1) {
		panic("stat: percentile out of bounds")
	}
	if weights != nil && len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	if floats.HasNaN(x) {
		return math.NaN() // This is needed because the algorithm breaks otherwise.
	}
	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}

	var sumWeights float64
	if weights == nil {
		sumWeights = float64(len(x))
	} else {
		sumWeights = floats.Sum(weights)
	}
	switch c {
	case Empirical:
		var cumsum float64
		fidx := p * sumWeights
		for i := range x {
			if weights == nil {
				cumsum++
			} else {
				cumsum += weights[i]
			}
			if cumsum >= fidx {
				return x[i]
			}
		}
		panic("impossible")
	default:
		panic("stat: bad cumulant kind")
	}
}
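// The following is a minimal usage sketch for Quantile (illustrative, not from the
// source repository). It assumes the same package as Quantile above, that fmt is
// imported, and that the data are made up and sorted.
func ExampleQuantile() {
	x := []float64{1, 2, 3, 4}

	fmt.Println(Quantile(0.5, Empirical, x, nil))  // smallest sample covering half the mass
	fmt.Println(Quantile(0.25, Empirical, x, nil)) // smallest sample covering a quarter of the mass
	// Output:
	// 2
	// 1
}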
// SuffStat computes the sufficient statistics of a set of samples to update
// the distribution. The sufficient statistics are stored in place, and the
// effective number of samples are returned.
//
// The normal distribution has two sufficient statistics, the mean of the samples
// and the standard deviation of the samples.
//
// If weights is nil, the weights are assumed to be 1, otherwise panics if
// len(samples) != len(weights). Panics if len(suffStat) != 2.
func (Normal) SuffStat(samples, weights, suffStat []float64) (nSamples float64) {
	lenSamp := len(samples)
	if len(weights) != 0 && len(samples) != len(weights) {
		panic("dist: slice size mismatch")
	}
	if len(suffStat) != 2 {
		panic("dist: incorrect suffStat length")
	}
	if len(weights) == 0 {
		nSamples = float64(lenSamp)
	} else {
		nSamples = floats.Sum(weights)
	}

	mean := stat.Mean(samples, weights)
	suffStat[0] = mean

	// Use Moment and not StdDev because we want it to be uncorrected
	variance := stat.Moment(2, samples, mean, weights)
	suffStat[1] = math.Sqrt(variance)
	return nSamples
}
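// The following is an illustrative sketch of using the Normal SuffStat above (not
// from the source repository). It assumes the same package as the Normal type, that
// fmt is imported, and that the sample values are made up.
func ExampleNormal_SuffStat() {
	samples := []float64{1, 2, 3}
	suffStat := make([]float64, 2)

	n := Normal{}.SuffStat(samples, nil, suffStat)
	// For these samples the mean is 2 and the uncorrected standard
	// deviation is sqrt(2/3) ≈ 0.816.
	fmt.Printf("n=%v mean=%v sd=%.3f\n", n, suffStat[0], suffStat[1])
}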
// CovarianceMatrix calculates a covariance matrix (also known as a
// variance-covariance matrix) from a matrix of data, using a two-pass
// algorithm. The matrix returned will be symmetric and square.
//
// The weights wts should have the length equal to the number of rows in
// input data matrix x. If cov is nil, then a new matrix with appropriate size will
// be constructed. If cov is not nil, it should be a square matrix with the same
// number of columns as the input data matrix x, and it will be used as the receiver
// for the covariance data. Weights cannot be negative.
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
	// This is the matrix version of the two-pass algorithm. It doesn't use the
	// additional floating point error correction that the Covariance function uses
	// to reduce the impact of rounding during centering.

	// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
	// the returned type from a *mat.Dense to a *mat.Symmetric.

	r, c := x.Dims()

	if cov == nil {
		cov = mat64.NewDense(c, c, nil)
	} else if covr, covc := cov.Dims(); covr != covc || covc != c {
		panic(mat64.ErrShape)
	}

	var xt mat64.Dense
	xt.TCopy(x)
	// Subtract the mean of each of the columns.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		// This will panic with ErrShape if len(wts) != len(v), so
		// we don't have to check the size later.
		mean := Mean(v, wts)
		floats.AddConst(-mean, v)
	}

	var n float64
	if wts == nil {
		n = float64(r)
		cov.MulTrans(&xt, false, &xt, true)
		// Scale by the sample size.
		cov.Scale(1/(n-1), cov)
		return cov
	}

	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
	sqrtwts := make([]float64, r)
	for i, w := range wts {
		if w < 0 {
			panic("stat: negative covariance matrix weights")
		}
		sqrtwts[i] = math.Sqrt(w)
	}
	// Weight the rows.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		floats.Mul(v, sqrtwts)
	}

	// Calculate the normalization factor.
	n = floats.Sum(wts)
	cov.MulTrans(&xt, false, &xt, true)
	// Scale by the weighted sample size.
	cov.Scale(1/(n-1), cov)
	return cov
}
func main() {
	fmt.Println("Sum: ", floats.Sum(a))
	fmt.Println("Product:", floats.Prod(a))
}
// KolmogorovSmirnov computes the largest distance between two empirical CDFs.
// Each dataset x and y consists of sample locations and counts, xWeights and
// yWeights, respectively.
//
// x and y may have different lengths, though len(x) must equal len(xWeights), and
// len(y) must equal len(yWeights). Both x and y must be sorted.
//
// Special cases are:
//  = 0 if len(x) == len(y) == 0
//  = 1 if len(x) == 0, len(y) != 0 or len(x) != 0 and len(y) == 0
func KolmogorovSmirnov(x, xWeights, y, yWeights []float64) float64 {
	if xWeights != nil && len(x) != len(xWeights) {
		panic("stat: slice length mismatch")
	}
	if yWeights != nil && len(y) != len(yWeights) {
		panic("stat: slice length mismatch")
	}
	if len(x) == 0 || len(y) == 0 {
		if len(x) == 0 && len(y) == 0 {
			return 0
		}
		return 1
	}

	if floats.HasNaN(x) {
		return math.NaN()
	}
	if floats.HasNaN(y) {
		return math.NaN()
	}

	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}
	if !sort.Float64sAreSorted(y) {
		panic("y data are not sorted")
	}

	xWeightsNil := xWeights == nil
	yWeightsNil := yWeights == nil

	var (
		maxDist    float64
		xSum, ySum float64
		xCdf, yCdf float64
		xIdx, yIdx int
	)

	if xWeightsNil {
		xSum = float64(len(x))
	} else {
		xSum = floats.Sum(xWeights)
	}
	if yWeightsNil {
		ySum = float64(len(y))
	} else {
		ySum = floats.Sum(yWeights)
	}

	xVal := x[0]
	yVal := y[0]

	// Algorithm description:
	// The goal is to find the maximum difference in the empirical CDFs for the
	// two datasets. The CDFs are piecewise-constant, and thus the distance
	// between the CDFs will only change at the values themselves.
	//
	// To find the maximum distance, step through the data in ascending order
	// of value between the two datasets. At each step, compute the empirical CDF
	// and compare the local distance with the maximum distance.
	// Due to some corner cases, equal data entries must be tallied simultaneously.
	for {
		switch {
		case xVal < yVal:
			xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
		case yVal < xVal:
			yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
		case xVal == yVal:
			newX := x[xIdx]
			newY := y[yIdx]
			if newX < newY {
				xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
			} else if newY < newX {
				yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
			} else {
				// Update them both, they'll be equal next time and the right
				// thing will happen
				xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
				yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
			}
		default:
			panic("unreachable")
		}

		dist := math.Abs(xCdf - yCdf)
		if dist > maxDist {
			maxDist = dist
		}

		// Both xCdf and yCdf will equal 1 at the end, so if we have reached the
		// end of either sample list, the distance is as large as it can be.
		if xIdx == len(x) || yIdx == len(y) {
			return maxDist
		}
	}
}
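// The following is a minimal usage sketch for KolmogorovSmirnov (illustrative, not
// from the source repository). It assumes the same package as KolmogorovSmirnov
// above, that fmt is imported, and that the two samples are made up and sorted as
// required.
func ExampleKolmogorovSmirnov() {
	x := []float64{1, 2, 3}
	y := []float64{2, 3, 4}

	d := KolmogorovSmirnov(x, nil, y, nil)
	// The two empirical CDFs never differ by more than 1/3 for this data.
	fmt.Printf("D = %.4f\n", d)
}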
func (b *BatchGradient) funcGrad(params, deriv []float64) float64 {
	nParameters := len(deriv)

	// Send out all of the work
	done := make(chan result)
	sz := b.nSamples / b.Workers
	sent := 0
	for i := 0; i < b.Workers; i++ {
		outputDim := b.outputDim
		last := sent + sz
		if i == b.Workers-1 {
			last = b.nSamples
		}
		go func(sent, last int) {
			lossDeriver := b.Trainable.NewLossDeriver()
			predOutput := make([]float64, outputDim)
			dLossDPred := make([]float64, outputDim)
			dLossDParam := make([]float64, nParameters)
			outputs := make([]float64, outputDim)
			tmpderiv := make([]float64, nParameters)
			var totalLoss float64
			for i := sent; i < last; i++ {
				lossDeriver.Predict(params, b.features.RawRowView(i), predOutput)
				b.Outputs.Row(outputs, i)
				loss := b.Losser.LossDeriv(predOutput, outputs, dLossDPred)
				if b.Weights == nil {
					totalLoss += loss
				} else {
					totalLoss += b.Weights[i] * loss
				}
				lossDeriver.Deriv(params, b.features.RawRowView(i), predOutput, dLossDPred, dLossDParam)
				if b.Weights != nil {
					floats.Scale(b.Weights[i], dLossDParam)
				}
				floats.Add(tmpderiv, dLossDParam)
			}
			done <- result{totalLoss, tmpderiv}
		}(sent, last)
		sent += sz
	}

	// Collect all the results
	var totalLoss float64
	for i := range deriv {
		deriv[i] = 0
	}
	for i := 0; i < b.Workers; i++ {
		w := <-done
		totalLoss += w.loss
		floats.Add(deriv, w.deriv)
	}

	// Compute the regularizer
	if b.Regularizer != nil {
		tmp := make([]float64, nParameters)
		totalLoss += b.Regularizer.LossDeriv(params, tmp)
		floats.Add(deriv, tmp)
	}

	sumWeights := float64(b.nSamples)
	if b.Weights != nil {
		sumWeights = floats.Sum(b.Weights)
	}
	totalLoss /= sumWeights
	floats.Scale(1/sumWeights, deriv)
	return totalLoss
}
// normalSlice normalizes the vector of values, summing them and dividing each
// element by the total so that the slice sums to 1.
func normalSlice(v []float64) {
	tot := floats.Sum(v)
	floats.Scale(1.0/tot, v)
}
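// A tiny illustrative sketch of normalSlice in use (not from the source repository;
// it assumes the same package, that fmt is imported, and that the values are made up).
func exampleNormalSlice() {
	v := []float64{1, 3}
	normalSlice(v)
	fmt.Println(v) // [0.25 0.75]
}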
func (Linear) Func(x []float64) float64 {
	return floats.Sum(x)
}