Example #1
func TestWeightedTimeSeeded(t *testing.T) {
	if !*prob {
		t.Skip("probabilistic testing not requested")
	}
	t.Log("Note: This test is stochastic and is expected to fail with probability ≈ 0.05.")

	rand.Seed(time.Now().Unix())

	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		item, ok := newTestWeighted().Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}

	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}

	// Check that our obtained values are within statistical expectations for p = 0.05.
	// This will not be true approximately 1 in 20 tests.
	X := chi2(f, exp)
	if X >= sigChi2 {
		t.Errorf("H₀: d(Sample) = d(Expect), H₁: d(S) ≠ d(Expect). df = %d, p = 0.05, X² threshold = %.2f, X² = %f", len(f)-1, sigChi2, X)
	}
}
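The chi2 and sigChi2 helpers are not shown in this example. For reference, here is a minimal sketch of a Pearson chi-squared statistic consistent with how the test uses it (an assumed reconstruction, not the package's actual helper):

// chi2 returns the Pearson X² statistic, Σ (obs-exp)²/exp, for equal-length
// slices of observed and expected counts.
func chi2(obs, exp []float64) float64 {
	var sum float64
	for i, o := range obs {
		d := o - exp[i]
		sum += d * d / exp[i]
	}
	return sum
}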
Example #2
func TestWeightIncrease(t *testing.T) {
	rand.Seed(0)

	want := Weighted{
		weights: []float64{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 9 * 2, 1 << 7, 1 << 8, 1 << 9},
		heap: []float64{
			exp[0] + exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9] + exp[2] + exp[5] + exp[9]*2,
			exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9],
			exp[2] + exp[5] + exp[9]*2,
			exp[3] + exp[7] + exp[8],
			exp[4] + exp[9],
			exp[5],
			exp[9] * 2,
			exp[7],
			exp[8],
			exp[9],
		},
	}

	ts := newTestWeighted()
	ts.Reweight(6, ts.weights[len(ts.weights)-1]*2)
	if !reflect.DeepEqual(ts, want) {
		t.Fatalf("unexpected new Weighted value:\ngot: %#v\nwant:%#v", ts, want)
	}

	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		ts := newTestWeighted()
		ts.Reweight(6, ts.weights[len(ts.weights)-1]*2)
		item, ok := ts.Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}

	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}

	if f[6] < f[9] {
		t.Errorf("unexpected selection rate for re-weighted item: got: %v want:%v", f[6], f[9])
	}
	if reflect.DeepEqual(f[:6], obt[:6]) {
		t.Fatal("unexpected selection: too many elements chosen in range:\ngot: %v\nwant:%v",
			f[:6], obt[:6])
	}
	if reflect.DeepEqual(f[7:], obt[7:]) {
		t.Fatal("unexpected selection: too many elements chosen in range:\ngot: %v\nwant:%v",
			f[7:], obt[7:])
	}
}
Example #3
func TestWeightedUnseeded(t *testing.T) {
	rand.Seed(0)

	want := Weighted{
		weights: []float64{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, 1 << 8, 1 << 9},
		heap: []float64{
			exp[0] + exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9] + exp[2] + exp[5] + exp[6],
			exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9],
			exp[2] + exp[5] + exp[6],
			exp[3] + exp[7] + exp[8],
			exp[4] + exp[9],
			exp[5],
			exp[6],
			exp[7],
			exp[8],
			exp[9],
		},
	}

	ts := newTestWeighted()
	if !reflect.DeepEqual(ts, want) {
		t.Fatalf("unexpected new Weighted value:\ngot: %#v\nwant:%#v", ts, want)
	}

	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		item, ok := newTestWeighted().Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}

	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}

	if !reflect.DeepEqual(f, obt) {
		t.Fatalf("unexpected selection:\ngot: %#v\nwant:%#v", f, obt)
	}

	// Check that this is within statistical expectations - we know this is true for this set.
	X := chi2(f, exp)
	if X >= sigChi2 {
		t.Errorf("H₀: d(Sample) = d(Expect), H₁: d(S) ≠ d(Expect). df = %d, p = 0.05, X² threshold = %.2f, X² = %f", len(f)-1, sigChi2, X)
	}
}
Example #4
func TestCategoricalCDF(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0, 4},
	} {
		c := make([]float64, len(test))
		copy(c, test)
		floats.Scale(1/floats.Sum(c), c)
		sum := make([]float64, len(test))
		floats.CumSum(sum, c)

		dist := NewCategorical(test, nil)
		cdf := dist.CDF(-0.5)
		if cdf != 0 {
			t.Errorf("CDF of negative number not zero")
		}
		for i := range c {
			cdf := dist.CDF(float64(i))
			if math.Abs(cdf-sum[i]) > 1e-14 {
				t.Errorf("CDF mismatch %v. Want %v, got %v.", float64(i), sum[i], cdf)
			}
			cdfp := dist.CDF(float64(i) + 0.5)
			if cdfp != cdf {
				t.Errorf("CDF mismatch for non-integer input")
			}
		}
	}
}
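For orientation, a minimal usage sketch of the API this test exercises, assuming the same package context (NewCategorical, CDF):

dist := NewCategorical([]float64{1, 2, 3, 0, 4}, nil)
// The weights are normalized internally, so the total mass is 10.
fmt.Println(dist.CDF(1))   // (1+2)/10 = 0.3
fmt.Println(dist.CDF(1.9)) // still 0.3: the CDF is flat between integers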
Example #5
func TestCategoricalProb(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0},
	} {
		dist := NewCategorical(test, nil)
		norm := make([]float64, len(test))
		copy(norm, test)
		floats.Scale(1/floats.Sum(norm), norm)
		for i, v := range norm {
			p := dist.Prob(float64(i))
			if math.Abs(p-v) > 1e-14 {
				t.Errorf("Probability mismatch element %d", i)
			}
			p = dist.Prob(float64(i) + 0.5)
			if p != 0 {
				t.Errorf("Non-zero probability for non-integer x")
			}
		}
		p := dist.Prob(-1)
		if p != 0 {
			t.Errorf("Non-zero probability for -1")
		}
		p = dist.Prob(float64(len(test)))
		if p != 0 {
			t.Errorf("Non-zero probability for len(test)")
		}
	}
}
Example #6
// locationsAsy computes the node locations and weights of a Hermite quadrature
// rule with len(x) points, storing them in x and w.
func (h Hermite) locationsAsy(x, w []float64) {
	// A. Townsend, T. Trogdon, and S. Olver, Fast computation of Gauss quadrature
	// nodes and weights on the whole real line, IMA J. Numer. Anal.,
	// 36: 337–358, 2016. http://arxiv.org/abs/1410.5286

	// Find the positive locations and weights.
	n := len(x)
	l := n / 2
	xa := x[l:]
	wa := w[l:]
	for i := range xa {
		xa[i], wa[i] = h.locationsAsy0(i, n)
	}
	// Flip around zero -- copy the negative x locations with the corresponding
	// weights.
	if n%2 == 0 {
		l--
	}
	for i, v := range xa {
		x[l-i] = -v
	}
	for i, v := range wa {
		w[l-i] = v
	}
	sumW := floats.Sum(w)
	c := math.SqrtPi / sumW
	floats.Scale(c, w)
}
Example #7
// Estimate computes model parameters using sufficient statistics.
func (g *Model) Estimate() error {

	if g.NSamples > minNumSamples {

		/* Estimate the mean. */
		floatx.Apply(floatx.ScaleFunc(1.0/g.NSamples), g.Sumx, g.Mean)
		/*
		 * Estimate the variance. sigma_sq = 1/n (sumxsq - 1/n sumx^2) or
		 * 1/n sumxsq - mean^2.
		 */
		tmp := g.variance // borrow as an intermediate array.

		//		floatx.Apply(sq, g.Mean, g.tmpArray)
		floatx.Sq(g.tmpArray, g.Mean)
		floatx.Apply(floatx.ScaleFunc(1.0/g.NSamples), g.Sumxsq, tmp)
		floats.SubTo(g.variance, tmp, g.tmpArray)
		floatx.Apply(floatx.Floorv(smallVar), g.variance, nil)
	} else {

		/* Not enough training samples. */
		glog.Warningf("not enough training samples, name [%s], num samples [%e]", g.ModelName, g.NSamples)
		floatx.Apply(floatx.SetValueFunc(smallVar), g.variance, nil)
		floatx.Apply(floatx.SetValueFunc(0), g.Mean, nil)
	}
	g.setVariance(g.variance) // to update varInv and stddev.

	/* Update log Gaussian constant. */
	floatx.Log(g.tmpArray, g.variance)
	g.const2 = g.const1 - floats.Sum(g.tmpArray)/2.0

	glog.V(6).Infof("gaussian reest, name:%s, mean:%v, sd:%v", g.ModelName, g.Mean, g.StdDev)
	return nil
}
Example #8
func fitnessRMSE(ind, targ *imgut.Image) float64 {
	// Images to vector
	dataInd := imgut.ToSlice(ind)
	dataTarg := imgut.ToSlice(targ)
	// (root mean square) error
	floats.Sub(dataInd, dataTarg)
	// (root mean) square error
	floats.Mul(dataInd, dataInd)
	// (root) mean square error
	totErr := floats.Sum(dataInd)
	return math.Sqrt(totErr / float64(len(dataInd)))
}
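The same pipeline works on any pair of equal-length slices. A self-contained sketch without the imgut dependency (the function name is made up for illustration):

// rmse returns the root mean square error between two equal-length slices.
func rmse(a, b []float64) float64 {
	diff := make([]float64, len(a))
	floats.SubTo(diff, a, b) // a - b
	floats.Mul(diff, diff)   // (a - b)^2
	return math.Sqrt(floats.Sum(diff) / float64(len(diff)))
}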
Example #9
func ExampleStdErr() {
	x := []float64{8, 2, -9, 15, 4}
	weights := []float64{2, 2, 6, 7, 1}
	mean := Mean(x, weights)
	stdev := StdDev(x, weights)
	nSamples := floats.Sum(weights)
	stdErr := StdErr(stdev, nSamples)
	fmt.Printf("The standard deviation is %.4f and there are %g samples, so the mean\nis likely %.4f ± %.4f.", stdev, nSamples, mean, stdErr)
	// Output:
	// The standard deviation is 10.5733 and there are 18 samples, so the mean
	// is likely 4.1667 ± 2.4921.
}
Example #10
func main() {

	runtime.GOMAXPROCS(runtime.NumCPU() - 2)

	gopath := os.Getenv("GOPATH")
	path := filepath.Join(gopath, "prof", "github.com", "reggo", "reggo", "nnet")

	nInputs := 10
	nOutputs := 3
	nLayers := 2
	nNeurons := 50
	nSamples := 1000000
	nRuns := 50

	config := &profile.Config{
		CPUProfile:  true,
		ProfilePath: path,
	}

	defer profile.Start(config).Stop()

	net, err := nnet.NewSimpleTrainer(nInputs, nOutputs, nLayers, nNeurons, nnet.Linear{})
	if err != nil {
		log.Fatal(err)
	}

	// Generate some random data
	inputs := mat64.NewDense(nSamples, nInputs, nil)
	outputs := mat64.NewDense(nSamples, nOutputs, nil)
	for i := 0; i < nSamples; i++ {
		for j := 0; j < nInputs; j++ {
			inputs.Set(i, j, rand.Float64())
		}
		for j := 0; j < nOutputs; j++ {
			outputs.Set(i, j, rand.Float64())
		}
	}

	// Create trainer
	prob := train.NewBatchGradBased(net, true, inputs, outputs, nil, nil, nil)
	nParameters := net.NumParameters()

	parameters := make([]float64, nParameters)
	derivative := make([]float64, nParameters)

	for i := 0; i < nRuns; i++ {
		net.RandomizeParameters()
		net.Parameters(parameters)
		prob.ObjGrad(parameters, derivative)
		fmt.Println(floats.Sum(derivative))
	}
}
Example #11
func MakeFitLinScale(targetImage *imgut.Image) func(*imgut.Image) float64 {
	// Pre-compute image to slice of floats
	dataTarg := imgut.ToSlice(targetImage)
	// Pre-compute average
	avgt := floats.Sum(dataTarg) / float64(len(dataTarg))
	return func(indImage *imgut.Image) float64 {
		// Images to vector
		dataInd := imgut.ToSlice(indImage)
		// Compute average pixels
		avgy := floats.Sum(dataInd) / float64(len(dataInd))
		// Difference y - avgy
		y_avgy := make([]float64, len(dataInd))
		copy(y_avgy, dataInd)
		floats.AddConst(-avgy, y_avgy)
		// Difference t - avgt
		t_avgt := make([]float64, len(dataTarg))
		copy(t_avgt, dataTarg)
		floats.AddConst(-avgt, t_avgt)
		// Multiplication (t - avgt)(y - avgy)
		floats.Mul(t_avgt, y_avgy)
		// Summation
		numerator := floats.Sum(t_avgt)
		// Square (y - avgy)^2
		floats.Mul(y_avgy, y_avgy)
		denomin := floats.Sum(y_avgy)
		// Compute b-value
		b := numerator / denomin
		// Compute a-value
		a := avgt - b*avgy

		// Compute now the scaled RMSE, using y' = a + b*y
		floats.Scale(b, dataInd)      // b*y
		floats.AddConst(a, dataInd)   // a + b*y
		floats.Sub(dataInd, dataTarg) // (a + b * y - t)
		floats.Mul(dataInd, dataInd)  // (a + b * y - t)^2
		total := floats.Sum(dataInd)  // Sum(...)
		return math.Sqrt(total / float64(len(dataInd)))
	}
}
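The closure fits t ≈ a + b·y by simple least squares before scoring, so an individual that is a perfect linear rescaling of the target gets an RMSE of zero. A tiny standalone check of the same algebra on made-up slices:

y := []float64{0, 1, 2}
t := []float64{1, 3, 5} // exactly t = 1 + 2*y
avgy := floats.Sum(y) / float64(len(y))
avgt := floats.Sum(t) / float64(len(t))
var num, den float64
for i := range y {
	num += (t[i] - avgt) * (y[i] - avgy)
	den += (y[i] - avgy) * (y[i] - avgy)
}
b := num / den     // 2
a := avgt - b*avgy // 1, so the scaled RMSE of t against a+b*y is 0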
Example #12
func MakeFitMSE(targetImage *imgut.Image) func(*imgut.Image) float64 {
	dataTarg := imgut.ToSliceChans(targetImage, "R")
	return func(indImage *imgut.Image) float64 {
		// Get data
		dataImg := imgut.ToSliceChans(indImage, "R")
		// Difference (X - Y)
		floats.Sub(dataImg, dataTarg)
		// Squared (X - Y)^2
		floats.Mul(dataImg, dataImg)
		// Summation
		return floats.Sum(dataImg) / float64(len(dataImg))
	}
}
Example #13
func sampleCategorical(t *testing.T, dist Categorical, nSamples int) []float64 {
	counts := make([]float64, dist.Len())
	for i := 0; i < nSamples; i++ {
		v := dist.Rand()
		if float64(int(v)) != v {
			t.Fatalf("Random number is not an integer")
		}
		counts[int(v)]++
	}
	sum := floats.Sum(counts)
	floats.Scale(1/sum, counts)
	return counts
}
Example #14
// CovarianceMatrix calculates a covariance matrix (also known as a
// variance-covariance matrix) from a matrix of data, using a two-pass
// algorithm.
//
// The weights must have length equal to the number of rows in
// input data matrix x. If cov is nil, then a new matrix with appropriate size will
// be constructed. If cov is not nil, it should have the same number of columns as the
// input data matrix x, and it will be used as the destination for the covariance
// data. Weights must not be negative.
func CovarianceMatrix(cov *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense {
	// This is the matrix version of the two-pass algorithm. It doesn't use the
	// additional floating point error correction that the Covariance function uses
	// to reduce the impact of rounding during centering.

	r, c := x.Dims()

	if cov == nil {
		cov = mat64.NewSymDense(c, nil)
	} else if n := cov.Symmetric(); n != c {
		panic(matrix.ErrShape)
	}

	var xt mat64.Dense
	xt.Clone(x.T())
	// Subtract the mean of each of the columns.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		// This will panic with ErrShape if len(weights) != len(v), so
		// we don't have to check the size later.
		mean := Mean(v, weights)
		floats.AddConst(-mean, v)
	}

	if weights == nil {
		// Calculate the normalization factor
		// scaled by the sample size.
		cov.SymOuterK(1/(float64(r)-1), &xt)
		return cov
	}

	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
	sqrtwts := make([]float64, r)
	for i, w := range weights {
		if w < 0 {
			panic("stat: negative covariance matrix weights")
		}
		sqrtwts[i] = math.Sqrt(w)
	}
	// Weight the rows.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		floats.Mul(v, sqrtwts)
	}

	// Calculate the normalization factor
	// scaled by the weighted sample size.
	cov.SymOuterK(1/(floats.Sum(weights)-1), &xt)
	return cov
}
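A minimal usage sketch, assuming the mat64-era API this example is written against:

x := mat64.NewDense(3, 2, []float64{
	1, 2,
	3, 4,
	5, 6,
})
cov := CovarianceMatrix(nil, x, nil)
// Each column has variance 4 and the columns move in lockstep, so every
// entry of the 2×2 covariance matrix is 4.
fmt.Println(mat64.Formatted(cov))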
Example #15
// LogDet returns the log of the determinant and the sign of the determinant
// for the matrix that has been factorized. Numerical stability in product and
// division expressions is generally improved by working in log space.
func (lu *LU) LogDet() (det float64, sign float64) {
	_, n := lu.lu.Dims()
	logDiag := make([]float64, n)
	sign = 1.0
	for i := 0; i < n; i++ {
		v := lu.lu.at(i, i)
		if v < 0 {
			sign *= -1
		}
		if lu.pivot[i] != i {
			sign *= -1
		}
		logDiag[i] = math.Log(math.Abs(v))
	}
	return floats.Sum(logDiag), sign
}
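To recover the determinant itself from the factorization (a sketch, assuming an LU value lu that has already factorized a matrix):

logDet, sign := lu.LogDet()
det := sign * math.Exp(logDet) // may overflow for large matrices, which is why log space is preferred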
Example #16
// Mean computes the weighted mean of the data set.
//  sum_i {w_i * x_i} / sum_i {w_i}
// If weights is nil then all of the weights are 1. If weights is not nil, then
// len(x) must equal len(weights).
func Mean(x, weights []float64) float64 {
	if weights == nil {
		return floats.Sum(x) / float64(len(x))
	}
	if len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	var (
		sumValues  float64
		sumWeights float64
	)
	for i, w := range weights {
		sumValues += w * x[i]
		sumWeights += w
	}
	return sumValues / sumWeights
}
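A quick worked usage with the same data as the StdErr example above:

x := []float64{8, 2, -9, 15, 4}
w := []float64{2, 2, 6, 7, 1}
fmt.Println(Mean(x, nil)) // (8+2-9+15+4)/5 = 4
fmt.Println(Mean(x, w))   // (16+4-54+105+4)/18 = 75/18 ≈ 4.1667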
Example #17
// PrincipalComponents returns the principal component direction vectors and
// the column variances of the principal component scores, vecs * a, computed
// using the singular value decomposition of the input. The input a is an n×d
// matrix where each row is an observation and each column represents a variable.
//
// PrincipalComponents centers the variables but does not scale the variance.
//
// The slice weights is used to weight the observations. If weights is nil,
// each weight is considered to have a value of one, otherwise the length of
// weights must match the number of observations or PrincipalComponents will
// panic.
//
// On successful completion, the principal component direction vectors are
// returned in vecs as a d×min(n, d) matrix, and the variances are returned in
// vars as a min(n, d)-long slice in descending sort order.
//
// If no singular value decomposition is possible, vecs and vars are returned
// nil and ok is returned false.
func PrincipalComponents(a mat64.Matrix, weights []float64) (vecs *mat64.Dense, vars []float64, ok bool) {
	n, d := a.Dims()
	if weights != nil && len(weights) != n {
		panic("stat: len(weights) != observations")
	}

	centered := mat64.NewDense(n, d, nil)
	col := make([]float64, n)
	for j := 0; j < d; j++ {
		mat64.Col(col, j, a)
		floats.AddConst(-Mean(col, weights), col)
		centered.SetCol(j, col)
	}
	for i, w := range weights {
		floats.Scale(math.Sqrt(w), centered.RawRowView(i))
	}

	kind := matrix.SVDFull
	if n > d {
		kind = matrix.SVDThin
	}
	var svd mat64.SVD
	ok = svd.Factorize(centered, kind)
	if !ok {
		return nil, nil, false
	}

	vecs = &mat64.Dense{}
	vecs.VFromSVD(&svd)
	if n < d {
		// Don't retain columns that are not valid direction vectors.
		vecs.Clone(vecs.View(0, 0, d, n))
	}
	vars = svd.Values(nil)
	var f float64
	if weights == nil {
		f = 1 / float64(n-1)
	} else {
		f = 1 / (floats.Sum(weights) - 1)
	}
	for i, v := range vars {
		vars[i] = f * v * v
	}
	return vecs, vars, true
}
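A minimal usage sketch with perfectly collinear data, again assuming the mat64-era API of this example:

a := mat64.NewDense(4, 2, []float64{
	0, 0,
	1, 1,
	2, 2,
	3, 3,
})
vecs, vars, ok := PrincipalComponents(a, nil)
if ok {
	// All variance lies along the (1, 1) direction, so the second variance
	// is ~0 and the first column of vecs is ±(1, 1)/√2.
	fmt.Println(vars)
	_ = vecs
}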
Example #18
// CDF returns the empirical cumulative distribution function value at q, that
// is, the fraction of the samples less than or equal to q. The exact behavior
// is determined by the CumulantKind. CDF is theoretically the inverse of the
// Quantile function, though it may not be the actual inverse for all values q
// and CumulantKinds.
//
// The x data must be sorted in increasing order. If weights is nil then all
// of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
//
// CumulantKind behaviors:
//  - Empirical: Returns the lowest fraction for which q is greater than or equal
//  to that fraction of samples
func CDF(q float64, c CumulantKind, x, weights []float64) float64 {
	if weights != nil && len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	if floats.HasNaN(x) {
		return math.NaN()
	}
	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}

	if q < x[0] {
		return 0
	}
	if q >= x[len(x)-1] {
		return 1
	}

	var sumWeights float64
	if weights == nil {
		sumWeights = float64(len(x))
	} else {
		sumWeights = floats.Sum(weights)
	}

	// Calculate the index
	switch c {
	case Empirical:
		// Find the smallest value that is greater than that percent of the samples
		var w float64
		for i, v := range x {
			if v > q {
				return w / sumWeights
			}
			if weights == nil {
				w++
			} else {
				w += weights[i]
			}
		}
		panic("impossible")
	default:
		panic("stat: bad cumulant kind")
	}
}
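A short worked usage; x must already be sorted:

x := []float64{1, 2, 3, 4}
fmt.Println(CDF(2, Empirical, x, nil))   // 0.5: two of the four samples are <= 2
fmt.Println(CDF(2.5, Empirical, x, nil)) // 0.5: the empirical CDF is flat between samples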
Example #19
// Fit sets the parameters of the probability distribution from the
// data samples x with relative weights w.
// If weights is nil, then all the weights are 1.
// If weights is not nil, then len(weights) must equal len(samples).
//
// Note: Laplace distribution has no FitPrior because it has no sufficient
// statistics.
func (l *Laplace) Fit(samples, weights []float64) {
	if weights != nil && len(samples) != len(weights) {
		panic(badLength)
	}

	if len(samples) == 0 {
		panic(badNoSamples)
	}
	if len(samples) == 1 {
		l.Mu = samples[0]
		l.Scale = 0
		return
	}

	var (
		sortedSamples []float64
		sortedWeights []float64
	)
	if sort.Float64sAreSorted(samples) {
		sortedSamples = samples
		sortedWeights = weights
	} else {
		// Need to copy the data so the input slices aren't affected by the sorting.
		sortedSamples = make([]float64, len(samples))
		copy(sortedSamples, samples)
		if weights != nil {
			// The original code shadowed sortedWeights with := here, silently
			// discarding the copied weights; assign to the outer variable instead.
			sortedWeights = make([]float64, len(samples))
			copy(sortedWeights, weights)
		}

		stat.SortWeighted(sortedSamples, sortedWeights)
	}

	// The (weighted) median of the samples is the maximum likelihood estimate
	// of the mean parameter
	// TODO: Rethink quantile type when stat has more options
	l.Mu = stat.Quantile(0.5, stat.Empirical, sortedSamples, sortedWeights)

	// Guard against nil weights, which would otherwise give a zero divisor.
	sumWeights := float64(len(samples))
	if weights != nil {
		sumWeights = floats.Sum(weights)
	}

	// The scale parameter is the average absolute distance
	// between the sample and the mean
	absError := stat.MomentAbout(1, samples, l.Mu, weights)

	l.Scale = absError / sumWeights
}
Example #20
// SuffStat computes the sufficient statistics of a set of samples to update
// the distribution. The sufficient statistics are stored in place, and the
// effective number of samples is returned.
//
// The exponential distribution has one sufficient statistic, the average rate
// of the samples.
//
// If weights is nil, the weights are assumed to be 1, otherwise panics if
// len(samples) != len(weights). Panics if len(suffStat) != 1.
func (Exponential) SuffStat(samples, weights, suffStat []float64) (nSamples float64) {
	if len(weights) != 0 && len(samples) != len(weights) {
		panic("dist: slice size mismatch")
	}

	if len(suffStat) != 1 {
		panic("exponential: wrong suffStat length")
	}

	if len(weights) == 0 {
		nSamples = float64(len(samples))
	} else {
		nSamples = floats.Sum(weights)
	}

	mean := stat.Mean(samples, weights)
	suffStat[0] = 1 / mean
	return nSamples
}
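A short usage sketch matching the (samples, weights, suffStat) signature shown above:

suffStat := make([]float64, 1)
n := Exponential{}.SuffStat([]float64{1, 2, 3}, nil, suffStat)
fmt.Println(n, suffStat[0]) // 3 samples; rate = 1/mean = 0.5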
Example #22
// NewModel creates a new Gaussian model.
func NewModel(dim int, options ...Option) *Model {

	g := &Model{
		ModelName:   "Gaussian",
		ModelDim:    dim,
		Diag:        true,
		variance:    make([]float64, dim),
		varianceInv: make([]float64, dim),
		tmpArray:    make([]float64, dim),
	}
	g.Type = reflect.TypeOf(*g).String()

	// Set options.
	for _, option := range options {
		option(g)
	}
	if len(g.Sumx) == 0 {
		g.Sumx = make([]float64, dim)
	}
	if len(g.Sumxsq) == 0 {
		g.Sumxsq = make([]float64, dim)
	}
	if g.Mean == nil {
		g.Mean = make([]float64, dim)
	}
	if g.StdDev == nil {
		g.StdDev = make([]float64, dim)
		floatx.Apply(floatx.SetValueFunc(smallSD), g.StdDev, nil)
	}

	floatx.Sq(g.variance, g.StdDev)

	// Initializes variance, varianceInv, and StdDev.
	g.setVariance(g.variance)

	floatx.Log(g.tmpArray, g.variance)
	g.const1 = -float64(g.ModelDim) * math.Log(2.0*math.Pi) / 2.0
	g.const2 = g.const1 - floats.Sum(g.tmpArray)/2.0
	return g
}
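const2 is the log of the Gaussian normalization term, -D/2·log(2π) - ½·Σ log σ²_d, exactly as the last lines above compute it. A quick standalone check of that arithmetic for D = 2 and variances {1, 4}:

variance := []float64{1, 4}
d := float64(len(variance))
const1 := -d * math.Log(2*math.Pi) / 2
var sumLog float64
for _, v := range variance {
	sumLog += math.Log(v)
}
const2 := const1 - sumLog/2 // log(1 / ((2π)^{D/2} · σ1 · σ2))
fmt.Println(const2)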
Example #23
// Quantile returns the sample of x such that x is greater than or
// equal to the fraction p of samples. The exact behavior is determined by the
// CumulantKind, and p should be a number between 0 and 1. Quantile is theoretically
// the inverse of the CDF function, though it may not be the actual inverse
// for all values p and CumulantKinds.
//
// The x data must be sorted in increasing order. If weights is nil then all
// of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
//
// CumulantKind behaviors:
//  - Empirical: Returns the lowest value q for which q is greater than or equal
//  to the fraction p of samples
func Quantile(p float64, c CumulantKind, x, weights []float64) float64 {
	if !(p >= 0 && p <= 1) {
		panic("stat: percentile out of bounds")
	}

	if weights != nil && len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	if floats.HasNaN(x) {
		return math.NaN() // This is needed because the algorithm breaks otherwise
	}
	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}

	var sumWeights float64
	if weights == nil {
		sumWeights = float64(len(x))
	} else {
		sumWeights = floats.Sum(weights)
	}
	switch c {
	case Empirical:
		var cumsum float64
		fidx := p * sumWeights
		for i := range x {
			if weights == nil {
				cumsum++
			} else {
				cumsum += weights[i]
			}
			if cumsum >= fidx {
				return x[i]
			}
		}
		panic("impossible")
	default:
		panic("stat: bad cumulant kind")
	}
}
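A short worked usage; as with CDF, x must be sorted:

x := []float64{1, 2, 3, 4}
fmt.Println(Quantile(0.5, Empirical, x, nil))  // 2: cumulative weight 2 >= 0.5*4
fmt.Println(Quantile(0.51, Empirical, x, nil)) // 3: one more sample is needed to reach 2.04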
Example #24
// SuffStat computes the sufficient statistics of a set of samples to update
// the distribution. The sufficient statistics are stored in place, and the
// effective number of samples is returned.
//
// The normal distribution has two sufficient statistics, the mean of the samples
// and the standard deviation of the samples.
//
// If weights is nil, the weights are assumed to be 1, otherwise panics if
// len(samples) != len(weights). Panics if len(suffStat) != 2.
func (Normal) SuffStat(samples, weights, suffStat []float64) (nSamples float64) {
	lenSamp := len(samples)
	if len(weights) != 0 && len(samples) != len(weights) {
		panic("dist: slice size mismatch")
	}
	if len(suffStat) != 2 {
		panic("dist: incorrect suffStat length")
	}

	if len(weights) == 0 {
		nSamples = float64(lenSamp)
	} else {
		nSamples = floats.Sum(weights)
	}

	mean := stat.Mean(samples, weights)
	suffStat[0] = mean

	// Use Moment and not StdDev because we want it to be uncorrected
	variance := stat.Moment(2, samples, mean, weights)
	suffStat[1] = math.Sqrt(variance)
	return nSamples
}
Example #25
// CovarianceMatrix calculates a covariance matrix (also known as a
// variance-covariance matrix) from a matrix of data, using a two-pass
// algorithm. The matrix returned will be symmetric and square.
//
// The weights wts should have the length equal to the number of rows in
// input data matrix x. If c is nil, then a new matrix with appropriate size will
// be constructed.  If c is not nil, it should be a square matrix with the same
// number of columns as the input data matrix x, and it will be used as the receiver
// for the covariance data.  Weights cannot be negative.
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
	// This is the matrix version of the two-pass algorithm. It doesn't use the
	// additional floating point error correction that the Covariance function uses
	// to reduce the impact of rounding during centering.

	// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
	// the returned type from a *mat.Dense to a *mat.Symmetric.

	r, c := x.Dims()

	if cov == nil {
		cov = mat64.NewDense(c, c, nil)
	} else if covr, covc := cov.Dims(); covr != covc || covc != c {
		panic(mat64.ErrShape)
	}

	var xt mat64.Dense
	xt.TCopy(x)
	// Subtract the mean of each of the columns.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		// This will panic with ErrShape if len(wts) != len(v), so
		// we don't have to check the size later.
		mean := Mean(v, wts)
		floats.AddConst(-mean, v)
	}

	var n float64
	if wts == nil {

		n = float64(r)

		cov.MulTrans(&xt, false, &xt, true)

		// Scale by the sample size.
		cov.Scale(1/(n-1), cov)
		return cov
	}

	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
	sqrtwts := make([]float64, r)
	for i, w := range wts {
		if w < 0 {
			panic("stat: negative covariance matrix weights")
		}
		sqrtwts[i] = math.Sqrt(w)
	}
	// Weight the rows.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		floats.Mul(v, sqrtwts)
	}

	// Calculate the normalization factor.
	n = floats.Sum(wts)
	cov.MulTrans(&xt, false, &xt, true)

	// Scale by the sample size.
	cov.Scale(1/(n-1), cov)
	return cov
}
Example #26
func main() {
	// a is not defined in the original snippet; a sample slice is assumed
	// here so the example compiles.
	a := []float64{1, 2, 3, 4}
	fmt.Println("Sum:    ", floats.Sum(a))
	fmt.Println("Product:", floats.Prod(a))
}
Example #27
// KolmogorovSmirnov computes the largest distance between two empirical CDFs.
// Each dataset x and y consists of sample locations and counts, xWeights and
// yWeights, respectively.
//
// x and y may have different lengths, though len(x) must equal len(xWeights), and
// len(y) must equal len(yWeights).  Both x and y must be sorted.
//
// Special cases are:
//  = 0 if len(x) == len(y) == 0
//  = 1 if len(x) == 0, len(y) != 0 or len(x) != 0 and len(y) == 0
func KolmogorovSmirnov(x, xWeights, y, yWeights []float64) float64 {
	if xWeights != nil && len(x) != len(xWeights) {
		panic("stat: slice length mismatch")
	}
	if yWeights != nil && len(y) != len(yWeights) {
		panic("stat: slice length mismatch")
	}
	if len(x) == 0 || len(y) == 0 {
		if len(x) == 0 && len(y) == 0 {
			return 0
		}
		return 1
	}

	if floats.HasNaN(x) {
		return math.NaN()
	}
	if floats.HasNaN(y) {
		return math.NaN()
	}

	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}
	if !sort.Float64sAreSorted(y) {
		panic("y data are not sorted")
	}

	xWeightsNil := xWeights == nil
	yWeightsNil := yWeights == nil

	var (
		maxDist    float64
		xSum, ySum float64
		xCdf, yCdf float64
		xIdx, yIdx int
	)

	if xWeightsNil {
		xSum = float64(len(x))
	} else {
		xSum = floats.Sum(xWeights)
	}

	if yWeightsNil {
		ySum = float64(len(y))
	} else {
		ySum = floats.Sum(yWeights)
	}

	xVal := x[0]
	yVal := y[0]

	// Algorithm description:
	// The goal is to find the maximum difference in the empirical CDFs for the
	// two datasets. The CDFs are piecewise-constant, and thus the distance
	// between the CDFs will only change at the values themselves.
	//
	// To find the maximum distance, step through the data in ascending order
	// of value between the two datasets. At each step, compute the empirical CDF
	// and compare the local distance with the maximum distance.
	// Due to some corner cases, equal data entries must be tallied simultaneously.
	for {
		switch {
		case xVal < yVal:
			xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
		case yVal < xVal:
			yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
		case xVal == yVal:
			newX := x[xIdx]
			newY := y[yIdx]
			if newX < newY {
				xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
			} else if newY < newX {
				yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
			} else {
				// Update them both, they'll be equal next time and the right
				// thing will happen
				xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
				yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
			}
		default:
			panic("unreachable")
		}

		dist := math.Abs(xCdf - yCdf)
		if dist > maxDist {
			maxDist = dist
		}

		// Both xCdf and yCdf will equal 1 at the end, so if we have reached the
		// end of either sample list, the distance is as large as it can be.
		if xIdx == len(x) || yIdx == len(y) {
			return maxDist
		}
	}
}
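Two boundary cases that follow directly from the definition (sample data made up for illustration):

x := []float64{1, 2, 3}
fmt.Println(KolmogorovSmirnov(x, nil, x, nil)) // 0: identical empirical CDFs
y := []float64{10, 11}
fmt.Println(KolmogorovSmirnov(x, nil, y, nil)) // 1: the supports do not overlap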
Example #28
func (b *BatchGradient) funcGrad(params, deriv []float64) float64 {
	nParameters := len(deriv)

	// Send out all of the work
	done := make(chan result)
	sz := b.nSamples / b.Workers
	sent := 0
	for i := 0; i < b.Workers; i++ {
		outputDim := b.outputDim
		last := sent + sz
		if i == b.Workers-1 {
			last = b.nSamples
		}
		go func(sent, last int) {
			lossDeriver := b.Trainable.NewLossDeriver()
			predOutput := make([]float64, outputDim)
			dLossDPred := make([]float64, outputDim)
			dLossDParam := make([]float64, nParameters)
			outputs := make([]float64, outputDim)
			tmpderiv := make([]float64, nParameters)
			var totalLoss float64
			for i := sent; i < last; i++ {
				lossDeriver.Predict(params, b.features.RawRowView(i), predOutput)
				b.Outputs.Row(outputs, i)
				loss := b.Losser.LossDeriv(predOutput, outputs, dLossDPred)
				if b.Weights == nil {
					totalLoss += loss
				} else {
					totalLoss += b.Weights[i] * loss
				}
				lossDeriver.Deriv(params, b.features.RawRowView(i), predOutput, dLossDPred, dLossDParam)
				if b.Weights != nil {
					floats.Scale(b.Weights[i], dLossDParam)
				}
				floats.Add(tmpderiv, dLossDParam)
			}
			done <- result{totalLoss, tmpderiv}
		}(sent, last)
		sent += sz
	}
	// Collect all the results
	var totalLoss float64
	for i := range deriv {
		deriv[i] = 0
	}
	for i := 0; i < b.Workers; i++ {
		w := <-done
		totalLoss += w.loss
		floats.Add(deriv, w.deriv)
	}
	// Compute the regularizer
	if b.Regularizer != nil {
		tmp := make([]float64, nParameters)
		totalLoss += b.Regularizer.LossDeriv(params, tmp)
		floats.Add(deriv, tmp)
	}
	sumWeights := float64(b.nSamples)
	if b.Weights != nil {
		sumWeights = floats.Sum(b.Weights)
	}
	totalLoss /= sumWeights
	floats.Scale(1/sumWeights, deriv)
	return totalLoss
}
Example #29
// normalSlice normalizes the values in v in place, dividing each by their sum
// so that the results sum to 1.
func normalSlice(v []float64) {
	tot := floats.Sum(v)
	floats.Scale(1.0/tot, v)
}
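A quick check of the behavior:

v := []float64{2, 3, 5}
normalSlice(v)
fmt.Println(v) // [0.2 0.3 0.5]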
Example #30
func (Linear) Func(x []float64) float64 {
	return floats.Sum(x)
}
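So a Linear activation simply sums its inputs:

fmt.Println(Linear{}.Func([]float64{1, 2, 3})) // 6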