Exemple #1
0
// FeaturizeTrainable computes the feature vector of every row of inputs and
// stores it in the matching row of featurizedInputs, which is returned.
//
// If featurizedInputs is nil, a new nSamples x t.NumFeatures() matrix is
// allocated. Otherwise it is written to in place and is expected to be
// nSamples x nFeatures.
//
// The rows are processed through common.ParallelFor; each invocation of the
// worker obtains its own featurizer from t.NewFeaturizer.
func FeaturizeTrainable(t Trainable, inputs common.RowMatrix, featurizedInputs *mat64.Dense) *mat64.Dense {
	nSamples, nDim := inputs.Dims()
	if featurizedInputs == nil {
		featurizedInputs = mat64.NewDense(nSamples, t.NumFeatures(), nil)
	}

	var work func(lo, hi int)
	switch src := inputs.(type) {
	case mat64.RawRowViewer:
		// The input rows can be read in place, so no scratch buffer is needed.
		work = func(lo, hi int) {
			fz := t.NewFeaturizer()
			for row := lo; row < hi; row++ {
				fz.Featurize(src.RawRowView(row), featurizedInputs.RawRowView(row))
			}
		}
	default:
		// Fall back to copying each row into a buffer owned by this worker call.
		work = func(lo, hi int) {
			fz := t.NewFeaturizer()
			scratch := make([]float64, nDim)
			for row := lo; row < hi; row++ {
				inputs.Row(scratch, row)
				fz.Featurize(scratch, featurizedInputs.RawRowView(row))
			}
		}
	}

	grain := common.GetGrainSize(nSamples, minGrain, maxGrain)
	common.ParallelFor(nSamples, grain, work)
	return featurizedInputs
}
Exemple #2
0
// LatinHypercube fills batch with rows(batch) samples drawn by Latin hypercube
// sampling from the distribution described by q. Random numbers come from src
// when it is non-nil, and from the package-level math/rand functions otherwise.
//
// For every column, the unit interval is split into rows(batch) equally sized
// bins. One uniform value is drawn inside each bin and the bins are assigned to
// rows through a random permutation, so each bin is hit exactly once per
// column. Every row of probabilities is then mapped through q.Quantile and the
// result overwrites that row of batch.
//
// NOTE(review): the original comment points to distmv.UnitNormal for sampling
// on the unit interval; a unit uniform distribution seems intended — confirm.
func LatinHypercube(batch *mat64.Dense, q distmv.Quantiler, src *rand.Rand) {
	rows, cols := batch.Dims()

	// Start from the global source and switch to src when one is supplied.
	uniform, shuffle := rand.Float64, rand.Perm
	if src != nil {
		uniform, shuffle = src.Float64, src.Perm
	}

	nBins := float64(rows)
	for col := 0; col < cols; col++ {
		// order[bin] is the row that receives the sample drawn from bin.
		order := shuffle(rows)
		for bin, row := range order {
			u := uniform()
			u = u/nBins + float64(bin)/nBins
			batch.Set(row, col, u)
		}
	}

	// Quantile reads the probabilities from a copy because it writes its
	// output into the same row of batch.
	probs := make([]float64, cols)
	for row := 0; row < rows; row++ {
		dst := batch.RawRowView(row)
		copy(probs, dst)
		q.Quantile(dst, probs)
	}
}
Exemple #3
0
// Importance generates rows(batch) samples from the proposal distribution,
// and stores the locations and importance sampling weights in place.
//
// Importance sampling is a variance reduction technique where samples are
// generated from a proposal distribution, q(x), instead of the target distribution
// p(x). This allows relatively unlikely samples in p(x) to be generated more frequently.
//
// The importance sampling weight at x is given by p(x)/q(x). To reduce variance,
// a good proposal distribution will bound this sampling weight. This implies the
// support of q(x) should be at least as broad as p(x), and q(x) should be "fatter tailed"
// than p(x).
//
// If weights is nil, the weights are neither computed nor stored. Otherwise the
// length of weights must equal the number of rows of batch, or Importance will
// panic.
func Importance(batch *mat64.Dense, weights []float64, target distmv.LogProber, proposal distmv.RandLogProber) {
	r, _ := batch.Dims()
	// A nil weights slice is the documented way to request samples only, so
	// the length check applies only when a destination was supplied.
	storeWeights := weights != nil
	if storeWeights && r != len(weights) {
		panic(badLengthMismatch)
	}
	for i := 0; i < r; i++ {
		// Draw the sample directly into row i of batch.
		v := batch.RawRowView(i)
		proposal.Rand(v)
		if storeWeights {
			// p(x)/q(x), computed from the log-probabilities.
			weights[i] = math.Exp(target.LogProb(v) - proposal.LogProb(v))
		}
	}
}
// Sample generates rows(batch) samples using the Metropolis Hastings sample
// generation method. The initial location is NOT updated during the call to Sample.
//
// The chain is first advanced m.BurnIn steps, whose samples are discarded.
// After that, one sample out of every m.Rate generated is stored in batch.
// A Rate of zero is treated as a Rate of one.
//
// The number of columns in batch must equal len(m.Initial), otherwise Sample
// will panic.
func (m MetropolisHastingser) Sample(batch *mat64.Dense) {
	burnIn := m.BurnIn
	rate := m.Rate
	// The zero value of Rate means "keep every sample".
	if rate == 0 {
		rate = 1
	}
	r, c := batch.Dims()
	if len(m.Initial) != c {
		panic("metropolishastings: length mismatch")
	}

	// Use the optimal size for the temporary memory to allow the fewest calls
	// to MetropolisHastings. The case where tmp shadows samples must be
	// aligned with the logic after burn-in so that tmp does not shadow samples
	// during the rate portion.
	//
	// tmp aliases batch unless rate exceeds the number of rows, in which case
	// a dedicated rate x c scratch matrix is allocated.
	tmp := batch
	if rate > r {
		tmp = mat64.NewDense(rate, c, nil)
	}
	rTmp, _ := tmp.Dims()

	// Perform burn-in.
	// The burn-in samples are generated in chunks of at most rTmp rows. Each
	// chunk starts from the last row of the previous one. A local copy of
	// m.Initial is advanced, so the receiver's Initial is left untouched.
	// NOTE(review): the loop only terminates once remaining hits exactly zero;
	// a negative BurnIn is not guarded against here — confirm callers never pass one.
	remaining := burnIn
	initial := make([]float64, c)
	copy(initial, m.Initial)
	for remaining != 0 {
		newSamp := min(rTmp, remaining)
		MetropolisHastings(tmp.View(0, 0, newSamp, c).(*mat64.Dense), initial, m.Target, m.Proposal, m.Src)
		copy(initial, tmp.RawRowView(newSamp-1))
		remaining -= newSamp
	}

	// With a rate of one every generated sample is kept, so batch can be
	// filled with a single call.
	if rate == 1 {
		MetropolisHastings(batch, initial, m.Target, m.Proposal, m.Src)
		return
	}

	// If tmp still aliases batch (rTmp == r), replace it with a separate
	// rate x c scratch matrix so that the thinning loop below does not
	// overwrite rows of batch that were already accepted.
	if rTmp <= r {
		tmp = mat64.NewDense(rate, c, nil)
	}

	// Take a single sample from the chain.
	MetropolisHastings(batch.View(0, 0, 1, c).(*mat64.Dense), initial, m.Target, m.Proposal, m.Src)

	copy(initial, batch.RawRowView(0))
	// For all of the other samples, first generate Rate samples and then actually
	// accept the last one.
	for i := 1; i < r; i++ {
		MetropolisHastings(tmp, initial, m.Target, m.Proposal, m.Src)
		// The last row of tmp is the kept sample and the start of the next run.
		v := tmp.RawRowView(rate - 1)
		batch.SetRow(i, v)
		copy(initial, v)
	}
}
// CovarianceMatrix computes the covariance matrix (variance-covariance matrix)
// of the columns of x with a two-pass algorithm and returns it.
//
// When cov is nil a new symmetric matrix of the right size is allocated.
// When cov is non-nil it is used as the destination; its order must equal the
// number of columns of x, otherwise CovarianceMatrix panics with
// matrix.ErrShape.
//
// weights, when non-nil, holds one weight per row of x. A negative weight
// causes a panic. When weights is nil all rows are weighted equally.
func CovarianceMatrix(cov *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense {
	// Matrix form of the two-pass algorithm. Unlike the Covariance function,
	// no extra floating point correction is applied after centering.

	nObs, nVars := x.Dims()

	switch {
	case cov == nil:
		cov = mat64.NewSymDense(nVars, nil)
	case cov.Symmetric() != nVars:
		panic(matrix.ErrShape)
	}

	// Work on the transpose so that every variable occupies one contiguous row.
	var centered mat64.Dense
	centered.Clone(x.T())
	for j := 0; j < nVars; j++ {
		series := centered.RawRowView(j)
		// Mean panics with ErrShape on a weights length mismatch, which is why
		// no separate length check is made further down.
		mu := Mean(series, weights)
		floats.AddConst(-mu, series)
	}

	// Normalization for the unweighted case: sample size minus one.
	scale := 1 / (float64(nObs) - 1)

	if weights != nil {
		// Scaling by the square root of the weights keeps the outer product
		// symmetric.
		roots := make([]float64, nObs)
		for i, w := range weights {
			if w < 0 {
				panic("stat: negative covariance matrix weights")
			}
			roots[i] = math.Sqrt(w)
		}
		for j := 0; j < nVars; j++ {
			floats.Mul(centered.RawRowView(j), roots)
		}
		// Normalization for the weighted case: total weight minus one.
		scale = 1 / (floats.Sum(weights) - 1)
	}

	cov.SymOuterK(scale, &centered)
	return cov
}
Exemple #6
0
// UnscaleData applies scaler.Unscale to every row of data, in place, spreading
// the rows over common.ParallelFor.
//
// Every row whose Unscale call fails is recorded as a SliceError carrying the
// row index. The collected ErrorList is returned if any row failed, and nil
// otherwise.
// TODO: Make this work better so that if there is an error somewhere data isn't changed
func UnscaleData(scaler Scaler, data *mat64.Dense) error {
	var (
		mu   sync.Mutex // guards errs, which is appended to from the workers
		errs ErrorList
	)

	unscaleRows := func(lo, hi int) {
		for row := lo; row < hi; row++ {
			err := scaler.Unscale(data.RawRowView(row))
			if err == nil {
				continue
			}
			mu.Lock()
			errs = append(errs, &SliceError{Header: "scale", Idx: row, Err: err})
			mu.Unlock()
		}
	}

	rows, _ := data.Dims()
	common.ParallelFor(rows, common.GetGrainSize(rows, 1, 500), unscaleRows)

	// Return a literal nil on success so the caller never sees a non-nil
	// error interface wrapping an empty list.
	if len(errs) == 0 {
		return nil
	}
	return errs
}
Exemple #7
0
// corrToCov rewrites the correlation matrix c, in place, as a covariance
// matrix. sigma holds the standard deviation of each variable; element (i, j)
// is scaled by sigma[i]*sigma[j] and the diagonal is set to sigma[i] squared.
// It panics with mat64.ErrShape if len(sigma) differs from the number of rows
// of c.
func corrToCov(c *mat64.Dense, sigma []float64) {

	// TODO(jonlawlor): use a *mat64.Symmetric as input.

	if rows, _ := c.Dims(); rows != len(sigma) {
		panic(mat64.ErrShape)
	}

	for i, si := range sigma {
		vals := c.RawRowView(i)
		for j, sj := range sigma {
			if j != i {
				vals[j] *= si
				vals[j] *= sj
			} else {
				// Write the variance directly so the diagonal is exactly
				// sigma squared.
				vals[j] = si * si
			}
		}
	}
}
Exemple #8
0
// covToCorr rewrites the covariance matrix c, in place, as a correlation
// matrix: element (i, j) is divided by the square roots of the diagonal
// entries (i, i) and (j, j), and the diagonal is set to one.
func covToCorr(c *mat64.Dense) {

	// TODO(jonlawlor): use a *mat64.Symmetric as input.

	n, _ := c.Dims()

	// Reciprocal standard deviations, read from the diagonal before any
	// element of c is modified.
	invStd := make([]float64, n)
	for k := range invStd {
		invStd[k] = 1 / math.Sqrt(c.At(k, k))
	}

	for i, si := range invStd {
		vals := c.RawRowView(i)
		for j, sj := range invStd {
			if j != i {
				vals[j] *= si
				vals[j] *= sj
			} else {
				// Write the diagonal directly so it is exactly one.
				vals[j] = 1
			}
		}
	}
}
Exemple #9
0
// CovarianceMatrix computes the covariance matrix (variance-covariance matrix)
// of the columns of x with a two-pass algorithm and returns it. The result is
// square and symmetric.
//
// When cov is nil a new matrix of the right size is allocated. When cov is
// non-nil it is used as the receiver; it must be square with as many columns
// as x, otherwise CovarianceMatrix panics with mat64.ErrShape.
//
// wts, when non-nil, holds one weight per row of x. A negative weight causes a
// panic. When wts is nil all rows are weighted equally.
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
	// Matrix form of the two-pass algorithm. Unlike the Covariance function,
	// no extra floating point correction is applied after centering.

	// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
	// the returned type from a *mat.Dense to a *mat.Symmetric.

	nObs, nVars := x.Dims()

	if cov == nil {
		cov = mat64.NewDense(nVars, nVars, nil)
	} else {
		covRows, covCols := cov.Dims()
		if covRows != covCols || covCols != nVars {
			panic(mat64.ErrShape)
		}
	}

	// Work on the transpose so that every variable occupies one contiguous row.
	var centered mat64.Dense
	centered.TCopy(x)
	for j := 0; j < nVars; j++ {
		series := centered.RawRowView(j)
		// Mean panics with ErrShape on a weights length mismatch, which is why
		// no separate length check is made further down.
		mu := Mean(series, wts)
		floats.AddConst(-mu, series)
	}

	// Effective sample size: the row count, or the total weight when weighted.
	total := float64(nObs)

	if wts != nil {
		// Scaling by the square root of the weights keeps the product
		// symmetric.
		roots := make([]float64, nObs)
		for i, w := range wts {
			if w < 0 {
				panic("stat: negative covariance matrix weights")
			}
			roots[i] = math.Sqrt(w)
		}
		for j := 0; j < nVars; j++ {
			floats.Mul(centered.RawRowView(j), roots)
		}
		total = floats.Sum(wts)
	}

	cov.MulTrans(&centered, false, &centered, true)

	// Scale by the sample size.
	cov.Scale(1/(total-1), cov)
	return cov
}
Exemple #10
0
// IID fills every row of batch with a sample drawn from d. Each row is
// produced by its own call to d.Rand, written directly into the row's
// backing storage.
func IID(batch *mat64.Dense, d distmv.Rander) {
	rows, _ := batch.Dims()
	for row := 0; row != rows; row++ {
		sample := batch.RawRowView(row)
		d.Rand(sample)
	}
}