Example #1
// Cov returns the covariance between a set of data points based on the current
// GP fit.
func (g *GP) Cov(m *mat64.SymDense, x mat64.Matrix) *mat64.SymDense {
	if m != nil {
		// TODO(btracey): Make this k**
		panic("resuing m not coded")
	}
	// The joint covariance matrix is
	// k(x_*, x_*) - k(x_*, x) k(x, x)^-1 k(x, x_*)
	nSamp, nDim := x.Dims()
	if nDim != g.inputDim {
		panic(badInputLength)
	}

	// Compute K(x_*, x) K(x, x)^-1 K(x, x_*)
	kstar := g.formKStar(x)
	var tmp mat64.Dense
	tmp.SolveCholesky(g.cholK, kstar)
	var tmp2 mat64.Dense
	tmp2.Mul(kstar.T(), &tmp)

	// Compute k(x_*, x_*) and perform the subtraction.
	kstarstar := mat64.NewSymDense(nSamp, nil)
	for i := 0; i < nSamp; i++ {
		for j := i; j < nSamp; j++ {
			v := g.kernel.Distance(mat64.Row(nil, i, x), mat64.Row(nil, j, x))
			if i == j {
				v += g.noise
			}
			kstarstar.SetSym(i, j, v-tmp2.At(i, j))
		}
	}
	return kstarstar
}
Example #2
func toFeatureNodes(X *mat64.Dense) []*C.struct_feature_node {
	featureNodes := []*C.struct_feature_node{}

	nRows, nCols := X.Dims()

	for i := 0; i < nRows; i++ {
		row := []C.struct_feature_node{}
		for j := 0; j < nCols; j++ {
			val := X.At(i, j)
			if val != 0 {
				row = append(row, C.struct_feature_node{
					index: C.int(j + 1),
					value: C.double(val),
				})
			}
		}

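		// liblinear expects each feature row to be terminated by a
		// sentinel node with index -1.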
		row = append(row, C.struct_feature_node{
			index: C.int(-1),
			value: C.double(0),
		})
		featureNodes = append(featureNodes, &row[0])
	}

	return featureNodes
}
Example #3
func rowSum(matrix *mat64.Dense, rowId int) float64 {
	_, col := matrix.Dims()
	sum := float64(0)
	for c := 0; c < col; c++ {
		sum += matrix.At(rowId, c)
	}
	return sum
}
Example #4
func colSum(matrix *mat64.Dense, colId int) float64 {
	row, _ := matrix.Dims()
	sum := float64(0)
	for r := 0; r < row; r++ {
		sum += matrix.At(r, colId)
	}
	return sum
}
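A minimal usage sketch for the two helpers above, assuming the legacy github.com/gonum/matrix/mat64 import path and fmt are in scope:

func exampleSums() {
	m := mat64.NewDense(2, 3, []float64{
		1, 2, 3,
		4, 5, 6,
	})
	fmt.Println(rowSum(m, 0)) // 6
	fmt.Println(colSum(m, 2)) // 9
}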
Example #5
// SetScale sets a linear scale between 0 and 1. If fewer than two data
// points are supplied, an error is returned. If the minimum and maximum
// values are identical in a dimension, they will be set to that value
// -/+ 0.5 and a UniformDimension error will be returned.
func (l *Linear) SetScale(data *mat64.Dense) error {

	rows, dim := data.Dims()
	if rows < 2 {
		return errors.New("scale: less than two inputs")
	}

	// Generate data for min and max if they don't already exist
	if len(l.Min) < dim {
		l.Min = make([]float64, dim)
	} else {
		l.Min = l.Min[0:dim]
	}
	if len(l.Max) < dim {
		l.Max = make([]float64, dim)
	} else {
		l.Max = l.Max[0:dim]
	}
	for i := range l.Min {
		l.Min[i] = math.Inf(1)
	}
	for i := range l.Max {
		l.Max[i] = math.Inf(-1)
	}
	// Find the minimum and maximum in each dimension
	for i := 0; i < rows; i++ {
		for j := 0; j < dim; j++ {
			val := data.At(i, j)
			if val < l.Min[j] {
				l.Min[j] = val
			}
			if val > l.Max[j] {
				l.Max[j] = val
			}
		}
	}
	l.Scaled = true
	l.Dim = dim

	var unifError *UniformDimension

	// Check that the maximum and minimum values are not identical
	for i := range l.Min {
		if l.Min[i] == l.Max[i] {
			if unifError == nil {
				unifError = &UniformDimension{}
			}
			unifError.Dims = append(unifError.Dims, i)
			l.Min[i] -= 0.5
			l.Max[i] += 0.5
		}
	}
	if unifError != nil {
		return unifError
	}
	return nil
}
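A usage sketch for SetScale; it assumes Linear's zero value is usable and that Min and Max are exported fields, as the method body suggests:

func exampleLinearScale() {
	data := mat64.NewDense(3, 2, []float64{
		0, 5,
		2, 5,
		4, 5,
	})
	var l Linear
	if err := l.SetScale(data); err != nil {
		fmt.Println(err) // the second column is uniform, so a *UniformDimension error is reported
	}
	fmt.Println(l.Min, l.Max) // [0 4.5] [4 5.5]
}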
Example #6
// UpdateBias computes b_i = b_i + learnRate * err_i and updates the bias
// weights from a size * 1 back-propagated error vector.
func (n *Network) UpdateBias(err *mat64.Dense, learnRate float64) {

	for i, b := range n.biases {
		if i < n.input {
			continue
		}
		n.biases[i] = b + err.At(i, 0)*learnRate
	}

}
Example #7
func regionQuery(p int, ret *big.Int, dist *mat64.Dense, eps float64) *big.Int {
	rows, _ := dist.Dims()
	// Return any points within the Eps neighbourhood
	for i := 0; i < rows; i++ {
		if dist.At(p, i) <= eps {
			ret = ret.SetBit(ret, i, 1) // Mark as neighbour
		}
	}
	return ret
}
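A usage sketch with a hand-built 3x3 pairwise-distance matrix; neighbours come back as set bits on the returned *big.Int (math/big and fmt assumed imported):

func exampleRegionQuery() {
	dist := mat64.NewDense(3, 3, []float64{
		0, 1, 5,
		1, 0, 2,
		5, 2, 0,
	})
	neighbours := regionQuery(0, big.NewInt(0), dist, 1.5)
	for i := 0; i < 3; i++ {
		if neighbours.Bit(i) == 1 {
			fmt.Printf("point %d is in the eps-neighbourhood of point 0\n", i) // points 0 and 1
		}
	}
}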
Example #8
// Mat64ToGcvMat converts a *mat64.Dense to a GcvMat.
func Mat64ToGcvMat(mat *mat64.Dense) GcvMat {
	row, col := mat.Dims()

	rawData := NewGcvFloat64Vector(int64(row * col))

	for i := 0; i < row; i++ {
		for j := 0; j < col; j++ {
			rawData.Set(i*col+j, mat.At(i, j))
		}
	}

	return Mat64ToGcvMat_(row, col, rawData)
}
Example #9
// SetScale finds the appropriate scaling of the data such that the dataset
// has a mean of 0 and a variance of 1. If the standard deviation of any of
// the data is zero (all of the entries have the same value),
// the standard deviation is set to 1.0 and a UniformDimension error is
// returned.
func (n *Normal) SetScale(data *mat64.Dense) error {

	rows, dim := data.Dims()
	if rows < 2 {
		return errors.New("scale: less than two inputs")
	}

	// Need to find the mean input and the std of the input
	mean := make([]float64, dim)
	for i := 0; i < rows; i++ {
		for j := 0; j < dim; j++ {
			mean[j] += data.At(i, j)
		}
	}
	for i := range mean {
		mean[i] /= float64(rows)
	}

	// TODO: Replace this with something that has better numerical properties
	std := make([]float64, dim)
	for i := 0; i < rows; i++ {
		for j := 0; j < dim; j++ {
			diff := data.At(i, j) - mean[j]
			std[j] += diff * diff
		}
	}
	for i := range std {
		std[i] /= float64(rows)
		std[i] = math.Sqrt(std[i])
	}
	n.Scaled = true
	n.Dim = dim

	var unifError *UniformDimension
	for i := range std {
		if std[i] == 0 {
			if unifError == nil {
				unifError = &UniformDimension{}
			}
			unifError.Dims = append(unifError.Dims, i)
			std[i] = 1.0
		}
	}

	n.Mu = mean
	n.Sigma = std
	if unifError != nil {
		return unifError
	}
	return nil
}
Example #10
func (nb *NaiveBayes) Fit(X, y *mat64.Dense) {
	nSamples, nFeatures := X.Dims()

	tokensTotal := 0
	langsTotal, _ := y.Dims()

	langsCount := histogram(y.Col(nil, 0))

	tokensTotalPerLang := map[int]int{}
	tokenCountPerLang := map[int]map[int]int{}

	for i := 0; i < nSamples; i++ {
		langIdx := int(y.At(i, 0))

		for j := 0; j < nFeatures; j++ {
			tokensTotal += int(X.At(i, j))
			tokensTotalPerLang[langIdx] += int(X.At(i, j))

			if _, ok := tokenCountPerLang[langIdx]; !ok {
				tokenCountPerLang[langIdx] = map[int]int{}
			}
			tokenCountPerLang[langIdx][j] += int(X.At(i, j))
		}
	}

	params := nbParams{
		TokensTotal:        tokensTotal,
		LangsTotal:         langsTotal,
		LangsCount:         langsCount,
		TokensTotalPerLang: tokensTotalPerLang,
		TokenCountPerLang:  tokenCountPerLang,
	}

	nb.params = params
}
Example #11
func StackConstr(low, A, up *mat64.Dense) (stackA, b *mat64.Dense, ranges []float64) {
	neglow := &mat64.Dense{}
	neglow.Scale(-1, low)
	b = &mat64.Dense{}
	b.Stack(up, neglow)

	negA := &mat64.Dense{}
	negA.Scale(-1, A)
	stackA = &mat64.Dense{}
	stackA.Stack(A, negA)

	// capture the range of each constraint from A because this information is
	// lost when converting from "low <= Ax <= up" via stacking to "Ax <= up".
	m, _ := A.Dims()
	ranges = make([]float64, m, 2*m)
	for i := 0; i < m; i++ {
		ranges[i] = up.At(i, 0) - low.At(i, 0)
		if ranges[i] == 0 {
			if up.At(i, 0) == 0 {
				ranges[i] = 1
			} else {
				ranges[i] = up.At(i, 0)
			}
		}
	}
	ranges = append(ranges, ranges...)

	return stackA, b, ranges
}
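A usage sketch stacking the box constraints 0 <= x_0 <= 4 and -1 <= x_1 <= 1, with A as the identity:

func exampleStackConstr() {
	low := mat64.NewDense(2, 1, []float64{0, -1})
	up := mat64.NewDense(2, 1, []float64{4, 1})
	A := mat64.NewDense(2, 2, []float64{
		1, 0,
		0, 1,
	})
	stackA, b, ranges := StackConstr(low, A, up)
	fmt.Println(mat64.Formatted(stackA)) // 4x2: A stacked on -A
	fmt.Println(mat64.Formatted(b))      // 4x1: up stacked on -low
	fmt.Println(ranges)                  // [4 2 4 2]
}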
Example #12
func (lr *LinearRegression) Fit(inst *base.Instances) error {
	if inst.Rows < inst.GetAttributeCount() {
		return NotEnoughDataError
	}

	// Split into two matrices, observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(inst.Rows, 1, nil)
	explVariables := mat64.NewDense(inst.Rows, inst.GetAttributeCount(), nil)

	for i := 0; i < inst.Rows; i++ {
		observed.Set(i, 0, inst.Get(i, inst.ClassIndex)) // Set observed data

		for j := 0; j < inst.GetAttributeCount(); j++ {
			if j == 0 {
				// Set intercepts to 1.0
				// Could / should be done better: http://www.theanalysisfactor.com/interpret-the-intercept/
				explVariables.Set(i, 0, 1.0)
			} else {
				explVariables.Set(i, j, inst.Get(i, j-1))
			}
		}
	}

	n := inst.GetAttributeCount()
	qr := mat64.QR(explVariables)
	q := qr.Q()
	reg := qr.R()

	var transposed, qty mat64.Dense
	transposed.TCopy(q)
	qty.Mul(&transposed, observed)

	regressionCoefficients := make([]float64, n)
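	// Back-substitute to solve R * beta = Q^T * y, where R is upper triangular.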
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true

	return nil
}
Example #13
// Distance computes the Manhattan distance, also known as the L1 distance:
// the sum of the absolute differences of the corresponding elements.
func (self *Manhattan) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
	r1, c1 := vectorX.Dims()
	r2, c2 := vectorY.Dims()
	if r1 != r2 || c1 != c2 {
		panic(mat64.ErrShape)
	}

	result := .0

	for i := 0; i < r1; i++ {
		for j := 0; j < c1; j++ {
			result += math.Abs(vectorX.At(i, j) - vectorY.At(i, j))
		}
	}
	return result
}
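A usage sketch, assuming Manhattan needs no configuration beyond its zero value:

func exampleManhattan() {
	x := mat64.NewDense(1, 3, []float64{0, 0, 0})
	y := mat64.NewDense(1, 3, []float64{1, -2, 3})
	m := &Manhattan{}
	fmt.Println(m.Distance(x, y)) // |1| + |-2| + |3| = 6
}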
Example #14
// predict is a wrapper around the prediction computation; it assumes all
// inputs are correctly sized.
func predict(input []float64, features *mat64.Dense, b []float64, featureWeights *mat64.Dense, output []float64) {
	for i := range output {
		output[i] = 0
	}

	nFeatures, _ := features.Dims()
	_, outputDim := featureWeights.Dims()

	sqrt2OverD := math.Sqrt(2.0 / float64(nFeatures))
	for i := 0; i < nFeatures; i++ {
		z := computeZ(input, features.RowView(i), b[i], sqrt2OverD)
		for j := 0; j < outputDim; j++ {
			output[j] += z * featureWeights.At(i, j)
		}
	}
}
Example #15
func (self *Chebyshev) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
	r1, c1 := vectorX.Dims()
	r2, c2 := vectorY.Dims()
	if r1 != r2 || c1 != c2 {
		panic(mat64.ErrShape)
	}

	max := float64(0)

	for i := 0; i < r1; i++ {
		for j := 0; j < c1; j++ {
			max = math.Max(max, math.Abs(vectorX.At(i, j)-vectorY.At(i, j)))
		}
	}

	return max
}
Example #16
func cost(y, yHat *mat64.Dense) float64 {
	// TODO add runtime check that y and yHat have the same dimensions
	rows, cols := y.Dims()
	errorSquares := []float64{}
	for row := 0; row < rows; row++ {
		for col := 0; col < cols; col++ {
			yValue := y.At(row, col)
			yHatValue := yHat.At(row, col)
			sqErr := 0.5 * math.Pow((yValue-yHatValue), 2)
			errorSquares = append(errorSquares, sqErr)
		}
	}
	var errorSum float64
	for _, sqErr := range errorSquares {
		errorSum += sqErr
	}
	return errorSum
}
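A usage sketch; note the 0.5 factor, so cost returns half the sum of squared errors:

func exampleCost() {
	y := mat64.NewDense(2, 1, []float64{1, 0})
	yHat := mat64.NewDense(2, 1, []float64{0.9, 0.2})
	fmt.Println(cost(y, yHat)) // 0.5*(0.01 + 0.04) = 0.025
}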
Example #17
// UpdateWeights takes a size * 1 output vector and a size * 1
// back-propagated error vector, as well as a learnRate, and updates
// the internal weights matrix.
func (n *Network) UpdateWeights(out, err *mat64.Dense, learnRate float64) {

	if n.origWeights == nil {
		n.origWeights = mat64.DenseCopyOf(n.weights)
	}

	// Update each off-diagonal weight by learnRate * out * err, zeroing
	// connections whose original weight magnitude is at most 0.005.
	mulFunc := func(target, source int, v float64) float64 {
		if target == source {
			return v
		}
		if math.Abs(n.origWeights.At(target, source)) > 0.005 {
			return v + learnRate*out.At(source, 0)*err.At(target, 0)
		}
		return 0.00
	}

	// Apply the update to the weights in place.
	n.weights.Apply(mulFunc, n.weights)
}
Example #18
// covToCorr converts a covariance matrix to a correlation matrix.
func covToCorr(c *mat64.Dense) {

	// TODO(jonlawlor): use a *mat64.Symmetric as input.

	r, _ := c.Dims()

	s := make([]float64, r)
	for i := 0; i < r; i++ {
		s[i] = 1 / math.Sqrt(c.At(i, i))
	}
	for i, sx := range s {
		row := c.RawRowView(i)
		for j, sy := range s {
			if i == j {
				// Ensure that the diagonal has exactly ones.
				row[j] = 1
				continue
			}
			row[j] *= sx
			row[j] *= sy
		}
	}
}
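A usage sketch; the conversion happens in place:

func exampleCovToCorr() {
	c := mat64.NewDense(2, 2, []float64{
		4, 2,
		2, 9,
	})
	covToCorr(c)
	fmt.Println(mat64.Formatted(c)) // diagonal exactly 1, off-diagonals 2/(2*3) ≈ 0.3333
}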
Example #19
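// Distance computes the Canberra distance: the sum over all elements of
// |x-y| / (|x|+|y|), with each term computed by cranberraDistanceStep
// (assumed to handle the degenerate 0/0 case). The identifier keeps the
// source's "Cranberra" spelling.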
func (c *Cranberra) Distance(vectorX *mat64.Dense, vectorY *mat64.Dense) float64 {
	r1, c1 := vectorX.Dims()
	r2, c2 := vectorY.Dims()
	if r1 != r2 || c1 != c2 {
		panic(mat64.ErrShape)
	}

	sum := .0

	for i := 0; i < r1; i++ {
		for j := 0; j < c1; j++ {
			p1 := vectorX.At(i, j)
			p2 := vectorY.At(i, j)

			num := math.Abs(p1 - p2)
			denom := math.Abs(p1) + math.Abs(p2)

			sum += cranberraDistanceStep(num, denom)
		}
	}

	return sum
}
Example #20
func testScaling(t *testing.T, u Scaler, data *mat64.Dense, scaledData *mat64.Dense, name string) {
	// Copy data

	r, c := data.Dims()
	origData := mat64.NewDense(r, c, nil)
	for i := 0; i < r; i++ {
		for j := 0; j < c; j++ {
			origData.Set(i, j, data.At(i, j))
		}
	}

	err := ScaleData(u, data)
	if err != nil {
		t.Errorf("Error found in ScaleData for case " + name + ": " + err.Error())
	}

	if !data.EqualsApprox(scaledData, 1e-14) {
		t.Errorf("Improper scaling for case"+name+". Expected: %v, Found: %v", scaledData, data)
	}

	err = UnscaleData(u, data)
	if err != nil {
		t.Errorf("Error found in UnscaleData for case " + name + ": " + err.Error())
	}

	if !data.EqualsApprox(origData, 1e-14) {
		t.Errorf("Improper unscaling for case"+name+". Expected: %v, Found: %v", origData, data)
	}
}
Example #21
/* Calculates the appropriate power of the next-even transformation matrix
 * and returns the coefficients in the linear equation it represents.
 */
func calcTMat(n int) (aa, ab, ba, bb int) {
	t1Mat := mat64.NewDense(2, 2, []float64{3, 2, 2, 1})
	tMat := new(mat64.Dense)
	tMat.Pow(t1Mat, n)

	return round(tMat.At(0, 0)),
		round(tMat.At(0, 1)),
		round(tMat.At(1, 0)),
		round(tMat.At(1, 1))
}
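A usage sketch; round is assumed to be a float64-to-int rounding helper defined elsewhere in the package:

func exampleCalcTMat() {
	aa, ab, ba, bb := calcTMat(2)
	fmt.Println(aa, ab, ba, bb) // 13 8 8 5, i.e. {{3, 2}, {2, 1}} squared
}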
Example #22
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {

	// Retrieve row size
	_, rows := inst.Size()

	// Validate class Attribute count
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("Only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	cols := len(attrs) + 1

	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices, observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix
	inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	})

	// Build the explanatory variables
	inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set intercepts to 1.0
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	})

	n := cols
	qr := new(mat64.QR)
	qr.Factorize(explVariables)
	var q, reg mat64.Dense
	q.QFromQR(qr)
	reg.RFromQR(qr)

	var transposed, qty mat64.Dense
	transposed.Clone(q.T())
	qty.Mul(&transposed, observed)

	regressionCoefficients := make([]float64, n)
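	// Back-substitute to solve R * beta = Q^T * y, as in Example #12.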
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]
	return nil
}
Example #23
// ConditionNormal returns the Normal distribution that is the receiver conditioned
// on the input evidence. The returned multivariate normal has dimension
// n - len(observed), where n is the dimension of the original receiver. The updated
// mean and covariance are
//  mu = mu_un + sigma_{ob,un}^T * sigma_{ob,ob}^-1 (v - mu_ob)
//  sigma = sigma_{un,un} - sigma_{ob,un}^T * sigma_{ob,ob}^-1 * sigma_{ob,un}
// where mu_un and mu_ob are the original means of the unobserved and observed
// variables respectively, sigma_{un,un} is the unobserved subset of the covariance
// matrix, sigma_{ob,ob} is the observed subset of the covariance matrix, and
// sigma_{un,ob} are the cross terms. The elements of x_2 have been observed with
// values v. The dimension order is preserved during conditioning, so if the value
// of dimension 1 is observed, the returned normal represents dimensions {0, 2, ...}
// of the original Normal distribution.
//
// ConditionNormal returns {nil, false} if there is a failure during the update.
// Mathematically this is impossible, but can occur with finite precision arithmetic.
func (n *Normal) ConditionNormal(observed []int, values []float64, src *rand.Rand) (*Normal, bool) {
	if len(observed) == 0 {
		panic("normal: no observed value")
	}
	if len(observed) != len(values) {
		panic("normal: input slice length mismatch")
	}
	for _, v := range observed {
		if v < 0 || v >= n.Dim() {
			panic("normal: observed value out of bounds")
		}
	}

	ob := len(observed)
	unob := n.Dim() - ob
	obMap := make(map[int]struct{})
	for _, v := range observed {
		if _, ok := obMap[v]; ok {
			panic("normal: observed dimension occurs twice")
		}
		obMap[v] = struct{}{}
	}
	if len(observed) == n.Dim() {
		panic("normal: all dimensions observed")
	}
	unobserved := make([]int, 0, unob)
	for i := 0; i < n.Dim(); i++ {
		if _, ok := obMap[i]; !ok {
			unobserved = append(unobserved, i)
		}
	}
	mu1 := make([]float64, unob)
	for i, v := range unobserved {
		mu1[i] = n.mu[v]
	}
	mu2 := make([]float64, ob) // really v - mu2
	for i, v := range observed {
		mu2[i] = values[i] - n.mu[v]
	}

	n.setSigma()

	var sigma11, sigma22 mat64.SymDense
	sigma11.SubsetSym(n.sigma, unobserved)
	sigma22.SubsetSym(n.sigma, observed)

	sigma21 := mat64.NewDense(ob, unob, nil)
	for i, r := range observed {
		for j, c := range unobserved {
			v := n.sigma.At(r, c)
			sigma21.Set(i, j, v)
		}
	}

	var chol mat64.Cholesky
	ok := chol.Factorize(&sigma22)
	if !ok {
		return nil, ok
	}

	// Compute sigma_{2,1}^T * sigma_{2,2}^-1 (v - mu_2).
	v := mat64.NewVector(ob, mu2)
	var tmp, tmp2 mat64.Vector
	err := tmp.SolveCholeskyVec(&chol, v)
	if err != nil {
		return nil, false
	}
	tmp2.MulVec(sigma21.T(), &tmp)

	// Compute sigma_{2,1}^T * sigma_{2,2}^-1 * sigma_{2,1}.
	// TODO(btracey): Should this be a method of SymDense?
	var tmp3, tmp4 mat64.Dense
	err = tmp3.SolveCholesky(&chol, sigma21)
	if err != nil {
		return nil, false
	}
	tmp4.Mul(sigma21.T(), &tmp3)

	for i := range mu1 {
		mu1[i] += tmp2.At(i, 0)
	}

	// TODO(btracey): If tmp2 can constructed with a method, then this can be
	// replaced with SubSym.
	for i := 0; i < len(unobserved); i++ {
		for j := i; j < len(unobserved); j++ {
			v := sigma11.At(i, j)
			sigma11.SetSym(i, j, v-tmp4.At(i, j))
		}
	}
	return NewNormal(mu1, &sigma11, src)
}
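A usage sketch conditioning a strongly correlated bivariate normal on its second coordinate; it assumes NewNormal is the package's (mu, sigma, src) constructor returning (*Normal, bool), as the final line above suggests:

func exampleConditionNormal() {
	sigma := mat64.NewSymDense(2, []float64{
		1, 0.9,
		0.9, 1,
	})
	norm, ok := NewNormal([]float64{0, 0}, sigma, nil)
	if !ok {
		panic("sigma is not positive definite")
	}
	cond, ok := norm.ConditionNormal([]int{1}, []float64{1.0}, nil)
	if !ok {
		panic("conditioning failed")
	}
	// cond is one-dimensional with mean 0.9*1.0 = 0.9 and variance 1 - 0.9^2 = 0.19.
	_ = cond
}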