예제 #1
0
파일: km_test.go 프로젝트: neevor/goxmeans
// TestPointProb calls pointProb with one fixed point/mean pair and checks
// that the result is within 1e-6 of the precomputed value 0.011473.
func TestPointProb(t *testing.T) {
	// Scalar arguments handed to pointProb in positional order.
	var (
		r  = 10010.0
		ri = 100.0
		m  = 2.0
		v  = 20.0
	)

	pt := matrix.MakeDenseMatrix([]float64{5, 7}, 1, 2)
	mu := matrix.MakeDenseMatrix([]float64{6, 8}, 1, 2)

	var measurer EuclidDist
	got := pointProb(r, ri, m, v, pt, mu, measurer)

	want := 0.011473
	tol := .000001

	// The reference value is nudged to the next representable float64
	// above want before the absolute difference is taken.
	delta := math.Abs(got - math.Nextafter(want, want+1))
	if delta > tol {
		t.Errorf("TestPointProb: expected %f but received %f.  The difference %f exceeds epsilon %f", want, got, delta, tol)
	}
}
예제 #2
0
파일: km_test.go 프로젝트: neevor/goxmeans
// TestRandCentroids checks that every CentroidChooser returns a k x cols
// matrix of centroids, and that EllipseCentroids at fraction 1.0 places two
// centroids at a Euclidean distance of 2 from each other.
func TestRandCentroids(t *testing.T) {
	rows := 3
	cols := 3
	k := 2
	data := []float64{1, 2.0, 3, -4.945, 5, -6.1, 7, 8, 9}
	mat := matrix.MakeDenseMatrix(data, rows, cols)
	choosers := []CentroidChooser{randCentroids{}, DataCentroids{}, EllipseCentroids{0.5}}
	for _, cc := range choosers {
		centroids := cc.ChooseCentroids(mat, k)

		r, c := centroids.GetSize()
		if r != k || c != cols {
			// k centroids were requested, so the expected shape is k x cols
			// (not the rows x cols shape of the input data).
			t.Errorf("Returned centroid was %dx%d instead of %dx%d", r, c, k, cols)
		}
	}

	// This section of the test places ellipse centroids at fraction 1 on a 2x2 box,
	// and asserts that there is a distance of 2 between them (i.e., they are diametrically opposite).
	data2 := []float64{1.0, 1.0, -1.0, -1.0}
	mat2 := matrix.MakeDenseMatrix(data2, 2, 2)
	newCentroids := EllipseCentroids{1.0}.ChooseCentroids(mat2, 2)
	dist := EuclidDist{}.CalcDist(newCentroids.GetRowVector(0), newCentroids.GetRowVector(1))
	expectedEd := 2.0 // expected distance between the two centroids
	epsilon := .000001
	diff := math.Abs(dist - expectedEd)
	if diff > epsilon {
		t.Errorf("EuclidDist: expected %f but received %f.  The difference %f exceeds epsilon %f", expectedEd, dist, diff, epsilon)
	}
}
예제 #3
0
// BenchmarkEuclidDist times EuclidDist.CalcDist on a fixed pair of 1x2
// matrices.
func BenchmarkEuclidDist(b *testing.B) {
	var metric EuclidDist

	from := matrix.MakeDenseMatrix([]float64{4.6, 9.5}, 1, 2)
	to := matrix.MakeDenseMatrix([]float64{3.0, 4.1}, 1, 2)

	for n := b.N; n > 0; n-- {
		// Both results are discarded on purpose: only the call is timed.
		_, _ = metric.CalcDist(from, to)
	}
}
예제 #4
0
파일: km_test.go 프로젝트: neevor/goxmeans
// TestComputeCentroid checks that ComputeCentroid rejects an empty matrix
// and returns the expected 1x2 centroid for two small 2x2 inputs.
func TestComputeCentroid(t *testing.T) {
	empty := matrix.Zeros(0, 0)
	_, err := ComputeCentroid(empty)
	if err == nil {
		t.Errorf("Did not raise error on empty matrix")
	}

	twoByTwo := matrix.Ones(2, 2)
	centr, err := ComputeCentroid(twoByTwo)
	if err != nil {
		// The centroid is meaningless after an error; stop here.
		t.Fatalf("Could not compute centroid, err=%v", err)
	}
	expected := matrix.MakeDenseMatrix([]float64{1.0, 1.0}, 1, 2)
	if !matrix.Equals(centr, expected) {
		// "was" is the computed value, "should have been" is the expectation.
		t.Errorf("Incorrect centroid: was %v, should have been %v", centr, expected)
	}

	// Change one element of the input and the matching expected element.
	twoByTwo.Set(0, 0, 3.0)
	expected.Set(0, 0, 2.0)
	centr, err = ComputeCentroid(twoByTwo)
	if err != nil {
		t.Fatalf("Could not compute centroid, err=%v", err)
	}
	if !matrix.Equals(centr, expected) {
		t.Errorf("Incorrect centroid: was %v, should have been %v", centr, expected)
	}
}
예제 #5
0
// TestManhattanDist checks ManhattanDist.CalcDist on a fixed pair of 1x2
// matrices against the hand-computed distance 7.0.
func TestManhattanDist(t *testing.T) {
	var md ManhattanDist
	rows := 1
	columns := 2

	a := matrix.MakeDenseMatrix([]float64{4.6, 9.5}, rows, columns)
	b := matrix.MakeDenseMatrix([]float64{3.0, 4.1}, rows, columns)

	calcMd, err := md.CalcDist(a, b)
	if err != nil {
		t.Errorf("ManhattanDist: returned an error.  err=%v", err)
	}

	// 1.6 + 5.4 = 7.0
	expectedMd := 7.0
	epsilon := .000001

	// Compare within a tolerance instead of with ==, so the test does not
	// depend on the exact rounding of the intermediate float64 sums.
	diff := calcMd - expectedMd
	if diff < 0 {
		diff = -diff
	}
	// Written as !(diff <= epsilon) so that a NaN result still fails.
	if !(diff <= epsilon) {
		t.Errorf("ManhattanDist: should be 7.0, but returned %f", calcMd)
	}
}
예제 #6
0
// TestGetBoundaries checks the four values returned by GetBoundaries, first
// for a single-row matrix and then for two rows stacked together.
func TestGetBoundaries(t *testing.T) {
	// first test with single point
	rows := 1
	columns := 2

	a := matrix.MakeDenseMatrix([]float64{4.6, 9.5}, rows, columns)
	xmin, xmax, ymin, ymax := GetBoundaries(a)
	if xmin != 4.6 || xmax != 4.6 || ymin != 9.5 || ymax != 9.5 {
		t.Errorf("GetBoundaries failed on single item matrix")
	}

	// second test with two points stacked into one matrix
	b := matrix.MakeDenseMatrix([]float64{3.0, 4.1}, rows, columns)
	c, err := a.Stack(b)
	if err != nil {
		// Use a constant format string (the error text may contain '%'),
		// and stop: c is not usable after a failed Stack.
		t.Fatalf("%v", err)
	}
	xmin, xmax, ymin, ymax = GetBoundaries(c)
	if xmin != 3.0 || xmax != 4.6 || ymin != 4.1 || ymax != 9.5 {
		t.Errorf("GetBoundaries failed on two item matrix")
	}
}
예제 #7
0
// TestVariance checks variance for four 2-D points and a fixed centroid
// against the precomputed value 8.4525.
func TestVariance(t *testing.T) {
	points := matrix.MakeDenseMatrix([]float64{1.2, 2.1,
		3.3, 2.2,
		4.1, 3.2,
		5.3, 4.1},
		4, 2)

	centroid := matrix.MakeDenseMatrix([]float64{6.5, 4},
		1, 2)

	var ed matutil.EuclidDist

	v, err := variance(points, centroid, ed)
	if err != nil {
		// v is meaningless after an error; stop here.
		t.Fatalf("TestVariance:variance returned error %v.", err)
	}

	expected := 8.452500
	epsilon := .000001

	// Compare within a tolerance instead of with ==, so the test does not
	// depend on the exact rounding of the float64 arithmetic in variance.
	diff := v - expected
	if diff < 0 {
		diff = -diff
	}
	// Written as !(diff <= epsilon) so that a NaN result still fails.
	if !(diff <= epsilon) {
		t.Errorf("TestVariance: variance should be 8.452500 but received %f", v)
	}
}
예제 #8
0
// TestEuclidDist checks EuclidDist.CalcDist on a fixed pair of 1x2 matrices
// and requires the result to be within 1e-6 of 5.632051.
func TestEuclidDist(t *testing.T) {
	var metric EuclidDist

	from := matrix.MakeDenseMatrix([]float64{4.6, 9.5}, 1, 2)
	to := matrix.MakeDenseMatrix([]float64{3.0, 4.1}, 1, 2)

	got, err := metric.CalcDist(from, to)
	if err != nil {
		t.Errorf("EuclidDist: returned an error.  err=%v", err)
	}

	want := 5.632051
	tol := .000001

	// The reference value is nudged to the next representable float64
	// above want before the absolute difference is taken.
	delta := math.Abs(got - math.Nextafter(want, want+1))
	if delta > tol {
		t.Errorf("EuclidDist: excpected %f but received %f.  The difference %f exceeds epsilon %f", want, got, delta, tol)
	}
}
예제 #9
0
파일: km_test.go 프로젝트: neevor/goxmeans
// TestKmeans runs kmeans on nine 2-D points with two centroids chosen by
// DataCentroids and checks that two clusters come back and that the
// clusters' point counts add up to the number of input rows.
func TestKmeans(t *testing.T) {
	var cc DataCentroids
	var ed EuclidDist
	datapoints := matrix.MakeDenseMatrix([]float64{2, 3, 3, 2, 3, 4, 4, 3, 8, 7, 9, 6, 9, 8, 10, 7, 3, 5}, 9, 2)
	centroids := cc.ChooseCentroids(datapoints, 2)

	model, err := kmeans(datapoints, centroids, ed)
	if err != nil {
		t.Errorf("Kmeans returned: %v", err)
		return
	}

	if len(model.Clusters) != 2 {
		t.Errorf("TestKmeans: expected 2 clusters and received %d.", len(model.Clusters))
	}

	// Ensure that all points are referenced: the per-cluster point counts
	// must sum to the number of rows in the input.
	pref := 0
	for _, clust := range model.Clusters {
		pref += clust.Numpoints()
	}

	cpoints, _ := datapoints.GetSize()
	if pref != cpoints {
		t.Errorf("Points referenced in clusters=%d, should be %d\n", pref, cpoints)
	}
}
예제 #10
0
파일: km_test.go 프로젝트: neevor/goxmeans
import (
	"bufio"
	"fmt"
	"github.com/bobhancock/gomatrix/matrix"
	"math"
	"os"
	"testing"
)

// Package-level matrix fixtures.
var (
	// DATAPOINTS is a 13x2 matrix of sample points.
	DATAPOINTS = matrix.MakeDenseMatrix([]float64{
		3.0, 2.0,
		-3.0, 2.0,
		0.355083, -3.376585,
		1.852435, 3.547351,
		-2.078973, 2.552013,
		-0.993756, -0.884433,
		2.682252, 4.007573,
		-3.087776, 2.878713,
		-1.565978, -1.256985,
		2.441611, 0.444826,
		10.29, 20.6594,
		12.93, 23.3988,
		120.1, 202.18,
	}, 13, 2)

	// CENTROIDS is a 3x2 matrix of centroids.
	CENTROIDS = matrix.MakeDenseMatrix([]float64{
		4.5, 11.3,
		6.1, 12.0,
		12.1, 9.6,
	}, 3, 2)

	// DATAPOINTS_D is an 8x2 matrix of points.
	DATAPOINTS_D = matrix.MakeDenseMatrix([]float64{2, 3, 3, 2, 3, 4, 4, 3, 8, 7, 9, 6, 9, 8, 10, 7}, 8, 2)

	// CENTROIDS_D is a single 1x2 centroid.
	CENTROIDS_D = matrix.MakeDenseMatrix([]float64{6, 7}, 1, 2)

	// DATAPOINTS_D0 holds the same values as the first four rows of
	// DATAPOINTS_D, as a 4x2 matrix.
	DATAPOINTS_D0 = matrix.MakeDenseMatrix([]float64{2, 3, 3, 2, 3, 4, 4, 3}, 4, 2)
)
예제 #11
0
파일: km.go 프로젝트: postfix/goxmeans
// Load reads a text file of floats into a matrix. Each line of the file is
// one row, and the fields on a line are separated by sep.
//
// Every line must have the same number of fields as the first line, and at
// least two fields. Each field is trimmed of surrounding white space and
// parsed as a float64.
//
// On any failure Load returns a 1x1 zero matrix together with a non-nil
// error. Line numbers in error messages are zero-based.
func Load(fname, sep string) (*matrix.DenseMatrix, error) {
	z := matrix.Zeros(1, 1)

	fp, err := os.Open(fname)
	if err != nil {
		return z, err
	}
	defer fp.Close()

	data := make([]float64, 0)
	cols := 0
	r := bufio.NewReader(fp)
	linenum := 0

	for {
		// ReadLine returns at most one buffer's worth of bytes per call and
		// sets isPrefix when the line continues; keep reading so that a long
		// line stays a single row instead of being split into several.
		buf, isPrefix, err := r.ReadLine()
		line := string(buf)
		for isPrefix && err == nil {
			buf, isPrefix, err = r.ReadLine()
			line += string(buf)
		}

		if err != nil && err != io.EOF {
			return z, errors.New(fmt.Sprintf("goxmean.Load: reading linenum %d: %v", linenum, err))
		}
		if err == io.EOF && line == "" {
			break
		}
		// If EOF arrived while continuing a long line, the text gathered so
		// far is the final row; the next iteration sees EOF again and stops.

		l := strings.Split(line, sep)

		// The first line fixes the column count for the whole file.
		if linenum == 0 {
			cols = len(l)
		}

		if len(l) != cols {
			return z, errors.New(fmt.Sprintf("Load(): linenum %d has %d columns.  It should have %d columns.", linenum, len(l), cols))
		}

		if len(l) < 2 {
			return z, errors.New(fmt.Sprintf("Load(): linenum %d has only %d elements", linenum, len(l)))
		}

		linenum++

		// Convert each field to float64 and append it in row-major order.
		for _, v := range l {
			v = strings.TrimSpace(v)
			f, err := strconv.ParseFloat(v, 64)
			if err != nil {
				return z, errors.New(fmt.Sprintf("goxmeans.Load: cannot convert value %s to float64.", v))
			}
			data = append(data, f)
		}
	}

	return matrix.MakeDenseMatrix(data, linenum, cols), nil
}
예제 #12
0
// Load reads a tab delimited text file of floats into a two-column matrix.
//
// Each line of the file is one row. Only the first two tab-separated fields
// of a line are used; a line with fewer than two fields is an error. The
// fields are converted with Atof64.
//
// On any failure Load returns a 1x1 zero matrix together with a non-nil
// error. Line numbers in error messages are one-based.
func Load(fname string) (*matrix.DenseMatrix, error) {
	datamatrix := matrix.Zeros(1, 1)

	fp, err := os.Open(fname)
	if err != nil {
		return datamatrix, err
	}
	defer fp.Close()

	// For now, we limit ourselves to two columns.
	const numcols = 2

	// Start empty and always append, so the slice holds exactly
	// rows*numcols values no matter how many lines the file has.
	data := make([]float64, 0, 2048)

	r := bufio.NewReader(fp)
	linenum := 0 // number of lines consumed so far

	for {
		// ReadLine returns at most one buffer's worth of bytes per call and
		// sets isPrefix when the line continues; keep reading so that a long
		// line stays a single row instead of being split into several.
		buf, isPrefix, err := r.ReadLine()
		line := string(buf)
		for isPrefix && err == nil {
			buf, isPrefix, err = r.ReadLine()
			line += string(buf)
		}

		if err != nil && err != io.EOF {
			return datamatrix, errors.New(fmt.Sprintf("means.Load: reading linenum %d: %v", linenum+1, err))
		}
		if err == io.EOF && line == "" {
			break
		}
		// If EOF arrived while continuing a long line, the text gathered so
		// far is the final row; the next iteration sees EOF again and stops.

		linenum++
		l := strings.Split(line, "\t")

		if len(l) < 2 {
			return datamatrix, errors.New(fmt.Sprintf("means.Load: linenum %d has only %d elements", linenum, len(l)))
		}

		// for now assume 2 dimensions only
		f0, err := Atof64(l[0])
		if err != nil {
			return datamatrix, errors.New(fmt.Sprintf("means.Load: cannot convert f0 %s to float64.", l[0]))
		}
		f1, err := Atof64(l[1])
		if err != nil {
			return datamatrix, errors.New(fmt.Sprintf("means.Load: cannot convert f1 %s to float64.", l[1]))
		}

		data = append(data, f0, f1)
	}

	datamatrix = matrix.MakeDenseMatrix(data, linenum, numcols)
	return datamatrix, nil
}