func TestPointProb(t *testing.T) { R := 10010.0 Ri := 100.0 M := 2.0 V := 20.000000 point := matrix.MakeDenseMatrix([]float64{5, 7}, 1, 2) mean := matrix.MakeDenseMatrix([]float64{6, 8}, 1, 2) var ed EuclidDist // pointProb(R, Ri, M int, V float64, point, mean *matrix.DenseMatrix, measurer VectorMeasurer) (float64, error) pp := pointProb(R, Ri, M, V, point, mean, ed) E := 0.011473 epsilon := .000001 na := math.Nextafter(E, E+1) diff := math.Abs(pp - na) if diff > epsilon { t.Errorf("TestPointProb: expected %f but received %f. The difference %f exceeds epsilon %f", E, pp, diff, epsilon) } }
func TestRandCentroids(t *testing.T) { rows := 3 cols := 3 k := 2 data := []float64{1, 2.0, 3, -4.945, 5, -6.1, 7, 8, 9} mat := matrix.MakeDenseMatrix(data, rows, cols) choosers := []CentroidChooser{randCentroids{}, DataCentroids{}, EllipseCentroids{0.5}} for _, cc := range choosers { centroids := cc.ChooseCentroids(mat, k) r, c := centroids.GetSize() if r != k || c != cols { t.Errorf("Returned centroid was %dx%d instead of %dx%d", r, c, rows, cols) } } // This section of the test places ellipse centroids at fraction 1 on a 2x2 box, // and asserts that there is a distance of 2 between them (ie, they are diametrically opposite) data2 := []float64{1.0, 1.0, -1.0, -1.0} mat2 := matrix.MakeDenseMatrix(data2, 2, 2) newCentroids := EllipseCentroids{1.0}.ChooseCentroids(mat2, 2) dist := EuclidDist{}.CalcDist(newCentroids.GetRowVector(0), newCentroids.GetRowVector(1)) expectedEd := 2.0 //expected value epsilon := .000001 diff := math.Abs(dist - expectedEd) if diff > epsilon { t.Errorf("EuclidDist: excpected %f but received %f. The difference %f exceeds epsilon %f", expectedEd, dist, diff, epsilon) } }
func BenchmarkEuclidDist(b *testing.B) { var ed EuclidDist rows := 1 columns := 2 centroid := matrix.MakeDenseMatrix([]float64{4.6, 9.5}, rows, columns) point := matrix.MakeDenseMatrix([]float64{3.0, 4.1}, rows, columns) for i := 0; i < b.N; i++ { _, _ = ed.CalcDist(centroid, point) } }
func TestComputeCentroid(t *testing.T) { empty := matrix.Zeros(0, 0) _, err := ComputeCentroid(empty) if err == nil { t.Errorf("Did not raise error on empty matrix") } twoByTwo := matrix.Ones(2, 2) centr, err := ComputeCentroid(twoByTwo) if err != nil { t.Errorf("Could not compute centroid, err=%v", err) } expected := matrix.MakeDenseMatrix([]float64{1.0, 1.0}, 1, 2) if !matrix.Equals(centr, expected) { t.Errorf("Incorrect centroid: was %v, should have been %v", expected, centr) } twoByTwo.Set(0, 0, 3.0) expected.Set(0, 0, 2.0) centr, err = ComputeCentroid(twoByTwo) if err != nil { t.Errorf("Could not compute centroid, err=%v", err) } if !matrix.Equals(centr, expected) { t.Errorf("Incorrect centroid: was %v, should have been %v", expected, centr) } }
func TestManhattanDist(t *testing.T) { var md ManhattanDist rows := 1 columns := 2 a := matrix.MakeDenseMatrix([]float64{4.6, 9.5}, rows, columns) b := matrix.MakeDenseMatrix([]float64{3.0, 4.1}, rows, columns) calcMd, err := md.CalcDist(a, b) if err != nil { t.Errorf("ManhattandDist: returned an error. err=%v", err) } // 1.6 + 5.4 = 7.0 if calcMd != float64(7.0) { t.Errorf("ManhattanDist: should be 7.0, but returned %f", calcMd) } }
func TestGetBoundaries(t *testing.T) { // first test with single point rows := 1 columns := 2 a := matrix.MakeDenseMatrix([]float64{4.6, 9.5}, rows, columns) xmin, xmax, ymin, ymax := GetBoundaries(a) if xmin != 4.6 || xmax != 4.6 || ymin != 9.5 || ymax != 9.5 { t.Errorf("GetBoundaries failed on single item matrix") } b := matrix.MakeDenseMatrix([]float64{3.0, 4.1}, rows, columns) c, err := a.Stack(b) if err != nil { t.Errorf(err.Error()) } xmin, xmax, ymin, ymax = GetBoundaries(c) if xmin != 3.0 || xmax != 4.6 || ymin != 4.1 || ymax != 9.5 { t.Errorf("GetBoundaries failed on two item matrix") } }
func TestVariance(t *testing.T) { points := matrix.MakeDenseMatrix([]float64{1.2, 2.1, 3.3, 2.2, 4.1, 3.2, 5.3, 4.1}, 4, 2) centroid := matrix.MakeDenseMatrix([]float64{6.5, 4}, 1, 2) var ed matutil.EuclidDist v, err := variance(points, centroid, ed) if err != nil { t.Errorf("TestVaricance:variance returned error %v.", err) } //fmt.Printf("v=%f\n",v) if v != 8.452500 { t.Errorf("TestVariance: variance should be 8.452500 but received %f", v) } }
func TestEuclidDist(t *testing.T) { var ed EuclidDist rows := 1 columns := 2 centroid := matrix.MakeDenseMatrix([]float64{4.6, 9.5}, rows, columns) point := matrix.MakeDenseMatrix([]float64{3.0, 4.1}, rows, columns) calcEd, err := ed.CalcDist(centroid, point) if err != nil { t.Errorf("EuclidDist: returned an error. err=%v", err) } expectedEd := 5.632051 //expected value epsilon := .000001 na := math.Nextafter(expectedEd, expectedEd+1) diff := math.Abs(calcEd - na) if diff > epsilon { t.Errorf("EuclidDist: excpected %f but received %f. The difference %f exceeds epsilon %f", expectedEd, calcEd, diff, epsilon) } }
func TestKmeans(t *testing.T) { // datapoints, err := Load("./testSetSmall.txt") // if err != nil { // t.Errorf("Load returned: %v", err) // return // } var cc DataCentroids var ed EuclidDist datapoints := matrix.MakeDenseMatrix([]float64{2, 3, 3, 2, 3, 4, 4, 3, 8, 7, 9, 6, 9, 8, 10, 7, 3, 5}, 9, 2) centroids := cc.ChooseCentroids(datapoints, 2) model, err := kmeans(datapoints, centroids, ed) if err != nil { t.Errorf("Kmeans returned: %v", err) return } if len(model.Clusters) != 2 { t.Errorf("TestKemansp: expected 2 clusters and received %d.", len(model.Clusters)) } variances := make([]float64, 2) for i, clust := range model.Clusters { variances[i] = clust.Variance } // Insure that all points are referenced. pref := 0 for _, clust := range model.Clusters { pref += clust.Numpoints() } cpoints, _ := datapoints.GetSize() if pref != cpoints { t.Errorf("Points referenced in clusters=%d, should be %d\n", pref, cpoints) //fmt.Printf("%d: points=%v\n",i, clust.Points) //fmt.Printf("%d: centroid=%v\n", i, clust.Centroid) //fmt.Printf("%d: variance:%f\n\n", i, clust.Variance) } }
import (
	"bufio"
	"fmt"
	"github.com/bobhancock/gomatrix/matrix"
	"math"
	"os"
	"testing"
)

// Package-level fixtures. Each is built with matrix.MakeDenseMatrix from a
// flat slice of values followed by the row and column counts.

// DATAPOINTS is a 13x2 fixture of 2-D points.
var DATAPOINTS = matrix.MakeDenseMatrix([]float64{3.0, 2.0, -3.0, 2.0, 0.355083, -3.376585, 1.852435, 3.547351, -2.078973, 2.552013, -0.993756, -0.884433, 2.682252, 4.007573, -3.087776, 2.878713, -1.565978, -1.256985, 2.441611, 0.444826, 10.29, 20.6594, 12.93, 23.3988, 120.1, 202.18}, 13, 2)

// CENTROIDS is a 3x2 fixture; presumably three candidate centroids for
// DATAPOINTS — verify against the tests that use it.
var CENTROIDS = matrix.MakeDenseMatrix([]float64{4.5, 11.3, 6.1, 12.0, 12.1, 9.6}, 3, 2)

// DATAPOINTS_D is an 8x2 fixture of 2-D points.
var DATAPOINTS_D = matrix.MakeDenseMatrix([]float64{2, 3, 3, 2, 3, 4, 4, 3, 8, 7, 9, 6, 9, 8, 10, 7}, 8, 2)

// CENTROIDS_D is a 1x2 fixture holding the single point (6, 7).
var CENTROIDS_D = matrix.MakeDenseMatrix([]float64{6, 7}, 1, 2)

// DATAPOINTS_D0 is a 4x2 fixture whose values equal the first four rows of
// DATAPOINTS_D.
var DATAPOINTS_D0 = matrix.MakeDenseMatrix([]float64{2, 3, 3, 2, 3, 4, 4, 3}, 4, 2)
// Load loads a tab delimited text file of floats into a matrix. func Load(fname, sep string) (*matrix.DenseMatrix, error) { z := matrix.Zeros(1, 1) fp, err := os.Open(fname) if err != nil { return z, err } defer fp.Close() data := make([]float64, 0) cols := 0 r := bufio.NewReader(fp) linenum := 0 eof := false for !eof { var line string var buf []byte buf, _, err := r.ReadLine() line = string(buf) if err == io.EOF { err = nil eof = true break } else if err != nil { return z, errors.New(fmt.Sprintf("goxmean.Load: reading linenum %d: %v", linenum, err)) } l1 := strings.TrimRight(line, "\n") l := strings.Split(l1, sep) // If each line does not have the same number of columns then error if linenum == 0 { cols = len(l) } if len(l) != cols { return z, errors.New(fmt.Sprintf("Load(): linenum %d has %d columns. It should have %d columns.", linenum, len(line), cols)) } if len(l) < 2 { return z, errors.New(fmt.Sprintf("Load(): linenum %d has only %d elements", linenum, len(line))) } linenum++ // Convert the strings to float64 and build up the slice t by appending. t := make([]float64, 0) for _, v := range l { v = strings.TrimSpace(v) f, err := strconv.ParseFloat(v, 64) if err != nil { return z, errors.New(fmt.Sprintf("goxmeans.Load: cannot convert value %s to float64.", v)) } t = append(t, f) } data = append(data, t...) } mat := matrix.MakeDenseMatrix(data, linenum, cols) //fmt.Println(time.Now())n // flag for debugging return mat, nil }
// Load loads a tab delimited text file of floats into a slice. // Assume last column is the target. // For now, we limit ourselves to two columns func Load(fname string) (*matrix.DenseMatrix, error) { datamatrix := matrix.Zeros(1, 1) data := make([]float64, 2048) idx := 0 fp, err := os.Open(fname) if err != nil { return datamatrix, err } defer fp.Close() r := bufio.NewReader(fp) linenum := 1 eof := false for !eof { var line string var buf []byte // line, err := r.ReadString('\n') buf, _, err := r.ReadLine() line = string(buf) // fmt.Printf("linenum=%d buf=%v line=%v\n",linenum,buf, line) if err == io.EOF { err = nil eof = true break } else if err != nil { return datamatrix, errors.New(fmt.Sprintf("means.Load: reading linenum %d: %v", linenum, err)) } linenum++ l1 := strings.TrimRight(line, "\n") l := strings.Split(l1, "\t") if len(l) < 2 { return datamatrix, errors.New(fmt.Sprintf("means.Load: linenum %d has only %d elements", linenum, len(line))) } // for now assume 2 dimensions only f0, err := Atof64(string(l[0])) if err != nil { return datamatrix, errors.New(fmt.Sprintf("means.Load: cannot convert f0 %s to float64.", l[0])) } f1, err := Atof64(string(l[1])) if err != nil { return datamatrix, errors.New(fmt.Sprintf("means.Load: cannot convert f1 %s to float64.", l[1])) } if linenum >= len(data) { data = append(data, f0, f1) } else { data[idx] = f0 idx++ data[idx] = f1 idx++ } } numcols := 2 datamatrix = matrix.MakeDenseMatrix(data, linenum-1, numcols) return datamatrix, nil }