예제 #1
0
파일: knn.go 프로젝트: nickpoorman/golearn
func (KNN *KNNClassifier) canUseOptimisations(what base.FixedDataGrid) bool {
	// Check that the two have exactly the same layout
	if !base.CheckStrictlyCompatible(what, KNN.TrainingData) {
		return false
	}
	// Check that the two are DenseInstances
	whatd, ok1 := what.(*base.DenseInstances)
	_, ok2 := KNN.TrainingData.(*base.DenseInstances)
	if !ok1 || !ok2 {
		return false
	}
	// Check that no Class Attributes are mixed in with the data
	classAttrs := whatd.AllClassAttributes()
	normalAttrs := base.NonClassAttributes(whatd)
	// Retrieve all the AGs
	ags := whatd.AllAttributeGroups()
	classAttrGroups := make([]base.AttributeGroup, 0)
	for agName := range ags {
		ag := ags[agName]
		attrs := ag.Attributes()
		matched := false
		for _, a := range attrs {
			for _, c := range classAttrs {
				if a.Equals(c) {
					matched = true
				}
			}
		}
		if matched {
			classAttrGroups = append(classAttrGroups, ag)
		}
	}
	for _, cag := range classAttrGroups {
		attrs := cag.Attributes()
		common := base.AttributeIntersect(normalAttrs, attrs)
		if len(common) != 0 {
			return false
		}
	}

	// Check that all of the Attributes are numeric
	for _, a := range normalAttrs {
		if _, ok := a.(*base.FloatAttribute); !ok {
			return false
		}
	}
	// If that's fine, return true
	return true
}
예제 #2
0
func TestBinaryFilter(t *testing.T) {

	Convey("Given a contrived dataset...", t, func() {

		// Read the contrived dataset
		inst, err := base.ParseCSVToInstances("./binary_test.csv", true)
		So(err, ShouldEqual, nil)

		// Add Attributes to the filter
		bFilt := NewBinaryConvertFilter()
		bAttrs := base.NonClassAttributes(inst)
		for _, a := range bAttrs {
			bFilt.AddAttribute(a)
		}
		bFilt.Train()

		// Construct a LazilyFilteredInstances to handle it
		instF := base.NewLazilyFilteredInstances(inst, bFilt)

		Convey("All the non-class Attributes should be binary...", func() {
			// Check that all the Attributes are the right type
			for _, a := range base.NonClassAttributes(instF) {
				_, ok := a.(*base.BinaryAttribute)
				So(ok, ShouldEqual, true)
			}
		})

		// Check that all the class Attributes made it
		Convey("All the class Attributes should have survived...", func() {
			origClassAttrs := inst.AllClassAttributes()
			newClassAttrs := instF.AllClassAttributes()
			intersectClassAttrs := base.AttributeIntersect(origClassAttrs, newClassAttrs)
			So(len(intersectClassAttrs), ShouldEqual, len(origClassAttrs))
		})
		// Check that the Attributes have the right names
		Convey("Attribute names should be correct...", func() {
			origNames := []string{"floatAttr", "shouldBe1Binary",
				"shouldBe3Binary_stoicism", "shouldBe3Binary_heroism",
				"shouldBe3Binary_romanticism", "arbitraryClass"}
			origMap := make(map[string]bool)
			for _, a := range origNames {
				origMap[a] = false
			}
			for _, a := range instF.AllAttributes() {
				name := a.GetName()
				_, ok := origMap[name]
				if !ok {
					t.Error(fmt.Sprintf("Weird: %s", name))
				}
				origMap[name] = true
			}
			for a := range origMap {
				So(origMap[a], ShouldEqual, true)
			}
		})

		// Check that the Attributes have been discretised correctly
		Convey("Discretisation should have worked", func() {
			// Build Attribute map
			attrMap := make(map[string]base.Attribute)
			for _, a := range instF.AllAttributes() {
				attrMap[a.GetName()] = a
			}
			// For each attribute
			for name := range attrMap {
				attr := attrMap[name]
				// Retrieve AttributeSpec
				as, err := instF.GetAttribute(attr)
				So(err, ShouldEqual, nil)
				if name == "floatAttr" {
					So(instF.Get(as, 0), ShouldResemble, []byte{1})
					So(instF.Get(as, 1), ShouldResemble, []byte{1})
					So(instF.Get(as, 2), ShouldResemble, []byte{0})
				} else if name == "shouldBe1Binary" {
					So(instF.Get(as, 0), ShouldResemble, []byte{0})
					So(instF.Get(as, 1), ShouldResemble, []byte{1})
					So(instF.Get(as, 2), ShouldResemble, []byte{1})
				} else if name == "shouldBe3Binary_stoicism" {
					So(instF.Get(as, 0), ShouldResemble, []byte{1})
					So(instF.Get(as, 1), ShouldResemble, []byte{0})
					So(instF.Get(as, 2), ShouldResemble, []byte{0})
				} else if name == "shouldBe3Binary_heroism" {
					So(instF.Get(as, 0), ShouldResemble, []byte{0})
					So(instF.Get(as, 1), ShouldResemble, []byte{1})
					So(instF.Get(as, 2), ShouldResemble, []byte{0})
				} else if name == "shouldBe3Binary_romanticism" {
					So(instF.Get(as, 0), ShouldResemble, []byte{0})
					So(instF.Get(as, 1), ShouldResemble, []byte{0})
					So(instF.Get(as, 2), ShouldResemble, []byte{1})
				} else if name == "arbitraryClass" {
				} else {
					t.Error("Shouldn't have %s", name)
				}

			}
		})

	})

}
예제 #3
0
func TestFloatFilter(t *testing.T) {

	Convey("Given a contrived dataset...", t, func() {

		// Read the contrived dataset
		inst, err := base.ParseCSVToInstances("./binary_test.csv", true)
		So(err, ShouldEqual, nil)

		// Add Attributes to the filter
		bFilt := NewFloatConvertFilter()
		bAttrs := base.NonClassAttributes(inst)
		for _, a := range bAttrs {
			bFilt.AddAttribute(a)
		}
		bFilt.Train()

		// Construct a LazilyFilteredInstances to handle it
		instF := base.NewLazilyFilteredInstances(inst, bFilt)

		Convey("All the non-class Attributes should be floats...", func() {
			// Check that all the Attributes are the right type
			for _, a := range base.NonClassAttributes(instF) {
				_, ok := a.(*base.FloatAttribute)
				So(ok, ShouldEqual, true)
			}
		})

		// Check that all the class Attributes made it
		Convey("All the class Attributes should have survived...", func() {
			origClassAttrs := inst.AllClassAttributes()
			newClassAttrs := instF.AllClassAttributes()
			intersectClassAttrs := base.AttributeIntersect(origClassAttrs, newClassAttrs)
			So(len(intersectClassAttrs), ShouldEqual, len(origClassAttrs))
		})
		// Check that the Attributes have the right names
		Convey("Attribute names should be correct...", func() {
			origNames := []string{"floatAttr", "shouldBe1Binary",
				"shouldBe3Binary_stoicism", "shouldBe3Binary_heroism",
				"shouldBe3Binary_romanticism", "arbitraryClass"}
			origMap := make(map[string]bool)
			for _, a := range origNames {
				origMap[a] = false
			}
			for _, a := range instF.AllAttributes() {
				name := a.GetName()
				_, ok := origMap[name]
				So(ok, ShouldBeTrue)

				origMap[name] = true
			}
			for a := range origMap {
				So(origMap[a], ShouldEqual, true)
			}
		})

		Convey("All Attributes should be the correct type...", func() {
			for _, a := range instF.AllAttributes() {
				if a.GetName() == "arbitraryClass" {
					_, ok := a.(*base.CategoricalAttribute)
					So(ok, ShouldEqual, true)
				} else {
					_, ok := a.(*base.FloatAttribute)
					So(ok, ShouldEqual, true)
				}
			}
		})

		// Check that the Attributes have been discretised correctly
		Convey("FloatConversion should have worked", func() {
			// Build Attribute map
			attrMap := make(map[string]base.Attribute)
			for _, a := range instF.AllAttributes() {
				attrMap[a.GetName()] = a
			}
			// For each attribute
			for name := range attrMap {
				So(name, ShouldBeIn, []string{
					"floatAttr",
					"shouldBe1Binary",
					"shouldBe3Binary_stoicism",
					"shouldBe3Binary_heroism",
					"shouldBe3Binary_romanticism",
					"arbitraryClass",
				})

				attr := attrMap[name]
				as, err := instF.GetAttribute(attr)
				So(err, ShouldEqual, nil)

				if name == "floatAttr" {
					So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(1.0))
					So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(1.0))
					So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(0.0))
				} else if name == "shouldBe1Binary" {
					So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(0.0))
					So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(1.0))
					So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(1.0))
				} else if name == "shouldBe3Binary_stoicism" {
					So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(1.0))
					So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(0.0))
					So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(0.0))
				} else if name == "shouldBe3Binary_heroism" {
					So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(0.0))
					So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(1.0))
					So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(0.0))
				} else if name == "shouldBe3Binary_romanticism" {
					So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(0.0))
					So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(0.0))
					So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(1.0))
				} else if name == "arbitraryClass" {
				}
			}
		})
	})
}
예제 #4
0
func (KNN *KNNClassifier) optimisedEuclideanPredict(d *base.DenseInstances) base.FixedDataGrid {

	// Create return vector
	ret := base.GeneratePredictionVector(d)
	// Type-assert training data
	tr := KNN.TrainingData.(*base.DenseInstances)
	// Enumeration of AttributeGroups
	agPos := make(map[string]int)
	agTrain := tr.AllAttributeGroups()
	agPred := d.AllAttributeGroups()
	classAttrs := tr.AllClassAttributes()
	counter := 0
	for ag := range agTrain {
		// Detect whether the AttributeGroup has any classes in it
		attrs := agTrain[ag].Attributes()
		//matched := false
		if len(base.AttributeIntersect(classAttrs, attrs)) == 0 {
			agPos[ag] = counter
		}
		counter++
	}
	// Pointers to the start of each prediction row
	rowPointers := make([]*C.double, len(agPred))
	trainPointers := make([]*C.double, len(agPred))
	rowSizes := make([]int, len(agPred))
	for ag := range agPred {
		if ap, ok := agPos[ag]; ok {

			rowPointers[ap] = (*C.double)(unsafe.Pointer(&(agPred[ag].Storage()[0])))
			trainPointers[ap] = (*C.double)(unsafe.Pointer(&(agTrain[ag].Storage()[0])))
			rowSizes[ap] = agPred[ag].RowSizeInBytes() / 8
		}
	}
	_, predRows := d.Size()
	_, trainRows := tr.Size()
	// Crete the distance vector
	distanceVec := distanceRecs(make([]_Ctype_struct_dist, trainRows))
	// Additional datastructures
	voteVec := make([]int, KNN.NearestNeighbours)
	maxMap := make(map[string]int)

	for row := 0; row < predRows; row++ {
		for i := 0; i < trainRows; i++ {
			distanceVec[i].dist = 0
		}
		for ag := range agPred {
			if ap, ok := agPos[ag]; ok {
				C.euclidean_distance(
					&(distanceVec[0]),
					C.int(trainRows),
					C.int(len(agPred[ag].Attributes())),
					C.int(row),
					trainPointers[ap],
					rowPointers[ap],
				)
			}
		}
		sort.Sort(distanceVec)
		votes := distanceVec[:KNN.NearestNeighbours]
		for i, v := range votes {
			voteVec[i] = int(v.p)
		}
		maxClass := KNN.vote(maxMap, voteVec)
		base.SetClass(ret, row, maxClass)
	}
	return ret
}