func (KNN *KNNClassifier) canUseOptimisations(what base.FixedDataGrid) bool { // Check that the two have exactly the same layout if !base.CheckStrictlyCompatible(what, KNN.TrainingData) { return false } // Check that the two are DenseInstances whatd, ok1 := what.(*base.DenseInstances) _, ok2 := KNN.TrainingData.(*base.DenseInstances) if !ok1 || !ok2 { return false } // Check that no Class Attributes are mixed in with the data classAttrs := whatd.AllClassAttributes() normalAttrs := base.NonClassAttributes(whatd) // Retrieve all the AGs ags := whatd.AllAttributeGroups() classAttrGroups := make([]base.AttributeGroup, 0) for agName := range ags { ag := ags[agName] attrs := ag.Attributes() matched := false for _, a := range attrs { for _, c := range classAttrs { if a.Equals(c) { matched = true } } } if matched { classAttrGroups = append(classAttrGroups, ag) } } for _, cag := range classAttrGroups { attrs := cag.Attributes() common := base.AttributeIntersect(normalAttrs, attrs) if len(common) != 0 { return false } } // Check that all of the Attributes are numeric for _, a := range normalAttrs { if _, ok := a.(*base.FloatAttribute); !ok { return false } } // If that's fine, return true return true }
func TestBinaryFilter(t *testing.T) { Convey("Given a contrived dataset...", t, func() { // Read the contrived dataset inst, err := base.ParseCSVToInstances("./binary_test.csv", true) So(err, ShouldEqual, nil) // Add Attributes to the filter bFilt := NewBinaryConvertFilter() bAttrs := base.NonClassAttributes(inst) for _, a := range bAttrs { bFilt.AddAttribute(a) } bFilt.Train() // Construct a LazilyFilteredInstances to handle it instF := base.NewLazilyFilteredInstances(inst, bFilt) Convey("All the non-class Attributes should be binary...", func() { // Check that all the Attributes are the right type for _, a := range base.NonClassAttributes(instF) { _, ok := a.(*base.BinaryAttribute) So(ok, ShouldEqual, true) } }) // Check that all the class Attributes made it Convey("All the class Attributes should have survived...", func() { origClassAttrs := inst.AllClassAttributes() newClassAttrs := instF.AllClassAttributes() intersectClassAttrs := base.AttributeIntersect(origClassAttrs, newClassAttrs) So(len(intersectClassAttrs), ShouldEqual, len(origClassAttrs)) }) // Check that the Attributes have the right names Convey("Attribute names should be correct...", func() { origNames := []string{"floatAttr", "shouldBe1Binary", "shouldBe3Binary_stoicism", "shouldBe3Binary_heroism", "shouldBe3Binary_romanticism", "arbitraryClass"} origMap := make(map[string]bool) for _, a := range origNames { origMap[a] = false } for _, a := range instF.AllAttributes() { name := a.GetName() _, ok := origMap[name] if !ok { t.Error(fmt.Sprintf("Weird: %s", name)) } origMap[name] = true } for a := range origMap { So(origMap[a], ShouldEqual, true) } }) // Check that the Attributes have been discretised correctly Convey("Discretisation should have worked", func() { // Build Attribute map attrMap := make(map[string]base.Attribute) for _, a := range instF.AllAttributes() { attrMap[a.GetName()] = a } // For each attribute for name := range attrMap { attr := attrMap[name] // Retrieve AttributeSpec as, err := instF.GetAttribute(attr) So(err, ShouldEqual, nil) if name == "floatAttr" { So(instF.Get(as, 0), ShouldResemble, []byte{1}) So(instF.Get(as, 1), ShouldResemble, []byte{1}) So(instF.Get(as, 2), ShouldResemble, []byte{0}) } else if name == "shouldBe1Binary" { So(instF.Get(as, 0), ShouldResemble, []byte{0}) So(instF.Get(as, 1), ShouldResemble, []byte{1}) So(instF.Get(as, 2), ShouldResemble, []byte{1}) } else if name == "shouldBe3Binary_stoicism" { So(instF.Get(as, 0), ShouldResemble, []byte{1}) So(instF.Get(as, 1), ShouldResemble, []byte{0}) So(instF.Get(as, 2), ShouldResemble, []byte{0}) } else if name == "shouldBe3Binary_heroism" { So(instF.Get(as, 0), ShouldResemble, []byte{0}) So(instF.Get(as, 1), ShouldResemble, []byte{1}) So(instF.Get(as, 2), ShouldResemble, []byte{0}) } else if name == "shouldBe3Binary_romanticism" { So(instF.Get(as, 0), ShouldResemble, []byte{0}) So(instF.Get(as, 1), ShouldResemble, []byte{0}) So(instF.Get(as, 2), ShouldResemble, []byte{1}) } else if name == "arbitraryClass" { } else { t.Error("Shouldn't have %s", name) } } }) }) }
func TestFloatFilter(t *testing.T) { Convey("Given a contrived dataset...", t, func() { // Read the contrived dataset inst, err := base.ParseCSVToInstances("./binary_test.csv", true) So(err, ShouldEqual, nil) // Add Attributes to the filter bFilt := NewFloatConvertFilter() bAttrs := base.NonClassAttributes(inst) for _, a := range bAttrs { bFilt.AddAttribute(a) } bFilt.Train() // Construct a LazilyFilteredInstances to handle it instF := base.NewLazilyFilteredInstances(inst, bFilt) Convey("All the non-class Attributes should be floats...", func() { // Check that all the Attributes are the right type for _, a := range base.NonClassAttributes(instF) { _, ok := a.(*base.FloatAttribute) So(ok, ShouldEqual, true) } }) // Check that all the class Attributes made it Convey("All the class Attributes should have survived...", func() { origClassAttrs := inst.AllClassAttributes() newClassAttrs := instF.AllClassAttributes() intersectClassAttrs := base.AttributeIntersect(origClassAttrs, newClassAttrs) So(len(intersectClassAttrs), ShouldEqual, len(origClassAttrs)) }) // Check that the Attributes have the right names Convey("Attribute names should be correct...", func() { origNames := []string{"floatAttr", "shouldBe1Binary", "shouldBe3Binary_stoicism", "shouldBe3Binary_heroism", "shouldBe3Binary_romanticism", "arbitraryClass"} origMap := make(map[string]bool) for _, a := range origNames { origMap[a] = false } for _, a := range instF.AllAttributes() { name := a.GetName() _, ok := origMap[name] So(ok, ShouldBeTrue) origMap[name] = true } for a := range origMap { So(origMap[a], ShouldEqual, true) } }) Convey("All Attributes should be the correct type...", func() { for _, a := range instF.AllAttributes() { if a.GetName() == "arbitraryClass" { _, ok := a.(*base.CategoricalAttribute) So(ok, ShouldEqual, true) } else { _, ok := a.(*base.FloatAttribute) So(ok, ShouldEqual, true) } } }) // Check that the Attributes have been discretised correctly Convey("FloatConversion should have worked", func() { // Build Attribute map attrMap := make(map[string]base.Attribute) for _, a := range instF.AllAttributes() { attrMap[a.GetName()] = a } // For each attribute for name := range attrMap { So(name, ShouldBeIn, []string{ "floatAttr", "shouldBe1Binary", "shouldBe3Binary_stoicism", "shouldBe3Binary_heroism", "shouldBe3Binary_romanticism", "arbitraryClass", }) attr := attrMap[name] as, err := instF.GetAttribute(attr) So(err, ShouldEqual, nil) if name == "floatAttr" { So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(1.0)) So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(1.0)) So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(0.0)) } else if name == "shouldBe1Binary" { So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(0.0)) So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(1.0)) So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(1.0)) } else if name == "shouldBe3Binary_stoicism" { So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(1.0)) So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(0.0)) So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(0.0)) } else if name == "shouldBe3Binary_heroism" { So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(0.0)) So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(1.0)) So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(0.0)) } else if name == "shouldBe3Binary_romanticism" { So(instF.Get(as, 0), ShouldResemble, base.PackFloatToBytes(0.0)) So(instF.Get(as, 1), ShouldResemble, base.PackFloatToBytes(0.0)) So(instF.Get(as, 2), ShouldResemble, base.PackFloatToBytes(1.0)) } else if name == "arbitraryClass" { } } }) }) }
func (KNN *KNNClassifier) optimisedEuclideanPredict(d *base.DenseInstances) base.FixedDataGrid { // Create return vector ret := base.GeneratePredictionVector(d) // Type-assert training data tr := KNN.TrainingData.(*base.DenseInstances) // Enumeration of AttributeGroups agPos := make(map[string]int) agTrain := tr.AllAttributeGroups() agPred := d.AllAttributeGroups() classAttrs := tr.AllClassAttributes() counter := 0 for ag := range agTrain { // Detect whether the AttributeGroup has any classes in it attrs := agTrain[ag].Attributes() //matched := false if len(base.AttributeIntersect(classAttrs, attrs)) == 0 { agPos[ag] = counter } counter++ } // Pointers to the start of each prediction row rowPointers := make([]*C.double, len(agPred)) trainPointers := make([]*C.double, len(agPred)) rowSizes := make([]int, len(agPred)) for ag := range agPred { if ap, ok := agPos[ag]; ok { rowPointers[ap] = (*C.double)(unsafe.Pointer(&(agPred[ag].Storage()[0]))) trainPointers[ap] = (*C.double)(unsafe.Pointer(&(agTrain[ag].Storage()[0]))) rowSizes[ap] = agPred[ag].RowSizeInBytes() / 8 } } _, predRows := d.Size() _, trainRows := tr.Size() // Crete the distance vector distanceVec := distanceRecs(make([]_Ctype_struct_dist, trainRows)) // Additional datastructures voteVec := make([]int, KNN.NearestNeighbours) maxMap := make(map[string]int) for row := 0; row < predRows; row++ { for i := 0; i < trainRows; i++ { distanceVec[i].dist = 0 } for ag := range agPred { if ap, ok := agPos[ag]; ok { C.euclidean_distance( &(distanceVec[0]), C.int(trainRows), C.int(len(agPred[ag].Attributes())), C.int(row), trainPointers[ap], rowPointers[ap], ) } } sort.Sort(distanceVec) votes := distanceVec[:KNN.NearestNeighbours] for i, v := range votes { voteVec[i] = int(v.p) } maxClass := KNN.vote(maxMap, voteVec) base.SetClass(ret, row, maxClass) } return ret }