func TestChiMergeDiscretization(t *testing.T) { Convey("Chi-Merge Discretization", t, func() { chimDatasetPath := "../examples/datasets/chim.csv" Convey(fmt.Sprintf("With the '%s' dataset", chimDatasetPath), func() { instances, err := base.ParseCSVToInstances(chimDatasetPath, true) So(err, ShouldBeNil) _, rows := instances.Size() frequencies := chiMerge(instances, instances.AllAttributes()[0], 0.9, 0, rows) values := []float64{} for _, entry := range frequencies { values = append(values, entry.Value) } Convey("Computes frequencies correctly", func() { So(values, ShouldResemble, []float64{1.3, 56.2, 87.1}) }) }) irisHeadersDatasetpath := "../examples/datasets/iris_headers.csv" Convey(fmt.Sprintf("With the '%s' dataset", irisHeadersDatasetpath), func() { instances, err := base.ParseCSVToInstances(irisHeadersDatasetpath, true) So(err, ShouldBeNil) Convey("Sorting the instances first", func() { allAttributes := instances.AllAttributes() sortedAttributesSpecs := base.ResolveAttributes(instances, allAttributes)[0:1] sortedInstances, err := base.Sort(instances, base.Ascending, sortedAttributesSpecs) So(err, ShouldBeNil) _, rows := sortedInstances.Size() frequencies := chiMerge(sortedInstances, sortedInstances.AllAttributes()[0], 0.9, 0, rows) values := []float64{} for _, entry := range frequencies { values = append(values, entry.Value) } Convey("Computes frequencies correctly", func() { So(values, ShouldResemble, []float64{4.3, 5.5, 5.8, 6.3, 7.1}) }) }) }) }) }
func TestChiMerge2(testEnv *testing.T) { // // See http://sci2s.ugr.es/keel/pdf/algorithm/congreso/1992-Kerber-ChimErge-AAAI92.pdf // Randy Kerber, ChiMerge: Discretisation of Numeric Attributes, 1992 inst, err := base.ParseCSVToInstances("../examples/datasets/iris_headers.csv", true) if err != nil { panic(err) } // Sort the instances allAttrs := inst.AllAttributes() sortAttrSpecs := base.ResolveAttributes(inst, allAttrs)[0:1] instSorted, err := base.Sort(inst, base.Ascending, sortAttrSpecs) if err != nil { panic(err) } // Perform Chi-Merge _, rows := inst.Size() freq := chiMerge(instSorted, allAttrs[0], 0.90, 0, rows) if len(freq) != 5 { testEnv.Errorf("Wrong length (%d)", len(freq)) testEnv.Error(freq) } if freq[0].Value != 4.3 { testEnv.Error(freq[0]) } if freq[1].Value != 5.5 { testEnv.Error(freq[1]) } if freq[2].Value != 5.8 { testEnv.Error(freq[2]) } if freq[3].Value != 6.3 { testEnv.Error(freq[3]) } if freq[4].Value != 7.1 { testEnv.Error(freq[4]) } }