func TestSmote(t *testing.T) { smot := smote.New(PercentOver, K, 5) // Read samples. dataset := tabula.Claset{} _, e := dsv.SimpleRead(fcfg, &dataset) if nil != e { t.Fatal(e) } fmt.Println("[smote_test] Total samples:", dataset.Len()) minorset := dataset.GetMinorityRows() fmt.Println("[smote_test] # minority samples:", minorset.Len()) e = smot.Resampling(*minorset) if e != nil { t.Fatal(e) } fmt.Println("[smote_test] # synthetic:", smot.GetSynthetics().Len()) e = smot.Write("phoneme_smote.csv") if e != nil { t.Fatal(e) } }
func getSamples() (train, test tabula.ClasetInterface) { samples := tabula.Claset{} _, e := dsv.SimpleRead(SampleDsvFile, &samples) if nil != e { log.Fatal(e) } if !DoTest { return &samples, nil } ntrain := int(float32(samples.Len()) * (float32(NBootstrap) / 100.0)) bag, oob, _, _ := tabula.RandomPickRows(&samples, ntrain, false) train = bag.(tabula.ClasetInterface) test = oob.(tabula.ClasetInterface) train.SetClassIndex(samples.GetClassIndex()) test.SetClassIndex(samples.GetClassIndex()) return train, test }