func TestSmote(t *testing.T) { smot := smote.New(PercentOver, K, 5) // Read samples. dataset := tabula.Claset{} _, e := dsv.SimpleRead(fcfg, &dataset) if nil != e { t.Fatal(e) } fmt.Println("[smote_test] Total samples:", dataset.Len()) minorset := dataset.GetMinorityRows() fmt.Println("[smote_test] # minority samples:", minorset.Len()) e = smot.Resampling(*minorset) if e != nil { t.Fatal(e) } fmt.Println("[smote_test] # synthetic:", smot.GetSynthetics().Len()) e = smot.Write("phoneme_smote.csv") if e != nil { t.Fatal(e) } }
func TestCART(t *testing.T) { fds := "../../testdata/iris/iris.dsv" ds := tabula.Claset{} _, e := dsv.SimpleRead(fds, &ds) if nil != e { t.Fatal(e) } fmt.Println("[cart_test] class index:", ds.GetClassIndex()) // copy target to be compared later. targetv := ds.GetClassAsStrings() assert(t, NRows, ds.GetNRow(), true) // Build CART tree. CART, e := cart.New(&ds, cart.SplitMethodGini, 0) if e != nil { t.Fatal(e) } fmt.Println("[cart_test] CART Tree:\n", CART) // Create test set testset := tabula.Claset{} _, e = dsv.SimpleRead(fds, &testset) if nil != e { t.Fatal(e) } testset.GetClassColumn().ClearValues() // Classifiy test set e = CART.ClassifySet(&testset) if nil != e { t.Fatal(e) } assert(t, targetv, testset.GetClassAsStrings(), true) }
func test() { testset := tabula.Claset{} _, e := dsv.SimpleRead(testCfg, &testset) if e != nil { panic(e) } predicts, _, probs := forest.ClassifySet(&testset, nil) forest.Performance(&testset, predicts, probs) e = forest.WritePerformance() if e != nil { panic(e) } }
func TestComputeEuclidianDistance(t *testing.T) { var exp = []string{ `[0.302891 0.608544 0.47413 1.42718 -0.811085 1]`, `[0.243474 0.505146 0.472892 1.34802 -0.844252 1]` + `[0.202343 0.485983 0.527533 1.47307 -0.809672 1]` + `[0.215496 0.523418 0.51719 1.43548 -0.933981 1]` + `[0.214331 0.546086 0.414773 1.38542 -0.702336 1]` + `[0.301676 0.554505 0.594757 1.21258 -0.873084 1]`, } var expDistances = "[0.5257185558832786" + " 0.5690474496911485" + " 0.5888777462258191" + " 0.6007362149895741" + " 0.672666336306493]" // Reading data dataset := tabula.Dataset{} _, e := dsv.SimpleRead("../testdata/phoneme/phoneme.dsv", &dataset) if nil != e { return } // Processing knnIn := knn.Runtime{ DistanceMethod: knn.TEuclidianDistance, ClassIndex: 5, K: 5, } classes := dataset.GetRows().GroupByValue(knnIn.ClassIndex) _, minoritySet := classes.GetMinority() kneighbors := knnIn.FindNeighbors(&minoritySet, minoritySet[0]) var got string rows := kneighbors.Rows() for _, row := range *rows { got += fmt.Sprint(*row) } assert(t, exp[1], got, true) distances := kneighbors.Distances() got = fmt.Sprint(*distances) assert(t, expDistances, got, true) }
func TestLNSmote(t *testing.T) { // Read sample dataset. dataset := tabula.Claset{} _, e := dsv.SimpleRead(fcfg, &dataset) if nil != e { t.Fatal(e) } fmt.Println("[lnsmote_test] Total samples:", dataset.GetNRow()) // Write original samples. writer, e := dsv.NewWriter("") if nil != e { t.Fatal(e) } e = writer.OpenOutput("phoneme_lnsmote.csv") if e != nil { t.Fatal(e) } sep := dsv.DefSeparator _, e = writer.WriteRawRows(dataset.GetRows(), &sep) if e != nil { t.Fatal(e) } // Initialize LN-SMOTE. lnsmoteRun := lnsmote.New(100, 5, 5, "1", "lnsmote.outliers") e = lnsmoteRun.Resampling(&dataset) fmt.Println("[lnsmote_test] # synthetic:", lnsmoteRun.Synthetics.Len()) sep = dsv.DefSeparator _, e = writer.WriteRawRows(lnsmoteRun.Synthetics.GetRows(), &sep) if e != nil { t.Fatal(e) } e = writer.Close() if e != nil { t.Fatal(e) } }
func train() { e := createRandomForest() if e != nil { panic(e) } trainset := tabula.Claset{} _, e = dsv.SimpleRead(trainCfg, &trainset) if e != nil { panic(e) } e = forest.Build(&trainset) if e != nil { panic(e) } }
func TestSimpleReadWrite(t *testing.T) { fcfg := "testdata/config_simpleread.dsv" reader, e := dsv.SimpleRead(fcfg, nil) if e != nil { t.Fatal(e) } fout := "testdata/output.dat" fexp := "testdata/expected.dat" _, e = dsv.SimpleWrite(reader, fcfg) if e != nil { t.Fatal(e) } assertFile(t, fexp, fout, true) }
func main() { defer un(trace("smote")) flag.Parse() if len(flag.Args()) <= 0 { usage() os.Exit(1) } fcfg := flag.Arg(0) // Parsing config file and parameter. smote, e := createSmote(fcfg) if e != nil { panic(e) } // Get dataset. dataset := tabula.Claset{} _, e = dsv.SimpleRead(fcfg, &dataset) if e != nil { panic(e) } fmt.Println("[smote] Dataset:", &dataset) row := dataset.GetRow(0) fmt.Println("[smote] sample:", row) e = runSmote(smote, &dataset) if e != nil { panic(e) } if !merge { return } e = runMerge(smote, &dataset) if e != nil { panic(e) } }
func test() { testset := tabula.Claset{} _, e := dsv.SimpleRead(testCfg, &testset) if e != nil { panic(e) } fmt.Println(tag, "Test set:", &testset) fmt.Println(tag, "Sample test set:", testset.GetRow(0)) predicts, cm, probs := crforest.ClassifySetByWeight(&testset, nil) fmt.Println("[crf] Test set CM:", cm) crforest.Performance(&testset, predicts, probs) e = crforest.WritePerformance() if e != nil { panic(e) } }
func main() { defer un(trace("Unified PAN-WVC-2011")) dataset := tabula.Dataset{} readset, e := dsv.SimpleRead(fEditsDsv, &dataset) if e != nil { panic(e) } fmt.Printf(">>> merging %d rows\n", dataset.GetNRow()) fmt.Println(">>> diffing ...") doDiff(readset, &dataset) fmt.Println(">>> writing ...") n, e := dsv.SimpleWrite(readset, fEditsDsv) if e != nil { panic(e) } fmt.Printf(">>> writing %d rows\n", n) }
func main() { defer un(trace("cart")) flag.Parse() if len(flag.Args()) <= 0 { usage() os.Exit(1) } fcfg := flag.Arg(0) // Parsing config file and check command parameter values. cartrt, e := createCart(fcfg) if e != nil { panic(e) } // Get dataset dataset := tabula.Claset{} _, e = dsv.SimpleRead(fcfg, &dataset) if e != nil { panic(e) } if DEBUG >= 1 { fmt.Printf("[cart] Class index: %v\n", dataset.GetClassIndex()) } e = cartrt.Build(&dataset) if e != nil { panic(e) } if DEBUG >= 1 { fmt.Println("[cart] CART tree:\n", cartrt) } }
func getSamples() (train, test tabula.ClasetInterface) { samples := tabula.Claset{} _, e := dsv.SimpleRead(SampleDsvFile, &samples) if nil != e { log.Fatal(e) } if !DoTest { return &samples, nil } ntrain := int(float32(samples.Len()) * (float32(NBootstrap) / 100.0)) bag, oob, _, _ := tabula.RandomPickRows(&samples, ntrain, false) train = bag.(tabula.ClasetInterface) test = oob.(tabula.ClasetInterface) train.SetClassIndex(samples.GetClassIndex()) test.SetClassIndex(samples.GetClassIndex()) return train, test }