Exemplo n.º 1
0
func TestSmote(t *testing.T) {
	smot := smote.New(PercentOver, K, 5)

	// Read samples.
	dataset := tabula.Claset{}

	_, e := dsv.SimpleRead(fcfg, &dataset)
	if nil != e {
		t.Fatal(e)
	}

	fmt.Println("[smote_test] Total samples:", dataset.Len())

	minorset := dataset.GetMinorityRows()

	fmt.Println("[smote_test] # minority samples:", minorset.Len())

	e = smot.Resampling(*minorset)
	if e != nil {
		t.Fatal(e)
	}

	fmt.Println("[smote_test] # synthetic:", smot.GetSynthetics().Len())

	e = smot.Write("phoneme_smote.csv")
	if e != nil {
		t.Fatal(e)
	}
}
Exemplo n.º 2
0
func TestCART(t *testing.T) {
	fds := "../../testdata/iris/iris.dsv"

	ds := tabula.Claset{}

	_, e := dsv.SimpleRead(fds, &ds)
	if nil != e {
		t.Fatal(e)
	}

	fmt.Println("[cart_test] class index:", ds.GetClassIndex())

	// copy target to be compared later.
	targetv := ds.GetClassAsStrings()

	assert(t, NRows, ds.GetNRow(), true)

	// Build CART tree.
	CART, e := cart.New(&ds, cart.SplitMethodGini, 0)
	if e != nil {
		t.Fatal(e)
	}

	fmt.Println("[cart_test] CART Tree:\n", CART)

	// Create test set
	testset := tabula.Claset{}
	_, e = dsv.SimpleRead(fds, &testset)

	if nil != e {
		t.Fatal(e)
	}

	testset.GetClassColumn().ClearValues()

	// Classifiy test set
	e = CART.ClassifySet(&testset)
	if nil != e {
		t.Fatal(e)
	}

	assert(t, targetv, testset.GetClassAsStrings(), true)
}
Exemplo n.º 3
0
func test() {
	testset := tabula.Claset{}
	_, e := dsv.SimpleRead(testCfg, &testset)
	if e != nil {
		panic(e)
	}

	predicts, _, probs := forest.ClassifySet(&testset, nil)

	forest.Performance(&testset, predicts, probs)

	e = forest.WritePerformance()
	if e != nil {
		panic(e)
	}
}
Exemplo n.º 4
0
func TestComputeEuclidianDistance(t *testing.T) {
	var exp = []string{
		`[0.302891 0.608544 0.47413 1.42718 -0.811085 1]`,
		`[0.243474 0.505146 0.472892 1.34802 -0.844252 1]` +
			`[0.202343 0.485983 0.527533 1.47307 -0.809672 1]` +
			`[0.215496 0.523418 0.51719 1.43548 -0.933981 1]` +
			`[0.214331 0.546086 0.414773 1.38542 -0.702336 1]` +
			`[0.301676 0.554505 0.594757 1.21258 -0.873084 1]`,
	}
	var expDistances = "[0.5257185558832786" +
		" 0.5690474496911485" +
		" 0.5888777462258191" +
		" 0.6007362149895741" +
		" 0.672666336306493]"

	// Reading data
	dataset := tabula.Dataset{}
	_, e := dsv.SimpleRead("../testdata/phoneme/phoneme.dsv", &dataset)
	if nil != e {
		return
	}

	// Processing
	knnIn := knn.Runtime{
		DistanceMethod: knn.TEuclidianDistance,
		ClassIndex:     5,
		K:              5,
	}

	classes := dataset.GetRows().GroupByValue(knnIn.ClassIndex)

	_, minoritySet := classes.GetMinority()

	kneighbors := knnIn.FindNeighbors(&minoritySet, minoritySet[0])

	var got string
	rows := kneighbors.Rows()
	for _, row := range *rows {
		got += fmt.Sprint(*row)
	}

	assert(t, exp[1], got, true)

	distances := kneighbors.Distances()
	got = fmt.Sprint(*distances)
	assert(t, expDistances, got, true)
}
Exemplo n.º 5
0
func TestLNSmote(t *testing.T) {
	// Read sample dataset.
	dataset := tabula.Claset{}
	_, e := dsv.SimpleRead(fcfg, &dataset)
	if nil != e {
		t.Fatal(e)
	}

	fmt.Println("[lnsmote_test] Total samples:", dataset.GetNRow())

	// Write original samples.
	writer, e := dsv.NewWriter("")

	if nil != e {
		t.Fatal(e)
	}

	e = writer.OpenOutput("phoneme_lnsmote.csv")
	if e != nil {
		t.Fatal(e)
	}

	sep := dsv.DefSeparator
	_, e = writer.WriteRawRows(dataset.GetRows(), &sep)
	if e != nil {
		t.Fatal(e)
	}

	// Initialize LN-SMOTE.
	lnsmoteRun := lnsmote.New(100, 5, 5, "1", "lnsmote.outliers")

	e = lnsmoteRun.Resampling(&dataset)

	fmt.Println("[lnsmote_test] # synthetic:", lnsmoteRun.Synthetics.Len())

	sep = dsv.DefSeparator
	_, e = writer.WriteRawRows(lnsmoteRun.Synthetics.GetRows(), &sep)
	if e != nil {
		t.Fatal(e)
	}

	e = writer.Close()
	if e != nil {
		t.Fatal(e)
	}
}
Exemplo n.º 6
0
func train() {
	e := createRandomForest()
	if e != nil {
		panic(e)
	}

	trainset := tabula.Claset{}

	_, e = dsv.SimpleRead(trainCfg, &trainset)
	if e != nil {
		panic(e)
	}

	e = forest.Build(&trainset)
	if e != nil {
		panic(e)
	}
}
Exemplo n.º 7
0
func TestSimpleReadWrite(t *testing.T) {
	fcfg := "testdata/config_simpleread.dsv"

	reader, e := dsv.SimpleRead(fcfg, nil)
	if e != nil {
		t.Fatal(e)
	}

	fout := "testdata/output.dat"
	fexp := "testdata/expected.dat"

	_, e = dsv.SimpleWrite(reader, fcfg)
	if e != nil {
		t.Fatal(e)
	}

	assertFile(t, fexp, fout, true)
}
Exemplo n.º 8
0
func main() {
	defer un(trace("smote"))

	flag.Parse()

	if len(flag.Args()) <= 0 {
		usage()
		os.Exit(1)
	}

	fcfg := flag.Arg(0)

	// Parsing config file and parameter.
	smote, e := createSmote(fcfg)
	if e != nil {
		panic(e)
	}

	// Get dataset.
	dataset := tabula.Claset{}
	_, e = dsv.SimpleRead(fcfg, &dataset)
	if e != nil {
		panic(e)
	}

	fmt.Println("[smote] Dataset:", &dataset)

	row := dataset.GetRow(0)
	fmt.Println("[smote] sample:", row)

	e = runSmote(smote, &dataset)
	if e != nil {
		panic(e)
	}

	if !merge {
		return
	}

	e = runMerge(smote, &dataset)
	if e != nil {
		panic(e)
	}
}
Exemplo n.º 9
0
func test() {
	testset := tabula.Claset{}
	_, e := dsv.SimpleRead(testCfg, &testset)
	if e != nil {
		panic(e)
	}

	fmt.Println(tag, "Test set:", &testset)
	fmt.Println(tag, "Sample test set:", testset.GetRow(0))

	predicts, cm, probs := crforest.ClassifySetByWeight(&testset, nil)

	fmt.Println("[crf] Test set CM:", cm)

	crforest.Performance(&testset, predicts, probs)

	e = crforest.WritePerformance()
	if e != nil {
		panic(e)
	}
}
Exemplo n.º 10
0
func main() {
	defer un(trace("Unified PAN-WVC-2011"))

	dataset := tabula.Dataset{}

	readset, e := dsv.SimpleRead(fEditsDsv, &dataset)
	if e != nil {
		panic(e)
	}
	fmt.Printf(">>> merging %d rows\n", dataset.GetNRow())

	fmt.Println(">>> diffing ...")
	doDiff(readset, &dataset)

	fmt.Println(">>> writing ...")
	n, e := dsv.SimpleWrite(readset, fEditsDsv)
	if e != nil {
		panic(e)
	}
	fmt.Printf(">>> writing %d rows\n", n)
}
Exemplo n.º 11
0
func main() {
	defer un(trace("cart"))

	flag.Parse()

	if len(flag.Args()) <= 0 {
		usage()
		os.Exit(1)
	}

	fcfg := flag.Arg(0)

	// Parsing config file and check command parameter values.
	cartrt, e := createCart(fcfg)
	if e != nil {
		panic(e)
	}

	// Get dataset
	dataset := tabula.Claset{}
	_, e = dsv.SimpleRead(fcfg, &dataset)
	if e != nil {
		panic(e)
	}

	if DEBUG >= 1 {
		fmt.Printf("[cart] Class index: %v\n", dataset.GetClassIndex())
	}

	e = cartrt.Build(&dataset)
	if e != nil {
		panic(e)
	}

	if DEBUG >= 1 {
		fmt.Println("[cart] CART tree:\n", cartrt)
	}
}
Exemplo n.º 12
0
func getSamples() (train, test tabula.ClasetInterface) {
	samples := tabula.Claset{}
	_, e := dsv.SimpleRead(SampleDsvFile, &samples)
	if nil != e {
		log.Fatal(e)
	}

	if !DoTest {
		return &samples, nil
	}

	ntrain := int(float32(samples.Len()) * (float32(NBootstrap) / 100.0))

	bag, oob, _, _ := tabula.RandomPickRows(&samples, ntrain, false)

	train = bag.(tabula.ClasetInterface)
	test = oob.(tabula.ClasetInterface)

	train.SetClassIndex(samples.GetClassIndex())
	test.SetClassIndex(samples.GetClassIndex())

	return train, test
}