Beispiel #1
0
// populateWithColumns builds one column for every entry of datasetCols,
// pairing it with the type and name found at the same index in datasetTypes
// and datasetNames, and pushes each column to dataset.
// The test is aborted on the first column that cannot be created.
func populateWithColumns(t *testing.T, dataset *tabula.Dataset) {
	for idx, values := range datasetCols {
		column, err := tabula.NewColumnString(values, datasetTypes[idx],
			datasetNames[idx])
		if err != nil {
			t.Fatal(err)
		}

		dataset.PushColumn(*column)
	}
}
Beispiel #2
0
/*
getAsInputColumn looks up the column named colName in the reader's dataset
and, when it exists, pushes a copy of it to ftrValues.
Nothing is pushed when the reader's dataset has no such column.
*/
func getAsInputColumn(reader *wvcgen.Reader, colName string,
	ftrValues *tabula.Dataset,
) {
	input := reader.GetDataset().(tabula.DatasetInterface)

	// Use the input column itself as the feature values.
	if col := input.GetColumnByName(colName); col != nil {
		ftrValues.PushColumn(*col)
	}
}
Beispiel #3
0
// populateWithRows converts every entry of datasetRows into a tabula.Row,
// creating each record with the type found at the same index in
// datasetTypes, and pushes the resulting row to dataset.
// The test is aborted on the first record that cannot be created.
func populateWithRows(t *testing.T, dataset *tabula.Dataset) {
	for _, input := range datasetRows {
		out := make(tabula.Row, len(input))

		for idx := range input {
			record, err := tabula.NewRecordBy(input[idx], datasetTypes[idx])
			if err != nil {
				t.Fatal(err)
			}

			out[idx] = record
		}

		dataset.PushRow(&out)
	}
}
Beispiel #4
0
// TestComputeEuclidianDistance reads the phoneme test dataset, groups its
// rows by the class column (index 5), and searches the minority class for
// the K=5 neighbors of that class's first row using the Euclidian distance
// method.
// The printed neighbor rows are compared against exp[1] and the printed
// distances against expDistances.
func TestComputeEuclidianDistance(t *testing.T) {
	// Only exp[1], the concatenation of the expected neighbor rows, is
	// asserted below.
	var exp = []string{
		`[0.302891 0.608544 0.47413 1.42718 -0.811085 1]`,
		`[0.243474 0.505146 0.472892 1.34802 -0.844252 1]` +
			`[0.202343 0.485983 0.527533 1.47307 -0.809672 1]` +
			`[0.215496 0.523418 0.51719 1.43548 -0.933981 1]` +
			`[0.214331 0.546086 0.414773 1.38542 -0.702336 1]` +
			`[0.301676 0.554505 0.594757 1.21258 -0.873084 1]`,
	}
	var expDistances = "[0.5257185558832786" +
		" 0.5690474496911485" +
		" 0.5888777462258191" +
		" 0.6007362149895741" +
		" 0.672666336306493]"

	// Reading data
	dataset := tabula.Dataset{}
	_, e := dsv.SimpleRead("../testdata/phoneme/phoneme.dsv", &dataset)
	if e != nil {
		// Fail loudly: a bare return would report the test as passed
		// without having checked anything.
		t.Fatal(e)
	}

	// Processing
	knnIn := knn.Runtime{
		DistanceMethod: knn.TEuclidianDistance,
		ClassIndex:     5,
		K:              5,
	}

	classes := dataset.GetRows().GroupByValue(knnIn.ClassIndex)

	_, minoritySet := classes.GetMinority()

	// The first minority row is used as the sample to search around.
	kneighbors := knnIn.FindNeighbors(&minoritySet, minoritySet[0])

	// Concatenate the printed form of every neighbor row.
	var got string
	rows := kneighbors.Rows()
	for _, row := range *rows {
		got += fmt.Sprint(*row)
	}

	assert(t, exp[1], got, true)

	distances := kneighbors.Distances()
	got = fmt.Sprint(*distances)
	assert(t, expDistances, got, true)
}
Beispiel #5
0
// main reads the edits dataset described by fEditsDsv, passes the reader
// and the dataset to doDiff, and then writes the result through
// dsv.SimpleWrite using the same fEditsDsv configuration.
// Any read or write error causes a panic.
func main() {
	defer un(trace("Unified PAN-WVC-2011"))

	var edits tabula.Dataset

	reader, err := dsv.SimpleRead(fEditsDsv, &edits)
	if err != nil {
		panic(err)
	}
	fmt.Printf(">>> merging %d rows\n", edits.GetNRow())

	fmt.Println(">>> diffing ...")
	doDiff(reader, &edits)

	fmt.Println(">>> writing ...")
	nrow, err := dsv.SimpleWrite(reader, fEditsDsv)
	if err != nil {
		panic(err)
	}
	fmt.Printf(">>> writing %d rows\n", nrow)
}
Beispiel #6
0
// runFeature computes the feature named md.Name over the reader's dataset
// and pushes the resulting column to ftrValues.
// When no feature is registered under that name, the input column with the
// same name is pushed instead (see getAsInputColumn).
func runFeature(reader *wvcgen.Reader, ftrValues *tabula.Dataset,
	md dsv.Metadata,
) {
	defer un(trace(">>> computing feature " + md.Name))

	computer := feature.GetByName(md.Name)
	if computer == nil {
		// Not a known feature: fall back to the input column that
		// carries the same name.
		getAsInputColumn(reader, md.Name, ftrValues)
		return
	}

	computer.Compute(reader.GetDataset().(tabula.DatasetInterface))

	// The computed feature exposes its values as a *tabula.Column.
	values := computer.Interface().(*tabula.Column)
	ftrValues.PushColumn(*values)
}