func populateWithColumns(t *testing.T, dataset *tabula.Dataset) { for x := range datasetCols { col, e := tabula.NewColumnString(datasetCols[x], datasetTypes[x], datasetNames[x]) if e != nil { t.Fatal(e) } dataset.PushColumn(*col) } }
/* getAsInputColumn return feature values as in input column. */ func getAsInputColumn(reader *wvcgen.Reader, colName string, ftrValues *tabula.Dataset, ) { ds := reader.GetDataset().(tabula.DatasetInterface) ftr := ds.GetColumnByName(colName) if ftr == nil { return } // Add column in input as feature ftrValues.PushColumn(*ftr) }
func populateWithRows(t *testing.T, dataset *tabula.Dataset) { for _, rowin := range datasetRows { row := make(tabula.Row, len(rowin)) for x, recin := range rowin { rec, e := tabula.NewRecordBy(recin, datasetTypes[x]) if e != nil { t.Fatal(e) } row[x] = rec } dataset.PushRow(&row) } }
func TestComputeEuclidianDistance(t *testing.T) { var exp = []string{ `[0.302891 0.608544 0.47413 1.42718 -0.811085 1]`, `[0.243474 0.505146 0.472892 1.34802 -0.844252 1]` + `[0.202343 0.485983 0.527533 1.47307 -0.809672 1]` + `[0.215496 0.523418 0.51719 1.43548 -0.933981 1]` + `[0.214331 0.546086 0.414773 1.38542 -0.702336 1]` + `[0.301676 0.554505 0.594757 1.21258 -0.873084 1]`, } var expDistances = "[0.5257185558832786" + " 0.5690474496911485" + " 0.5888777462258191" + " 0.6007362149895741" + " 0.672666336306493]" // Reading data dataset := tabula.Dataset{} _, e := dsv.SimpleRead("../testdata/phoneme/phoneme.dsv", &dataset) if nil != e { return } // Processing knnIn := knn.Runtime{ DistanceMethod: knn.TEuclidianDistance, ClassIndex: 5, K: 5, } classes := dataset.GetRows().GroupByValue(knnIn.ClassIndex) _, minoritySet := classes.GetMinority() kneighbors := knnIn.FindNeighbors(&minoritySet, minoritySet[0]) var got string rows := kneighbors.Rows() for _, row := range *rows { got += fmt.Sprint(*row) } assert(t, exp[1], got, true) distances := kneighbors.Distances() got = fmt.Sprint(*distances) assert(t, expDistances, got, true) }
func main() { defer un(trace("Unified PAN-WVC-2011")) dataset := tabula.Dataset{} readset, e := dsv.SimpleRead(fEditsDsv, &dataset) if e != nil { panic(e) } fmt.Printf(">>> merging %d rows\n", dataset.GetNRow()) fmt.Println(">>> diffing ...") doDiff(readset, &dataset) fmt.Println(">>> writing ...") n, e := dsv.SimpleWrite(readset, fEditsDsv) if e != nil { panic(e) } fmt.Printf(">>> writing %d rows\n", n) }
func runFeature(reader *wvcgen.Reader, ftrValues *tabula.Dataset, md dsv.Metadata, ) { defer un(trace(">>> computing feature " + md.Name)) ftr := feature.GetByName(md.Name) // No feature name found, search the column name in // input metadata. if ftr == nil { getAsInputColumn(reader, md.Name, ftrValues) return } ds := reader.GetDataset().(tabula.DatasetInterface) ftr.Compute(ds) col := ftr.Interface().(*tabula.Column) ftrValues.PushColumn(*col) }