// Train converts the Attributes into equivalently named FloatAttributes, // leaves FloatAttributes unmodified and processes // CategoricalAttributes as follows. // // If the CategoricalAttribute has two values, one of them is // designated 0.0 and the other 1.0, and a single identically-named // FloatAttribute is returned. // // If the CategoricalAttribute has more than two (n) values, the Filter // generates n FloatAttributes and sets each of them if the value's observed. func (f *FloatConvertFilter) Train() error { for _, a := range f.attrs { if ac, ok := a.(*base.CategoricalAttribute); ok { vals := ac.GetValues() if len(vals) <= 2 { nAttr := base.NewFloatAttribute(ac.GetName()) fAttr := base.FilteredAttribute{ac, nAttr} f.converted = append(f.converted, fAttr) f.twoValuedCategoricalAttributes[a] = true } else { if _, ok := f.nValuedCategoricalAttributeMap[a]; !ok { f.nValuedCategoricalAttributeMap[a] = make(map[uint64]base.Attribute) } for i := uint64(0); i < uint64(len(vals)); i++ { v := vals[i] newName := fmt.Sprintf("%s_%s", ac.GetName(), v) newAttr := base.NewFloatAttribute(newName) fAttr := base.FilteredAttribute{ac, newAttr} f.converted = append(f.converted, fAttr) f.nValuedCategoricalAttributeMap[a][i] = newAttr } } } else if ab, ok := a.(*base.FloatAttribute); ok { fAttr := base.FilteredAttribute{ab, ab} f.converted = append(f.converted, fAttr) } else if af, ok := a.(*base.BinaryAttribute); ok { newAttr := base.NewFloatAttribute(af.GetName()) fAttr := base.FilteredAttribute{af, newAttr} f.converted = append(f.converted, fAttr) } else { return fmt.Errorf("Unsupported Attribute type: %v", a) } } return nil }
func main() { // Instances can be read using ParseCsvToInstances rawData, err := base.ParseCSVToInstances("../datasets/iris_headers.csv", true) if err != nil { panic(err) } // Instances can be printed, and you'll see a human-readable summary // if you do so. The first section is a line like // Instances with 150 row(s) and 5 attribute(s) // // It next prints all the attributes // FloatAttribute(Sepal length) // FloatAttribute(Sepal width) // FloatAttribute(Petal length) // FloatAttribute(Petal width) // CategoricalAttribute([Iris-setosa Iris-versicolor Iris-viriginica]) // The final attribute has an asterisk (*) printed before it, // meaning that it is the class variable. It then prints out up to // 30 rows which correspond to those attributes. // 5.10 3.50 1.40 0.20 Iris-setosa // 4.90 3.00 1.40 0.20 Iris-setosa fmt.Println(rawData) // If two decimal places isn't enough, you can update the // Precision field on any FloatAttribute if attr, ok := rawData.GetAttr(0).(*base.FloatAttribute); !ok { panic("Invalid cast") } else { attr.Precision = 4 } // Now the first column has more precision fmt.Println(rawData) // We can update the set of Instances, although the API // for doing so is not very sophisticated. rawData.SetAttrStr(0, 0, "1.00") rawData.SetAttrStr(0, rawData.ClassIndex, "Iris-unusual") fmt.Println(rawData) // There is a way of creating new Instances from scratch. // Inside an Instance, everything's stored as float64 newData := make([]float64, 2) newData[0] = 1.0 newData[1] = 0.0 // Let's create some attributes attrs := make([]base.Attribute, 2) attrs[0] = base.NewFloatAttribute() attrs[0].SetName("Arbitrary Float Quantity") attrs[1] = new(base.CategoricalAttribute) attrs[1].SetName("Class") // Insert a standard class attrs[1].GetSysValFromString("A") // Now let's create the final instances set newInst := base.NewInstancesFromRaw(attrs, 1, newData) fmt.Println(newInst) }
func (m *OneVsAllModel) generateAttributes(from base.FixedDataGrid) map[base.Attribute]base.Attribute { attrs := from.AllAttributes() classAttrs := from.AllClassAttributes() if len(classAttrs) != 1 { panic("Only 1 class Attribute is supported!") } ret := make(map[base.Attribute]base.Attribute) for _, a := range attrs { ret[a] = a for _, b := range classAttrs { if a.Equals(b) { cur := base.NewFloatAttribute(b.GetName()) ret[a] = cur } } } return ret }