// Predict issues predictions. Each class-specific classifier is expected
// to output a value between 0 (indicating that a given instance is not
// a given class) and 1 (indicating that the given instance is definitely
// that class). For each instance, the class with the highest value is chosen.
// The result is undefined if several underlying models output the same value.
func (m *OneVsAllModel) Predict(what base.FixedDataGrid) (base.FixedDataGrid, error) {
	ret := base.GeneratePredictionVector(what)
	vecs := make([]base.FixedDataGrid, m.maxClassVal+1)
	specs := make([]base.AttributeSpec, m.maxClassVal+1)

	// Run each class-specific classifier over the filtered input
	for i := uint64(0); i <= m.maxClassVal; i++ {
		f := m.filters[i]
		c := base.NewLazilyFilteredInstances(what, f)
		p, err := m.classifiers[i].Predict(c)
		if err != nil {
			return nil, err
		}
		vecs[i] = p
		specs[i] = base.ResolveAttributes(p, p.AllClassAttributes())[0]
	}

	// For each row, choose the class whose classifier scored highest
	_, rows := ret.Size()
	spec := base.ResolveAttributes(ret, ret.AllClassAttributes())[0]
	for i := 0; i < rows; i++ {
		class := uint64(0)
		best := 0.0
		for j := uint64(0); j <= m.maxClassVal; j++ {
			val := base.UnpackBytesToFloat(vecs[j].Get(specs[j], i))
			if val > best {
				class = j
				best = val
			}
		}
		ret.Set(spec, i, base.PackU64ToBytes(class))
	}
	return ret, nil
}
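
// A minimal standalone sketch (not part of the library) of the argmax step
// above: given one score per class, return the index of the highest score.
// Ties resolve to the lowest index, mirroring the undefined-tie caveat above.
func argmaxClass(scores []float64) uint64 {
	class := uint64(0)
	best := 0.0
	for j, val := range scores {
		if val > best {
			class = uint64(j)
			best = val
		}
	}
	return class
}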
// Predict issues predictions for each row of X using the trained model.
// Only a single class Attribute is supported.
func (lr *LogisticRegression) Predict(X base.FixedDataGrid) base.FixedDataGrid {
	// Only support 1 class Attribute
	classAttrs := X.AllClassAttributes()
	if len(classAttrs) != 1 {
		panic(fmt.Sprintf("%d class Attributes (1 expected)", len(classAttrs)))
	}

	// Generate return structure
	ret := base.GeneratePredictionVector(X)
	classAttrSpecs := base.ResolveAttributes(ret, classAttrs)

	// Retrieve numeric non-class Attributes
	numericAttrs := base.NonClassFloatAttributes(X)
	numericAttrSpecs := base.ResolveAttributes(X, numericAttrs)

	// Allocate row storage
	row := make([]float64, len(numericAttrSpecs))
	X.MapOverRows(numericAttrSpecs, func(rowBytes [][]byte, rowNo int) (bool, error) {
		for i, r := range rowBytes {
			row[i] = base.UnpackBytesToFloat(r)
		}
		val := Predict(lr.model, row)
		vals := base.PackFloatToBytes(val)
		ret.Set(classAttrSpecs[0], rowNo, vals)
		return true, nil
	})

	return ret
}
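
// A minimal standalone sketch (assumed, not the library's Predict helper) of
// what a logistic model computes per row: the sigmoid of the weighted sum.
// Requires "math"; weights[0] is treated as the intercept by convention here.
func logisticScore(weights []float64, row []float64) float64 {
	z := weights[0]
	for i, x := range row {
		z += weights[i+1] * x
	}
	return 1.0 / (1.0 + math.Exp(-z))
}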
// findBestSplit is an exploratory stub: it walks every candidate Attribute
// and prints the unpacked value of each row. The split-selection logic
// itself is not implemented yet.
func findBestSplit(partition base.FixedDataGrid) {
	// Initialise the best improvement seen so far to negative infinity
	delta := math.Inf(-1)

	attrs := partition.AllAttributes()
	classAttrs := partition.AllClassAttributes()
	candidates := base.AttributeDifferenceReferences(attrs, classAttrs)

	// Debug output
	fmt.Println(delta)
	fmt.Println(classAttrs)
	fmt.Println(reflect.TypeOf(partition))
	fmt.Println(reflect.TypeOf(candidates))

	for i, n := range attrs {
		fmt.Println(i)
		fmt.Println(reflect.TypeOf(n))
		attributeSpec, _ := partition.GetAttribute(n)
		fmt.Println(partition.GetAttribute(n))
		_, rows := partition.Size()
		for j := 0; j < rows; j++ {
			data := partition.Get(attributeSpec, j)
			fmt.Println(base.UnpackBytesToFloat(data))
		}
	}
}
// Predict outputs a base.Instances containing predictions from this tree
func (d *DecisionTreeNode) Predict(what base.FixedDataGrid) (base.FixedDataGrid, error) {
	predictions := base.GeneratePredictionVector(what)
	classAttr := getClassAttr(predictions)
	classAttrSpec, err := predictions.GetAttribute(classAttr)
	if err != nil {
		panic(err)
	}
	predAttrs := base.AttributeDifferenceReferences(what.AllAttributes(), predictions.AllClassAttributes())
	predAttrSpecs := base.ResolveAttributes(what, predAttrs)
	what.MapOverRows(predAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
		cur := d
		for {
			if cur.Children == nil {
				// Leaf node: emit the stored class
				predictions.Set(classAttrSpec, rowNo, classAttr.GetSysValFromString(cur.Class))
				break
			}
			splitVal := cur.SplitRule.SplitVal
			at := cur.SplitRule.SplitAttr
			ats, err := what.GetAttribute(at)
			if err != nil {
				panic(err)
			}

			var classVar string
			if _, ok := ats.GetAttribute().(*base.FloatAttribute); ok {
				// Numeric Attribute (e.g. FloatAttribute): branch on whether
				// the value exceeds the split threshold
				classVal := base.UnpackBytesToFloat(what.Get(ats, rowNo))
				if classVal > splitVal {
					classVar = "1"
				} else {
					classVar = "0"
				}
			} else {
				classVar = ats.GetAttribute().GetStringFromSysVal(what.Get(ats, rowNo))
			}
			if next, ok := cur.Children[classVar]; ok {
				cur = next
			} else {
				// Fallback when the value was never seen during training.
				// Note: map iteration order is not deterministic in Go, so
				// this choice of child is effectively arbitrary.
				var bestChild string
				for c := range cur.Children {
					bestChild = c
					if c > classVar {
						break
					}
				}
				cur = cur.Children[bestChild]
			}
		}
		return true, nil
	})
	return predictions, nil
}
func getNumericAttributeEntropy(f base.FixedDataGrid, attr *base.FloatAttribute) (float64, float64) {
	// Resolve Attribute
	attrSpec, err := f.GetAttribute(attr)
	if err != nil {
		panic(err)
	}

	// Build sortable vector
	_, rows := f.Size()
	refs := make([]numericSplitRef, rows)
	f.MapOverRows([]base.AttributeSpec{attrSpec}, func(val [][]byte, row int) (bool, error) {
		cls := base.GetClass(f, row)
		v := base.UnpackBytesToFloat(val[0])
		refs[row] = numericSplitRef{v, cls}
		return true, nil
	})

	// Sort
	sort.Sort(splitVec(refs))

	generateCandidateSplitDistribution := func(val float64) map[string]map[string]int {
		preSplit := make(map[string]int)
		postSplit := make(map[string]int)
		for _, i := range refs {
			if i.val < val {
				preSplit[i.class]++
			} else {
				postSplit[i.class]++
			}
		}
		ret := make(map[string]map[string]int)
		ret["0"] = preSplit
		ret["1"] = postSplit
		return ret
	}

	minSplitEntropy := math.Inf(1)
	minSplitVal := math.Inf(1)

	// Consider the midpoint between each pair of adjacent values as a
	// candidate split, and keep the one with the lowest entropy
	for i := 0; i < len(refs)-1; i++ {
		val := (refs[i].val + refs[i+1].val) / 2
		splitDist := generateCandidateSplitDistribution(val)
		splitEntropy := getSplitEntropy(splitDist)
		if splitEntropy < minSplitEntropy {
			minSplitEntropy = splitEntropy
			minSplitVal = val
		}
	}

	return minSplitEntropy, minSplitVal
}
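
// A minimal standalone sketch of the split-entropy criterion minimised above:
// the entropy of each side of the split, weighted by the fraction of
// instances it holds. This mirrors what getSplitEntropy is assumed to
// compute; requires "math".
func splitEntropy(split map[string]map[string]int) float64 {
	// Count instances overall and per side
	total := 0
	for _, side := range split {
		for _, c := range side {
			total += c
		}
	}
	ret := 0.0
	for _, side := range split {
		sideTotal := 0
		for _, c := range side {
			sideTotal += c
		}
		// Weighted entropy contribution of this side
		for _, c := range side {
			if c > 0 {
				p := float64(c) / float64(sideTotal)
				ret -= float64(sideTotal) / float64(total) * p * math.Log2(p)
			}
		}
	}
	return ret
}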
func processData(x base.FixedDataGrid) instances {
	_, rows := x.Size()
	result := make(instances, rows)

	// Retrieve numeric non-class Attributes
	numericAttrs := base.NonClassFloatAttributes(x)
	numericAttrSpecs := base.ResolveAttributes(x, numericAttrs)

	// Retrieve class Attributes
	classAttrs := x.AllClassAttributes()
	if len(classAttrs) != 1 {
		panic("Only one class Attribute supported!")
	}

	// Check that the class Attribute is categorical
	// (with two values) or binary
	classAttr := classAttrs[0]
	if attr, ok := classAttr.(*base.CategoricalAttribute); ok {
		if len(attr.GetValues()) != 2 {
			panic("Too many values for class Attribute!")
		}
	} else if _, ok := classAttr.(*base.BinaryAttribute); ok {
		// Binary class Attributes are accepted as-is
	} else {
		panic("Wrong class Attribute type!")
	}

	// Convert each row
	x.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
		// Allocate a new row
		probRow := make([]float64, len(numericAttrSpecs))

		// Read out the row
		for i := range numericAttrSpecs {
			probRow[i] = base.UnpackBytesToFloat(row[i])
		}

		// Get the class for the values
		class := base.GetClass(x, rowNo)
		result[rowNo] = instance{class, probRow}
		return true, nil
	})
	return result
}
// Transform converts the given byte sequence using the old Attribute into the new
// byte sequence.
//
// If the old Attribute has a categorical value of at most two items, then a zero or
// non-zero byte sequence is returned.
//
// If the old Attribute has a categorical value of at most n items, then a non-zero
// or zero byte sequence is returned based on the value of the new Attribute passed in.
//
// If the old Attribute is a FloatAttribute, its value is unpacked and checked for
// non-zeroness.
//
// If the old Attribute is a BinaryAttribute, the input is returned unchanged.
func (b *BinaryConvertFilter) Transform(a base.Attribute, n base.Attribute, attrBytes []byte) []byte {
	ret := make([]byte, 1)
	// Check for CategoricalAttribute
	if _, ok := a.(*base.CategoricalAttribute); ok {
		// Unpack byte value
		val := base.UnpackBytesToU64(attrBytes)
		if b.twoValuedCategoricalAttributes[a] {
			// If it's a two-valued one, check for non-zero
			if val > 0 {
				ret[0] = 1
			} else {
				ret[0] = 0
			}
		} else if an, ok := b.nValuedCategoricalAttributeMap[a]; ok {
			// If it's an n-valued one, check the new Attribute maps onto
			// the unpacked value
			if af, ok := an[val]; ok {
				if af.Equals(n) {
					ret[0] = 1
				} else {
					ret[0] = 0
				}
			} else {
				panic("Categorical value not defined!")
			}
		} else {
			panic(fmt.Sprintf("Not a recognised Attribute %v", a))
		}
	} else if _, ok := a.(*base.BinaryAttribute); ok {
		// Binary: just return the original value
		ret = attrBytes
	} else if _, ok := a.(*base.FloatAttribute); ok {
		// Float: check for non-zero
		val := base.UnpackBytesToFloat(attrBytes)
		if val > 0 {
			ret[0] = 1
		} else {
			ret[0] = 0
		}
	} else {
		panic(fmt.Sprintf("Unrecognised Attribute: %v", a))
	}
	return ret
}
// Transform returns the byte sequence after discretisation
func (c *ChiMergeFilter) Transform(a base.Attribute, n base.Attribute, field []byte) []byte {
	// Do we use this Attribute?
	if !c.attrs[a] {
		return field
	}
	// Find the Attribute value in the table
	table := c.tables[a]
	dis := 0
	val := base.UnpackBytesToFloat(field)
	for j, k := range table {
		// Advance to the last interval whose start value lies below val
		if k.Value < val {
			dis = j
			continue
		}
		break
	}
	return base.PackU64ToBytes(uint64(dis))
}
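
// A minimal standalone sketch of the interval lookup above, using the
// standard library instead of a linear scan. sort.Search returns the number
// of cutpoints strictly below val, so the last such index (or 0 when there is
// none) reproduces the loop's result. Requires "sort"; cutpoints must be in
// ascending order. The helper name (binIndex) is hypothetical.
func binIndex(cutpoints []float64, val float64) uint64 {
	i := sort.Search(len(cutpoints), func(j int) bool {
		return cutpoints[j] >= val
	})
	if i > 0 {
		i--
	}
	return uint64(i)
}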
func convertInstancesToLabelVec(X base.FixedDataGrid) []float64 {
	// Get the class Attributes
	classAttrs := X.AllClassAttributes()
	// Only support 1 class Attribute
	if len(classAttrs) != 1 {
		panic(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs)))
	}
	// ClassAttribute must be numeric
	if _, ok := classAttrs[0].(*base.FloatAttribute); !ok {
		panic(fmt.Sprintf("%s: ClassAttribute must be a FloatAttribute", classAttrs[0]))
	}
	// Allocate return structure
	_, rows := X.Size()
	labelVec := make([]float64, rows)
	// Resolve class Attribute specification
	classAttrSpecs := base.ResolveAttributes(X, classAttrs)
	X.MapOverRows(classAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
		labelVec[rowNo] = base.UnpackBytesToFloat(row[0])
		return true, nil
	})
	return labelVec
}
func convertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
	// Allocate problem array
	_, rows := X.Size()
	problemVec := make([][]float64, rows)

	// Retrieve numeric non-class Attributes
	numericAttrs := base.NonClassFloatAttributes(X)
	numericAttrSpecs := base.ResolveAttributes(X, numericAttrs)

	// Convert each row
	X.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
		// Allocate a new row
		probRow := make([]float64, len(numericAttrSpecs))
		// Read out the row
		for i := range numericAttrSpecs {
			probRow[i] = base.UnpackBytesToFloat(row[i])
		}
		// Add the row
		problemVec[rowNo] = probRow
		return true, nil
	})
	return problemVec
}
// Predict issues predictions from a fitted LinearRegression: the disturbance
// term plus the dot product of each row with the regression coefficients.
func (lr *LinearRegression) Predict(X base.FixedDataGrid) (base.FixedDataGrid, error) {
	if !lr.fitted {
		return nil, NoTrainingDataError
	}

	ret := base.GeneratePredictionVector(X)
	attrSpecs := base.ResolveAttributes(X, lr.attrs)
	clsSpec, err := ret.GetAttribute(lr.cls)
	if err != nil {
		return nil, err
	}

	X.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		prediction := lr.disturbance
		for j, r := range row {
			prediction += base.UnpackBytesToFloat(r) * lr.regressionCoefficients[j]
		}
		ret.Set(clsSpec, i, base.PackFloatToBytes(prediction))
		return true, nil
	})

	return ret, nil
}
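
// A minimal standalone sketch of the per-row computation above: the
// disturbance (intercept) plus the dot product of the coefficients and the
// feature values. The helper name (linearPredict) is hypothetical.
func linearPredict(disturbance float64, coefficients, row []float64) float64 {
	prediction := disturbance
	for j, x := range row {
		prediction += coefficients[j] * x
	}
	return prediction
}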
// Predict returns a classification for the vector, based on a vector input, using the KNN algorithm.
func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid {
	// Check what distance function we are using
	var distanceFunc pairwise.PairwiseDistanceFunc
	switch KNN.DistanceFunc {
	case "euclidean":
		distanceFunc = pairwise.NewEuclidean()
	case "manhattan":
		distanceFunc = pairwise.NewManhattan()
	default:
		panic("unsupported distance function")
	}

	// Check compatibility
	allAttrs := base.CheckCompatible(what, KNN.TrainingData)
	if allAttrs == nil {
		// Don't have the same Attributes
		return nil
	}

	// Remove the Attributes which aren't numeric
	allNumericAttrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if fAttr, ok := a.(*base.FloatAttribute); ok {
			allNumericAttrs = append(allNumericAttrs, fAttr)
		}
	}

	// Generate return vector
	ret := base.GeneratePredictionVector(what)

	// Resolve Attribute specifications for both
	whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs)
	trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs)

	// Reserve storage for the distance to each training row
	distances := make(map[int]float64)

	// Reserve storage for voting map
	maxmap := make(map[string]int)

	// Reserve storage for row computations
	trainRowBuf := make([]float64, len(allNumericAttrs))
	predRowBuf := make([]float64, len(allNumericAttrs))

	// Iterate over all outer rows
	what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) {
		// Read the float values out
		for i := range allNumericAttrs {
			predRowBuf[i] = base.UnpackBytesToFloat(predRow[i])
		}
		predMat := utilities.FloatsToMatrix(predRowBuf)

		// Compute the distance to every training row
		KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) {
			// Read the float values out
			for i := range allNumericAttrs {
				trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i])
			}
			// Compute the distance
			trainMat := utilities.FloatsToMatrix(trainRowBuf)
			distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat)
			return true, nil
		})

		// Keep the k nearest training rows
		sorted := utilities.SortIntMap(distances)
		values := sorted[:KNN.NearestNeighbours]

		// Reset the voting map
		for a := range maxmap {
			maxmap[a] = 0
		}

		// Count the votes
		for _, elem := range values {
			label := base.GetClass(KNN.TrainingData, elem)
			maxmap[label]++
		}

		// Pick the most-voted class
		var maxClass string
		maxVal := -1
		for a := range maxmap {
			if maxmap[a] > maxVal {
				maxVal = maxmap[a]
				maxClass = a
			}
		}

		base.SetClass(ret, predRowNo, maxClass)
		return true, nil
	})
	return ret
}
// Predict returns a classification for the vector, based on a vector input, using the KNN algorithm.
func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid {
	// Check what distance function we are using
	var distanceFunc pairwise.PairwiseDistanceFunc
	switch KNN.DistanceFunc {
	case "euclidean":
		distanceFunc = pairwise.NewEuclidean()
	case "manhattan":
		distanceFunc = pairwise.NewManhattan()
	default:
		panic("unsupported distance function")
	}

	// Check compatibility
	allAttrs := base.CheckCompatible(what, KNN.TrainingData)
	if allAttrs == nil {
		// Don't have the same Attributes
		return nil
	}

	// Use the optimised version if permitted
	if KNN.AllowOptimisations && KNN.DistanceFunc == "euclidean" && KNN.canUseOptimisations(what) {
		return KNN.optimisedEuclideanPredict(what.(*base.DenseInstances))
	}
	fmt.Println("Optimisations are switched off")

	// Remove the Attributes which aren't numeric
	allNumericAttrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if fAttr, ok := a.(*base.FloatAttribute); ok {
			allNumericAttrs = append(allNumericAttrs, fAttr)
		}
	}

	// Generate return vector
	ret := base.GeneratePredictionVector(what)

	// Resolve Attribute specifications for both
	whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs)
	trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs)

	// Reserve storage for the distance to each training row
	distances := make(map[int]float64)

	// Reserve storage for voting map
	maxmap := make(map[string]int)

	// Reserve storage for row computations
	trainRowBuf := make([]float64, len(allNumericAttrs))
	predRowBuf := make([]float64, len(allNumericAttrs))

	_, maxRow := what.Size()
	curRow := 0

	// Iterate over all outer rows
	what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) {
		// Report progress periodically (the original modulus of 1 printed on
		// every single row; 100 is an arbitrary reporting interval)
		if curRow%100 == 0 && curRow > 0 {
			fmt.Printf("KNN: %.2f %% done\n", float64(curRow)*100.0/float64(maxRow))
		}
		curRow++

		// Read the float values out
		for i := range allNumericAttrs {
			predRowBuf[i] = base.UnpackBytesToFloat(predRow[i])
		}
		predMat := utilities.FloatsToMatrix(predRowBuf)

		// Compute the distance to every training row
		KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) {
			// Read the float values out
			for i := range allNumericAttrs {
				trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i])
			}
			// Compute the distance
			trainMat := utilities.FloatsToMatrix(trainRowBuf)
			distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat)
			return true, nil
		})

		// Keep the k nearest training rows and vote on the class
		sorted := utilities.SortIntMap(distances)
		values := sorted[:KNN.NearestNeighbours]
		maxClass := KNN.vote(maxmap, values)
		base.SetClass(ret, predRowNo, maxClass)
		return true, nil
	})
	return ret
}
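
// A minimal standalone sketch of the majority vote that KNN.vote is assumed
// to perform: count each neighbour's label and return the most common one.
// getLabel stands in for base.GetClass on the training data; the helper name
// (majorityVote) is hypothetical.
func majorityVote(neighbours []int, getLabel func(int) string) string {
	counts := make(map[string]int)
	for _, n := range neighbours {
		counts[getLabel(n)]++
	}
	maxClass := ""
	maxVal := -1
	for label, c := range counts {
		if c > maxVal {
			maxVal = c
			maxClass = label
		}
	}
	return maxClass
}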
// Predict uses the underlying network to produce predictions for the
// class variables of X.
//
// Can only predict one CategoricalAttribute at a time, or up to n
// FloatAttributes. Set or unset ClassAttributes to work around this
// limitation.
func (m *MultiLayerNet) Predict(X base.FixedDataGrid) base.FixedDataGrid {
	// Create the return vector
	ret := base.GeneratePredictionVector(X)

	// Make sure everything's a FloatAttribute
	insts := m.convertToFloatInsts(X)

	// Get the input/output Attributes
	inputAttrs := base.NonClassAttributes(insts)
	outputAttrs := ret.AllClassAttributes()

	// Compute layers
	layers := 2 + len(m.layers)

	// Check that we're operating in a singular mode
	floatMode := 0
	categoricalMode := 0
	for _, a := range outputAttrs {
		if _, ok := a.(*base.CategoricalAttribute); ok {
			categoricalMode++
		} else if _, ok := a.(*base.FloatAttribute); ok {
			floatMode++
		} else {
			panic("Unsupported output Attribute type!")
		}
	}

	if floatMode > 0 && categoricalMode > 0 {
		panic("Can't predict a mix of float and categorical Attributes")
	} else if categoricalMode > 1 {
		panic("Can't predict more than one categorical class Attribute")
	}

	// Create the activation vector
	a := mat64.NewDense(m.network.size, 1, make([]float64, m.network.size))

	// Resolve the input AttributeSpecs
	inputAs := base.ResolveAttributes(insts, inputAttrs)

	// Resolve the output AttributeSpecs
	outputAs := base.ResolveAttributes(ret, outputAttrs)

	// Map over each input row
	insts.MapOverRows(inputAs, func(row [][]byte, rc int) (bool, error) {
		// Clear the activation vector
		for i := 0; i < m.network.size; i++ {
			a.Set(i, 0, 0.0)
		}
		// Build the activation vector
		for i, vb := range row {
			if cIndex, ok := m.attrs[inputAs[i].GetAttribute()]; !ok {
				panic("Can't resolve the Attribute!")
			} else {
				a.Set(cIndex, 0, base.UnpackBytesToFloat(vb))
			}
		}
		// Robots, activate!
		m.network.Activate(a, layers)

		// Decide which class to set
		if floatMode > 0 {
			for _, as := range outputAs {
				cIndex := m.attrs[as.GetAttribute()]
				ret.Set(as, rc, base.PackFloatToBytes(a.At(cIndex, 0)))
			}
		} else {
			maxIndex := 0
			maxVal := 0.0
			for i := m.classAttrOffset; i < m.classAttrOffset+m.classAttrCount; i++ {
				val := a.At(i, 0)
				if val > maxVal {
					maxIndex = i
					maxVal = val
				}
			}
			maxIndex -= m.classAttrOffset
			ret.Set(outputAs[0], rc, base.PackU64ToBytes(uint64(maxIndex)))
		}
		return true, nil
	})

	return ret
}
// Fit trains the neural network on the given fixed datagrid.
//
// Training stops when the mean-squared error achieved is less
// than the Convergence value, or when back-propagation has occurred
// more times than the value set by MaxIterations.
func (m *MultiLayerNet) Fit(X base.FixedDataGrid) {
	// Make sure everything's a FloatAttribute
	insts := m.convertToFloatInsts(X)

	// The size of the first layer is the number of things
	// in the revised instances which aren't class Attributes
	inputAttrsVec := base.NonClassAttributes(insts)

	// The size of the output layer is the number of things
	// in the revised instances which are class Attributes
	classAttrsVec := insts.AllClassAttributes()

	// The total number of layers is input layer + output layer
	// plus number of layers specified
	totalLayers := 2 + len(m.layers)

	// The size is then augmented by the number of nodes
	// in the centre
	size := len(inputAttrsVec)
	size += len(classAttrsVec)
	hiddenSize := 0
	for _, a := range m.layers {
		size += a
		hiddenSize += a
	}

	// Enumerate the Attributes
	trainingAttrs := make(map[base.Attribute]int)
	classAttrs := make(map[base.Attribute]int)
	attrCounter := 0
	for i, a := range inputAttrsVec {
		attrCounter = i
		m.attrs[a] = attrCounter
		trainingAttrs[a] = attrCounter
	}
	m.classAttrOffset = attrCounter + 1
	for _, a := range classAttrsVec {
		attrCounter++
		m.attrs[a] = attrCounter + hiddenSize
		classAttrs[a] = attrCounter + hiddenSize
		m.classAttrCount++
	}

	// Create the underlying Network
	m.network = NewNetwork(size, len(inputAttrsVec), Sigmoid)

	// Initialise inter-hidden-layer weights to small random values
	layerOffset := len(inputAttrsVec)
	for i := 0; i < len(m.layers)-1; i++ {
		// Get the size of this layer
		thisLayerSize := m.layers[i]
		// Next layer size
		nextLayerSize := m.layers[i+1]
		// For every node in this layer
		for j := 1; j <= thisLayerSize; j++ {
			// Compute the offset
			nodeOffset1 := layerOffset + j
			// For every node in the next layer
			for k := 1; k <= nextLayerSize; k++ {
				// Compute offset
				nodeOffset2 := layerOffset + thisLayerSize + k
				// Set weight randomly
				m.network.SetWeight(nodeOffset1, nodeOffset2, rand.NormFloat64()*0.1)
			}
		}
		layerOffset += thisLayerSize
	}

	// Initialise biases within each hidden layer
	layerOffset = len(inputAttrsVec)
	for _, l := range m.layers {
		for j := 1; j <= l; j++ {
			nodeOffset := layerOffset + j
			m.network.SetBias(nodeOffset, rand.NormFloat64()*0.1)
		}
		layerOffset += l
	}

	// Initialise biases for the output layer
	for i := 0; i < len(classAttrsVec); i++ {
		nodeOffset := layerOffset + i
		m.network.SetBias(nodeOffset, rand.NormFloat64()*0.1)
	}

	// Connect the final hidden layer with the output layer
	layerOffset = len(inputAttrsVec)
	for i, l := range m.layers {
		if i == len(m.layers)-1 {
			for j := 1; j <= l; j++ {
				nodeOffset1 := layerOffset + j
				for k := 1; k <= len(classAttrsVec); k++ {
					nodeOffset2 := layerOffset + l + k
					m.network.SetWeight(nodeOffset1, nodeOffset2, rand.NormFloat64()*0.1)
				}
			}
		}
		layerOffset += l
	}

	// Connect the input layer with the first hidden layer
	// (or with the output layer if there are no hidden layers)
	for i := 1; i <= len(inputAttrsVec); i++ {
		nextLayerLen := 0
		if len(m.layers) > 0 {
			nextLayerLen = m.layers[0]
		} else {
			nextLayerLen = len(classAttrsVec)
		}
		for j := 1; j <= nextLayerLen; j++ {
			nodeOffset := len(inputAttrsVec) + j
			v := rand.NormFloat64() * 0.1
			m.network.SetWeight(i, nodeOffset, v)
		}
	}

	// Create the training activation vector
	trainVec := mat64.NewDense(size, 1, make([]float64, size))

	// Create the error vector
	errVec := mat64.NewDense(size, 1, make([]float64, size))

	// Resolve training AttributeSpecs
	trainAs := base.ResolveAllAttributes(insts)

	// Feed-forward, compute error and update for each training example,
	// until convergence or until MaxIterations is reached
	for iteration := 0; iteration < m.MaxIterations; iteration++ {
		totalError := 0.0
		maxRow := 0
		insts.MapOverRows(trainAs, func(row [][]byte, i int) (bool, error) {
			maxRow = i
			// Clear vectors
			for i := 0; i < size; i++ {
				trainVec.Set(i, 0, 0.0)
				errVec.Set(i, 0, 0.0)
			}

			// Build vectors
			for i, vb := range row {
				v := base.UnpackBytesToFloat(vb)
				if attrIndex, ok := trainingAttrs[trainAs[i].GetAttribute()]; ok {
					// Add to activation vector
					trainVec.Set(attrIndex, 0, v)
				} else if attrIndex, ok := classAttrs[trainAs[i].GetAttribute()]; ok {
					// Set to error vector
					errVec.Set(attrIndex, 0, v)
				} else {
					panic("Should be able to find this Attribute!")
				}
			}

			// Activate the network
			m.network.Activate(trainVec, totalLayers-1)

			// Compute the error
			for a := range classAttrs {
				cIndex := classAttrs[a]
				errVec.Set(cIndex, 0, errVec.At(cIndex, 0)-trainVec.At(cIndex, 0))
			}

			// Update total error
			totalError += math.Abs(errVec.Sum())

			// Back-propagate the error
			b := m.network.Error(trainVec, errVec, totalLayers)

			// Update the weights
			m.network.UpdateWeights(trainVec, b, m.LearningRate)

			// Update the biases
			m.network.UpdateBias(b, m.LearningRate)

			return true, nil
		})

		totalError /= float64(maxRow)
		// If we've converged, no need to carry on
		if totalError < m.Convergence {
			break
		}
	}
}
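
// A minimal standalone sketch (assumed to match the Sigmoid activation named
// in Fit above) of the activation function and the derivative that
// back-propagation relies on. Requires "math".
func sigmoid(z float64) float64 {
	return 1.0 / (1.0 + math.Exp(-z))
}

func sigmoidDerivative(z float64) float64 {
	s := sigmoid(z)
	return s * (1.0 - s)
}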
// Fit estimates the regression coefficients by ordinary least squares,
// using a QR decomposition of the explanatory-variable matrix.
func (lr *LinearRegression) Fit(inst base.FixedDataGrid) error {
	// Retrieve row size
	_, rows := inst.Size()

	// Validate class Attribute count
	classAttrs := inst.AllClassAttributes()
	if len(classAttrs) != 1 {
		return fmt.Errorf("Only 1 class variable is permitted")
	}
	classAttrSpecs := base.ResolveAttributes(inst, classAttrs)

	// Retrieve relevant Attributes
	allAttrs := base.NonClassAttributes(inst)
	attrs := make([]base.Attribute, 0)
	for _, a := range allAttrs {
		if _, ok := a.(*base.FloatAttribute); ok {
			attrs = append(attrs, a)
		}
	}

	cols := len(attrs) + 1
	if rows < cols {
		return NotEnoughDataError
	}

	// Retrieve relevant Attribute specifications
	attrSpecs := base.ResolveAttributes(inst, attrs)

	// Split into two matrices: the observed results (dependent variable y)
	// and the explanatory variables (X) - see http://en.wikipedia.org/wiki/Linear_regression
	observed := mat64.NewDense(rows, 1, nil)
	explVariables := mat64.NewDense(rows, cols, nil)

	// Build the observed matrix
	inst.MapOverRows(classAttrSpecs, func(row [][]byte, i int) (bool, error) {
		val := base.UnpackBytesToFloat(row[0])
		observed.Set(i, 0, val)
		return true, nil
	})

	// Build the explanatory variables
	inst.MapOverRows(attrSpecs, func(row [][]byte, i int) (bool, error) {
		// Set the intercept column to 1.0
		explVariables.Set(i, 0, 1.0)
		for j, r := range row {
			explVariables.Set(i, j+1, base.UnpackBytesToFloat(r))
		}
		return true, nil
	})

	n := cols
	qr := new(mat64.QR)
	qr.Factorize(explVariables)
	var q, reg mat64.Dense
	q.QFromQR(qr)
	reg.RFromQR(qr)

	var transposed, qty mat64.Dense
	transposed.Clone(q.T())
	qty.Mul(&transposed, observed)

	// Back-substitution: solve the upper-triangular system R*beta = Q^T*y
	regressionCoefficients := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		regressionCoefficients[i] = qty.At(i, 0)
		for j := i + 1; j < n; j++ {
			regressionCoefficients[i] -= regressionCoefficients[j] * reg.At(i, j)
		}
		regressionCoefficients[i] /= reg.At(i, i)
	}

	lr.disturbance = regressionCoefficients[0]
	lr.regressionCoefficients = regressionCoefficients[1:]
	lr.fitted = true
	lr.attrs = attrs
	lr.cls = classAttrs[0]
	return nil
}
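
// A minimal standalone sketch of the back-substitution loop above: solving
// the upper-triangular system R*beta = Q^T*y one coefficient at a time,
// starting from the last row. r(i, j) stands in for reg.At and qty[i] for
// qty.At(i, 0); the helper name (backSubstitute) is hypothetical.
func backSubstitute(r func(i, j int) float64, qty []float64) []float64 {
	n := len(qty)
	beta := make([]float64, n)
	for i := n - 1; i >= 0; i-- {
		beta[i] = qty[i]
		for j := i + 1; j < n; j++ {
			beta[i] -= beta[j] * r(i, j)
		}
		beta[i] /= r(i, i)
	}
	return beta
}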