// same as above but with StochasticGA
func TestThreeDimensionalPlaneNormalizedShouldPass2(t *testing.T) {
    var err error

    model := NewLogistic(base.StochasticGA, .0001, 0, 3000, nX, nY)
    err = model.Learn()
    assert.Nil(t, err, "Learning error should be nil")

    var guess []float64
    for i := -20; i < 20; i++ {
        for j := -20; j < 20; j++ {
            x := []float64{float64(i), float64(j)}
            base.NormalizePoint(x)

            guess, err = model.Predict(x, true)

            if x[0]+x[1] > 5 {
                assert.True(t, guess[0] > 0.5, "Guess should be more likely to be 1")
                assert.True(t, guess[0] < 1.001, "Guess should not exceed 1 ever")
            } else {
                assert.True(t, guess[0] < 0.5, "Guess should be more likely to be 0")
                assert.True(t, guess[0] > 0.0, "Guess should not be below 0 even")
            }

            assert.Len(t, guess, 1, "Length of a Logistic model output from the hypothesis should always be a 1 dimensional vector. Never multidimensional.")
            assert.Nil(t, err, "Prediction error should be nil")
        }
    }
}
// Predict takes in a variable x (an array of floats) and
// finds the value of the hypothesis function given the
// current parameter vector θ
func (s *Softmax) Predict(x []float64, normalize ...bool) ([]float64, error) {
    if len(s.Parameters) != 0 && len(x)+1 != len(s.Parameters[0]) {
        return nil, fmt.Errorf("Error: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v (len(theta[0]) = %v)\n", len(x), len(s.Parameters), len(s.Parameters[0]))
    }

    if len(normalize) != 0 && normalize[0] {
        base.NormalizePoint(x)
    }

    result := make([]float64, s.k)
    var denom float64

    for i := 0; i < s.k; i++ {
        // include constant term in sum
        sum := s.Parameters[i][0]

        for j := range x {
            sum += x[j] * s.Parameters[i][j+1]
        }

        result[i] = math.Exp(sum)
        denom += result[i]
    }

    for i := range result {
        result[i] /= denom
    }

    return result, nil
}
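// A minimal usage sketch (illustrative, not part of the library): given a
// trained Softmax model with two input features and three classes — for
// example one constructed with NewSoftmax as shown in the OnlineLearn
// documentation later in this file — Predict returns one probability per
// class, and the predicted class is simply the index of the largest
// probability. The model variable and its training are assumed here.
func exampleSoftmaxPredict(model *Softmax) (int, error) {
    probs, err := model.Predict([]float64{1.0, -0.5})
    if err != nil {
        return 0, err
    }

    // pick the class with the highest probability
    class := 0
    for i := range probs {
        if probs[i] > probs[class] {
            class = i
        }
    }
    return class, nil
}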
// Predict takes in a variable x (an array of floats) and
// finds the value of the hypothesis function given the
// current parameter vector θ
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
func (k *KNN) Predict(x []float64, normalize ...bool) ([]float64, error) {
    if k.K > len(k.trainingSet) {
        return nil, fmt.Errorf("Given K (%v) is greater than the length of the training set", k.K)
    }

    if len(x) != len(k.trainingSet[0]) {
        return nil, fmt.Errorf("Given x (len %v) does not match dimensions of training set", len(x))
    }

    if len(normalize) != 0 && normalize[0] {
        base.NormalizePoint(x)
    }

    // initialize neighbors with K randomly
    // chosen training examples
    neighbors := make([]nn, k.K)
    length := len(k.trainingSet)
    for i := range neighbors {
        index := rand.Intn(length)
        neighbors[i] = nn{
            X:        k.trainingSet[index],
            Y:        k.expectedResults[index],
            Distance: k.Distance(x, k.trainingSet[index]),
        }
    }

    // calculate nearest neighbors
    var count int
    for i := range k.trainingSet {
        dist := k.Distance(x, k.trainingSet[i])
        neighbors = insertSorted(nn{
            X:        k.trainingSet[i],
            Y:        k.expectedResults[i],
            Distance: dist,
        }, neighbors)

        count++
    }

    // average the labels of the K nearest
    // neighbors and round to vote
    sum := 0.0
    for i := range neighbors {
        sum += neighbors[i].Y
    }

    return []float64{round(sum / float64(k.K))}, nil
}
// Predict takes in a variable x (an array of floats) and
// finds the value of the hypothesis function given the
// current parameter vector θ
func (p *KernelPerceptron) Predict(x []float64, normalize ...bool) ([]float64, error) {
    if len(normalize) != 0 && normalize[0] {
        base.NormalizePoint(x)
    }

    var sum float64
    for i := range p.SV {
        sum += p.SV[i].Y[0] * p.Kernel(p.SV[i].X, x)
    }

    result := -1.0
    if sum > 0 {
        result = 1
    }

    return []float64{result}, nil
}
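// Illustrative sketch (not part of the library): the kernel perceptron
// decision rule used above is sign(Σ_i y_i·K(x_i, x)) over the stored
// support vectors. With a linear kernel K(a, b) = a·b this reduces to a
// plain dot-product vote, shown here with hard-coded example data.
func exampleKernelDecision() float64 {
    linearKernel := func(a, b []float64) float64 {
        var dot float64
        for i := range a {
            dot += a[i] * b[i]
        }
        return dot
    }

    // hypothetical support vectors (X) with labels (Y) in {-1, +1}
    supportX := [][]float64{{1, 2}, {-1, -1}}
    supportY := []float64{1, -1}

    x := []float64{0.5, 1.0}

    var sum float64
    for i := range supportX {
        sum += supportY[i] * linearKernel(supportX[i], x)
    }

    if sum > 0 {
        return 1
    }
    return -1
}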
// Predict takes in a variable x (an array of floats) and
// finds the value of the hypothesis function given the
// current parameter vector θ
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
func (l *LeastSquares) Predict(x []float64, normalize ...bool) ([]float64, error) {
    if len(x)+1 != len(l.Parameters) {
        return nil, fmt.Errorf("Error: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v\n", len(x), len(l.Parameters))
    }

    if len(normalize) != 0 && normalize[0] {
        base.NormalizePoint(x)
    }

    // include constant term in sum
    sum := l.Parameters[0]
    for i := range x {
        sum += x[i] * l.Parameters[i+1]
    }

    return []float64{sum}, nil
}
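// Worked example (illustrative): the least squares hypothesis above is
// h(x) = θ[0] + Σ θ[i+1]·x[i]. With hypothetical parameters
// θ = [1.0, 2.0, -0.5] and input x = [3.0, 4.0] the prediction is
// 1.0 + 2.0*3.0 + (-0.5)*4.0 = 5.0, mirroring the loop in Predict.
func exampleLeastSquaresHypothesis() float64 {
    theta := []float64{1.0, 2.0, -0.5} // [bias, θ1, θ2], hypothetical values
    x := []float64{3.0, 4.0}

    sum := theta[0]
    for i := range x {
        sum += x[i] * theta[i+1]
    }
    return sum // 5.0
}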
// Predict takes in a variable x (an array of floats) and
// finds the value of the hypothesis function given the
// current parameter vector θ
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
func (k *TriangleKMeans) Predict(x []float64, normalize ...bool) ([]float64, error) {
    if len(x) != len(k.Centroids[0]) {
        return nil, fmt.Errorf("Error: Centroid vector should be the same length as input vector!\n\tLength of x given: %v\n\tLength of centroid: %v\n", len(x), len(k.Centroids[0]))
    }

    if len(normalize) != 0 && normalize[0] {
        base.NormalizePoint(x)
    }

    var guess int
    minDiff := diff(x, k.Centroids[0])
    for j := 1; j < len(k.Centroids); j++ {
        difference := diff(x, k.Centroids[j])
        if difference < minDiff {
            minDiff = difference
            guess = j
        }
    }

    return []float64{float64(guess)}, nil
}
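// Illustrative sketch (not part of the library): the k-means Predict above
// assigns a point to the index of its closest centroid. The same
// nearest-centroid rule on plain slices, using squared Euclidean distance
// as a stand-in for the package's diff helper:
func exampleNearestCentroid() int {
    squaredDist := func(a, b []float64) float64 {
        var sum float64
        for i := range a {
            d := a[i] - b[i]
            sum += d * d
        }
        return sum
    }

    centroids := [][]float64{{0, 0}, {10, 10}, {-5, 5}} // hypothetical centroids
    x := []float64{9, 8}

    guess := 0
    minDiff := squaredDist(x, centroids[0])
    for j := 1; j < len(centroids); j++ {
        if d := squaredDist(x, centroids[j]); d < minDiff {
            minDiff = d
            guess = j
        }
    }
    return guess // 1 (closest to {10, 10})
}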
// Predict takes in a variable x (an array of floats) and
// finds the value of the hypothesis function given the
// current parameter vector θ
func (p *Perceptron) Predict(x []float64, normalize ...bool) ([]float64, error) {
    if len(x)+1 != len(p.Parameters) {
        return nil, fmt.Errorf("Error: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v\n", len(x), len(p.Parameters))
    }

    if len(normalize) != 0 && normalize[0] {
        base.NormalizePoint(x)
    }

    // include constant term in sum
    sum := p.Parameters[0]
    for i := range x {
        sum += x[i] * p.Parameters[i+1]
    }

    result := -1.0
    if sum > 0 {
        result = 1
    }

    return []float64{result}, nil
}
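// Worked example (illustrative): the perceptron hypothesis above is
// sign(θ[0] + Σ θ[i+1]·x[i]) mapped onto {-1, +1}. With hypothetical
// parameters θ = [-1.0, 2.0, 0.5] and x = [1.0, -1.0] the weighted sum is
// -1.0 + 2.0 - 0.5 = 0.5 > 0, so the prediction is +1.
func examplePerceptronDecision() float64 {
    theta := []float64{-1.0, 2.0, 0.5} // [bias, θ1, θ2], hypothetical values
    x := []float64{1.0, -1.0}

    sum := theta[0]
    for i := range x {
        sum += x[i] * theta[i+1]
    }

    if sum > 0 {
        return 1
    }
    return -1
}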
// Predict takes in a variable x (an array of floats) and
// finds the value of the hypothesis function given the
// current parameter vector θ
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
func (k *KNN) Predict(x []float64, normalize ...bool) ([]float64, error) {
    if k.K > len(k.trainingSet) {
        return nil, fmt.Errorf("Given K (%v) is greater than the length of the training set", k.K)
    }

    if len(x) != len(k.trainingSet[0]) {
        return nil, fmt.Errorf("Given x (len %v) does not match dimensions of training set", len(x))
    }

    if len(normalize) != 0 && normalize[0] {
        base.NormalizePoint(x)
    }

    // initialize the neighbors as an empty
    // slice of Neighbors. insertSorted will
    // take care of capping the neighbors at
    // K.
    neighbors := []nn{}

    // calculate nearest neighbors
    for i := range k.trainingSet {
        dist := k.Distance(x, k.trainingSet[i])
        neighbors = insertSorted(nn{
            X:        k.trainingSet[i],
            Y:        k.expectedResults[i],
            Distance: dist,
        }, neighbors, k.K)
    }

    // average the labels of the K nearest
    // neighbors and round to vote
    sum := 0.0
    for i := range neighbors {
        sum += neighbors[i].Y
    }

    return []float64{round(sum / float64(k.K))}, nil
}
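// Illustrative sketch (not part of the library): insertSorted is not shown
// in this file, so this is only an assumption of its behavior — keep a
// slice of at most K neighbors ordered by ascending distance, then average
// the kept labels and round to vote, as Predict does above.
type exampleNeighbor struct {
    Y        float64
    Distance float64
}

func exampleInsertSorted(n exampleNeighbor, neighbors []exampleNeighbor, k int) []exampleNeighbor {
    // find the insertion index that keeps the slice sorted by distance
    i := 0
    for i < len(neighbors) && neighbors[i].Distance < n.Distance {
        i++
    }
    if i >= k {
        return neighbors // farther away than everything we're keeping
    }

    // insert n at index i, shifting the rest right
    neighbors = append(neighbors, exampleNeighbor{})
    copy(neighbors[i+1:], neighbors[i:])
    neighbors[i] = n

    // cap the slice at K neighbors
    if len(neighbors) > k {
        neighbors = neighbors[:k]
    }
    return neighbors
}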
// OnlineLearn runs off of the datastream within the Perceptron
// structure. Whenever the model makes a wrong prediction
// the parameter vector theta is updated to reflect that,
// as discussed in the documentation for the Perceptron
// struct itself, and the OnUpdate function is called with
// the newly updated parameter vector. Learning will stop
// when the data channel is closed and all remaining
// datapoints within the channel have been read.
//
// The errors channel will be closed when learning is
// completed, so you know when it's done if you're relying
// on that for whatever reason.
//
// onUpdate func([][]float64):
//
// onUpdate is a function that is called whenever
// the perceptron updates its parameter vector
// theta. This acts almost like a callback and
// passes the newly updated parameter vector
// theta as a slice of floats.
//
// This might be useful if you want to maintain
// an up to date persisted model in a database of
// your choosing and you'd like to update it
// constantly.
//
// This will be spawned into a new goroutine, so
// don't worry about the function taking a long
// time, or blocking.
//
// If you want to monitor errors happening within
// this function, just have a channel of errors
// you send to within this callback, or some other
// method if it fits your scenario better.
//
// NOTE that there is an optional last parameter which,
// when true, will normalize all data given on the
// stream. This will potentially help gradient descent
// converge faster. This is given as a parameter because
// you won't have direct access to the dataset before
// hand like you would in batch/stochastic settings.
//
// Example Online, Binary Perceptron (no layers, etc.):
//
//     // create the channel of data and errors
//     stream := make(chan base.Datapoint, 100)
//     errors := make(chan error)
//
//     model := NewPerceptron(0.1, 1, stream)
//
//     go model.OnlineLearn(errors, stream, func(theta [][]float64) {
//         // do something with the new theta (persist
//         // to database?) in here.
//         fmt.Printf("Theta updated to %v!\n", theta)
//     })
//
//     // start passing data to our datastream
//     //
//     // we could have data already in our channel
//     // when we instantiated the Perceptron, though
//     //
//     // and note that this data could be coming from
//     // some web server, or whatever!!
//     go func() {
//         for i := -500.0; abs(i) > 1; i *= -0.997 {
//             if 10+(i-20)/2 > 0 {
//                 stream <- base.Datapoint{
//                     X: []float64{i - 20},
//                     Y: []float64{1.0},
//                 }
//             } else {
//                 stream <- base.Datapoint{
//                     X: []float64{i - 20},
//                     Y: []float64{0},
//                 }
//             }
//         }
//
//         // close the dataset
//         close(stream)
//     }()
//
//     for {
//         err, more := <-errors
//         if err != nil {
//             fmt.Printf("Error passed: %v", err)
//         }
//         if !more {
//             // training is done!
//             break
//         }
//     }
//
//     // now you can predict!!
//     // note that guess is a []float64 of len() == 1
//     // when it isn't nil
//     guess, err := model.Predict([]float64{-10})
//     if err != nil {
//         panic("EGATZ!! I FOUND AN ERROR! BETTER CHECK YOUR INPUT DIMENSIONS!")
//     }
func (p *Perceptron) OnlineLearn(errors chan error, dataset chan base.Datapoint, onUpdate func([][]float64), normalize ...bool) {
    if errors == nil {
        errors = make(chan error)
    }
    if dataset == nil {
        errors <- fmt.Errorf("ERROR: Attempting to learn with a nil data stream!\n")
        close(errors)
        return
    }

    fmt.Printf("Training:\n\tModel: Perceptron Classifier\n\tOptimization Method: Online Perceptron\n\tFeatures: %v\n\tLearning Rate α: %v\n...\n\n", len(p.Parameters), p.alpha)

    norm := len(normalize) != 0 && normalize[0]

    var point base.Datapoint
    var more bool

    for {
        point, more = <-dataset

        if more {
            // have a datapoint, predict and update!
            //
            // Predict also checks if the point is of the
            // correct dimensions
            if norm {
                base.NormalizePoint(point.X)
            }

            guess, err := p.Predict(point.X)
            if err != nil {
                // send the error channel some info and
                // skip this datapoint
                errors <- err
                continue
            }

            if len(point.Y) != 1 {
                errors <- fmt.Errorf("The binary perceptron model requires that the data results (y) have length 1 - given %v", len(point.Y))
                continue
            }

            if len(point.X) != len(p.Parameters)-1 {
                errors <- fmt.Errorf("The binary perceptron model requires that the length of input data (currently %v) be one less than the length of the parameter vector (%v)", len(point.X), len(p.Parameters))
                continue
            }

            // update the parameters if the guess
            // is wrong
            if guess[0] != point.Y[0] {
                p.Parameters[0] += p.alpha * (point.Y[0] - guess[0])

                for i := 1; i < len(p.Parameters); i++ {
                    p.Parameters[i] += p.alpha * (point.Y[0] - guess[0]) * point.X[i-1]
                }

                // call the OnUpdate callback with the new theta
                // appended to a blank slice so the vector is
                // passed by value and not by reference
                go onUpdate([][]float64{p.Parameters})
            }
        } else {
            fmt.Printf("Training Completed.\n%v\n\n", p)
            close(errors)
            return
        }
    }
}
// OnlineLearn runs similar to using a fixed dataset with
// Stochastic Gradient Descent, but it handles data by
// passing it as a channel, and returns errors through
// a channel, which lets it run responsive to inputted data
// from outside the model itself (like using data from the
// stock market at timed intervals or using realtime data
// about the weather.)
//
// The onUpdate callback is called whenever the parameter
// vector theta is changed, so you are able to persist the
// model with the most up to date vector at all times (you
// could persist to a database within the callback, for
// example.) Don't worry about it taking too long and blocking,
// because the callback is spawned into another goroutine.
//
// NOTE that this function is suggested to run in its own
// goroutine, or at least is designed as such.
//
// NOTE part 2: You can pass in an empty dataset, so long
// as it's not nil, and start pushing after.
//
// NOTE part 3: each example is only looked at as it goes
// through the channel, so if you want to have each example
// looked at more than once you must manually pass the data
// yourself.
//
// NOTE part 4: the optional parameter 'normalize' will,
// if true, normalize all data streamed through the
// channel to unit length. This will affect the outcome
// of the hypothesis, though it could be favorable if
// your data comes in drastically different scales.
//
// Example Online Softmax Regression:
//
//     // create the channel of data and errors
//     stream := make(chan base.Datapoint, 100)
//     errors := make(chan error)
//
//     // notice how we are adding another integer
//     // to the end of the NewSoftmax call. This
//     // tells the model to use that number of features
//     // (2) in lieu of finding that from the dataset
//     // like you would with batch/stochastic GD
//     //
//     // Also – the 'base.StochasticGA' doesn't affect
//     // anything. You could put batch or any other method.
//     model := NewSoftmax(base.StochasticGA, 5e-5, 0, 3, 0, nil, nil, 2)
//
//     go model.OnlineLearn(errors, stream, func(theta [][]float64) {
//         // do something with the new theta (persist
//         // to database?) in here.
//     })
//
//     // start passing data to our datastream
//     //
//     // we could have data already in our channel
//     // when we instantiated the model, though
//     go func() {
//         for iter := 0; iter < 3; iter++ {
//             for i := -2.0; i < 2.0; i += 0.15 {
//                 for j := -2.0; j < 2.0; j += 0.15 {
//
//                     if -2*i+j/2-0.5 > 0 && -1*i-j < 0 {
//                         stream <- base.Datapoint{
//                             X: []float64{float64(i), float64(j)},
//                             Y: []float64{2.0},
//                         }
//                     } else if -2*i+j/2-0.5 > 0 && -1*i-j > 0 {
//                         stream <- base.Datapoint{
//                             X: []float64{float64(i), float64(j)},
//                             Y: []float64{1.0},
//                         }
//                     } else {
//                         stream <- base.Datapoint{
//                             X: []float64{float64(i), float64(j)},
//                             Y: []float64{0.0},
//                         }
//                     }
//                 }
//             }
//         }
//
//         // close the dataset
//         close(stream)
//     }()
//
//     // this will block until the error
//     // channel is closed in the learning
//     // function (it will, don't worry!)
//     for {
//         err, more := <-errors
//         if err != nil {
//             panic("THERE WAS AN ERROR!!! RUN!!!!")
//         }
//         if !more {
//             break
//         }
//     }
//
//     // Below here all the learning is completed
//
//     // predict like usual (2 features, as the model was made with)
//     guess, err := model.Predict([]float64{1.0, 1.5})
//     if err != nil {
//         panic("AAAARGGGH! SHIVER ME TIMBERS! THESE ROTTEN SCOUNDRELS FOUND AN ERROR!!!")
//     }
func (s *Softmax) OnlineLearn(errors chan error, dataset chan base.Datapoint, onUpdate func([][]float64), normalize ...bool) {
    if errors == nil {
        errors = make(chan error)
    }
    if dataset == nil {
        errors <- fmt.Errorf("ERROR: Attempting to learn with a nil data stream!\n")
        close(errors)
        return
    }

    fmt.Fprintf(s.Output, "Training:\n\tModel: Softmax Classifier (%v classes)\n\tOptimization Method: Online Stochastic Gradient Descent\n\tFeatures: %v\n\tLearning Rate α: %v\n...\n\n", s.k, len(s.Parameters), s.alpha)

    norm := len(normalize) != 0 && normalize[0]

    var point base.Datapoint
    var more bool

    for {
        point, more = <-dataset

        if more {
            if len(point.Y) != 1 {
                errors <- fmt.Errorf("ERROR: point.Y must have a length of 1. Point: %v", point)
                continue
            }

            if norm {
                base.NormalizePoint(point.X)
            }

            // go over each parameter vector for each
            // classification value
            for k, theta := range s.Parameters {
                dj, err := func(point base.Datapoint, j int) ([]float64, error) {
                    grad := make([]float64, len(s.Parameters[0]))

                    // account for constant term
                    x := append([]float64{1}, point.X...)

                    var ident float64
                    if abs(point.Y[0]-float64(k)) < 1e-3 {
                        ident = 1
                    }

                    var numerator float64
                    var denom float64
                    for a := 0; a < s.k; a++ {
                        var inside float64

                        // calculate θ[a]·x for each class a
                        for l, val := range s.Parameters[a] {
                            inside += val * x[l]
                        }

                        if a == k {
                            numerator = math.Exp(inside)
                        }

                        denom += math.Exp(inside)
                    }

                    for a := range grad {
                        grad[a] += x[a] * (ident - numerator/denom)
                    }

                    // add in the regularization term
                    // λ*θ[j]
                    //
                    // notice that we don't count the
                    // constant term
                    for j := 1; j < len(grad); j++ {
                        grad[j] += s.regularization * s.Parameters[k][j]
                    }

                    return grad, nil
                }(point, k)
                if err != nil {
                    errors <- err
                    return
                }

                // now simultaneously update theta
                for j := range theta {
                    newθ := theta[j] + s.alpha*dj[j]
                    if math.IsInf(newθ, 0) || math.IsNaN(newθ) {
                        errors <- fmt.Errorf("Sorry dude! Learning diverged. Some value of the parameter vector theta is ±Inf or NaN")
                        close(errors)
                        return
                    }
                    s.Parameters[k][j] = newθ
                }
            }

            go onUpdate(s.Parameters)
        } else {
            fmt.Fprintf(s.Output, "Training Completed.\n%v\n\n", s)
            close(errors)
            return
        }
    }
}
// OnlineLearn runs similar to using a fixed dataset with
// Stochastic Gradient Descent, but it handles data by
// passing it as a channel, and returns errors through
// a channel, which lets it run responsive to inputted data
// from outside the model itself (like using data from the
// stock market at timed intervals or using realtime data
// about the weather.)
//
// The onUpdate callback is called whenever the parameter
// vector theta is changed, so you are able to persist the
// model with the most up to date vector at all times (you
// could persist to a database within the callback, for
// example.) Don't worry about it taking too long and blocking,
// because the callback is spawned into another goroutine.
//
// NOTE that this function is suggested to run in its own
// goroutine, or at least is designed as such.
//
// NOTE part 2: You can pass in an empty dataset, so long
// as it's not nil, and start pushing after.
//
// NOTE part 3: each example is only looked at as it goes
// through the channel, so if you want to have each example
// looked at more than once you must manually pass the data
// yourself.
//
// NOTE part 4: the optional parameter 'normalize' will,
// if true, normalize all data streamed through the
// channel to unit length. This will affect the outcome
// of the hypothesis, though it could be favorable if
// your data comes in drastically different scales.
//
// Example Online Logistic Regression:
//
//     // create the channel of data and errors
//     stream := make(chan base.Datapoint, 100)
//     errors := make(chan error)
//
//     // notice how we are adding another integer
//     // to the end of the NewLogistic call. This
//     // tells the model to use that number of features
//     // (4) in lieu of finding that from the dataset
//     // like you would with batch/stochastic GD
//     //
//     // Also – the 'base.StochasticGA' doesn't affect
//     // anything. You could put batch.
//     model := NewLogistic(base.StochasticGA, .0001, 0, 0, nil, nil, 4)
//
//     go model.OnlineLearn(errors, stream, func(theta [][]float64) {
//         // do something with the new theta (persist
//         // to database?) in here.
//     })
//
//     go func() {
//         for iterations := 0; iterations < 20; iterations++ {
//             for i := -200.0; abs(i) > 1; i *= -0.75 {
//                 for j := -200.0; abs(j) > 1; j *= -0.75 {
//                     for k := -200.0; abs(k) > 1; k *= -0.75 {
//                         for l := -200.0; abs(l) > 1; l *= -0.75 {
//                             if i/2+2*k-4*j+2*l+3 > 0 {
//                                 stream <- base.Datapoint{
//                                     X: []float64{i, j, k, l},
//                                     Y: []float64{1.0},
//                                 }
//                             } else {
//                                 stream <- base.Datapoint{
//                                     X: []float64{i, j, k, l},
//                                     Y: []float64{0.0},
//                                 }
//                             }
//                         }
//                     }
//                 }
//             }
//         }
//
//         // close the dataset to tell the model
//         // to stop learning when it finishes reading
//         // what's left in the channel
//         close(stream)
//     }()
//
//     // this will block until the error
//     // channel is closed in the learning
//     // function (it will, don't worry!)
//     for {
//         err, more := <-errors
//         if err != nil {
//             panic("THERE WAS AN ERROR!!! RUN!!!!")
//         }
//         if !more {
//             break
//         }
//     }
//
//     // Below here all the learning is completed
//
//     // predict like usual
//     guess, err := model.Predict([]float64{42, 6, 10, -32})
//     if err != nil {
//         panic("AAAARGGGH! SHIVER ME TIMBERS! THESE ROTTEN SCOUNDRELS FOUND AN ERROR!!!")
//     }
func (l *Logistic) OnlineLearn(errors chan error, dataset chan base.Datapoint, onUpdate func([][]float64), normalize ...bool) {
    if errors == nil {
        errors = make(chan error)
    }
    if dataset == nil {
        errors <- fmt.Errorf("ERROR: Attempting to learn with a nil data stream!\n")
        close(errors)
        return
    }

    fmt.Fprintf(l.Output, "Training:\n\tModel: Logistic (Binary) Classifier\n\tOptimization Method: Online Stochastic Gradient Descent\n\tFeatures: %v\n\tLearning Rate α: %v\n...\n\n", len(l.Parameters), l.alpha)

    norm := len(normalize) != 0 && normalize[0]

    var point base.Datapoint
    var more bool

    for {
        point, more = <-dataset

        if more {
            if len(point.Y) != 1 {
                errors <- fmt.Errorf("ERROR: point.Y must have a length of 1. Point: %v", point)
                continue
            }

            if norm {
                base.NormalizePoint(point.X)
            }

            newTheta := make([]float64, len(l.Parameters))
            for j := range l.Parameters {

                // find the gradient using the point
                // from the channel (different than
                // calling from the dataset so we need
                // to have a new function instead of calling
                // Dij(i, j))
                dj, err := func(point base.Datapoint, j int) (float64, error) {
                    prediction, err := l.Predict(point.X)
                    if err != nil {
                        return 0, err
                    }

                    // account for constant term
                    // x is x[i][j] via Andrew Ng's terminology
                    var x float64
                    if j == 0 {
                        x = 1
                    } else {
                        x = point.X[j-1]
                    }

                    gradient := (point.Y[0] - prediction[0]) * x

                    // add in the regularization term
                    // λ*θ[j]
                    //
                    // notice that we don't count the
                    // constant term
                    if j != 0 {
                        gradient += l.regularization * l.Parameters[j]
                    }

                    return gradient, nil
                }(point, j)
                if err != nil {
                    errors <- err
                    continue
                }

                newTheta[j] = l.Parameters[j] + l.alpha*dj
            }

            // now simultaneously update Theta
            for j := range l.Parameters {
                newθ := newTheta[j]
                if math.IsInf(newθ, 0) || math.IsNaN(newθ) {
                    errors <- fmt.Errorf("Sorry! Learning diverged. Some value of the parameter vector theta is ±Inf or NaN")
                    continue
                }
                l.Parameters[j] = newθ
            }

            go onUpdate([][]float64{l.Parameters})

        } else {
            fmt.Fprintf(l.Output, "Training Completed.\n%v\n\n", l)
            close(errors)
            return
        }
    }
}
func TestLinearKernelTwoDXNormalizedShouldPass1(t *testing.T) {
    // create the channel of data and errors
    stream := make(chan base.Datapoint, 100)
    errors := make(chan error)

    model := NewKernelPerceptron(base.LinearKernel())

    go model.OnlineLearn(errors, stream, func(supportVector [][]float64) {}, true)

    for i := -200.0; abs(i) > 1; i *= -0.57 {
        for j := -200.0; abs(j) > 1; j *= -0.57 {
            x := []float64{i, j}
            base.NormalizePoint(x)

            if 5*x[0]+10*x[1]-4 > 0 {
                stream <- base.Datapoint{
                    X: x,
                    Y: []float64{1.0},
                }
            } else {
                stream <- base.Datapoint{
                    X: x,
                    Y: []float64{-1.0},
                }
            }
        }
    }

    // close the dataset
    close(stream)

    for {
        err, more := <-errors
        assert.False(t, more, "There should not be any errors!")

        if more {
            assert.Nil(t, err, "Learning error should be nil")
        } else {
            break
        }
    }

    var count int
    var incorrect int

    for i := -200.0; abs(i) > 1; i *= -0.53 {
        for j := -200.0; abs(j) > 1; j *= -0.53 {
            x := []float64{i, j}
            base.NormalizePoint(x)

            guess, err := model.Predict([]float64{i, j}, true)
            assert.Nil(t, err, "Prediction error should be nil")
            assert.Len(t, guess, 1, "Guess should have length 1")

            if 5*x[0]+10*x[1]-4 > 0 && guess[0] != 1.0 {
                incorrect++
            } else if 5*x[0]+10*x[1]-4 <= 0 && guess[0] != -1.0 {
                incorrect++
            }

            count++
        }
    }

    fmt.Printf("Predictions: %v\n\tIncorrect: %v\n\tAccuracy Rate: %v percent\n", count, incorrect, 100*(1.0-float64(incorrect)/float64(count)))
    assert.True(t, float64(incorrect)/float64(count) < 0.14, "Accuracy should be greater than 86%")
}
// Predict takes in a variable x (an array of floats) and
// finds the value of the hypothesis function given the
// current parameter vector θ
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
func (l *LocalLinear) Predict(x []float64, normalize ...bool) ([]float64, error) {
    if len(x)+1 != len(l.Parameters) {
        err := fmt.Errorf("ERROR: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v\n", len(x), len(l.Parameters))
        print(err.Error())
        return nil, err
    }

    norm := len(normalize) != 0 && normalize[0]
    if norm {
        base.NormalizePoint(x)
    }

    if l.trainingSet == nil || l.expectedResults == nil {
        err := fmt.Errorf("ERROR: Attempting to learn with no training examples!\n")
        print(err.Error())
        return nil, err
    }

    examples := len(l.trainingSet)
    if examples == 0 || len(l.trainingSet[0]) == 0 {
        err := fmt.Errorf("ERROR: Attempting to learn with no training examples!\n")
        print(err.Error())
        return nil, err
    }
    if len(l.expectedResults) == 0 {
        err := fmt.Errorf("ERROR: Attempting to learn with no expected results! This isn't an unsupervised model!! You'll need to include data before you learn :)\n")
        print(err.Error())
        return nil, err
    }

    fmt.Printf("Training:\n\tModel: Locally Weighted Linear Regression\n\tOptimization Method: %v\n\tCenter Point: %v\n\tTraining Examples: %v\n\tFeatures: %v\n\tLearning Rate α: %v\n\tRegularization Parameter λ: %v\n...\n\n", l.method, x, examples, len(l.trainingSet[0]), l.alpha, l.regularization)

    var iter int
    features := len(l.Parameters)

    if l.method == base.BatchGA {
        for ; iter < l.maxIterations; iter++ {
            newTheta := make([]float64, features)
            for j := range l.Parameters {
                dj, err := l.Dj(x, j)
                if err != nil {
                    return nil, err
                }

                newTheta[j] = l.Parameters[j] + l.alpha*dj
            }

            // now simultaneously update Theta
            for j := range l.Parameters {
                newθ := newTheta[j]
                if math.IsInf(newθ, 0) || math.IsNaN(newθ) {
                    return nil, fmt.Errorf("Sorry! Learning diverged. Some value of the parameter vector theta is ±Inf or NaN")
                }
                l.Parameters[j] = newθ
            }
        }
    } else if l.method == base.StochasticGA {
        for ; iter < l.maxIterations; iter++ {
            newTheta := make([]float64, features)
            for i := 0; i < examples; i++ {
                for j := range l.Parameters {
                    dj, err := l.Dij(x, i, j)
                    if err != nil {
                        return nil, err
                    }

                    newTheta[j] = l.Parameters[j] + l.alpha*dj
                }

                // now simultaneously update Theta
                for j := range l.Parameters {
                    newθ := newTheta[j]
                    if math.IsInf(newθ, 0) || math.IsNaN(newθ) {
                        return nil, fmt.Errorf("Sorry! Learning diverged. Some value of the parameter vector theta is ±Inf or NaN")
                    }
                    l.Parameters[j] = newθ
                }
            }
        }
    } else {
        return nil, fmt.Errorf("Chose a training method not implemented for LocalLinear regression")
    }

    fmt.Printf("Training Completed. Went through %v iterations.\n%v\n\n", iter, l)

    // include constant term in sum
    sum := l.Parameters[0]
    for i := range x {
        sum += x[i] * l.Parameters[i+1]
    }

    return []float64{sum}, nil
}
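// Note on the model above: unlike the other Predict methods in this file,
// locally weighted linear regression re-runs gradient descent around each
// query point x before evaluating the hypothesis, which is why Predict
// contains a full training loop. The weighting itself lives in Dj/Dij,
// which are not shown here; a common choice (an assumption, not confirmed
// by this file) is a Gaussian weight on distance to the query point,
// sketched below for illustration only.
func exampleLocalWeight(query, xi []float64, bandwidth float64) float64 {
    // w(i) = exp(-||query - xi||^2 / (2·bandwidth^2))
    var squaredDist float64
    for j := range query {
        d := query[j] - xi[j]
        squaredDist += d * d
    }
    return math.Exp(-squaredDist / (2 * bandwidth * bandwidth))
}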
func TestOnlineTwoDXNormalizedShouldPass1(t *testing.T) {
    // create the channel of data and errors
    stream := make(chan base.Datapoint, 10000)
    errors := make(chan error, 20)

    var updates int

    model := NewLogistic(base.StochasticGA, .0001, 0, 0, nil, nil, 2)

    go model.OnlineLearn(errors, stream, func(theta [][]float64) {
        updates++
    }, true)

    var iter int
    for i := -200.0; abs(i) > 1; i *= -0.82 {
        for j := -200.0; abs(j) > 1; j *= -0.82 {
            x := []float64{i, j}
            base.NormalizePoint(x)

            if x[0]/2+2*x[1]-4 > 0 {
                stream <- base.Datapoint{
                    X: []float64{i, j},
                    Y: []float64{1.0},
                }
            } else {
                stream <- base.Datapoint{
                    X: []float64{i, j},
                    // the logistic (binary) model expects labels in {0, 1},
                    // so the negative class is 0.0 rather than -1.0
                    Y: []float64{0.0},
                }
            }

            iter++
        }
    }

    // close the dataset
    close(stream)

    err, more := <-errors
    assert.Nil(t, err, "Learning error should be nil")
    assert.False(t, more, "There should be no errors returned")

    assert.True(t, updates > 100, "There should be more than 100 updates of theta")

    for i := -200.0; i < 200; i += 100 {
        for j := -200.0; j < 200; j += 100 {
            x := []float64{i, j}
            base.NormalizePoint(x)

            guess, err := model.Predict([]float64{i, j}, true)
            assert.Nil(t, err, "Prediction error should be nil")
            assert.Len(t, guess, 1, "Guess should have length 1")

            if x[0]/2+2*x[1]-4 > 0 {
                assert.True(t, 1.0 > guess[0] && guess[0] > 0.5, "Guess should be 1 for %v, %v", i, j)
            } else {
                assert.True(t, 0.0 < guess[0] && guess[0] < 0.5, "Guess should be 0 for %v, %v", i, j)
            }
        }
    }
}