Пример #1
0
// TestThreeDimensionalPlaneNormalizedShouldPass2 runs the same scenario as
// the test above it in the file, but trains the Logistic model with
// base.StochasticGA.
//
// The model is trained on nX/nY (declared elsewhere in the package) and is
// then queried on a 40x40 integer grid. Every grid point is normalized
// before prediction and the expected class is derived from the normalized
// coordinates.
func TestThreeDimensionalPlaneNormalizedShouldPass2(t *testing.T) {
	model := NewLogistic(base.StochasticGA, .0001, 0, 3000, nX, nY)
	err := model.Learn()
	assert.Nil(t, err, "Learning error should be nil")

	for i := -20; i < 20; i++ {
		for j := -20; j < 20; j++ {
			x := []float64{float64(i), float64(j)}
			base.NormalizePoint(x)

			guess, err := model.Predict(x, true)

			// Check the call itself before touching guess[0]: when the
			// prediction fails the slice may be nil, and indexing it would
			// panic instead of reporting an ordinary test failure.
			errOK := assert.Nil(t, err, "Prediction error should be nil")
			lenOK := assert.Len(t, guess, 1, "Length of a Logistic model output from the hypothesis should always be a 1 dimensional vector. Never multidimensional.")
			if !errOK || !lenOK {
				continue
			}

			if x[0]+x[1] > 5 {
				assert.True(t, guess[0] > 0.5, "Guess should be more likely to be 1")
				assert.True(t, guess[0] < 1.001, "Guess should not exceed 1 ever")
			} else {
				assert.True(t, guess[0] < 0.5, "Guess should be more likely to be 0")
				assert.True(t, guess[0] > 0.0, "Guess should not be below 0 even")
			}
		}
	}
}
Пример #2
0
// Predict returns the softmax class probabilities for the input x given
// the current parameter vectors θ (one row of s.Parameters per class).
//
// The returned slice has s.k entries; entry i is
//
//	exp(θ[i]·[1, x...]) / Σ_c exp(θ[c]·[1, x...])
//
// where the leading 1 accounts for the constant term stored at index 0 of
// every row.
//
// If normalize is given as true, x is first passed to base.NormalizePoint,
// which works on the caller's slice in place.
//
// An error is returned when the model holds fewer than s.k parameter
// vectors, or when any of the vectors that are used is not exactly one
// element longer than x.
func (s *Softmax) Predict(x []float64, normalize ...bool) ([]float64, error) {
	if len(s.Parameters) != 0 && len(x)+1 != len(s.Parameters[0]) {
		return nil, fmt.Errorf("Error: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v (len(theta[0]) = %v)\n", len(x), len(s.Parameters), len(s.Parameters[0]))
	}

	// Without this guard the loop below would index past the end of
	// s.Parameters (for example on a model that was never initialized).
	if len(s.Parameters) < s.k {
		return nil, fmt.Errorf("Error: model has %v parameter vectors but %v classes", len(s.Parameters), s.k)
	}

	// Row 0 was validated above; make sure the remaining rows that will be
	// read have the same shape.
	for i := 1; i < s.k; i++ {
		if len(x)+1 != len(s.Parameters[i]) {
			return nil, fmt.Errorf("Error: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v (len(theta[%v]) = %v)\n", len(x), len(s.Parameters), i, len(s.Parameters[i]))
		}
	}

	if len(normalize) != 0 && normalize[0] {
		base.NormalizePoint(x)
	}

	result := make([]float64, s.k)
	largest := math.Inf(-1)

	for i := 0; i < s.k; i++ {
		// include constant term in sum
		sum := s.Parameters[i][0]

		for j := range x {
			sum += x[j] * s.Parameters[i][j+1]
		}

		result[i] = sum
		if sum > largest {
			largest = sum
		}
	}

	// Shift every score by the largest one before exponentiating. The
	// ratio is mathematically unchanged, but math.Exp can no longer
	// overflow to +Inf and turn the division into Inf/Inf = NaN.
	var denom float64
	for i := range result {
		result[i] = math.Exp(result[i] - largest)
		denom += result[i]
	}

	for i := range result {
		result[i] /= denom
	}

	return result, nil
}
Пример #3
0
// Predict classifies x from the labels of the neighbors selected out of
// the training set: the labels (Y) of the k.K retained neighbors are
// averaged and the mean is passed through round.
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
//
// An error is returned when k.K is not positive, when k.K exceeds the
// number of training examples, when the expected results do not cover the
// training set, or when x does not have the training set's dimension.
func (k *KNN) Predict(x []float64, normalize ...bool) ([]float64, error) {
	// A non-positive K would either panic in make (negative) or divide by
	// zero in the final vote (zero).
	if k.K <= 0 {
		return nil, fmt.Errorf("Given K (%v) must be greater than zero", k.K)
	}
	if k.K > len(k.trainingSet) {
		return nil, fmt.Errorf("Given K (%v) is greater than the length of the training set", k.K)
	}
	if len(k.expectedResults) < len(k.trainingSet) {
		return nil, fmt.Errorf("Training set has %v examples but only %v expected results", len(k.trainingSet), len(k.expectedResults))
	}
	// 1 <= K <= len(trainingSet), so trainingSet[0] exists here.
	if len(x) != len(k.trainingSet[0]) {
		return nil, fmt.Errorf("Given x (len %v) does not match dimensions of training set", len(x))
	}

	if len(normalize) != 0 && normalize[0] {
		base.NormalizePoint(x)
	}

	// seed the neighbor list with K randomly chosen training examples
	//
	// NOTE(review): the same example can be drawn more than once here and
	// is offered again by the loop below; whether that can leave
	// duplicates in the result depends on insertSorted — confirm.
	neighbors := make([]nn, k.K)
	length := len(k.trainingSet)
	for i := range neighbors {
		index := rand.Intn(length)
		neighbors[i] = nn{
			X: k.trainingSet[index],
			Y: k.expectedResults[index],

			Distance: k.Distance(x, k.trainingSet[index]),
		}
	}

	// offer every training example to the neighbor list
	for i := range k.trainingSet {
		dist := k.Distance(x, k.trainingSet[i])
		neighbors = insertSorted(nn{
			X: k.trainingSet[i],
			Y: k.expectedResults[i],

			Distance: dist,
		}, neighbors)
	}

	// take an unweighted vote: every neighbor's label counts equally
	sum := 0.0
	for i := range neighbors {
		sum += neighbors[i].Y
	}

	return []float64{round(sum / float64(k.K))}, nil
}
Пример #4
0
// Predict classifies x with the kernel perceptron's current support
// vectors. Each entry of p.SV contributes its label Y[0] multiplied by
// p.Kernel(SV.X, x); the result is 1 when the accumulated total is
// strictly positive and -1 otherwise.
//
// If normalize is given as true, x is passed to base.NormalizePoint
// before the kernel is evaluated.
//
// The returned error is always nil.
func (p *KernelPerceptron) Predict(x []float64, normalize ...bool) ([]float64, error) {
	if wantNorm := len(normalize) > 0 && normalize[0]; wantNorm {
		base.NormalizePoint(x)
	}

	activation := 0.0
	for _, sv := range p.SV {
		activation += sv.Y[0] * p.Kernel(sv.X, x)
	}

	// only a strictly positive activation maps to the positive class
	if activation > 0 {
		return []float64{1}, nil
	}
	return []float64{-1.0}, nil
}
Пример #5
0
// Predict evaluates the linear hypothesis at x using the current
// parameter vector θ (l.Parameters):
//
//	θ[0] + x[0]*θ[1] + ... + x[n-1]*θ[n]
//
// The single-element result slice holds that value. An error is returned
// when θ is not exactly one element longer than x.
//
// If normalize is given as true, x is first handed to
// base.NormalizePoint. Only use this if the model was trained on
// normalized inputs and the input being passed is not yet normalized.
func (l *LeastSquares) Predict(x []float64, normalize ...bool) ([]float64, error) {
	theta := l.Parameters
	if len(theta) != len(x)+1 {
		return nil, fmt.Errorf("Error: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v\n", len(x), len(theta))
	}

	if len(normalize) > 0 && normalize[0] {
		base.NormalizePoint(x)
	}

	// theta[0] is the constant term; theta[i+1] pairs with x[i]
	estimate := theta[0]
	for i, xi := range x {
		estimate += xi * theta[i+1]
	}

	return []float64{estimate}, nil
}
Пример #6
0
// Predict returns the index of the centroid in k.Centroids for which
// diff(x, centroid) is smallest. The index is reported as the single
// float64 element of the result; ties go to the lowest index.
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
//
// An error is returned when the model has no centroids or when x does
// not have the same length as the first centroid.
func (k *TriangleKMeans) Predict(x []float64, normalize ...bool) ([]float64, error) {
	// Indexing k.Centroids[0] below would panic on a model without
	// centroids, so report that case as an error instead.
	if len(k.Centroids) == 0 {
		return nil, fmt.Errorf("Error: model has no centroids to compare the input against!\n\tLength of x given: %v\n", len(x))
	}

	if len(x) != len(k.Centroids[0]) {
		return nil, fmt.Errorf("Error: Centroid vector should be the same length as input vector!\n\tLength of x given: %v\n\tLength of centroid: %v\n", len(x), len(k.Centroids[0]))
	}

	if len(normalize) != 0 && normalize[0] {
		base.NormalizePoint(x)
	}

	// start with centroid 0 as the best guess and keep whichever later
	// centroid is strictly closer
	var guess int
	minDiff := diff(x, k.Centroids[0])
	for j := 1; j < len(k.Centroids); j++ {
		difference := diff(x, k.Centroids[j])
		if difference < minDiff {
			minDiff = difference
			guess = j
		}
	}

	return []float64{float64(guess)}, nil
}
Пример #7
0
// Predict classifies x with the current parameter vector θ
// (p.Parameters). It computes θ[0] + Σ x[i]*θ[i+1] and returns 1 when
// that value is strictly positive and -1 otherwise, wrapped in a
// single-element slice.
//
// An error is returned when θ is not exactly one element longer than x.
// If normalize is given as true, x is passed to base.NormalizePoint
// before the sum is evaluated.
func (p *Perceptron) Predict(x []float64, normalize ...bool) ([]float64, error) {
	theta := p.Parameters
	if len(theta) != len(x)+1 {
		return nil, fmt.Errorf("Error: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v\n", len(x), len(theta))
	}

	if len(normalize) > 0 && normalize[0] {
		base.NormalizePoint(x)
	}

	// theta[0] is the constant term; theta[i+1] pairs with x[i]
	activation := theta[0]
	for i, xi := range x {
		activation += xi * theta[i+1]
	}

	// the decision boundary itself (activation == 0) counts as -1
	if activation > 0 {
		return []float64{1}, nil
	}
	return []float64{-1.0}, nil
}
Пример #8
0
// Predict classifies x from its nearest training examples: every
// training example is offered to insertSorted (capped at k.K entries),
// the labels (Y) of the retained neighbors are averaged over k.K, and
// the mean is passed through round.
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
//
// An error is returned when k.K is not positive, when k.K exceeds the
// number of training examples, when the expected results do not cover the
// training set, or when x does not have the training set's dimension.
func (k *KNN) Predict(x []float64, normalize ...bool) ([]float64, error) {
	// K == 0 would divide by zero in the vote below, and together with an
	// empty training set it would slip past the next check and panic on
	// k.trainingSet[0].
	if k.K <= 0 {
		return nil, fmt.Errorf("Given K (%v) must be greater than zero", k.K)
	}
	if k.K > len(k.trainingSet) {
		return nil, fmt.Errorf("Given K (%v) is greater than the length of the training set", k.K)
	}
	if len(k.expectedResults) < len(k.trainingSet) {
		return nil, fmt.Errorf("Training set has %v examples but only %v expected results", len(k.trainingSet), len(k.expectedResults))
	}
	// 1 <= K <= len(trainingSet), so trainingSet[0] exists here.
	if len(x) != len(k.trainingSet[0]) {
		return nil, fmt.Errorf("Given x (len %v) does not match dimensions of training set", len(x))
	}

	if len(normalize) != 0 && normalize[0] {
		base.NormalizePoint(x)
	}

	// initialize the neighbors as an empty
	// slice of Neighbors. insertSorted will
	// take care of capping the neighbors at
	// K.
	neighbors := []nn{}

	// calculate nearest neighbors
	for i := range k.trainingSet {
		dist := k.Distance(x, k.trainingSet[i])
		neighbors = insertSorted(nn{
			X: k.trainingSet[i],
			Y: k.expectedResults[i],

			Distance: dist,
		}, neighbors, k.K)
	}

	// take an unweighted vote: every neighbor's label counts equally
	sum := 0.0
	for i := range neighbors {
		sum += neighbors[i].Y
	}

	return []float64{round(sum / float64(k.K))}, nil
}
Пример #9
0
// OnlineLearn trains the perceptron from the datapoints received on the
// dataset channel. Whenever the model makes a wrong prediction the
// parameter vector theta is updated, and the onUpdate callback is called
// with the newly updated parameter vector. Learning stops when the data
// channel is closed and all remaining datapoints within the channel have
// been read.
//
// The errors channel is closed when learning is completed, so a caller
// can range over it to find out when training is done.
//
// errors chan error:
//
// Receives one error for every datapoint that had to be skipped (wrong
// input dimension, or a Y that does not have length 1). The caller must
// keep reading from it, otherwise learning blocks on the first error.
// If nil is passed, a private channel is created and drained
// internally, so errors are silently discarded.
//
// dataset chan base.Datapoint:
//
// The stream of training examples. Passing nil reports an error on the
// errors channel, closes it and returns immediately. Labels are
// compared against the output of Predict, which is 1 or -1.
//
// onUpdate func([][]float64):
//
// Called whenever the perceptron updates its parameter vector theta.
// The argument holds a single row: a copy of theta taken right after
// the update, so the callback may keep or modify it freely. This might
// be useful if you want to maintain an up to date persisted model in a
// database of your choosing.
//
// The callback is spawned into a new goroutine, so it may take a long
// time or block without stalling learning. Callbacks are not waited
// for and may run concurrently with each other. A nil callback is
// ignored.
//
// normalize ...bool:
//
// When the first value is true, every datapoint's X is passed to
// base.NormalizePoint (in place) before it is used. This is given as a
// parameter because you won't have direct access to the dataset before
// hand like you would in batch/stochastic settings.
//
// Example Online, Binary Perceptron (no layers, etc.):
//
//	// create the channel of data and errors
//	stream := make(chan base.Datapoint, 100)
//	errors := make(chan error)
//
//	model := NewPerceptron(0.1, 1, stream)
//
//	go model.OnlineLearn(errors, stream, func(theta [][]float64) {
//		// do something with the new theta (persist
//		// to database?) in here.
//		fmt.Printf("Theta updated to %v!\n", theta[0])
//	})
//
//	// start passing data to our datastream
//	//
//	// note that this data could be coming from
//	// some web server, or whatever!!
//	go func() {
//		for i := -500.0; abs(i) > 1; i *= -0.997 {
//			y := -1.0
//			if 10+(i-20)/2 > 0 {
//				y = 1.0
//			}
//			stream <- base.Datapoint{
//				X: []float64{i - 20},
//				Y: []float64{y},
//			}
//		}
//
//		// close the dataset once everything has been sent
//		close(stream)
//	}()
//
//	// this loop ends when training is done and the
//	// errors channel has been closed
//	for err := range errors {
//		fmt.Printf("Error passed: %v", err)
//	}
//
//	// now you can predict!!
//	// note that guess is a []float64 of len() == 1
//	// when it isn't nil
//	guess, err := model.Predict([]float64{4})
//	if err != nil {
//		panic("EGATZ!! I FOUND AN ERROR! BETTER CHECK YOUR INPUT DIMENSIONS!")
//	}
func (p *Perceptron) OnlineLearn(errors chan error, dataset chan base.Datapoint, onUpdate func([][]float64), normalize ...bool) {
	// Resolve a nil errors channel before anything is sent on it: a send
	// on a nil channel blocks forever. Nobody else holds a reference to a
	// channel created here, so it is drained in the background until it
	// is closed.
	if errors == nil {
		errors = make(chan error)
		go func() {
			for range errors {
			}
		}()
	}

	if dataset == nil {
		errors <- fmt.Errorf("ERROR: Attempting to learn with a nil data stream!\n")
		close(errors)
		return
	}

	fmt.Printf("Training:\n\tModel: Perceptron Classifier\n\tOptimization Method: Online Perceptron\n\tFeatures: %v\n\tLearning Rate α: %v\n...\n\n", len(p.Parameters), p.alpha)

	norm := len(normalize) != 0 && normalize[0]

	// the loop ends once dataset is closed and drained
	for point := range dataset {
		if norm {
			base.NormalizePoint(point.X)
		}

		// Predict also checks that the point has the correct
		// dimensions (one less than the parameter vector), so
		// point.X[i-1] below is always in range.
		guess, err := p.Predict(point.X)
		if err != nil {
			// send the error channel some info and
			// skip this datapoint
			errors <- err
			continue
		}

		if len(point.Y) != 1 {
			errors <- fmt.Errorf("The binary perceptron model requires that the data results (y) have length 1 - given %v", len(point.Y))
			continue
		}

		// only a wrong guess changes the parameters
		if guess[0] == point.Y[0] {
			continue
		}

		step := p.alpha * (point.Y[0] - guess[0])
		p.Parameters[0] += step
		for i := 1; i < len(p.Parameters); i++ {
			p.Parameters[i] += step * point.X[i-1]
		}

		if onUpdate != nil {
			// Hand the callback its own copy of theta. Wrapping
			// p.Parameters in a new outer slice would still share the
			// backing array with the next update.
			snapshot := make([]float64, len(p.Parameters))
			copy(snapshot, p.Parameters)
			go onUpdate([][]float64{snapshot})
		}
	}

	fmt.Printf("Training Completed.\n%v\n\n", p)
	close(errors)
}
Пример #10
0
// OnlineLearn runs similar to using a fixed dataset with
// Stochastic Gradient Descent, but it handles data by
// passing it as a channel, and returns errors through
// a channel, which lets it run responsive to inputted data
// from outside the model itself (like using data from the
// stock market at timed intervals or using realtime data
// about the weather.)
//
// For every datapoint the class probabilities are computed once from the
// current parameters, and all parameter vectors are then updated together
// from those probabilities.
//
// The onUpdate callback is called after every datapoint that updated the
// parameters, so you are able to persist the model with the most up to
// date vectors at all times (you could persist to a database within the
// callback, for example.) The callback receives its own copy of the
// parameters and is spawned into another goroutine, so it may take long
// or block. A nil callback is ignored.
//
// Errors: a datapoint whose Y does not have length 1, or whose X is not
// one element shorter than every parameter vector, is reported on the
// errors channel and skipped. If an update would make any parameter ±Inf
// or NaN, the error is reported, the parameters are left as they were,
// the errors channel is closed and learning stops. The errors channel is
// also closed when the dataset channel is closed and drained. If errors
// is nil, a private channel is created and drained internally.
//
// NOTE that this function is suggested to run in its own
// goroutine, or at least is designed as such.
//
// NOTE part 2: You can pass in an empty dataset, so long
// as it's not nil, and start pushing after.
//
// NOTE part 3: each example is only looked at as it goes
// through the channel, so if you want to have each example
// looked at more than once you must manually pass the data
// yourself.
//
// NOTE part 4: the optional parameter 'normalize' will,
// if true, normalize all data streamed through the
// channel to unit length. This will affect the outcome
// of the hypothesis, though it could be favorable if
// your data comes in drastically different scales.
//
// Example Online Softmax Regression:
//
//	// create the channel of data and errors
//	stream := make(chan base.Datapoint, 100)
//	errors := make(chan error)
//
//	// notice how we are adding another integer
//	// to the end of the NewSoftmax call. This
//	// tells the model to use that number of features
//	// (2) in lieu of finding that from the dataset
//	// like you would with batch/stochastic GD
//	//
//	// Also – the 'base.StochasticGA' doesn't affect
//	// anything. You could put batch or any other model.
//	model := NewSoftmax(base.StochasticGA, 5e-5, 0, 3, 0, nil, nil, 2)
//
//	// start exactly one learner per errors channel: the
//	// learner closes the channel when it is done
//	go model.OnlineLearn(errors, stream, func(theta [][]float64) {
//		// do something with the new theta (persist
//		// to database?) in here.
//	})
//
//	// start passing data to our datastream
//	go func() {
//		for iter := 0; iter < 3; iter++ {
//			for i := -2.0; i < 2.0; i += 0.15 {
//				for j := -2.0; j < 2.0; j += 0.15 {
//					y := 0.0
//					if -2*i+j/2-0.5 > 0 && -1*i-j < 0 {
//						y = 2.0
//					} else if -2*i+j/2-0.5 > 0 && -1*i-j > 0 {
//						y = 1.0
//					}
//
//					stream <- base.Datapoint{
//						X: []float64{i, j},
//						Y: []float64{y},
//					}
//				}
//			}
//		}
//
//		// close the dataset
//		close(stream)
//	}()
//
//	// this will block until the error
//	// channel is closed in the learning
//	// function (it will, don't worry!)
//	for err := range errors {
//		if err != nil {
//			panic("THERE WAS AN ERROR!!! RUN!!!!")
//		}
//	}
//
//	// Below here all the learning is completed
//
//	// predict like usual (two features, as configured above)
//	guess, err := model.Predict([]float64{0.5, -1.2})
//	if err != nil {
//		panic("AAAARGGGH! SHIVER ME TIMBERS! THESE ROTTEN SCOUNDRELS FOUND AN ERROR!!!")
//	}
func (s *Softmax) OnlineLearn(errors chan error, dataset chan base.Datapoint, onUpdate func([][]float64), normalize ...bool) {
	// Nobody else holds a reference to a channel created here, so drain
	// it in the background; otherwise the first reported error would
	// block learning forever.
	if errors == nil {
		errors = make(chan error)
		go func() {
			for range errors {
			}
		}()
	}
	if dataset == nil {
		errors <- fmt.Errorf("ERROR: Attempting to learn with a nil data stream!\n")
		close(errors)
		return
	}

	fmt.Fprintf(s.Output, "Training:\n\tModel: Softmax Classifier (%v classes)\n\tOptimization Method: Online Stochastic Gradient Descent\n\tFeatures: %v\n\tLearning Rate α: %v\n...\n\n", s.k, len(s.Parameters), s.alpha)

	norm := len(normalize) != 0 && normalize[0]

	// the loop ends once dataset is closed and drained
	for point := range dataset {
		if len(point.Y) != 1 {
			errors <- fmt.Errorf("ERROR: point.Y must have a length of 1. Point: %v", point)
			continue
		}

		if norm {
			base.NormalizePoint(point.X)
		}

		// account for constant term
		x := append([]float64{1}, point.X...)

		// every parameter vector is indexed alongside x below, so a
		// mismatched point has to be rejected rather than panic
		mismatch := false
		for _, theta := range s.Parameters {
			if len(theta) != len(x) {
				mismatch = true
				break
			}
		}
		if mismatch {
			errors <- fmt.Errorf("ERROR: every parameter vector must be 1 longer than point.X (length %v). Point: %v", len(point.X), point)
			continue
		}

		// Score every class with its OWN parameter vector (theta_c * x).
		// Each row of s.Parameters is treated as one class; presumably
		// there are s.k of them — NOTE(review): confirm against the
		// constructor.
		scores := make([]float64, len(s.Parameters))
		largest := math.Inf(-1)
		for c, theta := range s.Parameters {
			var inside float64
			for j, val := range theta {
				inside += val * x[j]
			}
			scores[c] = inside
			if inside > largest {
				largest = inside
			}
		}

		// Shift by the largest score before exponentiating so math.Exp
		// cannot overflow; the resulting probabilities are unchanged.
		var denom float64
		for c := range scores {
			scores[c] = math.Exp(scores[c] - largest)
			denom += scores[c]
		}

		// Build the complete set of new parameters first, so that all
		// classes are updated from the same probabilities and a
		// diverging update never leaves the model half-written.
		updated := make([][]float64, len(s.Parameters))
		diverged := false
		for c, theta := range s.Parameters {
			// indicator: 1 when this point is labelled with class c
			var ident float64
			if abs(point.Y[0]-float64(c)) < 1e-3 {
				ident = 1
			}
			residual := ident - scores[c]/denom

			row := make([]float64, len(theta))
			for j := range theta {
				grad := x[j] * residual

				// add in the regularization term
				// λ*θ[j]
				//
				// notice that we don't count the
				// constant term
				//
				// NOTE(review): the term is added, matching
				// Logistic.OnlineLearn; confirm the intended sign.
				if j != 0 {
					grad += s.regularization * theta[j]
				}

				row[j] = theta[j] + s.alpha*grad
				if math.IsInf(row[j], 0) || math.IsNaN(row[j]) {
					diverged = true
				}
			}
			updated[c] = row
		}

		if diverged {
			errors <- fmt.Errorf("Sorry dude! Learning diverged. Some value of the parameter vector theta is ±Inf or NaN")
			close(errors)
			return
		}

		// now simultaneously update theta
		for c := range s.Parameters {
			copy(s.Parameters[c], updated[c])
		}

		// updated shares no memory with s.Parameters, so the callback
		// goroutine cannot race with the next update
		if onUpdate != nil {
			go onUpdate(updated)
		}
	}

	fmt.Fprintf(s.Output, "Training Completed.\n%v\n\n", s)
	close(errors)
}
Пример #11
0
// OnlineLearn runs similar to using a fixed dataset with
// Stochastic Gradient Descent, but it handles data by
// passing it as a channel, and returns errors through
// a channel, which lets it run responsive to inputted data
// from outside the model itself (like using data from the
// stock market at timed intervals or using realtime data
// about the weather.)
//
// The onUpdate callback is called after every datapoint that updated the
// parameter vector theta, so you are able to persist the model with the
// most up to date vector at all times (you could persist to a database
// within the callback, for example.) The callback receives a single row
// holding its own copy of theta and is spawned into another goroutine, so
// it may take long or block. A nil callback is ignored.
//
// Errors: a datapoint is reported on the errors channel and skipped,
// leaving theta untouched, when its Y does not have length 1, when
// Predict rejects it, when its X is not one element shorter than theta,
// or when the update would make any parameter ±Inf or NaN. Learning
// continues with the next datapoint in all of these cases. The errors
// channel is closed when the dataset channel is closed and drained. If
// errors is nil, a private channel is created and drained internally.
//
// NOTE that this function is suggested to run in its own
// goroutine, or at least is designed as such.
//
// NOTE part 2: You can pass in an empty dataset, so long
// as it's not nil, and start pushing after.
//
// NOTE part 3: each example is only looked at as it goes
// through the channel, so if you want to have each example
// looked at more than once you must manually pass the data
// yourself.
//
// NOTE part 4: the optional parameter 'normalize' will,
// if true, normalize all data streamed through the
// channel to unit length. This will affect the outcome
// of the hypothesis, though it could be favorable if
// your data comes in drastically different scales.
//
// Example Online Logistic Regression:
//
//	// create the channel of data and errors
//	stream := make(chan base.Datapoint, 100)
//	errors := make(chan error)
//
//	// notice how we are adding another integer
//	// to the end of the NewLogistic call. This
//	// tells the model to use that number of features
//	// (4) in lieu of finding that from the dataset
//	// like you would with batch/stochastic GD
//	//
//	// Also – the 'base.StochasticGA' doesn't affect
//	// anything. You could put batch.
//	model := NewLogistic(base.StochasticGA, .0001, 0, 0, nil, nil, 4)
//
//	go model.OnlineLearn(errors, stream, func(theta [][]float64) {
//		// do something with the new theta (persist
//		// to database?) in here.
//	})
//
//	go func() {
//		for iterations := 0; iterations < 20; iterations++ {
//			for i := -200.0; abs(i) > 1; i *= -0.75 {
//				for j := -200.0; abs(j) > 1; j *= -0.75 {
//					for k := -200.0; abs(k) > 1; k *= -0.75 {
//						for l := -200.0; abs(l) > 1; l *= -0.75 {
//							y := 0.0
//							if i/2+2*k-4*j+2*l+3 > 0 {
//								y = 1.0
//							}
//
//							stream <- base.Datapoint{
//								X: []float64{i, j, k, l},
//								Y: []float64{y},
//							}
//						}
//					}
//				}
//			}
//		}
//
//		// close the dataset to tell the model
//		// to stop learning when it finishes reading
//		// what's left in the channel
//		close(stream)
//	}()
//
//	// this will block until the error
//	// channel is closed in the learning
//	// function (it will, don't worry!)
//	for err := range errors {
//		if err != nil {
//			panic("THERE WAS AN ERROR!!! RUN!!!!")
//		}
//	}
//
//	// Below here all the learning is completed
//
//	// predict like usual
//	guess, err := model.Predict([]float64{42, 6, 10, -32})
//	if err != nil {
//		panic("AAAARGGGH! SHIVER ME TIMBERS! THESE ROTTEN SCOUNDRELS FOUND AN ERROR!!!")
//	}
func (l *Logistic) OnlineLearn(errors chan error, dataset chan base.Datapoint, onUpdate func([][]float64), normalize ...bool) {
	// Nobody else holds a reference to a channel created here, so drain
	// it in the background; otherwise the first reported error would
	// block learning forever.
	if errors == nil {
		errors = make(chan error)
		go func() {
			for range errors {
			}
		}()
	}
	if dataset == nil {
		errors <- fmt.Errorf("ERROR: Attempting to learn with a nil data stream!\n")
		close(errors)
		return
	}

	fmt.Fprintf(l.Output, "Training:\n\tModel: Logistic (Binary) Classifier\n\tOptimization Method: Online Stochastic Gradient Descent\n\tFeatures: %v\n\tLearning Rate α: %v\n...\n\n", len(l.Parameters), l.alpha)

	norm := len(normalize) != 0 && normalize[0]

	// the loop ends once dataset is closed and drained
	for point := range dataset {
		// skip the point: point.Y[0] is read below
		if len(point.Y) != 1 {
			errors <- fmt.Errorf("ERROR: point.Y must have a length of 1. Point: %v", point)
			continue
		}

		if norm {
			base.NormalizePoint(point.X)
		}

		// The prediction depends only on the current theta, which is not
		// modified until the whole update has been computed, so it is
		// evaluated once per datapoint.
		prediction, err := l.Predict(point.X)
		if err != nil {
			errors <- err
			continue
		}

		// point.X[j-1] is read for every j >= 1 below
		if len(point.X)+1 != len(l.Parameters) {
			errors <- fmt.Errorf("ERROR: point.X must be 1 shorter than the parameter vector (length %v). Point: %v", len(l.Parameters), point)
			continue
		}

		residual := point.Y[0] - prediction[0]

		newTheta := make([]float64, len(l.Parameters))
		diverged := false
		for j, theta := range l.Parameters {
			// account for constant term
			// x is x[i][j] via Andrew Ng's terminology
			x := 1.0
			if j != 0 {
				x = point.X[j-1]
			}

			gradient := residual * x

			// add in the regularization term
			// λ*θ[j]
			//
			// notice that we don't count the
			// constant term
			if j != 0 {
				gradient += l.regularization * theta
			}

			newTheta[j] = theta + l.alpha*gradient
			if math.IsInf(newTheta[j], 0) || math.IsNaN(newTheta[j]) {
				diverged = true
			}
		}

		// Report divergence once and drop the whole update, instead of
		// writing only the parameters that happened to stay finite.
		if diverged {
			errors <- fmt.Errorf("Sorry! Learning diverged. Some value of the parameter vector theta is ±Inf or NaN")
			continue
		}

		// now simultaneously update Theta
		copy(l.Parameters, newTheta)

		// newTheta is allocated per datapoint and shares no memory with
		// l.Parameters, so the callback goroutine cannot race with the
		// next update
		if onUpdate != nil {
			go onUpdate([][]float64{newTheta})
		}
	}

	fmt.Fprintf(l.Output, "Training Completed.\n%v\n\n", l)
	close(errors)
}
Пример #12
0
// TestLinearKernelTwoDXNormalizedShouldPass1 trains a kernel perceptron
// with a linear kernel online, on normalized two dimensional points
// labelled by the sign of 5*x[0]+10*x[1]-4, and then requires that fewer
// than 14% of the predictions on a second grid are wrong.
func TestLinearKernelTwoDXNormalizedShouldPass1(t *testing.T) {
	// create the channel of data and errors
	stream := make(chan base.Datapoint, 100)
	errors := make(chan error)

	model := NewKernelPerceptron(base.LinearKernel())

	go model.OnlineLearn(errors, stream, func(supportVector [][]float64) {}, true)

	for i := -200.0; abs(i) > 1; i *= -0.57 {
		for j := -200.0; abs(j) > 1; j *= -0.57 {
			x := []float64{i, j}
			base.NormalizePoint(x)

			label := -1.0
			if 5*x[0]+10*x[1]-4 > 0 {
				label = 1.0
			}

			stream <- base.Datapoint{
				X: x,
				Y: []float64{label},
			}
		}
	}

	// close the dataset
	close(stream)

	// any value received before the channel is closed is a failure
	for {
		err, more := <-errors
		assert.False(t, more, "There should not be any errors!")

		if !more {
			break
		}
		assert.Nil(t, err, "Learning error should be nil")
	}

	var count int
	var incorrect int

	for i := -200.0; abs(i) > 1; i *= -0.53 {
		for j := -200.0; abs(j) > 1; j *= -0.53 {
			x := []float64{i, j}
			base.NormalizePoint(x)

			guess, err := model.Predict([]float64{i, j}, true)
			count++

			// A failed prediction is counted as incorrect rather than
			// indexed: guess[0] on an empty slice would panic and abort
			// the whole test run.
			errOK := assert.Nil(t, err, "Prediction error should be nil")
			lenOK := assert.Len(t, guess, 1, "Guess should have length 1")
			if !errOK || !lenOK {
				incorrect++
				continue
			}

			if 5*x[0]+10*x[1]-4 > 0 && guess[0] != 1.0 {
				incorrect++
			} else if 5*x[0]+10*x[1]-4 <= 0 && guess[0] != -1.0 {
				incorrect++
			}
		}
	}

	fmt.Printf("Predictions: %v\n\tIncorrect: %v\n\tAccuracy Rate: %v percent\n", count, incorrect, 100*(1.0-float64(incorrect)/float64(count)))
	assert.True(t, float64(incorrect)/float64(count) < 0.14, "Accuracy should be greater than 86%")
}
Пример #13
0
// Predict fits the model around the query point x and returns the fitted
// value at x as a single-element slice.
//
// Every call trains: after the inputs have been validated, the optimizer
// selected by l.method (base.BatchGA or base.StochasticGA) runs for
// l.maxIterations passes and updates l.Parameters in place, starting from
// whatever values they currently hold. The result is then
// θ[0] + Σ x[i]*θ[i+1]. Progress messages are written to standard output.
//
// if normalize is given as true, then the input will
// first be normalized to unit length. Only use this if
// you trained off of normalized inputs and are feeding
// an un-normalized input
//
// An error is returned when θ is not exactly one element longer than x,
// when there are no training examples or expected results, when l.Dj or
// l.Dij fails, when an update makes a parameter ±Inf or NaN (parameters
// before the offending index have already been written at that point), or
// when l.method is not one of the two supported methods. Validation
// errors are additionally echoed through the builtin print.
func (l *LocalLinear) Predict(x []float64, normalize ...bool) ([]float64, error) {
	const noExamples = "ERROR: Attempting to learn with no training examples!\n"

	// reject echoes a validation error before handing it to the caller
	reject := func(err error) ([]float64, error) {
		print(err.Error())
		return nil, err
	}

	if len(l.Parameters) != len(x)+1 {
		return reject(fmt.Errorf("ERROR: Parameter vector should be 1 longer than input vector!\n\tLength of x given: %v\n\tLength of parameters: %v\n", len(x), len(l.Parameters)))
	}

	if len(normalize) > 0 && normalize[0] {
		base.NormalizePoint(x)
	}

	// the nil checks come first so that a nil expectedResults is reported
	// as missing training examples, exactly as an empty training set is
	examples := len(l.trainingSet)
	if l.trainingSet == nil || l.expectedResults == nil || examples == 0 || len(l.trainingSet[0]) == 0 {
		return reject(fmt.Errorf(noExamples))
	}
	if len(l.expectedResults) == 0 {
		return reject(fmt.Errorf("ERROR: Attempting to learn with no expected results! This isn't an unsupervised model!! You'll need to include data before you learn :)\n"))
	}

	fmt.Printf("Training:\n\tModel: Locally Weighted Linear Regression\n\tOptimization Method: %v\n\tCenter Point: %v\n\tTraining Examples: %v\n\tFeatures: %v\n\tLearning Rate α: %v\n\tRegularization Parameter λ: %v\n...\n\n", l.method, x, examples, len(l.trainingSet[0]), l.alpha, l.regularization)

	// commit writes a fully computed candidate vector into l.Parameters,
	// stopping at the first value that is ±Inf or NaN
	commit := func(candidate []float64) error {
		for j, value := range candidate {
			if math.IsInf(value, 0) || math.IsNaN(value) {
				return fmt.Errorf("Sorry! Learning diverged. Some value of the parameter vector theta is ±Inf or NaN")
			}
			l.Parameters[j] = value
		}
		return nil
	}

	// scratch buffer for the next parameter vector; every element is
	// overwritten before each commit
	candidate := make([]float64, len(l.Parameters))
	passes := 0

	switch l.method {
	case base.BatchGA:
		// one update per pass, using the gradient over the training set
		for ; passes < l.maxIterations; passes++ {
			for j := range candidate {
				dj, err := l.Dj(x, j)
				if err != nil {
					return nil, err
				}
				candidate[j] = l.Parameters[j] + l.alpha*dj
			}

			if err := commit(candidate); err != nil {
				return nil, err
			}
		}
	case base.StochasticGA:
		// one update per training example per pass
		for ; passes < l.maxIterations; passes++ {
			for i := 0; i < examples; i++ {
				for j := range candidate {
					dj, err := l.Dij(x, i, j)
					if err != nil {
						return nil, err
					}
					candidate[j] = l.Parameters[j] + l.alpha*dj
				}

				if err := commit(candidate); err != nil {
					return nil, err
				}
			}
		}
	default:
		return nil, fmt.Errorf("Chose a training method not implemented for LocalLinear regression")
	}

	fmt.Printf("Training Completed. Went through %v iterations.\n%v\n\n", passes, l)

	// θ[0] is the constant term; θ[i+1] pairs with x[i]
	fitted := l.Parameters[0]
	for i, xi := range x {
		fitted += xi * l.Parameters[i+1]
	}

	return []float64{fitted}, nil
}
Пример #14
0
// TestOnlineTwoDXNormalizedShouldPass1 trains a Logistic model online
// with normalization enabled, checks that no learning error is reported
// and that theta was updated more than 100 times, and then verifies the
// predictions on a coarse grid of points.
func TestOnlineTwoDXNormalizedShouldPass1(t *testing.T) {
	// create the channel of data and errors
	stream := make(chan base.Datapoint, 10000)
	errors := make(chan error, 20)

	// The update callback runs in goroutines started by OnlineLearn, so a
	// plain int incremented there and read here would be a data race.
	// Each callback drops a token into this buffered channel instead; the
	// number of tokens is the number of updates. The buffer matches the
	// stream's capacity so callbacks never block.
	updates := make(chan struct{}, cap(stream))

	model := NewLogistic(base.StochasticGA, .0001, 0, 0, nil, nil, 2)

	go model.OnlineLearn(errors, stream, func(theta [][]float64) {
		updates <- struct{}{}
	}, true)

	for i := -200.0; abs(i) > 1; i *= -0.82 {
		for j := -200.0; abs(j) > 1; j *= -0.82 {
			// the label is derived from the normalized point, while the
			// raw point is streamed (the learner normalizes it itself)
			x := []float64{i, j}
			base.NormalizePoint(x)

			label := -1.0
			if x[0]/2+2*x[1]-4 > 0 {
				label = 1.0
			}

			stream <- base.Datapoint{
				X: []float64{i, j},
				Y: []float64{label},
			}
		}
	}

	// close the dataset
	close(stream)

	err, more := <-errors
	assert.Nil(t, err, "Learning error should be nil")
	assert.False(t, more, "There should be no errors returned")

	assert.True(t, len(updates) > 100, "There should be more than 100 updates of theta")

	for i := -200.0; i < 200; i += 100 {
		for j := -200.0; j < 200; j += 100 {
			x := []float64{i, j}
			base.NormalizePoint(x)

			guess, err := model.Predict([]float64{i, j}, true)

			// do not index guess unless the prediction succeeded;
			// guess[0] on an empty slice would panic
			errOK := assert.Nil(t, err, "Prediction error should be nil")
			lenOK := assert.Len(t, guess, 1, "Guess should have length 1")
			if !errOK || !lenOK {
				continue
			}

			if x[0]/2+2*x[1]-4 > 0 {
				assert.True(t, 1.0 > guess[0] && guess[0] > 0.5, "Guess should be 1 for %v, %v", i, j)
			} else {
				assert.True(t, 0.0 < guess[0] && guess[0] < 0.5, "Guess should be 0 for %v, %v", i, j)
			}
		}
	}
}