// SplitDataset randomly partitions the rows of ds into a training subset and a
// test subset. Each row is assigned to the training subset independently with
// probability trainingRatio, so the split is approximate rather than exact.
func SplitDataset(ds dataset.Dataset, trainingRatio float64, source rand.Source) (dataset.Dataset, dataset.Dataset, error) {
	if trainingRatio < 0 || trainingRatio > 1 {
		return nil, nil, fmt.Errorf("unable to split dataset with invalid ratio %.2f", trainingRatio)
	}

	numRows := ds.NumRows()
	if numRows == 0 {
		return nil, nil, errors.New("cannot split empty dataset")
	}

	r := rand.New(source)
	perm := r.Perm(numRows)

	trainingRowMap := make([]int, 0, numRows)
	testRowMap := make([]int, 0, numRows)

	for _, rowIndex := range perm {
		if r.Float64() < trainingRatio {
			trainingRowMap = append(trainingRowMap, rowIndex)
		} else {
			testRowMap = append(testRowMap, rowIndex)
		}
	}

	return dataset.NewSubset(ds, trainingRowMap), dataset.NewSubset(ds, testRowMap), nil
}
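A minimal sketch of how SplitDataset might be called. The buildDataset helper, the AddRowFromStrings call, the 0.8 ratio, and the seed are illustrative assumptions, not part of the package above.

package main

import (
	"fmt"
	"math/rand"

	"github.com/amitkgupta/goodlearn/data/columntype"
	"github.com/amitkgupta/goodlearn/data/dataset"
	"github.com/amitkgupta/goodlearn/evaluation/crossvalidation"
)

func main() {
	ds := buildDataset() // hypothetical helper, sketched below

	training, test, err := crossvalidation.SplitDataset(ds, 0.8, rand.NewSource(42))
	if err != nil {
		panic(err) // e.g. invalid ratio or empty dataset
	}

	// On average 80% of rows land in the training subset; exact counts vary by seed.
	fmt.Println(training.NumRows(), test.NumRows())
}

// buildDataset is a hypothetical helper that constructs a tiny one-column dataset.
// It assumes the dataset exposes an AddRowFromStrings method for populating rows.
func buildDataset() dataset.Dataset {
	columnTypes, _ := columntype.StringsToColumnTypes([]string{"0"})
	ds := dataset.NewDataset([]int{0}, []int{}, columnTypes)
	for _, value := range []string{"1.0", "2.0", "3.0", "4.0", "5.0"} {
		ds.AddRowFromStrings([]string{value})
	}
	return ds
}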
// Train validates the training dataset and stores it for later classification.
// k-nearest-neighbours is a lazy learner, so no further work happens here.
func (classifier *kNNClassifier) Train(trainingData dataset.Dataset) error {
	if !trainingData.AllFeaturesFloats() {
		return knnerrors.NewNonFloatFeaturesTrainingSetError()
	}

	if trainingData.NumRows() == 0 {
		return knnerrors.NewEmptyTrainingDatasetError()
	}

	classifier.trainingData = trainingData
	return nil
}
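For context, a sketch of how this Train method might be exercised. The classifier/knn import path, the AddRowFromStrings call, the row values, and the choice of k = 3 are assumptions for illustration only.

package main

import (
	"fmt"

	"github.com/amitkgupta/goodlearn/classifier/knn"
	"github.com/amitkgupta/goodlearn/data/columntype"
	"github.com/amitkgupta/goodlearn/data/dataset"
)

func main() {
	classifier, err := knn.NewKNNClassifier(3)
	if err != nil {
		panic(err)
	}

	// Target is the string label in column 0; the two float features are columns 1 and 2.
	columnTypes, err := columntype.StringsToColumnTypes([]string{"label", "0", "0"})
	if err != nil {
		panic(err)
	}
	trainingData := dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes)

	// Assumes the dataset exposes AddRowFromStrings for populating rows.
	trainingData.AddRowFromStrings([]string{"cat", "1.0", "2.0"})
	trainingData.AddRowFromStrings([]string{"dog", "5.0", "6.0"})

	// Train only validates and stores the data; k-NN defers work to classification time.
	fmt.Println(classifier.Train(trainingData))
}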
// Train validates the dataset and stores it as the training set for parameter
// estimation: every feature and the single target must be floats.
func (gdpe *gradientDescentParameterEstimator) Train(ds dataset.Dataset) error {
	if !ds.AllFeaturesFloats() {
		return gdeErrors.NewNonFloatFeaturesError()
	}

	if !ds.AllTargetsFloats() {
		return gdeErrors.NewNonFloatTargetError()
	}

	if ds.NumTargets() != 1 {
		return gdeErrors.NewInvalidNumberOfTargetsError(ds.NumTargets())
	}

	if ds.NumFeatures() == 0 {
		return gdeErrors.NewNoFeaturesError()
	}

	gdpe.trainingSet = ds
	return nil
}
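A rough sketch of the estimator's lifecycle once Train succeeds. The import path for the estimator package, the learning rate, precision, iteration cap, AddRowFromStrings call, and the shape of the initial parameter guess are all assumptions made for this example.

package main

import (
	"fmt"

	"github.com/amitkgupta/goodlearn/data/columntype"
	"github.com/amitkgupta/goodlearn/data/dataset"
	"github.com/amitkgupta/goodlearn/parameterestimator/gradientdescentestimator"
)

func main() {
	estimator, err := gradientdescentestimator.NewGradientDescentParameterEstimator(
		0.01, // learning rate
		1e-6, // precision
		1000, // max iterations
		gradientdescentestimator.LinearModelLeastSquaresLossGradient,
	)
	if err != nil {
		panic(err)
	}

	// One float feature (column 0) and one float target (column 1).
	columnTypes, err := columntype.StringsToColumnTypes([]string{"0", "0"})
	if err != nil {
		panic(err)
	}
	ds := dataset.NewDataset([]int{0}, []int{1}, columnTypes)

	// Assumes AddRowFromStrings exists for populating rows; the target is roughly 2*x.
	ds.AddRowFromStrings([]string{"1.0", "2.0"})
	ds.AddRowFromStrings([]string{"2.0", "4.0"})
	ds.AddRowFromStrings([]string{"3.0", "6.0"})

	if err := estimator.Train(ds); err != nil {
		panic(err)
	}

	// Estimate is assumed to take an initial []float64 guess, here one coefficient
	// per feature plus an intercept term.
	coefficients, err := estimator.Estimate([]float64{0, 0})
	if err != nil {
		panic(err)
	}
	fmt.Println(coefficients)
}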
"github.com/amitkgupta/goodlearn/data/columntype" "github.com/amitkgupta/goodlearn/data/dataset" "github.com/amitkgupta/goodlearn/data/slice" "github.com/amitkgupta/goodlearn/evaluation/crossvalidation" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" ) var _ = Describe("CrossValidation", func() { Describe("SplitDataset", func() { var ( originalSet dataset.Dataset trainingRatio float64 trainingSet dataset.Dataset testSet dataset.Dataset err error ) JustBeforeEach(func() { trainingSet, testSet, err = crossvalidation.SplitDataset( originalSet, trainingRatio, rand.NewSource(5330), // SEED ) }) BeforeEach(func() { columnTypes, columnTypesError := columntype.StringsToColumnTypes([]string{"0"}) Ω(columnTypesError).ShouldNot(HaveOccurred())
package dataset_test

import (
	"github.com/amitkgupta/goodlearn/data/columntype"
	"github.com/amitkgupta/goodlearn/data/dataset"
	"github.com/amitkgupta/goodlearn/data/row"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = Describe("Dataset", func() {
	var ds dataset.Dataset

	Describe("AllFeaturesFloats", func() {
		Context("When all features are floats", func() {
			BeforeEach(func() {
				columnTypes, err := columntype.StringsToColumnTypes([]string{"1.0", "1.0"})
				Ω(err).ShouldNot(HaveOccurred())

				ds = dataset.NewDataset([]int{0, 1}, []int{}, columnTypes)
			})

			It("Returns true", func() {
				Ω(ds.AllFeaturesFloats()).Should(BeTrue())
			})
		})

		Context("When not all features are floats", func() {
			BeforeEach(func() {
				columnTypes, err := columntype.StringsToColumnTypes([]string{"x", "1.0"})
// Train fits the regression coefficients: it validates the dataset, then runs
// gradient descent against the linear-model least-squares loss.
func (regressor *linearRegressor) Train(trainingData dataset.Dataset) error {
	if !trainingData.AllFeaturesFloats() {
		return linearerrors.NewNonFloatFeaturesError()
	}

	if !trainingData.AllTargetsFloats() {
		return linearerrors.NewNonFloatTargetsError()
	}

	if trainingData.NumTargets() != 1 {
		return linearerrors.NewInvalidNumberOfTargetsError(trainingData.NumTargets())
	}

	if trainingData.NumFeatures() == 0 {
		return linearerrors.NewNoFeaturesError()
	}

	estimator, err := gradientdescentestimator.NewGradientDescentParameterEstimator(
		defaultLearningRate,
		defaultPrecision,
		defaultMaxIterations,
		gradientdescentestimator.LinearModelLeastSquaresLossGradient,
	)
	if err != nil {
		return linearerrors.NewEstimatorConstructionError(err)
	}

	err = estimator.Train(trainingData)
	if err != nil {
		return linearerrors.NewEstimatorTrainingError(err)
	}

	coefficients, err := estimator.Estimate(defaultInitialCoefficientEstimate(trainingData.NumFeatures()))
	if err != nil {
		return linearerrors.NewEstimatorEstimationError(err)
	}

	regressor.coefficients = coefficients
	return nil
}
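To show how the validation and gradient-descent steps above come together, here is a hedged end-to-end sketch of training the regressor. The row data and the AddRowFromStrings call are assumptions, and prediction is omitted since the corresponding method is not shown here.

package main

import (
	"fmt"

	"github.com/amitkgupta/goodlearn/data/columntype"
	"github.com/amitkgupta/goodlearn/data/dataset"
	"github.com/amitkgupta/goodlearn/regressor/linear"
)

func main() {
	// Float target in column 0, two float features in columns 1 and 2.
	columnTypes, err := columntype.StringsToColumnTypes([]string{"0", "0", "0"})
	if err != nil {
		panic(err)
	}
	trainingData := dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes)

	// Assumes AddRowFromStrings exists for populating rows; target is roughly x1 + 2*x2.
	trainingData.AddRowFromStrings([]string{"5.0", "1.0", "2.0"})
	trainingData.AddRowFromStrings([]string{"8.0", "2.0", "3.0"})
	trainingData.AddRowFromStrings([]string{"11.0", "3.0", "4.0"})

	linearRegressor := linear.NewLinearRegressor()
	if err := linearRegressor.Train(trainingData); err != nil {
		panic(err)
	}
	fmt.Println("trained")
}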
It("Returns an error", func() { _, err := knn.NewKNNClassifier(-3) Ω(err).Should(HaveOccurred()) }) }) Context("When given a positive value for k", func() { It("Returns an error", func() { _, err := knn.NewKNNClassifier(5) Ω(err).ShouldNot(HaveOccurred()) }) }) }) Describe("Train", func() { var trainingData dataset.Dataset BeforeEach(func() { kNNClassifier, _ = knn.NewKNNClassifier(1) }) Context("When the dataset is empty", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "0", "0"}) Ω(err).ShouldNot(HaveOccurred()) trainingData = dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes) }) It("Returns an error", func() { err := kNNClassifier.Train(trainingData)
Context("Given positive learning rate, precisions, and max iterations", func() { It("Does not return an error", func() { _, err := gradientdescentestimator.NewGradientDescentParameterEstimator( 0.3, 0.3, 100, lossGradient, ) Ω(err).ShouldNot(HaveOccurred()) }) }) }) Describe("Train", func() { var trainingSet dataset.Dataset var estimator parameterestimator.ParameterEstimator BeforeEach(func() { var err error estimator, err = gradientdescentestimator.NewGradientDescentParameterEstimator( 0.3, 0.3, 100, lossGradient, ) Ω(err).ShouldNot(HaveOccurred()) }) Context("Given a dataset with non-float features", func() { BeforeEach(func() {
"github.com/amitkgupta/goodlearn/data/dataset" "github.com/amitkgupta/goodlearn/data/row" "github.com/amitkgupta/goodlearn/data/slice" "github.com/amitkgupta/goodlearn/errors/regressor/linearerrors" "github.com/amitkgupta/goodlearn/regressor" "github.com/amitkgupta/goodlearn/regressor/linear" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" ) var _ = Describe("LinearRegressor", func() { var linearRegressor regressor.Regressor Describe("Train", func() { var trainingData dataset.Dataset BeforeEach(func() { linearRegressor = linear.NewLinearRegressor() }) Context("When the dataset's features are not all floats", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"3.3", "bye", "0"}) Ω(err).ShouldNot(HaveOccurred()) trainingData = dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes) }) It("Returns an error", func() { err := linearRegressor.Train(trainingData)