Esempio n. 1
0
// SplitDataset randomly partitions ds into a training subset and a test
// subset, returned in that order.
//
// Rows are visited in a random permutation drawn from source, and each row is
// independently assigned to the training subset when a fresh uniform draw in
// [0, 1) is below trainingRatio; otherwise it goes to the test subset. The
// realised proportion therefore only approximates trainingRatio, and either
// subset may end up empty. The same source seed yields the same split.
//
// An error (with nil subsets) is returned when trainingRatio is not a number
// in the closed interval [0, 1], or when ds has no rows.
func SplitDataset(ds dataset.Dataset, trainingRatio float64, source rand.Source) (dataset.Dataset, dataset.Dataset, error) {
	// Expressed as a negated range check on purpose: every comparison against
	// NaN is false, so the naive "ratio < 0 || ratio > 1" lets NaN through and
	// the loop below would then silently send every row to the test subset.
	if !(trainingRatio >= 0 && trainingRatio <= 1) {
		return nil, nil, fmt.Errorf("Unable to split dataset with invalid ratio %.2f", trainingRatio)
	}

	numRows := ds.NumRows()
	if numRows == 0 {
		return nil, nil, errors.New("Cannot split empty dataset")
	}

	r := rand.New(source)
	perm := r.Perm(numRows)

	// Either subset can receive up to every row, so both are sized for the
	// worst case to avoid regrowth while appending.
	trainingRowMap := make([]int, 0, numRows)
	testRowMap := make([]int, 0, numRows)

	for _, rowIndex := range perm {
		if r.Float64() < trainingRatio {
			trainingRowMap = append(trainingRowMap, rowIndex)
		} else {
			testRowMap = append(testRowMap, rowIndex)
		}
	}

	return dataset.NewSubset(ds, trainingRowMap), dataset.NewSubset(ds, testRowMap), nil
}
Esempio n. 2
0
// Train validates trainingData and, if it is acceptable, stores it on the
// classifier for later use.
//
// The checks run in this order and the first failure is returned:
//   - trainingData.AllFeaturesFloats() must report true;
//   - trainingData.NumRows() must be non-zero.
//
// On failure the classifier's stored training data is left untouched.
func (classifier *kNNClassifier) Train(trainingData dataset.Dataset) error {
	switch {
	case !trainingData.AllFeaturesFloats():
		return knnerrors.NewNonFloatFeaturesTrainingSetError()
	case trainingData.NumRows() == 0:
		return knnerrors.NewEmptyTrainingDatasetError()
	}

	classifier.trainingData = trainingData
	return nil
}
// Train validates ds and, if it is acceptable, records it as the estimator's
// training set.
//
// The checks run in this order and the first failure is returned:
//   - ds.AllFeaturesFloats() must report true;
//   - ds.AllTargetsFloats() must report true;
//   - ds.NumTargets() must be exactly 1 (the actual count is passed to the
//     error constructor);
//   - ds.NumFeatures() must be non-zero.
//
// On failure the previously stored training set is left untouched.
func (gdpe *gradientDescentParameterEstimator) Train(ds dataset.Dataset) error {
	switch {
	case !ds.AllFeaturesFloats():
		return gdeErrors.NewNonFloatFeaturesError()
	case !ds.AllTargetsFloats():
		return gdeErrors.NewNonFloatTargetError()
	case ds.NumTargets() != 1:
		return gdeErrors.NewInvalidNumberOfTargetsError(ds.NumTargets())
	case ds.NumFeatures() == 0:
		return gdeErrors.NewNoFeaturesError()
	}

	gdpe.trainingSet = ds
	return nil
}
Esempio n. 4
0
	"github.com/amitkgupta/goodlearn/data/columntype"
	"github.com/amitkgupta/goodlearn/data/dataset"
	"github.com/amitkgupta/goodlearn/data/slice"
	"github.com/amitkgupta/goodlearn/evaluation/crossvalidation"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = Describe("CrossValidation", func() {
	Describe("SplitDataset", func() {
		var (
			originalSet   dataset.Dataset
			trainingRatio float64

			trainingSet dataset.Dataset
			testSet     dataset.Dataset
			err         error
		)

		JustBeforeEach(func() {
			trainingSet, testSet, err = crossvalidation.SplitDataset(
				originalSet,
				trainingRatio,
				rand.NewSource(5330), // SEED
			)
		})

		BeforeEach(func() {
			columnTypes, columnTypesError := columntype.StringsToColumnTypes([]string{"0"})
			Ω(columnTypesError).ShouldNot(HaveOccurred())
Esempio n. 5
0
package dataset_test

import (
	"github.com/amitkgupta/goodlearn/data/columntype"
	"github.com/amitkgupta/goodlearn/data/dataset"
	"github.com/amitkgupta/goodlearn/data/row"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = Describe("Dataset", func() {
	var ds dataset.Dataset

	Describe("AllFeaturesFloats", func() {
		Context("When all features are floats", func() {
			BeforeEach(func() {
				columnTypes, err := columntype.StringsToColumnTypes([]string{"1.0", "1.0"})
				Ω(err).ShouldNot(HaveOccurred())

				ds = dataset.NewDataset([]int{0, 1}, []int{}, columnTypes)
			})

			It("Returns true", func() {
				Ω(ds.AllFeaturesFloats()).Should(BeTrue())
			})
		})

		Context("When not all features are floats", func() {
			BeforeEach(func() {
				columnTypes, err := columntype.StringsToColumnTypes([]string{"x", "1.0"})
Esempio n. 6
0
// Train fits the regressor's coefficients to trainingData.
//
// The dataset is validated first, in this order, returning the first failure:
//   - trainingData.AllFeaturesFloats() must report true;
//   - trainingData.AllTargetsFloats() must report true;
//   - trainingData.NumTargets() must be exactly 1;
//   - trainingData.NumFeatures() must be non-zero.
//
// A gradient descent parameter estimator is then built from the package
// defaults (learning rate, precision, max iterations) with the linear-model
// least-squares loss gradient, trained on trainingData, and asked for an
// estimate starting from defaultInitialCoefficientEstimate. Failures in each
// of those three steps are wrapped in the corresponding linearerrors value.
//
// regressor.coefficients is only assigned when every step succeeds.
func (regressor *linearRegressor) Train(trainingData dataset.Dataset) error {
	switch {
	case !trainingData.AllFeaturesFloats():
		return linearerrors.NewNonFloatFeaturesError()
	case !trainingData.AllTargetsFloats():
		return linearerrors.NewNonFloatTargetsError()
	case trainingData.NumTargets() != 1:
		return linearerrors.NewInvalidNumberOfTargetsError(trainingData.NumTargets())
	case trainingData.NumFeatures() == 0:
		return linearerrors.NewNoFeaturesError()
	}

	estimator, constructionErr := gradientdescentestimator.NewGradientDescentParameterEstimator(
		defaultLearningRate,
		defaultPrecision,
		defaultMaxIterations,
		gradientdescentestimator.LinearModelLeastSquaresLossGradient,
	)
	if constructionErr != nil {
		return linearerrors.NewEstimatorConstructionError(constructionErr)
	}

	if trainingErr := estimator.Train(trainingData); trainingErr != nil {
		return linearerrors.NewEstimatorTrainingError(trainingErr)
	}

	// The starting point for the descent is sized by the number of features.
	initialEstimate := defaultInitialCoefficientEstimate(trainingData.NumFeatures())
	fitted, estimationErr := estimator.Estimate(initialEstimate)
	if estimationErr != nil {
		return linearerrors.NewEstimatorEstimationError(estimationErr)
	}

	regressor.coefficients = fitted
	return nil
}
Esempio n. 7
0
			It("Returns an error", func() {
				_, err := knn.NewKNNClassifier(-3)
				Ω(err).Should(HaveOccurred())
			})
		})

		Context("When given a positive value for k", func() {
			// The description previously read "Returns an error", which
			// contradicted the assertion below: construction with a positive
			// k is expected to succeed.
			It("Does not return an error", func() {
				_, err := knn.NewKNNClassifier(5)
				Ω(err).ShouldNot(HaveOccurred())
			})
		})
	})

	Describe("Train", func() {
		var trainingData dataset.Dataset

		BeforeEach(func() {
			kNNClassifier, _ = knn.NewKNNClassifier(1)
		})

		Context("When the dataset is empty", func() {
			BeforeEach(func() {
				columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "0", "0"})
				Ω(err).ShouldNot(HaveOccurred())

				trainingData = dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes)
			})

			It("Returns an error", func() {
				err := kNNClassifier.Train(trainingData)
		Context("Given positive learning rate, precisions, and max iterations", func() {
			It("Does not return an error", func() {
				_, err := gradientdescentestimator.NewGradientDescentParameterEstimator(
					0.3,
					0.3,
					100,
					lossGradient,
				)

				Ω(err).ShouldNot(HaveOccurred())
			})
		})
	})

	Describe("Train", func() {
		var trainingSet dataset.Dataset
		var estimator parameterestimator.ParameterEstimator

		BeforeEach(func() {
			var err error
			estimator, err = gradientdescentestimator.NewGradientDescentParameterEstimator(
				0.3,
				0.3,
				100,
				lossGradient,
			)
			Ω(err).ShouldNot(HaveOccurred())
		})

		Context("Given a dataset with non-float features", func() {
			BeforeEach(func() {
Esempio n. 9
0
	"github.com/amitkgupta/goodlearn/data/dataset"
	"github.com/amitkgupta/goodlearn/data/row"
	"github.com/amitkgupta/goodlearn/data/slice"
	"github.com/amitkgupta/goodlearn/errors/regressor/linearerrors"
	"github.com/amitkgupta/goodlearn/regressor"
	"github.com/amitkgupta/goodlearn/regressor/linear"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = Describe("LinearRegressor", func() {
	var linearRegressor regressor.Regressor

	Describe("Train", func() {
		var trainingData dataset.Dataset

		BeforeEach(func() {
			linearRegressor = linear.NewLinearRegressor()
		})

		Context("When the dataset's features are not all floats", func() {
			BeforeEach(func() {
				columnTypes, err := columntype.StringsToColumnTypes([]string{"3.3", "bye", "0"})
				Ω(err).ShouldNot(HaveOccurred())

				trainingData = dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes)
			})

			It("Returns an error", func() {
				err := linearRegressor.Train(trainingData)