func DatasetFromPath(filepath string, targetStartInclusive, targetEndExclusive int) (dataset.Dataset, error) { file, err := os.Open(filepath) if err != nil { return nil, csvparseerrors.NewUnableToOpenFileError(filepath, err) } reader := csv.NewReader(file) _, err = reader.Read() line, err := reader.Read() if err != nil { return nil, csvparseerrors.NewUnableToReadTwoLinesError(filepath, err) } columnTypes, err := columntype.StringsToColumnTypes(line) if err != nil { return nil, csvparseerrors.NewUnableToParseColumnTypesError(filepath, err) } numColumns := len(columnTypes) if targetOutOfBounds(targetStartInclusive, targetEndExclusive, numColumns) { return nil, csvparseerrors.NewTargetOutOfBoundsError(filepath, targetStartInclusive, targetEndExclusive, numColumns) } newDataset := dataset.NewDataset( featureColumnIndices(targetStartInclusive, targetEndExclusive, numColumns), targetColumnIndices(targetStartInclusive, targetEndExclusive, numColumns), columnTypes, ) for ; err == nil; line, err = reader.Read() { err = newDataset.AddRowFromStrings(line) if err != nil { return nil, csvparseerrors.NewUnableToParseRowError(filepath, err) } } if err != nil && err != io.EOF { return nil, csvparseerrors.NewGenericError(filepath, err) } return newDataset, nil }
err error ) JustBeforeEach(func() { trainingSet, testSet, err = crossvalidation.SplitDataset( originalSet, trainingRatio, rand.NewSource(5330), // SEED ) }) BeforeEach(func() { columnTypes, columnTypesError := columntype.StringsToColumnTypes([]string{"0"}) Ω(columnTypesError).ShouldNot(HaveOccurred()) originalSet = dataset.NewDataset([]int{}, []int{0}, columnTypes) }) Context("when the training ratio negative", func() { BeforeEach(func() { trainingRatio = -0.67 }) It("errors", func() { Ω(err).Should(HaveOccurred()) }) }) Context("when the training ratio is greater than 1", func() { BeforeEach(func() { trainingRatio = 1.67
"github.com/amitkgupta/goodlearn/data/row" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" ) var _ = Describe("Dataset", func() { var ds dataset.Dataset Describe("AllFeaturesFloats", func() { Context("When all features are floats", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"1.0", "1.0"}) Ω(err).ShouldNot(HaveOccurred()) ds = dataset.NewDataset([]int{0, 1}, []int{}, columnTypes) }) It("Returns true", func() { Ω(ds.AllFeaturesFloats()).Should(BeTrue()) }) }) Context("When not all features are floats", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"x", "1.0"}) Ω(err).ShouldNot(HaveOccurred()) ds = dataset.NewDataset([]int{0, 1}, []int{}, columnTypes) })
}) }) Describe("Train", func() { var trainingData dataset.Dataset BeforeEach(func() { kNNClassifier, _ = knn.NewKNNClassifier(1) }) Context("When the dataset is empty", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "0", "0"}) Ω(err).ShouldNot(HaveOccurred()) trainingData = dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes) }) It("Returns an error", func() { err := kNNClassifier.Train(trainingData) Ω(err).Should(HaveOccurred()) Ω(err).Should(BeAssignableToTypeOf(knnerrors.EmptyTrainingDatasetError{})) }) }) Context("When the dataset's features are not all floats", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "bye", "0"}) Ω(err).ShouldNot(HaveOccurred()) trainingData = dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes)
var err error estimator, err = gradientdescentestimator.NewGradientDescentParameterEstimator( 0.3, 0.3, 100, lossGradient, ) Ω(err).ShouldNot(HaveOccurred()) }) Context("Given a dataset with non-float features", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"x", "1.0"}) Ω(err).ShouldNot(HaveOccurred()) trainingSet = dataset.NewDataset([]int{0}, []int{1}, columnTypes) err = trainingSet.AddRowFromStrings([]string{"hi", "24"}) Ω(err).ShouldNot(HaveOccurred()) }) It("Returns an error", func() { err := estimator.Train(trainingSet) Ω(err).Should(BeAssignableToTypeOf(gdeErrors.NonFloatFeaturesError{})) }) }) Context("Given a dataset with a non-float target", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"x", "1.0"}) Ω(err).ShouldNot(HaveOccurred())
BeforeEach(func() { trueParameters = []float64{2, -3, 4} var err error estimator, err = gradientdescentestimator.NewGradientDescentParameterEstimator( 0.001, 0.000005, 1000, gradientdescentestimator.LinearModelLeastSquaresLossGradient, ) Ω(err).ShouldNot(HaveOccurred()) columnTypes, err := columntype.StringsToColumnTypes([]string{"1.0", "1.0", "1.0"}) Ω(err).ShouldNot(HaveOccurred()) trainingSet := dataset.NewDataset([]int{0, 1}, []int{2}, columnTypes) for i := 0; i < 20; i++ { for j := 0; j < 20; j++ { x0 := -1.9 + 0.2*float64(i) x1 := -1.9 + 0.2*float64(j) y := trueParameters[0]*x0 + trueParameters[1]*x1 + trueParameters[2] + 0.1*rand.NormFloat64() err = trainingSet.AddRowFromStrings([]string{ fmt.Sprintf("%.10f", x0), fmt.Sprintf("%.10f", x1), fmt.Sprintf("%.10f", y), }) Ω(err).ShouldNot(HaveOccurred()) } }