func makeSingleFloatTargets(floats ...float64) []slice.Slice { columnTypes, columnTypesError := columntype.StringsToColumnTypes([]string{"0"}) Ω(columnTypesError).ShouldNot(HaveOccurred()) targets := make([]slice.Slice, len(floats)) for i, f := range floats { target, err := slice.SliceFromRawValues(true, []int{0}, columnTypes, []float64{f}) Ω(err).ShouldNot(HaveOccurred()) targets[i] = target } return targets }
func DatasetFromPath(filepath string, targetStartInclusive, targetEndExclusive int) (dataset.Dataset, error) { file, err := os.Open(filepath) if err != nil { return nil, csvparseerrors.NewUnableToOpenFileError(filepath, err) } reader := csv.NewReader(file) _, err = reader.Read() line, err := reader.Read() if err != nil { return nil, csvparseerrors.NewUnableToReadTwoLinesError(filepath, err) } columnTypes, err := columntype.StringsToColumnTypes(line) if err != nil { return nil, csvparseerrors.NewUnableToParseColumnTypesError(filepath, err) } numColumns := len(columnTypes) if targetOutOfBounds(targetStartInclusive, targetEndExclusive, numColumns) { return nil, csvparseerrors.NewTargetOutOfBoundsError(filepath, targetStartInclusive, targetEndExclusive, numColumns) } newDataset := dataset.NewDataset( featureColumnIndices(targetStartInclusive, targetEndExclusive, numColumns), targetColumnIndices(targetStartInclusive, targetEndExclusive, numColumns), columnTypes, ) for ; err == nil; line, err = reader.Read() { err = newDataset.AddRowFromStrings(line) if err != nil { return nil, csvparseerrors.NewUnableToParseRowError(filepath, err) } } if err != nil && err != io.EOF { return nil, csvparseerrors.NewGenericError(filepath, err) } return newDataset, nil }
package columntype_test import ( "github.com/amitkgupta/goodlearn/data/columntype" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" ) var _ = Describe("Column Type", func() { Describe("StringsToColumnTypes", func() { It("Determines whether entires are floats or strings", func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "9.0"}) Ω(err).ShouldNot(HaveOccurred()) Ω(isStringColumnType(columnTypes[0])).Should(BeTrue()) Ω(isFloatColumnType(columnTypes[1])).Should(BeTrue()) }) It("Handles scientific notation correctly as floats", func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"1.0e308"}) Ω(err).ShouldNot(HaveOccurred()) Ω(isFloatColumnType(columnTypes[0])).Should(BeTrue()) }) It("Handles quoted numerals correctly as strings", func() { columnTypes, err := columntype.StringsToColumnTypes([]string{`"9.0"`}) Ω(err).ShouldNot(HaveOccurred()) Ω(isStringColumnType(columnTypes[0])).Should(BeTrue())
trainingSet dataset.Dataset testSet dataset.Dataset err error ) JustBeforeEach(func() { trainingSet, testSet, err = crossvalidation.SplitDataset( originalSet, trainingRatio, rand.NewSource(5330), // SEED ) }) BeforeEach(func() { columnTypes, columnTypesError := columntype.StringsToColumnTypes([]string{"0"}) Ω(columnTypesError).ShouldNot(HaveOccurred()) originalSet = dataset.NewDataset([]int{}, []int{0}, columnTypes) }) Context("when the training ratio negative", func() { BeforeEach(func() { trainingRatio = -0.67 }) It("errors", func() { Ω(err).Should(HaveOccurred()) }) })
Ω(stc.MaxDistance()).Should(Equal(initialMin - 1)) }) }) }) }) Describe("Vote", func() { var stc knnutilities.SortedTargetCollection Context("When the collection is not empty", func() { var target1, target2, target3 slice.Slice BeforeEach(func() { stc = knnutilities.NewKNNTargetCollection(5) columnTypes, err := columntype.StringsToColumnTypes([]string{"1.0"}) Ω(err).ShouldNot(HaveOccurred()) raw1, err := columnTypes[0].PersistRawFromString("1.0") Ω(err).ShouldNot(HaveOccurred()) raw2, err := columnTypes[0].PersistRawFromString("2.0") Ω(err).ShouldNot(HaveOccurred()) raw3, err := columnTypes[0].PersistRawFromString("3.0") Ω(err).ShouldNot(HaveOccurred()) target1, err = slice.SliceFromRawValues(true, []int{0}, columnTypes, []float64{raw1}) target2, err = slice.SliceFromRawValues(true, []int{0}, columnTypes, []float64{raw2}) target3, err = slice.SliceFromRawValues(true, []int{0}, columnTypes, []float64{raw3}) })
_, err := knn.NewKNNClassifier(5) Ω(err).ShouldNot(HaveOccurred()) }) }) }) Describe("Train", func() { var trainingData dataset.Dataset BeforeEach(func() { kNNClassifier, _ = knn.NewKNNClassifier(1) }) Context("When the dataset is empty", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "0", "0"}) Ω(err).ShouldNot(HaveOccurred()) trainingData = dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes) }) It("Returns an error", func() { err := kNNClassifier.Train(trainingData) Ω(err).Should(HaveOccurred()) Ω(err).Should(BeAssignableToTypeOf(knnerrors.EmptyTrainingDatasetError{})) }) }) Context("When the dataset's features are not all floats", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "bye", "0"})
) var _ = Describe("Slice", func() { Describe("SliceFromRawValues and Values", func() { var columnIndices []int var columnTypes []columntype.ColumnType var err error var s slice.Slice BeforeEach(func() { columnIndices = []int{1, 4, 3} }) Context("When all the relevant columns store float data", func() { BeforeEach(func() { columnTypes, err = columntype.StringsToColumnTypes([]string{"x", "1.0", "1.0", "1.0", "1.0", "1.0"}) }) Describe("When told all entires are floats", func() { BeforeEach(func() { s, err = slice.SliceFromRawValues(true, columnIndices, columnTypes, []float64{1.2, 0, 0, 1, 4.9, 2.2}) }) It("Does not return an error", func() { Ω(err).ShouldNot(HaveOccurred()) }) It("Returns a float slice with the correct values", func() { floatSlice, ok := s.(slice.FloatSlice) Ω(ok).Should(BeTrue())
. "github.com/onsi/gomega" ) var _ = Describe("LinearRegressor", func() { var linearRegressor regressor.Regressor Describe("Train", func() { var trainingData dataset.Dataset BeforeEach(func() { linearRegressor = linear.NewLinearRegressor() }) Context("When the dataset's features are not all floats", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"3.3", "bye", "0"}) Ω(err).ShouldNot(HaveOccurred()) trainingData = dataset.NewDataset([]int{1, 2}, []int{0}, columnTypes) }) It("Returns an error", func() { err := linearRegressor.Train(trainingData) Ω(err).Should(HaveOccurred()) Ω(err).Should(BeAssignableToTypeOf(linearerrors.NonFloatFeaturesTrainingSetError{})) }) }) Context("When the dataset's targets are not all floats", func() { BeforeEach(func() { columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "2.3", "0"})