// makeSingleFloatTargets wraps each supplied float in its own one-column
// slice and returns those slices in argument order.
//
// The column types are derived once from the sample entry "0". Both the type
// derivation and every slice construction are asserted with Gomega, so an
// error in either is reported as a spec failure rather than returned.
func makeSingleFloatTargets(floats ...float64) []slice.Slice {
	types, typesErr := columntype.StringsToColumnTypes([]string{"0"})
	Ω(typesErr).ShouldNot(HaveOccurred())

	result := make([]slice.Slice, 0, len(floats))
	for _, value := range floats {
		// Each slice selects column 0 of a single-entry row holding value.
		single, buildErr := slice.SliceFromRawValues(true, []int{0}, types, []float64{value})
		Ω(buildErr).ShouldNot(HaveOccurred())
		result = append(result, single)
	}
	return result
}
Beispiel #2
0
// DatasetFromPath reads the CSV file at filepath and loads it into a new
// dataset.
//
// The first line of the file is read and discarded (presumably a header row).
// The second line is used twice: its entries determine the column types, and
// it is also added to the dataset as the first row. Every following line is
// added as a further row. targetStartInclusive and targetEndExclusive are
// validated against the number of columns and then handed to
// featureColumnIndices and targetColumnIndices to choose the feature and
// target columns.
//
// On failure the dataset result is nil and the error is one of:
//   - NewUnableToOpenFileError: the file could not be opened.
//   - NewUnableToReadTwoLinesError: either of the first two lines could not be
//     read.
//   - NewUnableToParseColumnTypesError: column types could not be derived from
//     the second line.
//   - NewTargetOutOfBoundsError: targetOutOfBounds rejected the target range.
//   - NewUnableToParseRowError: a row could not be added to the dataset.
//   - NewGenericError: reading a later line failed for a reason other than
//     reaching the end of the file.
func DatasetFromPath(filepath string, targetStartInclusive, targetEndExclusive int) (dataset.Dataset, error) {
	file, err := os.Open(filepath)
	if err != nil {
		return nil, csvparseerrors.NewUnableToOpenFileError(filepath, err)
	}
	// Release the descriptor on every return path. The file is only read, so
	// a Close error carries nothing the caller could act on.
	defer file.Close()

	reader := csv.NewReader(file)

	// Skip the first line, but report a failure to read it instead of letting
	// the next Read overwrite the error.
	if _, err = reader.Read(); err != nil {
		return nil, csvparseerrors.NewUnableToReadTwoLinesError(filepath, err)
	}

	line, err := reader.Read()
	if err != nil {
		return nil, csvparseerrors.NewUnableToReadTwoLinesError(filepath, err)
	}

	columnTypes, err := columntype.StringsToColumnTypes(line)
	if err != nil {
		return nil, csvparseerrors.NewUnableToParseColumnTypesError(filepath, err)
	}

	numColumns := len(columnTypes)
	if targetOutOfBounds(targetStartInclusive, targetEndExclusive, numColumns) {
		return nil, csvparseerrors.NewTargetOutOfBoundsError(filepath, targetStartInclusive, targetEndExclusive, numColumns)
	}

	newDataset := dataset.NewDataset(
		featureColumnIndices(targetStartInclusive, targetEndExclusive, numColumns),
		targetColumnIndices(targetStartInclusive, targetEndExclusive, numColumns),
		columnTypes,
	)

	// line already holds the first data row, so each iteration adds the
	// current row before fetching the next one.
	for {
		if err = newDataset.AddRowFromStrings(line); err != nil {
			return nil, csvparseerrors.NewUnableToParseRowError(filepath, err)
		}

		line, err = reader.Read()
		if err == io.EOF {
			// End of file is the normal way for the loop to finish.
			break
		}
		if err != nil {
			return nil, csvparseerrors.NewGenericError(filepath, err)
		}
	}

	return newDataset, nil
}
Beispiel #3
0
package columntype_test

import (
	"github.com/amitkgupta/goodlearn/data/columntype"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = Describe("Column Type", func() {
	Describe("StringsToColumnTypes", func() {
		// Two entries are classified in one call: "hi" is expected to produce a
		// string column type and "9.0" a float column type.
		It("Determines whether entires are floats or strings", func() {
			types, parseErr := columntype.StringsToColumnTypes([]string{"hi", "9.0"})
			Ω(parseErr).ShouldNot(HaveOccurred())

			firstIsString := isStringColumnType(types[0])
			Ω(firstIsString).Should(BeTrue())

			secondIsFloat := isFloatColumnType(types[1])
			Ω(secondIsFloat).Should(BeTrue())
		})

		// An entry written with an exponent ("1.0e308") is expected to be
		// classified as a float column type.
		It("Handles scientific notation correctly as floats", func() {
			types, parseErr := columntype.StringsToColumnTypes([]string{"1.0e308"})
			Ω(parseErr).ShouldNot(HaveOccurred())

			isFloat := isFloatColumnType(types[0])
			Ω(isFloat).Should(BeTrue())
		})

		It("Handles quoted numerals correctly as strings", func() {
			columnTypes, err := columntype.StringsToColumnTypes([]string{`"9.0"`})
			Ω(err).ShouldNot(HaveOccurred())

			Ω(isStringColumnType(columnTypes[0])).Should(BeTrue())
			trainingSet dataset.Dataset
			testSet     dataset.Dataset
			err         error
		)

		// Perform the split in JustBeforeEach so that the BeforeEach hooks of
		// the individual contexts can set originalSet and trainingRatio first.
		// The results are stored in the shared trainingSet, testSet and err
		// variables for the specs to inspect.
		JustBeforeEach(func() {
			seededSource := rand.NewSource(5330) // fixed seed
			trainingSet, testSet, err = crossvalidation.SplitDataset(originalSet, trainingRatio, seededSource)
		})

		// Give every spec a freshly created dataset with a single column
		// (typed from the sample entry "0"), no feature columns, and column 0
		// as the target. No rows are added here.
		BeforeEach(func() {
			types, typesErr := columntype.StringsToColumnTypes([]string{"0"})
			Ω(typesErr).ShouldNot(HaveOccurred())

			featureColumns, targetColumns := []int{}, []int{0}
			originalSet = dataset.NewDataset(featureColumns, targetColumns, types)
		})

		// With a negative training ratio, the split performed in the
		// JustBeforeEach hook is expected to leave a non-nil error in err.
		Context("when the training ratio negative", func() {
			BeforeEach(func() {
				// Set before the split runs; the value only needs to be below zero.
				trainingRatio = -0.67
			})

			It("errors", func() {
				Ω(err).Should(HaveOccurred())
			})
		})
					Ω(stc.MaxDistance()).Should(Equal(initialMin - 1))
				})
			})
		})
	})

	Describe("Vote", func() {
		var stc knnutilities.SortedTargetCollection

		Context("When the collection is not empty", func() {
			var target1, target2, target3 slice.Slice

			// Create a fresh target collection and three single-column targets
			// built from the raw values of "1.0", "2.0" and "3.0" for the
			// specs in this context.
			BeforeEach(func() {
				stc = knnutilities.NewKNNTargetCollection(5)

				columnTypes, err := columntype.StringsToColumnTypes([]string{"1.0"})
				Ω(err).ShouldNot(HaveOccurred())

				raw1, err := columnTypes[0].PersistRawFromString("1.0")
				Ω(err).ShouldNot(HaveOccurred())

				raw2, err := columnTypes[0].PersistRawFromString("2.0")
				Ω(err).ShouldNot(HaveOccurred())

				raw3, err := columnTypes[0].PersistRawFromString("3.0")
				Ω(err).ShouldNot(HaveOccurred())

				// Assert every construction error. Without these checks a failed
				// build would leave a target unusable and only show up later as
				// an unrelated-looking failure in the specs.
				target1, err = slice.SliceFromRawValues(true, []int{0}, columnTypes, []float64{raw1})
				Ω(err).ShouldNot(HaveOccurred())

				target2, err = slice.SliceFromRawValues(true, []int{0}, columnTypes, []float64{raw2})
				Ω(err).ShouldNot(HaveOccurred())

				target3, err = slice.SliceFromRawValues(true, []int{0}, columnTypes, []float64{raw3})
				Ω(err).ShouldNot(HaveOccurred())
			})
Beispiel #6
0
				_, err := knn.NewKNNClassifier(5)
				Ω(err).ShouldNot(HaveOccurred())
			})
		})
	})

	Describe("Train", func() {
		var trainingData dataset.Dataset

		BeforeEach(func() {
			kNNClassifier, _ = knn.NewKNNClassifier(1)
		})

		// A dataset that has column types but no rows is expected to be
		// rejected by Train with an EmptyTrainingDatasetError.
		Context("When the dataset is empty", func() {
			BeforeEach(func() {
				types, typesErr := columntype.StringsToColumnTypes([]string{"hi", "0", "0"})
				Ω(typesErr).ShouldNot(HaveOccurred())

				// Columns 1 and 2 are features, column 0 is the target; no
				// rows are added.
				featureColumns, targetColumns := []int{1, 2}, []int{0}
				trainingData = dataset.NewDataset(featureColumns, targetColumns, types)
			})

			It("Returns an error", func() {
				trainErr := kNNClassifier.Train(trainingData)
				Ω(trainErr).Should(HaveOccurred())
				Ω(trainErr).Should(BeAssignableToTypeOf(knnerrors.EmptyTrainingDatasetError{}))
			})
		})

		Context("When the dataset's features are not all floats", func() {
			BeforeEach(func() {
				columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "bye", "0"})
Beispiel #7
0
)

var _ = Describe("Slice", func() {
	Describe("SliceFromRawValues and Values", func() {
		var columnIndices []int
		var columnTypes []columntype.ColumnType
		var err error
		var s slice.Slice

		// Every spec selects columns 1, 4 and 3, in that (deliberately
		// unsorted) order.
		BeforeEach(func() {
			columnIndices = []int{1, 4, 3}
		})

		Context("When all the relevant columns store float data", func() {
			// Six column types are derived: column 0 from "x" and columns 1-5
			// from "1.0", so all selected columns (1, 4, 3) come from "1.0"
			// entries.
			// NOTE(review): the error is stored in the shared err variable and
			// is not asserted here; nested hooks reassign err.
			BeforeEach(func() {
				columnTypes, err = columntype.StringsToColumnTypes([]string{"x", "1.0", "1.0", "1.0", "1.0", "1.0"})
			})

			Describe("When told all entires are floats", func() {
				// Build the slice with the all-floats flag set to true from a
				// six-value raw row; the result and error are stored in the
				// shared s and err variables for the specs below.
				BeforeEach(func() {
					s, err = slice.SliceFromRawValues(true, columnIndices, columnTypes, []float64{1.2, 0, 0, 1, 4.9, 2.2})
				})

				// Checks the err value left in the shared variable by the
				// setup hooks.
				It("Does not return an error", func() {
					Ω(err).ShouldNot(HaveOccurred())
				})

				It("Returns a float slice with the correct values", func() {
					floatSlice, ok := s.(slice.FloatSlice)
					Ω(ok).Should(BeTrue())
Beispiel #8
0
	. "github.com/onsi/gomega"
)

var _ = Describe("LinearRegressor", func() {
	var linearRegressor regressor.Regressor

	Describe("Train", func() {
		var trainingData dataset.Dataset

		// Assign a newly constructed linear regressor to the shared variable
		// before each Train spec.
		BeforeEach(func() {
			linearRegressor = linear.NewLinearRegressor()
		})

		// Feature column 1 is typed from the entry "bye" rather than a numeric
		// entry; Train is expected to reject such a dataset with a
		// NonFloatFeaturesTrainingSetError.
		Context("When the dataset's features are not all floats", func() {
			BeforeEach(func() {
				types, typesErr := columntype.StringsToColumnTypes([]string{"3.3", "bye", "0"})
				Ω(typesErr).ShouldNot(HaveOccurred())

				// Columns 1 and 2 are features, column 0 is the target.
				featureColumns, targetColumns := []int{1, 2}, []int{0}
				trainingData = dataset.NewDataset(featureColumns, targetColumns, types)
			})

			It("Returns an error", func() {
				trainErr := linearRegressor.Train(trainingData)
				Ω(trainErr).Should(HaveOccurred())
				Ω(trainErr).Should(BeAssignableToTypeOf(linearerrors.NonFloatFeaturesTrainingSetError{}))
			})
		})

		Context("When the dataset's targets are not all floats", func() {
			BeforeEach(func() {
				columnTypes, err := columntype.StringsToColumnTypes([]string{"hi", "2.3", "0"})