// Creates the features from the inputs. Features must be nSamples x nFeatures or nil func FeaturizeTrainable(t Trainable, inputs common.RowMatrix, featurizedInputs *mat64.Dense) *mat64.Dense { nSamples, nDim := inputs.Dims() if featurizedInputs == nil { nFeatures := t.NumFeatures() featurizedInputs = mat64.NewDense(nSamples, nFeatures, nil) } rowViewer, isRowViewer := inputs.(mat64.RawRowViewer) var f func(start, end int) if isRowViewer { f = func(start, end int) { featurizer := t.NewFeaturizer() for i := start; i < end; i++ { featurizer.Featurize(rowViewer.RawRowView(i), featurizedInputs.RawRowView(i)) } } } else { f = func(start, end int) { featurizer := t.NewFeaturizer() input := make([]float64, nDim) for i := start; i < end; i++ { inputs.Row(input, i) featurizer.Featurize(input, featurizedInputs.RawRowView(i)) } } } common.ParallelFor(nSamples, common.GetGrainSize(nSamples, minGrain, maxGrain), f) return featurizedInputs }
func (k *KMeans) Train(inputs *common.RowMatrix) { nSamples, inputDim := inputs.Dims() centroids := make([][]float64, k.Clusters) // Assign the centroids to random data point to start perm := rand.Perm(nSamples) for i := 0; i < k.Clusters; i++ { data := make([]float64, inputDim) idx := perm[i] inputs.Row(data, idx) centroids[i] = data } distances := mat64.NewDense(nSamples, k.Clusters, nil) row := make([]float64, inputDim) for i := 0; i < nSamples; i++ { inputs.Row(row, i) for j := 0; j < k.Clusters; j++ { d := floats.Distance(row, centroids[j]) distances.Set(i, j, d) } } }
func BatchPredict(batch BatchPredictor, inputs common.RowMatrix, outputs common.MutableRowMatrix, inputDim, outputDim int, grainSize int) (common.MutableRowMatrix, error) { // TODO: Add in something about error // Check that the inputs and outputs are the right sizes nSamples, dimInputs := inputs.Dims() if inputDim != dimInputs { return outputs, errors.New("predict batch: input dimension mismatch") } if outputs == nil { outputs = mat64.NewDense(nSamples, outputDim, nil) } else { nOutputSamples, dimOutputs := outputs.Dims() if dimOutputs != outputDim { return outputs, errors.New("predict batch: output dimension mismatch") } if nSamples != nOutputSamples { return outputs, errors.New("predict batch: rows mismatch") } } // Perform predictions in parallel. For each parallel call, form a new predictor so that // memory allocations are saved and no race condition happens. // If the input and/or output is a RowViewer, save time by avoiding a copy inputRVer, inputIsRowViewer := inputs.(mat64.RawRowViewer) outputRVer, outputIsRowViewer := outputs.(mat64.RawRowViewer) var f func(start, end int) // wrapper function to allow parallel prediction. Uses RowView if the type has it switch { default: panic("Shouldn't be here") case inputIsRowViewer, outputIsRowViewer: f = func(start, end int) { p := batch.NewPredictor() for i := start; i < end; i++ { p.Predict(inputRVer.RawRowView(i), outputRVer.RawRowView(i)) } } case inputIsRowViewer && !outputIsRowViewer: f = func(start, end int) { p := batch.NewPredictor() output := make([]float64, outputDim) for i := start; i < end; i++ { outputs.Row(output, i) p.Predict(inputRVer.RawRowView(i), output) outputs.SetRow(i, output) } } case !inputIsRowViewer && outputIsRowViewer: f = func(start, end int) { p := batch.NewPredictor() input := make([]float64, inputDim) for i := start; i < end; i++ { inputs.Row(input, i) p.Predict(input, outputRVer.RawRowView(i)) } } case !inputIsRowViewer && !outputIsRowViewer: f = func(start, end int) { p := batch.NewPredictor() input := make([]float64, inputDim) output := make([]float64, outputDim) for i := start; i < end; i++ { inputs.Row(input, i) outputs.Row(output, i) p.Predict(input, output) outputs.SetRow(i, output) } } } common.ParallelFor(nSamples, grainSize, f) return outputs, nil }