Example #1
0
// FeaturizeTrainable runs a featurizer obtained from t over every row of
// inputs and writes the result into the matching row of featurizedInputs,
// which is then returned.
//
// If featurizedInputs is nil, a new nSamples x t.NumFeatures() matrix is
// allocated. Otherwise the supplied matrix is written in place; it must be
// nSamples x nFeatures (its size is not checked here).
func FeaturizeTrainable(t Trainable, inputs common.RowMatrix, featurizedInputs *mat64.Dense) *mat64.Dense {
	rows, cols := inputs.Dims()
	if featurizedInputs == nil {
		featurizedInputs = mat64.NewDense(rows, t.NumFeatures(), nil)
	}

	// General path: copy each input row into a scratch buffer owned by the
	// chunk. Every chunk also creates its own featurizer, so nothing is shared
	// between invocations of the worker.
	work := func(lo, hi int) {
		fz := t.NewFeaturizer()
		scratch := make([]float64, cols)
		for r := lo; r < hi; r++ {
			inputs.Row(scratch, r)
			fz.Featurize(scratch, featurizedInputs.RawRowView(r))
		}
	}

	// Fast path: when the inputs expose raw row views, featurize straight from
	// the view and skip the per-row copy.
	if raw, ok := inputs.(mat64.RawRowViewer); ok {
		work = func(lo, hi int) {
			fz := t.NewFeaturizer()
			for r := lo; r < hi; r++ {
				fz.Featurize(raw.RawRowView(r), featurizedInputs.RawRowView(r))
			}
		}
	}

	grain := common.GetGrainSize(rows, minGrain, maxGrain)
	common.ParallelFor(rows, grain, work)
	return featurizedInputs
}
Example #2
0
// Train seeds k.Clusters centroids with input rows selected through a random
// permutation of the sample indices, then fills an nSamples x k.Clusters
// matrix with the distance from each sample to each centroid.
//
// NOTE(review): the centroids and the distance matrix are locals and nothing
// is stored on k, so the function looks unfinished — confirm intended use.
// NOTE(review): the permutation is indexed for every i < k.Clusters, so
// k.Clusters must not exceed the number of input rows.
func (k *KMeans) Train(inputs *common.RowMatrix) {
	rows, dim := inputs.Dims()
	centroids := make([][]float64, k.Clusters)

	// Start each centroid at a randomly chosen data point.
	order := rand.Perm(rows)
	for c := range centroids {
		seed := make([]float64, dim)
		inputs.Row(seed, order[c])
		centroids[c] = seed
	}

	// Distance of every sample (row) to every centroid (column).
	distances := mat64.NewDense(rows, k.Clusters, nil)
	sample := make([]float64, dim)
	for r := 0; r < rows; r++ {
		inputs.Row(sample, r)
		for c, centroid := range centroids {
			distances.Set(r, c, floats.Distance(sample, centroid))
		}
	}
}
Example #3
0
// BatchPredict runs a prediction for every row of inputs and stores the
// result in the corresponding row of outputs.
//
// inputDim and outputDim are the expected number of columns of inputs and
// outputs. If outputs is nil, a new nSamples x outputDim mat64.Dense is
// allocated; otherwise outputs must be nSamples x outputDim and is written in
// place. grainSize is forwarded to common.ParallelFor.
//
// On success the (possibly newly allocated) outputs and a nil error are
// returned. If the input width, output width, or number of rows does not
// match, the outputs argument is returned as given together with a non-nil
// error and no prediction is performed.
func BatchPredict(batch BatchPredictor, inputs common.RowMatrix, outputs common.MutableRowMatrix, inputDim, outputDim int, grainSize int) (common.MutableRowMatrix, error) {

	// TODO: Add in something about error

	// Check that the inputs and outputs are the right sizes
	nSamples, dimInputs := inputs.Dims()
	if inputDim != dimInputs {
		return outputs, errors.New("predict batch: input dimension mismatch")
	}

	if outputs == nil {
		outputs = mat64.NewDense(nSamples, outputDim, nil)
	} else {
		nOutputSamples, dimOutputs := outputs.Dims()
		if dimOutputs != outputDim {
			return outputs, errors.New("predict batch: output dimension mismatch")
		}
		if nSamples != nOutputSamples {
			return outputs, errors.New("predict batch: rows mismatch")
		}
	}

	// Each invocation of the worker forms its own predictor (and its own
	// scratch buffers), so separate chunks never share mutable state.

	// If the input and/or output is a RawRowViewer, save time by avoiding a copy
	inputRVer, inputIsRowViewer := inputs.(mat64.RawRowViewer)
	outputRVer, outputIsRowViewer := outputs.(mat64.RawRowViewer)

	var f func(start, end int)

	// Pick the worker matching the capabilities of inputs and outputs. The
	// cases are tested in order, so each later case only runs when the earlier
	// ones did not match. The first case must use &&: a comma-separated case
	// list means OR, which would call RawRowView on a nil interface whenever
	// only one side is a RawRowViewer.
	switch {
	case inputIsRowViewer && outputIsRowViewer:
		// Both sides expose raw rows: predict directly between the views.
		f = func(start, end int) {
			p := batch.NewPredictor()
			for i := start; i < end; i++ {
				p.Predict(inputRVer.RawRowView(i), outputRVer.RawRowView(i))
			}
		}
	case inputIsRowViewer:
		// Only the input exposes raw rows: round-trip the output row through a
		// scratch buffer.
		f = func(start, end int) {
			p := batch.NewPredictor()
			output := make([]float64, outputDim)
			for i := start; i < end; i++ {
				outputs.Row(output, i)
				p.Predict(inputRVer.RawRowView(i), output)
				outputs.SetRow(i, output)
			}
		}
	case outputIsRowViewer:
		// Only the output exposes raw rows: copy the input row into a scratch
		// buffer and write the prediction straight into the output view.
		f = func(start, end int) {
			p := batch.NewPredictor()
			input := make([]float64, inputDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				p.Predict(input, outputRVer.RawRowView(i))
			}
		}
	default:
		// Neither side exposes raw rows: copy both ways.
		f = func(start, end int) {
			p := batch.NewPredictor()
			input := make([]float64, inputDim)
			output := make([]float64, outputDim)
			for i := start; i < end; i++ {
				inputs.Row(input, i)
				outputs.Row(output, i)
				p.Predict(input, output)
				outputs.SetRow(i, output)
			}
		}
	}

	common.ParallelFor(nSamples, grainSize, f)
	return outputs, nil
}