Пример #1
0
//
// Resampling runs the resampling algorithm using the values that have been
// defined in `Runtime`.
//
// It does not return the synthetic samples themselves; the only result is the
// error reported by `resampling.WriteSynthetics` in step (2), or nil when
// `SyntheticFile` is empty and nothing is written.
//
// The `dataset` must be samples of minority class not the whole dataset.
//
// Algorithms,
//
// (0) If oversampling percentage less than 100, then
// (0.1) replace the input dataset by selecting n random sample from dataset
//       without replacement, where n is
//
//	(percentage-oversampling * number-of-sample) / 100
//
//       computed with integer arithmetic (the result is truncated).
//       Otherwise, set `NSynthetic` to percentage-oversampling / 100.
//
// (1) For each `sample` in dataset,
// (1.1) find k-nearest-neighbors of `sample`,
// (1.2) generate synthetic sample in neighbors.
// (2) Write synthetic samples to file, only if `SyntheticFile` is not empty.
//
func (smote *Runtime) Resampling(dataset tabula.Rows) (e error) {
	smote.Init()

	if smote.PercentOver < 100 {
		// (0.1)
		// Multiply before dividing: `PercentOver / 100` is an integer
		// division that is zero for every percentage between 0 and 99,
		// which would ask RandomPick for zero samples.
		smote.NSynthetic = (smote.PercentOver * len(dataset)) / 100

		// NOTE(review): NSynthetic holds a sample count here but a
		// per-sample ratio in the else branch; confirm that `populate`
		// expects this value -- TODO confirm.

		// Only the first result of RandomPick replaces the input; the
		// remaining three results are discarded.
		dataset, _, _, _ = dataset.RandomPick(smote.NSynthetic, false)
	} else {
		// Integer division: e.g. 250 percent yields 2.
		smote.NSynthetic = smote.PercentOver / 100
	}

	// (1)
	for x := range dataset {
		sample := dataset[x]

		// (1.1)
		neighbors := smote.FindNeighbors(&dataset, sample)

		// (1.2)
		smote.populate(sample, neighbors)
	}

	// (2)
	if smote.SyntheticFile != "" {
		e = resampling.WriteSynthetics(smote, smote.SyntheticFile)
	}

	return e
}
Пример #2
0
//
// Write passes this runtime and the target path `file` to
// `resampling.WriteSynthetics` and hands back whatever error that call
// reports.
//
func (smote *Runtime) Write(file string) error {
	err := resampling.WriteSynthetics(smote, file)

	return err
}