// // Resampling will run resampling algorithm using values that has been defined // in `Runtime` and return list of synthetic samples. // // The `dataset` must be samples of minority class not the whole dataset. // // Algorithms, // // (0) If oversampling percentage less than 100, then // (0.1) replace the input dataset by selecting n random sample from dataset // without replacement, where n is // // (percentage-oversampling / 100) * number-of-sample // // (1) For each `sample` in dataset, // (1.1) find k-nearest-neighbors of `sample`, // (1.2) generate synthetic sample in neighbors. // (2) Write synthetic samples to file, only if `SyntheticFile` is not empty. // func (smote *Runtime) Resampling(dataset tabula.Rows) (e error) { smote.Init() if smote.PercentOver < 100 { // (0.1) smote.NSynthetic = (smote.PercentOver / 100.0) * len(dataset) dataset, _, _, _ = dataset.RandomPick(smote.NSynthetic, false) } else { smote.NSynthetic = smote.PercentOver / 100.0 } // (1) for x := range dataset { sample := dataset[x] // (1.1) neighbors := smote.FindNeighbors(&dataset, sample) // (1.2) smote.populate(sample, neighbors) } // (2) if smote.SyntheticFile != "" { e = resampling.WriteSynthetics(smote, smote.SyntheticFile) } return }
// // Write will write synthetic samples to file defined in `file`. // func (smote *Runtime) Write(file string) error { return resampling.WriteSynthetics(smote, file) }