/* populate will generate new synthetic sample using nearest neighbors. */ func (smote *Runtime) populate(instance *tabula.Row, neighbors knn.Neighbors) { lenAttr := len(*instance) for x := 0; x < smote.NSynthetic; x++ { // choose one of the K nearest neighbors n := rand.Intn(neighbors.Len()) sample := neighbors.Row(n) newSynt := make(tabula.Row, lenAttr) // Compute new synthetic attributes. for attr, sr := range *sample { if attr == smote.ClassIndex { continue } ir := (*instance)[attr] iv := ir.Float() sv := sr.Float() dif := sv - iv gap := rand.Float64() newAttr := iv + (gap * dif) record := &tabula.Record{} record.SetFloat(newAttr) newSynt[attr] = record } newSynt[smote.ClassIndex] = (*instance)[smote.ClassIndex] smote.Synthetics.PushRow(&newSynt) } }
// // createSynthetic will create synthetics row from original row `p` and their // `neighbors`. // func (in *Runtime) createSynthetic(p *tabula.Row, neighbors knn.Neighbors) ( synthetic *tabula.Row, ) { // choose one of the K nearest neighbors randIdx := rand.Intn(neighbors.Len()) n := neighbors.Row(randIdx) // Check if synthetic sample can be created from p and n. canit, slp, sln := in.canCreate(p, n) if !canit { if DEBUG >= 2 { fmt.Println("[lnsmote] can not create synthetic") } if slp.Len() <= 0 { in.outliers.PushBack(p) } // we can not create from p and synthetic. return nil } synthetic = p.Clone() for x, srec := range *synthetic { // Skip class attribute. if x == in.ClassIndex { continue } delta := in.randomGap(p, n, slp.Len(), sln.Len()) pv := (*p)[x].Float() diff := (*n)[x].Float() - pv srec.SetFloat(pv + delta*diff) } return }