示例#1
0
// populate generates smote.NSynthetic synthetic rows from `instance` and its
// nearest `neighbors`, and pushes each of them to smote.Synthetics.
//
// For every synthetic row one neighbor is picked at random; each attribute
// other than the class attribute is then interpolated between the value in
// `instance` and the value in the picked neighbor, using a fresh random gap
// in [0.0,1.0) per attribute.
// The class attribute is taken from `instance` as-is (it is not copied, so
// every synthetic row shares that record with `instance`).
//
// If `neighbors` is empty no synthetic row is generated.
func (smote *Runtime) populate(instance *tabula.Row, neighbors knn.Neighbors) {
	// rand.Intn panics when its argument is not positive, and without any
	// neighbor there is nothing to interpolate against.
	nNeighbors := neighbors.Len()
	if nNeighbors <= 0 {
		return
	}

	lenAttr := len(*instance)

	for x := 0; x < smote.NSynthetic; x++ {
		// Choose one of the K nearest neighbors.
		sample := neighbors.Row(rand.Intn(nNeighbors))

		newSynt := make(tabula.Row, lenAttr)

		// Compute new synthetic attributes.
		for attr, sr := range *sample {
			if attr == smote.ClassIndex {
				continue
			}

			iv := (*instance)[attr].Float()
			sv := sr.Float()

			// Place the new value somewhere on the segment between
			// the instance value and the neighbor value.
			gap := rand.Float64()

			record := &tabula.Record{}
			record.SetFloat(iv + gap*(sv-iv))
			newSynt[attr] = record
		}

		// The synthetic row keeps the class of the original instance.
		newSynt[smote.ClassIndex] = (*instance)[smote.ClassIndex]

		smote.Synthetics.PushRow(&newSynt)
	}
}
示例#2
0
//
// createSynthetic creates one synthetic row from original row `p` and one
// row picked at random from `neighbors`.
//
// It returns nil when `neighbors` is empty, or when in.canCreate reports that
// a synthetic row cannot be created from `p` and the picked neighbor. In the
// latter case `p` is also pushed to in.outliers if the first list returned by
// in.canCreate is empty.
//
// Otherwise the result is a clone of `p` where every attribute, except the
// class attribute, is moved from the value in `p` toward the value in the
// picked neighbor by a gap obtained from in.randomGap (one call per
// attribute).
//
func (in *Runtime) createSynthetic(p *tabula.Row, neighbors knn.Neighbors) (
	synthetic *tabula.Row,
) {
	// rand.Intn panics when its argument is not positive, and without any
	// neighbor there is nothing to create a synthetic row from.
	nNeighbors := neighbors.Len()
	if nNeighbors <= 0 {
		return nil
	}

	// Choose one of the K nearest neighbors.
	n := neighbors.Row(rand.Intn(nNeighbors))

	// Check if synthetic sample can be created from p and n.
	// NOTE(review): slp and sln are presumably the safe-level lists of p
	// and n; confirm against canCreate.
	canit, slp, sln := in.canCreate(p, n)
	if !canit {
		if DEBUG >= 2 {
			fmt.Println("[lnsmote] can not create synthetic")
		}

		if slp.Len() <= 0 {
			in.outliers.PushBack(p)
		}

		// We can not create a synthetic row from p and n.
		return nil
	}

	synthetic = p.Clone()

	for x, srec := range *synthetic {
		// Skip class attribute.
		if x == in.ClassIndex {
			continue
		}

		// A new gap is requested for every attribute.
		delta := in.randomGap(p, n, slp.Len(), sln.Len())
		pv := (*p)[x].Float()
		diff := (*n)[x].Float() - pv
		srec.SetFloat(pv + delta*diff)
	}

	return synthetic
}