Ejemplo n.º 1
0
// generateTrainingAttrs selects RandomFeatures number of base.Attributes from
// the provided base.Instances.
func (b *BaggedModel) generateTrainingAttrs(model int, from *base.Instances) []base.Attribute {
	ret := make([]base.Attribute, 0)
	if b.RandomFeatures == 0 {
		for j := 0; j < from.Cols; j++ {
			attr := from.GetAttr(j)
			ret = append(ret, attr)
		}
	} else {
		for {
			if len(ret) >= b.RandomFeatures {
				break
			}
			attrIndex := rand.Intn(from.Cols)
			if attrIndex == from.ClassIndex {
				continue
			}
			attr := from.GetAttr(attrIndex)
			matched := false
			for _, a := range ret {
				if a.Equals(attr) {
					matched = true
					break
				}
			}
			if !matched {
				ret = append(ret, attr)
			}
		}
	}
	ret = append(ret, from.GetClassAttr())
	b.lock.Lock()
	b.selectedAttributes[model] = ret
	b.lock.Unlock()
	return ret
}
Ejemplo n.º 2
0
// GetSplitAttributeFromSelection returns the class Attribute which maximises
// the information gain amongst consideredAttributes
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (r *InformationGainRuleGenerator) GetSplitAttributeFromSelection(consideredAttributes []int, f *base.Instances) base.Attribute {

	// Next step is to compute the information gain at this node
	// for each randomly chosen attribute, and pick the one
	// which maximises it
	maxGain := math.Inf(-1)
	selectedAttribute := -1

	// Compute the base entropy
	classDist := f.GetClassDistribution()
	baseEntropy := getBaseEntropy(classDist)

	// Compute the information gain for each attribute
	for _, s := range consideredAttributes {
		proposedClassDist := f.GetClassDistributionAfterSplit(f.GetAttr(s))
		localEntropy := getSplitEntropy(proposedClassDist)
		informationGain := baseEntropy - localEntropy
		if informationGain > maxGain {
			maxGain = informationGain
			selectedAttribute = s
		}
	}

	// Pick the one which maximises IG
	return f.GetAttr(selectedAttribute)
}
Ejemplo n.º 3
0
// Run discretises the set of Instances `on'
//
// IMPORTANT: ChiMergeFilter discretises in place.
func (c *ChiMergeFilter) Run(on *base.Instances) {
	if !c._Trained {
		panic("Call Build() beforehand")
	}
	for attr := range c.Tables {
		table := c.Tables[attr]
		for i := 0; i < on.Rows; i++ {
			val := on.Get(i, attr)
			dis := 0
			for j, k := range table {
				if k.Value < val {
					dis = j
					continue
				}
				break
			}
			on.Set(i, attr, float64(dis))
		}
		newAttribute := new(base.CategoricalAttribute)
		newAttribute.SetName(on.GetAttr(attr).GetName())
		for _, k := range table {
			newAttribute.GetSysValFromString(fmt.Sprintf("%f", k.Value))
		}
		on.ReplaceAttr(attr, newAttribute)
	}
}
Ejemplo n.º 4
0
// Run applies a trained BinningFilter to a set of Instances,
// discretising any numeric attributes added.
//
// IMPORTANT: Run discretises in-place, so make sure to take
// a copy if the original instances are still needed
//
// IMPORTANT: This function panic()s if the filter has not been
// trained. Call Build() before running this function
//
// IMPORTANT: Call Build() after adding any additional attributes.
// Otherwise, the training structure will be out of date from
// the values expected and could cause a panic.
func (b *BinningFilter) Run(on *base.Instances) {
	if !b.trained {
		panic("Call Build() beforehand")
	}
	for attr := range b.Attributes {
		minVal := b.MinVals[attr]
		maxVal := b.MaxVals[attr]
		disc := 0
		// Casts to float32 to replicate a floating point precision error
		delta := float32(maxVal - minVal)
		delta /= float32(b.BinCount)
		for i := 0; i < on.Rows; i++ {
			val := on.Get(i, attr)
			if val <= minVal {
				disc = 0
			} else {
				disc = int(math.Floor(float64(float32(val-minVal) / delta)))
				if disc >= b.BinCount {
					disc = b.BinCount - 1
				}
			}
			on.Set(i, attr, float64(disc))
		}
		newAttribute := new(base.CategoricalAttribute)
		newAttribute.SetName(on.GetAttr(attr).GetName())
		for i := 0; i < b.BinCount; i++ {
			newAttribute.GetSysValFromString(fmt.Sprintf("%d", i))
		}
		on.ReplaceAttr(attr, newAttribute)
	}
}
Ejemplo n.º 5
0
func ChiMBuildFrequencyTable(attr int, inst *base.Instances) []*FrequencyTableEntry {
	ret := make([]*FrequencyTableEntry, 0)
	var attribute *base.FloatAttribute
	attribute, ok := inst.GetAttr(attr).(*base.FloatAttribute)
	if !ok {
		panic("only use Chi-M on numeric stuff")
	}
	for i := 0; i < inst.Rows; i++ {
		value := inst.Get(i, attr)
		valueConv := attribute.GetUsrVal(value)
		class := inst.GetClass(i)
		// Search the frequency table for the value
		found := false
		for _, entry := range ret {
			if entry.Value == valueConv {
				found = true
				entry.Frequency[class] += 1
			}
		}
		if !found {
			newEntry := &FrequencyTableEntry{
				valueConv,
				make(map[string]int),
			}
			newEntry.Frequency[class] = 1
			ret = append(ret, newEntry)
		}
	}

	return ret
}