// convertInstance projects a single instance onto the selected attributes and
// returns the result as a new sparse instance.
//
// The value of every attribute listed in as.selectedAttributes is read from
// inst; the position of the attribute within that list becomes its index in
// the output. Only nonzero values are stored. For each stored entry a string
// value is also added: the nominal label selected by the numeric value when
// the output attribute is nominal, otherwise the output attribute's name.
// The weight of inst is carried over unchanged.
func (as *AttributeSelection) convertInstance(inst data.Instance) data.Instance {
	// Collect the nonzero selected values together with their output index.
	values := make([]float64, 0, len(as.selectedAttributes))
	indices := make([]int, 0, len(as.selectedAttributes))
	for pos, attrIdx := range as.selectedAttributes {
		if v := inst.Value(attrIdx); v != 0 {
			values = append(values, v)
			indices = append(indices, pos)
		}
	}

	newInst := data.NewInstance()
	for k, idx := range indices {
		attr := as.output.Attribute(idx)
		if attr.IsNominal() {
			// The stored value is used as an index into the nominal labels.
			newInst.AddValues(attr.Values()[int(values[k])])
		} else {
			newInst.AddValues(attr.Name())
		}
	}
	newInst.SetIndices(indices)
	newInst.SetRealValues(values)
	newInst.SetWeight(inst.Weight())
	return newInst
}
// normalizeInstance rescales the word entries of inst in place so that the
// Euclidean length of the document vector equals stwv.avgDocLength.
//
// Only sparse entries whose index is >= firstCopy take part; entries below
// firstCopy are left untouched. It panics when stwv.avgDocLength is negative,
// which is treated as "not set".
func (stwv *StringToWordVector) normalizeInstance(inst *data.Instance, firstCopy int) {
	if stwv.avgDocLength < 0 {
		panic("Average document length not set.")
	}

	// Compute length of document vector.
	docLength := float64(0)
	for j := 0; j < len(inst.RealValues()); j++ {
		if inst.Indices()[j] >= firstCopy {
			v := inst.RealValues()[j]
			docLength += v * v
		}
	}
	docLength = math.Sqrt(docLength)

	// Normalize document vector. The length is re-read on every iteration
	// because AddRealValuesIndex may modify the instance.
	for j := 0; j < len(inst.RealValues()); j++ {
		if inst.Indices()[j] >= firstCopy {
			// Capture the attribute index before the instance is modified so
			// the message below names the entry that was actually changed.
			idx := inst.Indices()[j]
			val := inst.RealValues()[j] * stwv.avgDocLength / docLength
			inst.AddRealValuesIndex(j, val)
			if val == 0 {
				fmt.Printf("Setting value %d to zero\n", idx)
				// NOTE(review): stepping back assumes AddRealValuesIndex drops
				// a zero entry from the sparse arrays, so position j now holds
				// the next entry. If it does not, this loop never advances --
				// confirm against the data.Instance implementation.
				j--
			}
		}
	}
}
// convertInstancewoDocNorm converts inst into a sparse word-vector instance
// without applying document-length normalization.
//
// It returns firstCopy, the number of leading output slots reserved for the
// attributes that were copied rather than converted, together with the new
// instance. Steps:
//
//  1. Every non-string input attribute is copied to the next output slot
//     (only nonzero values are stored). A string attribute whose value is
//     missing also consumes a slot and stores inst.MissingValue.
//  2. Every non-missing string attribute is split on whitespace; each word
//     found in stwv.dictionary sets the entry at the word's dictionary index
//     to 1, or increments it when stwv.outputsCounts is enabled.
//  3. If enabled, the TF transform log(1+v) and then the IDF transform
//     v*log(numInstances/docsCounts[index]) are applied to entries whose
//     index is >= firstCopy.
//  4. The entries are emitted as the indices/values of a new instance that
//     carries the weight of inst.
func (stwv *StringToWordVector) convertInstancewoDocNorm(inst data.Instance) (int, data.Instance) {
	// Convert the instance into a sorted set of indexes.
	contained := omap.NewIntKeyed()

	// snapshot returns the current keys and values of contained, in the
	// iteration order of contained.Do. Entries whose value is not a float64
	// are reported as 0.
	snapshot := func() ([]int, []float64) {
		keys := make([]int, contained.Len())
		vals := make([]float64, contained.Len())
		n := 0
		contained.Do(func(key, value interface{}) {
			k, _ := key.(int)
			v, _ := value.(float64)
			keys[n] = k
			vals[n] = v
			n++
		})
		return keys, vals
	}

	// Copy all non-converted attributes from input to output.
	firstCopy := 0
	inAttrs := stwv.inputFormat.Attributes()
	for i := range inAttrs {
		switch {
		case !inAttrs[i].IsString():
			// Add simple nominal and numeric attributes directly; zeros are
			// left implicit but still occupy an output slot.
			if v := inst.RealValues()[i]; v != 0 {
				contained.Insert(firstCopy, v)
			}
			firstCopy++
		case inst.IsMissingValue(i):
			contained.Insert(firstCopy, inst.MissingValue)
			firstCopy++
			// Non-missing string attributes are handled by the word loop
			// below. A range selector is not implemented; if one is added,
			// unselected string attributes would have to be copied here.
		}
	}

	// Copy the converted attributes.
	for j := 0; j < inst.NumAttributes(); j++ {
		if !inAttrs[j].IsString() || inst.IsMissingValue(j) {
			continue
		}
		for _, word := range strings.Fields(inst.Values()[j]) {
			index, present := stwv.dictionary.Find(word)
			if !present {
				continue
			}
			if stwv.outputsCounts {
				if prev, seen := contained.Find(index); seen {
					// Increment an existing count; an entry that does not
					// hold a float64 is left as it is.
					if count, ok := prev.(float64); ok {
						contained.Insert(index.(int), count+1)
					}
					continue
				}
			}
			contained.Insert(index.(int), float64(1))
		}
	}

	// TF transform: v -> log(1 + v) for word entries.
	if stwv.tf_transformation {
		keys, vals := snapshot()
		for i, key := range keys {
			if key >= firstCopy {
				contained.Insert(key, math.Log(vals[i]+1))
			}
		}
	}

	// IDF transform: v -> v * log(numInstances / docsCounts[index]) for word
	// entries. It runs on the values produced by the TF step, if any.
	if stwv.idf_transformation {
		keys, vals := snapshot()
		for i, key := range keys {
			if key >= firstCopy {
				idf := math.Log(float64(stwv.numInstances) / float64(stwv.docsCounts[key]))
				contained.Insert(key, vals[i]*idf)
			}
		}
	}

	// Convert the set to structures needed to create a sparse instance.
	indices, values := snapshot()
	instSparse := data.NewInstance()
	outAttrs := stwv.outputFormat.Attributes()
	for k, idx := range indices {
		if outAttrs[idx].IsNominal() {
			// The stored value is used as an index into the nominal labels.
			instSparse.AddValues(outAttrs[idx].Values()[int(values[k])])
		} else {
			instSparse.AddValues(outAttrs[idx].Name())
		}
	}
	instSparse.SetIndices(indices)
	instSparse.SetRealValues(values)
	instSparse.SetWeight(inst.Weight())
	// NOTE(review): this records the number of stored (sparse) entries, not
	// the number of attributes in the output format -- confirm this is what
	// SetNumAttributes expects.
	instSparse.SetNumAttributes(len(values))
	return firstCopy, instSparse
}
// Example #4
// 0
// convertInstance builds a new instance in which every stored numeric value
// is replaced by 1, leaving all other stored values as they are.
//
// A stored entry keeps its original value when its attribute is not numeric,
// when it is the class attribute of ntb.input, or when IsMissingValue reports
// it as missing. The sparse indices and the weight are copied unchanged.
func (ntb *NumericToBinary) convertInstance(instance data.Instance) data.Instance {
	n := len(instance.RealValues())
	binarized := make([]float64, n)
	positions := make([]int, n)

	for pos := 0; pos < n; pos++ {
		attrIdx := instance.Index(pos)
		attr := ntb.input.Attribute(attrIdx)

		// NOTE(review): IsMissingValue receives the sparse position here,
		// while Index/ValueSparse translate positions explicitly -- confirm
		// it is not expecting an attribute index.
		keep := attr.Type() != data.NUMERIC ||
			attrIdx == ntb.input.ClassIndex() ||
			instance.IsMissingValue(pos)

		if keep {
			binarized[pos] = instance.ValueSparse(pos)
		} else {
			binarized[pos] = 1
		}
		positions[pos] = attrIdx
	}

	result := data.NewInstance()
	result.SetWeight(instance.Weight())
	result.SetRealValues(binarized)
	result.SetIndices(positions)
	return result
}