Golang DatasetInterface.GetColumnByName примеры использования

Язык программирования: Golang

Пространство имен/Пакет: github.com/shuLhan/tabula

Класс/Тип: DatasetInterface

Метод/Функция: GetColumnByName

Примеров на hotexamples.com: 18

Golang DatasetInterface.GetColumnByName - 18 примеров найдено. Это лучшие примеры Golang кода для github.com/shuLhan/tabula.DatasetInterface.GetColumnByName, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

GetColumnByName(18)

GetDataAsColumns(1)

GetDataAsRows(1)

GetMode(1)

MergeColumns(1)

Пример #1

Показать файл

Файл: words_all_frequency.go Проект: shuLhan/wvcgen

/*
Compute the frequency of all known words in the "additions" column.

One real-valued record is pushed per input record. The value stays at zero when
the text is empty, either as read or after clean.WikiText; otherwise it is the
rounded result of tekstus.WordsFrequenciesOf over the split words, measured
against the list returned by GetAllWordList.
*/
func (ftr *WordsAllFrequency) Compute(dataset tabula.DatasetInterface) {
	dictionary := GetAllWordList()
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		// Starts at zero; only overwritten when there is text to analyse.
		out := tabula.NewRecordReal(float64(0))

		text := row.String()
		if len(text) > 0 {
			text = clean.WikiText(text)
		}

		// Cleaning may strip everything, so the length is checked again.
		if len(text) > 0 {
			words := tekstus.StringSplitWords(text, true, false)
			out.SetFloat(Round(tekstus.WordsFrequenciesOf(words, dictionary, false)))
		}

		ftr.PushBack(out)
	}
}

Пример #2

Показать файл

Файл: longest_word.go Проект: shuLhan/wvcgen

/*
Compute the length of the longest word in the inserted text.

For each record of the "additions" column an integer record is pushed: zero
when the raw text is empty, otherwise the byte length of the word returned by
tekstus.WordsFindLongest on the cleaned, split text.
*/
func (ftr *LongestWord) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")
	total := additions.Len()

	for idx, row := range additions.Records {
		var longestLen int64

		if raw := row.String(); len(raw) != 0 {
			words := tekstus.StringSplitWords(clean.WikiText(raw), true, true)
			longest, _ := tekstus.WordsFindLongest(words)

			if DEBUG >= 2 {
				fmt.Printf("[feature] %d/%d longest word: %q\n", idx, total,
					longest)
			}

			longestLen = int64(len(longest))
		}

		ftr.PushBack(tabula.NewRecordInt(longestLen))
	}
}

Пример #3

Показать файл

Файл: template.go Проект: shuLhan/wvcgen

/*
Compute is a skeleton showing how a feature is derived from a dataset column.

It reads every record of the "editid" column, converts the record's string
value to the feature's type with Record.SetValue, and pushes the resulting
record to the feature. When the conversion fails the record falls back to the
integer value 0, so exactly one record is pushed per input record.
*/
func (ftr *Template) Compute(dataset tabula.DatasetInterface) {
	// Get the column from dataset. This is a reference to `InputMetadata`
	// in `features.dsv`.
	// To see the list of columns that can be processed, see `features.dsv`
	// for an example.
	col := dataset.GetColumnByName("editid")

	for _, rec := range col.Records {
		// This is where the computed value will be saved.
		r := &tabula.Record{}

		// Get the field value from dataset.
		s := rec.String()

		// Process the field value `s` (e.g. cleaning, etc).
		// ...

		// Set the feature value after processing. Fall back to zero only
		// when the conversion fails; a successfully parsed value must be
		// kept as is.
		if e := r.SetValue(s, ftr.GetType()); e != nil {
			r.SetInteger(0)
		}

		// Save the record value.
		ftr.PushBack(r)
	}
}

Пример #4

Показать файл

Файл: good_token.go Проект: shuLhan/wvcgen

/*
Compute the number of good tokens in the inserted text.

Each record of the "additions" column is counted against the package-level
`tokens` list with tekstus.StringCountTokens, and the count is pushed as an
integer record.
*/
func (ftr *GoodToken) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		text := row.String()
		count := tekstus.StringCountTokens(text, tokens, false)
		out := tabula.NewRecordInt(int64(count))

		ftr.PushBack(out)
	}
}

Пример #5

Показать файл

Файл: compress_rate.go Проект: shuLhan/wvcgen

/*
Compute the compression rate of the inserted text.

Every record of the "additions" column is passed to compressRateLzw and the
rounded rate is pushed as a real record.
*/
func (ftr *CompressRate) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		// The second result of compressRateLzw is deliberately discarded
		// (presumably an error -- TODO confirm); whatever rate comes back
		// is recorded.
		rate, _ := compressRateLzw(row.String())
		out := tabula.NewRecordReal(Round(rate))

		ftr.PushBack(out)
	}
}

Пример #6

Показать файл

Файл: non_alnum_ratio.go Проект: shuLhan/wvcgen

/*
Compute the ratio of non-alphanumeric characters in the inserted text.

For each record of the "additions" column the ratio is obtained from
tekstus.RatioNonAlnumChar, rounded, and pushed as a real record.
*/
func (ftr *NonAlnumRatio) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		text := row.String()
		rounded := Round(tekstus.RatioNonAlnumChar(text, false))

		ftr.PushBack(tabula.NewRecordReal(rounded))
	}
}

Пример #7

Показать файл

Файл: longest_char_sequence.go Проект: shuLhan/wvcgen

/*
Compute the longest character sequence in the inserted text.

For each record of the "additions" column, the second value returned by
tekstus.GetMaxCharSequence is pushed as an integer record; the first value is
ignored.
*/
func (ftr *LongestCharSeq) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		_, seqLen := tekstus.GetMaxCharSequence(row.String())
		out := tabula.NewRecordInt(int64(seqLen))

		ftr.PushBack(out)
	}
}

Пример #8

Показать файл

Файл: words_vulgar_frequency.go Проект: shuLhan/wvcgen

/*
Compute the frequency of vulgar words in the inserted text.

Each record of the "additions" column is cleaned with clean.WikiText and then
measured against tekstus.VulgarWords; the rounded frequency is pushed as a real
record.
*/
func (ftr *WordsVulgarFrequency) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		cleaned := clean.WikiText(row.String())
		rounded := Round(tekstus.StringFrequenciesOf(cleaned,
			tekstus.VulgarWords, false))

		ftr.PushBack(tabula.NewRecordReal(rounded))
	}
}

Пример #9

Показать файл

Файл: char_diversity.go Проект: shuLhan/wvcgen

/*
Compute the character diversity of the inserted text.

For each record of the "additions" column the value is

	len(text) ^ (1 / (1 + number of unique characters))

where len(text) is the byte length and the unique count comes from
tekstus.CountUniqChar. The rounded result is pushed as a real record.
*/
func (ftr *CharDiversity) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		text := row.String()

		base := float64(len(text))
		exponent := 1 / float64(1+tekstus.CountUniqChar(text))
		diversity := math.Pow(base, exponent)

		ftr.PushBack(tabula.NewRecordReal(Round(diversity)))
	}
}

Пример #10

Показать файл

Файл: comment_length.go Проект: shuLhan/wvcgen

// Compute counts the number of bytes used in each edit comment, NOT including
// the header content "/* ... */".
//
// Each record of the "editcomment" column is passed through
// tekstus.BytesRemoveUntil with "/*" and "*/" as delimiters, and the length of
// what remains is pushed as an integer record.
func (ftr *CommentLength) Compute(dataset tabula.DatasetInterface) {
	var (
		opening = []byte("/*")
		closing = []byte("*/")
	)

	comments := dataset.GetColumnByName("editcomment")

	for _, row := range comments.Records {
		// The second result of BytesRemoveUntil is not needed here.
		body, _ := tekstus.BytesRemoveUntil(row.Bytes(), opening, closing)
		size := int64(len(body))

		ftr.PushBack(tabula.NewRecordInt(size))
	}
}

Пример #11

Показать файл

Файл: class.go Проект: shuLhan/wvcgen

/*
Compute converts the classification from text to a number.

Every record of the "class" column whose string value is exactly "vandalism"
becomes 1; any other value (e.g. "regular") becomes 0.
*/
func (ftr *Class) Compute(dataset tabula.DatasetInterface) {
	classes := dataset.GetColumnByName("class")

	for _, row := range classes.Records {
		out := tabula.NewRecordInt(0)

		switch row.String() {
		case "vandalism":
			out.SetInteger(1)
		}

		ftr.PushBack(out)
	}
}

Пример #12

Показать файл

Файл: anonim.go Проект: shuLhan/wvcgen

/*
Compute marks anonymous editors.

For each record of the "editor" column, the pushed real record is 1 when the
value parses as an IP address with net.ParseIP, and 0 otherwise.
*/
func (anon *Anonim) Compute(dataset tabula.DatasetInterface) {
	editors := dataset.GetColumnByName("editor")

	for _, row := range editors.Records {
		out := tabula.NewRecordReal(0)

		// net.ParseIP returns nil for anything that is not a valid address.
		if addr := net.ParseIP(row.String()); addr != nil {
			out.SetFloat(1.0)
		}

		anon.PushBack(out)
	}
}

Пример #13

Показать файл

Файл: size_ratio.go Проект: shuLhan/wvcgen

/*
Compute the ratio of size between the new and the old revision of each edit.

For every row the pushed value is (1 + new size) / (1 + old size), rounded,
where both sizes come from revision.GetSize. One is added to both sides,
presumably so the division stays defined when the old size is zero.

Iteration stops at the shorter of the two columns, so no record is pushed for
a new revision that has no matching old revision.
*/
func (ftr *SizeRatio) Compute(dataset tabula.DatasetInterface) {
	oldid := dataset.GetColumnByName("oldrevisionid")
	newid := dataset.GetColumnByName("newrevisionid")

	// The bound must come from the old-revision column: it is the one
	// indexed by position inside the loop.
	oldidlen := oldid.Len()

	for x, rec := range newid.Records {
		if x >= oldidlen {
			// Just in case there are more new revisions than old ones;
			// without this guard oldid.Records[x] would be out of range.
			break
		}

		newlen := revision.GetSize(rec.String())
		oldlen := revision.GetSize(oldid.Records[x].String())
		difflen := float64(1+newlen) / float64(1+oldlen)

		ftr.PushBack(tabula.NewRecordReal(Round(difflen)))
	}
}

Пример #14

Показать файл

Файл: main.go Проект: shuLhan/wvcgen

/*
doDiff compares the old and new revision of every edit and appends the result
to the dataset.

The revision IDs are read from the "oldrevisionid" and "newrevisionid" columns
and handed to revision.Diff. Two string input metadata, "deletions" and
"additions", are then registered on the reader, and the columns of the diff
result are merged into the dataset.

It panics if revision.Diff returns an error.
*/
func doDiff(readset dsv.ReaderInterface, ds tabula.DatasetInterface) {
	oldids := ds.GetColumnByName("oldrevisionid").ToStringSlice()
	newids := ds.GetColumnByName("newrevisionid").ToStringSlice()

	revision.SetDir(dRevisions)

	diffset, e := revision.Diff(oldids, newids, ".txt")
	if e != nil {
		panic(e)
	}

	// Register input metadata for the diff columns, in the same order as
	// before: deletions first, then additions.
	for _, name := range []string{"deletions", "additions"} {
		readset.AddInputMetadata(
			dsv.NewMetadata(name, "string", ",", "\"", "\"", nil))
	}

	ds.MergeColumns(diffset)
}

Пример #15

Показать файл

Файл: term_frequency.go Проект: shuLhan/wvcgen

/*
Compute the frequency of inserted words within the new revision.

For each record of the "additions" column a real record is pushed. It stays at
zero when the inserted text is empty or when the content of the matching new
revision cannot be loaded. Otherwise it is the rounded result of
tekstus.WordsFrequenciesOf, with the words of the new revision as the text and
the inserted words as the terms to look for.

NOTE(review): the "newrevisionid" column is indexed by the position in
"additions"; this assumes it has at least as many records -- confirm.
*/
func (ftr *TermFrequency) Compute(dataset tabula.DatasetInterface) {
	newrevidx := dataset.GetColumnByName("newrevisionid")
	adds := dataset.GetColumnByName("additions")
	total := len(adds.Records)

	for idx, row := range adds.Records {
		out := tabula.NewRecordReal(float64(0))

		// Inserted words.
		inserted := row.String()
		if len(inserted) == 0 {
			ftr.PushBack(out)
			continue
		}

		insertedWords := tekstus.StringSplitWords(clean.WikiText(inserted),
			true, true)

		// Content of the new revision.
		revID := newrevidx.Records[idx].String()

		if DEBUG >= 2 {
			fmt.Printf("[feature] term_frequency: %d/%d processing %q\n",
				idx, total, revID)
		}

		content, err := revision.GetContentClean(revID)
		if err == nil {
			contentWords := tekstus.StringSplitWords(content, true, false)
			out.SetFloat(Round(tekstus.WordsFrequenciesOf(contentWords,
				insertedWords, false)))
		}

		ftr.PushBack(out)
	}
}

Пример #16

Показать файл

Файл: words_bias_frequency.go Проект: shuLhan/wvcgen

/*
Compute the frequency of biased words in the inserted text.

For each record of the "additions" column a real record is pushed: zero when
the raw text is empty, otherwise the result of tekstus.StringFrequenciesOf on
the cleaned text against tekstus.BiasedWords.

NOTE(review): the frequency is stored as returned, without going through
Round -- confirm that this is intended.
*/
func (ftr *WordsBiasFrequency) Compute(dataset tabula.DatasetInterface) {
	additions := dataset.GetColumnByName("additions")

	for _, row := range additions.Records {
		out := tabula.NewRecordReal(float64(0))

		if raw := row.String(); len(raw) != 0 {
			cleaned := clean.WikiText(raw)

			out.SetFloat(tekstus.StringFrequenciesOf(cleaned,
				tekstus.BiasedWords, false))
		}

		ftr.PushBack(out)
	}
}

Пример #17

Показать файл

Файл: words_bias_impact.go Проект: shuLhan/wvcgen

/*
Compute the impact of biased words between the old and the new revision.

For each pair of IDs taken from the "oldrevisionid" and "newrevisionid" columns
the value of ComputeImpact with tekstus.BiasedWords is rounded and pushed as a
real record.

NOTE(review): "newrevisionid" is indexed by the position in "oldrevisionid";
this assumes both columns have the same number of records -- confirm.
*/
func (ftr *WordsBiasImpact) Compute(dataset tabula.DatasetInterface) {
	oldrevs := dataset.GetColumnByName("oldrevisionid")
	newrevs := dataset.GetColumnByName("newrevisionid")
	total := oldrevs.Len()

	for idx, oldRow := range oldrevs.Records {
		out := tabula.NewRecordReal(float64(0))

		impact := ComputeImpact(oldRow.String(),
			newrevs.Records[idx].String(), tekstus.BiasedWords)

		out.SetFloat(Round(impact))

		// The debug line prints the value before rounding.
		if DEBUG >= 2 {
			fmt.Printf("[feature] words_bias_impact: %d/%d freq: %f\n",
				idx, total, impact)
		}

		ftr.PushBack(out)
	}
}

Пример #18

Показать файл

Файл: char_distribution_insert.go Проект: shuLhan/wvcgen

/*
Compute the character distribution of the inserted text against the old
revision.

For each record of the "oldrevisionid" column the old revision content is
loaded with revision.GetContent. If loading fails a zero record is pushed;
otherwise the rounded KullbackLeiblerDivergence between the old content and
the text at the same position of the "additions" column is pushed.

NOTE(review): "additions" is indexed by the position in "oldrevisionid"; this
assumes it has at least as many records -- confirm.
*/
func (ftr *CharDistributionInsert) Compute(dataset tabula.DatasetInterface) {
	oldrevid := dataset.GetColumnByName("oldrevisionid")
	adds := dataset.GetColumnByName("additions")

	for idx, oldRow := range oldrevid.Records {
		out := tabula.NewRecordReal(0.0)

		// Content of the old revision; on failure the record stays at zero.
		oldText, err := revision.GetContent(oldRow.String())
		if err == nil {
			inserted := adds.Records[idx].String()

			out.SetFloat(Round(KullbackLeiblerDivergence(oldText, inserted)))
		}

		ftr.PushBack(out)
	}
}