/* ComputeImpact return percentage of words in new revision compared to old revision, using count_of_words_in_new / (count_of_words_in_old + count_of_words_in_new) if no words are found in old and new revision, return 0. */ func ComputeImpact(oldrevid, newrevid string, wordlist []string) float64 { oldtext, _ := revision.GetContentClean(oldrevid) newtext, _ := revision.GetContentClean(newrevid) oldCnt := tekstus.StringCountTokens(oldtext, wordlist, false) newCnt := tekstus.StringCountTokens(newtext, wordlist, false) total := float64(oldCnt + newCnt) if total == 0 { return 0 } return float64(newCnt) / total }
func TestStringCountTokens(t *testing.T) { for _, td := range dataStringCountTokens { got := tekstus.StringCountTokens(td.line, td.tokens, false) assert(t, td.exp, got, true) } }
/* Compute number of good token in inserted text. */ func (ftr *GoodToken) Compute(dataset tabula.DatasetInterface) { col := dataset.GetColumnByName("additions") for _, rec := range col.Records { cnt := tekstus.StringCountTokens(rec.String(), tokens, false) ftr.PushBack(tabula.NewRecordInt(int64(cnt))) } }