/
stemmer_test.go
64 lines (55 loc) · 1.24 KB
/
stemmer_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
package cobe
import "testing"
import "bitbucket.org/tebeka/snowball"
// TestCobeStemmer checks that the stemmer built by newCobeStemmer around the
// English snowball stemmer returns the expected stem for every token in the
// table below.
func TestCobeStemmer(t *testing.T) {
	snow, err := snowball.New("english")
	if err != nil {
		// Stop here: without a snowball stemmer the table below cannot
		// say anything useful about the code under test.
		t.Fatalf("snowball.New(%q): %v", "english", err)
	}

	s := newCobeStemmer(snow)

	// Straight port of the Python cobe stemmer.
	var tests = []struct {
		token    string
		expected string
	}{
		// Plain words: expected stems are lowercase regardless of input case.
		{"foo", "foo"},
		{"jumping", "jump"},
		{"running", "run"},
		{"Foo", "foo"},
		{"FOO", "foo"},
		{"FOO'S'", "foo"},
		{"FOOING", "foo"},
		{"Fooing", "foo"},

		// Smiley variants are all expected to collapse to ":)".
		{":)", ":)"},
		{":-)", ":)"},
		{": )", ":)"},

		// Frowny variants are all expected to collapse to ":(".
		{":()", ":("},
		{":-(", ":("},
		{": (", ":("},
		{":' (", ":("},
	}

	for ti, tt := range tests {
		stem := s.Stem(tt.token)
		if tt.expected != stem {
			t.Errorf("[%d] %s\n%s !=\n%s", ti, tt.token, stem, tt.expected)
		}
	}
}
// TestStripAccents feeds accented strings to stripAccents and reports every
// case whose result differs from the expected unaccented form.
func TestStripAccents(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{"Queensrÿche", "Queensryche"},
		{"Blue Öyster Cult", "Blue Oyster Cult"},
		{"Motörhead", "Motorhead"},
		{"The Accüsed", "The Accused"},
		{"Mötley Crüe", "Motley Crue"},
		{"François", "Francois"},
		{"ą/ę/ś/ć", "a/e/s/c"},
	}

	for idx := range cases {
		c := cases[idx]
		got := stripAccents(c.in)
		if got == c.want {
			continue
		}
		// Report and keep going so every failing case is listed in one run.
		t.Errorf("[%d] %s expected %s; was %s", idx, c.in, c.want, got)
	}
}