func Test_r1r2(t *testing.T) { var wordTests = []struct { word string r1 string r2 string }{ {"crepuscular", "uscular", "cular"}, {"beautiful", "iful", "ul"}, {"beauty", "y", ""}, {"eucharist", "harist", "ist"}, {"animadversion", "imadversion", "adversion"}, {"mistresses", "tresses", "ses"}, {"sprinkled", "kled", ""}, // Special cases below {"communism", "ism", "m"}, {"arsenal", "al", ""}, {"generalities", "alities", "ities"}, {"embed", "bed", ""}, } for _, testCase := range wordTests { w := snowballword.New(testCase.word) r1start, r2start := r1r2(w) w.R1start = r1start w.R2start = r2start if w.R1String() != testCase.r1 || w.R2String() != testCase.r2 { t.Errorf("Expected \"{%v, %v}\", but got \"{%v, %v}\"", testCase.r1, testCase.r2, w.R1String(), w.R2String()) } } }
// Stem an English word. This is the only exported // function in this package. // func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words if len(word) <= 2 || (stemStopwWords == false && isStopWord(word)) { return word } // Return special words immediately if specialVersion := stemSpecialWord(word); specialVersion != "" { word = specialVersion return word } w := snowballword.New(word) // Stem the word. Note, each of these // steps will alter `w` in place. // preprocess(w) step0(w) step1a(w) step1b(w) step1c(w) step2(w) step3(w) step4(w) step5(w) postprocess(w) return w.String() }
// Stem an Spanish word. This is the only exported // function in this package. // func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words if len(word) <= 2 || (stemStopwWords == false && isStopWord(word)) { return word } w := snowballword.New(word) // Stem the word. Note, each of these // steps will alter `w` in place. // preprocess(w) step0(w) changeInStep1 := step1(w) if changeInStep1 == false { changeInStep2a := step2a(w) if changeInStep2a == false { step2b(w) } } step3(w) postprocess(w) return w.String() }
func runStepTest(t *testing.T, f stepFunc, tcs []stepTest) { for _, testCase := range tcs { w := snowballword.New(testCase.wordIn) w.R1start = testCase.r1start w.R2start = testCase.r2start _ = f(w) if w.String() != testCase.wordOut || w.R1String() != testCase.r1out || w.R2String() != testCase.r2out { t.Errorf("Expected \"{%v, %v, %v}\", but got \"{%v, %v, %v}\"", testCase.wordOut, testCase.r1out, testCase.r2out, w.String(), w.R1String(), w.R2String()) } } }
// Test isLowerVowel for things we know should be true // or false. // func RunFindRegionsTest(t *testing.T, f func(*snowballword.SnowballWord) (int, int, int), tcs []FindRegionsTestCase) { for _, testCase := range tcs { w := snowballword.New(testCase.Word) r1start, r2start, rvstart := f(w) if r1start != testCase.R1start || r2start != testCase.R2start || rvstart != testCase.RVstart { t.Errorf("Expect \"%v\" -> %v, %v, %v, but got %v, %v, %v", testCase.Word, testCase.R1start, testCase.R2start, testCase.RVstart, r1start, r2start, rvstart, ) } } }
func Test_capitalizeYs(t *testing.T) { var wordTests = []struct { in string out string }{ {"ysdcsdeysdfsysdfsdiyoyyyxyxayxey", "YsdcsdeYsdfsysdfsdiYoYyYxyxaYxeY"}, } for _, wt := range wordTests { w := snowballword.New(wt.in) capitalizeYs(w) if w.String() != wt.out { t.Errorf("Expected \"%v\", not \"%v\"", wt.out, w.String()) } } }
func RunStepTest(t *testing.T, f stepFunc, tcs []StepTestCase) { for _, testCase := range tcs { w := snowballword.New(testCase.WordIn) w.R1start = testCase.R1start w.R2start = testCase.R2start w.RVstart = testCase.RVstart retval := f(w) if retval != testCase.Changed || w.String() != testCase.WordOut || w.R1start != testCase.R1startOut || w.R2start != testCase.R2startOut || w.RVstart != testCase.RVstartOut { t.Errorf("Expected %v -> \"{%v, %v, %v, %v, %v}\", but got \"{%v, %v, %v, %v, %v}\"", testCase.WordIn, testCase.WordOut, testCase.R1startOut, testCase.R2startOut, testCase.RVstartOut, testCase.Changed, w.String(), w.R1start, w.R2start, w.RVstart, retval) } if w.String() != testCase.WordOut { fmt.Printf("{\"%v\", %v, %v, %v, true, \"%v\", %v, %v, %v},\n", testCase.WordIn, testCase.R1start, testCase.R2start, testCase.RVstart, testCase.WordOut, w.R1start, w.R2start, w.RVstart) } } }
// Stem an French word. This is the only exported // function in this package. // func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words if len(word) <= 2 || (stemStopwWords == false && isStopWord(word)) { return word } w := snowballword.New(word) // Stem the word. Note, each of these // steps will alter `w` in place. // preprocess(w) var ( changeInStep1 bool changeInStep2a bool changeInStep2b bool ) changeInStep1 = step1(w) if changeInStep1 == false { changeInStep2a = step2a(w) if changeInStep2a == false { changeInStep2b = step2b(w) } } // If the last step was successful, do step 3. Note that, // since we only do 2a if 1 is unsuccessful, the following // "if" condition tests to see if the previous step was // successful. // if changeInStep1 || changeInStep2a || changeInStep2b { step3(w) } else { step4(w) } step5(w) step6(w) postprocess(w) return w.String() }
func Test_normalizeApostrophes(t *testing.T) { variants := [...]string{ "\u2019xxx\u2019", "\u2018xxx\u2018", "\u201Bxxx\u201B", "’xxx’", "‘xxx‘", "‛xxx‛", } for _, v := range variants { w := snowballword.New(v) normalizeApostrophes(w) if w.String() != "'xxx'" { t.Errorf("Expected \"'xxx'\", not \"%v\"", w.String()) } } }
func Test_vnvSuffix(t *testing.T) { var wordTests = []struct { word string start int pos int }{ {"crepuscular", 0, 4}, {"uscular", 0, 2}, } for _, tc := range wordTests { w := snowballword.New(tc.word) pos := romance.VnvSuffix(w, isLowerVowel, tc.start) if pos != tc.pos { t.Errorf("Expected %v, but got %v", tc.pos, pos) } } }
func Test_preprocess(t *testing.T) { var wordTests = []struct { in string out string }{ {"arguing", "arguing"}, {"'catty", "catty"}, {"kyle’s", "kyle's"}, {"toy", "toY"}, } for _, wt := range wordTests { w := snowballword.New(wt.in) preprocess(w) if w.String() != wt.out { t.Errorf("Expected \"%v\", not \"%v\"", wt.out, w.String()) } } }
// Stem an Russian word. This is the only exported // function in this package. // func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) w := snowballword.New(word) // Return small words and stop words if len(w.RS) <= 2 || (stemStopwWords == false && isStopWord(word)) { return word } preprocess(w) step1(w) step2(w) step3(w) step4(w) return w.String() }
// Test capitalization of vowels acting as non-vowels. // func Test_capitalizeYUI(t *testing.T) { testCases := []struct { wordIn string wordOut string }{ {"jouer", "joUer"}, {"ennuie", "ennuIe"}, {"yeux", "Yeux"}, {"quand", "qUand"}, } for _, testCase := range testCases { w := snowballword.New(testCase.wordIn) capitalizeYUI(w) if w.String() != testCase.wordOut { t.Errorf("Expect %v -> %v, but got %v", testCase.wordIn, testCase.wordOut, w.String()) } } }
func Test_isShortWord(t *testing.T) { var testCases = []struct { word string isShort bool }{ {"bed", true}, {"shed", true}, {"shred", true}, {"bead", false}, {"embed", false}, {"beds", false}, } for _, testCase := range testCases { w := snowballword.New(testCase.word) r1start, r2start := r1r2(w) w.R1start = r1start w.R2start = r2start isShort := isShortWord(w) if isShort != testCase.isShort { t.Errorf("Expected %v, but got %v for \"{%v, %v}\"", testCase.isShort, isShort, testCase.word, w.R1String()) } } }
func Test_endsShortSyllable(t *testing.T) { var testCases = []struct { word string pos int result bool }{ {"absolute", 7, true}, {"ape", 2, true}, {"rap", 3, true}, {"trap", 4, true}, {"entrap", 6, true}, {"uproot", 6, false}, {"bestow", 6, false}, {"disturb", 7, false}, } for _, testCase := range testCases { w := snowballword.New(testCase.word) result := endsShortSyllable(w, testCase.pos) if result != testCase.result { t.Errorf("Expected endsShortSyllable(%v, %v) to return %v, not %v", testCase.word, testCase.pos, testCase.result, result) } } }