Exemplo n.º 1
0
// Test_r1r2 runs r1r2 over a table of words, stores the two returned
// region starts on the word, and checks that the resulting R1 and R2
// strings match the expected ones.
func Test_r1r2(t *testing.T) {
	type regionCase struct {
		word string
		r1   string
		r2   string
	}
	cases := []regionCase{
		{"crepuscular", "uscular", "cular"},
		{"beautiful", "iful", "ul"},
		{"beauty", "y", ""},
		{"eucharist", "harist", "ist"},
		{"animadversion", "imadversion", "adversion"},
		{"mistresses", "tresses", "ses"},
		{"sprinkled", "kled", ""},
		// Special cases below
		{"communism", "ism", "m"},
		{"arsenal", "al", ""},
		{"generalities", "alities", "ities"},
		{"embed", "bed", ""},
	}
	for _, tc := range cases {
		sw := snowballword.New(tc.word)
		sw.R1start, sw.R2start = r1r2(sw)
		if sw.R1String() == tc.r1 && sw.R2String() == tc.r2 {
			continue
		}
		t.Errorf("Expected \"{%v, %v}\", but got \"{%v, %v}\"", tc.r1, tc.r2, sw.R1String(), sw.R2String())
	}
}
Exemplo n.º 2
0
// Stem returns the stem of an English word.  This is the only
// exported function in this package.
//
// The input is trimmed of surrounding whitespace and lower-cased
// before anything else happens.  The normalized word is returned
// unchanged when it is at most two bytes long, or when it is a stop
// word and stemStopwWords is false.  Words for which stemSpecialWord
// yields a non-empty result are returned as that result.
func Stem(word string, stemStopwWords bool) string {
	word = strings.ToLower(strings.TrimSpace(word))

	// Small words are returned as they are.
	if len(word) <= 2 {
		return word
	}

	// Stop words are only stemmed when the caller asks for it.
	if !stemStopwWords && isStopWord(word) {
		return word
	}

	// Special words are returned immediately.
	if special := stemSpecialWord(word); special != "" {
		return special
	}

	// Run the stemming steps in order.  Note, each of these
	// steps will alter `sw` in place.
	sw := snowballword.New(word)
	preprocess(sw)
	step0(sw)
	step1a(sw)
	step1b(sw)
	step1c(sw)
	step2(sw)
	step3(sw)
	step4(sw)
	step5(sw)
	postprocess(sw)

	return sw.String()
}
Exemplo n.º 3
0
// Stem returns the stem of a Spanish word.  This is the only
// exported function in this package.
//
// The input is trimmed of surrounding whitespace and lower-cased
// first.  The normalized word is returned unchanged when it is at
// most two bytes long, or when it is a stop word and stemStopwWords
// is false.
func Stem(word string, stemStopwWords bool) string {
	word = strings.ToLower(strings.TrimSpace(word))

	// Small words are returned as they are.
	if len(word) <= 2 {
		return word
	}

	// Stop words are only stemmed when the caller asks for it.
	if !stemStopwWords && isStopWord(word) {
		return word
	}

	// Note, each of the steps below will alter `sw` in place.
	sw := snowballword.New(word)
	preprocess(sw)
	step0(sw)

	// Steps 1, 2a and 2b form a fallback chain: 2a runs only when
	// step 1 reports no change, and 2b only when 2a reports none
	// either.  Short-circuit evaluation enforces that ordering.
	if !step1(sw) && !step2a(sw) {
		step2b(sw)
	}

	step3(sw)
	postprocess(sw)

	return sw.String()
}
Exemplo n.º 4
0
// runStepTest applies the step function f to every case in tcs.  For
// each case it builds a word from wordIn, seeds the R1/R2 region
// starts from the case, runs f (ignoring its return value), and
// reports an error unless the word and its R1 and R2 strings all
// match the expected outputs.
func runStepTest(t *testing.T, f stepFunc, tcs []stepTest) {
	for i := range tcs {
		tc := tcs[i]

		sw := snowballword.New(tc.wordIn)
		sw.R1start = tc.r1start
		sw.R2start = tc.r2start

		// Only the effect on the word matters here, not the result.
		f(sw)

		if sw.String() == tc.wordOut && sw.R1String() == tc.r1out && sw.R2String() == tc.r2out {
			continue
		}
		t.Errorf("Expected \"{%v, %v, %v}\", but got \"{%v, %v, %v}\"", tc.wordOut, tc.r1out, tc.r2out, sw.String(), sw.R1String(), sw.R2String())
	}
}
Exemplo n.º 5
0
// RunFindRegionsTest calls the region-finding function f on the word
// of every case in tcs and reports an error whenever any of the three
// returned start positions differs from the case's R1start, R2start
// or RVstart.
func RunFindRegionsTest(t *testing.T, f func(*snowballword.SnowballWord) (int, int, int), tcs []FindRegionsTestCase) {
	for _, tc := range tcs {
		gotR1, gotR2, gotRV := f(snowballword.New(tc.Word))
		if gotR1 == tc.R1start && gotR2 == tc.R2start && gotRV == tc.RVstart {
			continue
		}
		t.Errorf("Expect \"%v\" -> %v, %v, %v, but got %v, %v, %v",
			tc.Word, tc.R1start, tc.R2start, tc.RVstart,
			gotR1, gotR2, gotRV,
		)
	}
}
Exemplo n.º 6
0
// Test_capitalizeYs runs capitalizeYs on a sample word and checks
// that the word's string form matches the expected capitalization.
func Test_capitalizeYs(t *testing.T) {
	cases := []struct {
		in  string
		out string
	}{
		{"ysdcsdeysdfsysdfsdiyoyyyxyxayxey", "YsdcsdeYsdfsysdfsdiYoYyYxyxaYxeY"},
	}
	for _, tc := range cases {
		sw := snowballword.New(tc.in)
		capitalizeYs(sw)
		if sw.String() == tc.out {
			continue
		}
		t.Errorf("Expected \"%v\", not \"%v\"", tc.out, sw.String())
	}
}
Exemplo n.º 7
0
// RunStepTest applies the step function f to every case in tcs.  For
// each case it builds a word from WordIn, seeds the R1/R2/RV region
// starts from the case, runs f, and reports an error unless the
// return value, the resulting word and all three region starts match
// the expected outputs.
//
// When the resulting word differs from WordOut, it additionally
// prints to standard output a line shaped like a test-case literal
// holding the inputs and the observed region starts (with a literal
// `true` in the fifth position) -- presumably so it can be pasted
// back into a test table.
func RunStepTest(t *testing.T, f stepFunc, tcs []StepTestCase) {
	for _, tc := range tcs {
		sw := snowballword.New(tc.WordIn)
		sw.R1start = tc.R1start
		sw.R2start = tc.R2start
		sw.RVstart = tc.RVstart

		changed := f(sw)

		matches := changed == tc.Changed &&
			sw.String() == tc.WordOut &&
			sw.R1start == tc.R1startOut &&
			sw.R2start == tc.R2startOut &&
			sw.RVstart == tc.RVstartOut
		if !matches {
			t.Errorf("Expected %v -> \"{%v, %v, %v, %v, %v}\", but got \"{%v, %v, %v, %v, %v}\"", tc.WordIn, tc.WordOut, tc.R1startOut, tc.R2startOut, tc.RVstartOut, tc.Changed, sw.String(), sw.R1start, sw.R2start, sw.RVstart, changed)
		}

		if sw.String() == tc.WordOut {
			continue
		}
		fmt.Printf("{\"%v\", %v, %v, %v, true, \"%v\", %v, %v, %v},\n", tc.WordIn, tc.R1start, tc.R2start, tc.RVstart, tc.WordOut, sw.R1start, sw.R2start, sw.RVstart)
	}
}
Exemplo n.º 8
0
// Stem returns the stem of a French word.  This is the only
// exported function in this package.
//
// The input is trimmed of surrounding whitespace and lower-cased
// first.  The normalized word is returned unchanged when it is at
// most two bytes long, or when it is a stop word and stemStopwWords
// is false.
func Stem(word string, stemStopwWords bool) string {
	word = strings.ToLower(strings.TrimSpace(word))

	// Small words are returned as they are.
	if len(word) <= 2 {
		return word
	}

	// Stop words are only stemmed when the caller asks for it.
	if !stemStopwWords && isStopWord(word) {
		return word
	}

	// Note, each of the steps below will alter `sw` in place.
	sw := snowballword.New(word)
	preprocess(sw)

	// Steps 1, 2a and 2b are tried in order and the chain stops at
	// the first one that reports a change; short-circuit evaluation
	// keeps the later steps from running once one has succeeded.
	changed := step1(sw) || step2a(sw) || step2b(sw)

	// Step 3 follows a successful step above; otherwise step 4 runs.
	if changed {
		step3(sw)
	} else {
		step4(sw)
	}

	step5(sw)
	step6(sw)
	postprocess(sw)

	return sw.String()
}
Exemplo n.º 9
0
// Test_normalizeApostrophes feeds normalizeApostrophes the word "xxx"
// wrapped in U+2019, U+2018 and U+201B (each written both as an
// escape and as a literal character) and checks that every variant
// comes out wrapped in plain ASCII apostrophes.
func Test_normalizeApostrophes(t *testing.T) {
	const want = "'xxx'"
	inputs := []string{
		"\u2019xxx\u2019",
		"\u2018xxx\u2018",
		"\u201Bxxx\u201B",
		"’xxx’",
		"‘xxx‘",
		"‛xxx‛",
	}
	for _, in := range inputs {
		sw := snowballword.New(in)
		normalizeApostrophes(sw)
		if sw.String() == want {
			continue
		}
		t.Errorf("Expected \"'xxx'\", not \"%v\"", sw.String())
	}
}
Exemplo n.º 10
0
func Test_vnvSuffix(t *testing.T) {
	var wordTests = []struct {
		word  string
		start int
		pos   int
	}{
		{"crepuscular", 0, 4},
		{"uscular", 0, 2},
	}
	for _, tc := range wordTests {
		w := snowballword.New(tc.word)
		pos := romance.VnvSuffix(w, isLowerVowel, tc.start)
		if pos != tc.pos {
			t.Errorf("Expected %v, but got %v", tc.pos, pos)
		}
	}
}
Exemplo n.º 11
0
// Test_preprocess runs preprocess on a table of input words and
// checks that each word's string form matches the expected output.
func Test_preprocess(t *testing.T) {
	cases := []struct {
		in  string
		out string
	}{
		{"arguing", "arguing"},
		{"'catty", "catty"},
		{"kyle’s", "kyle's"},
		{"toy", "toY"},
	}
	for _, tc := range cases {
		sw := snowballword.New(tc.in)
		preprocess(sw)
		if sw.String() == tc.out {
			continue
		}
		t.Errorf("Expected \"%v\", not \"%v\"", tc.out, sw.String())
	}
}
Exemplo n.º 12
0
// Stem returns the stem of a Russian word.  This is the only
// exported function in this package.
//
// The input is trimmed of surrounding whitespace and lower-cased
// first.  The normalized word is returned unchanged when the
// snowball word built from it has an RS of length two or less, or
// when it is a stop word and stemStopwWords is false.
func Stem(word string, stemStopwWords bool) string {
	word = strings.ToLower(strings.TrimSpace(word))
	sw := snowballword.New(word)

	// Small words are returned as they are.  The length is taken
	// from sw.RS rather than from the string itself.
	if len(sw.RS) <= 2 {
		return word
	}

	// Stop words are only stemmed when the caller asks for it.
	if !stemStopwWords && isStopWord(word) {
		return word
	}

	preprocess(sw)
	step1(sw)
	step2(sw)
	step3(sw)
	step4(sw)

	return sw.String()
}
Exemplo n.º 13
0
// Test_capitalizeYUI tests the capitalization of vowels acting as
// non-vowels: it runs capitalizeYUI on each input word and compares
// the word's string form with the expected output.
func Test_capitalizeYUI(t *testing.T) {
	cases := []struct {
		wordIn  string
		wordOut string
	}{
		{"jouer", "joUer"},
		{"ennuie", "ennuIe"},
		{"yeux", "Yeux"},
		{"quand", "qUand"},
	}

	for _, tc := range cases {
		sw := snowballword.New(tc.wordIn)
		capitalizeYUI(sw)
		if sw.String() == tc.wordOut {
			continue
		}
		t.Errorf("Expect %v -> %v, but got %v", tc.wordIn, tc.wordOut, sw.String())
	}
}
Exemplo n.º 14
0
// Test_isShortWord sets each word's R1/R2 starts from r1r2 and then
// checks that isShortWord returns the expected verdict for it.
func Test_isShortWord(t *testing.T) {
	cases := []struct {
		word    string
		isShort bool
	}{
		{"bed", true},
		{"shed", true},
		{"shred", true},
		{"bead", false},
		{"embed", false},
		{"beds", false},
	}
	for _, tc := range cases {
		sw := snowballword.New(tc.word)
		// The regions are populated before isShortWord is called.
		sw.R1start, sw.R2start = r1r2(sw)

		got := isShortWord(sw)
		if got == tc.isShort {
			continue
		}
		t.Errorf("Expected %v, but got %v for \"{%v, %v}\"", tc.isShort, got, tc.word, sw.R1String())
	}
}
Exemplo n.º 15
0
func Test_endsShortSyllable(t *testing.T) {
	var testCases = []struct {
		word   string
		pos    int
		result bool
	}{
		{"absolute", 7, true},
		{"ape", 2, true},
		{"rap", 3, true},
		{"trap", 4, true},
		{"entrap", 6, true},
		{"uproot", 6, false},
		{"bestow", 6, false},
		{"disturb", 7, false},
	}
	for _, testCase := range testCases {
		w := snowballword.New(testCase.word)
		result := endsShortSyllable(w, testCase.pos)
		if result != testCase.result {
			t.Errorf("Expected endsShortSyllable(%v, %v) to return %v, not %v", testCase.word, testCase.pos, testCase.result, result)
		}
	}

}