예제 #1
0
파일: common.go 프로젝트: kljensen/snowball
// findRegions returns the start offsets, as indexes into word.RS, of
// the regions R1, R2 and RV.
//
// R1 and R2 are obtained from romance.VnvSuffix: R1 is searched from the
// start of the word and R2 from the start of R1.
//
// RV is decided by the first rule that applies:
//
//   - the word starts with "par", "col" or "tap": RV begins right after
//     that prefix;
//   - the word has at least three letters and the first two are vowels:
//     RV begins after the third letter;
//   - otherwise RV begins after the first vowel that is not the first
//     letter of the word;
//   - if no such vowel exists, RV is empty and begins at len(word.RS).
func findRegions(word *snowballword.SnowballWord) (r1start, r2start, rvstart int) {
	r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)

	// Special prefixes take priority over the vowel-based rules.
	if matched, matchedRunes := word.FirstPrefix("par", "col", "tap"); matched != "" {
		return r1start, r2start, len(matchedRunes)
	}

	runes := word.RS

	// Two leading vowels: RV is whatever follows the third letter.
	if len(runes) >= 3 && isLowerVowel(runes[0]) && isLowerVowel(runes[1]) {
		return r1start, r2start, 3
	}

	// General case: skip the first letter and stop at the next vowel.
	for idx, r := range runes {
		if idx > 0 && isLowerVowel(r) {
			return r1start, r2start, idx + 1
		}
	}

	// No usable vowel was found, so RV is the empty region at the end.
	return r1start, r2start, len(runes)
}
예제 #2
0
파일: common.go 프로젝트: kljensen/snowball
// r1r2 returns the start offsets of the regions R1 and R2 of word.
//
// R1 is the region after the first non-vowel following a vowel, or the
// null region at the end of the word if there is no such non-vowel.
// R2 is defined the same way, but searching inside R1.
//
// Words beginning with "gener", "commun" or "arsen" are exceptions:
// for them R1 starts immediately after that prefix instead of being
// searched for.
//
// See http://snowball.tartarus.org/texts/r1r2.html
func r1r2(word *snowballword.SnowballWord) (r1start, r2start int) {
	switch prefix, _ := word.FirstPrefix("gener", "commun", "arsen"); prefix {
	case "":
		// No special prefix: locate R1 from the start of the word.
		r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	default:
		// len on a string counts bytes; the three prefixes are pure
		// ASCII, so this is also their letter count.
		r1start = len(prefix)
	}

	// R2 is found by repeating the search from the start of R1.
	return r1start, romance.VnvSuffix(word, isLowerVowel, r1start)
}
예제 #3
0
파일: common.go 프로젝트: kljensen/snowball
// findRegions returns the start offsets, as indexes into word.RS, of
// the regions R1, R2 and RV.
//
// R1 and R2 are obtained from romance.VnvSuffix: R1 is searched from the
// start of the word and R2 from the start of R1.
//
// RV is only looked for in words of at least three letters; shorter
// words get an empty RV starting at len(word.RS). Otherwise:
//
//   - second letter is a consonant: RV begins after the next vowel;
//   - first two letters are vowels: RV begins after the next consonant;
//   - consonant followed by vowel: RV begins after the third letter.
//
// If the letter searched for in the first two cases does not exist, RV
// stays empty.
func findRegions(word *snowballword.SnowballWord) (r1start, r2start, rvstart int) {
	r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)

	runes := word.RS
	rvstart = len(runes)
	if len(runes) < 3 {
		return r1start, r2start, rvstart
	}

	firstIsVowel := isLowerVowel(runes[0])
	secondIsVowel := isLowerVowel(runes[1])

	// Consonant-vowel opening: RV is fixed right after the third letter.
	if !firstIsVowel && secondIsVowel {
		return r1start, r2start, 3
	}

	// The two remaining cases both scan from the third letter onwards
	// and differ only in the kind of letter that ends the scan: a vowel
	// when the second letter is a consonant, a consonant when the word
	// opens with two vowels.
	stopOnVowel := !secondIsVowel
	for idx := 2; idx < len(runes); idx++ {
		if isLowerVowel(runes[idx]) == stopOnVowel {
			return r1start, r2start, idx + 1
		}
	}

	return r1start, r2start, rvstart
}
예제 #4
0
파일: common.go 프로젝트: kljensen/snowball
// findRegions returns the start offsets, as indexes into word.RS, of
// the regions R1, R2 and RV.
//
// R1 and R2 are obtained from romance.VnvSuffix: R1 is searched from the
// start of the word and R2 from the start of R1.
//
// RV is the region after the first vowel of the word. A word with no
// vowel has an empty RV, starting at len(word.RS).
func findRegions(word *snowballword.SnowballWord) (r1start, r2start, rvstart int) {
	r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)

	for idx, r := range word.RS {
		if isLowerVowel(r) {
			// RV begins on the letter right after this vowel.
			return r1start, r2start, idx + 1
		}
	}

	// No vowel anywhere in the word: RV is empty.
	return r1start, r2start, len(word.RS)
}
예제 #5
0
// Test_vnvSuffix is a table-driven check of romance.VnvSuffix: for each
// word it searches from the given start offset using isLowerVowel and
// compares the returned position with the expected one.
func Test_vnvSuffix(t *testing.T) {
	wordTests := []struct {
		word  string // input word
		start int    // offset passed to VnvSuffix as the search start
		pos   int    // expected return value
	}{
		{"crepuscular", 0, 4},
		{"uscular", 0, 2},
	}
	for _, tc := range wordTests {
		w := snowballword.New(tc.word)
		pos := romance.VnvSuffix(w, isLowerVowel, tc.start)
		if pos != tc.pos {
			// Name the inputs so a failure identifies which row of the
			// table broke, not just the mismatching numbers.
			t.Errorf("VnvSuffix(%q, start=%d) = %d, want %d", tc.word, tc.start, pos, tc.pos)
		}
	}
}