Ejemplo n.º 1
0
// Remove adjectival endings and return true if one was removed.
//
func removeAdjectivalEnding(word *snowballword.SnowballWord) bool {

	// Remove adjectival endings.  Start by looking for
	// an adjective ending.
	//
	suffix, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"ими", "ыми", "его", "ого", "ему", "ому", "ее", "ие",
		"ые", "ое", "ей", "ий", "ый", "ой", "ем", "им", "ым",
		"ом", "их", "ых", "ую", "юю", "ая", "яя", "ою", "ею",
	)
	if suffix != "" {

		// We found an adjective ending.  Remove optional participle endings.
		//
		newSuffix, newSuffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
			"ивш", "ывш", "ующ",
			"ем", "нн", "вш", "ющ", "щ",
		)
		switch newSuffix {
		case "ем", "нн", "вш", "ющ", "щ":

			// These are "Group 1" participle endings.
			// Group 1 endings must follow а (a) or я (ia) in RV.
			if precededByARinRV(word, len(newSuffixRunes)) == false {
				newSuffix = ""
			}
		}

		if newSuffix != "" {
			word.RemoveLastNRunes(len(newSuffixRunes))
		}
		return true
	}
	return false
}
Ejemplo n.º 2
0
// Remove verb endings and return true if one was removed.
//
func removeVerbEnding(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"уйте", "ейте", "ыть", "ыло", "ыли", "ыла", "уют", "ует",
		"нно", "йте", "ишь", "ить", "ите", "ило", "или", "ила",
		"ешь", "ете", "ены", "ено", "ена", "ят", "ют", "ыт", "ым",
		"ыл", "ую", "уй", "ть", "ны", "но", "на", "ло", "ли", "ла",
		"ит", "им", "ил", "ет", "ен", "ем", "ей", "ю", "н", "л", "й",
	)
	switch suffix {
	case "ла", "на", "ете", "йте", "ли", "й", "л", "ем", "н",
		"ло", "но", "ет", "ют", "ны", "ть", "ешь", "нно":

		// These are "Group 1" verb endings.
		// Group 1 endings must follow а (a) or я (ia) in RV.
		if precededByARinRV(word, len(suffixRunes)) == false {
			suffix = ""
		}

	}

	if suffix != "" {
		word.RemoveLastNRunes(len(suffixRunes))
		return true
	}
	return false
}
Ejemplo n.º 3
0
// Step 2a is the removal of verb suffixes beginning y,
// Search for the longest among the following suffixes
// in RV, and if found, delete if preceded by u.
//
func step2a(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "ya", "ye", "yan", "yen", "yeron", "yendo", "yo", "yó", "yas", "yes", "yais", "yamos")
	if suffix != "" {
		idx := len(word.RS) - len(suffixRunes) - 1
		if idx >= 0 && word.RS[idx] == 117 {
			word.RemoveLastNRunes(len(suffixRunes))
			return true
		}
	}
	return false
}
Ejemplo n.º 4
0
// Step 2b is the removal of Verb suffixes in RV
// that do not begin with "i".
//
func step2b(word *snowballword.SnowballWord) bool {

	// Search for the longest among the following suffixes in RV.
	//
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"eraIent", "assions", "erions", "assiez", "assent",
		"èrent", "eront", "erons", "eriez", "erait", "erais",
		"asses", "antes", "aIent", "âtes", "âmes", "ions",
		"erez", "eras", "erai", "asse", "ants", "ante", "ées",
		"iez", "era", "ant", "ait", "ais", "és", "ée", "ât",
		"ez", "er", "as", "ai", "é", "a",
	)

	switch suffix {
	case "ions":

		// Delete if in R2
		suffixLen := len(suffixRunes)
		if word.FitsInR2(suffixLen) {
			word.RemoveLastNRunes(suffixLen)
			return true
		}
		return false

	case "é", "ée", "ées", "és", "èrent", "er", "era",
		"erai", "eraIent", "erais", "erait", "eras", "erez",
		"eriez", "erions", "erons", "eront", "ez", "iez":

		// Delete
		word.RemoveLastNRunes(len(suffixRunes))
		return true

	case "âmes", "ât", "âtes", "a", "ai", "aIent",
		"ais", "ait", "ant", "ante", "antes", "ants", "as",
		"asse", "assent", "asses", "assiez", "assions":

		// Delete
		word.RemoveLastNRunes(len(suffixRunes))

		// If preceded by e (unicode code point 101), delete
		//
		idx := len(word.RS) - 1
		if idx >= 0 && word.RS[idx] == 101 && word.FitsInRV(1) {
			word.RemoveLastNRunes(1)
		}
		return true

	}
	return false
}
Ejemplo n.º 5
0
// Remove perfective gerund endings and return true if one was removed.
//
func removePerfectiveGerundEnding(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ившись", "ывшись", "вшись", "ивши", "ывши", "вши", "ив", "ыв", "в",
	)
	switch suffix {
	case "в", "вши", "вшись":

		// These are "Group 1" perfective gerund endings.
		// Group 1 endings must follow а (a) or я (ia) in RV.
		if precededByARinRV(word, len(suffixRunes)) == false {
			suffix = ""
		}

	}

	if suffix != "" {
		word.RemoveLastNRunes(len(suffixRunes))
		return true
	}
	return false
}
Ejemplo n.º 6
0
// Step 2a is the removal of Verb suffixes beginning
// with "i" in the RV region.
//
func step2a(word *snowballword.SnowballWord) bool {

	// Search for the longest among the following suffixes
	// in RV and if found, delete if preceded by a non-vowel.

	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"issantes", "issaIent", "issions", "issants", "issante",
		"iraIent", "issons", "issiez", "issent", "issant", "issait",
		"issais", "irions", "issez", "isses", "iront", "irons", "iriez",
		"irent", "irait", "irais", "îtes", "îmes", "isse", "irez",
		"iras", "irai", "ira", "ies", "ît", "it", "is", "ir", "ie", "i",
	)
	if suffix != "" {
		sLen := len(suffixRunes)
		idx := len(word.RS) - sLen - 1
		if idx >= 0 && word.FitsInRV(sLen+1) && isLowerVowel(word.RS[idx]) == false {
			word.RemoveLastNRunes(len(suffixRunes))
			return true
		}
	}
	return false
}
Ejemplo n.º 7
0
// Step 2b is the removal of verb suffixes beginning y,
// Search for the longest among the following suffixes
// in RV, and if found, delete if preceded by u.
//
func step2b(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"iésemos", "iéramos", "iríamos", "eríamos", "aríamos", "ásemos",
		"áramos", "ábamos", "isteis", "iríais", "iremos", "ieseis",
		"ierais", "eríais", "eremos", "asteis", "aríais", "aremos",
		"íamos", "irías", "irían", "iréis", "ieses", "iesen", "ieron",
		"ieras", "ieran", "iendo", "erías", "erían", "eréis", "aseis",
		"arías", "arían", "aréis", "arais", "abais", "íais", "iste",
		"iría", "irás", "irán", "imos", "iese", "iera", "idos", "idas",
		"ería", "erás", "erán", "aste", "ases", "asen", "aría", "arás",
		"arán", "aron", "aras", "aran", "ando", "amos", "ados", "adas",
		"abas", "aban", "ías", "ían", "éis", "áis", "iré", "irá", "ido",
		"ida", "eré", "erá", "emos", "ase", "aré", "ará", "ara", "ado",
		"ada", "aba", "ís", "ía", "ió", "ir", "id", "es", "er", "en",
		"ed", "as", "ar", "an", "ad",
	)
	switch suffix {
	case "":
		return false

	case "en", "es", "éis", "emos":

		// Delete, and if preceded by gu delete the u (the gu need not be in RV)
		word.RemoveLastNRunes(len(suffixRunes))
		guSuffix, _ := word.FirstSuffix("gu")
		if guSuffix != "" {
			word.RemoveLastNRunes(1)
		}

	default:

		// Delete
		word.RemoveLastNRunes(len(suffixRunes))
	}
	return true
}
Ejemplo n.º 8
0
// Step 4 is the cleaning up of residual suffixes.
//
func step4(word *snowballword.SnowballWord) bool {

	hadChange := false

	if word.String() == "voudrion" {
		log.Println("...", word)
	}

	// If the word ends s (unicode code point 115),
	// not preceded by a, i, o, u, è or s, delete it.
	//
	if idx := len(word.RS) - 1; idx >= 1 && word.RS[idx] == 115 {
		switch word.RS[idx-1] {

		case 97, 105, 111, 117, 232, 115:

			// Do nothing, preceded by a, i, o, u, è or s
			return false

		default:
			word.RemoveLastNRunes(1)
			hadChange = true

		}
	}

	// Note: all the following are restricted to the RV region.

	// Search for the longest among the following suffixes in RV.
	//
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"Ière", "ière", "Ier", "ier", "ion", "e", "ë",
	)

	switch suffix {
	case "":
		return hadChange
	case "ion":

		// Delete if in R2 and preceded by s or t in RV

		const sLen int = 3 // equivalently, len(suffixRunes)
		idx := len(word.RS) - sLen - 1
		if word.FitsInR2(sLen) && idx >= 0 && word.FitsInRV(sLen+1) {
			if word.RS[idx] == 115 || word.RS[idx] == 116 {
				word.RemoveLastNRunes(sLen)
				return true
			}
		}
		return hadChange

	case "ier", "ière", "Ier", "Ière":
		// Replace with i
		word.ReplaceSuffixRunes(suffixRunes, []rune("i"), true)
		return true

	case "e":
		word.RemoveLastNRunes(1)
		return true

	case "ë":

		// If preceded by gu (unicode code point 103 & 117), delete
		idx := len(word.RS) - 1
		if idx >= 2 && word.RS[idx-2] == 103 && word.RS[idx-1] == 117 {
			word.RemoveLastNRunes(1)
			return true
		}
		return hadChange
	}

	return true
}
Ejemplo n.º 9
0
// Step 0 is the removal of attached pronouns
//
func step0(word *snowballword.SnowballWord) bool {

	// Search for the longest among the following suffixes
	suffix1, suffix1Runes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"selas", "selos", "sela", "selo", "las", "les",
		"los", "nos", "me", "se", "la", "le", "lo",
	)

	// If the suffix empty or not in RV, we have nothing to do.
	if suffix1 == "" {
		return false
	}

	// We'll remove suffix1, if comes after one of the following
	suffix2, suffix2Runes := word.FirstSuffixIn(word.RVstart, len(word.RS)-len(suffix1),
		"iéndo", "iendo", "yendo", "ando", "ándo",
		"ár", "ér", "ír", "ar", "er", "ir",
	)
	switch suffix2 {
	case "":

		// Nothing to do
		return false

	case "iéndo", "ándo", "ár", "ér", "ír":

		// In these cases, deletion is followed by removing
		// the acute accent (e.g., haciéndola -> haciendo).

		var suffix2repl string
		switch suffix2 {
		case "":
			return false
		case "iéndo":
			suffix2repl = "iendo"
		case "ándo":
			suffix2repl = "ando"
		case "ár":
			suffix2repl = "ar"
		case "ír":
			suffix2repl = "ir"
		}
		word.RemoveLastNRunes(len(suffix1Runes))
		word.ReplaceSuffixRunes(suffix2Runes, []rune(suffix2repl), true)
		return true

	case "ando", "iendo", "ar", "er", "ir":
		word.RemoveLastNRunes(len(suffix1Runes))
		return true

	case "yendo":

		// In the case of "yendo", the "yendo" must lie in RV,
		// and be preceded by a "u" somewhere in the word.

		for i := 0; i < len(word.RS)-(len(suffix1)+len(suffix2)); i++ {

			// Note, the unicode code point for "u" is 117.
			if word.RS[i] == 117 {
				word.RemoveLastNRunes(len(suffix1Runes))
				return true
			}
		}
	}
	return false
}