// Remove adjectival endings and return true if one was removed. // func removeAdjectivalEnding(word *snowballword.SnowballWord) bool { // Remove adjectival endings. Start by looking for // an adjective ending. // suffix, _ := word.RemoveFirstSuffixIn(word.RVstart, "ими", "ыми", "его", "ого", "ему", "ому", "ее", "ие", "ые", "ое", "ей", "ий", "ый", "ой", "ем", "им", "ым", "ом", "их", "ых", "ую", "юю", "ая", "яя", "ою", "ею", ) if suffix != "" { // We found an adjective ending. Remove optional participle endings. // newSuffix, newSuffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "ивш", "ывш", "ующ", "ем", "нн", "вш", "ющ", "щ", ) switch newSuffix { case "ем", "нн", "вш", "ющ", "щ": // These are "Group 1" participle endings. // Group 1 endings must follow а (a) or я (ia) in RV. if precededByARinRV(word, len(newSuffixRunes)) == false { newSuffix = "" } } if newSuffix != "" { word.RemoveLastNRunes(len(newSuffixRunes)) } return true } return false }
// Remove verb endings and return true if one was removed. // func removeVerbEnding(word *snowballword.SnowballWord) bool { suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "уйте", "ейте", "ыть", "ыло", "ыли", "ыла", "уют", "ует", "нно", "йте", "ишь", "ить", "ите", "ило", "или", "ила", "ешь", "ете", "ены", "ено", "ена", "ят", "ют", "ыт", "ым", "ыл", "ую", "уй", "ть", "ны", "но", "на", "ло", "ли", "ла", "ит", "им", "ил", "ет", "ен", "ем", "ей", "ю", "н", "л", "й", ) switch suffix { case "ла", "на", "ете", "йте", "ли", "й", "л", "ем", "н", "ло", "но", "ет", "ют", "ны", "ть", "ешь", "нно": // These are "Group 1" verb endings. // Group 1 endings must follow а (a) or я (ia) in RV. if precededByARinRV(word, len(suffixRunes)) == false { suffix = "" } } if suffix != "" { word.RemoveLastNRunes(len(suffixRunes)) return true } return false }
// Step 2a is the removal of verb suffixes beginning y, // Search for the longest among the following suffixes // in RV, and if found, delete if preceded by u. // func step2a(word *snowballword.SnowballWord) bool { suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "ya", "ye", "yan", "yen", "yeron", "yendo", "yo", "yó", "yas", "yes", "yais", "yamos") if suffix != "" { idx := len(word.RS) - len(suffixRunes) - 1 if idx >= 0 && word.RS[idx] == 117 { word.RemoveLastNRunes(len(suffixRunes)) return true } } return false }
// Step 2b is the removal of Verb suffixes in RV // that do not begin with "i". // func step2b(word *snowballword.SnowballWord) bool { // Search for the longest among the following suffixes in RV. // suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "eraIent", "assions", "erions", "assiez", "assent", "èrent", "eront", "erons", "eriez", "erait", "erais", "asses", "antes", "aIent", "âtes", "âmes", "ions", "erez", "eras", "erai", "asse", "ants", "ante", "ées", "iez", "era", "ant", "ait", "ais", "és", "ée", "ât", "ez", "er", "as", "ai", "é", "a", ) switch suffix { case "ions": // Delete if in R2 suffixLen := len(suffixRunes) if word.FitsInR2(suffixLen) { word.RemoveLastNRunes(suffixLen) return true } return false case "é", "ée", "ées", "és", "èrent", "er", "era", "erai", "eraIent", "erais", "erait", "eras", "erez", "eriez", "erions", "erons", "eront", "ez", "iez": // Delete word.RemoveLastNRunes(len(suffixRunes)) return true case "âmes", "ât", "âtes", "a", "ai", "aIent", "ais", "ait", "ant", "ante", "antes", "ants", "as", "asse", "assent", "asses", "assiez", "assions": // Delete word.RemoveLastNRunes(len(suffixRunes)) // If preceded by e (unicode code point 101), delete // idx := len(word.RS) - 1 if idx >= 0 && word.RS[idx] == 101 && word.FitsInRV(1) { word.RemoveLastNRunes(1) } return true } return false }
// Remove perfective gerund endings and return true if one was removed. // func removePerfectiveGerundEnding(word *snowballword.SnowballWord) bool { suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "ившись", "ывшись", "вшись", "ивши", "ывши", "вши", "ив", "ыв", "в", ) switch suffix { case "в", "вши", "вшись": // These are "Group 1" perfective gerund endings. // Group 1 endings must follow а (a) or я (ia) in RV. if precededByARinRV(word, len(suffixRunes)) == false { suffix = "" } } if suffix != "" { word.RemoveLastNRunes(len(suffixRunes)) return true } return false }
// Step 2a is the removal of Verb suffixes beginning // with "i" in the RV region. // func step2a(word *snowballword.SnowballWord) bool { // Search for the longest among the following suffixes // in RV and if found, delete if preceded by a non-vowel. suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "issantes", "issaIent", "issions", "issants", "issante", "iraIent", "issons", "issiez", "issent", "issant", "issait", "issais", "irions", "issez", "isses", "iront", "irons", "iriez", "irent", "irait", "irais", "îtes", "îmes", "isse", "irez", "iras", "irai", "ira", "ies", "ît", "it", "is", "ir", "ie", "i", ) if suffix != "" { sLen := len(suffixRunes) idx := len(word.RS) - sLen - 1 if idx >= 0 && word.FitsInRV(sLen+1) && isLowerVowel(word.RS[idx]) == false { word.RemoveLastNRunes(len(suffixRunes)) return true } } return false }
// Step 2b is the removal of verb suffixes beginning y, // Search for the longest among the following suffixes // in RV, and if found, delete if preceded by u. // func step2b(word *snowballword.SnowballWord) bool { suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "iésemos", "iéramos", "iríamos", "eríamos", "aríamos", "ásemos", "áramos", "ábamos", "isteis", "iríais", "iremos", "ieseis", "ierais", "eríais", "eremos", "asteis", "aríais", "aremos", "íamos", "irías", "irían", "iréis", "ieses", "iesen", "ieron", "ieras", "ieran", "iendo", "erías", "erían", "eréis", "aseis", "arías", "arían", "aréis", "arais", "abais", "íais", "iste", "iría", "irás", "irán", "imos", "iese", "iera", "idos", "idas", "ería", "erás", "erán", "aste", "ases", "asen", "aría", "arás", "arán", "aron", "aras", "aran", "ando", "amos", "ados", "adas", "abas", "aban", "ías", "ían", "éis", "áis", "iré", "irá", "ido", "ida", "eré", "erá", "emos", "ase", "aré", "ará", "ara", "ado", "ada", "aba", "ís", "ía", "ió", "ir", "id", "es", "er", "en", "ed", "as", "ar", "an", "ad", ) switch suffix { case "": return false case "en", "es", "éis", "emos": // Delete, and if preceded by gu delete the u (the gu need not be in RV) word.RemoveLastNRunes(len(suffixRunes)) guSuffix, _ := word.FirstSuffix("gu") if guSuffix != "" { word.RemoveLastNRunes(1) } default: // Delete word.RemoveLastNRunes(len(suffixRunes)) } return true }
// Step 4 is the cleaning up of residual suffixes. // func step4(word *snowballword.SnowballWord) bool { hadChange := false if word.String() == "voudrion" { log.Println("...", word) } // If the word ends s (unicode code point 115), // not preceded by a, i, o, u, è or s, delete it. // if idx := len(word.RS) - 1; idx >= 1 && word.RS[idx] == 115 { switch word.RS[idx-1] { case 97, 105, 111, 117, 232, 115: // Do nothing, preceded by a, i, o, u, è or s return false default: word.RemoveLastNRunes(1) hadChange = true } } // Note: all the following are restricted to the RV region. // Search for the longest among the following suffixes in RV. // suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "Ière", "ière", "Ier", "ier", "ion", "e", "ë", ) switch suffix { case "": return hadChange case "ion": // Delete if in R2 and preceded by s or t in RV const sLen int = 3 // equivalently, len(suffixRunes) idx := len(word.RS) - sLen - 1 if word.FitsInR2(sLen) && idx >= 0 && word.FitsInRV(sLen+1) { if word.RS[idx] == 115 || word.RS[idx] == 116 { word.RemoveLastNRunes(sLen) return true } } return hadChange case "ier", "ière", "Ier", "Ière": // Replace with i word.ReplaceSuffixRunes(suffixRunes, []rune("i"), true) return true case "e": word.RemoveLastNRunes(1) return true case "ë": // If preceded by gu (unicode code point 103 & 117), delete idx := len(word.RS) - 1 if idx >= 2 && word.RS[idx-2] == 103 && word.RS[idx-1] == 117 { word.RemoveLastNRunes(1) return true } return hadChange } return true }
// Step 0 is the removal of attached pronouns // func step0(word *snowballword.SnowballWord) bool { // Search for the longest among the following suffixes suffix1, suffix1Runes := word.FirstSuffixIn(word.RVstart, len(word.RS), "selas", "selos", "sela", "selo", "las", "les", "los", "nos", "me", "se", "la", "le", "lo", ) // If the suffix empty or not in RV, we have nothing to do. if suffix1 == "" { return false } // We'll remove suffix1, if comes after one of the following suffix2, suffix2Runes := word.FirstSuffixIn(word.RVstart, len(word.RS)-len(suffix1), "iéndo", "iendo", "yendo", "ando", "ándo", "ár", "ér", "ír", "ar", "er", "ir", ) switch suffix2 { case "": // Nothing to do return false case "iéndo", "ándo", "ár", "ér", "ír": // In these cases, deletion is followed by removing // the acute accent (e.g., haciéndola -> haciendo). var suffix2repl string switch suffix2 { case "": return false case "iéndo": suffix2repl = "iendo" case "ándo": suffix2repl = "ando" case "ár": suffix2repl = "ar" case "ír": suffix2repl = "ir" } word.RemoveLastNRunes(len(suffix1Runes)) word.ReplaceSuffixRunes(suffix2Runes, []rune(suffix2repl), true) return true case "ando", "iendo", "ar", "er", "ir": word.RemoveLastNRunes(len(suffix1Runes)) return true case "yendo": // In the case of "yendo", the "yendo" must lie in RV, // and be preceded by a "u" somewhere in the word. for i := 0; i < len(word.RS)-(len(suffix1)+len(suffix2)); i++ { // Note, the unicode code point for "u" is 117. if word.RS[i] == 117 { word.RemoveLastNRunes(len(suffix1Runes)) return true } } } return false }