// Step 2b is the removal of Verb suffixes in RV // that do not begin with "i". // func step2b(word *snowballword.SnowballWord) bool { // Search for the longest among the following suffixes in RV. // suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "eraIent", "assions", "erions", "assiez", "assent", "èrent", "eront", "erons", "eriez", "erait", "erais", "asses", "antes", "aIent", "âtes", "âmes", "ions", "erez", "eras", "erai", "asse", "ants", "ante", "ées", "iez", "era", "ant", "ait", "ais", "és", "ée", "ât", "ez", "er", "as", "ai", "é", "a", ) switch suffix { case "ions": // Delete if in R2 suffixLen := len(suffixRunes) if word.FitsInR2(suffixLen) { word.RemoveLastNRunes(suffixLen) return true } return false case "é", "ée", "ées", "és", "èrent", "er", "era", "erai", "eraIent", "erais", "erait", "eras", "erez", "eriez", "erions", "erons", "eront", "ez", "iez": // Delete word.RemoveLastNRunes(len(suffixRunes)) return true case "âmes", "ât", "âtes", "a", "ai", "aIent", "ais", "ait", "ant", "ante", "antes", "ants", "as", "asse", "assent", "asses", "assiez", "assions": // Delete word.RemoveLastNRunes(len(suffixRunes)) // If preceded by e (unicode code point 101), delete // idx := len(word.RS) - 1 if idx >= 0 && word.RS[idx] == 101 && word.FitsInRV(1) { word.RemoveLastNRunes(1) } return true } return false }
// Step 2a is the removal of Verb suffixes beginning // with "i" in the RV region. // func step2a(word *snowballword.SnowballWord) bool { // Search for the longest among the following suffixes // in RV and if found, delete if preceded by a non-vowel. suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "issantes", "issaIent", "issions", "issants", "issante", "iraIent", "issons", "issiez", "issent", "issant", "issait", "issais", "irions", "issez", "isses", "iront", "irons", "iriez", "irent", "irait", "irais", "îtes", "îmes", "isse", "irez", "iras", "irai", "ira", "ies", "ît", "it", "is", "ir", "ie", "i", ) if suffix != "" { sLen := len(suffixRunes) idx := len(word.RS) - sLen - 1 if idx >= 0 && word.FitsInRV(sLen+1) && isLowerVowel(word.RS[idx]) == false { word.RemoveLastNRunes(len(suffixRunes)) return true } } return false }
// Step 4 is the cleaning up of residual suffixes. // func step4(word *snowballword.SnowballWord) bool { hadChange := false if word.String() == "voudrion" { log.Println("...", word) } // If the word ends s (unicode code point 115), // not preceded by a, i, o, u, è or s, delete it. // if idx := len(word.RS) - 1; idx >= 1 && word.RS[idx] == 115 { switch word.RS[idx-1] { case 97, 105, 111, 117, 232, 115: // Do nothing, preceded by a, i, o, u, è or s return false default: word.RemoveLastNRunes(1) hadChange = true } } // Note: all the following are restricted to the RV region. // Search for the longest among the following suffixes in RV. // suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "Ière", "ière", "Ier", "ier", "ion", "e", "ë", ) switch suffix { case "": return hadChange case "ion": // Delete if in R2 and preceded by s or t in RV const sLen int = 3 // equivalently, len(suffixRunes) idx := len(word.RS) - sLen - 1 if word.FitsInR2(sLen) && idx >= 0 && word.FitsInRV(sLen+1) { if word.RS[idx] == 115 || word.RS[idx] == 116 { word.RemoveLastNRunes(sLen) return true } } return hadChange case "ier", "ière", "Ier", "Ière": // Replace with i word.ReplaceSuffixRunes(suffixRunes, []rune("i"), true) return true case "e": word.RemoveLastNRunes(1) return true case "ë": // If preceded by gu (unicode code point 103 & 117), delete idx := len(word.RS) - 1 if idx >= 2 && word.RS[idx-2] == 103 && word.RS[idx-1] == 117 { word.RemoveLastNRunes(1) return true } return hadChange } return true }
// Step 1 is the removal of standard suffixes // func step1(word *snowballword.SnowballWord) bool { suffix, suffixRunes := word.FirstSuffix( "issements", "issement", "atrices", "utions", "usions", "logies", "emment", "ements", "atrice", "ations", "ateurs", "amment", "ution", "usion", "ments", "logie", "istes", "ismes", "iqUes", "euses", "ences", "ement", "ation", "ateur", "ances", "ables", "ment", "ités", "iste", "isme", "iqUe", "euse", "ence", "eaux", "ance", "able", "ives", "ité", "eux", "aux", "ive", "ifs", "if", ) if suffix == "" { return false } isInR1 := (word.R1start <= len(word.RS)-len(suffixRunes)) isInR2 := (word.R2start <= len(word.RS)-len(suffixRunes)) isInRV := (word.RVstart <= len(word.RS)-len(suffixRunes)) // Handle simple replacements & deletions in R2 first if isInR2 { // Handle simple replacements in R2 repl := "" switch suffix { case "logie", "logies": repl = "log" case "usion", "ution", "usions", "utions": repl = "u" case "ence", "ences": repl = "ent" } if repl != "" { word.ReplaceSuffixRunes(suffixRunes, []rune(repl), true) return true } // Handle simple deletions in R2 switch suffix { case "ance", "iqUe", "isme", "able", "iste", "eux", "ances", "iqUes", "ismes", "ables", "istes": word.RemoveLastNRunes(len(suffixRunes)) return true } } // Handle simple replacements in RV if isInRV { // NOTE: these are "special" suffixes in that // we must still do steps 2a and 2b of the // French stemmer even when these suffixes are // found in step1. Therefore, we are returning // `false` here. repl := "" switch suffix { case "amment": repl = "ant" case "emment": repl = "ent" } if repl != "" { word.ReplaceSuffixRunes(suffixRunes, []rune(repl), true) return false } // Delete if preceded by a vowel that is also in RV if suffix == "ment" || suffix == "ments" { idx := len(word.RS) - len(suffixRunes) - 1 if idx >= word.RVstart && isLowerVowel(word.RS[idx]) { word.RemoveLastNRunes(len(suffixRunes)) return false } return false } } // Handle all the other "special" cases. All of these // return true immediately after changing the word. // switch suffix { case "eaux": // Replace with eau word.ReplaceSuffixRunes(suffixRunes, []rune("eau"), true) return true case "aux": // Replace with al if in R1 if isInR1 { word.ReplaceSuffixRunes(suffixRunes, []rune("al"), true) return true } case "euse", "euses": // Delete if in R2, else replace by eux if in R1 if isInR2 { word.RemoveLastNRunes(len(suffixRunes)) return true } else if isInR1 { word.ReplaceSuffixRunes(suffixRunes, []rune("eux"), true) return true } case "issement", "issements": // Delete if in R1 and preceded by a non-vowel if isInR1 { idx := len(word.RS) - len(suffixRunes) - 1 if idx >= 0 && isLowerVowel(word.RS[idx]) == false { word.RemoveLastNRunes(len(suffixRunes)) return true } } return false case "atrice", "ateur", "ation", "atrices", "ateurs", "ations": // Delete if in R2 if isInR2 { word.RemoveLastNRunes(len(suffixRunes)) // If preceded by "ic", delete if in R2, else replace by "iqU". newSuffix, newSuffixRunes := word.FirstSuffix("ic") if newSuffix != "" { if word.FitsInR2(len(newSuffixRunes)) { word.RemoveLastNRunes(len(newSuffixRunes)) } else { word.ReplaceSuffixRunes(newSuffixRunes, []rune("iqU"), true) } } return true } case "ement", "ements": if isInRV { // Delete if in RV word.RemoveLastNRunes(len(suffixRunes)) // If preceded by "iv", delete if in R2 // (and if further preceded by "at", delete if in R2) newSuffix, newSuffixRunes := word.RemoveFirstSuffixIfIn(word.R2start, "iv") if newSuffix != "" { word.RemoveFirstSuffixIfIn(word.R2start, "at") return true } // If preceded by "eus", delete if in R2, else replace by "eux" if in R1 newSuffix, newSuffixRunes = word.FirstSuffix("eus") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } else if word.FitsInR1(newSuffixLen) { word.ReplaceSuffixRunes(newSuffixRunes, []rune("eux"), true) } return true } // If preceded by abl or iqU, delete if in R2, otherwise, newSuffix, newSuffixRunes = word.FirstSuffix("abl", "iqU") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } return true } // If preceded by ièr or Ièr, replace by i if in RV newSuffix, newSuffixRunes = word.FirstSuffix("ièr", "Ièr") if newSuffix != "" { if word.FitsInRV(len(newSuffixRunes)) { word.ReplaceSuffixRunes(newSuffixRunes, []rune("i"), true) } return true } return true } case "ité", "ités": if isInR2 { // Delete if in R2 word.RemoveLastNRunes(len(suffixRunes)) // If preceded by "abil", delete if in R2, else replace by "abl" newSuffix, newSuffixRunes := word.FirstSuffix("abil") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } else { word.ReplaceSuffixRunes(newSuffixRunes, []rune("abl"), true) } return true } // If preceded by "ic", delete if in R2, else replace by "iqU" newSuffix, newSuffixRunes = word.FirstSuffix("ic") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } else { word.ReplaceSuffixRunes(newSuffixRunes, []rune("iqU"), true) } return true } // If preceded by "iv", delete if in R2 newSuffix, newSuffixRunes = word.RemoveFirstSuffixIfIn(word.R2start, "iv") return true } case "if", "ive", "ifs", "ives": if isInR2 { // Delete if in R2 word.RemoveLastNRunes(len(suffixRunes)) // If preceded by at, delete if in R2 newSuffix, newSuffixRunes := word.RemoveFirstSuffixIfIn(word.R2start, "at") if newSuffix != "" { // And if further preceded by ic, delete if in R2, else replace by iqU newSuffix, newSuffixRunes = word.FirstSuffix("ic") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } else { word.ReplaceSuffixRunes(newSuffixRunes, []rune("iqU"), true) } } } return true } } return false }