// Step 1 is the removal of standard suffixes // func step1(word *snowballword.SnowballWord) bool { suffix, suffixRunes := word.FirstSuffix( "issements", "issement", "atrices", "utions", "usions", "logies", "emment", "ements", "atrice", "ations", "ateurs", "amment", "ution", "usion", "ments", "logie", "istes", "ismes", "iqUes", "euses", "ences", "ement", "ation", "ateur", "ances", "ables", "ment", "ités", "iste", "isme", "iqUe", "euse", "ence", "eaux", "ance", "able", "ives", "ité", "eux", "aux", "ive", "ifs", "if", ) if suffix == "" { return false } isInR1 := (word.R1start <= len(word.RS)-len(suffixRunes)) isInR2 := (word.R2start <= len(word.RS)-len(suffixRunes)) isInRV := (word.RVstart <= len(word.RS)-len(suffixRunes)) // Handle simple replacements & deletions in R2 first if isInR2 { // Handle simple replacements in R2 repl := "" switch suffix { case "logie", "logies": repl = "log" case "usion", "ution", "usions", "utions": repl = "u" case "ence", "ences": repl = "ent" } if repl != "" { word.ReplaceSuffixRunes(suffixRunes, []rune(repl), true) return true } // Handle simple deletions in R2 switch suffix { case "ance", "iqUe", "isme", "able", "iste", "eux", "ances", "iqUes", "ismes", "ables", "istes": word.RemoveLastNRunes(len(suffixRunes)) return true } } // Handle simple replacements in RV if isInRV { // NOTE: these are "special" suffixes in that // we must still do steps 2a and 2b of the // French stemmer even when these suffixes are // found in step1. Therefore, we are returning // `false` here. repl := "" switch suffix { case "amment": repl = "ant" case "emment": repl = "ent" } if repl != "" { word.ReplaceSuffixRunes(suffixRunes, []rune(repl), true) return false } // Delete if preceded by a vowel that is also in RV if suffix == "ment" || suffix == "ments" { idx := len(word.RS) - len(suffixRunes) - 1 if idx >= word.RVstart && isLowerVowel(word.RS[idx]) { word.RemoveLastNRunes(len(suffixRunes)) return false } return false } } // Handle all the other "special" cases. All of these // return true immediately after changing the word. // switch suffix { case "eaux": // Replace with eau word.ReplaceSuffixRunes(suffixRunes, []rune("eau"), true) return true case "aux": // Replace with al if in R1 if isInR1 { word.ReplaceSuffixRunes(suffixRunes, []rune("al"), true) return true } case "euse", "euses": // Delete if in R2, else replace by eux if in R1 if isInR2 { word.RemoveLastNRunes(len(suffixRunes)) return true } else if isInR1 { word.ReplaceSuffixRunes(suffixRunes, []rune("eux"), true) return true } case "issement", "issements": // Delete if in R1 and preceded by a non-vowel if isInR1 { idx := len(word.RS) - len(suffixRunes) - 1 if idx >= 0 && isLowerVowel(word.RS[idx]) == false { word.RemoveLastNRunes(len(suffixRunes)) return true } } return false case "atrice", "ateur", "ation", "atrices", "ateurs", "ations": // Delete if in R2 if isInR2 { word.RemoveLastNRunes(len(suffixRunes)) // If preceded by "ic", delete if in R2, else replace by "iqU". newSuffix, newSuffixRunes := word.FirstSuffix("ic") if newSuffix != "" { if word.FitsInR2(len(newSuffixRunes)) { word.RemoveLastNRunes(len(newSuffixRunes)) } else { word.ReplaceSuffixRunes(newSuffixRunes, []rune("iqU"), true) } } return true } case "ement", "ements": if isInRV { // Delete if in RV word.RemoveLastNRunes(len(suffixRunes)) // If preceded by "iv", delete if in R2 // (and if further preceded by "at", delete if in R2) newSuffix, newSuffixRunes := word.RemoveFirstSuffixIfIn(word.R2start, "iv") if newSuffix != "" { word.RemoveFirstSuffixIfIn(word.R2start, "at") return true } // If preceded by "eus", delete if in R2, else replace by "eux" if in R1 newSuffix, newSuffixRunes = word.FirstSuffix("eus") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } else if word.FitsInR1(newSuffixLen) { word.ReplaceSuffixRunes(newSuffixRunes, []rune("eux"), true) } return true } // If preceded by abl or iqU, delete if in R2, otherwise, newSuffix, newSuffixRunes = word.FirstSuffix("abl", "iqU") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } return true } // If preceded by ièr or Ièr, replace by i if in RV newSuffix, newSuffixRunes = word.FirstSuffix("ièr", "Ièr") if newSuffix != "" { if word.FitsInRV(len(newSuffixRunes)) { word.ReplaceSuffixRunes(newSuffixRunes, []rune("i"), true) } return true } return true } case "ité", "ités": if isInR2 { // Delete if in R2 word.RemoveLastNRunes(len(suffixRunes)) // If preceded by "abil", delete if in R2, else replace by "abl" newSuffix, newSuffixRunes := word.FirstSuffix("abil") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } else { word.ReplaceSuffixRunes(newSuffixRunes, []rune("abl"), true) } return true } // If preceded by "ic", delete if in R2, else replace by "iqU" newSuffix, newSuffixRunes = word.FirstSuffix("ic") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } else { word.ReplaceSuffixRunes(newSuffixRunes, []rune("iqU"), true) } return true } // If preceded by "iv", delete if in R2 newSuffix, newSuffixRunes = word.RemoveFirstSuffixIfIn(word.R2start, "iv") return true } case "if", "ive", "ifs", "ives": if isInR2 { // Delete if in R2 word.RemoveLastNRunes(len(suffixRunes)) // If preceded by at, delete if in R2 newSuffix, newSuffixRunes := word.RemoveFirstSuffixIfIn(word.R2start, "at") if newSuffix != "" { // And if further preceded by ic, delete if in R2, else replace by iqU newSuffix, newSuffixRunes = word.FirstSuffix("ic") if newSuffix != "" { newSuffixLen := len(newSuffixRunes) if word.FitsInR2(newSuffixLen) { word.RemoveLastNRunes(newSuffixLen) } else { word.ReplaceSuffixRunes(newSuffixRunes, []rune("iqU"), true) } } } return true } } return false }
// Step 1 is the removal of standard suffixes // func step1(word *snowballword.SnowballWord) bool { // Possible suffixes, longest first suffix, suffixRunes := word.FirstSuffix( "amientos", "imientos", "aciones", "amiento", "imiento", "uciones", "logías", "idades", "encias", "ancias", "amente", "adores", "adoras", "ución", "mente", "logía", "istas", "ismos", "ibles", "encia", "anzas", "antes", "ancia", "adora", "ación", "ables", "osos", "osas", "ivos", "ivas", "ista", "ismo", "idad", "icos", "icas", "ible", "anza", "ante", "ador", "able", "oso", "osa", "ivo", "iva", "ico", "ica", ) isInR1 := (word.R1start <= len(word.RS)-len(suffixRunes)) isInR2 := (word.R2start <= len(word.RS)-len(suffixRunes)) // Deal with special cases first. All of these will // return if they are hit. // switch suffix { case "": // Nothing to do return false case "amente": if isInR1 { // Delete if in R1 word.RemoveLastNRunes(len(suffixRunes)) // if preceded by iv, delete if in R2 (and if further preceded by at, // delete if in R2), otherwise, // if preceded by os, ic or ad, delete if in R2 newSuffix, _ := word.RemoveFirstSuffixIfIn(word.R2start, "iv", "os", "ic", "ad") if newSuffix == "iv" { word.RemoveFirstSuffixIfIn(word.R2start, "at") } return true } return false } // All the following cases require the found suffix // to be in R2. if isInR2 == false { return false } // Compound replacement cases. All these cases return // if they are hit. // compoundReplacement := func(otherSuffixes ...string) bool { word.RemoveLastNRunes(len(suffixRunes)) word.RemoveFirstSuffixIfIn(word.R2start, otherSuffixes...) return true } switch suffix { case "adora", "ador", "ación", "adoras", "adores", "aciones", "ante", "antes", "ancia", "ancias": return compoundReplacement("ic") case "mente": return compoundReplacement("ante", "able", "ible") case "idad", "idades": return compoundReplacement("abil", "ic", "iv") case "iva", "ivo", "ivas", "ivos": return compoundReplacement("at") } // Simple replacement & deletion cases are all that remain. // simpleReplacement := func(repl string) bool { word.ReplaceSuffixRunes(suffixRunes, []rune(repl), true) return true } switch suffix { case "logía", "logías": return simpleReplacement("log") case "ución", "uciones": return simpleReplacement("u") case "encia", "encias": return simpleReplacement("ente") case "anza", "anzas", "ico", "ica", "icos", "icas", "ismo", "ismos", "able", "ables", "ible", "ibles", "ista", "istas", "oso", "osa", "osos", "osas", "amiento", "amientos", "imiento", "imientos": word.RemoveLastNRunes(len(suffixRunes)) return true } log.Panicln("Unhandled suffix:", suffix) return false }