Ejemplos de SnowballWord.R2start en Golang

Lenguaje de programación: Golang

Namespace/Package Name: github.com/kljensen/snowball/snowballword

Clase / Tipo: SnowballWord

Método / Función: R2start

Ejemplos en hotexamples.com: 4

Golang SnowballWord.R2start - 4 ejemplos encontrados. Estos son los ejemplos en Golang del mundo real mejor valorados de github.com/kljensen/snowball/snowballword.SnowballWord.R2start extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

RemoveLastNRunes(20)

FirstSuffix(11)

FirstSuffixIn(9)

ReplaceSuffixRunes(8)

R2start(4)

FitsInRV(4)

R1start(4)

RemoveFirstSuffixIn(4)

FitsInR2(3)

RemoveFirstSuffixIfIn(2)

FirstPrefix(2)

RS(2)

ReplaceSuffix(1)

DebugString(1)

RemoveFirstSuffix(1)

RVstart(1)

HasSuffixRunes(1)

FitsInR1(1)

FirstSuffixIfIn(1)

String(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: preprocess.go Proyecto: kljensen/snowball

// preprocess asks findRegions for the start offsets of the R1, R2 and RV
// regions of word and records them on the word itself.
func preprocess(word *snowballword.SnowballWord) {
	word.R1start, word.R2start, word.RVstart = findRegions(word)
}

Ejemplo n.º 2

Mostrar archivo

Archivo: preprocess.go Proyecto: kljensen/snowball

// preprocess applies the transformations that the later stemming steps
// rely on. The most important of these is locating the two regions,
// R1 and R2, whose start offsets are stored on word.
func preprocess(word *snowballword.SnowballWord) {

	// Apostrophe clean-up comes first.
	normalizeApostrophes(word)
	trimLeftApostrophes(word)

	// Upper-case every Y that is not acting as a vowel.
	capitalizeYs(word)

	// Locate R1 and R2 and record where each begins.
	word.R1start, word.R2start = r1r2(word)
}

Ejemplo n.º 3

Mostrar archivo

Archivo: common.go Proyecto: kljensen/snowball

// trimLeftApostrophes removes every leading apostrophe (U+0027) from
// word.RS and shifts R1start and R2start left by the number of runes
// removed. (Slight variation from the NLTK implementation, in which only
// the first apostrophe is removed.)
//
// The apostrophes are counted explicitly rather than read off the loop
// index: the index only equals the count when the loop stops on a
// non-apostrophe rune, so a word made up solely of apostrophes would
// otherwise keep its last one. The region starts are floored at zero so
// they can never become negative offsets into RS.
func trimLeftApostrophes(word *snowballword.SnowballWord) {
	numApostrophes := 0
	for _, r := range word.RS {
		if r != '\'' {
			break
		}
		numApostrophes++
	}
	if numApostrophes == 0 {
		// Nothing to trim; leave the word and its regions untouched.
		return
	}

	word.RS = word.RS[numApostrophes:]

	word.R1start -= numApostrophes
	if word.R1start < 0 {
		word.R1start = 0
	}
	word.R2start -= numApostrophes
	if word.R2start < 0 {
		word.R2start = 0
	}
}

Ejemplo n.º 4

Mostrar archivo

Archivo: step1b.go Proyecto: kljensen/snowball

// step1b normalizes the "ly" and "ed" family of suffixes: "eed", "eedly",
// "ed", "edly", "ing" and "ingly".
//
// It returns true when one of those suffixes was matched and handled, and
// false when no suffix matched or when an "ed"/"ing"-type suffix was found
// but the part of the word before it contains no lower-case vowel.
func step1b(w *snowballword.SnowballWord) bool {

	ending, endingRunes := w.FirstSuffix("eedly", "ingly", "edly", "ing", "eed", "ed")

	switch ending {

	case "eed", "eedly":

		// Rewrite to "ee", but only when the suffix lies inside R1.
		if w.R1start+len(endingRunes) <= len(w.RS) {
			w.ReplaceSuffixRunes(endingRunes, []rune("ee"), true)
		}
		return true

	case "ed", "edly", "ing", "ingly":
		// Handled after the switch.

	default:
		// No suffix found.
		return false
	}

	// The suffix is only deleted if the part of the word in front of it
	// contains a vowel.
	stemLen := len(w.RS) - len(endingRunes)
	stemHasVowel := false
	for i := 0; i < stemLen && !stemHasVowel; i++ {
		stemHasVowel = isLowerVowel(w.RS[i])
	}
	if !stemHasVowel {
		return false
	}

	// Two edits happen in a row from here on and, because of the way the
	// `ReplaceSuffix` method is implemented, information about R1 and R2
	// would be lost between them. Remember the original starts so they
	// can be reinstated once both edits are done.
	savedR1, savedR2 := w.R1start, w.R2start

	// First edit: drop the matched suffix.
	w.RemoveLastNRunes(len(endingRunes))

	// Second edit: depends on what the word ends with now.
	tail, tailRunes := w.FirstSuffix("at", "bl", "iz", "bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt")
	switch tail {

	case "":

		// A short word gets an "e" appended. By definition, R1 and R2
		// are the empty string for short words, so both regions start
		// at the very end of the word.
		if isShortWord(w) {
			w.RS = append(w.RS, 'e')
			end := len(w.RS)
			w.R1start, w.R2start = end, end
			return true
		}

	case "at", "bl", "iz":

		// Words ending in "at", "bl" or "iz" get an "e" appended.
		w.ReplaceSuffixRunes(tailRunes, []rune(tail+"e"), true)

	case "bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt":

		// Words ending in a double lose their last letter. Only the
		// doubles listed above count; other doubled letters do not.
		w.RemoveLastNRunes(1)
	}

	// Reinstate the remembered region starts, capped at the current word
	// length. This manual fix-up is needed only because of the double
	// edit described above.
	limit := len(w.RS)
	w.R1start, w.R2start = limit, limit
	if savedR1 < limit {
		w.R1start = savedR1
	}
	if savedR2 < limit {
		w.R2start = savedR2
	}

	return true
}