func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream { for _, token := range input { runes := bytes.Runes(token.Term) if len(runes) >= 2 { secondToLastRune := runes[len(runes)-2] lastRune := runes[len(runes)-1] if (secondToLastRune == rightSingleQuotationMark || secondToLastRune == apostrophe || secondToLastRune == fullWidthApostrophe) && (lastRune == 's' || lastRune == 'S') { token.Term = analysis.TruncateRunes(token.Term, 2) } } } return input }
func stem(input []byte) []byte { inputLen := utf8.RuneCount(input) // 5 if inputLen > 6 && (bytes.HasSuffix(input, []byte("ाएंगी")) || bytes.HasSuffix(input, []byte("ाएंगे")) || bytes.HasSuffix(input, []byte("ाऊंगी")) || bytes.HasSuffix(input, []byte("ाऊंगा")) || bytes.HasSuffix(input, []byte("ाइयाँ")) || bytes.HasSuffix(input, []byte("ाइयों")) || bytes.HasSuffix(input, []byte("ाइयां"))) { return analysis.TruncateRunes(input, 5) } // 4 if inputLen > 5 && (bytes.HasSuffix(input, []byte("ाएगी")) || bytes.HasSuffix(input, []byte("ाएगा")) || bytes.HasSuffix(input, []byte("ाओगी")) || bytes.HasSuffix(input, []byte("ाओगे")) || bytes.HasSuffix(input, []byte("एंगी")) || bytes.HasSuffix(input, []byte("ेंगी")) || bytes.HasSuffix(input, []byte("एंगे")) || bytes.HasSuffix(input, []byte("ेंगे")) || bytes.HasSuffix(input, []byte("ूंगी")) || bytes.HasSuffix(input, []byte("ूंगा")) || bytes.HasSuffix(input, []byte("ातीं")) || bytes.HasSuffix(input, []byte("नाओं")) || bytes.HasSuffix(input, []byte("नाएं")) || bytes.HasSuffix(input, []byte("ताओं")) || bytes.HasSuffix(input, []byte("ताएं")) || bytes.HasSuffix(input, []byte("ियाँ")) || bytes.HasSuffix(input, []byte("ियों")) || bytes.HasSuffix(input, []byte("ियां"))) { return analysis.TruncateRunes(input, 4) } // 3 if inputLen > 4 && (bytes.HasSuffix(input, []byte("ाकर")) || bytes.HasSuffix(input, []byte("ाइए")) || bytes.HasSuffix(input, []byte("ाईं")) || bytes.HasSuffix(input, []byte("ाया")) || bytes.HasSuffix(input, []byte("ेगी")) || bytes.HasSuffix(input, []byte("ेगा")) || bytes.HasSuffix(input, []byte("ोगी")) || bytes.HasSuffix(input, []byte("ोगे")) || bytes.HasSuffix(input, []byte("ाने")) || bytes.HasSuffix(input, []byte("ाना")) || bytes.HasSuffix(input, []byte("ाते")) || bytes.HasSuffix(input, []byte("ाती")) || bytes.HasSuffix(input, []byte("ाता")) || bytes.HasSuffix(input, []byte("तीं")) || bytes.HasSuffix(input, []byte("ाओं")) || bytes.HasSuffix(input, []byte("ाएं")) || bytes.HasSuffix(input, []byte("ुओं")) || bytes.HasSuffix(input, []byte("ुएं")) || bytes.HasSuffix(input, []byte("ुआं"))) { return analysis.TruncateRunes(input, 3) } // 2 if inputLen > 3 && (bytes.HasSuffix(input, []byte("कर")) || bytes.HasSuffix(input, []byte("ाओ")) || bytes.HasSuffix(input, []byte("िए")) || bytes.HasSuffix(input, []byte("ाई")) || bytes.HasSuffix(input, []byte("ाए")) || bytes.HasSuffix(input, []byte("ने")) || bytes.HasSuffix(input, []byte("नी")) || bytes.HasSuffix(input, []byte("ना")) || bytes.HasSuffix(input, []byte("ते")) || bytes.HasSuffix(input, []byte("ीं")) || bytes.HasSuffix(input, []byte("ती")) || bytes.HasSuffix(input, []byte("ता")) || bytes.HasSuffix(input, []byte("ाँ")) || bytes.HasSuffix(input, []byte("ां")) || bytes.HasSuffix(input, []byte("ों")) || bytes.HasSuffix(input, []byte("ें"))) { return analysis.TruncateRunes(input, 2) } // 1 if inputLen > 2 && (bytes.HasSuffix(input, []byte("ो")) || bytes.HasSuffix(input, []byte("े")) || bytes.HasSuffix(input, []byte("ू")) || bytes.HasSuffix(input, []byte("ु")) || bytes.HasSuffix(input, []byte("ी")) || bytes.HasSuffix(input, []byte("ि")) || bytes.HasSuffix(input, []byte("ा"))) { return analysis.TruncateRunes(input, 1) } return input }