Ejemplo n.º 1
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {

	var charFilters []analysis.CharFilter
	charFilterNames, ok := config["char_filters"].([]interface{})
	if ok {
		charFilters = make([]analysis.CharFilter, len(charFilterNames))
		for i, charFilterName := range charFilterNames {
			charFilterNameString, ok := charFilterName.(string)
			if ok {
				charFilter, err := cache.CharFilterNamed(charFilterNameString)
				if err != nil {
					return nil, err
				}
				charFilters[i] = charFilter
			} else {
				return nil, fmt.Errorf("char filter name must be a string")
			}
		}
	}

	tokenizerName, ok := config["tokenizer"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify tokenizer")
	}

	tokenizer, err := cache.TokenizerNamed(tokenizerName)
	if err != nil {
		return nil, err
	}

	var tokenFilters []analysis.TokenFilter
	tokenFilterNames, ok := config["token_filters"].([]interface{})
	if ok {
		tokenFilters = make([]analysis.TokenFilter, len(tokenFilterNames))
		for i, tokenFilterName := range tokenFilterNames {
			tokenFilterNameString, ok := tokenFilterName.(string)
			if ok {
				tokenFilter, err := cache.TokenFilterNamed(tokenFilterNameString)
				if err != nil {
					return nil, err
				}
				tokenFilters[i] = tokenFilter
			} else {
				return nil, fmt.Errorf("token filter name must be a string")
			}
		}
	}

	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
	}
	if charFilters != nil {
		rv.CharFilters = charFilters
	}
	if tokenFilters != nil {
		rv.TokenFilters = tokenFilters
	}
	return &rv, nil
}
Ejemplo n.º 2
0
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
	tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames))
	for i, tokenFilterName := range tokenFilterNames {
		tokenFilter, err := cache.TokenFilterNamed(tokenFilterName)
		if err != nil {
			return nil, err
		}
		tokenFilters[i] = tokenFilter
	}

	return tokenFilters, nil
}
Ejemplo n.º 3
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
	if err != nil {
		return nil, err
	}
	elisionFilter, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopFrFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerFrFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: unicodeTokenizer,
		TokenFilters: []analysis.TokenFilter{
			elisionFilter,
			toLowerFilter,
			stopFrFilter,
			stemmerFrFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 4
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	elisionFilter, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopItFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerItFilter, err := cache.TokenFilterNamed(LightStemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			elisionFilter,
			stopItFilter,
			stemmerItFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 5
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopDeFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	normalizeDeFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	stemmerDeFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: unicodeTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopDeFilter,
			normalizeDeFilter,
			stemmerDeFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 6
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	normalizeFilter := unicodenorm.MustNewUnicodeNormalizeFilter(unicodenorm.NFKC)
	stopArFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	normalizeArFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	stemmerArFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			normalizeFilter,
			stopArFilter,
			normalizeArFilter,
			stemmerArFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 7
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	possEnFilter, err := cache.TokenFilterNamed(PossessiveName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			possEnFilter,
			toLowerFilter,
			stopEnFilter,
			stemmerEnFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 8
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	icuTokenizer, err := cache.TokenizerNamed(icu.Name)
	if err != nil {
		return nil, err
	}
	aposFilter, err := cache.TokenFilterNamed(apostrophe_filter.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopTrFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerTrFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: icuTokenizer,
		TokenFilters: []analysis.TokenFilter{
			aposFilter,
			toLowerFilter,
			stopTrFilter,
			stemmerTrFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 9
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(letter.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 10
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	whitespaceTokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	widthFilter, err := cache.TokenFilterNamed(WidthName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	bigramFilter, err := cache.TokenFilterNamed(BigramName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: whitespaceTokenizer,
		TokenFilters: []analysis.TokenFilter{
			widthFilter,
			toLowerFilter,
			bigramFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 11
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(TokenizerName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopThFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: unicodeTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopThFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 12
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	keywordTokenizer, err := cache.TokenizerNamed(single_token.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	detectLangFilter, err := cache.TokenFilterNamed(cld2.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: keywordTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			detectLangFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 13
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	indicNormalizeFilter, err := cache.TokenFilterNamed(in.NormalizeName)
	if err != nil {
		return nil, err
	}
	hindiNormalizeFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	stopHiFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	stemmerHiFilter, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			indicNormalizeFilter,
			hindiNormalizeFilter,
			stopHiFilter,
			stemmerHiFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 14
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	whitespaceTokenizer, err := cache.TokenizerNamed(whitespace_tokenizer.Name)
	if err != nil {
		return nil, err
	}
	normalizeFilter := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKD)
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	bigramFilter, err := cache.TokenFilterNamed(BigramName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: whitespaceTokenizer,
		TokenFilters: []analysis.TokenFilter{
			normalizeFilter,
			toLowerFilter,
			bigramFilter,
		},
	}
	return &rv, nil
}
Ejemplo n.º 15
0
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	zFilter, err := cache.CharFilterNamed(zero_width_non_joiner.Name)
	if err != nil {
		return nil, err
	}
	unicodeTokenizer, err := cache.TokenizerNamed(unicode_word_boundary.Name)
	if err != nil {
		return nil, err
	}
	normArFilter, err := cache.TokenFilterNamed(ar.NormalizeName)
	if err != nil {
		return nil, err
	}
	normFaFilter, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopFaFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		CharFilters: []analysis.CharFilter{
			zFilter,
		},
		Tokenizer: unicodeTokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			normArFilter,
			normFaFilter,
			stopFaFilter,
		},
	}
	return &rv, nil
}