Пример #1
0
// AnalyzerConstructor assembles an analyzer from components registered in
// cache: the tokenizer registered under unicode.Name, plus a token-filter
// chain listed as lower-case, elision, stop and light-stemmer.
//
// config is not consulted. The first lookup that fails aborts construction
// and its error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	unicodeTok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// The elision filter is resolved before the lower-case filter, although
	// the chain below lists lower-case first.
	elision, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		return nil, err
	}
	lowerCase, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopWords, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	lightStemmer, err := cache.TokenFilterNamed(LightStemmerName)
	if err != nil {
		return nil, err
	}

	chain := []analysis.TokenFilter{lowerCase, elision, stopWords, lightStemmer}
	return &analysis.Analyzer{
		Tokenizer:    unicodeTok,
		TokenFilters: chain,
	}, nil
}
Пример #2
0
// AnalyzerConstructor assembles an analyzer from components registered in
// cache: the tokenizer registered under unicode.Name, plus a token-filter
// chain listed as possessive, lower-case, stop and porter.
//
// config is not consulted. The first lookup that fails aborts construction
// and its error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	unicodeTok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order in which they appear in the chain.
	filterNames := []string{
		PossessiveName,
		lower_case_filter.Name,
		StopName,
		porter.Name,
	}
	chain := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		chain = append(chain, filter)
	}

	return &analysis.Analyzer{
		Tokenizer:    unicodeTok,
		TokenFilters: chain,
	}, nil
}
Пример #3
0
// AnalyzerConstructor assembles an analyzer from components registered in
// cache: the tokenizer registered under unicode.Name, plus a token-filter
// chain listed as normalize, lower-case, stop and stemmer.
//
// config is not consulted. The first lookup that fails aborts construction
// and its error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order in which they appear in the chain.
	filterNames := []string{
		NormalizeName,
		lower_case_filter.Name,
		StopName,
		StemmerName,
	}
	chain := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		chain = append(chain, filter)
	}

	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: chain,
	}, nil
}
Пример #4
0
// AnalyzerConstructor assembles an analyzer from the tokenizer registered
// under unicode.Name and a token-filter chain listed as lower-case, Unicode
// NFKC normalization, stop, normalize and stemmer.
//
// All components except the NFKC filter are looked up in cache; the NFKC
// filter is created directly via MustNewUnicodeNormalizeFilter. config is not
// consulted. The first lookup that fails aborts construction and its error is
// returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	unicodeTok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	lowerCase, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}

	// Built here, between the lower-case and stop lookups, rather than
	// fetched from the cache.
	nfkc := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKC)

	stopWords, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}
	normalizer, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	stemmer, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}

	chain := []analysis.TokenFilter{lowerCase, nfkc, stopWords, normalizer, stemmer}
	return &analysis.Analyzer{
		Tokenizer:    unicodeTok,
		TokenFilters: chain,
	}, nil
}
Пример #5
0
// TokenizerConstructor returns an exceptions tokenizer built from the
// package-level exceptionsRegexp and the tokenizer registered in cache under
// unicode.Name.
//
// config is not consulted. If the unicode tokenizer cannot be resolved, the
// lookup error is returned unchanged.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	fallback, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	tok := exception.NewExceptionsTokenizer(exceptionsRegexp, fallback)
	return tok, nil
}
Пример #6
0
// ExceptionsTokenizerConstructor builds an exceptions tokenizer from config.
//
// Recognised config keys:
//   - "exceptions": regular-expression patterns, supplied either as
//     []interface{} (elements that are not strings are skipped) or as
//     []string. The patterns are joined with "|" and compiled as a single
//     expression.
//   - "tokenizer": the name of a tokenizer, resolved through cache, that is
//     handed to NewExceptionsTokenizer together with the compiled pattern.
//
// An error is returned when no patterns are supplied, when the combined
// pattern does not compile, when "tokenizer" is absent or not a string, or
// when the named tokenizer cannot be resolved.
func ExceptionsTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	var patterns []string
	switch raw := config["exceptions"].(type) {
	case []interface{}:
		for _, item := range raw {
			if pattern, ok := item.(string); ok {
				patterns = append(patterns, pattern)
			}
		}
	case []string:
		patterns = append(patterns, raw...)
	}
	if len(patterns) == 0 {
		// The message names the key that is actually read from config.
		return nil, fmt.Errorf("no pattern found in 'exceptions' property")
	}

	// "|" has the lowest precedence in the regexp syntax, so each supplied
	// pattern remains a separate alternative.
	r, err := regexp.Compile(strings.Join(patterns, "|"))
	if err != nil {
		return nil, fmt.Errorf("unable to build regexp tokenizer: %v", err)
	}

	remainingName, ok := config["tokenizer"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify tokenizer for remaining input")
	}
	remaining, err := cache.TokenizerNamed(remainingName)
	if err != nil {
		return nil, err
	}
	return NewExceptionsTokenizer(r, remaining), nil
}
Пример #7
0
// AnalyzerConstructor returns an analyzer whose only component is the
// tokenizer registered in cache under single_token.Name; no char filters or
// token filters are set.
//
// config is not consulted. If the tokenizer cannot be resolved, the lookup
// error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	singleTok, err := cache.TokenizerNamed(single_token.Name)
	if err != nil {
		return nil, err
	}
	return &analysis.Analyzer{Tokenizer: singleTok}, nil
}
Пример #8
0
// AnalyzerConstructor returns an analyzer made of the tokenizer registered in
// cache under TokenizerName and a single Unicode NFKD normalization token
// filter created via MustNewUnicodeNormalizeFilter.
//
// config is not consulted. If the tokenizer cannot be resolved, the lookup
// error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(TokenizerName)
	if err != nil {
		return nil, err
	}

	// The normalization filter is only constructed once the tokenizer lookup
	// has succeeded.
	nfkd := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKD)

	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: []analysis.TokenFilter{nfkd},
	}, nil
}
Пример #9
0
// AnalyzerConstructor assembles an analyzer from components registered in
// cache:
//   - char filters: the one registered under zero_width_non_joiner.Name;
//   - tokenizer: the one registered under unicode.Name;
//   - token filters, listed as lower-case, ar.NormalizeName, NormalizeName
//     and StopName.
//
// config is not consulted. The first lookup that fails aborts construction
// and its error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	zwnj, err := cache.CharFilterNamed(zero_width_non_joiner.Name)
	if err != nil {
		return nil, err
	}
	tok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// The two normalizers are resolved before the lower-case filter, although
	// the chain below lists lower-case first.
	arNormalizer, err := cache.TokenFilterNamed(ar.NormalizeName)
	if err != nil {
		return nil, err
	}
	localNormalizer, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	lowerCase, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	stopWords, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}

	return &analysis.Analyzer{
		CharFilters:  []analysis.CharFilter{zwnj},
		Tokenizer:    tok,
		TokenFilters: []analysis.TokenFilter{lowerCase, arNormalizer, localNormalizer, stopWords},
	}, nil
}
Пример #10
0
// AnalyzerConstructor assembles an analyzer from components registered in
// cache: the tokenizer registered under webt.Name, plus a token-filter chain
// listed as lower-case followed by the filter registered under en.StopName.
//
// config is not consulted. The first lookup that fails aborts construction
// and its error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	webTok, err := cache.TokenizerNamed(webt.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order in which they appear in the chain.
	filterNames := []string{lower_case_filter.Name, en.StopName}
	chain := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		chain = append(chain, filter)
	}

	return &analysis.Analyzer{
		Tokenizer:    webTok,
		TokenFilters: chain,
	}, nil
}
Пример #11
0
// AnalyzerConstructor assembles an analyzer from the tokenizer registered in
// cache under whitespace_tokenizer.Name and a token-filter chain listed as
// Unicode NFKD normalization, lower-case and the filter registered under
// BigramName.
//
// The NFKD filter is created directly via MustNewUnicodeNormalizeFilter; the
// other components are looked up in cache. config is not consulted. The first
// lookup that fails aborts construction and its error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	wsTok, err := cache.TokenizerNamed(whitespace_tokenizer.Name)
	if err != nil {
		return nil, err
	}

	// Constructed right after the tokenizer lookup, before the remaining
	// filters are resolved.
	nfkd := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKD)

	lowerCase, err := cache.TokenFilterNamed(lower_case_filter.Name)
	if err != nil {
		return nil, err
	}
	bigram, err := cache.TokenFilterNamed(BigramName)
	if err != nil {
		return nil, err
	}

	return &analysis.Analyzer{
		Tokenizer:    wsTok,
		TokenFilters: []analysis.TokenFilter{nfkd, lowerCase, bigram},
	}, nil
}
Пример #12
0
// AnalyzerConstructor builds an analyzer entirely from config.
//
// Recognised config keys:
//   - "char_filters" (optional): char filter names, as []string or as
//     []interface{}; the latter is first passed through
//     convertInterfaceSliceToStringSlice. Names are resolved with
//     getCharFilters.
//   - "tokenizer" (required): the name of the tokenizer to look up in cache.
//   - "token_filters" (optional): token filter names, accepted in the same
//     two forms and resolved with getTokenFilters.
//
// A "char_filters" or "token_filters" value of any other type is ignored.
// An error is returned when "tokenizer" is missing or not a string, or when
// any conversion or lookup fails.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	var (
		charFilters []analysis.CharFilter
		err         error
	)
	switch names := config["char_filters"].(type) {
	case []string:
		if charFilters, err = getCharFilters(names, cache); err != nil {
			return nil, err
		}
	case []interface{}:
		converted, convErr := convertInterfaceSliceToStringSlice(names, "char filter")
		if convErr != nil {
			return nil, convErr
		}
		if charFilters, err = getCharFilters(converted, cache); err != nil {
			return nil, err
		}
	}

	// The tokenizer is resolved after the char filters and before the token
	// filters.
	tokenizerName, ok := config["tokenizer"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify tokenizer")
	}
	tokenizer, err := cache.TokenizerNamed(tokenizerName)
	if err != nil {
		return nil, err
	}

	var tokenFilters []analysis.TokenFilter
	switch names := config["token_filters"].(type) {
	case []string:
		if tokenFilters, err = getTokenFilters(names, cache); err != nil {
			return nil, err
		}
	case []interface{}:
		converted, convErr := convertInterfaceSliceToStringSlice(names, "token filter")
		if convErr != nil {
			return nil, convErr
		}
		if tokenFilters, err = getTokenFilters(converted, cache); err != nil {
			return nil, err
		}
	}

	// Filter slices that were never populated are nil, leaving the
	// corresponding fields at their zero value.
	return &analysis.Analyzer{
		CharFilters:  charFilters,
		Tokenizer:    tokenizer,
		TokenFilters: tokenFilters,
	}, nil
}