// TestKeyWordMarkerFilter verifies that tokens present in the keyword map
// ("walk", "park") come out of the filter with KeyWord set to true, while
// all other tokens pass through unchanged.
func TestKeyWordMarkerFilter(t *testing.T) {

	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term: []byte("park"),
		},
	}

	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term:    []byte("walk"),
			KeyWord: true,
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term:    []byte("park"),
			KeyWord: true,
		},
	}

	keyWordsMap := analysis.NewTokenMap()
	keyWordsMap.AddToken("walk")
	keyWordsMap.AddToken("park")

	filter := NewKeyWordMarkerFilter(keyWordsMap)
	outputTokenStream := filter.Filter(inputTokenStream)
	if !reflect.DeepEqual(outputTokenStream, expectedTokenStream) {
		// Report the full streams, not just the first token's KeyWord flag,
		// so a failure actually shows which token mismatched. (The original
		// message printed expectedTokenStream[0].KeyWord, which is false in
		// both streams and would read "expected false got false".)
		t.Errorf("expected %#v got %#v", expectedTokenStream, outputTokenStream)
	}
}
Example #2
0
// GenericTokenMapConstructor builds a token map from configuration. A
// "filename" entry takes precedence and is loaded from disk; otherwise an
// inline "tokens" list is consumed, silently skipping non-string entries.
// With neither key present, an error is returned.
func GenericTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	tokenMap := analysis.NewTokenMap()

	// first: try to load by filename
	if filename, ok := config["filename"].(string); ok {
		err := tokenMap.LoadFile(filename)
		return tokenMap, err
	}

	// next: look for an inline word list
	if tokenList, ok := config["tokens"].([]interface{}); ok {
		for _, entry := range tokenList {
			if word, ok := entry.(string); ok {
				tokenMap.AddToken(word)
			}
		}
		return tokenMap, nil
	}

	return nil, fmt.Errorf("must specify filename or list of tokens for token map")
}
Example #3
0
// TokenMapConstructor builds a token map pre-populated with the Danish
// stop word list; config and cache are unused.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(DanishStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
Example #4
0
// ArticlesTokenMapConstructor builds a token map pre-populated with the
// Italian articles list; config and cache are unused.
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	articles := analysis.NewTokenMap()
	if err := articles.LoadBytes(ItalianArticles); err != nil {
		return articles, err
	}
	return articles, nil
}