func TestKeyWordMarkerFilter(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term: []byte("park"),
		},
	}

	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term:    []byte("walk"),
			KeyWord: true,
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term:    []byte("park"),
			KeyWord: true,
		},
	}

	// mark "walk" and "park" as keywords
	keyWordsMap := analysis.NewTokenMap()
	keyWordsMap.AddToken("walk")
	keyWordsMap.AddToken("park")

	filter := NewKeyWordMarkerFilter(keyWordsMap)
	outputTokenStream := filter.Filter(inputTokenStream)
	if !reflect.DeepEqual(outputTokenStream, expectedTokenStream) {
		t.Errorf("expected %#v got %#v", expectedTokenStream, outputTokenStream)
	}
}
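// The point of the KeyWord flag is to let downstream mutating filters leave
// marked tokens untouched. Below is a minimal sketch (not part of the package)
// of such a consumer; sketchStemmingFilter and stemTerm are hypothetical names,
// and stemTerm merely stands in for a real stemming step. Assumes the standard
// "bytes" import.
func sketchStemmingFilter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		if token.KeyWord {
			// token was flagged by the keyword marker filter; pass it through
			continue
		}
		token.Term = stemTerm(token.Term)
	}
	return input
}

// stemTerm is a stand-in for a real stemmer: here it just trims a plural "s".
func stemTerm(term []byte) []byte {
	return bytes.TrimSuffix(term, []byte("s"))
}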
func GenericTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()

	// first: try to load by filename
	filename, ok := config["filename"].(string)
	if ok {
		err := rv.LoadFile(filename)
		return rv, err
	}

	// next: look for an inline word list
	tokens, ok := config["tokens"].([]interface{})
	if ok {
		for _, token := range tokens {
			tokenStr, ok := token.(string)
			if ok {
				rv.AddToken(tokenStr)
			}
		}
		return rv, nil
	}

	return nil, fmt.Errorf("must specify filename or list of tokens for token map")
}
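// A minimal usage sketch for GenericTokenMapConstructor, exercising the inline
// "tokens" form of the config. exampleInlineTokenMap is a hypothetical helper;
// the cache argument is not consulted on this path, so nil suffices here.
func exampleInlineTokenMap() (analysis.TokenMap, error) {
	config := map[string]interface{}{
		"tokens": []interface{}{"walk", "park"},
	}
	return GenericTokenMapConstructor(config, nil)
}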
// TokenMapConstructor builds a token map from the bundled Danish stop word list.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(DanishStopWords)
	return rv, err
}
// ArticlesTokenMapConstructor builds a token map from the bundled Italian articles list.
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	rv := analysis.NewTokenMap()
	err := rv.LoadBytes(ItalianArticles)
	return rv, err
}
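// Constructors like the two above are normally exposed through an init
// function so the token map can be looked up by name. A sketch of that wiring
// follows, using bleve's registry.RegisterTokenMap; the Name constant is
// assumed to be defined alongside the constructor in the same package.
func init() {
	registry.RegisterTokenMap(Name, ArticlesTokenMapConstructor)
}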