Пример #1
0
//标准分词测试
func TestStandard(dawgFile string, text string) {
	fmt.Println("测试:" + text)
	tokenizer := mmseg.New(dawgFile)
	for iter := tokenizer.Traverse(cwsharp.NewStringReader(text)); iter.Next(); {
		fmt.Print(iter.Cur())
		fmt.Print(" / ")
	}
	fmt.Println()
}
Пример #2
0
func main() {
	file := "data//cwsharp.dawg"
	tokenizer := cwsharp.NewStopwordFilter(mmseg.New(file))
	w := map[string]bool{"the": true}
	tokenizer.CheckIgnore = func(t cwsharp.Token) bool {
		_, ok := w[t.Text()]
		if t.Kind() == cwsharp.PUNCT || ok {
			return true
		}
		return false
	}
	for _, text := range []string{"长春市长春药店", "The quick brown fox jumps over the lazy dog"} {
		for iter := tokenizer.Traverse(cwsharp.NewStringReader(text)); iter.Next(); {
			fmt.Print(iter.Cur())
			fmt.Print(" / ")
		}
		fmt.Println()
	}
}