func cut_DAG_NO_HMM(sentence string) []WordTag { result := make([]WordTag, 0) re_eng := regexp.MustCompile(`[[:alnum:]]`) dag := jiebago.GetDAG(sentence) routes := jiebago.Calc(sentence, dag, 0) x := 0 var y int runes := []rune(sentence) length := len(runes) buf := make([]rune, 0) for { if x >= length { break } y = routes[x].Index + 1 l_word := runes[x:y] if re_eng.MatchString(string(l_word)) && len(l_word) == 1 { buf = append(buf, l_word...) x = y } else { if len(buf) > 0 { result = append(result, WordTag{string(buf), "eng"}) buf = make([]rune, 0) } sl_word := string(l_word) if tag, ok := WordTagTab[sl_word]; ok { result = append(result, WordTag{sl_word, tag}) } else { result = append(result, WordTag{sl_word, "x"}) } x = y } } if len(buf) > 0 { result = append(result, WordTag{string(buf), "eng"}) buf = make([]rune, 0) } return result }
func cut_DAG(sentence string) []WordTag { dag := jiebago.GetDAG(sentence) routes := jiebago.Calc(sentence, dag, 0) x := 0 var y int runes := []rune(sentence) length := len(runes) result := make([]WordTag, 0) buf := make([]rune, 0) for { if x >= length { break } y = routes[x].Index + 1 l_word := runes[x:y] if y-x == 1 { buf = append(buf, l_word...) } else { if len(buf) > 0 { if len(buf) == 1 { sbuf := string(buf) if tag, ok := WordTagTab[sbuf]; ok { result = append(result, WordTag{sbuf, tag}) } else { result = append(result, WordTag{sbuf, "x"}) } buf = make([]rune, 0) } else { bufString := string(buf) if _, ok := jiebago.TT.Freq[bufString]; !ok { recognized := cutDetail(bufString) for _, t := range recognized { result = append(result, t) } } else { for _, elem := range buf { selem := string(elem) if tag, ok := WordTagTab[selem]; ok { result = append(result, WordTag{string(elem), tag}) } else { result = append(result, WordTag{string(elem), "x"}) } } } buf = make([]rune, 0) } } sl_word := string(l_word) if tag, ok := WordTagTab[sl_word]; ok { result = append(result, WordTag{sl_word, tag}) } else { result = append(result, WordTag{sl_word, "x"}) } } x = y } if len(buf) > 0 { if len(buf) == 1 { sbuf := string(buf) if tag, ok := WordTagTab[sbuf]; ok { result = append(result, WordTag{sbuf, tag}) } else { result = append(result, WordTag{sbuf, "x"}) } } else { bufString := string(buf) if _, ok := jiebago.TT.Freq[bufString]; !ok { recognized := cutDetail(bufString) for _, t := range recognized { result = append(result, t) } } else { for _, elem := range buf { selem := string(elem) if tag, ok := WordTagTab[selem]; ok { result = append(result, WordTag{selem, tag}) } else { result = append(result, WordTag{selem, "x"}) } } } } } return result }