func cutDetail(sentence string) []WordTag { result := make([]WordTag, 0) re_han := regexp.MustCompile(`\p{Han}+`) re_skip := regexp.MustCompile(`[[\.[:digit:]]+|[:alnum:]]+`) re_eng := regexp.MustCompile(`[[:alnum:]]`) re_num := regexp.MustCompile(`[\.[:digit:]]+`) blocks := jiebago.RegexpSplit(re_han, sentence) for _, blk := range blocks { if re_han.MatchString(blk) { for _, wordTag := range __cut(blk) { result = append(result, wordTag) } } else { for _, x := range jiebago.RegexpSplit(re_skip, blk) { if len(x) == 0 { continue } switch { case re_num.MatchString(x): result = append(result, WordTag{x, "m"}) case re_eng.MatchString(x): result = append(result, WordTag{x, "eng"}) default: result = append(result, WordTag{x, "x"}) } } } } return result }
func cut(sentence string, HMM bool) []WordTag { result := make([]WordTag, 0) re_han := regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`) re_skip := regexp.MustCompile(`(\r\n|\s)`) re_eng := regexp.MustCompile(`[[:alnum:]]`) re_num := regexp.MustCompile(`[\.[:digit:]]+`) blocks := jiebago.RegexpSplit(re_han, sentence) var cut_block cutAction if HMM { cut_block = cut_DAG } else { cut_block = cut_DAG_NO_HMM } for _, blk := range blocks { if re_han.MatchString(blk) { for _, wordTag := range cut_block(blk) { result = append(result, wordTag) } } else { for _, x := range jiebago.RegexpSplit(re_skip, blk) { if re_skip.MatchString(x) { result = append(result, WordTag{x, "x"}) } else { for _, xx := range x { s := string(xx) switch { case re_num.MatchString(s): result = append(result, WordTag{s, "m"}) case re_eng.MatchString(x): result = append(result, WordTag{x, "eng"}) break default: result = append(result, WordTag{s, "x"}) } } } } } } return result }