Example #1
0
// cutDetail splits sentence into tagged tokens.
//
// The sentence is first split on runs of Han characters. Han runs are passed
// to __cut and its results are appended unchanged. Every other block is split
// again on numeric / alphanumeric runs; each non-empty piece is tagged "m"
// when it starts with digits or dots, "eng" when it starts with an
// alphanumeric character, and "x" otherwise.
//
// NOTE(review): this relies on jiebago.RegexpSplit returning the matched text
// as well as the text between matches (the MatchString checks on its output
// only make sense in that case) — confirm against its implementation.
func cutDetail(sentence string) []WordTag {
	result := make([]WordTag, 0)
	re_han := regexp.MustCompile(`\p{Han}+`)
	// A run of digits/dots, or a run of alphanumerics. POSIX classes must sit
	// inside a bracket expression: written bare, `[:alnum:]` is parsed as the
	// set of characters ':', 'a', 'l', 'n', 'u', 'm' rather than as the class.
	re_skip := regexp.MustCompile(`[\.[:digit:]]+|[[:alnum:]]+`)
	// Both classifiers are anchored so a token is judged by how it starts:
	// "abc123" is "eng", not "m" just because it contains digits somewhere.
	re_eng := regexp.MustCompile(`^[[:alnum:]]`)
	re_num := regexp.MustCompile(`^[\.[:digit:]]+`)

	for _, blk := range jiebago.RegexpSplit(re_han, sentence) {
		if re_han.MatchString(blk) {
			result = append(result, __cut(blk)...)
			continue
		}
		for _, x := range jiebago.RegexpSplit(re_skip, blk) {
			if x == "" {
				continue
			}
			switch {
			case re_num.MatchString(x):
				result = append(result, WordTag{x, "m"})
			case re_eng.MatchString(x):
				result = append(result, WordTag{x, "eng"})
			default:
				result = append(result, WordTag{x, "x"})
			}
		}
	}

	return result
}
Example #2
0
// cut splits sentence into tagged tokens.
//
// The sentence is split on runs of Han characters, alphanumerics and the
// symbols + # & . _. Each such run is passed to cut_DAG when HMM is true, or
// to cut_DAG_NO_HMM otherwise, and the results are appended unchanged.
// The remaining blocks are split on whitespace: whitespace pieces are tagged
// "x" as a whole, and every other piece is emitted one rune at a time, tagged
// "m", "eng" or "x".
//
// NOTE(review): this relies on jiebago.RegexpSplit returning the matched text
// as well as the text between matches — confirm against its implementation.
func cut(sentence string, HMM bool) []WordTag {
	result := make([]WordTag, 0)
	re_han := regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
	re_skip := regexp.MustCompile(`(\r\n|\s)`)
	re_eng := regexp.MustCompile(`[[:alnum:]]`)
	re_num := regexp.MustCompile(`[\.[:digit:]]+`)

	var cut_block cutAction
	if HMM {
		cut_block = cut_DAG
	} else {
		cut_block = cut_DAG_NO_HMM
	}

	for _, blk := range jiebago.RegexpSplit(re_han, sentence) {
		if re_han.MatchString(blk) {
			result = append(result, cut_block(blk)...)
			continue
		}
		for _, x := range jiebago.RegexpSplit(re_skip, blk) {
			if re_skip.MatchString(x) {
				result = append(result, WordTag{x, "x"})
				continue
			}
			// Whether the piece contains an alphanumeric does not depend on
			// the rune being visited, so evaluate it once per piece.
			isEng := re_eng.MatchString(x)
			for _, r := range x {
				s := string(r)
				switch {
				case re_num.MatchString(s):
					result = append(result, WordTag{s, "m"})
				case isEng:
					// Emit the current rune, not the whole piece: this case
					// is reached once per rune, so appending x here would
					// repeat the full piece for every rune it contains.
					result = append(result, WordTag{s, "eng"})
				default:
					result = append(result, WordTag{s, "x"})
				}
			}
		}
	}
	return result
}