Beispiel #1
0
// reEngAlnum matches any string that contains at least one ASCII letter or
// digit. It is compiled once at package scope rather than on every call to
// cut_DAG_NO_HMM.
var reEngAlnum = regexp.MustCompile(`[[:alnum:]]`)

// cut_DAG_NO_HMM splits sentence into words by following the routes returned
// by jiebago.Calc and pairs every word with a tag.
//
// For the word starting at rune index x, routes[x].Index+1 is used as its
// exclusive end. Consecutive single-rune words that are ASCII alphanumeric
// are merged into one token tagged "eng". Every other word is tagged with its
// entry in WordTagTab, or "x" when it has none.
//
// The returned slice is never nil.
func cut_DAG_NO_HMM(sentence string) []WordTag {
	dag := jiebago.GetDAG(sentence)
	routes := jiebago.Calc(sentence, dag, 0)
	runes := []rune(sentence)

	result := make([]WordTag, 0)
	buf := make([]rune, 0) // pending run of single alphanumeric runes
	for x := 0; x < len(runes); {
		y := routes[x].Index + 1
		word := runes[x:y]
		x = y

		// Check the length first so the regexp only runs on one-rune words.
		if len(word) == 1 && reEngAlnum.MatchString(string(word)) {
			buf = append(buf, word...)
			continue
		}

		// A non-alphanumeric word ends the pending run; emit it first.
		if len(buf) > 0 {
			result = append(result, WordTag{string(buf), "eng"})
			// string(buf) copied the runes, so the backing array can be reused.
			buf = buf[:0]
		}

		s := string(word)
		if tag, ok := WordTagTab[s]; ok {
			result = append(result, WordTag{s, tag})
		} else {
			result = append(result, WordTag{s, "x"})
		}
	}

	// Emit a run that reaches the end of the sentence.
	if len(buf) > 0 {
		result = append(result, WordTag{string(buf), "eng"})
	}
	return result
}
Beispiel #2
0
// cut_DAG splits sentence into words by following the routes returned by
// jiebago.Calc and pairs every word with a tag.
//
// For the word starting at rune index x, routes[x].Index+1 is used as its
// exclusive end. Multi-rune words are emitted directly with their tag from
// WordTagTab, or "x" when they have none. Consecutive single-rune words are
// collected in a buffer and resolved together by flushSingleRuneBuf when a
// multi-rune word or the end of the sentence is reached.
//
// The returned slice is never nil.
func cut_DAG(sentence string) []WordTag {
	dag := jiebago.GetDAG(sentence)
	routes := jiebago.Calc(sentence, dag, 0)
	runes := []rune(sentence)

	result := make([]WordTag, 0)
	buf := make([]rune, 0) // pending run of single-rune words
	for x := 0; x < len(runes); {
		y := routes[x].Index + 1
		word := runes[x:y]
		if y-x == 1 {
			buf = append(buf, word...)
		} else {
			result = flushSingleRuneBuf(result, buf)
			// The helper only reads buf and copies what it keeps, so the
			// backing array can be reused for the next run.
			buf = buf[:0]
			result = append(result, lookupWordTag(string(word)))
		}
		x = y
	}

	// Resolve a run that reaches the end of the sentence.
	return flushSingleRuneBuf(result, buf)
}

// flushSingleRuneBuf appends the tokens for a buffered run of single-rune
// words to result and returns the extended slice. buf is not modified.
//
//   - An empty buffer appends nothing.
//   - A one-rune buffer is tagged directly through lookupWordTag.
//   - A longer buffer whose joined string is not a key of jiebago.TT.Freq is
//     passed to cutDetail and its output appended as is.
//   - A longer buffer whose joined string is a key of jiebago.TT.Freq is
//     emitted rune by rune, each tagged through lookupWordTag.
func flushSingleRuneBuf(result []WordTag, buf []rune) []WordTag {
	switch len(buf) {
	case 0:
		return result
	case 1:
		return append(result, lookupWordTag(string(buf)))
	}

	joined := string(buf)
	if _, known := jiebago.TT.Freq[joined]; !known {
		for _, t := range cutDetail(joined) {
			result = append(result, t)
		}
		return result
	}
	for _, r := range buf {
		result = append(result, lookupWordTag(string(r)))
	}
	return result
}

// lookupWordTag pairs word with its tag from WordTagTab, falling back to the
// tag "x" when the table has no entry for word.
func lookupWordTag(word string) WordTag {
	if tag, ok := WordTagTab[word]; ok {
		return WordTag{word, tag}
	}
	return WordTag{word, "x"}
}