Beispiel #1
0
// cutDAG segments sentence along the route returned by seg.calc and streams
// the resulting tokens on the returned channel. The channel is closed once the
// whole sentence has been emitted.
//
// Fragments spanning more than one rune are sent as-is. Consecutive one-rune
// fragments are accumulated in a buffer and flushed together right before the
// next multi-rune fragment, and once more at the end of the sentence:
//   - a buffer holding a single rune is sent unchanged;
//   - a buffer for which seg.dict.Frequency reports no entry, or a frequency
//     of zero, is handed to finalseg.Cut and its tokens are forwarded;
//   - otherwise every buffered rune is sent as its own token.
//
// The channel is unbuffered and there is no cancellation path, so the
// producing goroutine blocks on every send until the token is received.
// Callers must drain the channel, or the goroutine never exits.
func (seg *Segmenter) cutDAG(sentence string) <-chan string {
	result := make(chan string)
	go func() {
		defer close(result)

		runes := []rune(sentence)
		routes := seg.calc(runes)

		// pending collects the run of adjacent one-rune fragments seen so far.
		var pending []rune

		// flush emits whatever is pending and empties the buffer. It is a
		// no-op when nothing has been collected.
		flush := func() {
			if len(pending) == 0 {
				return
			}
			word := string(pending)
			if len(pending) == 1 {
				result <- word
			} else if freq, ok := seg.dict.Frequency(word); !ok || freq == 0.0 {
				// The run as a whole is not a dictionary word: let finalseg
				// decide how to split it.
				for token := range finalseg.Cut(word) {
					result <- token
				}
			} else {
				// The run is a dictionary word, yet the route split it into
				// single runes; keep that split.
				// NOTE(review): presumably this defers to the route's choice,
				// as in upstream jieba — confirm.
				for _, r := range pending {
					result <- string(r)
				}
			}
			// Everything sent above is a copied string, so the backing array
			// can be reused safely.
			pending = pending[:0]
		}

		for x := 0; x < len(runes); {
			y := routes[x].index + 1
			frag := runes[x:y]
			if y-x == 1 {
				pending = append(pending, frag...)
			} else {
				flush()
				result <- string(frag)
			}
			x = y
		}
		flush()
	}()
	return result
}
Beispiel #2
0
// cut_DAG segments sentence along the route produced by Calc over the DAG
// from GetDAG and returns the tokens in order. The returned slice is never
// nil; an empty sentence yields an empty slice.
//
// Fragments spanning more than one rune are appended as-is. Consecutive
// one-rune fragments are accumulated in a buffer and flushed together right
// before the next multi-rune fragment, and once more at the end of the
// sentence:
//   - a buffer holding a single rune is appended unchanged;
//   - a buffer that is not a key of TT.Freq is handed to finalseg.Cut and the
//     tokens it returns are appended;
//   - otherwise every buffered rune is appended as its own token.
func cut_DAG(sentence string) []string {
	dag := GetDAG(sentence)
	routes := Calc(sentence, dag, 0)
	runes := []rune(sentence)

	result := make([]string, 0)

	// pending collects the run of adjacent one-rune fragments seen so far.
	var pending []rune

	// flush appends whatever is pending to result and empties the buffer. It
	// is a no-op when nothing has been collected.
	flush := func() {
		if len(pending) == 0 {
			return
		}
		word := string(pending)
		if len(pending) == 1 {
			result = append(result, word)
		} else if _, known := TT.Freq[word]; !known {
			// The run as a whole is not a dictionary entry: let finalseg
			// decide how to split it.
			for _, token := range finalseg.Cut(word) {
				result = append(result, token)
			}
		} else {
			// The run is a dictionary entry, yet the route split it into
			// single runes; keep that split.
			// NOTE(review): presumably this defers to the route's choice, as
			// in upstream jieba — confirm.
			for _, r := range pending {
				result = append(result, string(r))
			}
		}
		// Everything appended above is a copied string, so the backing array
		// can be reused safely.
		pending = pending[:0]
	}

	for x := 0; x < len(runes); {
		y := routes[x].Index + 1
		frag := runes[x:y]
		if y-x == 1 {
			pending = append(pending, frag...)
		} else {
			flush()
			result = append(result, string(frag))
		}
		x = y
	}
	flush()

	return result
}