Esempio n. 1
0
// cutDetail streams the tagged segments of sentence over the returned channel.
//
// The sentence is split with util.RegexpSplit on reHanDetail. Blocks that
// match reHanDetail are delegated to seg.cutDetailInternal. Every other block
// is split again on reSkipDetail, and each non-empty piece is emitted whole,
// tagged "m" when reNum matches it, "eng" when reEng matches it, and "x"
// otherwise.
//
// The channel is unbuffered and is closed once the whole sentence has been
// processed; the producing goroutine blocks on each send until the value is
// received.
func (seg *Segmenter) cutDetail(sentence string) <-chan Segment {
	out := make(chan Segment)
	go func() {
		defer close(out)
		for _, block := range util.RegexpSplit(reHanDetail, sentence, -1) {
			if !reHanDetail.MatchString(block) {
				for _, piece := range util.RegexpSplit(reSkipDetail, block, -1) {
					if piece == "" {
						continue
					}
					// reNum takes precedence over reEng, which takes
					// precedence over the fallback tag.
					tag := "x"
					if reNum.MatchString(piece) {
						tag = "m"
					} else if reEng.MatchString(piece) {
						tag = "eng"
					}
					out <- Segment{piece, tag}
				}
				continue
			}
			for tagged := range seg.cutDetailInternal(block) {
				out <- tagged
			}
		}
	}()
	return out
}
Esempio n. 2
0
// Cut cuts a sentence into words using accurate mode and streams them over
// the returned channel.
//
// Parameter hmm selects the block cutter: seg.cutDAG when true (Hidden Markov
// Model enabled), seg.cutDAGNoHMM when false.
//
// The sentence is split with util.RegexpSplit on reHanDefault and empty
// blocks are ignored. Blocks matching reHanDefault go through the selected
// cutter. Other blocks are split on reSkipDefault: pieces matching
// reSkipDefault are emitted whole, the rest are emitted one rune at a time.
//
// The channel is unbuffered and is closed after the last word is sent.
func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan string {
	var cut cutFunc = seg.cutDAGNoHMM
	if hmm {
		cut = seg.cutDAG
	}

	words := make(chan string)
	go func() {
		defer close(words)
		for _, block := range util.RegexpSplit(reHanDefault, sentence, -1) {
			switch {
			case block == "":
				// Nothing to emit for an empty block.
			case reHanDefault.MatchString(block):
				for word := range cut(block) {
					words <- word
				}
			default:
				for _, piece := range util.RegexpSplit(reSkipDefault, block, -1) {
					if !reSkipDefault.MatchString(piece) {
						// Not a skip token: emit rune by rune.
						for _, r := range piece {
							words <- string(r)
						}
						continue
					}
					words <- piece
				}
			}
		}
	}()
	return words
}
Esempio n. 3
0
// Cut cuts a sentence into tagged words and streams them over the returned
// channel.
//
// Parameter hmm controls whether to use the Hidden Markov Model: seg.cutDAG
// is used when it is true, seg.cutDAGNoHMM otherwise.
//
// The sentence is split with util.RegexpSplit on reHanInternal. Blocks that
// match reHanInternal are handed to the selected cutter. Every other block is
// split on reSkipInternal: pieces matching reSkipInternal are emitted whole
// with tag "x", and the remaining pieces are emitted one rune at a time,
// tagged "m" when reNum matches the rune, "eng" when reEng matches it, and
// "x" otherwise.
//
// The channel is unbuffered and is closed after the last segment is sent; the
// producing goroutine blocks on each send until the value is received.
func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan Segment {
	result := make(chan Segment)
	var cut cutFunc
	if hmm {
		cut = seg.cutDAG
	} else {
		cut = seg.cutDAGNoHMM
	}
	go func() {
		defer close(result)
		for _, blk := range util.RegexpSplit(reHanInternal, sentence, -1) {
			if reHanInternal.MatchString(blk) {
				for wordTag := range cut(blk) {
					result <- wordTag
				}
				continue
			}
			for _, x := range util.RegexpSplit(reSkipInternal, blk, -1) {
				if reSkipInternal.MatchString(x) {
					result <- Segment{x, "x"}
					continue
				}
				for _, xx := range x {
					s := string(xx)
					// Classify and emit the current rune only. Using the
					// whole piece x here would send it once per rune and
					// tag runes based on their neighbours.
					switch {
					case reNum.MatchString(s):
						result <- Segment{s, "m"}
					case reEng.MatchString(s):
						result <- Segment{s, "eng"}
					default:
						result <- Segment{s, "x"}
					}
				}
			}
		}
	}()
	return result
}
Esempio n. 4
0
// CutAll cuts a sentence into words using full mode and streams them over the
// returned channel. Full mode gets all the possible words from the sentence;
// it is fast but not accurate.
//
// The sentence is split with util.RegexpSplit on reHanCutAll and empty blocks
// are ignored. Blocks matching reHanCutAll are expanded through seg.cutAll.
// Other blocks are split with reSkipCutAll.Split and every resulting piece is
// emitted as is, including empty ones.
//
// The channel is unbuffered and is closed after the last word is sent.
func (seg *Segmenter) CutAll(sentence string) <-chan string {
	words := make(chan string)
	go func() {
		defer close(words)
		for _, block := range util.RegexpSplit(reHanCutAll, sentence, -1) {
			if block == "" {
				continue
			}
			if !reHanCutAll.MatchString(block) {
				for _, piece := range reSkipCutAll.Split(block, -1) {
					words <- piece
				}
				continue
			}
			for word := range seg.cutAll(block) {
				words <- word
			}
		}
	}()
	return words
}