func (seg *Segmenter) cutDetail(sentence string) <-chan Segment { result := make(chan Segment) go func() { for _, blk := range util.RegexpSplit(reHanDetail, sentence, -1) { if reHanDetail.MatchString(blk) { for segment := range seg.cutDetailInternal(blk) { result <- segment } continue } for _, x := range util.RegexpSplit(reSkipDetail, blk, -1) { if len(x) == 0 { continue } switch { case reNum.MatchString(x): result <- Segment{x, "m"} case reEng.MatchString(x): result <- Segment{x, "eng"} default: result <- Segment{x, "x"} } } } close(result) }() return result }
// Cut cuts a sentence into words using accurate mode. // Parameter hmm controls whether to use the Hidden Markov Model. // Accurate mode attempts to cut the sentence into the most accurate // segmentations, which is suitable for text analysis. func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan string { result := make(chan string) var cut cutFunc if hmm { cut = seg.cutDAG } else { cut = seg.cutDAGNoHMM } go func() { for _, block := range util.RegexpSplit(reHanDefault, sentence, -1) { if len(block) == 0 { continue } if reHanDefault.MatchString(block) { for x := range cut(block) { result <- x } continue } for _, subBlock := range util.RegexpSplit(reSkipDefault, block, -1) { if reSkipDefault.MatchString(subBlock) { result <- subBlock continue } for _, r := range subBlock { result <- string(r) } } } close(result) }() return result }
// Cut cuts a sentence into words. // Parameter hmm controls whether to use the Hidden Markov Model. func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan Segment { result := make(chan Segment) var cut cutFunc if hmm { cut = seg.cutDAG } else { cut = seg.cutDAGNoHMM } go func() { for _, blk := range util.RegexpSplit(reHanInternal, sentence, -1) { if reHanInternal.MatchString(blk) { for wordTag := range cut(blk) { result <- wordTag } continue } for _, x := range util.RegexpSplit(reSkipInternal, blk, -1) { if reSkipInternal.MatchString(x) { result <- Segment{x, "x"} continue } for _, xx := range x { s := string(xx) switch { case reNum.MatchString(s): result <- Segment{s, "m"} case reEng.MatchString(x): result <- Segment{x, "eng"} default: result <- Segment{s, "x"} } } } } close(result) }() return result }
// CutAll cuts a sentence into words using full mode. // Full mode gets all the possible words from the sentence. // Fast but not accurate. func (seg *Segmenter) CutAll(sentence string) <-chan string { result := make(chan string) go func() { for _, block := range util.RegexpSplit(reHanCutAll, sentence, -1) { if len(block) == 0 { continue } if reHanCutAll.MatchString(block) { for x := range seg.cutAll(block) { result <- x } continue } for _, subBlock := range reSkipCutAll.Split(block, -1) { result <- subBlock } } close(result) }() return result }