func (s *Segment) processAfterSegment(text string, result *list.List) { // 匹配同义词 if s.options.SynonymOutput { node := result.Front() for node != nil { pW := node.Value.(*dict.WordInfo) synonyms := s.synonym.GetSynonyms(pW.Word) if synonyms != nil { for _, word := range synonyms { node = result.InsertAfter(dict.NewWordInfo(word, pW.Position, pW.Pos, pW.Frequency, s.params.SymbolRank, dict.TSynonym, pW.WordType), node) } } node = node.Next() } } // 通配符匹配 if s.options.WildcardOutput { // todo: >>>>>>> } }
func (s *Segment) preSegment(text string) *list.List { result := s.getInitSegment(text) runes := utils.ToRunes(text) cur := result.Front() for cur != nil { if s.options.IgnoreSpace { if cur.Value.(*dict.WordInfo).WordType == dict.TSpace { lst := cur cur = cur.Next() result.Remove(lst) continue } } switch cur.Value.(*dict.WordInfo).WordType { case dict.TSimplifiedChinese: inputText := cur.Value.(*dict.WordInfo).Word originalWordType := dict.TSimplifiedChinese pls := s.wordDictionary.GetAllMatchs(inputText, s.options.ChineseNameIdentify) chsMatch := match.NewChsFullTextMatch(s.wordDictionary) chsMatch.SetOptionParams(s.options, s.params) chsMatchWords := chsMatch.Match(pls, inputText) curChsMatch := chsMatchWords.Front() for curChsMatch != nil { wi := curChsMatch.Value.(*dict.WordInfo) wi.Position += cur.Value.(*dict.WordInfo).Position wi.OriginalWordType = originalWordType wi.WordType = originalWordType curChsMatch = curChsMatch.Next() } rcur := utils.InsertAfterList(result, chsMatchWords, cur) removeItem := cur cur = rcur.Next() result.Remove(removeItem) case dict.TEnglish: cur.Value.(*dict.WordInfo).Rank = s.params.EnglishRank cur.Value.(*dict.WordInfo).Word = s.convertChineseCapicalToAsiic(cur.Value.(*dict.WordInfo).Word) if s.options.IgnoreCapital { cur.Value.(*dict.WordInfo).Word = strings.ToLower(cur.Value.(*dict.WordInfo).Word) } if s.options.EnglishSegment { lower := strings.ToLower(cur.Value.(*dict.WordInfo).Word) if lower != cur.Value.(*dict.WordInfo).Word { result.InsertBefore(dict.NewWordInfo(lower, cur.Value.(*dict.WordInfo).Position, dict.POS_A_NX, 1, s.params.EnglishLowerRank, dict.TEnglish, dict.TEnglish), cur) } stem := s.getStem(lower) if len(stem) > 0 { if lower != stem { result.InsertBefore(dict.NewWordInfo(stem, cur.Value.(*dict.WordInfo).Position, dict.POS_A_NX, 1, s.params.EnglishStemRank, dict.TEnglish, dict.TEnglish), cur) } } } if s.options.EnglishMultiDimensionality { needSplit := false for _, c := range cur.Value.(*dict.WordInfo).Word { if (c >= '0' && c <= '9') || (c == '_') { needSplit = true break } } if needSplit { output := s.re.FindAllString(cur.Value.(*dict.WordInfo).Word, -1) if len(output) > 1 { position := cur.Value.(*dict.WordInfo).Position for _, splitWord := range output { if len(splitWord) == 0 { continue } var wi *dict.WordInfo r := utils.FirstRune(splitWord) if r >= '0' && r <= '9' { wi = dict.NewWordInfoSome(splitWord, dict.POS_A_M, 1) wi.Position = position wi.Rank = s.params.NumericRank wi.OriginalWordType = dict.TEnglish wi.WordType = dict.TNumeric } else { wi = dict.NewWordInfoSome(splitWord, dict.POS_A_NX, 1) wi.Position = position wi.Rank = s.params.EnglishRank wi.OriginalWordType = dict.TEnglish wi.WordType = dict.TEnglish } result.InsertBefore(wi, cur) position += utils.RuneLen(splitWord) } } } } var ok bool if ok, cur = s.mergeEnglishSpecialWord(runes, result, cur); !ok { cur = cur.Next() } case dict.TNumeric: cur.Value.(*dict.WordInfo).Word = s.convertChineseCapicalToAsiic(cur.Value.(*dict.WordInfo).Word) cur.Value.(*dict.WordInfo).Rank = s.params.NumericRank var ok bool if ok, cur = s.mergeEnglishSpecialWord(runes, result, cur); !ok { cur = cur.Next() } case dict.TSymbol: cur.Value.(*dict.WordInfo).Rank = s.params.SymbolRank cur = cur.Next() default: cur = cur.Next() } } return result }