예제 #1
0
// processAfterSegment post-processes the segmentation result in place.
//
// When the SynonymOutput option is set, each word in result is looked up via
// s.synonym.GetSynonyms and every synonym returned is inserted directly after
// it. A synonym entry reuses the source word's Position, Pos, Frequency and
// WordType, and is created with s.params.SymbolRank and dict.TSynonym.
//
// The WildcardOutput option is recognised but has no effect yet. The text
// parameter is currently unused.
func (s *Segment) processAfterSegment(text string, result *list.List) {
	// Synonym matching.
	if s.options.SynonymOutput {
		// The cursor is advanced onto each inserted synonym, so the loop's
		// Next() step moves past them and synonyms are not expanded again.
		for elem := result.Front(); elem != nil; elem = elem.Next() {
			src := elem.Value.(*dict.WordInfo)
			synonyms := s.synonym.GetSynonyms(src.Word)
			if synonyms == nil {
				continue
			}
			for _, syn := range synonyms {
				entry := dict.NewWordInfo(syn, src.Position, src.Pos, src.Frequency, s.params.SymbolRank, dict.TSynonym, src.WordType)
				elem = result.InsertAfter(entry, elem)
			}
		}
	}

	// Wildcard matching.
	if s.options.WildcardOutput {
		// TODO: not implemented yet.
	}

}
예제 #2
0
// preSegment builds the initial token list for text (via getInitSegment) and
// then walks it once, refining each token according to its WordType:
//
//   - TSpace tokens are removed when the IgnoreSpace option is set.
//   - TSimplifiedChinese tokens are replaced by the words produced by the
//     full-text matcher, with positions shifted by the token's own position.
//   - TEnglish tokens get the English rank, are passed through
//     convertChineseCapicalToAsiic, optionally lower-cased, and may have
//     lower-case, stem and digit/underscore split variants inserted before
//     them; mergeEnglishSpecialWord is then given a chance to merge them.
//   - TNumeric tokens are passed through convertChineseCapicalToAsiic, get the
//     numeric rank and are offered to mergeEnglishSpecialWord.
//   - TSymbol tokens get the symbol rank.
//
// Any other token type is left untouched. The refined list is returned.
func (s *Segment) preSegment(text string) *list.List {
	result := s.getInitSegment(text)
	runes := utils.ToRunes(text)

	cur := result.Front()
	for cur != nil {
		info := cur.Value.(*dict.WordInfo)

		// Drop whitespace tokens when configured to do so.
		if s.options.IgnoreSpace && info.WordType == dict.TSpace {
			next := cur.Next()
			result.Remove(cur)
			cur = next
			continue
		}

		switch info.WordType {
		case dict.TSimplifiedChinese:
			source := info.Word
			candidates := s.wordDictionary.GetAllMatchs(source, s.options.ChineseNameIdentify)
			matcher := match.NewChsFullTextMatch(s.wordDictionary)
			matcher.SetOptionParams(s.options, s.params)
			words := matcher.Match(candidates, source)

			// Shift each matched word by the position of the token it came
			// from and tag it as simplified Chinese.
			for e := words.Front(); e != nil; e = e.Next() {
				w := e.Value.(*dict.WordInfo)
				w.Position += info.Position
				w.OriginalWordType = dict.TSimplifiedChinese
				w.WordType = dict.TSimplifiedChinese
			}

			// Splice the matched words in after the token, then remove the
			// token itself. The next cursor is taken before the removal.
			last := utils.InsertAfterList(result, words, cur)
			replaced := cur
			cur = last.Next()
			result.Remove(replaced)

		case dict.TEnglish:
			info.Rank = s.params.EnglishRank
			info.Word = s.convertChineseCapicalToAsiic(info.Word)
			if s.options.IgnoreCapital {
				info.Word = strings.ToLower(info.Word)
			}

			if s.options.EnglishSegment {
				// Emit a lower-case variant, and a stem variant, ahead of the
				// token whenever they differ from what is already present.
				lower := strings.ToLower(info.Word)
				if lower != info.Word {
					lowerInfo := dict.NewWordInfo(lower, info.Position, dict.POS_A_NX, 1, s.params.EnglishLowerRank, dict.TEnglish, dict.TEnglish)
					result.InsertBefore(lowerInfo, cur)
				}
				stem := s.getStem(lower)
				if len(stem) > 0 && lower != stem {
					stemInfo := dict.NewWordInfo(stem, info.Position, dict.POS_A_NX, 1, s.params.EnglishStemRank, dict.TEnglish, dict.TEnglish)
					result.InsertBefore(stemInfo, cur)
				}
			}

			// Only words containing a digit or an underscore are candidates
			// for splitting into sub-tokens.
			if s.options.EnglishMultiDimensionality && strings.ContainsAny(info.Word, "0123456789_") {
				pieces := s.re.FindAllString(info.Word, -1)
				if len(pieces) > 1 {
					position := info.Position
					for _, piece := range pieces {
						if len(piece) == 0 {
							continue
						}

						// A piece starting with a digit is numeric; anything
						// else is treated as English.
						var part *dict.WordInfo
						if first := utils.FirstRune(piece); first >= '0' && first <= '9' {
							part = dict.NewWordInfoSome(piece, dict.POS_A_M, 1)
							part.Rank = s.params.NumericRank
							part.WordType = dict.TNumeric
						} else {
							part = dict.NewWordInfoSome(piece, dict.POS_A_NX, 1)
							part.Rank = s.params.EnglishRank
							part.WordType = dict.TEnglish
						}
						part.Position = position
						part.OriginalWordType = dict.TEnglish

						result.InsertBefore(part, cur)
						position += utils.RuneLen(piece)
					}
				}
			}

			// When no merge happened, step to the next element ourselves.
			merged, next := s.mergeEnglishSpecialWord(runes, result, cur)
			cur = next
			if !merged {
				cur = cur.Next()
			}

		case dict.TNumeric:
			info.Word = s.convertChineseCapicalToAsiic(info.Word)
			info.Rank = s.params.NumericRank

			// When no merge happened, step to the next element ourselves.
			merged, next := s.mergeEnglishSpecialWord(runes, result, cur)
			cur = next
			if !merged {
				cur = cur.Next()
			}

		case dict.TSymbol:
			info.Rank = s.params.SymbolRank
			cur = cur.Next()

		default:
			cur = cur.Next()
		}
	}
	return result
}