Ejemplo n.º 1
0
// Less reports whether docs[i] must be ordered before docs[j].
//
// To sort from largest to smallest this deliberately implements "more":
// the two score slices are compared position by position and the document
// with the larger score at the first differing position comes first. When
// every shared position is equal, the document with more scores comes first.
func (docs ScoredDocuments) Less(i, j int) bool {
	left, right := docs[i].Scores, docs[j].Scores

	// The shared length cannot change during the comparison, so compute it
	// once instead of on every loop iteration.
	shared := utils.MinInt(len(left), len(right))
	for k := 0; k < shared; k++ {
		switch {
		case left[k] > right[k]:
			return true
		case left[k] < right[k]:
			return false
		}
	}
	return len(left) > len(right)
}
Ejemplo n.º 2
0
// Rank scores the given documents and returns them sorted.
//
// Every document is scored with options.SearchScorer, using the fields stored
// under its DocId; documents for which the scorer returns no scores are
// dropped. The survivors are sorted with sort.Sort (reversed when
// options.ReverseOrder is set) and the window selected by
// options.OutputOffset and options.MaxOutputs is returned. A MaxOutputs of 0
// means "no limit".
//
// A negative OutputOffset is treated as 0 and a negative MaxOutputs as
// "no limit"; both previously caused a slice-bounds panic.
//
// The process is terminated via log.Fatal if the ranker is not initialized.
func (self *WuKongRanker) Rank(
	docs []search.IndexedDocument, options search.RankOptions) (outputDocs search.ScoredDocuments) {
	if !self.initialized {
		log.Fatal("排序器尚未初始化")
	}

	// Score every document.
	for _, d := range docs {
		// Hold the read lock only for the field lookup, not while scoring.
		self.lock.RLock()
		fs := self.lock.fields[d.DocId]
		self.lock.RUnlock()
		// Compute the scores and drop documents that received none.
		scores := options.SearchScorer.Score(d, fs)
		if len(scores) > 0 {
			outputDocs = append(outputDocs, search.ScoredDocument{
				DocId:                 d.DocId,
				Scores:                scores,
				TokenSnippetLocations: d.TokenSnippetLocations,
				TokenLocations:        d.TokenLocations})
		}
	}

	// Sort.
	if options.ReverseOrder {
		sort.Sort(sort.Reverse(outputDocs))
	} else {
		sort.Sort(outputDocs)
	}

	// Return only the requested window when the caller asked for one.
	// start is clamped into [0, len] so the slice expression cannot panic.
	start := options.OutputOffset
	if start < 0 {
		start = 0
	}
	if start > len(outputDocs) {
		start = len(outputDocs)
	}
	end := len(outputDocs)
	// Comparing against the remaining length (instead of adding first)
	// keeps the arithmetic free of integer overflow.
	if options.MaxOutputs > 0 && options.MaxOutputs < end-start {
		end = start + options.MaxOutputs
	}
	return outputDocs[start:end]
}
Ejemplo n.º 3
0
// Search looks up the documents matching request and returns them ranked.
// The original author documents this function as thread-safe.
//
// Query tokens are obtained by segmenting request.Text (stop tokens are
// removed) or, when Text is empty, taken from request.Tokens. The lookup is
// sent to every indexer shard; the per-shard ranker outputs are merged,
// sorted again and cut to the window selected by OutputOffset and MaxOutputs.
// A MaxOutputs of 0 means "no limit". A negative OutputOffset is treated as 0
// and a negative MaxOutputs as "no limit"; both previously caused a
// slice-bounds panic.
//
// When request.RankOptions carries no SearchScorer, the scorer from the
// engine's init options is used. An empty response is returned (after
// logging) if request.RankOptions is nil or no scorer is available at all.
//
// With a positive request.Timeout (scaled by NumNanosecondsInAMillisecond, so
// presumably milliseconds) collection stops as soon as the deadline passes;
// output.Timeout is then true and output.Docs holds only what arrived in time.
//
// The process is terminated via log.Fatal if the engine is not initialized.
func (engine *Engine) Search(request SearchRequest) (output SearchResponse) {
	if !engine.initialized {
		log.Fatal("必须先初始化引擎")
	}

	if request.RankOptions == nil {
		log.Println("必须设置搜索排序选项")
		return
	}
	rankOptions := *request.RankOptions
	// Fall back to the engine-wide scorer. This has to happen after the copy
	// above, otherwise the copy overwrites the default again.
	if rankOptions.SearchScorer == nil {
		rankOptions.SearchScorer = engine.initOptions.SearchScorer
	}
	if rankOptions.SearchScorer == nil {
		log.Println("必须设置打分器")
		return
	}

	// Collect the query tokens.
	tokens := []string{}
	if request.Text != "" {
		for _, s := range engine.segmenter.Cut([]byte(request.Text), true) {
			token := s.GetToken().GetText()
			if !engine.stopTokens.IsStopToken(token) {
				tokens = append(tokens, token)
			}
		}
	} else {
		tokens = append(tokens, request.Tokens...)
	}

	// Channel on which the rankers hand back their results. It is buffered
	// with one slot per shard, so a ranker that answers after a timeout can
	// still send without blocking.
	rankerReturnChannel := make(
		chan rankerReturnRequest, engine.initOptions.NumShards)

	// Build the lookup request.
	lookupRequest := indexerLookupRequest{
		tokens:              tokens,
		labels:              request.Labels,
		docIds:              request.DocIds,
		options:             rankOptions,
		rankerReturnChannel: rankerReturnChannel}

	// Send the lookup request to every indexer shard.
	for shard := 0; shard < engine.initOptions.NumShards; shard++ {
		engine.indexerLookupChannels[shard] <- lookupRequest
	}

	// Read the ranker outputs from the channel.
	rankOutput := ScoredDocuments{}
	isTimeout := false
	if request.Timeout <= 0 {
		// No timeout: wait for every shard.
		for shard := 0; shard < engine.initOptions.NumShards; shard++ {
			rankerOutput := <-rankerReturnChannel
			rankOutput = append(rankOutput, rankerOutput.docs...)
		}
	} else {
		// One timer covers the whole collection phase.
		timer := time.NewTimer(time.Nanosecond * time.Duration(NumNanosecondsInAMillisecond*request.Timeout))
		defer timer.Stop()
	collect:
		for shard := 0; shard < engine.initOptions.NumShards; shard++ {
			select {
			case rankerOutput := <-rankerReturnChannel:
				rankOutput = append(rankOutput, rankerOutput.docs...)
			case <-timer.C:
				isTimeout = true
				// A bare break would only leave the select; the label is
				// needed to stop waiting for the remaining shards.
				break collect
			}
		}
	}

	// Sort the merged results again.
	if rankOptions.ReverseOrder {
		sort.Sort(sort.Reverse(rankOutput))
	} else {
		sort.Sort(rankOutput)
	}

	// Prepare the output. start is clamped into [0, len] so the slice
	// expression cannot panic.
	output.Tokens = tokens
	start := rankOptions.OutputOffset
	if start < 0 {
		start = 0
	}
	if start > len(rankOutput) {
		start = len(rankOutput)
	}
	end := len(rankOutput)
	// Comparing against the remaining length (instead of adding first)
	// keeps the arithmetic free of integer overflow.
	if rankOptions.MaxOutputs > 0 && rankOptions.MaxOutputs < end-start {
		end = start + rankOptions.MaxOutputs
	}
	output.Docs = rankOutput[start:end]
	output.Timeout = isTimeout
	return
}