// 给文档评分并排序 func (ranker *Ranker) Rank( docs []types.IndexedDocument, options types.RankOptions, countDocsOnly bool) (types.ScoredDocuments, int) { if ranker.initialized == false { log.Fatal("排序器尚未初始化") } // 对每个文档评分 var outputDocs types.ScoredDocuments numDocs := 0 for _, d := range docs { ranker.lock.RLock() // 判断doc是否存在 if _, ok := ranker.lock.docs[d.DocId]; ok { fs := ranker.lock.fields[d.DocId] ranker.lock.RUnlock() // 计算评分并剔除没有分值的文档 scores := options.ScoringCriteria.Score(d, fs) if len(scores) > 0 { if !countDocsOnly { outputDocs = append(outputDocs, types.ScoredDocument{ DocId: d.DocId, Scores: scores, TokenSnippetLocations: d.TokenSnippetLocations, TokenLocations: d.TokenLocations}) } numDocs++ } } else { ranker.lock.RUnlock() } } // 排序 if !countDocsOnly { if options.ReverseOrder { sort.Sort(sort.Reverse(outputDocs)) } else { sort.Sort(outputDocs) } // 当用户要求只返回部分结果时返回部分结果 var start, end int if options.MaxOutputs != 0 { start = utils.MinInt(options.OutputOffset, len(outputDocs)) end = utils.MinInt(options.OutputOffset+options.MaxOutputs, len(outputDocs)) } else { start = utils.MinInt(options.OutputOffset, len(outputDocs)) end = len(outputDocs) } return outputDocs[start:end], numDocs } return outputDocs, numDocs }
func (docs ScoredDocuments) Less(i, j int) bool { // 为了从大到小排序,这实际上实现的是More的功能 for iScore := 0; iScore < utils.MinInt(len(docs[i].Scores), len(docs[j].Scores)); iScore++ { if docs[i].Scores[iScore] > docs[j].Scores[iScore] { return true } else if docs[i].Scores[iScore] < docs[j].Scores[iScore] { return false } } return len(docs[i].Scores) > len(docs[j].Scores) }
// 查找满足搜索条件的文档,此函数线程安全 func (engine *Engine) Search(request types.SearchRequest) (output types.SearchResponse) { if !engine.initialized { log.Fatal("必须先初始化引擎") } var rankOptions types.RankOptions if request.RankOptions == nil { rankOptions = *engine.initOptions.DefaultRankOptions } else { rankOptions = *request.RankOptions } if rankOptions.ScoringCriteria == nil { rankOptions.ScoringCriteria = engine.initOptions.DefaultRankOptions.ScoringCriteria } // 收集关键词 tokens := []string{} if request.Text != "" { querySegments := engine.segmenter.Segment([]byte(request.Text)) for _, s := range querySegments { token := s.Token().Text() if !engine.stopTokens.IsStopToken(token) { tokens = append(tokens, s.Token().Text()) } } } else { for _, t := range request.Tokens { tokens = append(tokens, t) } } // 建立排序器返回的通信通道 rankerReturnChannel := make( chan rankerReturnRequest, engine.initOptions.NumShards) // 生成查找请求 lookupRequest := indexerLookupRequest{ countDocsOnly: request.CountDocsOnly, tokens: tokens, labels: request.Labels, docIds: request.DocIds, options: rankOptions, rankerReturnChannel: rankerReturnChannel, } // 向索引器发送查找请求 for shard := 0; shard < engine.initOptions.NumShards; shard++ { engine.indexerLookupChannels[shard] <- lookupRequest } // 从通信通道读取排序器的输出 numDocs := 0 rankOutput := types.ScoredDocuments{} timeout := request.Timeout isTimeout := false if timeout <= 0 { // 不设置超时 for shard := 0; shard < engine.initOptions.NumShards; shard++ { rankerOutput := <-rankerReturnChannel if !request.CountDocsOnly { for _, doc := range rankerOutput.docs { rankOutput = append(rankOutput, doc) } } numDocs += rankerOutput.numDocs } } else { // 设置超时 deadline := time.Now().Add(time.Nanosecond * time.Duration(NumNanosecondsInAMillisecond*request.Timeout)) for shard := 0; shard < engine.initOptions.NumShards; shard++ { select { case rankerOutput := <-rankerReturnChannel: if !request.CountDocsOnly { for _, doc := range rankerOutput.docs { rankOutput = append(rankOutput, doc) } } numDocs += rankerOutput.numDocs case <-time.After(deadline.Sub(time.Now())): isTimeout = true break } } } // 再排序 if !request.CountDocsOnly { if rankOptions.ReverseOrder { sort.Sort(sort.Reverse(rankOutput)) } else { sort.Sort(rankOutput) } } // 准备输出 output.Tokens = tokens if !request.CountDocsOnly { var start, end int if rankOptions.MaxOutputs == 0 { start = utils.MinInt(rankOptions.OutputOffset, len(rankOutput)) end = len(rankOutput) } else { start = utils.MinInt(rankOptions.OutputOffset, len(rankOutput)) end = utils.MinInt(start+rankOptions.MaxOutputs, len(rankOutput)) } output.Docs = rankOutput[start:end] } output.NumDocs = numDocs output.Timeout = isTimeout return }