func (docs ScoredDocuments) Less(i, j int) bool { // 为了从大到小排序,这实际上实现的是More的功能 for iScore := 0; iScore < utils.MinInt(len(docs[i].Scores), len(docs[j].Scores)); iScore++ { if docs[i].Scores[iScore] > docs[j].Scores[iScore] { return true } else if docs[i].Scores[iScore] < docs[j].Scores[iScore] { return false } } return len(docs[i].Scores) > len(docs[j].Scores) }
// 给文档评分并排序 func (self *WuKongRanker) Rank( docs []search.IndexedDocument, options search.RankOptions) (outputDocs search.ScoredDocuments) { if self.initialized == false { log.Fatal("排序器尚未初始化") } // 对每个文档评分 for _, d := range docs { self.lock.RLock() fs := self.lock.fields[d.DocId] self.lock.RUnlock() // 计算评分并剔除没有分值的文档 scores := options.SearchScorer.Score(d, fs) if len(scores) > 0 { outputDocs = append(outputDocs, search.ScoredDocument{ DocId: d.DocId, Scores: scores, TokenSnippetLocations: d.TokenSnippetLocations, TokenLocations: d.TokenLocations}) } } // 排序 if options.ReverseOrder { sort.Sort(sort.Reverse(outputDocs)) } else { sort.Sort(outputDocs) } // 当用户要求只返回部分结果时返回部分结果 var start, end int if options.MaxOutputs != 0 { start = utils.MinInt(options.OutputOffset, len(outputDocs)) end = utils.MinInt(options.OutputOffset+options.MaxOutputs, len(outputDocs)) } else { start = utils.MinInt(options.OutputOffset, len(outputDocs)) end = len(outputDocs) } return outputDocs[start:end] }
// 查找满足搜索条件的文档,此函数线程安全 func (engine *Engine) Search(request SearchRequest) (output SearchResponse) { if !engine.initialized { log.Fatal("必须先初始化引擎") } var rankOptions RankOptions rankOptions.SearchScorer = engine.initOptions.SearchScorer if request.RankOptions == nil { log.Println("必须设置搜索排序选项") return } else { rankOptions = *request.RankOptions } if rankOptions.SearchScorer == nil { log.Println("必须设置打分器") return } // 收集关键词 tokens := []string{} if request.Text != "" { querySegments := engine.segmenter.Cut([]byte(request.Text), true) for _, s := range querySegments { token := s.GetToken().GetText() if !engine.stopTokens.IsStopToken(token) { tokens = append(tokens, s.GetToken().GetText()) } } } else { for _, t := range request.Tokens { tokens = append(tokens, t) } } // 建立排序器返回的通信通道 rankerReturnChannel := make( chan rankerReturnRequest, engine.initOptions.NumShards) // 生成查找请求 lookupRequest := indexerLookupRequest{ tokens: tokens, labels: request.Labels, docIds: request.DocIds, options: rankOptions, rankerReturnChannel: rankerReturnChannel} // 向索引器发送查找请求 for shard := 0; shard < engine.initOptions.NumShards; shard++ { engine.indexerLookupChannels[shard] <- lookupRequest } // 从通信通道读取排序器的输出 rankOutput := ScoredDocuments{} timeout := request.Timeout isTimeout := false if timeout <= 0 { // 不设置超时 for shard := 0; shard < engine.initOptions.NumShards; shard++ { rankerOutput := <-rankerReturnChannel for _, doc := range rankerOutput.docs { rankOutput = append(rankOutput, doc) } } } else { // 设置超时 deadline := time.Now().Add(time.Nanosecond * time.Duration(NumNanosecondsInAMillisecond*request.Timeout)) for shard := 0; shard < engine.initOptions.NumShards; shard++ { select { case rankerOutput := <-rankerReturnChannel: for _, doc := range rankerOutput.docs { rankOutput = append(rankOutput, doc) } case <-time.After(deadline.Sub(time.Now())): isTimeout = true break } } } // 再排序 if rankOptions.ReverseOrder { sort.Sort(sort.Reverse(rankOutput)) } else { sort.Sort(rankOutput) } // 准备输出 output.Tokens = tokens var start, end int if rankOptions.MaxOutputs == 0 { start = utils.MinInt(rankOptions.OutputOffset, len(rankOutput)) end = len(rankOutput) } else { start = utils.MinInt(rankOptions.OutputOffset, len(rankOutput)) end = utils.MinInt(start+rankOptions.MaxOutputs, len(rankOutput)) } output.Docs = rankOutput[start:end] output.Timeout = isTimeout return }