func search(q string) (*SearchResult, villa.StrSet, error) { tokens := gcse.AppendTokens(nil, []byte(q)) log.Printf("tokens for query %s: %v", q, tokens) indexDB := indexDBBox.Get().(*index.TokenSetSearcher) if indexDB == nil { return &SearchResult{}, tokens, nil } var hits []*Hit N := indexDB.DocCount() Df := func(token string) int { return len(indexDB.TokenDocList(gcse.IndexTextField, token)) } _, _ = N, Df indexDB.Search(map[string]villa.StrSet{gcse.IndexTextField: tokens}, func(docID int32, data interface{}) error { hitInfo, _ := data.(gcse.HitInfo) hit := &Hit{ HitInfo: hitInfo, } hit.MatchScore = gcse.CalcMatchScore(&hitInfo, tokens, N, Df) hit.Score = hit.StaticScore * hit.MatchScore hits = append(hits, hit) return nil }) log.Printf("Got %d hits for query %q", len(hits), q) villa.SortF(len(hits), func(i, j int) bool { // true if doc i is before doc j ssi, ssj := hits[i].Score, hits[j].Score if ssi > ssj { return true } if ssi < ssj { return false } sci, scj := hits[i].StarCount, hits[j].StarCount if sci > scj { return true } if sci < scj { return false } pi, pj := hits[i].Package, hits[j].Package if len(pi) < len(pj) { return true } if len(pi) > len(pj) { return false } return pi < pj }, func(i, j int) { // Swap hits[i], hits[j] = hits[j], hits[i] }) return &SearchResult{ TotalResults: len(hits), Hits: hits, }, tokens, nil }
func search(q string) (*SearchResult, villa.StrSet, error) { tokens := gcse.AppendTokens(nil, []byte(q)) tokenList := tokens.Elements() log.Printf("tokens for query %s: %v", q, tokens) indexDB := indexDBBox.Get().(*index.TokenSetSearcher) if indexDB == nil { return &SearchResult{}, tokens, nil } var hits []*Hit N := indexDB.DocCount() TextDf := func(token string) int { return len(indexDB.TokenDocList(gcse.IndexTextField, token)) } NameDf := func(token string) int { return len(indexDB.TokenDocList(gcse.IndexNameField, token)) } textIdfs := make([]float64, len(tokenList)) nameIdfs := make([]float64, len(tokenList)) for i := range textIdfs { textIdfs[i] = idf(TextDf(tokenList[i]), N) nameIdfs[i] = idf(NameDf(tokenList[i]), N) } indexDB.Search(map[string]villa.StrSet{gcse.IndexTextField: tokens}, func(docID int32, data interface{}) error { hitInfo, _ := data.(gcse.HitInfo) hit := &Hit{ HitInfo: hitInfo, } hit.MatchScore = gcse.CalcMatchScore(&hitInfo, tokenList, textIdfs, nameIdfs) hit.Score = maxF(hit.StaticScore, hit.TestStaticScore) * hit.MatchScore hits = append(hits, hit) return nil }) log.Printf("Got %d hits for query %q", len(hits), q) swapHits := func(i, j int) { hits[i], hits[j] = hits[j], hits[i] } sortp.SortF(len(hits), func(i, j int) bool { // true if doc i is before doc j ssi, ssj := hits[i].Score, hits[j].Score if ssi > ssj { return true } if ssi < ssj { return false } sci, scj := hits[i].StarCount, hits[j].StarCount if sci > scj { return true } if sci < scj { return false } pi, pj := hits[i].Package, hits[j].Package if len(pi) < len(pj) { return true } if len(pi) > len(pj) { return false } return pi < pj }, swapHits) if len(hits) < 5000 { // Adjust Score by down ranking duplicated packages pkgCount := make(map[string]int) for _, hit := range hits { cnt := pkgCount[hit.Name] + 1 pkgCount[hit.Name] = cnt if cnt > 1 && len(hit.Imported) == 0 && len(hit.TestImported) == 0 { hit.Score /= float64(cnt) } } // Re-sort sortp.BubbleF(len(hits), func(i, j int) bool { return hits[i].Score > hits[j].Score }, swapHits) } return &SearchResult{ TotalResults: len(hits), Hits: hits, }, tokens, nil }
func search(tr trace.Trace, db database, q string) (*SearchResult, stringsp.Set, error) { tokens := gcse.AppendTokens(nil, []byte(q)) tokenList := tokens.Elements() log.Printf("tokens for query %s: %v", q, tokens) var hits []*Hit N := db.PackageCount() textIdfs := make([]float64, len(tokenList)) nameIdfs := make([]float64, len(tokenList)) for i := range textIdfs { textIdfs[i] = idf(db.PackageCountOfToken(gcse.IndexTextField, tokenList[i]), N) nameIdfs[i] = idf(db.PackageCountOfToken(gcse.IndexNameField, tokenList[i]), N) } db.Search(map[string]stringsp.Set{gcse.IndexTextField: tokens}, func(docID int32, data interface{}) error { hit := &Hit{} var ok bool hit.HitInfo, ok = data.(gcse.HitInfo) if !ok { log.Print("ok = false") } hit.MatchScore = gcse.CalcMatchScore(&hit.HitInfo, tokenList, textIdfs, nameIdfs) hit.Score = math.Max(hit.StaticScore, hit.TestStaticScore) * hit.MatchScore hits = append(hits, hit) return nil }) tr.LazyPrintf("Got %d hits for query %q", len(hits), q) swapHits := func(i, j int) { hits[i], hits[j] = hits[j], hits[i] } sortp.SortF(len(hits), func(i, j int) bool { // true if doc i is before doc j ssi, ssj := hits[i].Score, hits[j].Score if ssi > ssj { return true } if ssi < ssj { return false } sci, scj := hits[i].StarCount, hits[j].StarCount if sci > scj { return true } if sci < scj { return false } pi, pj := hits[i].Package, hits[j].Package if len(pi) < len(pj) { return true } if len(pi) > len(pj) { return false } return pi < pj }, swapHits) tr.LazyPrintf("Results sorted") if len(hits) < 5000 { // Adjust Score by down ranking duplicated packages pkgCount := make(map[string]int) for _, hit := range hits { cnt := pkgCount[hit.Name] + 1 pkgCount[hit.Name] = cnt if cnt > 1 && hit.ImportedLen == 0 && hit.TestImportedLen == 0 { hit.Score /= float64(cnt) } } // Re-sort sortp.BubbleF(len(hits), func(i, j int) bool { return hits[i].Score > hits[j].Score }, swapHits) tr.LazyPrintf("Results reranked") } return &SearchResult{ TotalResults: len(hits), Hits: hits, }, tokens, nil }