示例#1
0
// search runs the query q against the currently loaded index and returns the
// ranked hits together with the token set extracted from q.
//
// The query is tokenized with gcse.AppendTokens and matched against
// gcse.IndexTextField. Each hit is scored as StaticScore * MatchScore, and the
// hits are ordered by, in turn: higher Score, higher StarCount, shorter
// Package, and finally lexicographically smaller Package.
//
// When no index is available an empty SearchResult is returned. The returned
// error is always nil in this implementation.
func search(q string) (*SearchResult, villa.StrSet, error) {
	tokens := gcse.AppendTokens(nil, []byte(q))
	log.Printf("tokens for query %s: %v", q, tokens)

	// Use the comma-ok form: a single-value type assertion panics when the box
	// yields a nil interface value, which would keep the nil check below from
	// ever running in exactly the case it is meant to handle.
	indexDB, _ := indexDBBox.Get().(*index.TokenSetSearcher)
	if indexDB == nil {
		return &SearchResult{}, tokens, nil
	}

	var hits []*Hit

	N := indexDB.DocCount()
	// Df reports how many documents list token in the text field.
	Df := func(token string) int {
		return len(indexDB.TokenDocList(gcse.IndexTextField, token))
	}

	indexDB.Search(map[string]villa.StrSet{gcse.IndexTextField: tokens},
		func(docID int32, data interface{}) error {
			hitInfo, ok := data.(gcse.HitInfo)
			if !ok {
				// A document without HitInfo cannot be scored or displayed;
				// report it and keep searching instead of emitting an empty hit.
				log.Printf("search: doc %d has data of type %T, want gcse.HitInfo; skipped",
					docID, data)
				return nil
			}
			hit := &Hit{
				HitInfo: hitInfo,
			}

			hit.MatchScore = gcse.CalcMatchScore(&hitInfo, tokens, N, Df)
			hit.Score = hit.StaticScore * hit.MatchScore

			hits = append(hits, hit)
			return nil
		})

	log.Printf("Got %d hits for query %q", len(hits), q)

	villa.SortF(len(hits), func(i, j int) bool {
		// Reports whether hit i must be placed before hit j.
		ssi, ssj := hits[i].Score, hits[j].Score
		if ssi > ssj {
			return true
		}
		if ssi < ssj {
			return false
		}

		// Equal scores: more stars first.
		sci, scj := hits[i].StarCount, hits[j].StarCount
		if sci > scj {
			return true
		}
		if sci < scj {
			return false
		}

		// Still tied: shorter package path first, then alphabetical order.
		pi, pj := hits[i].Package, hits[j].Package
		if len(pi) < len(pj) {
			return true
		}
		if len(pi) > len(pj) {
			return false
		}

		return pi < pj
	}, func(i, j int) {
		// Swap
		hits[i], hits[j] = hits[j], hits[i]
	})

	return &SearchResult{
		TotalResults: len(hits),
		Hits:         hits,
	}, tokens, nil
}
示例#2
0
文件: search.go 项目: ishawge/gcse
// search runs the query q against the currently loaded index and returns the
// ranked hits together with the token set extracted from q.
//
// The query is tokenized with gcse.AppendTokens and matched against
// gcse.IndexTextField. Each hit is scored as
// maxF(StaticScore, TestStaticScore) * MatchScore, and the hits are ordered by,
// in turn: higher Score, higher StarCount, shorter Package, and finally
// lexicographically smaller Package. For result sets below 5000 hits, repeated
// package names are then down-ranked and the list is re-sorted by Score.
//
// When no index is available an empty SearchResult is returned. The returned
// error is always nil in this implementation.
func search(q string) (*SearchResult, villa.StrSet, error) {
	tokens := gcse.AppendTokens(nil, []byte(q))
	tokenList := tokens.Elements()
	log.Printf("tokens for query %s: %v", q, tokens)

	// Use the comma-ok form: a single-value type assertion panics when the box
	// yields a nil interface value, which would keep the nil check below from
	// ever running in exactly the case it is meant to handle.
	indexDB, _ := indexDBBox.Get().(*index.TokenSetSearcher)
	if indexDB == nil {
		return &SearchResult{}, tokens, nil
	}

	var hits []*Hit

	N := indexDB.DocCount()
	// TextDf and NameDf report how many documents list token in the text and
	// name fields respectively.
	TextDf := func(token string) int {
		return len(indexDB.TokenDocList(gcse.IndexTextField, token))
	}
	NameDf := func(token string) int {
		return len(indexDB.TokenDocList(gcse.IndexNameField, token))
	}

	// Compute the idf values once per token so the per-hit callback does not
	// repeat the index lookups.
	textIdfs := make([]float64, len(tokenList))
	nameIdfs := make([]float64, len(tokenList))
	for i := range textIdfs {
		textIdfs[i] = idf(TextDf(tokenList[i]), N)
		nameIdfs[i] = idf(NameDf(tokenList[i]), N)
	}

	indexDB.Search(map[string]villa.StrSet{gcse.IndexTextField: tokens},
		func(docID int32, data interface{}) error {
			hitInfo, ok := data.(gcse.HitInfo)
			if !ok {
				// A document without HitInfo cannot be scored or displayed;
				// report it and keep searching instead of emitting an empty hit.
				log.Printf("search: doc %d has data of type %T, want gcse.HitInfo; skipped",
					docID, data)
				return nil
			}
			hit := &Hit{
				HitInfo: hitInfo,
			}

			hit.MatchScore = gcse.CalcMatchScore(&hitInfo, tokenList,
				textIdfs, nameIdfs)
			hit.Score = maxF(hit.StaticScore, hit.TestStaticScore) *
				hit.MatchScore

			hits = append(hits, hit)
			return nil
		})

	log.Printf("Got %d hits for query %q", len(hits), q)

	swapHits := func(i, j int) {
		hits[i], hits[j] = hits[j], hits[i]
	}

	sortp.SortF(len(hits), func(i, j int) bool {
		// Reports whether hit i must be placed before hit j.
		ssi, ssj := hits[i].Score, hits[j].Score
		if ssi > ssj {
			return true
		}
		if ssi < ssj {
			return false
		}

		// Equal scores: more stars first.
		sci, scj := hits[i].StarCount, hits[j].StarCount
		if sci > scj {
			return true
		}
		if sci < scj {
			return false
		}

		// Still tied: shorter package path first, then alphabetical order.
		pi, pj := hits[i].Package, hits[j].Package
		if len(pi) < len(pj) {
			return true
		}
		if len(pi) > len(pj) {
			return false
		}

		return pi < pj
	}, swapHits)

	if len(hits) < 5000 {
		// Adjust Score by down ranking duplicated packages: walking the hits in
		// ranked order, the cnt-th hit (cnt > 1) with a given Name has its
		// Score divided by cnt, unless something imports it.
		pkgCount := make(map[string]int)
		for _, hit := range hits {
			cnt := pkgCount[hit.Name] + 1
			pkgCount[hit.Name] = cnt
			if cnt > 1 && len(hit.Imported) == 0 && len(hit.TestImported) == 0 {
				hit.Score /= float64(cnt)
			}
		}

		// Re-sort by the adjusted scores only.
		sortp.BubbleF(len(hits), func(i, j int) bool {
			return hits[i].Score > hits[j].Score
		}, swapHits)
	}

	return &SearchResult{
		TotalResults: len(hits),
		Hits:         hits,
	}, tokens, nil
}
示例#3
0
// search evaluates the query q against db and returns the ranked hits along
// with the token set parsed from q. Progress messages are recorded on tr.
//
// Hits are scored as max(StaticScore, TestStaticScore) * MatchScore and ordered
// by higher Score, then higher StarCount, then shorter Package, then
// lexicographically smaller Package. When fewer than 5000 hits are found,
// repeated package names are down-ranked and the list is re-sorted by Score.
// The returned error is always nil in this implementation.
func search(tr trace.Trace, db database, q string) (*SearchResult, stringsp.Set, error) {
	tokens := gcse.AppendTokens(nil, []byte(q))
	tokenList := tokens.Elements()
	log.Printf("tokens for query %s: %v", q, tokens)

	// Per-token idf values for the text and name fields, computed up front so
	// the per-hit callback can reuse them.
	total := db.PackageCount()
	textIdfs := make([]float64, len(tokenList))
	nameIdfs := make([]float64, len(tokenList))
	for i, tok := range tokenList {
		textIdfs[i] = idf(db.PackageCountOfToken(gcse.IndexTextField, tok), total)
		nameIdfs[i] = idf(db.PackageCountOfToken(gcse.IndexNameField, tok), total)
	}

	var hits []*Hit
	// collect scores one matching document and appends it to hits.
	collect := func(docID int32, data interface{}) error {
		info, ok := data.(gcse.HitInfo)
		if !ok {
			log.Print("ok = false")
		}
		hit := &Hit{HitInfo: info}

		hit.MatchScore = gcse.CalcMatchScore(&hit.HitInfo, tokenList, textIdfs, nameIdfs)
		hit.Score = math.Max(hit.StaticScore, hit.TestStaticScore) * hit.MatchScore

		hits = append(hits, hit)
		return nil
	}
	db.Search(map[string]stringsp.Set{gcse.IndexTextField: tokens}, collect)
	tr.LazyPrintf("Got %d hits for query %q", len(hits), q)

	swapHits := func(i, j int) {
		hits[i], hits[j] = hits[j], hits[i]
	}
	// before reports whether hit i ranks ahead of hit j.
	before := func(i, j int) bool {
		a, b := hits[i], hits[j]
		switch {
		case a.Score > b.Score:
			return true
		case a.Score < b.Score:
			return false
		case a.StarCount > b.StarCount:
			return true
		case a.StarCount < b.StarCount:
			return false
		case len(a.Package) < len(b.Package):
			return true
		case len(a.Package) > len(b.Package):
			return false
		}
		return a.Package < b.Package
	}
	sortp.SortF(len(hits), before, swapHits)

	tr.LazyPrintf("Results sorted")

	if len(hits) < 5000 {
		// Penalize repeated package names: in ranked order, the n-th hit
		// (n > 1) sharing a Name has its Score divided by n, unless it is
		// imported by something.
		seen := make(map[string]int)
		for _, hit := range hits {
			seen[hit.Name]++
			n := seen[hit.Name]
			if n <= 1 || hit.ImportedLen != 0 || hit.TestImportedLen != 0 {
				continue
			}
			hit.Score /= float64(n)
		}
		// Restore descending Score order after the adjustment.
		sortp.BubbleF(len(hits), func(i, j int) bool {
			return hits[i].Score > hits[j].Score
		}, swapHits)
		tr.LazyPrintf("Results reranked")
	}

	result := &SearchResult{
		TotalResults: len(hits),
		Hits:         hits,
	}
	return result, tokens, nil
}