예제 #1
0
func (s *ConjunctionQueryScorer) Score(constituents []*search.DocumentMatch) *search.DocumentMatch {
	rv := search.DocumentMatch{
		ID: constituents[0].ID,
	}

	var sum float64
	var childrenExplanations []*search.Explanation
	if s.explain {
		childrenExplanations = make([]*search.Explanation, len(constituents))
	}

	locations := []search.FieldTermLocationMap{}
	for i, docMatch := range constituents {
		sum += docMatch.Score
		if s.explain {
			childrenExplanations[i] = docMatch.Expl
		}
		if docMatch.Locations != nil {
			locations = append(locations, docMatch.Locations)
		}
	}
	rv.Score = sum
	if s.explain {
		rv.Expl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
	}

	if len(locations) == 1 {
		rv.Locations = locations[0]
	} else if len(locations) > 1 {
		rv.Locations = search.MergeLocations(locations)
	}

	return &rv
}
예제 #2
0
func (s *DisjunctionQueryScorer) Score(constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
	rv := search.DocumentMatch{
		ID: constituents[0].ID,
	}

	var sum float64
	var childrenExplanations []*search.Explanation
	if s.explain {
		childrenExplanations = make([]*search.Explanation, len(constituents))
	}

	locations := []search.FieldTermLocationMap{}
	for i, docMatch := range constituents {
		sum += docMatch.Score
		if s.explain {
			childrenExplanations[i] = docMatch.Expl
		}
		if docMatch.Locations != nil {
			locations = append(locations, docMatch.Locations)
		}
	}

	var rawExpl *search.Explanation
	if s.explain {
		rawExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
	}

	coord := float64(countMatch) / float64(countTotal)
	rv.Score = sum * coord
	if s.explain {
		ce := make([]*search.Explanation, 2)
		ce[0] = rawExpl
		ce[1] = &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)}
		rv.Expl = &search.Explanation{Value: rv.Score, Message: "product of:", Children: ce}
	}

	if len(locations) == 1 {
		rv.Locations = locations[0]
	} else if len(locations) > 1 {
		rv.Locations = search.MergeLocations(locations)
	}

	return &rv
}
예제 #3
0
func (s *ConstantScorer) Score(id string) *search.DocumentMatch {
	var scoreExplanation *search.Explanation

	score := s.constant

	if s.explain {
		scoreExplanation = &search.Explanation{
			Value:   score,
			Message: fmt.Sprintf("ConstantScore()"),
		}
	}

	// if the query weight isn't 1, multiply
	if s.queryWeight != 1.0 {
		score = score * s.queryWeight
		if s.explain {
			childExplanations := make([]*search.Explanation, 2)
			childExplanations[0] = s.queryWeightExplanation
			childExplanations[1] = scoreExplanation
			scoreExplanation = &search.Explanation{
				Value:    score,
				Message:  fmt.Sprintf("weight(^%f), product of:", s.boost),
				Children: childExplanations,
			}
		}
	}

	rv := search.DocumentMatch{
		ID:    id,
		Score: score,
	}
	if s.explain {
		rv.Expl = scoreExplanation
	}

	return &rv
}
예제 #4
0
func (s *PhraseSearcher) Next() (*search.DocumentMatch, error) {
	if !s.initialized {
		err := s.initSearchers()
		if err != nil {
			return nil, err
		}
	}

	var rv *search.DocumentMatch
	for s.currMust != nil {
		rvftlm := make(search.FieldTermLocationMap, 0)
		freq := 0
		firstTerm := s.terms[0]
		for field, termLocMap := range s.currMust.Locations {
			rvtlm := make(search.TermLocationMap, 0)
			locations, ok := termLocMap[firstTerm]
			if ok {
			OUTER:
				for _, location := range locations {
					crvtlm := make(search.TermLocationMap, 0)
				INNER:
					for i := 0; i < len(s.terms); i++ {
						nextTerm := s.terms[i]
						if nextTerm != "" {
							// look through all these term locations
							// to try and find the correct offsets
							nextLocations, ok := termLocMap[nextTerm]
							if ok {
								for _, nextLocation := range nextLocations {
									if nextLocation.Pos == location.Pos+float64(i) && nextLocation.SameArrayElement(location) {
										// found a location match for this term
										crvtlm.AddLocation(nextTerm, nextLocation)
										continue INNER
									}
								}
								// if we got here we didn't find a location match for this term
								continue OUTER
							} else {
								continue OUTER
							}
						}
					}
					// if we got here all the terms matched
					freq++
					search.MergeTermLocationMaps(rvtlm, crvtlm)
					rvftlm[field] = rvtlm
				}
			}
		}

		if freq > 0 {
			// return match
			rv = s.currMust
			rv.Locations = rvftlm
			err := s.advanceNextMust()
			if err != nil {
				return nil, err
			}
			return rv, nil
		}

		err := s.advanceNextMust()
		if err != nil {
			return nil, err
		}
	}

	return nil, nil
}
예제 #5
0
func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *document.Document, field string, num int) []string {
	tlm := dm.Locations[field]
	orderedTermLocations := highlight.OrderTermLocations(tlm)
	scorer := NewFragmentScorer(tlm)

	// score the fragments and put them into a priority queue ordered by score
	fq := make(FragmentQueue, 0)
	heap.Init(&fq)
	for _, f := range doc.Fields {
		if f.Name() == field {
			_, ok := f.(*document.TextField)
			if ok {
				termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
				for _, otl := range orderedTermLocations {
					if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) {
						termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
					}
				}

				fieldData := f.Value()
				fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition)
				for _, fragment := range fragments {
					fragment.ArrayPositions = f.ArrayPositions()
					scorer.Score(fragment)
					heap.Push(&fq, fragment)
				}
			}
		}
	}

	// now find the N best non-overlapping fragments
	var bestFragments []*highlight.Fragment
	if len(fq) > 0 {
		candidate := heap.Pop(&fq)
	OUTER:
		for candidate != nil && len(bestFragments) < num {
			// see if this overlaps with any of the best already identified
			if len(bestFragments) > 0 {
				for _, frag := range bestFragments {
					if candidate.(*highlight.Fragment).Overlaps(frag) {
						if len(fq) < 1 {
							break OUTER
						}
						candidate = heap.Pop(&fq)
						continue OUTER
					}
				}
				bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
			} else {
				bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
			}

			if len(fq) < 1 {
				break
			}
			candidate = heap.Pop(&fq)
		}
	}

	// now that we have the best fragments, we can format them
	orderedTermLocations.MergeOverlapping()
	formattedFragments := make([]string, len(bestFragments))
	for i, fragment := range bestFragments {
		formattedFragments[i] = ""
		if fragment.Start != 0 {
			formattedFragments[i] += s.sep
		}
		formattedFragments[i] += s.formatter.Format(fragment, orderedTermLocations)
		if fragment.End != len(fragment.Orig) {
			formattedFragments[i] += s.sep
		}
	}

	if dm.Fragments == nil {
		dm.Fragments = make(search.FieldFragmentMap, 0)
	}
	if len(formattedFragments) > 0 {
		dm.Fragments[field] = formattedFragments
	}

	return formattedFragments
}
예제 #6
0
func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentMatch {
	var scoreExplanation *search.Explanation

	// need to compute score
	var tf float64
	if termMatch.Freq < MaxSqrtCache {
		tf = SqrtCache[int(termMatch.Freq)]
	} else {
		tf = math.Sqrt(float64(termMatch.Freq))
	}
	score := tf * termMatch.Norm * s.idf

	if s.explain {
		childrenExplanations := make([]*search.Explanation, 3)
		childrenExplanations[0] = &search.Explanation{
			Value:   tf,
			Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq),
		}
		childrenExplanations[1] = &search.Explanation{
			Value:   termMatch.Norm,
			Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
		}
		childrenExplanations[2] = s.idfExplanation
		scoreExplanation = &search.Explanation{
			Value:    score,
			Message:  fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID),
			Children: childrenExplanations,
		}
	}

	// if the query weight isn't 1, multiply
	if s.queryWeight != 1.0 {
		score = score * s.queryWeight
		if s.explain {
			childExplanations := make([]*search.Explanation, 2)
			childExplanations[0] = s.queryWeightExplanation
			childExplanations[1] = scoreExplanation
			scoreExplanation = &search.Explanation{
				Value:    score,
				Message:  fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID),
				Children: childExplanations,
			}
		}
	}

	rv := search.DocumentMatch{
		ID:    termMatch.ID,
		Score: score,
	}
	if s.explain {
		rv.Expl = scoreExplanation
	}

	if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {

		rv.Locations = make(search.FieldTermLocationMap)
		for _, v := range termMatch.Vectors {
			tlm := rv.Locations[v.Field]
			if tlm == nil {
				tlm = make(search.TermLocationMap)
			}

			loc := search.Location{
				Pos:   float64(v.Pos),
				Start: float64(v.Start),
				End:   float64(v.End),
			}

			if len(v.ArrayPositions) > 0 {
				loc.ArrayPositions = make([]float64, len(v.ArrayPositions))
				for i, ap := range v.ArrayPositions {
					loc.ArrayPositions[i] = float64(ap)
				}
			}

			locations := tlm[s.queryTerm]
			if locations == nil {
				locations = make(search.Locations, 1)
				locations[0] = &loc
			} else {
				locations = append(locations, &loc)
			}
			tlm[s.queryTerm] = locations

			rv.Locations[v.Field] = tlm
		}

	}

	return &rv
}
예제 #7
0
파일: topn.go 프로젝트: bcampbell/bleve
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
	// increment total hits
	hc.total++
	d.HitNumber = hc.total

	// update max score
	if d.Score > hc.maxScore {
		hc.maxScore = d.Score
	}

	var err error
	// see if we need to load ID (at this early stage, for example to sort on it)
	if hc.needDocIds {
		d.ID, err = reader.ExternalID(d.IndexInternalID)
		if err != nil {
			return err
		}
	}

	// see if we need to load the stored fields
	if len(hc.neededFields) > 0 {
		// find out which fields haven't been loaded yet
		fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
		// look them up
		fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
		if err != nil {
			return err
		}
		// cache these as well
		if d.CachedFieldTerms == nil {
			d.CachedFieldTerms = make(map[string][]string)
		}
		d.CachedFieldTerms.Merge(fieldTerms)
	}

	// compute this hits sort value
	if len(hc.sort) == 1 && hc.cachedScoring[0] {
		d.Sort = sortByScoreOpt
	} else {
		hc.sort.Value(d)
	}

	// optimization, we track lowest sorting hit already removed from heap
	// with this one comparison, we can avoid all heap operations if
	// this hit would have been added and then immediately removed
	if hc.lowestMatchOutsideResults != nil {
		cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults)
		if cmp >= 0 {
			// this hit can't possibly be in the result set, so avoid heap ops
			ctx.DocumentMatchPool.Put(d)
			return nil
		}
	}

	hc.store.Add(d)
	if hc.store.Len() > hc.size+hc.skip {
		removed := hc.store.RemoveLast()
		if hc.lowestMatchOutsideResults == nil {
			hc.lowestMatchOutsideResults = removed
		} else {
			cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults)
			if cmp < 0 {
				tmp := hc.lowestMatchOutsideResults
				hc.lowestMatchOutsideResults = removed
				ctx.DocumentMatchPool.Put(tmp)
			}
		}
	}

	return nil
}