Example #1
0
// BestFragmentsInField selects up to num fragments of the given field of doc
// and returns them formatted as strings.
//
// Every *document.TextField in doc whose name equals field is split by the
// highlighter's fragmenter, using only the term locations from
// dm.Locations[field] whose array positions match that field instance. Each
// fragment is scored and pushed onto a FragmentQueue heap. Fragments are then
// popped one at a time and kept unless they overlap a fragment already kept.
//
// A kept fragment is rendered by the highlighter's formatter; s.sep is
// prepended when the fragment does not start at offset 0 and appended when it
// does not end at len(fragment.Orig).
//
// As a side effect dm.Fragments is initialised when nil, and a non-empty
// result is also stored in dm.Fragments[field]. The returned slice is never
// nil.
func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *document.Document, field string, num int) []string {
	termLocations := dm.Locations[field]
	ordered := highlight.OrderTermLocations(termLocations)
	scorer := NewFragmentScorer(termLocations)

	// Collect scored fragments from every matching text field into a heap.
	// NOTE(review): the heap presumably yields the highest score first; the
	// ordering lives in FragmentQueue, which is not visible here.
	queue := FragmentQueue{}
	heap.Init(&queue)
	for _, f := range doc.Fields {
		if f.Name() != field {
			continue
		}
		if _, isText := f.(*document.TextField); !isText {
			continue
		}

		// Only term locations belonging to this particular array element
		// are relevant when fragmenting its value.
		matching := highlight.TermLocations{}
		for _, loc := range ordered {
			if highlight.SameArrayPositions(f.ArrayPositions(), loc.ArrayPositions) {
				matching = append(matching, loc)
			}
		}

		for _, frag := range s.fragmenter.Fragment(f.Value(), matching) {
			frag.ArrayPositions = f.ArrayPositions()
			scorer.Score(frag)
			heap.Push(&queue, frag)
		}
	}

	// Pop candidates in heap order, keeping those that do not overlap any
	// fragment already kept, until num are kept or the heap runs dry.
	var best []*highlight.Fragment
	if len(queue) > 0 {
		for next := heap.Pop(&queue); next != nil && len(best) < num; next = heap.Pop(&queue) {
			candidate := next.(*highlight.Fragment)

			overlapping := false
			for _, kept := range best {
				if candidate.Overlaps(kept) {
					overlapping = true
					break
				}
			}
			if !overlapping {
				best = append(best, candidate)
			}

			// Stop before the post statement tries to pop an empty heap.
			if len(queue) == 0 {
				break
			}
		}
	}

	// Render the chosen fragments, marking truncation on either side with
	// the separator.
	ordered.MergeOverlapping()
	formatted := make([]string, len(best))
	for i, frag := range best {
		text := ""
		if frag.Start != 0 {
			text = s.sep
		}
		text += s.formatter.Format(frag, ordered)
		if frag.End != len(frag.Orig) {
			text += s.sep
		}
		formatted[i] = text
	}

	if dm.Fragments == nil {
		dm.Fragments = make(search.FieldFragmentMap)
	}
	if len(formatted) > 0 {
		dm.Fragments[field] = formatted
	}

	return formatted
}