func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string { rv := "" curr := f.Start for _, termLocation := range orderedTermLocations { if termLocation == nil { continue } // make sure the array positions match if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) { continue } if termLocation.Start < curr { continue } if termLocation.End > f.End { break } // add the stuff before this location rv += string(f.Orig[curr:termLocation.Start]) // add the color rv += a.before // add the term itself rv += string(f.Orig[termLocation.Start:termLocation.End]) // reset the color rv += a.after // update current curr = termLocation.End } // add any remaining text after the last token rv += string(f.Orig[curr:f.End]) return rv }
func (s *FragmentScorer) Score(f *highlight.Fragment) { score := 0.0 OUTER: for _, locations := range s.tlm { for _, location := range locations { if highlight.SameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { score += 1.0 // once we find a term in the fragment // don't care about additional matches continue OUTER } } } f.Score = score }
func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *document.Document, field string, num int) []string { tlm := dm.Locations[field] orderedTermLocations := highlight.OrderTermLocations(tlm) scorer := NewFragmentScorer(tlm) // score the fragments and put them into a priority queue ordered by score fq := make(FragmentQueue, 0) heap.Init(&fq) for _, f := range doc.Fields { if f.Name() == field { _, ok := f.(*document.TextField) if ok { termLocationsSameArrayPosition := make(highlight.TermLocations, 0) for _, otl := range orderedTermLocations { if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) { termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl) } } fieldData := f.Value() fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition) for _, fragment := range fragments { fragment.ArrayPositions = f.ArrayPositions() scorer.Score(fragment) heap.Push(&fq, fragment) } } } } // now find the N best non-overlapping fragments var bestFragments []*highlight.Fragment if len(fq) > 0 { candidate := heap.Pop(&fq) OUTER: for candidate != nil && len(bestFragments) < num { // see if this overlaps with any of the best already identified if len(bestFragments) > 0 { for _, frag := range bestFragments { if candidate.(*highlight.Fragment).Overlaps(frag) { if len(fq) < 1 { break OUTER } candidate = heap.Pop(&fq) continue OUTER } } bestFragments = append(bestFragments, candidate.(*highlight.Fragment)) } else { bestFragments = append(bestFragments, candidate.(*highlight.Fragment)) } if len(fq) < 1 { break } candidate = heap.Pop(&fq) } } // now that we have the best fragments, we can format them orderedTermLocations.MergeOverlapping() formattedFragments := make([]string, len(bestFragments)) for i, fragment := range bestFragments { formattedFragments[i] = "" if fragment.Start != 0 { formattedFragments[i] += s.sep } formattedFragments[i] += s.formatter.Format(fragment, orderedTermLocations) if fragment.End != len(fragment.Orig) { formattedFragments[i] += s.sep } } if dm.Fragments == nil { dm.Fragments = make(search.FieldFragmentMap, 0) } if len(formattedFragments) > 0 { dm.Fragments[field] = formattedFragments } return formattedFragments }