Example #1
0
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
	rv := ""
	curr := f.Start
	for _, termLocation := range orderedTermLocations {
		if termLocation == nil {
			continue
		}
		// make sure the array positions match
		if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) {
			continue
		}
		if termLocation.Start < curr {
			continue
		}
		if termLocation.End > f.End {
			break
		}
		// add the stuff before this location
		rv += string(f.Orig[curr:termLocation.Start])
		// add the color
		rv += a.before
		// add the term itself
		rv += string(f.Orig[termLocation.Start:termLocation.End])
		// reset the color
		rv += a.after
		// update current
		curr = termLocation.End
	}
	// add any remaining text after the last token
	rv += string(f.Orig[curr:f.End])

	return rv
}
Example #2
0
func (s *FragmentScorer) Score(f *highlight.Fragment) {
	score := 0.0
OUTER:
	for _, locations := range s.tlm {
		for _, location := range locations {
			if highlight.SameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
				score += 1.0
				// once we find a term in the fragment
				// don't care about additional matches
				continue OUTER
			}
		}
	}
	f.Score = score
}
Example #3
0
func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *document.Document, field string, num int) []string {
	tlm := dm.Locations[field]
	orderedTermLocations := highlight.OrderTermLocations(tlm)
	scorer := NewFragmentScorer(tlm)

	// score the fragments and put them into a priority queue ordered by score
	fq := make(FragmentQueue, 0)
	heap.Init(&fq)
	for _, f := range doc.Fields {
		if f.Name() == field {
			_, ok := f.(*document.TextField)
			if ok {
				termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
				for _, otl := range orderedTermLocations {
					if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) {
						termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
					}
				}

				fieldData := f.Value()
				fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition)
				for _, fragment := range fragments {
					fragment.ArrayPositions = f.ArrayPositions()
					scorer.Score(fragment)
					heap.Push(&fq, fragment)
				}
			}
		}
	}

	// now find the N best non-overlapping fragments
	var bestFragments []*highlight.Fragment
	if len(fq) > 0 {
		candidate := heap.Pop(&fq)
	OUTER:
		for candidate != nil && len(bestFragments) < num {
			// see if this overlaps with any of the best already identified
			if len(bestFragments) > 0 {
				for _, frag := range bestFragments {
					if candidate.(*highlight.Fragment).Overlaps(frag) {
						if len(fq) < 1 {
							break OUTER
						}
						candidate = heap.Pop(&fq)
						continue OUTER
					}
				}
				bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
			} else {
				bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
			}

			if len(fq) < 1 {
				break
			}
			candidate = heap.Pop(&fq)
		}
	}

	// now that we have the best fragments, we can format them
	orderedTermLocations.MergeOverlapping()
	formattedFragments := make([]string, len(bestFragments))
	for i, fragment := range bestFragments {
		formattedFragments[i] = ""
		if fragment.Start != 0 {
			formattedFragments[i] += s.sep
		}
		formattedFragments[i] += s.formatter.Format(fragment, orderedTermLocations)
		if fragment.End != len(fragment.Orig) {
			formattedFragments[i] += s.sep
		}
	}

	if dm.Fragments == nil {
		dm.Fragments = make(search.FieldFragmentMap, 0)
	}
	if len(formattedFragments) > 0 {
		dm.Fragments[field] = formattedFragments
	}

	return formattedFragments
}