func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) { if it.Next() { if end := p.searchOnce(it); end != -1 { return 0, end } } return -1, -1 }
// CompileString compiles and returns a pattern that can be used for faster // searching. func (m *Matcher) CompileString(s string) *Pattern { p := &Pattern{m: m} iter := newcolltab.Iter{Weighter: m.w} for iter.SetInputString(s); iter.Next(); { } p.ce = iter.Elems p.deleteEmptyElements() return p }
// Compile compiles and returns a pattern that can be used for faster searching. func (m *Matcher) Compile(b []byte) *Pattern { p := &Pattern{m: m} iter := colltab.Iter{Weighter: m.w} for iter.SetInput(b); iter.Next(); { } p.ce = iter.Elems p.deleteEmptyElements() return p }
func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) { for start := 0; it.Next(); it.Reset(start) { nextStart := it.End() if end := p.searchOnce(it); end != -1 { return start, end } start = nextStart } return -1, -1 }
// searchOnce tries to match the pattern s.p at the text position i. s.buf needs // to be filled with collation elements of the first segment, where n is the // number of source bytes consumed for this segment. It will return the end // position of the match or -1. func (p *Pattern) searchOnce(it *colltab.Iter) (end int) { var pLevel [4]int m := p.m for { k := 0 for ; k < it.N; k++ { if v := it.Elems[k].Primary(); v > 0 { if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w { return -1 } } if !m.ignoreDiacritics { if v := it.Elems[k].Secondary(); v > 0 { if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w { return -1 } } } else if it.Elems[k].Primary() == 0 { // We ignore tertiary values of collation elements of the // secondary level. continue } // TODO: distinguish between case and width. This will be easier to // implement after we moved to the new collation implementation. if !m.ignoreWidth && !m.ignoreCase { if v := it.Elems[k].Tertiary(); v > 0 { if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w { return -1 } } } // TODO: check quaternary weight } it.Discard() // Remove the current segment from the buffer. // Check for completion. switch { // If any of these cases match, we are not at the end. case pLevel[0] < len(p.ce): case !m.ignoreDiacritics && pLevel[1] < len(p.ce): case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce): default: // At this point, both the segment and pattern has matched fully. // However, the segment may still be have trailing modifiers. // This can be verified by another call to next. end = it.End() if it.Next() && it.Elems[0].Primary() == 0 { if !m.ignoreDiacritics { return -1 } end = it.End() } return end } // Fill the buffer with the next batch of collation elements. if !it.Next() { return -1 } } }