// New creates a RangeTable from the given runes, which may contain duplicates. func New(r ...rune) *unicode.RangeTable { if len(r) == 0 { return &unicode.RangeTable{} } sort.Sort(byRune(r)) // Remove duplicates. k := 1 for i := 1; i < len(r); i++ { if r[k-1] != r[i] { r[k] = r[i] k++ } } var rt unicode.RangeTable for _, r := range r[:k] { if r <= 0xFFFF { rt.R16 = append(rt.R16, unicode.Range16{Lo: uint16(r), Hi: uint16(r), Stride: 1}) } else { rt.R32 = append(rt.R32, unicode.Range32{Lo: uint32(r), Hi: uint32(r), Stride: 1}) } } // Optimize RangeTable. return Merge(&rt) }
// appendRange32 adds the inclusive range [lo, hi] to the end of t.R32,
// folding it into the existing last range where that keeps the table compact.
//
// lo must be strictly greater than the Hi of the current last range;
// otherwise an error is returned and t is left untouched. The caller is
// expected to pass lo <= hi; this is not checked here.
//
// Merging rules, with "last" being the final entry of t.R32:
//   - a single rune that continues last's progression extends last;
//   - a single rune after a single-rune last turns last into a strided pair;
//   - a multi-rune range adjacent to a contiguous last extends last;
//   - a multi-rune range adjacent to a strided last takes over last's final
//     element so the new range stays contiguous;
//   - anything else is appended as a new Stride-1 range.
func appendRange32(t *unicode.RangeTable, lo, hi uint32) error {
	n := len(t.R32)
	if n == 0 {
		t.R32 = append(t.R32, unicode.Range32{Lo: lo, Hi: hi, Stride: 1})
		return nil
	}

	last := &t.R32[n-1]
	if lo <= last.Hi {
		return fmt.Errorf("AppendRange: lo must be greater than Hi of the last range: lo=%d", lo)
	}

	// lo > last.Hi holds from here on, so lo-last.Hi cannot underflow.
	gap := lo - last.Hi
	single := lo == hi

	switch {
	case single && gap == last.Stride:
		// The rune is the next element of last's progression.
		last.Hi = lo

		// If last is contiguous and directly follows the final element of a
		// strided range that is no longer than last, move that element into
		// last. This is only valid for Stride 1: decrementing Lo of a strided
		// range would shift every element of its progression.
		if last.Stride == 1 && n >= 2 {
			prev := &t.R32[n-2]
			if prev.Stride > 1 && prev.Hi+1 == last.Lo {
				prevCount := (prev.Hi-prev.Lo)/prev.Stride + 1
				count := (last.Hi-last.Lo)/last.Stride + 1
				if prevCount <= count {
					last.Lo--
					prev.Hi -= prev.Stride
					if prev.Lo == prev.Hi {
						prev.Stride = 1
					}
				}
			}
		}

	case single && last.Lo == last.Hi:
		// Two isolated runes form a two-element strided range.
		last.Hi = lo
		last.Stride = gap

	case single:
		t.R32 = append(t.R32, unicode.Range32{Lo: lo, Hi: hi, Stride: 1})

	case gap == 1 && last.Stride == 1:
		// Contiguous range directly after a contiguous range: just extend.
		last.Hi = hi

	case gap == 1:
		// last is strided and its final element touches the new range: hand
		// that element over so the new range absorbs it.
		last.Hi -= last.Stride
		if last.Lo == last.Hi {
			last.Stride = 1
		}
		t.R32 = append(t.R32, unicode.Range32{Lo: lo - 1, Hi: hi, Stride: 1})

	default:
		t.R32 = append(t.R32, unicode.Range32{Lo: lo, Hi: hi, Stride: 1})
	}
	return nil
}
// addR16ToTable appends r16 to r.R16 and bumps r.LatinOffset when the whole
// range lies at or below unicode.MaxLatin1. No ordering or overlap checks are
// performed on the appended range.
func addR16ToTable(r *unicode.RangeTable, r16 unicode.Range16) {
	ranges := r.R16
	if ranges == nil {
		ranges = make([]unicode.Range16, 0, 1)
	}
	r.R16 = append(ranges, r16)

	if r16.Hi > unicode.MaxLatin1 {
		return
	}
	r.LatinOffset++
}
func CharClass(negated bool, table *unicode.RangeTable) Matcher { var t unicode.RangeTable t.R16 = make([]unicode.Range16, len(table.R16)) copy(t.R16, table.R16) t.R32 = make([]unicode.Range32, len(table.R32)) copy(t.R32, table.R32) t.LatinOffset = table.LatinOffset return charClassMatcher{negated, t} }
func OneOf(chars ...rune) Matcher { if len(chars) == 0 { return Fail() } var t unicode.RangeTable sort.Sort(utf8betical(chars)) for _, ch := range chars { if ch >= 0x10000 { t.R32 = append(t.R32, unicode.Range32{Lo: uint32(ch), Hi: uint32(ch), Stride: 1}) } else { t.R16 = append(t.R16, unicode.Range16{Lo: uint16(ch), Hi: uint16(ch), Stride: 1}) if ch < 0x100 { t.LatinOffset += 1 } } } return charClassMatcher{false, t} }
func Range(lo, hi rune) Matcher { if lo > hi { return Fail() } var t unicode.RangeTable if lo >= 0x10000 { t.R32 = append(t.R32, unicode.Range32{Lo: uint32(lo), Hi: uint32(hi), Stride: 1}) } else if hi < 0x10000 { t.R16 = append(t.R16, unicode.Range16{Lo: uint16(lo), Hi: uint16(hi), Stride: 1}) if hi < 0x100 { t.LatinOffset = 1 } } else { t.R16 = append(t.R16, unicode.Range16{Lo: uint16(lo), Hi: 0xffff, Stride: 1}) t.R32 = append(t.R32, unicode.Range32{Lo: 0x10000, Hi: uint32(hi), Stride: 1}) } return charClassMatcher{false, t} }
// appendRange16 adds the inclusive range [lo, hi] to the end of t.R16,
// folding it into the existing last range where that keeps the table compact,
// and keeps t.LatinOffset in step with the result.
//
// lo must be strictly greater than the Hi of the current last range;
// otherwise an error is returned and t is left untouched. The caller is
// expected to pass lo <= hi; this is not checked here.
//
// Merging rules, with "last" being the final entry of t.R16:
//   - a single rune that continues last's progression extends last;
//   - a single rune after a single-rune last turns last into a strided pair;
//   - a multi-rune range adjacent to a contiguous last extends last;
//   - a multi-rune range adjacent to a strided last takes over last's final
//     element so the new range stays contiguous;
//   - anything else is appended as a new Stride-1 range.
//
// After a successful append t.LatinOffset is recomputed as the number of
// leading R16 entries whose Hi is <= unicode.MaxLatin1, because merging can
// move an entry's Hi across that boundary in either direction.
func appendRange16(t *unicode.RangeTable, lo, hi uint16) error {
	n := len(t.R16)
	if n > 0 && lo <= t.R16[n-1].Hi {
		return fmt.Errorf("AppendRange: lo must be greater than Hi of the last range: lo=%d", lo)
	}

	if n == 0 {
		t.R16 = append(t.R16, unicode.Range16{Lo: lo, Hi: hi, Stride: 1})
	} else {
		last := &t.R16[n-1]
		// lo > last.Hi was verified above, so lo-last.Hi cannot underflow.
		gap := lo - last.Hi
		single := lo == hi

		switch {
		case single && gap == last.Stride:
			// The rune is the next element of last's progression.
			last.Hi = lo

			// If last is contiguous and directly follows the final element
			// of a strided range that is no longer than last, move that
			// element into last. This is only valid for Stride 1:
			// decrementing Lo of a strided range would shift every element
			// of its progression.
			if last.Stride == 1 && n >= 2 {
				prev := &t.R16[n-2]
				if prev.Stride > 1 && prev.Hi+1 == last.Lo {
					prevCount := (prev.Hi-prev.Lo)/prev.Stride + 1
					count := (last.Hi-last.Lo)/last.Stride + 1
					if prevCount <= count {
						last.Lo--
						prev.Hi -= prev.Stride
						if prev.Lo == prev.Hi {
							prev.Stride = 1
						}
					}
				}
			}

		case single && last.Lo == last.Hi:
			// Two isolated runes form a two-element strided range.
			last.Hi = lo
			last.Stride = gap

		case single:
			t.R16 = append(t.R16, unicode.Range16{Lo: lo, Hi: hi, Stride: 1})

		case gap == 1 && last.Stride == 1:
			// Contiguous range directly after a contiguous range: extend.
			last.Hi = hi

		case gap == 1:
			// last is strided and its final element touches the new range:
			// hand that element over so the new range absorbs it.
			last.Hi -= last.Stride
			if last.Lo == last.Hi {
				last.Stride = 1
			}
			t.R16 = append(t.R16, unicode.Range16{Lo: lo - 1, Hi: hi, Stride: 1})

		default:
			// Not adjacent: a multi-rune range can never be expressed as a
			// stride continuation of last, so it always gets its own entry.
			t.R16 = append(t.R16, unicode.Range16{Lo: lo, Hi: hi, Stride: 1})
		}
	}

	// Entries are in ascending order, so the Latin-1 entries form a prefix.
	latin := 0
	for latin < len(t.R16) && t.R16[latin].Hi <= unicode.MaxLatin1 {
		latin++
	}
	t.LatinOffset = latin
	return nil
}
// parseTerm parses a single term at the scanner's current rune, adds the
// corresponding states and edges to n, and then applies at most one postfix
// operator ('+', '*' or '?') that immediately follows the term.
//
// in0 is the state the term starts from; when it is nil a fresh state is
// created. nest is the current parenthesis depth: it is passed as nest+1 to
// parseExpr for a '(' group, and a ')' encountered at depth 0 panics.
//
// It returns the entry and exit states of the term. When the current rune is
// 0, '|' or a ')' at depth > 0, it returns (nil, nil) without calling Move.
// Malformed input is reported by panicking with an error value.
func (n *Nfa) parseTerm(s *ScannerSource, in0 *NfaState, nest int) (in, out *NfaState) {
	if in = in0; in == nil {
		in = n.NewState()
	}
	switch arune := s.Current(); arune {
	default:
		// Any rune without special meaning matches itself.
		s.Move()
		out = in.AddConsuming(NewRuneEdge(n.NewState(), arune)).Target()
	case '+', '*', '?':
		// A postfix operator with nothing in front of it.
		panic(fmt.Errorf("unexpected metachar %q", string(arune)))
	case '\\':
		// Escape sequence. mustParseChar is handed the letters that get
		// special treatment below; anything else it returns is matched as a
		// plain rune.
		switch arune = s.mustParseChar("ApPz"); arune {
		default:
			out = in.AddConsuming(NewRuneEdge(n.NewState(), arune)).Target()
		case 'A':
			out = in.AddNonConsuming(NewAssertEdge(n.NewState(), TextStart)).Target()
		case 'z':
			out = in.AddNonConsuming(NewAssertEdge(n.NewState(), TextEnd)).Target()
		case 'p', 'P':
			// \p{Name} / \P{Name}: collect the name up to the closing brace
			// and resolve it first as a Unicode category, then as a script.
			name, ok := "", false
			s.expect('{')
			for !s.Accept('}') {
				name += string(s.mustGetChar())
			}
			var ranges *unicode.RangeTable
			if ranges, ok = unicode.Categories[name]; !ok {
				if ranges, ok = unicode.Scripts[name]; !ok {
					panic(fmt.Errorf("unknown Unicode category name %q", name))
				}
			}
			// The boolean is true for the upper-case 'P' form.
			out = in.AddConsuming(NewRangesEdge(n.NewState(), arune == 'P', ranges)).Target()
		}
	case 0, '|':
		// End of input or an alternation separator: no term here.
		return nil, nil
	case ')':
		if nest == 0 {
			panic(fmt.Errorf(`unexpected ")"`))
		}
		// Closes a group opened by a caller; left for that caller to consume.
		return nil, nil
	case '(':
		s.Move()
		in, out = n.parseExpr(s, in, n.NewState(), nest+1)
		s.expect(')')
	case '.':
		// All but '\U+0000', '\n'
		// NOTE(review): the table built here lists only '\n' and is passed
		// with the boolean set to true; confirm where U+0000 is excluded.
		s.Move()
		out = in.AddConsuming(NewRangesEdge(n.NewState(), true, &unicode.RangeTable{R16: []unicode.Range16{{'\n', '\n', 1}}})).Target()
	case '^':
		s.Move()
		out = in.AddNonConsuming(NewAssertEdge(n.NewState(), LineStart)).Target()
	case '$':
		s.Move()
		out = in.AddNonConsuming(NewAssertEdge(n.NewState(), LineEnd)).Target()
	case '[':
		// Bracket expression. Every item is collected into ranges.R32
		// (regardless of magnitude) and normalized once the closing ']' has
		// been seen.
		s.Move()
		ranges := &unicode.RangeTable{}
		invert := s.Accept('^')
	loop:
		for {
			a := s.mustParseChar("-")
			switch s.Current() {
			case '\\':
				// a is followed by an escape: record a, then parse and record
				// the escaped rune (the inner a shadows the outer one).
				ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(a), 1})
				a := s.mustParseChar("-")
				ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(a), 1})
			default:
				if s.Accept('-') { // Allow `[+-]`
					if s.Current() == ']' {
						// A trailing '-' right before ']' is a literal dash.
						s.Move()
						ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(a), 1})
						ranges.R32 = append(ranges.R32, unicode.Range32{'-', '-', 1})
						break loop
					}
					// a-b range; the bounds must be in ascending order.
					b := s.mustParseChar("")
					if b < a {
						panic(fmt.Errorf(`missing or invalid range bounds ordering in bracket expression "%s-%s"`, string(a), string(b)))
					}
					ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(b), 1})
				} else {
					// Single member.
					ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(a), 1})
				}
			}
			if s.Accept(']') {
				break
			}
		}
		(*rangeSlice)(&ranges.R32).normalize()
		out = in.AddConsuming(NewRangesEdge(n.NewState(), invert, ranges)).Target()
	}
	// postfix ops
	switch s.Current() {
	case '+':
		s.Move()
		_, out = n.OneOrMore(in, out)
	case '*':
		s.Move()
		_, out = n.ZeroOrMore(in, out)
	case '?':
		s.Move()
		_, out = n.ZeroOrOne(in, out)
	}
	return
}
// addR32ToTable appends r32 to r.R32. No ordering or overlap checks are
// performed on the appended range.
func addR32ToTable(r *unicode.RangeTable, r32 unicode.Range32) {
	ranges := r.R32
	if ranges == nil {
		ranges = make([]unicode.Range32, 0, 1)
	}
	r.R32 = append(ranges, r32)
}