Beispiel #1
2
// New returns a RangeTable that holds the given runes. Duplicate runes in r
// are allowed and are collapsed. The slice backing r is sorted and compacted
// in place.
func New(r ...rune) *unicode.RangeTable {
	if len(r) == 0 {
		return &unicode.RangeTable{}
	}

	sort.Sort(byRune(r))

	// Compact the sorted runes so each value is kept once. uniq shares r's
	// backing array and never grows past the position being read.
	uniq := r[:1]
	for _, c := range r[1:] {
		if c != uniq[len(uniq)-1] {
			uniq = append(uniq, c)
		}
	}

	// Emit one single-rune range per value, choosing the slice by rune width.
	table := new(unicode.RangeTable)
	for _, c := range uniq {
		if c > 0xFFFF {
			table.R32 = append(table.R32, unicode.Range32{Lo: uint32(c), Hi: uint32(c), Stride: 1})
			continue
		}
		table.R16 = append(table.R16, unicode.Range16{Lo: uint16(c), Hi: uint16(c), Stride: 1})
	}

	// Hand the table to Merge for optimization.
	return Merge(table)
}
Beispiel #2
2
// appendRange32 adds the inclusive range [lo, hi] to the end of t.R32, folding
// it into the current last range when that keeps the table compact.
//
// lo must be greater than the Hi of the last range already in t.R32 and must
// not be greater than hi; otherwise an error is returned and t is unchanged.
//
// Folding rules, where "last" is the final entry of t.R32:
//   - a single rune that continues last's stride extends last;
//   - a single rune following a single-rune last turns last into a
//     two-element strided range;
//   - a multi-rune range directly adjacent to last either extends last
//     (Stride 1) or takes over last's final element (Stride > 1);
//   - anything else is appended as a new Stride-1 range.
func appendRange32(t *unicode.RangeTable, lo, hi uint32) error {
	n := len(t.R32)
	if n > 0 && lo <= t.R32[n-1].Hi {
		return fmt.Errorf("AppendRange: lo must be greater than Hi of the last range: lo=%d", lo)
	}
	if lo > hi {
		return fmt.Errorf("AppendRange: lo must not be greater than hi: lo=%d, hi=%d", lo, hi)
	}
	if n == 0 {
		t.R32 = append(t.R32, unicode.Range32{Lo: lo, Hi: hi, Stride: 1})
		return nil
	}

	last := &t.R32[n-1]
	switch {
	case lo == hi && last.Hi+last.Stride == lo:
		// The rune is the next element of last's progression.
		last.Hi = lo

		// If the preceding strided range ends right before a contiguous
		// last, move its final element into last once last is at least as
		// long. This is only valid when last has Stride 1: decrementing Lo
		// of a strided range would shift every element it contains.
		if last.Stride != 1 || n < 2 {
			break
		}
		prev := &t.R32[n-2]
		if prev.Stride <= 1 || prev.Hi+1 != last.Lo {
			break
		}
		prevCount := (prev.Hi-prev.Lo)/prev.Stride + 1
		count := last.Hi - last.Lo + 1
		if prevCount <= count {
			last.Lo--
			prev.Hi -= prev.Stride
			if prev.Lo == prev.Hi {
				prev.Stride = 1
			}
		}

	case lo == hi && last.Lo == last.Hi:
		// Two isolated runes form a two-element strided range.
		last.Hi = lo
		last.Stride = lo - last.Lo

	case lo == hi, lo-1 > last.Hi:
		// Nothing to fold into: start a new range.
		t.R32 = append(t.R32, unicode.Range32{Lo: lo, Hi: hi, Stride: 1})

	case last.Stride == 1:
		// [lo, hi] starts right after a contiguous last: extend it.
		last.Hi = hi

	default:
		// [lo, hi] starts right after the final element of a strided last:
		// detach that element and let the new contiguous range absorb it.
		last.Hi -= last.Stride
		if last.Lo == last.Hi {
			last.Stride = 1
		}
		t.R32 = append(t.R32, unicode.Range32{Lo: lo - 1, Hi: hi, Stride: 1})
	}
	return nil
}
Beispiel #3
1
// addR16ToTable appends r16 to the end of r.R16 and increments r.LatinOffset
// when the whole range lies within Latin-1 (r16.Hi <= unicode.MaxLatin1).
//
// The range is appended as-is; no ordering or overlap check is performed.
func addR16ToTable(r *unicode.RangeTable, r16 unicode.Range16) {
	// append allocates when r.R16 is nil, so no explicit make is needed.
	r.R16 = append(r.R16, r16)
	if r16.Hi <= unicode.MaxLatin1 {
		r.LatinOffset++
	}
}
// CharClass returns a charClassMatcher built from negated and a private copy
// of table: the R16 and R32 slices are duplicated, so later changes to
// table's slices do not affect the returned Matcher.
func CharClass(negated bool, table *unicode.RangeTable) Matcher {
	r16 := make([]unicode.Range16, len(table.R16))
	r32 := make([]unicode.Range32, len(table.R32))
	copy(r16, table.R16)
	copy(r32, table.R32)

	own := unicode.RangeTable{
		R16:         r16,
		R32:         r32,
		LatinOffset: table.LatinOffset,
	}
	return charClassMatcher{negated, own}
}
// OneOf returns a non-negated charClassMatcher whose table holds one
// single-rune range per element of chars. With no chars it returns Fail().
//
// chars is handed to sort.Sort via utf8betical, so the caller's slice is
// reordered. Duplicates are not removed.
func OneOf(chars ...rune) Matcher {
	if len(chars) == 0 {
		return Fail()
	}
	sort.Sort(utf8betical(chars))

	var (
		narrow []unicode.Range16
		wide   []unicode.Range32
		latin  int
	)
	for _, c := range chars {
		switch {
		case c >= 0x10000:
			wide = append(wide, unicode.Range32{Lo: uint32(c), Hi: uint32(c), Stride: 1})
		case c < 0x100:
			// Latin-1 entries are additionally counted in LatinOffset.
			latin++
			narrow = append(narrow, unicode.Range16{Lo: uint16(c), Hi: uint16(c), Stride: 1})
		default:
			narrow = append(narrow, unicode.Range16{Lo: uint16(c), Hi: uint16(c), Stride: 1})
		}
	}
	return charClassMatcher{false, unicode.RangeTable{R16: narrow, R32: wide, LatinOffset: latin}}
}
// Range returns a non-negated charClassMatcher for the inclusive rune range
// [lo, hi]. When lo > hi it returns Fail().
//
// A range that crosses the 16-bit boundary is split into an R16 entry ending
// at 0xffff and an R32 entry starting at 0x10000. LatinOffset is set to 1 only
// when the whole range lies below 0x100.
func Range(lo, hi rune) Matcher {
	if lo > hi {
		return Fail()
	}

	var table unicode.RangeTable
	switch {
	case lo >= 0x10000:
		// Entirely above the 16-bit boundary.
		table.R32 = append(table.R32, unicode.Range32{Lo: uint32(lo), Hi: uint32(hi), Stride: 1})
	case hi >= 0x10000:
		// Straddles the boundary: one entry on each side.
		table.R16 = append(table.R16, unicode.Range16{Lo: uint16(lo), Hi: 0xffff, Stride: 1})
		table.R32 = append(table.R32, unicode.Range32{Lo: 0x10000, Hi: uint32(hi), Stride: 1})
	default:
		// Entirely within 16 bits.
		table.R16 = append(table.R16, unicode.Range16{Lo: uint16(lo), Hi: uint16(hi), Stride: 1})
		if hi < 0x100 {
			table.LatinOffset = 1
		}
	}
	return charClassMatcher{false, table}
}
Beispiel #7
0
// appendRange16 adds the inclusive range [lo, hi] to the end of t.R16, folding
// it into the current last range when that keeps the table compact, and keeps
// t.LatinOffset equal to the number of R16 entries with Hi <= unicode.MaxLatin1.
//
// lo must be greater than the Hi of the last range already in t.R16 and must
// not be greater than hi; otherwise an error is returned and t is unchanged.
//
// Folding rules, where "last" is the final entry of t.R16:
//   - a single rune that continues last's stride extends last;
//   - a single rune following a single-rune last turns last into a
//     two-element strided range;
//   - a multi-rune range directly adjacent to last either extends last
//     (Stride 1) or takes over last's final element (Stride > 1);
//   - anything else is appended as a new Stride-1 range.
func appendRange16(t *unicode.RangeTable, lo, hi uint16) error {
	n := len(t.R16)
	if n > 0 && lo <= t.R16[n-1].Hi {
		return fmt.Errorf("AppendRange: lo must be greater than Hi of the last range: lo=%d", lo)
	}
	if lo > hi {
		return fmt.Errorf("AppendRange: lo must not be greater than hi: lo=%d, hi=%d", lo, hi)
	}

	if n == 0 {
		t.R16 = append(t.R16, unicode.Range16{Lo: lo, Hi: hi, Stride: 1})
	} else {
		last := &t.R16[n-1]
		switch {
		case lo == hi && last.Hi+last.Stride == lo:
			// The rune is the next element of last's progression.
			last.Hi = lo

			// If the preceding strided range ends right before a contiguous
			// last, move its final element into last once last is at least
			// as long. This is only valid when last has Stride 1:
			// decrementing Lo of a strided range would shift every element
			// it contains.
			if last.Stride != 1 || n < 2 {
				break
			}
			prev := &t.R16[n-2]
			if prev.Stride <= 1 || prev.Hi+1 != last.Lo {
				break
			}
			prevCount := (prev.Hi-prev.Lo)/prev.Stride + 1
			count := last.Hi - last.Lo + 1
			if prevCount <= count {
				last.Lo--
				prev.Hi -= prev.Stride
				if prev.Lo == prev.Hi {
					prev.Stride = 1
				}
			}

		case lo == hi && last.Lo == last.Hi:
			// Two isolated runes form a two-element strided range.
			last.Hi = lo
			last.Stride = lo - last.Lo

		case lo == hi, lo-1 > last.Hi:
			// Nothing to fold into: start a new range. A multi-rune range
			// must never be merged into a single-rune last as a stride,
			// because that would drop every rune in lo..hi-1.
			t.R16 = append(t.R16, unicode.Range16{Lo: lo, Hi: hi, Stride: 1})

		case last.Stride == 1:
			// [lo, hi] starts right after a contiguous last: extend it.
			last.Hi = hi

		default:
			// [lo, hi] starts right after the final element of a strided
			// last: detach that element and let the new contiguous range
			// absorb it.
			last.Hi -= last.Stride
			if last.Lo == last.Hi {
				last.Stride = 1
			}
			t.R16 = append(t.R16, unicode.Range16{Lo: lo - 1, Hi: hi, Stride: 1})
		}
	}

	// Recount instead of incrementing: folding can move an existing entry's
	// Hi across MaxLatin1 in either direction. Entries are sorted, so the
	// Latin-1 entries are exactly the leading ones.
	latin := 0
	for latin < len(t.R16) && t.R16[latin].Hi <= unicode.MaxLatin1 {
		latin++
	}
	t.LatinOffset = latin
	return nil
}
Beispiel #8
0
// parseTerm parses a single regular-expression term from s, wires it into the
// NFA n, and then applies an optional postfix operator ('+', '*' or '?').
//
// in0 is the state the term starts from; when it is nil a fresh state is
// created with n.NewState. nest is the current parenthesis nesting depth and
// is only consulted to decide whether a ')' is legal at this point.
//
// The returned in and out are the entry and exit states of the parsed term.
// Both are nil when no term starts at the current rune, i.e. it is 0, '|', or
// a ')' while nest > 0.
//
// Malformed input is reported by panicking with an error value: a leading
// '+', '*' or '?', a ')' at nest == 0, an unknown \p{...}/\P{...} name, or a
// bracket range whose upper bound is below its lower bound.
// NOTE(review): the scanner helpers used here (expect, mustParseChar,
// mustGetChar) are defined elsewhere and presumably panic on bad input too —
// confirm.
func (n *Nfa) parseTerm(s *ScannerSource, in0 *NfaState, nest int) (in, out *NfaState) {
	if in = in0; in == nil {
		in = n.NewState()
	}
	switch arune := s.Current(); arune {
	default:
		// Any rune without special meaning matches itself.
		s.Move()
		out = in.AddConsuming(NewRuneEdge(n.NewState(), arune)).Target()
	case '+', '*', '?':
		// A postfix operator with no preceding term.
		panic(fmt.Errorf("unexpected metachar %q", string(arune)))
	case '\\':
		// Escape sequence. NOTE(review): the "ApPz" argument presumably lists
		// letters that mustParseChar hands back unchanged so they can be
		// dispatched on below — confirm against mustParseChar.
		switch arune = s.mustParseChar("ApPz"); arune {
		default:
			// Any other escape yields a literal rune edge.
			out = in.AddConsuming(NewRuneEdge(n.NewState(), arune)).Target()
		case 'A':
			// \A: non-consuming TextStart assertion.
			out = in.AddNonConsuming(NewAssertEdge(n.NewState(), TextStart)).Target()
		case 'z':
			// \z: non-consuming TextEnd assertion.
			out = in.AddNonConsuming(NewAssertEdge(n.NewState(), TextEnd)).Target()
		case 'p', 'P':
			// \p{Name} / \P{Name}: collect everything up to the closing '}'.
			name, ok := "", false
			s.expect('{')
			for !s.Accept('}') {
				name += string(s.mustGetChar())
			}
			// Look the name up as a Unicode category first, then as a script.
			var ranges *unicode.RangeTable
			if ranges, ok = unicode.Categories[name]; !ok {
				if ranges, ok = unicode.Scripts[name]; !ok {
					panic(fmt.Errorf("unknown Unicode category name %q", name))
				}
			}
			// The second argument is true only for the upper-case \P form.
			out = in.AddConsuming(NewRangesEdge(n.NewState(), arune == 'P', ranges)).Target()
		}
	case 0, '|':
		// End of input or an alternation separator: no term here.
		return nil, nil
	case ')':
		// A closing parenthesis ends the enclosing group; it is only legal
		// when a group is actually open.
		if nest == 0 {
			panic(fmt.Errorf(`unexpected ")"`))
		}
		return nil, nil
	case '(':
		// Group: parse the nested expression one nesting level deeper. Both in
		// and out are replaced by what parseExpr returns.
		s.Move()
		in, out = n.parseExpr(s, in, n.NewState(), nest+1)
		s.expect(')')
	case '.':
		// A ranges edge over a table holding only '\n', with the second
		// argument set to true (the slot that receives invert for bracket
		// expressions below).
		// NOTE(review): the original comment said "All but '\U+0000', '\n'",
		// but the table built here does not contain U+0000 — confirm whether
		// NUL is excluded elsewhere.
		s.Move()
		out = in.AddConsuming(NewRangesEdge(n.NewState(), true, &unicode.RangeTable{R16: []unicode.Range16{{'\n', '\n', 1}}})).Target()
	case '^':
		// Non-consuming LineStart assertion.
		s.Move()
		out = in.AddNonConsuming(NewAssertEdge(n.NewState(), LineStart)).Target()
	case '$':
		// Non-consuming LineEnd assertion.
		s.Move()
		out = in.AddNonConsuming(NewAssertEdge(n.NewState(), LineEnd)).Target()
	case '[':
		// Bracket expression. Every member is collected as a Range32 in
		// ranges.R32 (regardless of rune width) and normalized afterwards.
		s.Move()
		ranges := &unicode.RangeTable{}
		// A leading '^' sets the invert flag passed to NewRangesEdge.
		invert := s.Accept('^')
	loop:
		for {
			a := s.mustParseChar("-")
			switch s.Current() {
			case '\\':
				// a is followed by an escape: record a, then parse and record
				// the escaped character as a second single-rune member.
				ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(a), 1})
				a := s.mustParseChar("-")
				ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(a), 1})
			default:
				if s.Accept('-') {
					// Allow `[+-]`: a '-' directly before ']' is a literal
					// dash, so record a and '-' and finish the expression.
					if s.Current() == ']' {
						s.Move()
						ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(a), 1})
						ranges.R32 = append(ranges.R32, unicode.Range32{'-', '-', 1})
						break loop
					}

					// Otherwise a-b is a range; its bounds must be ordered.
					b := s.mustParseChar("")
					if b < a {
						panic(fmt.Errorf(`missing or invalid range bounds ordering in bracket expression "%s-%s"`, string(a), string(b)))
					}
					ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(b), 1})
				} else {
					// Plain single-rune member.
					ranges.R32 = append(ranges.R32, unicode.Range32{uint32(a), uint32(a), 1})
				}
			}
			if s.Accept(']') {
				break
			}
		}
		(*rangeSlice)(&ranges.R32).normalize()
		out = in.AddConsuming(NewRangesEdge(n.NewState(), invert, ranges)).Target()
	}

	// Postfix operators: wrap the term just parsed and keep only the new exit
	// state; in is left as it was.
	switch s.Current() {
	case '+':
		s.Move()
		_, out = n.OneOrMore(in, out)
	case '*':
		s.Move()
		_, out = n.ZeroOrMore(in, out)
	case '?':
		s.Move()
		_, out = n.ZeroOrOne(in, out)
	}

	return
}
Beispiel #9
0
// addR32ToTable appends r32 to the end of r.R32.
//
// The range is appended as-is; no ordering or overlap check is performed.
func addR32ToTable(r *unicode.RangeTable, r32 unicode.Range32) {
	// append allocates when r.R32 is nil, so no explicit make is needed.
	r.R32 = append(r.R32, r32)
}