// parseStr parses p with the syntax.Literal flag, so the whole string is
// treated as literal text rather than as a pattern. It panics if syntax.Parse
// reports an error.
func parseStr(p string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(p, syntax.Literal)
	if parseErr == nil {
		return parsed
	}
	panic(parseErr)
}
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { re, err := syntax.Parse(expr, mode) if err != nil { return nil, err } maxCap := re.MaxCap() capNames := re.CapNames() re = re.Simplify() prog, err := syntax.Compile(re) if err != nil { return nil, err } regexp := &Regexp{ expr: expr, prog: prog, numSubexp: maxCap, subexpNames: capNames, cond: prog.StartCond(), longest: longest, } regexp.prefix, regexp.prefixComplete = prog.Prefix() if regexp.prefix != "" { // TODO(rsc): Remove this allocation by adding // IndexString to package bytes. regexp.prefixBytes = []byte(regexp.prefix) regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix) } return regexp, nil }
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { re, err := syntax.Parse(expr, mode) if err != nil { return nil, err } return compileSyntax(re, expr, longest) }
// NewGenerator creates a generator that returns random strings that match the regular expression in pattern. // If args is nil, default values are used. func NewGenerator(pattern string, args *GeneratorArgs) (generator Generator, err error) { if nil == args { args = &GeneratorArgs{} } var seed int64 if nil == args.RngSource { seed = rand.Int63() } else { seed = args.RngSource.Int63() } rngSource := xorShift64Source(seed) args.rng = rand.New(&rngSource) // unicode groups only allowed with Perl if (args.Flags&syntax.UnicodeGroups) == syntax.UnicodeGroups && (args.Flags&syntax.Perl) != syntax.Perl { return nil, generatorError(nil, "UnicodeGroups not supported") } var regexp *syntax.Regexp regexp, err = syntax.Parse(pattern, args.Flags) if err != nil { return } var gen *internalGenerator gen, err = newGenerator(regexp, args) if err != nil { return } return gen, nil }
func (b *builder) terminal(f reflect.StructField, fullName string) (*Field, *syntax.Regexp, error) { pattern := string(f.Tag) if pattern == "" { return nil, nil, nil } // TODO: check for sub-captures within expr and remove them expr, err := syntax.Parse(pattern, b.opts.SyntaxFlags) if err != nil { return nil, nil, fmt.Errorf(`%s: %v (pattern was "%s")`, fullName, err, f.Tag) } captureIndex := -1 if isExported(f) { captureIndex = b.nextCaptureIndex() expr = &syntax.Regexp{ Op: syntax.OpCapture, Sub: []*syntax.Regexp{expr}, Name: f.Name, Cap: captureIndex, } } field := &Field{ index: f.Index, capture: captureIndex, } return field, expr, nil }
// NewGenerator creates a generator that returns random strings that match the regular expression in pattern. // If args is nil, default values are used. func NewGenerator(pattern string, inputArgs *GeneratorArgs) (generator Generator, err error) { args := GeneratorArgs{} // Copy inputArgs so the caller can't change them. if inputArgs != nil { args = *inputArgs } if err = args.initialize(); err != nil { return nil, err } var regexp *syntax.Regexp regexp, err = syntax.Parse(pattern, args.Flags) if err != nil { return } var gen *internalGenerator gen, err = newGenerator(regexp, &args) if err != nil { return } return gen, nil }
// parsePat parses p as a regular expression with the ClassNL, DotNL, MatchNL,
// PerlX and UnicodeGroups flags set. It panics if syntax.Parse reports an
// error.
func parsePat(p string) *syntax.Regexp {
	const flags = syntax.ClassNL | syntax.DotNL | syntax.MatchNL | syntax.PerlX | syntax.UnicodeGroups

	parsed, parseErr := syntax.Parse(p, flags)
	if parseErr == nil {
		return parsed
	}
	panic(parseErr)
}
// regexStrings returns a set of strings such that any string that matches re must // contain at least one of the strings in the set. If no such set can be found, // regexStrings returns an empty set. func regexStrings(re string) (stringSet, error) { parsed, err := syntax.Parse(re, syntax.Perl) if err != nil { return nil, err } info := analyze(parsed) return info.bestSet(), nil }
func New(pattern string) (*Node, error) { r, err := syntax.Parse(pattern, syntax.Perl) if err != nil { return nil, err } return NewFromRegexp(r.Simplify()), nil }
/*
sketchOnRegex takes an arbitrary regular expression and:

 1. returns a regular expression that is just like it, but left-anchored,
    handing back the original whenever it is already anchored;
 2. determines a string literal prefix that every match of the expression
    shares, much like regexp.Regexp.Prefix().

Prefix() does not work in the presence of anchors, so the prefix is computed
here by re-parsing the expression's source text, compiling it with
regexp/syntax and stepping through the resulting program by hand.

regexp.Regexp hides most of its state, so this is a leaky abstraction. The
biggest leak is that every expression is blindly assumed to be Perl-style
rather than POSIX. That is probably mostly true, and most users of the
library are unlikely to notice.

If the expression cannot be parsed, compiled or re-anchored, a warning is
logged and the original expression is returned with an empty prefix.
*/
func sketchOnRegex(re *regexp.Regexp) (*regexp.Regexp, string) {
	rawRe := re.String()

	parsed, err := syntax.Parse(rawRe, syntax.Perl)
	if err != nil {
		log.Printf("WARN(web): unable to parse regexp %v as perl. "+
			"This route might behave unexpectedly.", re)
		return re, ""
	}

	prog, err := syntax.Compile(parsed.Simplify())
	if err != nil {
		log.Printf("WARN(web): unable to compile regexp %v. This "+
			"route might behave unexpectedly.", re)
		return re, ""
	}

	anchored := re
	if prog.StartCond()&syntax.EmptyBeginText == 0 {
		// I hope doing this is always legal...
		anchored, err = regexp.Compile(`\A` + rawRe)
		if err != nil {
			log.Printf("WARN(web): unable to create a left-"+
				"anchored regexp from %v. This route might "+
				"behave unexpectedly", re)
			return re, ""
		}
	}

	// Run the regular expression more or less by hand :(
	// Follow the Out edge of each instruction from the start of the program,
	// collecting single, case-sensitive runes until anything else shows up.
	var prefix bytes.Buffer
	consumed := false
	for pc := uint32(prog.Start); ; pc = prog.Inst[pc].Out {
		inst := &prog.Inst[pc]
		switch inst.Op {
		case syntax.InstCapture, syntax.InstNop:
			// Nothing to do for these; keep walking.
		case syntax.InstEmptyWidth:
			// Empty-width checks are only stepped over before the first rune.
			if consumed {
				return anchored, prefix.String()
			}
		case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
			consumed = true
			foldsCase := syntax.Flags(inst.Arg)&syntax.FoldCase != 0
			if foldsCase || len(inst.Rune) != 1 {
				return anchored, prefix.String()
			}
			prefix.WriteRune(inst.Rune[0])
		default:
			return anchored, prefix.String()
		}
	}
}
func (b *builder) terminal(f reflect.StructField, fullName string) (*Field, *syntax.Regexp, error) { pattern, err := b.extractTag(f.Tag) if err != nil { return nil, nil, fmt.Errorf("%s: %v", fullName, err) } if pattern == "" { return nil, nil, nil } // Parse the pattern expr, err := syntax.Parse(pattern, b.opts.SyntaxFlags) if err != nil { return nil, nil, fmt.Errorf(`%s: %v (pattern was "%s")`, fullName, err, f.Tag) } // Remove capture nodes within the AST expr, err = transform(expr, removeCaptures) if err != nil { return nil, nil, fmt.Errorf(`failed to remove captures from "%s": %v`, pattern, err) } // Determine the kind t := f.Type if t.Kind() == reflect.Ptr { t = t.Elem() } var role Role switch t { case emptyType: role = EmptyRole case stringType: role = StringScalarRole case byteSliceType: role = ByteSliceScalarRole case submatchType: role = SubmatchScalarRole } captureIndex := -1 if isExported(f) { captureIndex = b.nextCaptureIndex() expr = &syntax.Regexp{ Op: syntax.OpCapture, Sub: []*syntax.Regexp{expr}, Name: f.Name, Cap: captureIndex, } } field := &Field{ index: f.Index, capture: captureIndex, role: role, } return field, expr, nil }
// isLiteralRegexp reports whether expr, parsed with flags, is a single literal
// that is matched case-sensitively. It returns false when expr does not parse.
func isLiteralRegexp(expr string, flags syntax.Flags) bool {
	parsed, err := syntax.Parse(expr, flags)
	if err != nil {
		return false
	}
	caseSensitive := parsed.Flags&syntax.FoldCase == 0
	return caseSensitive && parsed.Op == syntax.OpLiteral
}
// RegexMatch generates matches for a given regular expression // regexStr is supposed to conform to the perl regular expression syntax func RegexMatch(regexStr string) gopter.Gen { regexSyntax, err1 := syntax.Parse(regexStr, syntax.Perl) regex, err2 := regexp.Compile(regexStr) if err1 != nil || err2 != nil { return Fail(reflect.TypeOf("")) } return regexMatchGen(regexSyntax.Simplify()).SuchThat(func(v interface{}) bool { return regex.MatchString(v.(string)) }).WithShrinker(StringShrinker) }
// parseOrPanic parses pattern into an AST using no syntax flags and, when
// simplify is true, returns the simplified form of that AST.
// Panics on a parse error.
func parseOrPanic(simplify bool, pattern string) *syntax.Regexp {
	parsed, parseErr := syntax.Parse(pattern, 0)
	if parseErr != nil {
		panic(parseErr)
	}
	if !simplify {
		return parsed
	}
	return parsed.Simplify()
}
func newRegexpCache(r *regexp.Regexp) *regexpCache { s := r.String() re, _ := syntax.Parse(s, syntax.Perl) return ®expCache{ re: re, min: minCap(re), max: re.MaxCap(), cache: make(map[string]*regexp.Regexp), } }
// literalRegexp returns the literal text of r when r has exactly the shape
// ^literal$ — a begin-of-text anchor, one case-sensitive literal and an
// end-of-text anchor, with no capture groups. For every other expression it
// returns the empty string.
//
// The empty string is also returned when the source text of r cannot be
// re-parsed with Perl syntax, and when the literal is matched
// case-insensitively (for example "(?i)^abc$"): such an expression matches
// more than one string, so no single literal is equivalent to it.
func literalRegexp(r *regexp.Regexp) string {
	re, err := syntax.Parse(r.String(), syntax.Perl)
	if err != nil {
		return ""
	}
	if re.MaxCap() != 0 || re.Op != syntax.OpConcat || len(re.Sub) != 3 {
		return ""
	}

	begin, lit, end := re.Sub[0], re.Sub[1], re.Sub[2]
	if begin.Op != syntax.OpBeginText || lit.Op != syntax.OpLiteral || end.Op != syntax.OpEndText {
		return ""
	}
	// A case-folded literal stores its runes in folded form and matches
	// several spellings, so it must not be reported as an exact literal.
	if lit.Flags&syntax.FoldCase != 0 {
		return ""
	}
	return string(lit.Rune)
}
func TestQuery(t *testing.T) { for _, tt := range queryTests { re, err := syntax.Parse(tt.re, syntax.Perl) if err != nil { t.Fatal(err) } q := RegexpQuery(re).String() if q != tt.q { t.Errorf("RegexpQuery(%#q) = %#q, want %#q", tt.re, q, tt.q) } } }
// Check that one-pass cutoff does trigger. func TestOnePassCutoff(t *testing.T) { re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl) if err != nil { t.Fatalf("parse: %v", err) } p, err := syntax.Compile(re.Simplify()) if err != nil { t.Fatalf("compile: %v", err) } if compileOnePass(p) != notOnePass { t.Fatalf("makeOnePass succeeded; wanted notOnePass") } }
func NewInverseRegex(s string) (*Xeger, error) { _, err := regexp.Compile(s) if err != nil { return nil, err } re, err := syntax.Parse(s, syntax.POSIX) if err != nil { return nil, err } simp := re.Simplify() return &Xeger{re: simp, logger: nopLogger{}}, nil }
func testRegex() { for k, v := range lexer_rules { fmt.Println("\nparsing ", k, " = ", v) re, err := syntax.Parse(lexer_rules["word"], syntax.Simple|syntax.UnicodeGroups|syntax.PerlX) if !tlog.Ok(err) { return } // fmt.Println(re) prog, err := syntax.Compile(re) if !tlog.Ok(err) { return } fmt.Println(prog) } }
func NewEventFilter(rawPatterns []string) EventFilter { filters := defaultRegexes globs := defaultGlobs for _, pat := range rawPatterns { if len(pat) <= 0 { continue } regex, err := syn.Parse(pat, syn.POSIX) if err != nil { globs = append(globs, pat) } else { filters = append(filters, re.MustCompile(regex.String())) } } filters = append(filters, re.MustCompile(ConfigurationFilename)) return EventFilter{filters: filters, globs: globs} }
// CompileRegexp compiles a regular expression pattern and creates a template // to revert it. func CompileRegexp(pattern string) (*Regexp, error) { compiled, err := regexp.Compile(pattern) if err != nil { return nil, err } re, err := syntax.Parse(pattern, syntax.Perl) if err != nil { return nil, err } tpl := &template{buffer: new(bytes.Buffer)} tpl.write(re) return &Regexp{ compiled: compiled, template: tpl.buffer.String(), groups: tpl.groups, indices: tpl.indices, }, nil }
func init() { for _, v := range CdnInfos { re, _ := syntax.Parse(v.Pattern.String(), syntax.Perl) var buf bytes.Buffer walk(re, func(r *syntax.Regexp) bool { if r.Op == syntax.OpLiteral { buf.WriteString(string(r.Rune)) return false } if r.Op == syntax.OpConcat { return false } return true }) orig := buf.String() key := strings.Trim(orig, " -") Register(key, SingleParser(cdnScriptParser(key, orig))) } }
// Compile parses a regular expression and returns, if successful, // a Regexp object that can be used to match against lines of text. func Compile(expr string) (*Regexp, error) { re, err := syntax.Parse(expr, syntax.Perl) if err != nil { return nil, err } sre := re.Simplify() prog, err := syntax.Compile(sre) if err != nil { return nil, err } if err := toByteProg(prog); err != nil { return nil, err } r := &Regexp{ Syntax: re, expr: expr, } if err := r.m.init(prog); err != nil { return nil, err } return r, nil }
func compileLiteral(node lexer.ASTNode, scope Scope) (output string, parentScope Scope, err error) { if !node.IsCommandType(lexer.CmdLiteral) { panic("Expecting literal command") } // Directly copy the regexp literal output = node.Command().Value // Scan for any capture groups in the literal and increment our reference literalRegexp, err := syntax.Parse(output, syntax.Perl) if err != nil { return output, scope, err } numCaptureGroups := len(literalRegexp.CapNames()) // Don't count the full literal regexp as a capture if numCaptureGroups > 1 { scope.CurrentRegexp.IncrementCaptureGroup(numCaptureGroups - 1) } return output, scope, err }
// regexpQuery parses an atom into either a regular expression, or a // simple substring atom. func regexpQuery(text string, file bool) (Q, error) { var expr Q r, err := syntax.Parse(text, syntax.Perl) if err != nil { return nil, err } if r.Op == syntax.OpLiteral { expr = &Substring{ Pattern: string(r.Rune), FileName: file, } } else { expr = &Regexp{ Regexp: r, FileName: file, } } return expr, nil }
func TestCompileOnePass(t *testing.T) { var ( p *syntax.Prog re *syntax.Regexp err error ) for _, test := range onePassTests { if re, err = syntax.Parse(test.re, syntax.Perl); err != nil { t.Errorf("Parse(%q) got err:%s, want success", test.re, err) continue } // needs to be done before compile... re = re.Simplify() if p, err = syntax.Compile(re); err != nil { t.Errorf("Compile(%q) got err:%s, want success", test.re, err) continue } onePass = compileOnePass(p) if (onePass == notOnePass) != (test.onePass == notOnePass) { t.Errorf("CompileOnePass(%q) got %v, expected %v", test.re, onePass, test.onePass) } } }
func compile(exprs ...string) (*matcher, error) { var progs []*syntax.Prog for _, expr := range exprs { re, err := syntax.Parse(expr, syntax.Perl) if err != nil { return nil, err } sre := re.Simplify() prog, err := syntax.Compile(sre) if err != nil { return nil, err } if err := toByteProg(prog); err != nil { return nil, err } progs = append(progs, prog) } m := &matcher{} if err := m.init(joinProgs(progs), len(progs)); err != nil { return nil, err } return m, nil }
func NewRe(pattern, groupDelimiter, groupRepeatDelimiter string, ignoreCase, dotAll bool) (*Re, error) { flags := syntax.Perl if ignoreCase { flags |= syntax.FoldCase } if dotAll { flags |= syntax.DotNL } meta, err := syntax.Parse(pattern, flags) if err != nil { return nil, err } return &Re{ re: regexp.MustCompile(meta.String()), dotAll: dotAll, groupDelimiter: groupDelimiter, groupRepeatDelimiter: groupRepeatDelimiter, groupCount: meta.MaxCap(), groupNames: meta.CapNames()[1:], }, nil }
func TestRegexpParse(t *testing.T) { type testcase struct { in string want Q } cases := []testcase{ {"(foo|)bar", &Substring{Pattern: "bar"}}, {"(foo|)", &Const{true}}, {"(foo|bar)baz.*bla", &And{[]Q{ &Or{[]Q{ &Substring{Pattern: "foo"}, &Substring{Pattern: "bar"}, }}, &Substring{Pattern: "baz"}, &Substring{Pattern: "bla"}, }}}, {"^[a-z](People)+barrabas$", &And{[]Q{ &Substring{Pattern: "People"}, &Substring{Pattern: "barrabas"}, }}}, } for _, c := range cases { r, err := syntax.Parse(c.in, syntax.Perl) if err != nil { t.Errorf("Parse(%q): %v", c.in, err) continue } got := RegexpToQuery(r, 3) if !reflect.DeepEqual(c.want, got) { t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, got, c.want) } } }