// Returns a generator that will run the generator for r's sub-expression [min, max] times. func createRepeatingGenerator(regexp *syntax.Regexp, genArgs *GeneratorArgs, min, max int) (*internalGenerator, error) { if err := enforceSingleSub(regexp); err != nil { return nil, err } generator, err := newGenerator(regexp.Sub[0], genArgs) if err != nil { return nil, generatorError(err, "failed to create generator for subexpression: /%s/", regexp) } if min == noBound { min = int(genArgs.MinUnboundedRepeatCount) } if max == noBound { max = int(genArgs.MaxUnboundedRepeatCount) } return &internalGenerator{regexp.String(), func() string { n := min + genArgs.rng.Intn(max-min+1) var result bytes.Buffer for i := 0; i < n; i++ { result.WriteString(generator.Generate()) } return result.String() }}, nil }
// Create a new generator for r. func newGenerator(regexp *syntax.Regexp, args *GeneratorArgs) (generator *internalGenerator, err error) { simplified := regexp.Simplify() factory, ok := generatorFactories[simplified.Op] if ok { return factory(simplified, args) } return nil, fmt.Errorf("invalid generator pattern: /%s/ as /%s/\n%s", regexp, simplified, inspectRegexpToString(simplified)) }
func opConcat(regexp *syntax.Regexp, genArgs *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpConcat) generators, err := newGenerators(regexp.Sub, genArgs) if err != nil { return nil, generatorError(err, "error creating generators for concat pattern /%s/", regexp) } return &internalGenerator{regexp.String(), func() string { return genArgs.Executor.Execute(generators) }}, nil }
// singleLiteralToCharClass rewrites rx in place from a one-rune OpLiteral into
// an equivalent OpCharClass. Nodes that are not single-rune literals are left
// untouched.
//
// Without syntax.FoldCase the class is the single range [c, c]. With FoldCase
// the class holds one single-rune range for every member of c's Unicode
// simple-folding orbit, in ascending order. The orbit is walked with
// unicode.SimpleFold rather than pairing ToLower with ToUpper, so that:
//   - the rune itself stays in the class (title-case letters such as U+01C5
//     map to two *other* runes under ToLower/ToUpper);
//   - additional equivalents are included (e.g. U+212A KELVIN SIGN for 'k');
//   - ranges are never emitted out of order (e.g. U+00FF / U+0178).
//
// For ASCII letters the result is {upper, upper, lower, lower}; for runes
// without case variants it is {c, c}.
func singleLiteralToCharClass(rx *syntax.Regexp) {
	if rx.Op != syntax.OpLiteral || len(rx.Rune) != 1 {
		return
	}

	char := rx.Rune[0]
	ranges := []rune{char, char}

	if rx.Flags&syntax.FoldCase != 0 {
		// Find the smallest rune in the folding orbit of char.
		lo := char
		for f := unicode.SimpleFold(char); f != char; f = unicode.SimpleFold(f) {
			if f < lo {
				lo = f
			}
		}
		// Starting from the smallest member, SimpleFold yields the remaining
		// members in increasing order before wrapping back to lo.
		ranges = []rune{lo, lo}
		for f := unicode.SimpleFold(lo); f != lo; f = unicode.SimpleFold(f) {
			ranges = append(ranges, f, f)
		}
	}

	rx.Rune = ranges
	rx.Rune0 = [2]rune{ranges[0], ranges[1]}
	rx.Op = syntax.OpCharClass
}
func opConcat(regexp *syntax.Regexp, genArgs *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpConcat) generators, err := newGenerators(regexp.Sub, genArgs) if err != nil { return nil, generatorError(err, "error creating generators for concat pattern /%s/", regexp) } return &internalGenerator{regexp.String(), func() string { var result bytes.Buffer for _, generator := range generators { result.WriteString(generator.Generate()) } return result.String() }}, nil }
func opAlternate(regexp *syntax.Regexp, genArgs *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpAlternate) generators, err := newGenerators(regexp.Sub, genArgs) if err != nil { return nil, generatorError(err, "error creating generators for alternate pattern /%s/", regexp) } numGens := len(generators) return &internalGenerator{regexp.String(), func() string { i := genArgs.rng.Intn(numGens) generator := generators[i] return generator.Generate() }}, nil }
// Returns a generator that will run the generator for r's sub-expression [min, max] times. func createRepeatingGenerator(regexp *syntax.Regexp, genArgs *GeneratorArgs, min int, max int) (*internalGenerator, error) { if err := enforceSingleSub(regexp); err != nil { return nil, err } generator, err := newGenerator(regexp.Sub[0], genArgs) if err != nil { return nil, generatorError(err, "failed to create generator for subexpression: /%s/", regexp) } if max < 0 { max = maxUpperBound } return &internalGenerator{regexp.String(), func() string { n := min + genArgs.rng.Intn(max-min+1) return executeGeneratorRepeatedly(genArgs.Executor, generator, n) }}, nil }
func opCapture(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpCapture) if err := enforceSingleSub(regexp); err != nil { return nil, err } groupRegexp := regexp.Sub[0] generator, err := newGenerator(groupRegexp, args) if err != nil { return nil, err } // Group indices are 0-based, but index 0 is the whole expression. index := regexp.Cap - 1 return &internalGenerator{regexp.String(), func() string { return args.CaptureGroupHandler(index, regexp.Name, groupRegexp, generator, args) }}, nil }
func compileSyntax(re *syntax.Regexp, expr string, longest bool) (*Regexp, error) { maxCap := re.MaxCap() capNames := re.CapNames() re = re.Simplify() prog, err := syntax.Compile(re) if err != nil { return nil, err } regexp := &Regexp{ expr: expr, prog: prog, onepass: compileOnePass(prog), numSubexp: maxCap, subexpNames: capNames, cond: prog.StartCond(), longest: longest, } if regexp.onepass == notOnePass { regexp.prefix, regexp.prefixComplete = prog.Prefix() } else { regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog) } if regexp.prefix != "" { // TODO(rsc): Remove this allocation by adding // IndexString to package bytes. regexp.prefixBytes = []byte(regexp.prefix) regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix) } return regexp, nil }
func TestCompileOnePass(t *testing.T) { var ( p *syntax.Prog re *syntax.Regexp err error ) for _, test := range onePassTests { if re, err = syntax.Parse(test.re, syntax.Perl); err != nil { t.Errorf("Parse(%q) got err:%s, want success", test.re, err) continue } // needs to be done before compile... re = re.Simplify() if p, err = syntax.Compile(re); err != nil { t.Errorf("Compile(%q) got err:%s, want success", test.re, err) continue } onePass = compileOnePass(p) if (onePass == notOnePass) != (test.onePass == notOnePass) { t.Errorf("CompileOnePass(%q) got %v, expected %v", test.re, onePass, test.onePass) } } }
// transform replaces each node in a regex AST with the return value of the given function // it processes the children of a node before the node itself func transform(expr *syntax.Regexp, f transformer) (*syntax.Regexp, error) { var newchildren []*syntax.Regexp for _, child := range expr.Sub { newchild, err := transform(child, f) if err != nil { return nil, err } replacements, err := f(newchild) if err != nil { return nil, err } newchildren = append(newchildren, replacements...) } expr.Sub = newchildren return expr, nil }
// CompileSyntax is like Compile but takes a syntax tree as input. func CompileSyntax(ast *syntax.Regexp) (*Regexp, error) { return compileSyntax(ast, ast.String(), true) }
// Generator that does nothing. func noop(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) { return &internalGenerator{regexp.String(), func() string { return "" }}, nil }
func opEmptyMatch(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpEmptyMatch) return &internalGenerator{regexp.String(), func() string { return "" }}, nil }
// Handles syntax.ClassNL because the parser uses that flag to generate character // classes that respect it. func opCharClass(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpCharClass) charClass := parseCharClass(regexp.Rune) return createCharClassGenerator(regexp.String(), charClass, args) }
func opAnyCharNotNl(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpAnyCharNotNL) charClass := newCharClass(1, rune(math.MaxInt32)) return createCharClassGenerator(regexp.String(), charClass, args) }
func opAnyChar(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpAnyChar) return &internalGenerator{regexp.String(), func() string { return runesToString(rune(args.rng.Int31())) }}, nil }
func opLiteral(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) { enforceOp(regexp, syntax.OpLiteral) return &internalGenerator{regexp.String(), func() string { return runesToString(regexp.Rune...) }}, nil }