Exemple #1
0
// compileSyntax builds a *Regexp from an already-parsed syntax tree.
//
// expr is stored verbatim as the expression text and longest is stored as the
// matching preference. The capture count and capture names are read from the
// tree before it is simplified. An error from syntax.Compile is returned
// unchanged, together with a nil *Regexp.
func compileSyntax(re *syntax.Regexp, expr string, longest bool) (*Regexp, error) {
	// Read the capture metadata first; the tree is simplified right after.
	numSubexp, subexpNames := re.MaxCap(), re.CapNames()

	prog, err := syntax.Compile(re.Simplify())
	if err != nil {
		return nil, err
	}

	rx := &Regexp{
		expr:        expr,
		prog:        prog,
		onepass:     compileOnePass(prog),
		numSubexp:   numSubexp,
		subexpNames: subexpNames,
		cond:        prog.StartCond(),
		longest:     longest,
	}

	// The literal prefix comes from the one-pass program when one exists,
	// otherwise from the generic program.
	if rx.onepass != notOnePass {
		rx.prefix, rx.prefixComplete, rx.prefixEnd = onePassPrefix(prog)
	} else {
		rx.prefix, rx.prefixComplete = prog.Prefix()
	}

	if rx.prefix == "" {
		return rx, nil
	}

	// TODO(rsc): Remove this allocation by adding
	// IndexString to package bytes.
	rx.prefixBytes = []byte(rx.prefix)
	rx.prefixRune, _ = utf8.DecodeRuneInString(rx.prefix)
	return rx, nil
}
Exemple #2
0
// compile parses expr with the given syntax flags and builds a *Regexp from
// it. longest is stored on the result as the matching preference.
//
// Errors from syntax.Parse and syntax.Compile are returned unchanged, together
// with a nil *Regexp.
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
	parsed, err := syntax.Parse(expr, mode)
	if err != nil {
		return nil, err
	}

	// Read the capture metadata first; the tree is simplified right after.
	numSubexp, subexpNames := parsed.MaxCap(), parsed.CapNames()

	prog, err := syntax.Compile(parsed.Simplify())
	if err != nil {
		return nil, err
	}

	rx := &Regexp{
		expr:        expr,
		prog:        prog,
		numSubexp:   numSubexp,
		subexpNames: subexpNames,
		cond:        prog.StartCond(),
		longest:     longest,
	}
	rx.prefix, rx.prefixComplete = prog.Prefix()
	if rx.prefix == "" {
		return rx, nil
	}

	// TODO(rsc): Remove this allocation by adding
	// IndexString to package bytes.
	rx.prefixBytes = []byte(rx.prefix)
	rx.prefixRune, _ = utf8.DecodeRuneInString(rx.prefix)
	return rx, nil
}
Exemple #3
0
/*
I'm sorry, dear reader. I really am.

The problem here is to take an arbitrary regular expression and:
 1. return a regular expression that is just like it, but left-anchored,
    preferring to return the original if possible.
 2. determine a string literal prefix that all matches of this regular expression
    have, much like regexp.Regexp.Prefix(). Unfortunately, Prefix() does not work
    in the presence of anchors, so we need to write it ourselves.

What this actually means is that we need to sketch on the internals of the
standard regexp library to forcefully extract the information we want.

Unfortunately, regexp.Regexp hides a lot of its state, so our abstraction is
going to be pretty leaky. The biggest leak is that we blindly assume that all
regular expressions are perl-style, not POSIX. This is probably Mostly True, and
I think most users of the library probably won't be able to notice.

Returns the (possibly re-compiled) left-anchored expression and the literal
prefix. If the expression cannot be re-parsed, compiled or anchored, a warning
is logged and the original expression is returned with an empty prefix.
*/
func sketchOnRegex(re *regexp.Regexp) (*regexp.Regexp, string) {
	rawRe := re.String()
	sRe, err := syntax.Parse(rawRe, syntax.Perl)
	if err != nil {
		log.Printf("WARN(web): unable to parse regexp %v as perl. "+
			"This route might behave unexpectedly.", re)
		return re, ""
	}
	sRe = sRe.Simplify()
	p, err := syntax.Compile(sRe)
	if err != nil {
		log.Printf("WARN(web): unable to compile regexp %v. This "+
			"route might behave unexpectedly.", re)
		return re, ""
	}
	if p.StartCond()&syntax.EmptyBeginText == 0 {
		// Wrap the original in a non-capturing group before anchoring.
		// Without the group, `\A` binds only to the first branch of a
		// top-level alternation (`\Afoo|bar`), leaving the other branches
		// unanchored. A non-capturing group keeps submatch numbering intact.
		newRe, err := regexp.Compile(`\A(?:` + rawRe + `)`)
		if err != nil {
			log.Printf("WARN(web): unable to create a left-"+
				"anchored regexp from %v. This route might "+
				"behave unexpectedly", re)
			return re, ""
		}
		re = newRe
	}

	// Run the regular expression more or less by hand :(
	// Follow the single Out edge from the start instruction, collecting
	// literal runes until anything that is not a plain literal shows up.
	pc := uint32(p.Start)
	atStart := true
	i := &p.Inst[pc]
	var buf bytes.Buffer
Sadness:
	for {
		switch i.Op {
		case syntax.InstEmptyWidth:
			// Empty-width assertions are only skipped before the first
			// rune has been consumed.
			if !atStart {
				break Sadness
			}
		case syntax.InstCapture, syntax.InstNop:
			// nop!
		case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny,
			syntax.InstRuneAnyNotNL:

			atStart = false
			// Only an instruction carrying exactly one rune, with no
			// case folding, contributes to the prefix.
			if len(i.Rune) != 1 ||
				syntax.Flags(i.Arg)&syntax.FoldCase != 0 {
				break Sadness
			}
			buf.WriteRune(i.Rune[0])
		default:
			// Alternations, match, fail: the prefix ends here.
			break Sadness
		}
		pc = i.Out
		i = &p.Inst[pc]
	}
	return re, buf.String()
}
Exemple #4
0
// TestOnePassCutoff verifies that compileOnePass reports notOnePass for a
// pattern built from two large bounded repetitions.
func TestOnePassCutoff(t *testing.T) {
	parsed, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl)
	if err != nil {
		t.Fatalf("parse: %v", err)
	}

	prog, err := syntax.Compile(parsed.Simplify())
	if err != nil {
		t.Fatalf("compile: %v", err)
	}

	if got := compileOnePass(prog); got != notOnePass {
		t.Fatalf("makeOnePass succeeded; wanted notOnePass")
	}
}
// testRegex parses and compiles every entry of lexer_rules and prints the
// resulting program for each one. It stops at the first rule for which
// tlog.Ok reports a failure.
func testRegex() {
	for k, v := range lexer_rules {
		fmt.Println("\nparsing ", k, " = ", v)
		// Parse the rule announced above; the earlier code parsed
		// lexer_rules["word"] on every iteration regardless of k.
		re, err := syntax.Parse(v, syntax.Simple|syntax.UnicodeGroups|syntax.PerlX)
		if !tlog.Ok(err) {
			return
		}
		// fmt.Println(re)
		prog, err := syntax.Compile(re)
		if !tlog.Ok(err) {
			return
		}
		fmt.Println(prog)
	}
}
Exemple #6
0
// Compile parses expr as a Perl-syntax regular expression and, on success,
// returns a Regexp that can be used to match against lines of text.
//
// The parsed (unsimplified) tree is kept in the Syntax field. Any error from
// parsing, compiling, toByteProg or matcher initialisation is returned with a
// nil *Regexp.
func Compile(expr string) (*Regexp, error) {
	parsed, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return nil, err
	}

	prog, err := syntax.Compile(parsed.Simplify())
	if err != nil {
		return nil, err
	}

	if err := toByteProg(prog); err != nil {
		return nil, err
	}

	compiled := &Regexp{
		Syntax: parsed,
		expr:   expr,
	}
	if err := compiled.m.init(prog); err != nil {
		return nil, err
	}
	return compiled, nil
}
Exemple #7
0
// TestCompileOnePass runs compileOnePass over every entry of onePassTests and
// checks that "is one-pass" agrees with the expectation stored in the entry.
func TestCompileOnePass(t *testing.T) {
	for _, tc := range onePassTests {
		parsed, err := syntax.Parse(tc.re, syntax.Perl)
		if err != nil {
			t.Errorf("Parse(%q) got err:%s, want success", tc.re, err)
			continue
		}

		// Simplify needs to be done before compile...
		prog, err := syntax.Compile(parsed.Simplify())
		if err != nil {
			t.Errorf("Compile(%q) got err:%s, want success", tc.re, err)
			continue
		}

		// onePass is declared outside this function; the result is stored
		// there exactly as before.
		onePass = compileOnePass(prog)
		gotNotOnePass := onePass == notOnePass
		wantNotOnePass := tc.onePass == notOnePass
		if gotNotOnePass != wantNotOnePass {
			t.Errorf("CompileOnePass(%q) got %v, expected %v", tc.re, onePass, tc.onePass)
		}
	}
}
Exemple #8
0
// compile parses each expression as Perl syntax, compiles it, passes the
// program through toByteProg, and initialises a single matcher from the joined
// programs.
//
// The first error encountered is returned together with a nil matcher.
func compile(exprs ...string) (*matcher, error) {
	var progs []*syntax.Prog
	for idx := range exprs {
		parsed, err := syntax.Parse(exprs[idx], syntax.Perl)
		if err != nil {
			return nil, err
		}

		prog, err := syntax.Compile(parsed.Simplify())
		if err != nil {
			return nil, err
		}

		if err := toByteProg(prog); err != nil {
			return nil, err
		}
		progs = append(progs, prog)
	}

	result := &matcher{}
	if err := result.init(joinProgs(progs), len(progs)); err != nil {
		return nil, err
	}
	return result, nil
}
Exemple #9
0
// main parses the command line, compiles the search pattern and feeds every
// file selected by the remaining arguments to the grep goroutine.
//
// Single-dash options may be clustered ("-in"); "-f FILE" reads the pattern
// from FILE instead of the first positional argument. "--enc" and "--exclude"
// take their value from the following argument. With a pattern and no file
// arguments, standard input is searched.
func main() {
	var args []string

	argv := os.Args
	argc := len(argv)
	for n := 1; n < argc; n++ {
		if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] != '-' {
			switch argv[n][1] {
			case '8':
				utf8 = true
			case 'F':
				fixed = true
			case 'R':
				recursive = true
			case 'S':
				verbose = true
			case 'i':
				ignorecase = true
			case 'l':
				list = true
			case 'n':
				number = true
			case 'P':
				perl = true
			case 'G':
				basic = true
			case 'v':
				invert = true
			case 'o':
				only = true
			case 'f':
				if n < argc-1 {
					infile = argv[n+1]
					n++
					continue
				}
			case 'V':
				fmt.Fprintf(os.Stdout, "%s\n", version)
				os.Exit(0)
			default:
				usage()
			}
			// Clustered flags: strip the flag just handled and look at
			// the same argument again.
			if len(argv[n]) > 2 {
				argv[n] = "-" + argv[n][2:]
				n--
			}
		} else if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] == '-' {
			// Every long option takes a value from the next argument.
			if n == argc-1 {
				usage()
			}
			switch argv[n] {
			case "--enc":
				encs = argv[n+1]
			case "--exclude":
				exclude = argv[n+1]
			default:
				usage()
			}
			n++
		} else {
			args = append(args, argv[n])
		}
	}

	if len(args) == 0 {
		usage()
	}

	var err error
	var pattern interface{}
	if encs != "" {
		encodings = strings.Split(encs, ",")
	} else {
		enc_env := os.Getenv("JVGREP_ENCODINGS")
		if enc_env != "" {
			encodings = strings.Split(enc_env, ",")
		}
	}

	if runtime.GOOS == "windows" {
		// set dll name that is first to try to load by go-iconv.
		os.Setenv("ICONV_DLL", "jvgrep-iconv.dll")
	}

	var oc *iconv.Iconv
	if !utf8 {
		oc, err = iconv.Open("char", "utf-8")
		if err != nil {
			// Fall back to a utf-8 to utf-8 converter. An error from this
			// second attempt is not checked here.
			oc, err = iconv.Open("utf-8", "utf-8")
		}
	}
	defer func() {
		if oc != nil {
			oc.Close()
		}
	}()

	// The pattern comes either from the -f file or from the first
	// positional argument; argindex is where the file arguments start.
	instr := ""
	argindex := 0
	if len(infile) > 0 {
		b, err := ioutil.ReadFile(infile)
		if err != nil {
			errorline(err.Error())
			os.Exit(-1)
		}
		instr = strings.TrimSpace(string(b))
	} else {
		instr = args[0]
		argindex = 1
	}
	if fixed {
		pattern = instr
	} else if perl {
		re, err := syntax.Parse(instr, syntax.Perl)
		if err != nil {
			errorline(err.Error())
			os.Exit(-1)
		}
		rec, err := syntax.Compile(re)
		if err != nil {
			errorline(err.Error())
			os.Exit(-1)
		}
		// NOTE(review): rec is a compiled *syntax.Prog, so rec.String() is
		// a program dump and not a regular expression; confirm whether
		// re.String() was intended here.
		instr = rec.String()
		if ignorecase {
			instr = "(?i:" + instr + ")"
		}
		pattern, err = regexp.Compile(instr)
		if err != nil {
			errorline(err.Error())
			os.Exit(-1)
		}
	} else {
		if ignorecase {
			instr = "(?i:" + instr + ")"
		}
		pattern, err = regexp.Compile(instr)
		if err != nil {
			errorline(err.Error())
			os.Exit(-1)
		}
	}

	var ere *regexp.Regexp
	if exclude != "" {
		ere, err = regexp.Compile(exclude)
		// Report an invalid --exclude pattern instead of silently running
		// without any exclusion.
		if err != nil {
			errorline(err.Error())
			os.Exit(-1)
		}
	}
	if len(args) == 1 && argindex != 0 {
		// Pattern only, no files: search standard input.
		Grep(&GrepArg{pattern, os.Stdin, oc, true})
		return
	}

	envre := regexp.MustCompile(`^(\$[a-zA-Z][a-zA-Z0-9_]+|\$\([a-zA-Z][a-zA-Z0-9_]+\))$`)
	globmask := ""

	ch := make(chan *GrepArg)
	done := make(chan int)
	go GoGrep(ch, done)
	nargs := len(args[argindex:])
	for ai, arg := range args[argindex:] {
		globmask = ""
		root := ""
		arg = strings.Trim(arg, `"`)
		// Walk the path components, expanding "~" and $VAR / $(VAR), and
		// remember as root everything before the first wildcard component.
		for n, i := range strings.Split(filepath.ToSlash(arg), "/") {
			if root == "" && strings.Index(i, "*") != -1 {
				if globmask == "" {
					root = "."
				} else {
					root = filepath.ToSlash(globmask)
				}
			}
			if n == 0 && i == "~" {
				if runtime.GOOS == "windows" {
					i = os.Getenv("USERPROFILE")
				} else {
					i = os.Getenv("HOME")
				}
			}
			if envre.MatchString(i) {
				i = strings.Trim(strings.Trim(os.Getenv(i[1:]), "()"), `"`)
			}

			globmask = filepath.Join(globmask, i)
			if n == 0 {
				if runtime.GOOS == "windows" && filepath.VolumeName(i) != "" {
					globmask = i + "/"
				} else if len(globmask) == 0 {
					globmask = "/"
				}
			}
		}
		if root == "" {
			// No wildcard in the argument.
			path, _ := filepath.Abs(arg)
			if !recursive {
				if verbose {
					println("search:", path)
				}
				ch <- &GrepArg{pattern, path, oc, ai == nargs-1}
				continue
			} else {
				root = path
				globmask = "**/" + globmask
			}
		}
		if globmask == "" {
			globmask = "."
		}
		globmask = filepath.ToSlash(filepath.Clean(globmask))
		if recursive {
			if strings.Index(globmask, "/") > -1 {
				globmask += "/"
			} else {
				globmask = "**/" + globmask
			}
		}
		if runtime.GOOS == "windows" {
			// keep double backslash windows UNC.
			if len(arg) > 2 && (arg[0:2] == `\\` || arg[0:2] == `//`) {
				root = "/" + root
				globmask = "/" + globmask
			}
		}

		// Translate the glob into two regular expressions: filemask for
		// whole file paths and dirmask for the directories to descend into.
		cc := []rune(globmask)
		dirmask := ""
		filemask := ""
		for i := 0; i < len(cc); i++ {
			if cc[i] == '*' {
				if i < len(cc)-2 && cc[i+1] == '*' && cc[i+2] == '/' {
					filemask += "(.*/)?"
					dirmask = filemask
					i += 2
				} else {
					filemask += "[^/]*"
				}
			} else {
				c := cc[i]
				if c == '/' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || 255 < c {
					filemask += string(c)
				} else {
					// Any other character is emitted as an escaped
					// single-character class.
					filemask += fmt.Sprintf("[\\x%x]", c)
				}
				if c == '/' && dirmask == "" && strings.Index(filemask, "*") != -1 {
					dirmask = filemask
				}
			}
		}
		if dirmask == "" {
			dirmask = filemask
		}
		if len(filemask) > 0 && filemask[len(filemask)-1] == '/' {
			if root == "" {
				root = filemask
			}
			filemask += "[^/]*"
		}
		if runtime.GOOS == "windows" || runtime.GOOS == "darwin" {
			dirmask = "(?i:" + dirmask + ")"
			filemask = "(?i:" + filemask + ")"
		}
		dre := regexp.MustCompile("^" + dirmask)
		fre := regexp.MustCompile("^" + filemask + "$")

		root = filepath.Clean(root)

		if verbose {
			println("dirmask:", dirmask)
			println("filemask:", filemask)
			println("root:", root)
		}
		filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
			if info == nil {
				return err
			}

			path = filepath.ToSlash(path)

			if ere != nil && ere.MatchString(path) {
				if info.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}

			if info.IsDir() {
				if path == "." || recursive || len(path) <= len(root) || dre.MatchString(path+"/") {
					return nil
				}
				return filepath.SkipDir
			}

			if fre.MatchString(path) {
				if verbose {
					println("search:", path)
				}
				//ch <- &GrepArg{pattern, path, oc, ai == nargs-1}
				ch <- &GrepArg{pattern, path, oc, false}
			}
			return nil
		})
	}
	// A nil argument tells the grep goroutine that no more work follows.
	ch <- nil
	<-done
}
Exemple #10
0
// main parses the command line, compiles the search and exclude patterns,
// decides whether to colorize output, and feeds every file selected by the
// remaining arguments to the grep goroutine.
//
// Single-dash options may be clustered ("-in"); -A, -B and -f take their value
// from the following argument. Long options accept both "--name=value" and
// "--name value". With a pattern and no file arguments, standard input is
// searched.
func main() {
	var args []string

	argv := os.Args
	argc := len(argv)
	for n := 1; n < argc; n++ {
		if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] != '-' {
			switch argv[n][1] {
			case 'A':
				if n < argc-1 {
					after, _ = strconv.Atoi(argv[n+1])
					n++
					continue
				}
			case 'B':
				if n < argc-1 {
					before, _ = strconv.Atoi(argv[n+1])
					n++
					continue
				}
			case '8':
				utf8out = true
			case 'F':
				fixed = true
			case 'R':
				recursive = true
			case 'S':
				verbose = true
			case 'c':
				count = true
			case 'r':
				fullpath = false
			case 'i':
				ignorecase = true
			case 'I':
				ignorebinary = true
			case 'l':
				list = true
			case 'n':
				number = true
			case 'P':
				perl = true
			case 'G':
				basic = true
			case 'v':
				invert = true
			case 'o':
				only = true
			case 'f':
				if n < argc-1 {
					infile = argv[n+1]
					n++
					continue
				}
			case 'z':
				zeroData = true
			case 'Z':
				zeroFile = true
			case 'V':
				fmt.Fprintf(os.Stdout, "%s\n", version)
				os.Exit(0)
			default:
				usage(true)
			}
			// Clustered flags: strip the flag just handled and look at
			// the same argument again.
			if len(argv[n]) > 2 {
				argv[n] = "-" + argv[n][2:]
				n--
			}
		} else if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] == '-' {
			name := argv[n][2:]
			switch {
			case strings.HasPrefix(name, "enc="):
				encs = name[4:]
			case name == "enc" && n < argc-1:
				encs = argv[n+1]
				n++
			case strings.HasPrefix(name, "exclude="):
				exclude = name[8:]
			case name == "exclude" && n < argc-1:
				exclude = argv[n+1]
				n++
			case strings.HasPrefix(name, "color="):
				color = name[6:]
			case name == "color" && n < argc-1:
				color = argv[n+1]
				n++
			case strings.HasPrefix(name, "separator="):
				separator = name[10:]
			// Guarded like the other value-taking options: a trailing
			// "--separator" must not index past the end of argv.
			case name == "separator" && n < argc-1:
				separator = argv[n+1]
				n++
			case name == "null":
				zeroFile = true
			case name == "null-data":
				zeroData = true
			case name == "help":
				usage(false)
			default:
				usage(true)
			}
		} else {
			args = append(args, argv[n])
		}
	}

	if len(args) == 0 {
		usage(true)
	}

	var err error
	var pattern interface{}
	if encs != "" {
		encodings = strings.Split(encs, ",")
	} else {
		enc_env := os.Getenv("JVGREP_ENCODINGS")
		if enc_env != "" {
			encodings = strings.Split(enc_env, ",")
		}
	}
	out_enc := os.Getenv("JVGREP_OUTPUT_ENCODING")
	if out_enc != "" {
		ee, _ := charset.Lookup(out_enc)
		if ee == nil {
			errorline(fmt.Sprintf("unknown encoding: %s", out_enc))
			os.Exit(1)
		}
		oc = transform.NewWriter(os.Stdout, ee.NewEncoder())
	}

	// The pattern comes either from the -f file or from the first
	// positional argument; argindex is where the file arguments start.
	instr := ""
	argindex := 0
	if len(infile) > 0 {
		b, err := ioutil.ReadFile(infile)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		instr = strings.TrimSpace(string(b))
	} else {
		instr = args[0]
		argindex = 1
	}
	if fixed {
		pattern = instr
	} else if perl {
		re, err := syntax.Parse(instr, syntax.Perl)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		rec, err := syntax.Compile(re)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		// NOTE(review): rec is a compiled *syntax.Prog, so rec.String() is
		// a program dump and not a regular expression; confirm whether
		// re.String() was intended here.
		instr = rec.String()
		if ignorecase {
			instr = "(?i:" + instr + ")"
		}
		pattern, err = regexp.Compile(instr)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
	} else {
		if ignorecase {
			instr = "(?i:" + instr + ")"
		}
		pattern, err = regexp.Compile(instr)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
	}

	// Exclude pattern precedence: command line, then JVGREP_EXCLUDE, then
	// the built-in default.
	if exclude == "" {
		exclude = os.Getenv("JVGREP_EXCLUDE")
	}
	if exclude == "" {
		exclude = excludeDefaults
	}
	ere, err := regexp.Compile(exclude)
	if err != nil {
		errorline(err.Error())
		os.Exit(1)
	}

	atty := false
	if color == "" {
		color = os.Getenv("JVGREP_COLOR")
	}
	if color == "" || color == "auto" {
		atty = isatty.IsTerminal(os.Stdout.Fd())
	} else if color == "always" {
		atty = true
	} else if color == "never" {
		atty = false
	} else {
		usage(true)
	}

	if atty {
		// Reset the terminal color before exiting on a signal.
		sc := make(chan os.Signal, 10)
		signal.Notify(sc, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP)
		go func() {
			for _ = range sc {
				ct.ResetColor()
				os.Exit(0)
			}
		}()
	}

	if len(args) == 1 && argindex != 0 {
		// Pattern only, no files: search standard input.
		Grep(&GrepArg{
			pattern: pattern,
			input:   os.Stdin,
			single:  true,
			atty:    atty,
		})
		return
	}

	envre := regexp.MustCompile(`^(\$[a-zA-Z][a-zA-Z0-9_]+|\$\([a-zA-Z][a-zA-Z0-9_]+\))$`)
	globmask := ""

	ch := make(chan *GrepArg, 10)
	done := make(chan int)
	go GoGrep(ch, done)
	nargs := len(args[argindex:])
	for _, arg := range args[argindex:] {
		globmask = ""
		root := ""
		arg = strings.Trim(arg, `"`)
		// Walk the path components, expanding "~" and $VAR / $(VAR), and
		// remember as root everything before the first wildcard component.
		for n, i := range strings.Split(filepath.ToSlash(arg), "/") {
			if root == "" && strings.Index(i, "*") != -1 {
				if globmask == "" {
					root = "."
				} else {
					root = filepath.ToSlash(globmask)
				}
			}
			if n == 0 && i == "~" {
				if runtime.GOOS == "windows" {
					i = os.Getenv("USERPROFILE")
				} else {
					i = os.Getenv("HOME")
				}
			}
			if envre.MatchString(i) {
				i = strings.Trim(strings.Trim(os.Getenv(i[1:]), "()"), `"`)
			}

			globmask = filepath.Join(globmask, i)
			if n == 0 {
				if runtime.GOOS == "windows" && filepath.VolumeName(i) != "" {
					globmask = i + "/"
				} else if len(globmask) == 0 {
					globmask = "/"
				}
			}
		}
		if root == "" {
			// No wildcard in the argument: it must name an existing file
			// or directory.
			path, _ := filepath.Abs(arg)
			fi, err := os.Stat(path)
			if err != nil {
				errorline(fmt.Sprintf("jvgrep: %s: No such file or directory", arg))
				os.Exit(1)
			}
			if !fi.IsDir() {
				if verbose {
					println("search:", path)
				}
				ch <- &GrepArg{
					pattern: pattern,
					input:   path,
					single:  nargs == 1,
					atty:    atty,
				}
				continue
			}
			// A directory: search everything below it.
			root = path
			globmask = "**/*"
		}
		if globmask == "" {
			globmask = "."
		}
		globmask = filepath.ToSlash(filepath.Clean(globmask))
		if recursive {
			if strings.Index(globmask, "/") > -1 {
				globmask += "/"
			} else {
				globmask = "**/" + globmask
			}
		}

		// Translate the glob into two regular expressions: filemask for
		// whole file paths and dirmask for the directories to descend into.
		cc := []rune(globmask)
		dirmask := ""
		filemask := ""
		for i := 0; i < len(cc); i++ {
			if cc[i] == '*' {
				if i < len(cc)-2 && cc[i+1] == '*' && cc[i+2] == '/' {
					filemask += "(.*/)?"
					dirmask = filemask
					i += 2
				} else {
					filemask += "[^/]*"
				}
			} else {
				c := cc[i]
				if c == '/' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || 255 < c {
					filemask += string(c)
				} else {
					// Any other character is emitted as an escaped
					// single-character class.
					filemask += fmt.Sprintf("[\\x%x]", c)
				}
				if c == '/' && dirmask == "" && strings.Index(filemask, "*") != -1 {
					dirmask = filemask
				}
			}
		}
		if dirmask == "" {
			dirmask = filemask
		}
		if len(filemask) > 0 && filemask[len(filemask)-1] == '/' {
			if root == "" {
				root = filemask
			}
			filemask += "[^/]*"
		}
		if runtime.GOOS == "windows" || runtime.GOOS == "darwin" {
			dirmask = "(?i:" + dirmask + ")"
			filemask = "(?i:" + filemask + ")"
		}
		dre := regexp.MustCompile("^" + dirmask)
		fre := regexp.MustCompile("^" + filemask + "$")

		root = filepath.Clean(root)

		if verbose {
			println("dirmask:", dirmask)
			println("filemask:", filemask)
			println("root:", root)
		}
		filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
			if info == nil {
				return err
			}

			path = filepath.ToSlash(path)

			if ere != nil && ere.MatchString(path) {
				if info.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}

			if info.IsDir() {
				if path == "." || recursive || len(path) <= len(root) || dre.MatchString(path+"/") {
					return nil
				}
				return filepath.SkipDir
			}

			if fre.MatchString(path) && info.Mode().IsRegular() {
				if verbose {
					println("search:", path)
				}
				ch <- &GrepArg{
					pattern: pattern,
					input:   path,
					single:  false,
					atty:    atty,
				}
			}
			return nil
		})
	}
	// A nil argument tells the grep goroutine that no more work follows.
	ch <- nil
	// NOTE(review): countMatch is printed before <-done; if GoGrep is still
	// draining the buffered channel at this point the total may be
	// incomplete. Confirm against GoGrep before reordering.
	if count {
		fmt.Println(countMatch)
	}
	<-done
}
Exemple #11
0
/*
I'm sorry, dear reader. I really am.

The problem here is to take an arbitrary regular expression and:
 1. return a regular expression that is just like it, but left-anchored,
    preferring to return the original if possible.
 2. determine a string literal prefix that all matches of this regular expression
    have, much like regexp.Regexp.Prefix(). Unfortunately, Prefix() does not work
    in the presence of anchors, so we need to write it ourselves.

What this actually means is that we need to sketch on the internals of the
standard regexp library to forcefully extract the information we want.

Unfortunately, regexp.Regexp hides a lot of its state, so our abstraction is
going to be pretty leaky. The biggest leak is that we blindly assume that all
regular expressions are perl-style, not POSIX. This is probably Mostly True, and
I think most users of the library probably won't be able to notice.

Returns the (possibly re-compiled) left-anchored expression and the literal
prefix. If the expression cannot be re-parsed, compiled or anchored, a warning
is logged and the original expression is returned with an empty prefix.
*/
func sketchOnRegex(re *regexp.Regexp) (*regexp.Regexp, string) {
	// Re-parse the regex from the string representation.
	rawRe := re.String()
	sRe, err := syntax.Parse(rawRe, syntax.Perl)
	if err != nil {
		// TODO: better way to warn?
		log.Printf("WARN(router): unable to parse regexp %v as perl. "+
			"This route might behave unexpectedly.", re)
		return re, ""
	}

	// Simplify and then compile the regex.
	sRe = sRe.Simplify()
	p, err := syntax.Compile(sRe)
	if err != nil {
		// TODO: better way to warn?
		log.Printf("WARN(router): unable to compile regexp %v. This "+
			"route might behave unexpectedly.", re)
		return re, ""
	}

	// If it's not left-anchored, we add that now.
	if p.StartCond()&syntax.EmptyBeginText == 0 {
		// Wrap the original in a non-capturing group before anchoring.
		// Without the group, `\A` binds only to the first branch of a
		// top-level alternation (`\Afoo|bar`), leaving the other branches
		// unanchored. A non-capturing group keeps submatch numbering intact.
		newRe, err := regexp.Compile(`\A(?:` + rawRe + `)`)
		if err != nil {
			// TODO: better way to warn?
			log.Printf("WARN(router): unable to create a left-"+
				"anchored regexp from %v. This route might "+
				"behave unexpectedly", re)
			return re, ""
		}
		re = newRe
	}

	// We run the regular expression more or less by hand in order to calculate
	// the prefix.
	pc := uint32(p.Start)
	atStart := true
	i := &p.Inst[pc]
	var buf bytes.Buffer
OuterLoop:
	for {
		switch i.Op {

		// There may be an 'empty' operation at the beginning of every regex,
		// due to OpBeginText. Once a rune has been consumed, an empty-width
		// assertion ends the prefix.
		case syntax.InstEmptyWidth:
			if !atStart {
				break OuterLoop
			}

		// Captures and no-ops don't affect the prefix.
		case syntax.InstCapture, syntax.InstNop:
			// nop!

		// We handle runes.
		case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny,
			syntax.InstRuneAnyNotNL:

			atStart = false

			// If we don't have exactly one rune, or if the 'fold case' flag is
			// set, then we don't count this as part of the prefix. Due to
			// unicode case-craziness, it's too hard to deal with case
			// insensitivity...
			if len(i.Rune) != 1 ||
				syntax.Flags(i.Arg)&syntax.FoldCase != 0 {
				break OuterLoop
			}

			// Add to the prefix, continue.
			buf.WriteRune(i.Rune[0])

		// All other instructions may affect the prefix, so we stop here.
		default:
			break OuterLoop
		}

		// Continue to the next instruction.
		pc = i.Out
		i = &p.Inst[pc]
	}

	return re, buf.String()
}