// OnePassPrefix returns a literal string that all matches for the // regexp must start with. Complete is true if the prefix // is the entire match. Pc is the index of the last rune instruction // in the string. The OnePassPrefix skips over the mandatory // EmptyBeginText func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) { i := &p.Inst[p.Start] if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 { return "", i.Op == syntax.InstMatch, uint32(p.Start) } pc = i.Out i = &p.Inst[pc] for i.Op == syntax.InstNop { pc = i.Out i = &p.Inst[pc] } // Avoid allocation of buffer if prefix is empty. if iop(i) != syntax.InstRune || len(i.Rune) != 1 { return "", i.Op == syntax.InstMatch, uint32(p.Start) } // Have prefix; gather characters. var buf bytes.Buffer for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 { buf.WriteRune(i.Rune[0]) pc, i = i.Out, &p.Inst[i.Out] } if i.Op == syntax.InstEmptyWidth && syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 && p.Inst[i.Out].Op == syntax.InstMatch { complete = true } return buf.String(), complete, pc }
// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt, // the match engine can always tell which branch to take. The routine may modify // p if it is turned into a onepass Prog. If it isn't possible for this to be a // onepass Prog, the Prog notOnePass is returned. makeOnePass is recursive // to the size of the Prog. func makeOnePass(p *onePassProg) *onePassProg { // If the machine is very long, it's not worth the time to check if we can use one pass. if len(p.Inst) >= 1000 { return notOnePass } var ( instQueue = newQueue(len(p.Inst)) visitQueue = newQueue(len(p.Inst)) check func(uint32, map[uint32]bool) bool onePassRunes = make([][]rune, len(p.Inst)) ) // check that paths from Alt instructions are unambiguous, and rebuild the new // program as a onepass program check = func(pc uint32, m map[uint32]bool) (ok bool) { ok = true inst := &p.Inst[pc] if visitQueue.contains(pc) { return } visitQueue.insert(pc) switch inst.Op { case syntax.InstAlt, syntax.InstAltMatch: ok = check(inst.Out, m) && check(inst.Arg, m) // check no-input paths to InstMatch matchOut := m[inst.Out] matchArg := m[inst.Arg] if matchOut && matchArg { ok = false break } // Match on empty goes in inst.Out if matchArg { inst.Out, inst.Arg = inst.Arg, inst.Out matchOut, matchArg = matchArg, matchOut } if matchOut { m[pc] = true inst.Op = syntax.InstAltMatch } // build a dispatch operator from the two legs of the alt. onePassRunes[pc], inst.Next = mergeRuneSets( &onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg) if len(inst.Next) > 0 && inst.Next[0] == mergeFailed { ok = false break } case syntax.InstCapture, syntax.InstNop: ok = check(inst.Out, m) m[pc] = m[inst.Out] // pass matching runes back through these no-ops. onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) inst.Next = []uint32{} for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { inst.Next = append(inst.Next, inst.Out) } case syntax.InstEmptyWidth: ok = check(inst.Out, m) m[pc] = m[inst.Out] onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...) inst.Next = []uint32{} for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { inst.Next = append(inst.Next, inst.Out) } case syntax.InstMatch, syntax.InstFail: m[pc] = inst.Op == syntax.InstMatch break case syntax.InstRune: m[pc] = false if len(inst.Next) > 0 { break } instQueue.insert(inst.Out) if len(inst.Rune) == 0 { onePassRunes[pc] = []rune{} inst.Next = []uint32{inst.Out} break } runes := make([]rune, 0) if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 { r0 := inst.Rune[0] runes = append(runes, r0, r0) for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { runes = append(runes, r1, r1) } sort.Sort(runeSlice(runes)) } else { runes = append(runes, inst.Rune...) } onePassRunes[pc] = runes inst.Next = []uint32{} for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { inst.Next = append(inst.Next, inst.Out) } inst.Op = syntax.InstRune case syntax.InstRune1: m[pc] = false if len(inst.Next) > 0 { break } instQueue.insert(inst.Out) runes := []rune{} // expand case-folded runes if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 { r0 := inst.Rune[0] runes = append(runes, r0, r0) for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { runes = append(runes, r1, r1) } sort.Sort(runeSlice(runes)) } else { runes = append(runes, inst.Rune[0], inst.Rune[0]) } onePassRunes[pc] = runes inst.Next = []uint32{} for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { inst.Next = append(inst.Next, inst.Out) } inst.Op = syntax.InstRune case syntax.InstRuneAny: m[pc] = false if len(inst.Next) > 0 { break } instQueue.insert(inst.Out) onePassRunes[pc] = append([]rune{}, anyRune...) inst.Next = []uint32{inst.Out} case syntax.InstRuneAnyNotNL: m[pc] = false if len(inst.Next) > 0 { break } instQueue.insert(inst.Out) onePassRunes[pc] = append([]rune{}, anyRuneNotNL...) inst.Next = []uint32{} for i := len(onePassRunes[pc]) / 2; i >= 0; i-- { inst.Next = append(inst.Next, inst.Out) } } return } instQueue.clear() instQueue.insert(uint32(p.Start)) m := make(map[uint32]bool, len(p.Inst)) for !instQueue.empty() { visitQueue.clear() pc := instQueue.next() if !check(uint32(pc), m) { p = notOnePass break } } if p != notOnePass { for i := range p.Inst { p.Inst[i].Rune = onePassRunes[i] } } return p }