func Lex(patterns []*Pattern, text []byte) <-chan Token { tokens := make(chan Token) programs := make([][]*inst.Inst, 0, len(patterns)) for _, pattern := range patterns { cmd := exec.Command("rajax", "--format=json", pattern.regex) if output, err := cmd.Output(); err != nil { panic("could not compile expressions") } else { var jinsts []*JsonInst insts := make([]*inst.Inst, 0) json.Unmarshal(output, &jinsts) for _, jinst := range jinsts { insts = append(insts, inst.New(opcode[jinst.Op], jinst.X, jinst.Y)) } programs = append(programs, insts) } } program := make([]*inst.Inst, 0) offsets := make([]uint32, 0) offset := uint32(len(programs) - 1) process_funcs := make(map[int]ProcessMatch) for i, insts := range programs { offsets = append(offsets, offset) offset += uint32(len(insts)) if i+1 < len(programs) { program = append(program, inst.New(inst.SPLIT, uint32(i+1), offset)) } process_funcs[int(offset-1)] = patterns[i].process } for i, insts := range programs { offset := offsets[i] for _, _inst := range insts { switch _inst.Op { case inst.SPLIT: program = append(program, inst.New(inst.SPLIT, offset+_inst.X, offset+_inst.Y)) case inst.JMP: program = append(program, inst.New(inst.JMP, offset+_inst.X, 0)) default: program = append(program, _inst) } } } go func() { success, matches := machines.LexerEngine(program, text) go func() { for match := range matches { if ok, Token := process_funcs[match.PC](match.Bytes); ok { tokens <- Token } } close(tokens) }() if !(<-success) { fmt.Fprintln(os.Stderr, "lexing failed") } }() return tokens }
func test_case(n int) (inst.InstSlice, []byte) { program := make(inst.InstSlice, n*3+1) text := make([]byte, n) i := uint32(0) for j := 0; j < n; j++ { program[i] = inst.New(inst.SPLIT, i+1, i+2) program[i+1] = inst.New(inst.CHAR, 'a', 0) i += 2 } for j := 0; j < n; j++ { text[j] = 'a' program[i] = inst.New(inst.CHAR, 'a', 0) i++ } program[i] = inst.New(inst.MATCH, 0, 0) return program, text }
func TestRecursiveMatch(t *testing.T) { //. (a|b)*cba?(c|b)bb program := make(inst.InstSlice, 20) program[0] = inst.New(inst.SPLIT, 1, 6) program[1] = inst.New(inst.SPLIT, 2, 4) program[2] = inst.New(inst.CHAR, 'a', 0) program[3] = inst.New(inst.JMP, 5, 0) program[4] = inst.New(inst.CHAR, 'b', 0) program[5] = inst.New(inst.JMP, 0, 0) program[6] = inst.New(inst.CHAR, 'c', 0) program[7] = inst.New(inst.CHAR, 'b', 0) program[8] = inst.New(inst.SPLIT, 9, 10) program[9] = inst.New(inst.CHAR, 'a', 0) program[10] = inst.New(inst.SPLIT, 11, 13) program[11] = inst.New(inst.CHAR, 'c', 0) program[12] = inst.New(inst.JMP, 14, 0) program[13] = inst.New(inst.CHAR, 'b', 0) program[14] = inst.New(inst.CHAR, 'b', 0) program[15] = inst.New(inst.CHAR, 'b', 0) program[16] = inst.New(inst.MATCH, 0, 0) t.Log(string(text)) t.Log(program) if !Recursive(program, text) { t.Error("program should have matched text but did not") } }
func TestLexerThreeStrings(t *testing.T) { var text []byte = []byte{'s', 't', 'r', 'u', 'c', 't', ' ', ' ', '*'} program := make(inst.InstSlice, 30) program[0] = inst.New(inst.SPLIT, 2, 1) // go to 1 or 2/3 program[1] = inst.New(inst.SPLIT, 9, 13) // go to 2 or 3 program[2] = inst.New(inst.CHAR, 's', 0) program[3] = inst.New(inst.CHAR, 't', 0) program[4] = inst.New(inst.CHAR, 'r', 0) program[5] = inst.New(inst.CHAR, 'u', 0) program[6] = inst.New(inst.CHAR, 'c', 0) program[7] = inst.New(inst.CHAR, 't', 0) program[8] = inst.New(inst.MATCH, 0, 0) program[9] = inst.New(inst.SPLIT, 10, 12) program[10] = inst.New(inst.CHAR, ' ', 0) program[11] = inst.New(inst.JMP, 9, 0) program[12] = inst.New(inst.MATCH, 0, 0) program[13] = inst.New(inst.CHAR, '*', 0) program[14] = inst.New(inst.MATCH, 0, 0) t.Log(string(text)) t.Log(len(text)) t.Log(program) success, matches := LexerEngine(program, text) go func() { for match := range matches { t.Log(match) } }() if ok := <-success; !ok { t.Error("program should have matched text but did not") } }
func TestLexerMatch(t *testing.T) { //. (a|b)*cba?(c|b)bb program := make(inst.InstSlice, 20) program[0] = inst.New(inst.SPLIT, 1, 6) program[1] = inst.New(inst.SPLIT, 2, 4) program[2] = inst.New(inst.CHAR, 'a', 0) program[3] = inst.New(inst.JMP, 5, 0) program[4] = inst.New(inst.CHAR, 'b', 0) program[5] = inst.New(inst.JMP, 0, 0) program[6] = inst.New(inst.CHAR, 'c', 0) program[7] = inst.New(inst.CHAR, 'b', 0) program[8] = inst.New(inst.SPLIT, 9, 10) program[9] = inst.New(inst.CHAR, 'a', 0) program[10] = inst.New(inst.SPLIT, 11, 13) program[11] = inst.New(inst.CHAR, 'c', 0) program[12] = inst.New(inst.JMP, 14, 0) program[13] = inst.New(inst.CHAR, 'b', 0) program[14] = inst.New(inst.CHAR, 'b', 0) program[15] = inst.New(inst.CHAR, 'b', 0) program[16] = inst.New(inst.MATCH, 0, 0) t.Log(string(text)) t.Log(len(text)) t.Log(program) success, matches := LexerEngine(program, text) go func() { for match := range matches { t.Log(match) } }() if ok := <-success; !ok { t.Error("program should have matched text but did not") } }