Пример #1
0
// Lex tokenizes text using the given patterns and streams the resulting
// tokens over the returned channel.
//
// Each pattern's regular expression is compiled by running the external
// `rajax --format=json` command and decoding its JSON output into a list of
// instructions. The per-pattern programs are then linked into one combined
// program: a chain of SPLIT instructions at the front dispatches to every
// pattern, and SPLIT/JMP targets inside each pattern are relocated by that
// pattern's start offset. The index of the last instruction of each pattern
// is mapped to the pattern's process callback.
//
// The combined program is run through machines.LexerEngine in a background
// goroutine. Every match is passed to the callback registered for its PC; if
// the callback reports ok, the token is sent on the returned channel. The
// channel is closed once the engine's match channel has been drained. If the
// engine reports failure, "lexing failed" is written to standard error.
//
// Lex panics if rajax cannot be run for a pattern or if its output is not
// valid JSON.
func Lex(patterns []*Pattern, text []byte) <-chan Token {
	tokens := make(chan Token)

	// Compile every pattern into its own instruction list.
	programs := make([][]*inst.Inst, 0, len(patterns))
	for _, pattern := range patterns {
		output, err := exec.Command("rajax", "--format=json", pattern.regex).Output()
		if err != nil {
			panic(fmt.Sprintf("could not compile expressions: %v", err))
		}
		var jinsts []*JsonInst
		// A decode failure used to be ignored, which silently produced an
		// empty (or partial) program for the pattern.
		if err := json.Unmarshal(output, &jinsts); err != nil {
			panic(fmt.Sprintf("could not compile expressions: %v", err))
		}
		insts := make([]*inst.Inst, 0, len(jinsts))
		for _, jinst := range jinsts {
			insts = append(insts, inst.New(opcode[jinst.Op], jinst.X, jinst.Y))
		}
		programs = append(programs, insts)
	}

	// Lay out the combined program. The first len(programs)-1 slots hold the
	// dispatch SPLITs, so the first pattern starts right after them. With no
	// programs at all the initial offset wraps around but is never used.
	program := make([]*inst.Inst, 0)
	offsets := make([]uint32, 0, len(programs))
	offset := uint32(len(programs) - 1)
	processFuncs := make(map[int]ProcessMatch, len(programs))
	for i, insts := range programs {
		offsets = append(offsets, offset)
		offset += uint32(len(insts))
		if i+1 < len(programs) {
			// SPLIT i continues to the next SPLIT (or, for the last one,
			// falls into pattern 0) and branches to the start of pattern i+1.
			program = append(program, inst.New(inst.SPLIT, uint32(i+1), offset))
		}
		// offset-1 is the index of the last instruction of pattern i.
		processFuncs[int(offset-1)] = patterns[i].process
	}

	// Append each pattern's instructions, relocating jump targets.
	for i, insts := range programs {
		base := offsets[i]
		for _, in := range insts {
			switch in.Op {
			case inst.SPLIT:
				program = append(program, inst.New(inst.SPLIT, base+in.X, base+in.Y))
			case inst.JMP:
				program = append(program, inst.New(inst.JMP, base+in.X, 0))
			default:
				program = append(program, in)
			}
		}
	}

	go func() {
		success, matches := machines.LexerEngine(program, text)
		// Drain matches concurrently so the success result can be awaited
		// below without blocking the engine.
		go func() {
			for match := range matches {
				if ok, token := processFuncs[match.PC](match.Bytes); ok {
					tokens <- token
				}
			}
			close(tokens)
		}()
		if !(<-success) {
			fmt.Fprintln(os.Stderr, "lexing failed")
		}
	}()
	return tokens
}
Пример #2
0
// test_case builds a program and matching input of size n.
//
// The program consists of n optional 'a' steps (a SPLIT that either enters
// the following CHAR 'a' or skips over it), followed by n mandatory CHAR 'a'
// instructions and a final MATCH. The returned text is n 'a' bytes.
func test_case(n int) (inst.InstSlice, []byte) {
	prog := make(inst.InstSlice, n*3+1)
	input := make([]byte, n)

	// Optional section: instruction pairs at 2k and 2k+1.
	for k := 0; k < n; k++ {
		pc := uint32(2 * k)
		prog[pc] = inst.New(inst.SPLIT, pc+1, pc+2)
		prog[pc+1] = inst.New(inst.CHAR, 'a', 0)
	}

	// Mandatory section: one CHAR per input byte, starting at 2n.
	mandatory := 2 * n
	for k := range input {
		input[k] = 'a'
		prog[mandatory+k] = inst.New(inst.CHAR, 'a', 0)
	}

	// The final slot (index 3n) accepts.
	prog[3*n] = inst.New(inst.MATCH, 0, 0)
	return prog, input
}
Пример #3
0
// TestRecursiveMatch runs the Recursive matcher on a hand-assembled program
// for the expression (a|b)*cba?(c|b)bb and expects it to accept text.
func TestRecursiveMatch(t *testing.T) {
	compiled := inst.InstSlice{
		// 0-5: (a|b)*
		inst.New(inst.SPLIT, 1, 6),
		inst.New(inst.SPLIT, 2, 4),
		inst.New(inst.CHAR, 'a', 0),
		inst.New(inst.JMP, 5, 0),
		inst.New(inst.CHAR, 'b', 0),
		inst.New(inst.JMP, 0, 0),
		// 6-7: cb
		inst.New(inst.CHAR, 'c', 0),
		inst.New(inst.CHAR, 'b', 0),
		// 8-9: a?
		inst.New(inst.SPLIT, 9, 10),
		inst.New(inst.CHAR, 'a', 0),
		// 10-13: (c|b)
		inst.New(inst.SPLIT, 11, 13),
		inst.New(inst.CHAR, 'c', 0),
		inst.New(inst.JMP, 14, 0),
		inst.New(inst.CHAR, 'b', 0),
		// 14-15: bb
		inst.New(inst.CHAR, 'b', 0),
		inst.New(inst.CHAR, 'b', 0),
		// 16: accept
		inst.New(inst.MATCH, 0, 0),
	}

	// The program slice has 20 slots; the ones past the MATCH instruction
	// keep their zero value.
	program := make(inst.InstSlice, 20)
	copy(program, compiled)

	t.Log(string(text))
	t.Log(program)
	if matched := Recursive(program, text); !matched {
		t.Error("program should have matched text but did not")
	}
}
Пример #4
0
// TestLexerThreeStrings runs LexerEngine over the input "struct  *" with a
// hand-assembled program that combines three alternatives: the literal
// "struct", a run of zero or more spaces, and the literal "*". Every match
// the engine emits is logged, and the test fails if the engine reports that
// lexing did not succeed.
func TestLexerThreeStrings(t *testing.T) {
	text := []byte("struct  *")

	// The slice has 30 slots; those past index 14 keep their zero value.
	program := make(inst.InstSlice, 30)

	program[0] = inst.New(inst.SPLIT, 2, 1)  // "struct" (2) or the next dispatch split (1)
	program[1] = inst.New(inst.SPLIT, 9, 13) // spaces (9) or "*" (13)
	program[2] = inst.New(inst.CHAR, 's', 0)
	program[3] = inst.New(inst.CHAR, 't', 0)
	program[4] = inst.New(inst.CHAR, 'r', 0)
	program[5] = inst.New(inst.CHAR, 'u', 0)
	program[6] = inst.New(inst.CHAR, 'c', 0)
	program[7] = inst.New(inst.CHAR, 't', 0)
	program[8] = inst.New(inst.MATCH, 0, 0)
	program[9] = inst.New(inst.SPLIT, 10, 12) // loop: another space (10) or accept (12)
	program[10] = inst.New(inst.CHAR, ' ', 0)
	program[11] = inst.New(inst.JMP, 9, 0)
	program[12] = inst.New(inst.MATCH, 0, 0)
	program[13] = inst.New(inst.CHAR, '*', 0)
	program[14] = inst.New(inst.MATCH, 0, 0)

	t.Log(string(text))
	t.Log(len(text))
	t.Log(program)
	success, matches := LexerEngine(program, text)

	// Log matches concurrently so the engine is never blocked on the matches
	// channel while the result is awaited below.
	drained := make(chan struct{})
	go func() {
		defer close(drained)
		for match := range matches {
			t.Log(match)
		}
	}()

	if ok := <-success; !ok {
		t.Error("program should have matched text but did not")
	}

	// Wait for the logging goroutine before returning: calling t.Log after
	// the test function has completed makes the testing package panic.
	// NOTE(review): this relies on LexerEngine closing the matches channel.
	<-drained
}
Пример #5
0
// TestLexerMatch runs LexerEngine with a hand-assembled program for the
// expression (a|b)*cba?(c|b)bb, logs every match the engine emits, and fails
// if the engine reports that lexing did not succeed.
func TestLexerMatch(t *testing.T) {
	// The slice has 20 slots; those past index 16 keep their zero value.
	program := make(inst.InstSlice, 20)

	// 0-5: (a|b)*
	program[0] = inst.New(inst.SPLIT, 1, 6)
	program[1] = inst.New(inst.SPLIT, 2, 4)
	program[2] = inst.New(inst.CHAR, 'a', 0)
	program[3] = inst.New(inst.JMP, 5, 0)
	program[4] = inst.New(inst.CHAR, 'b', 0)
	program[5] = inst.New(inst.JMP, 0, 0)
	// 6-7: cb
	program[6] = inst.New(inst.CHAR, 'c', 0)
	program[7] = inst.New(inst.CHAR, 'b', 0)
	// 8-9: a?
	program[8] = inst.New(inst.SPLIT, 9, 10)
	program[9] = inst.New(inst.CHAR, 'a', 0)
	// 10-13: (c|b)
	program[10] = inst.New(inst.SPLIT, 11, 13)
	program[11] = inst.New(inst.CHAR, 'c', 0)
	program[12] = inst.New(inst.JMP, 14, 0)
	program[13] = inst.New(inst.CHAR, 'b', 0)
	// 14-15: bb
	program[14] = inst.New(inst.CHAR, 'b', 0)
	program[15] = inst.New(inst.CHAR, 'b', 0)
	// 16: accept
	program[16] = inst.New(inst.MATCH, 0, 0)

	t.Log(string(text))
	t.Log(len(text))
	t.Log(program)
	success, matches := LexerEngine(program, text)

	// Log matches concurrently so the engine is never blocked on the matches
	// channel while the result is awaited below.
	drained := make(chan struct{})
	go func() {
		defer close(drained)
		for match := range matches {
			t.Log(match)
		}
	}()

	if ok := <-success; !ok {
		t.Error("program should have matched text but did not")
	}

	// Wait for the logging goroutine before returning: calling t.Log after
	// the test function has completed makes the testing package panic.
	// NOTE(review): this relies on LexerEngine closing the matches channel.
	<-drained
}