func TestEarleyDFAEpsilons(t *testing.T) { // Build a simple BNF grammar description grammar. gb := parser.OpenGrammarBuilder() gb.Name("a4"). Terminals("a"). Nonterminals("S", "A", "E"). Rule().Lhs("`*").Rhs("S", "`."). Rule().Lhs("S").Rhs("A", "A", "A", "A"). Rule().Lhs("A").Rhs("a"). Rule().Lhs("A").Rhs("E"). Rule().Lhs("E").Rhs("`e") g, err := gb.Build() if err != nil { t.Error(err) } grammar := parser.GetIndexedGrammar(g) dfa, err := BuildEpsilonLR0Dfa(grammar) if err != nil { t.Error() return } fmt.Printf("DFA has %d states\n", len(dfa.states)) for i := 0; i < len(dfa.states); i++ { fmt.Println(dfa.states[i].String()) } }
func TestEarleyDFA(t *testing.T) { gb := parser.OpenGrammarBuilder() gb.Terminals("NONTERM", "COLEQ", "PIPE", "IDENTIFIER"). Nonterminals("bnf", "ntdecl", "def", "ntort"). Rule().Lhs("bnf").Rhs("ntdecl"). Rule().Lhs("bnf").Rhs("ntdecl", "bnf"). Rule().Lhs("ntdecl").Rhs("NONTERM", "COLEQ", "def"). Rule().Lhs("ntdecl").Rhs("ntdecl", "PIPE", "def"). Rule().Lhs("def").Rhs("ntort"). Rule().Lhs("def").Rhs("ntort", "def"). Rule().Lhs("ntort").Rhs("IDENTIFIER"). Rule().Lhs("ntort").Rhs("NONTERM"). Rule().Lhs("`*").Rhs("bnf", "`."). Name("simple-bnf") g, err := gb.Build() if err != nil { t.Error(err) } grammar := parser.GetIndexedGrammar(g) dfa, err := BuildEpsilonLR0Dfa(grammar) if err != nil { t.Error() return } fmt.Printf("DFA has %d states\n", len(dfa.states)) for i := 0; i < len(dfa.states); i++ { fmt.Println(dfa.states[i].String()) } }
func TestNNF(t *testing.T) { // Build a simple BNF aGrammar description aGrammar. gb := parser.OpenGrammarBuilder() gb.Name("a4"). Terminals("a"). Nonterminals("S", "A", "E"). Rule().Lhs("`*").Rhs("S", "`."). Rule().Lhs("S").Rhs("A", "A", "A", "A"). Rule().Lhs("A").Rhs("a"). Rule().Lhs("A").Rhs("E"). Rule().Lhs("E").Rhs("`e") g, err := gb.Build() if err != nil { t.Error(err) return } var aGrammar parser.Grammar var rTransform parser.SyntaxTreeTransform nnf, err := IsNihilisticNormalForm(g) if err != nil { t.Error() return } if !nnf { fmt.Println("Grammar is not NNF, transforming.") aGrammar, rTransform, err = GetNihilisticAugmentGrammar(g) if err != nil { t.Error(err) return } } else { t.Error("Grammar returned NNF.") return } fmt.Println("Name: " + aGrammar.Name()) terms := make([]string, aGrammar.NumTerminals()) for i, t := range aGrammar.Terminals() { terms[i] = t.String() } nterms := make([]string, aGrammar.NumNonterminals()) for i, t := range aGrammar.Nonterminals() { nterms[i] = t.String() } fmt.Println("Terminals: " + strings.Join(terms, ", ")) fmt.Println("Nonterminals: " + strings.Join(nterms, ", ")) fmt.Println("Productions:") for _, p := range aGrammar.Productions() { fmt.Println(" " + p.String()) } rTransform = rTransform }
func TestGrammarIndex(t *testing.T) { // Build a simple BNF grammar description grammar. gb := parser.OpenGrammarBuilder() gb.Terminals("NONTERM", "COLEQ", "PIPE", "IDENTIFIER"). Nonterminals("bnf", "ntdecl", "def", "ntort"). Rule().Lhs("bnf").Rhs("ntdecl"). Rule().Lhs("bnf").Rhs("ntdecl", "bnf"). Rule().Lhs("ntdecl").Rhs("NONTERM", "COLEQ", "def"). Rule().Lhs("ntdecl").Rhs("ntdecl", "PIPE", "def"). Rule().Lhs("def").Rhs("ntort"). Rule().Lhs("def").Rhs("ntort", "def"). Rule().Lhs("ntort").Rhs("IDENTIFIER"). Rule().Lhs("ntort").Rhs("NONTERM"). Rule().Lhs("`*").Rhs("bnf", "`."). Name("simple-bnf") g, err := gb.Build() if err != nil { t.Error(err) } grammar := parser.GetIndexedGrammar(g) fmt.Printf("`*: 0x%8.8X\n", grammar.Asterisk()) dfa, err := BuildDfa(grammar, false) if err != nil { t.Error(err) return } fmt.Printf("DFA has %d states\n", len(dfa.states)) for i := 0; i < len(dfa.states); i++ { fmt.Println(dfa.states[i]) } fmt.Println("Done.") }
func TestParser(t *testing.T) { gb := parser.OpenGrammarBuilder() gb.Terminals("NONTERM", "COLEQ", "PIPE", "IDENTIFIER"). Nonterminals("bnf", "ntdecl", "def", "ntort"). Rule().Lhs("bnf").Rhs("ntdecl"). Rule().Lhs("bnf").Rhs("ntdecl", "bnf"). Rule().Lhs("ntdecl").Rhs("NONTERM", "COLEQ", "def"). Rule().Lhs("ntdecl").Rhs("ntdecl", "PIPE", "def"). Rule().Lhs("def").Rhs("ntort"). Rule().Lhs("def").Rhs("ntort", "def"). Rule().Lhs("ntort").Rhs("IDENTIFIER"). Rule().Lhs("ntort").Rhs("NONTERM"). Rule().Lhs("`*").Rhs("bnf", "`."). Name("simple-bnf") g, err := gb.Build() if err != nil { t.Error(err) } metaBnf := ` <bnf> := <ntdecl> | <ntdecl> <bnf> <ntdecl> := NONTERM COLEQ <def> | <ntdecl> PIPE <def> <def> := <ntort> | <ntort> <def> <ntort> := IDENTIFIER | NONTERM ` lexer, err := NewSimpleBnfLexer(g) if err != nil { t.Error(err) return } lexer.Reset(parser.NewStringReader(metaBnf)) p, err := GenerateParser(g) if err != nil { t.Error(err) return } var ast parser.SyntaxTreeNode ast, err = p.Parse(lexer, nil) if err != nil { t.Error(err) return } fmt.Println("Got AST.") fmt.Printf("Top is: %s\n", ast.Part()) cmap := make(map[parser.SyntaxTreeNode]int) outmap := make(map[int]string) var cnid, chid int DumpTree(ast.(*astNode)) stack := []parser.SyntaxTreeNode{ast} for len(stack) > 0 { var buf []byte cn := stack[len(stack)-1] stack = stack[0 : len(stack)-1] if id, has := cmap[cn]; !has { cnid = len(cmap) cmap[cn] = cnid } else { cnid = id } var desc string if cn.Part() == nil { desc = "-" } else { desc = cn.Part().String() } var valstr string if cn.NumChildren() == 0 { if vt, isVal := cn.Part().(*parser.ValueTerminal); isVal { val := vt.Value() if s, isStr := val.(string); isStr { valstr = fmt.Sprintf("\"%s\"", s) } else if s, isStr := val.(parser.Stringable); isStr { valstr = fmt.Sprintf("\"%s\"", s.String()) } else { valstr = fmt.Sprintf("0x%8.8X", reflect.ValueOf(val).Pointer()) } } else { valstr = "-" } var teststr string if cn.(*astNode).left == nil { teststr = "." 
} else { teststr = fmt.Sprintf("%d", cn.(*astNode).left.first) } buf = append(buf, fmt.Sprintf("[%d: leaf %s %s %s (%d-%d)]", cnid, teststr, desc, valstr, cn.First(), cn.Last())...) } else { buf = append(buf, fmt.Sprintf("[%d: %s {", cnid, cn.Rule().String())...) for i := 0; i < cn.NumChildren(); i++ { child := cn.Child(i) stack = append(stack, child) if id, has := cmap[child]; !has { chid = len(cmap) cmap[child] = chid } else { chid = id } buf = append(buf, fmt.Sprintf("%d", chid)...) if i < cn.NumChildren()-1 { buf = append(buf, ","...) } } buf = append(buf, fmt.Sprintf("} (%d-%d)]", cn.First(), cn.Last())...) } outmap[cnid] = string(buf) } for i := 0; i < len(outmap); i++ { fmt.Println(outmap[i]) } }
func TestRelex(t *testing.T) { gb := parser.OpenGrammarBuilder() gb.Name("simple-calculator"). Terminals("ID", "EQ", "PRINT", "POW", "PLUS", "MINUS", "TIMES", "DIV", "MOD", "LP", "RP", "NUM"). Nonterminals("program", "statement", "assignment", "output", "expr", "aopfree", "aop", "mopfree", "mop", "unit"). Rule().Lhs("`*").Rhs("program", "`."). Rule().Lhs("program").Rhs("statement"). Value(func(p parser.Production, values []interface{}) (interface{}, error) { return &StatementList{statements: []Statement{values[0].(Statement)}}, nil }). Rule().Lhs("program").Rhs("statement", "program"). Value(func(p parser.Production, values []interface{}) (interface{}, error) { slist := values[1].(*StatementList) slist.statements = append(slist.statements, values[0].(Statement)) return slist, nil }). Rule().Lhs("statement").Rhs("assignment"). Rule().Lhs("statement").Rhs("output"). Rule().Lhs("assignment").Rhs("ID", "EQ", "expr"). Value(func(p parser.Production, values []interface{}) (interface{}, error) { return &Assignment{varname: values[0].(parser.Stringable).String(), value: values[2].(Expression)}, nil }). Rule().Lhs("output").Rhs("PRINT", "expr"). Value(func(p parser.Production, values []interface{}) (interface{}, error) { return &Output{value: values[1].(Expression)}, nil }). Rule().Lhs("expr").Rhs("aopfree"). Rule().Lhs("expr").Rhs("expr", "aop", "aopfree").Value(binaryOpCtor). Rule().Lhs("aopfree").Rhs("aopfree", "mop", "mopfree").Value(binaryOpCtor). Rule().Lhs("mopfree").Rhs("mopfree", "POW", "unit").Value(binaryOpCtor). Rule().Lhs("aop").Rhs("PLUS"). Rule().Lhs("aop").Rhs("MINUS"). Rule().Lhs("mop").Rhs("TIMES"). Rule().Lhs("mop").Rhs("DIV"). Rule().Lhs("mop").Rhs("MOD"). Rule().Lhs("unit").Rhs("ID"). Value(func(p parser.Production, values []interface{}) (interface{}, error) { return &VariableExpression{varname: values[0].(parser.Stringable).String()}, nil }). Rule().Lhs("unit").Rhs("MINUS", "unit"). 
Value(func(p parser.Production, values []interface{}) (interface{}, error) { return &Negation{arg: values[1].(Expression)}, nil }). Rule().Lhs("unit").Rhs("LP", "expr", "RP"). Value(func(p parser.Production, values []interface{}) (interface{}, error) { return values[1], nil }). Rule().Lhs("unit").Rhs("NUM"). Value(func(p parser.Production, values []interface{}) (interface{}, error) { return &Literal{val: values[0].(*big.Int)}, nil }) g, err := gb.Build() if err != nil { t.Error(err) return } p, err := earley.GenerateParser(g) if err != nil { t.Error("parser generation failed: " + err.Error()) return } fmt.Println(g.Name()) lb := OpenLexerBuilder(g) lb.Token("ID").Expr(`([a-zA-Z][a-zA-Z0-9]*)`) lb.Token("EQ").Expr(`=`) lb.Token("PRINT").Expr(`print`) lb.Token("POW").Expr(`^`). Value(func(part parser.GrammarParticle, match string) interface{} { return EXPONENTIATION }) lb.Token("PLUS").Expr(`\+`). Value(func(part parser.GrammarParticle, match string) interface{} { return ADDITION }) lb.Token("MINUS").Expr(`-`). Value(func(part parser.GrammarParticle, match string) interface{} { return SUBTRACTION }) lb.Token("TIMES").Expr(`\*`). Value(func(part parser.GrammarParticle, match string) interface{} { return MULTIPLICATION }) lb.Token("DIV").Expr(`/`). Value(func(part parser.GrammarParticle, match string) interface{} { return DIVISION }) lb.Token("MOD").Expr(`%`). Value(func(part parser.GrammarParticle, match string) interface{} { return MODULUS }) lb.Token("LP").Expr(`\(`) lb.Token("RP").Expr(`\)`) lb.Token("NUM").Expr("(0|[1-9][0-9]*)"). 
Value(func(part parser.GrammarParticle, match string) interface{} { n := big.NewInt(0) ten := big.NewInt(10) for i := len(match) - 1; i >= 0; i-- { n.Mul(n, ten).Add(n, big.NewInt(int64(match[i]-'0'))) } return n }) lexer, err := lb.Build() if err != nil { t.Error("lexer build failed: " + err.Error()) return } lexer.Reset(parser.NewStringReader(SAMPLE_PROGRAM)) ast, err := p.Parse(lexer, nil) if err != nil { t.Error(err) return } ast = ast }
// GetNihilisticAugmentGrammar builds an augmented copy of g in which every
// epsilon-deriving nonterminal that is not purely nihilistic gets an explicit
// "-ε" twin, so the result is in nihilistic normal form. It returns the
// augmented grammar together with a SyntaxTreeTransform that maps parse trees
// of the augmented grammar back onto productions/particles of the original g.
func GetNihilisticAugmentGrammar(g parser.Grammar) (parser.Grammar, parser.SyntaxTreeTransform, error) {
	// Nonterminals that CAN derive epsilon but are not nihilistic; each one
	// gets an explicit "-ε" twin in the augmented grammar.
	exceptionalEpsilons := []parser.GrammarParticle{}
	idxg := parser.GetIndexedGrammar(g)
	idx, err := idxg.GetIndex(index.BASIC_INDEX)
	if err != nil {
		return nil, nil, err
	}
	bidx := idx.(*index.BasicGrammarIndex)
	idx, err = idxg.GetIndex(index.NAME_INDEX)
	if err != nil {
		return nil, nil, err
	}
	nidx := idx.(*index.NameIndex)
	for _, nt := range idxg.Nonterminals() {
		n, err := IsNihilistic(nt)
		if err != nil {
			return nil, nil, err
		}
		if bidx.Epsilon(nt) && !n {
			exceptionalEpsilons = append(exceptionalEpsilons, nt)
		}
	}
	// Copy the particle inventory, omitting the synthetic start ("`*") and
	// epsilon particles: the builder re-creates those itself.
	gb := parser.OpenGrammarBuilder()
	for _, nt := range g.Nonterminals() {
		if nt.Asterisk() {
			continue
		}
		gb.Nonterminals(nt.Name())
	}
	for _, t := range g.Terminals() {
		if t.Epsilon() {
			continue
		}
		gb.Terminals(t.Name())
	}
	// augmentMap: original name -> "-ε" twin; invMap is its inverse.
	augmentMap := make(map[string]string)
	invMap := make(map[string]string)
	for _, e := range exceptionalEpsilons {
		eName := e.Name() + "-ε"
		augmentMap[e.Name()] = eName
		invMap[eName] = e.Name()
		gb.Nonterminals(eName)
		//gb.Rule().Lhs(eName).Rhs("`e")
	}
	// For each original production, emit one augmented production per subset
	// of its exceptional-epsilon RHS positions (enumerated via cnt selections).
	for _, nt := range g.Nonterminals() {
		for i := 0; i < bidx.NumLhsStarts(nt); i++ {
			prod := bidx.LhsStart(nt, i)
			//fmt.Printf("LHSTART(%s,%d): %s\n", nt.String(), i, prod.String())
			exIdx := []int{} // RHS positions holding exceptional-epsilon nonterminals
			rhs := []string{}
			for j := 0; j < prod.RhsLen(); j++ {
				t := prod.Rhs(j)
				if _, has := augmentMap[t.Name()]; has {
					exIdx = append(exIdx, j)
					//fmt.Println("exidx gets "+t.String())
				}
				rhs = append(rhs, t.Name())
			}
			if nt.Asterisk() {
				// Initial rule is special-cased.
				//fmt.Println("rule transfers: "+prod.String())
				//fmt.Println("Args: {"+prod.Lhs(0).Name()+"}, {"+strings.Join(rhs,",")+"}")
				gb.Rule().Lhs(prod.Lhs(0).Name()).Rhs(rhs...)
				// If the start symbol itself can derive epsilon, route it to
				// its "-ε" twin so the nil input stays accepted.
				initSym := nidx.Nonterminal(rhs[0])
				if initSym != nil {
					if bidx.Epsilon(initSym) {
						gb.Rule().Lhs(initSym.Name()).Rhs(augmentMap[initSym.Name()])
					}
				}
			} else {
				rhsinst := make([]string, len(rhs))
				//fmt.Printf("index len %d\n", len(exIdx))
				s := cnt.FirstSelection(uint(len(exIdx)))
				for {
					// nnCount counts non-nihilistic RHS symbols; if the whole
					// instantiated RHS is nihilistic, the LHS becomes its twin.
					nnCount := 0
					for j := 0; j < len(rhs); j++ {
						rhsinst[j] = rhs[j]
						if nidx.Terminal(rhs[j]) != nil {
							nnCount++
						} else {
							nt := nidx.Nonterminal(rhs[j])
							nihil, err := IsNihilistic(nt)
							if err != nil {
								return nil, nil, err
							}
							if !nihil {
								nnCount++
							}
						}
					}
					// NOTE(review): the loop above already copied rhs into
					// rhsinst element-by-element; this copy is redundant but
					// harmless as written.
					copy(rhsinst, rhs)
					for j := 0; j < len(exIdx); j++ {
						if s.Test(j) {
							//fmt.Printf("idx %d replacement: %s->%s\n", exIdx[j], rhsinst[exIdx[j]], augmentMap[rhsinst[exIdx[j]]])
							rhsinst[exIdx[j]] = augmentMap[rhsinst[exIdx[j]]]
							nnCount--
						}
					}
					var head string
					if nnCount == 0 {
						head = augmentMap[prod.Lhs(0).Name()]
					} else {
						head = prod.Lhs(0).Name()
					}
					//fmt.Println("rule transforms: "+prod.String())
					//fmt.Println("Args: {"+head+"}, {"+strings.Join(rhs,",")+"}")
					gb.Rule().Lhs(head).Rhs(rhsinst...)
					if s.HasNext() {
						s = s.Next()
					} else {
						break
					}
				}
			}
		}
	}
	gb.Name(g.Name() + "-ε")
	augmentedGrammar, err := gb.Build()
	if err != nil {
		return nil, nil, err
	}
	// Index the augmented grammar so its particles/productions can be mapped
	// back to the originals.
	augidx := parser.GetIndexedGrammar(augmentedGrammar)
	idx, err = augidx.GetIndex(index.NAME_INDEX)
	if err != nil {
		return nil, nil, err
	}
	anidx := idx.(*index.NameIndex)
	idx, err = augidx.GetIndex(index.BASIC_INDEX)
	if err != nil {
		return nil, nil, err
	}
	abidx := idx.(*index.BasicGrammarIndex)
	// reverseMap: augmented "-ε" particle -> original particle.
	reverseMap := make(map[parser.GrammarParticle]parser.GrammarParticle)
	for k, v := range augmentMap {
		reverseMap[anidx.Nonterminal(v)] = nidx.Nonterminal(k)
	}
	// prodMap: augmented production -> its preimage production in g.
	prodMap := make(map[parser.Production]parser.Production)
	for _, p := range augmentedGrammar.Productions() {
		// Ignore the special case start rule for grammars that accept nil input.
		if p.LhsLen() == 1 && p.RhsLen() == 1 {
			initSym := abidx.LhsStart(augmentedGrammar.Asterisk(), 0).Rhs(0)
			if p.Lhs(0) == initSym && invMap[p.Rhs(0).Name()] == initSym.Name() {
				continue
			}
		}
		if p.RhsLen() == 1 && p.Rhs(0).Epsilon() {
			continue
		}
		// Translate the augmented RHS back to original names, then search the
		// original grammar for a production with that RHS and matching LHS.
		rhs := make([]string, 0, p.RhsLen())
		for i := 0; i < p.RhsLen(); i++ {
			rp := p.Rhs(i)
			if ot, has := reverseMap[rp]; has {
				rhs = append(rhs, ot.Name())
			} else {
				rhs = append(rhs, rp.Name())
			}
		}
		//fmt.Printf("Searching for preimage rhs %s\n", strings.Join(rhs,","))
		var target parser.Production
		for _, cp := range nidx.RhsNames(rhs) {
			//fmt.Println(" considering "+cp.String())
			//for _, p := range g.Productions() {
			//fmt.Println(p.String())
			//}
			if nt, has := reverseMap[p.Lhs(0)]; has {
				if cp.Lhs(0) == nt {
					target = cp
					break
				}
			} else {
				if cp.Lhs(0).Name() == p.Lhs(0).Name() {
					target = cp
					break
				}
			}
		}
		if target == nil {
			return nil, nil, errors.New("Could not find preimage of augmented production rule: " + p.String())
		}
		prodMap[p] = target
	}
	// rtrans recursively rewrites an augmented-grammar parse tree into an
	// original-grammar tree: particles and rules are mapped through
	// reverseMap/prodMap; subtrees rooted at non-augmented particles are
	// shared unchanged.
	var rtrans func(treeNode parser.SyntaxTreeNode) (parser.SyntaxTreeNode, error)
	rtrans = func(treeNode parser.SyntaxTreeNode) (parser.SyntaxTreeNode, error) {
		part := treeNode.Part()
		if op, has := reverseMap[part]; has {
			part = op
		}
		exp := make([]parser.SyntaxTreeNode, treeNode.NumChildren())
		for i := 0; i < len(exp); i++ {
			st := treeNode.Child(i)
			if _, has := reverseMap[st.Part()]; has {
				r, err := rtrans(st)
				if err != nil {
					return nil, err
				}
				exp[i] = r
			} else {
				exp[i] = st
			}
		}
		return &parser.BasicSyntaxTreeNode{
			Particle:       part,
			FirstTokenIdx:  treeNode.First(),
			LastTokenIdx:   treeNode.Last(),
			SyntacticValue: treeNode.Value(),
			Prod:           prodMap[treeNode.Rule()],
			Expansion:      exp,
		}, nil
	}
	return augmentedGrammar, rtrans, nil
}
func TestGrammarIndex(t *testing.T) { // Build a simple BNF grammar description grammar. gb := parser.OpenGrammarBuilder() gb.Terminals("NONTERM", "COLEQ", "PIPE", "IDENTIFIER"). Nonterminals("bnf", "ntdecl", "def", "ntort"). Rule().Lhs("bnf").Rhs("ntdecl"). Rule().Lhs("bnf").Rhs("ntdecl", "bnf"). Rule().Lhs("ntdecl").Rhs("NONTERM", "COLEQ", "def"). Rule().Lhs("ntdecl").Rhs("ntdecl", "PIPE", "def"). Rule().Lhs("def").Rhs("ntort"). Rule().Lhs("def").Rhs("ntort", "def"). Rule().Lhs("ntort").Rhs("IDENTIFIER"). Rule().Lhs("ntort").Rhs("NONTERM"). Rule().Lhs("`*").Rhs("bnf", "`."). Name("simple-bnf") g, err := gb.Build() if err != nil { t.Error(err) } grammar := parser.GetIndexedGrammar(g) fmt.Println("Name: " + grammar.Name()) terms := make([]string, grammar.NumTerminals()) for i, t := range grammar.Terminals() { terms[i] = t.String() } nterms := make([]string, grammar.NumNonterminals()) for i, t := range grammar.Nonterminals() { nterms[i] = t.String() } fmt.Println("Terminals: " + strings.Join(terms, ", ")) fmt.Println("Nonterminals: " + strings.Join(nterms, ", ")) fmt.Println("Productions:") for _, p := range grammar.Productions() { fmt.Println(" " + p.String()) } idx, err := grammar.GetIndex("basic") if err != nil { t.Error() return } basicIndex := idx.(*BasicGrammarIndex) fmt.Println("Basic index type: '" + basicIndex.IndexName() + "'") fmt.Println("Production RHS starts: ") for idx := 0; idx < grammar.NumTerminals(); idx++ { term := grammar.Terminal(idx) starts := basicIndex.RhsStarts(term) if len(starts) == 0 { continue } fmt.Println(" " + term.String() + ":") for _, p := range starts { fmt.Println(" " + p.String()) } } fmt.Println("\nProduction RHS ends: ") for idx := 0; idx < grammar.NumTerminals(); idx++ { term := grammar.Terminal(idx) starts := basicIndex.RhsEnds(term) if len(starts) == 0 { continue } fmt.Println(" " + term.String() + ":") for _, p := range starts { fmt.Println(" " + p.String()) } } fmt.Println("\nProduction RHS contains: ") for idx := 0; idx 
< grammar.NumTerminals(); idx++ { term := grammar.Terminal(idx) starts := basicIndex.RhsContains(term) if len(starts) == 0 { continue } fmt.Println(" " + term.String() + ":") for _, p := range starts { fmt.Println(" " + p.String()) } } fmt.Println("Grammar class:") idx, err = grammar.GetIndex(GRAMMAR_CLASS_INDEX) if err != nil { t.Error(err) return } gcidx := idx.(*GrammarClassIndex) fmt.Println(" type: " + gcidx.Class().String()) fmt.Println(" regularity: " + gcidx.Regularity().String()) idx, err = grammar.GetIndex(FIRST_FOLLOW_INDEX) if err != nil { t.Error(err) return } ffidx := idx.(*FFIndex) fmt.Println("FIRST(x): ") for _, p := range g.Nonterminals() { fmt.Println(" " + p.String()) for _, k := range ffidx.Firsts(p) { fmt.Println(" " + k.String()) } } fmt.Println("FOLLOW(x): ") for _, p := range g.Nonterminals() { fmt.Println(" " + p.String()) for _, k := range ffidx.Follows(p) { fmt.Println(" " + k.String()) } } for _, p := range g.Terminals() { fmt.Println(" " + p.String()) for _, k := range ffidx.Firsts(p) { fmt.Println(" " + k.String()) } } }