func (gci *GrammarClassIndex) Initialize(g parser.Grammar) error { gci.g = g gci.gclass = parser.CONSTANT gci.greg = parser.STRICT_UNITARY for _, p := range g.Productions() { if p.Lhs(0).Asterisk() { continue } preg := parser.GetProductionClass(p) if preg > gci.gclass { gci.gclass = preg if gci.gclass > parser.REGULAR { gci.greg = parser.NONREGULAR } } if gci.gclass <= parser.REGULAR { gci.greg = gci.greg.Join(parser.GetProductionRegularity(p)) } } return nil }
func (ni *NameIndex) Initialize(g parser.Grammar) error { ni.nonterminalsByName = make(map[string]parser.GrammarParticle) ni.terminalsByName = make(map[string]parser.GrammarParticle) for i := 0; i < g.NumNonterminals(); i++ { nt := g.Nonterminal(i) ni.nonterminalsByName[nt.Name()] = nt } for i := 0; i < g.NumTerminals(); i++ { t := g.Terminal(i) ni.terminalsByName[t.Name()] = t } ni.lhsNames = make(map[string][]parser.Production) ni.rhsNames = make(map[string][]parser.Production) for _, p := range g.Productions() { var rhs, lhs []byte for i := 0; i < p.LhsLen(); i++ { lhs = append(lhs, p.Lhs(i).Name()...) if i < p.LhsLen()-1 { lhs = append(lhs, "|"...) } } if _, has := ni.lhsNames[string(lhs)]; !has { ni.lhsNames[string(lhs)] = []parser.Production{} } ni.lhsNames[string(lhs)] = append(ni.lhsNames[string(lhs)], p) for i := 0; i < p.RhsLen(); i++ { rhs = append(rhs, p.Rhs(i).Name()...) if i < p.RhsLen()-1 { rhs = append(rhs, "|"...) } } if _, has := ni.rhsNames[string(rhs)]; !has { ni.rhsNames[string(rhs)] = []parser.Production{} } ni.rhsNames[string(rhs)] = append(ni.rhsNames[string(rhs)], p) } return nil }
func TestNNF(t *testing.T) { // Build a simple BNF aGrammar description aGrammar. gb := parser.OpenGrammarBuilder() gb.Name("a4"). Terminals("a"). Nonterminals("S", "A", "E"). Rule().Lhs("`*").Rhs("S", "`."). Rule().Lhs("S").Rhs("A", "A", "A", "A"). Rule().Lhs("A").Rhs("a"). Rule().Lhs("A").Rhs("E"). Rule().Lhs("E").Rhs("`e") g, err := gb.Build() if err != nil { t.Error(err) return } var aGrammar parser.Grammar var rTransform parser.SyntaxTreeTransform nnf, err := IsNihilisticNormalForm(g) if err != nil { t.Error() return } if !nnf { fmt.Println("Grammar is not NNF, transforming.") aGrammar, rTransform, err = GetNihilisticAugmentGrammar(g) if err != nil { t.Error(err) return } } else { t.Error("Grammar returned NNF.") return } fmt.Println("Name: " + aGrammar.Name()) terms := make([]string, aGrammar.NumTerminals()) for i, t := range aGrammar.Terminals() { terms[i] = t.String() } nterms := make([]string, aGrammar.NumNonterminals()) for i, t := range aGrammar.Nonterminals() { nterms[i] = t.String() } fmt.Println("Terminals: " + strings.Join(terms, ", ")) fmt.Println("Nonterminals: " + strings.Join(nterms, ", ")) fmt.Println("Productions:") for _, p := range aGrammar.Productions() { fmt.Println(" " + p.String()) } rTransform = rTransform }
// Initialize builds the structural lookup tables for grammar g:
//
//	epsilons    — nonterminal -> a production witnessing that it derives ε
//	lhsStarts   — particle -> productions whose LHS begins with it
//	lhsEnds     — particle -> productions whose LHS ends with it
//	rhsStarts   — particle -> productions whose RHS begins with it
//	rhsEnds     — particle -> productions whose RHS ends with it
//	lhsIncludes — particle -> productions whose LHS contains it anywhere
//	rhsIncludes — particle -> productions whose RHS contains it anywhere
//
// After the per-production pass, the epsilon set is transitively closed: a
// nonterminal whose entire RHS consists of ε-deriving nonterminals is
// itself recorded as ε-deriving. Always returns nil.
func (bgi *BasicGrammarIndex) Initialize(g parser.Grammar) error {
	bgi.grammar = g
	bgi.epsilons = make(map[parser.GrammarParticle]parser.Production)
	bgi.lhsIncludes = make(map[parser.GrammarParticle][]parser.Production)
	bgi.rhsIncludes = make(map[parser.GrammarParticle][]parser.Production)
	bgi.lhsStarts = make(map[parser.GrammarParticle][]parser.Production)
	bgi.rhsStarts = make(map[parser.GrammarParticle][]parser.Production)
	bgi.lhsEnds = make(map[parser.GrammarParticle][]parser.Production)
	bgi.rhsEnds = make(map[parser.GrammarParticle][]parser.Production)
	// Temporary membership maps used to deduplicate the *Includes tables.
	lhicn := make(map[parser.GrammarParticle]map[parser.Production]int)
	rhicn := make(map[parser.GrammarParticle]map[parser.Production]int)
	for _, p := range g.Productions() {
		if p.LhsLen() == 1 && p.Lhs(0).Asterisk() {
			// Augmented start rule: indexed specially, then skipped below.
			bgi.lhsStarts[p.Lhs(0)] = []parser.Production{p}
			bgi.lhsEnds[p.Lhs(0)] = []parser.Production{p}
			// NOTE(review): rhsStarts is keyed here by the LHS (asterisk)
			// particle rather than by p.Rhs(0) — confirm this is intended.
			bgi.rhsStarts[p.Lhs(0)] = []parser.Production{p}
			if _, has := bgi.rhsStarts[p.Rhs(0)]; !has {
				bgi.rhsStarts[p.Rhs(0)] = []parser.Production{}
			}
			bgi.rhsIncludes[p.Rhs(0)] = append(bgi.rhsIncludes[p.Rhs(0)], p)
			// NOTE(review): dead check — the append above guarantees the
			// key exists, so this branch can never fire.
			if _, has := bgi.rhsIncludes[p.Rhs(0)]; !has {
				bgi.rhsIncludes[p.Rhs(0)] = []parser.Production{}
			}
			// Assumes the start rule's RHS has at least two symbols
			// (start symbol then end-of-input marker) — TODO confirm.
			bgi.rhsIncludes[p.Rhs(1)] = []parser.Production{p}
			bgi.rhsEnds[p.Rhs(1)] = []parser.Production{p}
			continue
		}
		// Direct epsilon production N -> ε. No continue: the production is
		// deliberately still indexed into the tables below.
		if p.LhsLen() == 1 && p.Lhs(0).Nonterminal() && p.RhsLen() == 1 && p.Rhs(0).Epsilon() {
			bgi.epsilons[p.Lhs(0)] = p
		}
		iterm := p.Lhs(0)
		bgi.lhsStarts[iterm] = append(bgi.lhsStarts[iterm], p)
		eterm := p.Lhs(p.LhsLen() - 1)
		bgi.lhsEnds[eterm] = append(bgi.lhsEnds[eterm], p)
		iterm = p.Rhs(0)
		bgi.rhsStarts[iterm] = append(bgi.rhsStarts[iterm], p)
		eterm = p.Rhs(p.RhsLen() - 1)
		bgi.rhsEnds[eterm] = append(bgi.rhsEnds[eterm], p)
		// Record set membership: particle occurs anywhere in the LHS.
		for idx := 0; idx < p.LhsLen(); idx++ {
			pt := p.Lhs(idx)
			if m, has := lhicn[pt]; !has {
				m = make(map[parser.Production]int)
				m[p] = 1
				lhicn[pt] = m
			} else {
				lhicn[pt][p] = 1
			}
		}
		// Same for the RHS.
		for idx := 0; idx < p.RhsLen(); idx++ {
			pt := p.Rhs(idx)
			if m, has := rhicn[pt]; !has {
				m = make(map[parser.Production]int)
				m[p] = 1
				rhicn[pt] = m
			} else {
				rhicn[pt][p] = 1
			}
		}
	}
	// Flatten the membership maps into the deduplicated include slices.
	for pt, set := range lhicn {
		slc := make([]parser.Production, 0, len(set))
		for p, _ := range set {
			slc = append(slc, p)
		}
		bgi.lhsIncludes[pt] = slc
	}
	for pt, set := range rhicn {
		slc := make([]parser.Production, 0, len(set))
		for p, _ := range set {
			slc = append(slc, p)
		}
		bgi.rhsIncludes[pt] = slc
	}
	// Close epsilons: iterate to a fixpoint, adding any nonterminal whose
	// entire RHS consists of already ε-deriving nonterminals.
	changed := true
	for changed {
		changed = false
		for _, p := range g.Productions() {
			if p.LhsLen() != 1 || !p.Lhs(0).Nonterminal() {
				continue
			}
			nt := p.Lhs(0)
			if bgi.Epsilon(nt) {
				continue
			}
			neweps := true
			for i := 0; i < p.RhsLen(); i++ {
				t := p.Rhs(i)
				if !t.Nonterminal() || !bgi.Epsilon(t) {
					neweps = false
					break
				}
			}
			if neweps {
				bgi.epsilons[nt] = p
				changed = true
			}
		}
	}
	return nil
}
// Initialize computes the FIRST, LAST, IN and FOLLOW sets for every
// nonterminal of g and stores them on the index.
//
// The grammar must be context-free or simpler; context-sensitive grammars
// are rejected via the grammar-class index.
func (ff *FFIndex) Initialize(g parser.Grammar) error {
	ff.grammar = g
	index := parser.GetIndexedGrammar(g)
	idx, err := index.GetIndex(GRAMMAR_CLASS_INDEX)
	if err != nil {
		return err
	}
	cidx := idx.(*GrammarClassIndex)
	if cidx.Class() >= parser.CONTEXT_SENSITIVE {
		return errors.New("cannot first/follow index a non-context-free grammar")
	}
	idx, err = index.GetIndex(BASIC_INDEX)
	// NOTE(review): this type assertion runs before err is checked; if
	// GetIndex failed, idx may be nil and the assertion will panic. The
	// err check should come first — confirm and fix separately.
	bidx := idx.(*BasicGrammarIndex)
	if err != nil {
		return err
	}
	// FIRST set calculation: breadth-first expansion of nonterminals,
	// collecting every particle that can begin a derivation of nt.
	ff.firstSets = make(map[parser.GrammarParticle][]parser.GrammarParticle)
	for _, nt := range index.Nonterminals() {
		fs := tree.NewTree()     // accumulated FIRST particles
		ntseen := tree.NewTree() // nonterminals already queued/expanded
		ntpending := []parser.GrammarParticle{nt}
		for len(ntpending) > 0 {
			cnt := ntpending[0]
			ntpending = ntpending[1:]
			for i := 0; i < bidx.NumLhsStarts(cnt); i++ {
				p := bidx.LhsStart(cnt, i)
				for j := 0; j < p.RhsLen(); j++ {
					rt := p.Rhs(j)
					if rt.Terminal() {
						// A terminal ends the scan of this RHS.
						fs.Insert(rt)
						break
					} else if rt.Nonterminal() {
						// c.LTE: ordering predicate from the imported
						// comparator package used by tree lookups —
						// presumably "less than or equal"; verify there.
						if _, has := ntseen.Lookup(c.LTE, rt); !has {
							ntseen.Insert(rt)
							fs.Insert(rt)
							ntpending = append(ntpending, rt)
						}
						// Scan past this symbol only if it derives ε.
						if !bidx.Epsilon(rt) {
							break
						}
					} else {
						break
					}
				}
			}
		}
		ff.firstSets[nt] = make([]parser.GrammarParticle, 0, fs.Size())
		for c := fs.First(); c.HasNext(); {
			ff.firstSets[nt] = append(ff.firstSets[nt], c.Next().(parser.GrammarParticle))
		}
	}
	// LAST set calculation: as FIRST, but scanning each RHS right-to-left.
	ff.lastSets = make(map[parser.GrammarParticle][]parser.GrammarParticle)
	for _, nt := range index.Nonterminals() {
		fs := tree.NewTree()
		ntseen := tree.NewTree()
		ntpending := []parser.GrammarParticle{nt}
		for len(ntpending) > 0 {
			cnt := ntpending[0]
			ntpending = ntpending[1:]
			for i := 0; i < bidx.NumLhsStarts(cnt); i++ {
				p := bidx.LhsStart(cnt, i)
				for j := p.RhsLen() - 1; j >= 0; j-- {
					rt := p.Rhs(j)
					if rt.Terminal() {
						fs.Insert(rt)
						break
					}
					if rt.Nonterminal() {
						if _, has := ntseen.Lookup(c.LTE, rt); !has {
							ntseen.Insert(rt)
							fs.Insert(rt)
							ntpending = append(ntpending, rt)
							// NOTE(review): unlike the FIRST loop, the ε
							// check sits INSIDE the seen-check, so an
							// already-seen non-ε nonterminal does not stop
							// the scan — confirm this asymmetry is intended.
							if !bidx.Epsilon(rt) {
								break
							}
						}
					}
				}
			}
		}
		ff.lastSets[nt] = make([]parser.GrammarParticle, 0, fs.Size())
		for c := fs.First(); c.HasNext(); {
			ff.lastSets[nt] = append(ff.lastSets[nt], c.Next().(parser.GrammarParticle))
		}
	}
	// IN set calculation: every particle appearing anywhere in any RHS
	// reachable from nt (no breaks — whole RHS is always scanned).
	ff.inSets = make(map[parser.GrammarParticle][]parser.GrammarParticle)
	for _, nt := range index.Nonterminals() {
		fs := tree.NewTree()
		ntseen := tree.NewTree()
		ntpending := []parser.GrammarParticle{nt}
		for len(ntpending) > 0 {
			cnt := ntpending[0]
			ntpending = ntpending[1:]
			for i := 0; i < bidx.NumLhsStarts(cnt); i++ {
				p := bidx.LhsStart(cnt, i)
				for j := p.RhsLen() - 1; j >= 0; j-- {
					rt := p.Rhs(j)
					if rt.Terminal() {
						fs.Insert(rt)
					}
					if rt.Nonterminal() {
						if _, has := ntseen.Lookup(c.LTE, rt); !has {
							ntseen.Insert(rt)
							fs.Insert(rt)
							ntpending = append(ntpending, rt)
						}
					}
				}
			}
		}
		ff.inSets[nt] = make([]parser.GrammarParticle, 0, fs.Size())
		for c := fs.First(); c.HasNext(); {
			ff.inSets[nt] = append(ff.inSets[nt], c.Next().(parser.GrammarParticle))
		}
	}
	// FOLLOW set calculation.
	followRefs := make(map[parser.GrammarParticle]tree.Tree) // tail symbol -> heads whose FOLLOW it inherits
	followSets := make(map[parser.GrammarParticle]tree.Tree)
	for _, p := range g.Productions() { // First-pass.
		// Within one RHS: each symbol after position i, up to and
		// including the first non-ε symbol, follows Rhs(i).
		for i := 0; i < p.RhsLen()-1; i++ {
			for j := i + 1; j < p.RhsLen(); j++ {
				if _, has := followSets[p.Rhs(i)]; !has {
					followSets[p.Rhs(i)] = tree.NewTree()
				}
				followSets[p.Rhs(i)].Insert(p.Rhs(j))
				if !bidx.Epsilon(p.Rhs(j)) {
					break
				}
			}
		}
		// The last RHS symbol inherits FOLLOW from the production head.
		tp := p.Rhs(p.RhsLen() - 1)
		if _, has := followRefs[tp]; !has {
			followRefs[tp] = tree.NewTree()
		}
		followRefs[tp].Insert(p.Lhs(0))
	}
	var changed bool = true
	for changed { // Take closure.
		changed = false
		for p, prt := range followRefs {
			for cr := prt.First(); cr.HasNext(); {
				fp := cr.Next().(parser.GrammarParticle)
				// x in Follow(fp) -> x in Follow(p)
				if fromSet, has := followSets[fp]; has {
					if _, has := followSets[p]; !has {
						followSets[p] = tree.NewTree()
					}
					for k := fromSet.First(); k.HasNext(); {
						x := k.Next().(parser.GrammarParticle)
						if _, has := followSets[p].Lookup(c.LTE, x); !has {
							changed = true
							followSets[p].Insert(x)
						}
					}
				}
			}
		}
	}
	ff.followSets = make(map[parser.GrammarParticle][]parser.GrammarParticle)
	for r, v := range followSets { // Collect results.
		ff.followSets[r] = make([]parser.GrammarParticle, 0, v.Size())
		for c := v.First(); c.HasNext(); {
			ff.followSets[r] = append(ff.followSets[r], c.Next().(parser.GrammarParticle))
		}
	}
	return nil
}
// GetNihilisticAugmentGrammar builds an epsilon-augmented ("nihilistic
// normal form") version of g plus a syntax-tree transform that maps parse
// trees of the augmented grammar back onto the original grammar.
//
// "Exceptional epsilons" are nonterminals that can derive ε but are not
// purely nihilistic; each gets a shadow nonterminal named "<name>-ε".
// Every production containing such symbols is instantiated once per subset
// of those RHS positions, with the shadow name substituted at the chosen
// positions. Returns the augmented grammar, the reverse transform, and any
// error encountered along the way.
func GetNihilisticAugmentGrammar(g parser.Grammar) (parser.Grammar, parser.SyntaxTreeTransform, error) {
	exceptionalEpsilons := []parser.GrammarParticle{}
	idxg := parser.GetIndexedGrammar(g)
	idx, err := idxg.GetIndex(index.BASIC_INDEX)
	if err != nil {
		return nil, nil, err
	}
	bidx := idx.(*index.BasicGrammarIndex)
	idx, err = idxg.GetIndex(index.NAME_INDEX)
	if err != nil {
		return nil, nil, err
	}
	nidx := idx.(*index.NameIndex)
	// Collect nonterminals that derive ε but are not nihilistic.
	for _, nt := range idxg.Nonterminals() {
		n, err := IsNihilistic(nt)
		if err != nil {
			return nil, nil, err
		}
		if bidx.Epsilon(nt) && !n {
			exceptionalEpsilons = append(exceptionalEpsilons, nt)
		}
	}
	gb := parser.OpenGrammarBuilder()
	// Copy particles over, dropping the builder-managed asterisk start
	// symbol and the epsilon terminal.
	for _, nt := range g.Nonterminals() {
		if nt.Asterisk() {
			continue
		}
		gb.Nonterminals(nt.Name())
	}
	for _, t := range g.Terminals() {
		if t.Epsilon() {
			continue
		}
		gb.Terminals(t.Name())
	}
	// augmentMap: original name -> shadow name; invMap is its inverse.
	augmentMap := make(map[string]string)
	invMap := make(map[string]string)
	for _, e := range exceptionalEpsilons {
		eName := e.Name() + "-ε"
		augmentMap[e.Name()] = eName
		invMap[eName] = e.Name()
		gb.Nonterminals(eName)
		//gb.Rule().Lhs(eName).Rhs("`e")
	}
	for _, nt := range g.Nonterminals() {
		for i := 0; i < bidx.NumLhsStarts(nt); i++ {
			prod := bidx.LhsStart(nt, i)
			//fmt.Printf("LHSTART(%s,%d): %s\n", nt.String(), i, prod.String())
			exIdx := []int{} // RHS positions holding an exceptional-ε symbol
			rhs := []string{}
			for j := 0; j < prod.RhsLen(); j++ {
				t := prod.Rhs(j)
				if _, has := augmentMap[t.Name()]; has {
					exIdx = append(exIdx, j)
					//fmt.Println("exidx gets "+t.String())
				}
				rhs = append(rhs, t.Name())
			}
			if nt.Asterisk() { // Initial rule is special-cased.
				//fmt.Println("rule transfers: "+prod.String())
				//fmt.Println("Args: {"+prod.Lhs(0).Name()+"}, {"+strings.Join(rhs,",")+"}")
				gb.Rule().Lhs(prod.Lhs(0).Name()).Rhs(rhs...)
				// If the start symbol itself derives ε, route it to its
				// shadow so nil input remains accepted.
				initSym := nidx.Nonterminal(rhs[0])
				if initSym != nil {
					if bidx.Epsilon(initSym) {
						gb.Rule().Lhs(initSym.Name()).Rhs(augmentMap[initSym.Name()])
					}
				}
			} else {
				rhsinst := make([]string, len(rhs))
				//fmt.Printf("index len %d\n", len(exIdx))
				// s enumerates subsets of the exceptional positions —
				// presumably cnt.FirstSelection/Next/Test is a
				// combinatorial selection iterator; verify in package cnt.
				s := cnt.FirstSelection(uint(len(exIdx)))
				for {
					// nnCount counts RHS symbols that are NOT nihilistic:
					// terminals, plus nonterminals with IsNihilistic false.
					nnCount := 0
					for j := 0; j < len(rhs); j++ {
						rhsinst[j] = rhs[j]
						if nidx.Terminal(rhs[j]) != nil {
							nnCount++
						} else {
							nt := nidx.Nonterminal(rhs[j])
							nihil, err := IsNihilistic(nt)
							if err != nil {
								return nil, nil, err
							}
							if !nihil {
								nnCount++
							}
						}
					}
					// NOTE(review): redundant — the loop above already set
					// rhsinst[j] = rhs[j] for every j.
					copy(rhsinst, rhs)
					// Substitute shadow names at the selected positions.
					for j := 0; j < len(exIdx); j++ {
						if s.Test(j) {
							//fmt.Printf("idx %d replacement: %s->%s\n", exIdx[j], rhsinst[exIdx[j]], augmentMap[rhsinst[exIdx[j]]])
							rhsinst[exIdx[j]] = augmentMap[rhsinst[exIdx[j]]]
							nnCount--
						}
					}
					// If every remaining symbol is nihilistic, this rule
					// instance is headed by the shadow nonterminal.
					var head string
					if nnCount == 0 {
						head = augmentMap[prod.Lhs(0).Name()]
					} else {
						head = prod.Lhs(0).Name()
					}
					//fmt.Println("rule transforms: "+prod.String())
					//fmt.Println("Args: {"+head+"}, {"+strings.Join(rhs,",")+"}")
					gb.Rule().Lhs(head).Rhs(rhsinst...)
					if s.HasNext() {
						s = s.Next()
					} else {
						break
					}
				}
			}
		}
	}
	gb.Name(g.Name() + "-ε")
	augmentedGrammar, err := gb.Build()
	if err != nil {
		return nil, nil, err
	}
	// Re-index the augmented grammar to build the reverse mappings.
	augidx := parser.GetIndexedGrammar(augmentedGrammar)
	idx, err = augidx.GetIndex(index.NAME_INDEX)
	if err != nil {
		return nil, nil, err
	}
	anidx := idx.(*index.NameIndex)
	idx, err = augidx.GetIndex(index.BASIC_INDEX)
	if err != nil {
		return nil, nil, err
	}
	abidx := idx.(*index.BasicGrammarIndex)
	// reverseMap: shadow particle (augmented grammar) -> original particle.
	reverseMap := make(map[parser.GrammarParticle]parser.GrammarParticle)
	for k, v := range augmentMap {
		reverseMap[anidx.Nonterminal(v)] = nidx.Nonterminal(k)
	}
	// prodMap: augmented production -> its original (preimage) production.
	prodMap := make(map[parser.Production]parser.Production)
	for _, p := range augmentedGrammar.Productions() {
		// Ignore the special case start rule for grammars that accept nil input.
		if p.LhsLen() == 1 && p.RhsLen() == 1 {
			initSym := abidx.LhsStart(augmentedGrammar.Asterisk(), 0).Rhs(0)
			if p.Lhs(0) == initSym && invMap[p.Rhs(0).Name()] == initSym.Name() {
				continue
			}
		}
		if p.RhsLen() == 1 && p.Rhs(0).Epsilon() {
			continue
		}
		// Translate the RHS back to original names...
		rhs := make([]string, 0, p.RhsLen())
		for i := 0; i < p.RhsLen(); i++ {
			rp := p.Rhs(i)
			if ot, has := reverseMap[rp]; has {
				rhs = append(rhs, ot.Name())
			} else {
				rhs = append(rhs, rp.Name())
			}
		}
		//fmt.Printf("Searching for preimage rhs %s\n", strings.Join(rhs,","))
		// ...then search the original grammar for a production with that
		// RHS whose head matches (via reverseMap for shadow heads).
		var target parser.Production
		for _, cp := range nidx.RhsNames(rhs) {
			//fmt.Println(" considering "+cp.String())
			//for _, p := range g.Productions() {
			//fmt.Println(p.String())
			//}
			if nt, has := reverseMap[p.Lhs(0)]; has {
				if cp.Lhs(0) == nt {
					target = cp
					break
				}
			} else {
				if cp.Lhs(0).Name() == p.Lhs(0).Name() {
					target = cp
					break
				}
			}
		}
		if target == nil {
			return nil, nil, errors.New("Could not find preimage of augmented production rule: " + p.String())
		}
		prodMap[p] = target
	}
	// rtrans recursively rewrites a syntax tree over the augmented grammar
	// into one over the original grammar's particles and productions.
	var rtrans func(treeNode parser.SyntaxTreeNode) (parser.SyntaxTreeNode, error)
	rtrans = func(treeNode parser.SyntaxTreeNode) (parser.SyntaxTreeNode, error) {
		part := treeNode.Part()
		if op, has := reverseMap[part]; has {
			part = op
		}
		exp := make([]parser.SyntaxTreeNode, treeNode.NumChildren())
		for i := 0; i < len(exp); i++ {
			st := treeNode.Child(i)
			// NOTE(review): only children rooted at shadow particles are
			// recursed into; shadow particles deeper inside a non-shadow
			// subtree are left unrewritten — confirm this is intended.
			if _, has := reverseMap[st.Part()]; has {
				r, err := rtrans(st)
				if err != nil {
					return nil, err
				}
				exp[i] = r
			} else {
				exp[i] = st
			}
		}
		return &parser.BasicSyntaxTreeNode{
			Particle:       part,
			FirstTokenIdx:  treeNode.First(),
			LastTokenIdx:   treeNode.Last(),
			SyntacticValue: treeNode.Value(),
			Prod:           prodMap[treeNode.Rule()],
			Expansion:      exp,
		}, nil
	}
	return augmentedGrammar, rtrans, nil
}