Beispiel #1
0
// Initialize scans the productions of g and records the grammar's overall
// class and regularity on the index.
//
// The class starts at parser.CONSTANT and is raised to the highest value
// reported by parser.GetProductionClass. The regularity starts at
// parser.STRICT_UNITARY and is joined with each production's regularity for
// as long as the class is at most parser.REGULAR; whenever the class is
// raised above parser.REGULAR the regularity is set to parser.NONREGULAR.
// Productions whose first left-hand particle is the asterisk are skipped.
// The returned error is always nil.
func (gci *GrammarClassIndex) Initialize(g parser.Grammar) error {
	gci.g = g
	gci.gclass, gci.greg = parser.CONSTANT, parser.STRICT_UNITARY
	for _, prod := range g.Productions() {
		if prod.Lhs(0).Asterisk() {
			continue
		}
		if class := parser.GetProductionClass(prod); class > gci.gclass {
			gci.gclass = class
			if class > parser.REGULAR {
				gci.greg = parser.NONREGULAR
			}
		}
		// Regularity is only tracked while the grammar is still regular.
		if gci.gclass > parser.REGULAR {
			continue
		}
		gci.greg = gci.greg.Join(parser.GetProductionRegularity(prod))
	}
	return nil
}
Beispiel #2
0
// Initialize builds the name lookup tables for g.
//
// Nonterminals and terminals are indexed by their Name(). Every production
// is additionally filed under two keys: the names of its left-hand particles
// joined with "|" (lhsNames) and the names of its right-hand particles joined
// with "|" (rhsNames). Productions sharing a key are kept in the order in
// which g.Productions() yields them. The returned error is always nil.
func (ni *NameIndex) Initialize(g parser.Grammar) error {
	ni.nonterminalsByName = make(map[string]parser.GrammarParticle)
	ni.terminalsByName = make(map[string]parser.GrammarParticle)
	ni.lhsNames = make(map[string][]parser.Production)
	ni.rhsNames = make(map[string][]parser.Production)

	for i := 0; i < g.NumNonterminals(); i++ {
		particle := g.Nonterminal(i)
		ni.nonterminalsByName[particle.Name()] = particle
	}
	for i := 0; i < g.NumTerminals(); i++ {
		particle := g.Terminal(i)
		ni.terminalsByName[particle.Name()] = particle
	}

	for _, prod := range g.Productions() {
		// Left-hand key: particle names separated by '|'.
		var lhsKey []byte
		for i, n := 0, prod.LhsLen(); i < n; i++ {
			if i > 0 {
				lhsKey = append(lhsKey, '|')
			}
			lhsKey = append(lhsKey, prod.Lhs(i).Name()...)
		}
		lk := string(lhsKey)
		// append on a missing key starts from a nil slice, so no explicit
		// initialisation of the entry is needed.
		ni.lhsNames[lk] = append(ni.lhsNames[lk], prod)

		// Right-hand key, built the same way.
		var rhsKey []byte
		for i, n := 0, prod.RhsLen(); i < n; i++ {
			if i > 0 {
				rhsKey = append(rhsKey, '|')
			}
			rhsKey = append(rhsKey, prod.Rhs(i).Name()...)
		}
		rk := string(rhsKey)
		ni.rhsNames[rk] = append(ni.rhsNames[rk], prod)
	}
	return nil
}
Beispiel #3
0
// TestNNF builds a small grammar in which the nonterminal A can derive the
// empty string, checks that IsNihilisticNormalForm rejects it, and then runs
// GetNihilisticAugmentGrammar on it. The augmented grammar's name, terminals,
// nonterminals and productions are printed for manual inspection; the test
// fails if any step returns an error, if the grammar is reported as already
// being in nihilistic normal form, or if no tree transform is returned.
func TestNNF(t *testing.T) {
	// Build a simple BNF grammar description.
	gb := parser.OpenGrammarBuilder()

	gb.Name("a4").
		Terminals("a").
		Nonterminals("S", "A", "E").
		Rule().Lhs("`*").Rhs("S", "`.").
		Rule().Lhs("S").Rhs("A", "A", "A", "A").
		Rule().Lhs("A").Rhs("a").
		Rule().Lhs("A").Rhs("E").
		Rule().Lhs("E").Rhs("`e")

	g, err := gb.Build()
	if err != nil {
		t.Fatal(err)
	}

	nnf, err := IsNihilisticNormalForm(g)
	if err != nil {
		// Report the actual error instead of an empty failure message.
		t.Fatal(err)
	}
	if nnf {
		t.Fatal("Grammar returned NNF.")
	}

	fmt.Println("Grammar is not NNF, transforming.")
	aGrammar, rTransform, err := GetNihilisticAugmentGrammar(g)
	if err != nil {
		t.Fatal(err)
	}
	if rTransform == nil {
		t.Error("GetNihilisticAugmentGrammar returned a nil tree transform")
	}

	fmt.Println("Name: " + aGrammar.Name())
	// Loop variables are named so that they do not shadow the *testing.T.
	terms := make([]string, aGrammar.NumTerminals())
	for i, term := range aGrammar.Terminals() {
		terms[i] = term.String()
	}
	nterms := make([]string, aGrammar.NumNonterminals())
	for i, nonterm := range aGrammar.Nonterminals() {
		nterms[i] = nonterm.String()
	}
	fmt.Println("Terminals: " + strings.Join(terms, ", "))
	fmt.Println("Nonterminals: " + strings.Join(nterms, ", "))
	fmt.Println("Productions:")
	for _, p := range aGrammar.Productions() {
		fmt.Println("   " + p.String())
	}
}
Beispiel #4
0
// Initialize populates the basic lookup tables for g.
//
// For every production it records which particle starts and ends its left-
// and right-hand side (lhsStarts, lhsEnds, rhsStarts, rhsEnds) and which
// particles occur anywhere on either side (lhsIncludes, rhsIncludes, with
// each production listed once per particle). Nonterminals with a direct
// epsilon production are stored in epsilons, and that table is then closed
// over productions whose right-hand side consists only of nonterminals
// already in it. The start (asterisk) production is handled separately and
// is not part of the inclusion sets built from the other productions.
// The returned error is always nil.
func (bgi *BasicGrammarIndex) Initialize(g parser.Grammar) error {
	type prodSet map[parser.Production]int

	bgi.grammar = g
	bgi.epsilons = make(map[parser.GrammarParticle]parser.Production)
	bgi.lhsIncludes = make(map[parser.GrammarParticle][]parser.Production)
	bgi.rhsIncludes = make(map[parser.GrammarParticle][]parser.Production)
	bgi.lhsStarts = make(map[parser.GrammarParticle][]parser.Production)
	bgi.rhsStarts = make(map[parser.GrammarParticle][]parser.Production)
	bgi.lhsEnds = make(map[parser.GrammarParticle][]parser.Production)
	bgi.rhsEnds = make(map[parser.GrammarParticle][]parser.Production)

	// Per-particle sets of productions, used to deduplicate the inclusion
	// tables before they are flattened into slices.
	lhsSeen := make(map[parser.GrammarParticle]prodSet)
	rhsSeen := make(map[parser.GrammarParticle]prodSet)
	mark := func(sets map[parser.GrammarParticle]prodSet, pt parser.GrammarParticle, p parser.Production) {
		set, ok := sets[pt]
		if !ok {
			set = make(prodSet)
			sets[pt] = set
		}
		set[p] = 1
	}

	for _, prod := range g.Productions() {
		if prod.LhsLen() == 1 && prod.Lhs(0).Asterisk() {
			// Start rule: indexed by hand, then skipped by the generic code.
			// NOTE(review): the rule is filed in rhsStarts under its
			// left-hand particle, while Rhs(0) only gets an empty rhsStarts
			// entry - confirm this is intended.
			head, first, second := prod.Lhs(0), prod.Rhs(0), prod.Rhs(1)
			bgi.lhsStarts[head] = []parser.Production{prod}
			bgi.lhsEnds[head] = []parser.Production{prod}
			bgi.rhsStarts[head] = []parser.Production{prod}
			if _, ok := bgi.rhsStarts[first]; !ok {
				bgi.rhsStarts[first] = []parser.Production{}
			}
			// NOTE(review): rhsIncludes entries written here are replaced
			// further down for any particle that also occurs on the
			// right-hand side of a non-start production - verify.
			bgi.rhsIncludes[first] = append(bgi.rhsIncludes[first], prod)
			bgi.rhsIncludes[second] = []parser.Production{prod}
			bgi.rhsEnds[second] = []parser.Production{prod}
			continue
		}

		// Direct epsilon rule: a single nonterminal deriving epsilon.
		if prod.LhsLen() == 1 && prod.Lhs(0).Nonterminal() && prod.RhsLen() == 1 && prod.Rhs(0).Epsilon() {
			bgi.epsilons[prod.Lhs(0)] = prod
		}

		lhsFirst := prod.Lhs(0)
		bgi.lhsStarts[lhsFirst] = append(bgi.lhsStarts[lhsFirst], prod)
		lhsLast := prod.Lhs(prod.LhsLen() - 1)
		bgi.lhsEnds[lhsLast] = append(bgi.lhsEnds[lhsLast], prod)
		rhsFirst := prod.Rhs(0)
		bgi.rhsStarts[rhsFirst] = append(bgi.rhsStarts[rhsFirst], prod)
		rhsLast := prod.Rhs(prod.RhsLen() - 1)
		bgi.rhsEnds[rhsLast] = append(bgi.rhsEnds[rhsLast], prod)

		for i := 0; i < prod.LhsLen(); i++ {
			mark(lhsSeen, prod.Lhs(i), prod)
		}
		for i := 0; i < prod.RhsLen(); i++ {
			mark(rhsSeen, prod.Rhs(i), prod)
		}
	}

	// Flatten the deduplicated sets into the public inclusion tables. Map
	// iteration makes the order of each slice unspecified.
	for pt, set := range lhsSeen {
		prods := make([]parser.Production, 0, len(set))
		for p := range set {
			prods = append(prods, p)
		}
		bgi.lhsIncludes[pt] = prods
	}
	for pt, set := range rhsSeen {
		prods := make([]parser.Production, 0, len(set))
		for p := range set {
			prods = append(prods, p)
		}
		bgi.rhsIncludes[pt] = prods
	}

	// Close epsilons: repeat until a full pass over the productions adds
	// nothing new.
	for changed := true; changed; {
		changed = false
	productions:
		for _, prod := range g.Productions() {
			if prod.LhsLen() != 1 {
				continue
			}
			nt := prod.Lhs(0)
			if !nt.Nonterminal() || bgi.Epsilon(nt) {
				continue
			}
			for i := 0; i < prod.RhsLen(); i++ {
				sym := prod.Rhs(i)
				if !sym.Nonterminal() || !bgi.Epsilon(sym) {
					continue productions
				}
			}
			bgi.epsilons[nt] = prod
			changed = true
		}
	}

	return nil
}
Beispiel #5
0
// Initialize computes the FIRST, LAST, IN and FOLLOW tables for g.
//
// It requires the grammar class index and the basic index of g. An error is
// returned if either index cannot be obtained, or if the grammar class is
// parser.CONTEXT_SENSITIVE or above.
//
// For each nonterminal the FIRST, LAST and IN sets are found by a
// breadth-first walk over the productions that start with it on the
// left-hand side; nonterminals reached during the walk are themselves added
// to the resulting set:
//   - FIRST scans each right-hand side left to right and stops at the first
//     terminal or at the first nonterminal the basic index does not report
//     as epsilon-deriving.
//   - LAST does the same scanning right to left.
//   - IN collects every terminal and nonterminal on the right-hand side.
//
// FOLLOW is seeded from adjacent right-hand-side particles (skipping over
// epsilon-deriving ones) and then closed by propagating the FOLLOW set of a
// production's left-hand particle to the last particle of its right-hand
// side until nothing changes.
func (ff *FFIndex) Initialize(g parser.Grammar) error {
	ff.grammar = g
	index := parser.GetIndexedGrammar(g)
	idx, err := index.GetIndex(GRAMMAR_CLASS_INDEX)
	if err != nil {
		return err
	}
	cidx := idx.(*GrammarClassIndex)
	if cidx.Class() >= parser.CONTEXT_SENSITIVE {
		return errors.New("cannot first/follow index a non-context-free grammar")
	}
	idx, err = index.GetIndex(BASIC_INDEX)
	// The error must be checked before the type assertion: asserting on a
	// failed lookup's result would panic instead of reporting the error.
	if err != nil {
		return err
	}
	bidx := idx.(*BasicGrammarIndex)

	// particles drains a tree of grammar particles into a slice, in the
	// tree's iteration order.
	particles := func(t tree.Tree) []parser.GrammarParticle {
		out := make([]parser.GrammarParticle, 0, t.Size())
		for cur := t.First(); cur.HasNext(); {
			out = append(out, cur.Next().(parser.GrammarParticle))
		}
		return out
	}

	// FIRST set calculation
	ff.firstSets = make(map[parser.GrammarParticle][]parser.GrammarParticle)
	for _, nt := range index.Nonterminals() {
		fs := tree.NewTree()
		ntseen := tree.NewTree()
		ntpending := []parser.GrammarParticle{nt}
		for len(ntpending) > 0 {
			cnt := ntpending[0]
			ntpending = ntpending[1:]
			for i := 0; i < bidx.NumLhsStarts(cnt); i++ {
				p := bidx.LhsStart(cnt, i)
				for j := 0; j < p.RhsLen(); j++ {
					rt := p.Rhs(j)
					if rt.Terminal() {
						fs.Insert(rt)
						break
					} else if rt.Nonterminal() {
						if _, has := ntseen.Lookup(c.LTE, rt); !has {
							ntseen.Insert(rt)
							fs.Insert(rt)
							ntpending = append(ntpending, rt)
						}
						// Only look past rt if it can derive epsilon.
						if !bidx.Epsilon(rt) {
							break
						}
					} else {
						break
					}
				}
			}
		}
		ff.firstSets[nt] = particles(fs)
	}

	// LAST set calculation
	ff.lastSets = make(map[parser.GrammarParticle][]parser.GrammarParticle)
	for _, nt := range index.Nonterminals() {
		fs := tree.NewTree()
		ntseen := tree.NewTree()
		ntpending := []parser.GrammarParticle{nt}
		for len(ntpending) > 0 {
			cnt := ntpending[0]
			ntpending = ntpending[1:]
			for i := 0; i < bidx.NumLhsStarts(cnt); i++ {
				p := bidx.LhsStart(cnt, i)
				for j := p.RhsLen() - 1; j >= 0; j-- {
					rt := p.Rhs(j)
					if rt.Terminal() {
						fs.Insert(rt)
						break
					}
					if rt.Nonterminal() {
						if _, has := ntseen.Lookup(c.LTE, rt); !has {
							ntseen.Insert(rt)
							fs.Insert(rt)
							ntpending = append(ntpending, rt)
						}
						// As in the FIRST scan, a nonterminal that cannot
						// derive epsilon ends the scan whether or not it was
						// seen before; otherwise particles to its left would
						// wrongly be added to the LAST set.
						if !bidx.Epsilon(rt) {
							break
						}
					}
				}
			}
		}
		ff.lastSets[nt] = particles(fs)
	}

	// IN set calculation
	ff.inSets = make(map[parser.GrammarParticle][]parser.GrammarParticle)
	for _, nt := range index.Nonterminals() {
		fs := tree.NewTree()
		ntseen := tree.NewTree()
		ntpending := []parser.GrammarParticle{nt}
		for len(ntpending) > 0 {
			cnt := ntpending[0]
			ntpending = ntpending[1:]
			for i := 0; i < bidx.NumLhsStarts(cnt); i++ {
				p := bidx.LhsStart(cnt, i)
				for j := p.RhsLen() - 1; j >= 0; j-- {
					rt := p.Rhs(j)
					if rt.Terminal() {
						fs.Insert(rt)
					}
					if rt.Nonterminal() {
						if _, has := ntseen.Lookup(c.LTE, rt); !has {
							ntseen.Insert(rt)
							fs.Insert(rt)
							ntpending = append(ntpending, rt)
						}
					}
				}
			}
		}
		ff.inSets[nt] = particles(fs)
	}

	// FOLLOW set calculation
	followRefs := make(map[parser.GrammarParticle]tree.Tree)
	followSets := make(map[parser.GrammarParticle]tree.Tree)
	for _, p := range g.Productions() { // First-pass.
		for i := 0; i < p.RhsLen()-1; i++ {
			for j := i + 1; j < p.RhsLen(); j++ {
				if _, has := followSets[p.Rhs(i)]; !has {
					followSets[p.Rhs(i)] = tree.NewTree()
				}
				followSets[p.Rhs(i)].Insert(p.Rhs(j))
				if !bidx.Epsilon(p.Rhs(j)) {
					break
				}
			}
		}
		// The last right-hand particle inherits the FOLLOW set of the
		// production's left-hand particle; remember that dependency.
		tp := p.Rhs(p.RhsLen() - 1)
		if _, has := followRefs[tp]; !has {
			followRefs[tp] = tree.NewTree()
		}
		followRefs[tp].Insert(p.Lhs(0))
	}
	changed := true
	for changed { // Take closure.
		changed = false
		for p, prt := range followRefs {
			for cr := prt.First(); cr.HasNext(); {
				fp := cr.Next().(parser.GrammarParticle) // x in Follow(fp) -> x in Follow(p)
				if fromSet, has := followSets[fp]; has {
					if _, has := followSets[p]; !has {
						followSets[p] = tree.NewTree()
					}
					for k := fromSet.First(); k.HasNext(); {
						x := k.Next().(parser.GrammarParticle)
						if _, has := followSets[p].Lookup(c.LTE, x); !has {
							changed = true
							followSets[p].Insert(x)
						}
					}
				}
			}
		}
	}
	ff.followSets = make(map[parser.GrammarParticle][]parser.GrammarParticle)
	for r, v := range followSets { // Collect results.
		ff.followSets[r] = particles(v)
	}

	return nil
}
Beispiel #6
0
// GetNihilisticAugmentGrammar builds an augmented copy of g in which every
// "exceptional" nonterminal - one the basic index reports as epsilon-deriving
// but IsNihilistic reports as not nihilistic - gets a companion nonterminal
// named "<name>-ε".
//
// It returns the augmented grammar (named g.Name() + "-ε") together with a
// syntax tree transform that maps nodes of the augmented grammar back to the
// particles and productions of g. An error is returned if a required index
// cannot be obtained, if IsNihilistic or the grammar builder fails, or if an
// augmented production has no matching production in g.
func GetNihilisticAugmentGrammar(g parser.Grammar) (parser.Grammar, parser.SyntaxTreeTransform, error) {
	exceptionalEpsilons := []parser.GrammarParticle{}
	idxg := parser.GetIndexedGrammar(g)
	idx, err := idxg.GetIndex(index.BASIC_INDEX)
	if err != nil {
		return nil, nil, err
	}
	bidx := idx.(*index.BasicGrammarIndex)
	idx, err = idxg.GetIndex(index.NAME_INDEX)
	if err != nil {
		return nil, nil, err
	}
	nidx := idx.(*index.NameIndex)

	// Collect the nonterminals that can derive epsilon without being
	// nihilistic; these are the ones that need a "-ε" companion.
	for _, nt := range idxg.Nonterminals() {
		n, err := IsNihilistic(nt)
		if err != nil {
			return nil, nil, err
		}
		if bidx.Epsilon(nt) && !n {
			exceptionalEpsilons = append(exceptionalEpsilons, nt)
		}
	}

	// Start the new grammar with the original symbols, leaving out the
	// asterisk nonterminal and the epsilon terminal.
	gb := parser.OpenGrammarBuilder()
	for _, nt := range g.Nonterminals() {
		if nt.Asterisk() {
			continue
		}
		gb.Nonterminals(nt.Name())
	}
	for _, t := range g.Terminals() {
		if t.Epsilon() {
			continue
		}
		gb.Terminals(t.Name())
	}
	// augmentMap: original name -> companion name; invMap is its inverse.
	augmentMap := make(map[string]string)
	invMap := make(map[string]string)
	for _, e := range exceptionalEpsilons {
		eName := e.Name() + "-ε"
		augmentMap[e.Name()] = eName
		invMap[eName] = e.Name()
		gb.Nonterminals(eName)
		//gb.Rule().Lhs(eName).Rhs("`e")
	}
	// Re-emit every production, expanding those that mention exceptional
	// nonterminals on the right-hand side.
	for _, nt := range g.Nonterminals() {
		for i := 0; i < bidx.NumLhsStarts(nt); i++ {
			prod := bidx.LhsStart(nt, i)
			//fmt.Printf("LHSTART(%s,%d): %s\n", nt.String(), i, prod.String())
			// exIdx holds the right-hand positions occupied by exceptional
			// nonterminals; rhs holds all right-hand names in order.
			exIdx := []int{}
			rhs := []string{}
			for j := 0; j < prod.RhsLen(); j++ {
				t := prod.Rhs(j)
				if _, has := augmentMap[t.Name()]; has {
					exIdx = append(exIdx, j)
					//fmt.Println("exidx gets "+t.String())
				}
				rhs = append(rhs, t.Name())
			}
			if nt.Asterisk() { // Initial rule is special-cased.
				//fmt.Println("rule transfers:  "+prod.String())
				//fmt.Println("Args: {"+prod.Lhs(0).Name()+"}, {"+strings.Join(rhs,",")+"}")
				// The start rule is copied unchanged. If the initial symbol
				// can derive epsilon, a rule from it to its companion is
				// added as well.
				// NOTE(review): augmentMap only has entries for exceptional
				// nonterminals, so the lookup below yields "" for an
				// epsilon-deriving initial symbol that is nihilistic - confirm
				// that case cannot occur.
				gb.Rule().Lhs(prod.Lhs(0).Name()).Rhs(rhs...)
				initSym := nidx.Nonterminal(rhs[0])
				if initSym != nil {
					if bidx.Epsilon(initSym) {
						gb.Rule().Lhs(initSym.Name()).Rhs(augmentMap[initSym.Name()])
					}
				}
			} else {
				rhsinst := make([]string, len(rhs))
				//fmt.Printf("index len %d\n", len(exIdx))
				// presumably s enumerates every subset of the len(exIdx)
				// exceptional positions - TODO confirm against cnt.
				s := cnt.FirstSelection(uint(len(exIdx)))
				for {
					// nnCount counts right-hand entries that are terminals or
					// non-nihilistic nonterminals; it is decremented below
					// for every position swapped to its companion.
					nnCount := 0
					for j := 0; j < len(rhs); j++ {
						rhsinst[j] = rhs[j]
						if nidx.Terminal(rhs[j]) != nil {
							nnCount++
						} else {
							nt := nidx.Nonterminal(rhs[j])
							nihil, err := IsNihilistic(nt)
							if err != nil {
								return nil, nil, err
							}
							if !nihil {
								nnCount++
							}
						}
					}
					// NOTE(review): rhsinst was already filled element by
					// element in the loop above, so this copy looks redundant.
					copy(rhsinst, rhs)
					for j := 0; j < len(exIdx); j++ {
						if s.Test(j) {
							//fmt.Printf("idx %d replacement: %s->%s\n", exIdx[j], rhsinst[exIdx[j]], augmentMap[rhsinst[exIdx[j]]])
							rhsinst[exIdx[j]] = augmentMap[rhsinst[exIdx[j]]]
							nnCount--
						}
					}
					// When nothing non-nihilistic remains on the right-hand
					// side, the rule is attached to the companion of the
					// left-hand nonterminal instead of the original.
					// NOTE(review): head is "" if the left-hand name has no
					// augmentMap entry - verify this cannot happen here.
					var head string
					if nnCount == 0 {
						head = augmentMap[prod.Lhs(0).Name()]
					} else {
						head = prod.Lhs(0).Name()
					}
					//fmt.Println("rule transforms:  "+prod.String())
					//fmt.Println("Args: {"+head+"}, {"+strings.Join(rhs,",")+"}")
					gb.Rule().Lhs(head).Rhs(rhsinst...)
					if s.HasNext() {
						s = s.Next()
					} else {
						break
					}
				}
			}
		}
	}
	gb.Name(g.Name() + "-ε")
	augmentedGrammar, err := gb.Build()
	if err != nil {
		return nil, nil, err
	}
	augidx := parser.GetIndexedGrammar(augmentedGrammar)
	idx, err = augidx.GetIndex(index.NAME_INDEX)
	if err != nil {
		return nil, nil, err
	}
	anidx := idx.(*index.NameIndex)

	idx, err = augidx.GetIndex(index.BASIC_INDEX)
	if err != nil {
		return nil, nil, err
	}
	abidx := idx.(*index.BasicGrammarIndex)

	// reverseMap: companion particle in the augmented grammar -> original
	// particle in g.
	reverseMap := make(map[parser.GrammarParticle]parser.GrammarParticle)
	for k, v := range augmentMap {
		reverseMap[anidx.Nonterminal(v)] = nidx.Nonterminal(k)
	}
	// prodMap: augmented production -> the production of g it came from.
	prodMap := make(map[parser.Production]parser.Production)
	for _, p := range augmentedGrammar.Productions() {

		// Ignore the special case start rule for grammars that accept nil input.
		if p.LhsLen() == 1 && p.RhsLen() == 1 {
			initSym := abidx.LhsStart(augmentedGrammar.Asterisk(), 0).Rhs(0)
			if p.Lhs(0) == initSym && invMap[p.Rhs(0).Name()] == initSym.Name() {
				continue
			}
		}
		// Epsilon rules are not mapped back.
		if p.RhsLen() == 1 && p.Rhs(0).Epsilon() {
			continue
		}
		// Translate the right-hand side back to original names, then look
		// for a production of g with that right-hand side and a matching
		// left-hand particle.
		rhs := make([]string, 0, p.RhsLen())
		for i := 0; i < p.RhsLen(); i++ {
			rp := p.Rhs(i)
			if ot, has := reverseMap[rp]; has {
				rhs = append(rhs, ot.Name())
			} else {
				rhs = append(rhs, rp.Name())
			}
		}
		//fmt.Printf("Searching for preimage rhs %s\n", strings.Join(rhs,","))
		var target parser.Production
		for _, cp := range nidx.RhsNames(rhs) {
			//fmt.Println("  considering "+cp.String())
			//for _, p := range g.Productions() {
			//fmt.Println(p.String())
			//}
			if nt, has := reverseMap[p.Lhs(0)]; has {
				if cp.Lhs(0) == nt {
					target = cp
					break
				}
			} else {
				if cp.Lhs(0).Name() == p.Lhs(0).Name() {
					target = cp
					break
				}
			}
		}
		if target == nil {
			return nil, nil, errors.New("Could not find preimage of augmented production rule: " + p.String())
		}
		prodMap[p] = target
	}
	var rtrans func(treeNode parser.SyntaxTreeNode) (parser.SyntaxTreeNode, error)

	// rtrans rebuilds a node with its particle and production mapped back to
	// g. It recurses only into children whose particle is a companion
	// nonterminal; all other children are reused as they are.
	rtrans = func(treeNode parser.SyntaxTreeNode) (parser.SyntaxTreeNode, error) {
		part := treeNode.Part()
		if op, has := reverseMap[part]; has {
			part = op
		}
		exp := make([]parser.SyntaxTreeNode, treeNode.NumChildren())
		for i := 0; i < len(exp); i++ {
			st := treeNode.Child(i)
			if _, has := reverseMap[st.Part()]; has {
				r, err := rtrans(st)
				if err != nil {
					return nil, err
				}
				exp[i] = r
			} else {
				exp[i] = st
			}
		}
		return &parser.BasicSyntaxTreeNode{
			Particle:       part,
			FirstTokenIdx:  treeNode.First(),
			LastTokenIdx:   treeNode.Last(),
			SyntacticValue: treeNode.Value(),
			Prod:           prodMap[treeNode.Rule()],
			Expansion:      exp,
		}, nil
	}
	return augmentedGrammar, rtrans, nil
}