Beispiel #1
0
 /// <summary>
 /// Creates the production lhs ::= rhsButLast last, i.e. the right-hand side is a
 /// fresh array holding the symbols of rhsButLast followed by last.
 /// </summary>
 /// <param name="lhs">left-hand side nonterminal</param>
 /// <param name="rhsButLast">all right-hand side symbols except the final one; the array is copied, not stored</param>
 /// <param name="last">final right-hand side symbol</param>
 /// <exception cref="ArgumentNullException">if lhs, rhsButLast, last, or any element of rhsButLast is null</exception>
 public Production(Nonterminal lhs, GrammarSymbol[] rhsButLast, GrammarSymbol last)
 {
     // Fail fast with the offending parameter name instead of storing a null
     // that only blows up later (e.g. when the right-hand side is searched).
     if (lhs == null)
     {
         throw new ArgumentNullException("lhs");
     }
     if (rhsButLast == null)
     {
         throw new ArgumentNullException("rhsButLast");
     }
     if (last == null)
     {
         throw new ArgumentNullException("last");
     }
     if (Array.Exists(rhsButLast, x => x == null))
     {
         throw new ArgumentNullException("rhsButLast", "rhsButLast must not contain null symbols");
     }

     this.Lhs = lhs;
     this.Rhs = new GrammarSymbol[rhsButLast.Length + 1];
     Array.Copy(rhsButLast, this.Rhs, rhsButLast.Length);
     this.Rhs[rhsButLast.Length] = last;
 }
        /// <summary>
        /// Lazily enumerates the variations of <paramref name="symbols"/> obtained by
        /// optionally dropping symbols that are nonterminals contained in
        /// <paramref name="nullables"/>. Every other symbol is always kept.
        /// The empty list is represented by null.
        /// </summary>
        /// <param name="symbols">the symbol list to vary, or null for the empty list</param>
        /// <param name="nullables">the nonterminals that may be dropped</param>
        /// <returns>for each variation of the tail: the variation without the head (only if the head is in nullables), then the variation with the head</returns>
        static IEnumerable <ConsList <GrammarSymbol> > EnumerateNullableFreeVariations(ConsList <GrammarSymbol> symbols, HashSet <Nonterminal> nullables)
        {
            // The empty list has exactly one variation: itself.
            if (symbols == null)
            {
                yield return null;
                yield break;
            }

            foreach (var tail in EnumerateNullableFreeVariations(symbols.Rest, nullables))
            {
                GrammarSymbol head         = symbols.First;
                Nonterminal   headVariable = head as Nonterminal;
                bool          droppable    = headVariable != null && nullables.Contains(headVariable);

                // A droppable head contributes the tail on its own first ...
                if (droppable)
                {
                    yield return tail;
                }

                // ... and in every case the head followed by the tail.
                yield return new ConsList <GrammarSymbol>(head, tail);
            }
        }
Beispiel #3
0
 /// <summary>
 /// Builds a production whose right-hand side is the sequence rhsButLast
 /// extended by the single symbol last. The symbols are copied into a new array.
 /// </summary>
 /// <param name="lhs">left-hand side nonterminal</param>
 /// <param name="rhsButLast">the right-hand side without its final symbol</param>
 /// <param name="last">the final symbol of the right-hand side</param>
 /// <exception cref="ArgumentNullException">if lhs, rhsButLast, last, or any element of rhsButLast is null</exception>
 public Production(Nonterminal lhs, GrammarSymbol[] rhsButLast, GrammarSymbol last)
 {
     // Reject nulls up front and name the offending parameter, rather than
     // failing with a NullReferenceException or storing a null symbol.
     if (lhs == null)
     {
         throw new ArgumentNullException("lhs");
     }
     if (rhsButLast == null)
     {
         throw new ArgumentNullException("rhsButLast");
     }
     if (last == null)
     {
         throw new ArgumentNullException("last");
     }
     if (Array.Exists(rhsButLast, x => x == null))
     {
         throw new ArgumentNullException("rhsButLast", "rhsButLast must not contain null symbols");
     }

     this.Lhs = lhs;
     this.Rhs = new GrammarSymbol[rhsButLast.Length + 1];
     Array.Copy(rhsButLast, this.Rhs, rhsButLast.Length);
     this.Rhs[rhsButLast.Length] = last;
 }
Beispiel #4
0
 /// <summary>
 /// Tells whether some symbol of the right-hand side equals the given symbol.
 /// Each right-hand side symbol is compared with its own Equals method.
 /// </summary>
 /// <param name="symbol">the symbol to look for</param>
 /// <returns>true if a right-hand side symbol equals symbol, false otherwise</returns>
 internal bool RhsContainsSymbol(GrammarSymbol symbol)
 {
     var rhs = Rhs;
     int index = 0;
     while (index < rhs.Length)
     {
         if (rhs[index].Equals(symbol))
         {
             return true;
         }
         index++;
     }
     return false;
 }
Beispiel #5
0
        /// <summary>
        /// Creates the production lhs ::= rhsButLast last, i.e. the right-hand side is a
        /// fresh array holding the symbols of rhsButLast followed by last.
        /// </summary>
        /// <param name="lhs">left-hand side nonterminal</param>
        /// <param name="rhsButLast">all right-hand side symbols except the final one; the array is copied, not stored</param>
        /// <param name="last">final right-hand side symbol</param>
        /// <exception cref="ArgumentNullException">if lhs, rhsButLast, last, or any element of rhsButLast is null</exception>
        public Production(Nonterminal lhs, GrammarSymbol[] rhsButLast, GrammarSymbol last)
        {
            // Same checks, same order and same exception type as before, but each
            // failure now names the argument that was actually null.
            if (lhs == null)
            {
                throw new ArgumentNullException("lhs");
            }
            if (last == null)
            {
                throw new ArgumentNullException("last");
            }
            if (rhsButLast == null)
            {
                throw new ArgumentNullException("rhsButLast");
            }
            if (Array.Exists(rhsButLast, x => x == null))
            {
                throw new ArgumentNullException("rhsButLast", "rhsButLast must not contain null symbols");
            }

            this.Lhs = lhs;
            this.Rhs = new GrammarSymbol[rhsButLast.Length + 1];
            Array.Copy(rhsButLast, this.Rhs, rhsButLast.Length);
            this.Rhs[rhsButLast.Length] = last;
        }
        /// <summary>
        /// Produces the GNF (Greibach Normal Form) for the grammar g.
        /// If g is not already in GNF, first makes CNF.
        /// Implements a variation of the Koch-Blum algorithm. (STACS 97, pp. 47-54)
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsUselessSymbolsUnitsProductions">if true, epsilons, useless symbols and unit productions are removed from g before anything else is done</param>
        /// <returns>g itself (after the optional cleanup) if g.IsInGNF() holds; otherwise a new grammar built from the start symbol of the CNF of g and the productions computed by this method</returns>
        public static ContextFreeGrammar MkGNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            if (removeEpsilonsUselessSymbolsUnitsProductions)
               g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            if (g.IsInGNF())
                return g;

            //the cleanup flag is false here: any requested cleanup has already been applied above
            ContextFreeGrammar cnf = MkCNF(g,false);
            var Vars = cnf.variables;

            //counter for the fresh start symbols of the intermediate grammars G_[B]
            int nonterminalID = 0;

            var M = new Dictionary<Nonterminal, Automaton<GrammarSymbol>>();

            #region construct the automata M[B] for all variables B
            //id is the single state counter shared by all automata, so state numbers are unique across all M[B]
            int id = 0;
            var initStateMap = new Dictionary<Nonterminal, int>();
            var finalStateMap = new Dictionary<Nonterminal, int>();
            foreach (Nonterminal B in Vars)
            {
                initStateMap[B] = id++;
                finalStateMap[B] = id++;
            }

            var movesOfM = new Dictionary<Nonterminal, List<Move<GrammarSymbol>>>();

            foreach (Nonterminal B in Vars)
                movesOfM[B] = new List<Move<GrammarSymbol>>();

            #region construct the moves of the automata
            foreach (Nonterminal B in Vars)
            {
                //maps each variable visited from B to its state in M[B]; B itself maps to the final state
                var variableToStateMap = new Dictionary<Nonterminal, int>();
                Stack<Nonterminal> stack = new Stack<Nonterminal>();
                stack.Push(B);
                int initState = initStateMap[B];
                variableToStateMap[B] = finalStateMap[B];
                while (stack.Count > 0)
                {
                    Nonterminal C = stack.Pop();
                    foreach (Production p in cnf.GetProductions(C))
                    {
                        //production C ::= a becomes a move from the initial state to the state of C, labeled a
                        if (p.IsSingleExprinal)
                            movesOfM[B].Add(Move<GrammarSymbol>.Create(initState, variableToStateMap[C], p.First));
                        else
                        {
                            //production C ::= D E becomes a move from the state of D to the state of C, labeled E
                            Nonterminal D = (Nonterminal)p.First; //using the fact that the grammar is in CNF
                            if (!variableToStateMap.ContainsKey(D))
                            {
                                //visit all variables reachable that have not already been visited
                                variableToStateMap.Add(D,id++);
                                stack.Push(D);
                            }
                            GrammarSymbol E = p.Rhs[1];
                            movesOfM[B].Add(Move<GrammarSymbol>.Create(variableToStateMap[D], variableToStateMap[C], E));

                        }
                    }
                }
            }
            #endregion

            foreach (Nonterminal B in Vars)
                M[B] = Automaton<GrammarSymbol>.Create(null, initStateMap[B], new int[] {finalStateMap[B]}, movesOfM[B]);
            #endregion

            var G_ = new Dictionary<Nonterminal, ContextFreeGrammar>();

            #region construct corresponding intermediate grammars G_[B] corresponding to M[B]
            foreach (Nonterminal B in Vars)
            {
                var MB = M[B];
                bool MBfinalStateHasVariableMoves = FinalStateHasVariableMoves(MB);
                var productions = new Dictionary<Nonterminal, List<Production>>();
                //fresh start symbol of G_[B]; every non-initial state q is represented by the variable named "Q" + q
                Nonterminal startSymbol = new Nonterminal(nonterminalID++);
                var vars = new List<Nonterminal>();
                vars.Add(startSymbol);
                productions[startSymbol] = new List<Production>();

                //moves leaving the initial state become the productions of the start symbol
                foreach (var move in MB.GetMovesFrom(MB.InitialState))
                {
                    //a move into the final state may end the derivation
                    if (move.TargetState == MB.FinalState)
                        productions[startSymbol].Add(new Production(startSymbol, move.Label));
                    //otherwise (or additionally, when FinalStateHasVariableMoves(MB) is true) continue from the target state
                    if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                    {
                        var C = new Nonterminal("Q" + move.TargetState);
                        productions[startSymbol].Add(new Production(startSymbol, move.Label, C));
                        if (!productions.ContainsKey(C))
                        {
                            productions[C] = new List<Production>();
                            vars.Add(C);
                        }
                    }
                }

                //moves leaving any other state become productions of that state's variable
                foreach (int state in MB.States)
                    if (state != MB.InitialState)
                        foreach (Move<GrammarSymbol> move in MB.GetMovesFrom(state))
                        {
                            Nonterminal D = new Nonterminal("Q" + state);
                            Nonterminal C = new Nonterminal("Q" + move.TargetState);
                            if (!productions.ContainsKey(D))
                            {
                                productions[D] = new List<Production>();
                                vars.Add(D);
                            }
                            //moves from non-initial states were created above with label p.Rhs[1] of a two-symbol production;
                            //the cast assumes that symbol is a nonterminal (CNF) -- TODO confirm
                            Nonterminal E = (Nonterminal)move.Label;
                            if (move.TargetState == MB.FinalState)
                                productions[D].Add(new Production(D, E));
                            if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                            {
                                productions[D].Add(new Production(D, E, C));
                                //we pretend here that E is a terminal
                                if (!productions.ContainsKey(C))
                                {
                                    productions[C] = new List<Production>();
                                    vars.Add(C);
                                }
                            }
                        }
                G_[B] = new ContextFreeGrammar(vars, startSymbol, productions);
            }
            #endregion

            var G = new Dictionary<Nonterminal, ContextFreeGrammar>();

            #region construct the corresponding temporary G[B]'s
            foreach (Nonterminal B in Vars)
            {
                var G_B = G_[B];
                var productions = new Dictionary<Nonterminal, List<Production>>();
                //var vars = new List<Variable>();
                Nonterminal startSymbol = G_B.startSymbol;
                //the start symbol's production list is shared with G_[B], not copied
                productions[startSymbol] = G_B.productionMap[startSymbol];
                foreach (Nonterminal D in G_B.variables)
                    if (!D.Equals(startSymbol))
                    {
                        var productions_D = new List<Production>();
                        productions[D] = productions_D;
                        foreach (Production p in G_B.productionMap[D])
                        {
                            //replace the leading variable E of p by each right-hand side of the start symbol of G_[E]
                            Nonterminal E = (Nonterminal)p.First;
                            var G_E = G_[E];
                            if (p.IsUnit)
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                    productions_D.Add(new Production(D, q.Rhs));
                            else
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    //q.Rhs followed by the second symbol of p
                                    GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                                    Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                                    symbols[q.Rhs.Length] = p.Rhs[1];
                                    productions_D.Add(new Production(D, symbols));
                                }
                        }
                    }
                //ignore the variable list, it is not used
                G[B] = new ContextFreeGrammar(null, startSymbol, productions);
            }
            #endregion

            #region construct the final GNF from the G[B]'s
            var productionsGNF = new List<Production>();
            foreach (Nonterminal A in cnf.variables)
            {
                foreach (Production p in cnf.productionMap[A])
                {
                    //A ::= a is kept unchanged
                    if (p.IsSingleExprinal)
                        productionsGNF.Add(p);
                    else
                    {
                        //A ::= B C: replace B by each right-hand side of the start symbol of G[B] and append C
                        Nonterminal B = (Nonterminal)p.Rhs[0];
                        Nonterminal C = (Nonterminal)p.Rhs[1];
                        var GB = G[B];
                        foreach (Production q in GB.productionMap[GB.startSymbol])
                        {
                            GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                            Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                            symbols[q.Rhs.Length] = C;
                            productionsGNF.Add(new Production(A, symbols));
                        }
                    }
                }
            }
            //add the productions of all state variables; the temporary start symbols themselves are left out
            foreach (Nonterminal B in Vars)
            {
                var GB = G[B];
                foreach (var kv in GB.productionMap)
                    if (!kv.Key.Equals(GB.startSymbol))
                        productionsGNF.AddRange(kv.Value);
            }
            #endregion

            ContextFreeGrammar gnf = new ContextFreeGrammar(cnf.startSymbol, productionsGNF);
            return gnf;
        }
        /// <summary>
        /// Produces the EGNF (Extended Greibach Normal Form) for the grammar g.
        /// Implements a variation of the Blum-Koch algorithm.
        /// (Inf. and Comp. vol.150, pp.112-118, 1999)
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsAndUselessSymbols">if true, first removes epsilons and useless symbols, otherwise assumes that epsilons do not occur</param>
        /// <returns>Extended Greibach Normal Form of g; g itself (after the optional cleanup) if g.IsInGNF() holds</returns>
        public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
        {
            if (removeEpsilonsAndUselessSymbols)
                g = g.RemoveEpsilonsAndUselessSymbols();

            if (g.IsInGNF())
                return g;

            //leavesP: the productions whose first right-hand side symbol is not a nonterminal
            var leavesP = new List<Production>();
            //revP[X]: one pair (p.Rest, p.Lhs) for every production p whose first symbol is the nonterminal X
            var revP = new Dictionary<Nonterminal, List<Pair<GrammarSymbol[], Nonterminal>>>();

            //counter for all fresh nonterminals created in this method
            int nonterminalID = 0;

            #region compute leavesP and revP
            foreach (Nonterminal v in g.variables)
                revP[v] = new List<Pair<GrammarSymbol[], Nonterminal>>();

            foreach (Production p in g.GetProductions())
                if (!(p.First is Nonterminal))
                    leavesP.Add(p);
                else
                    revP[(Nonterminal)p.First].Add(new Pair<GrammarSymbol[], Nonterminal>(p.Rest, p.Lhs));
            #endregion

            //W[v]: result of g.GetUnitClosure(v); startSymbol[v]: the fresh temporary start symbol S_v
            var W = new Dictionary<Nonterminal, HashSet<Nonterminal>>();
            var startSymbol = new Dictionary<Nonterminal, Nonterminal>();

            #region create new start symbols and compute unit closures
            foreach (Nonterminal v in g.variables)
            {
                W[v] = g.GetUnitClosure(v);
                startSymbol[v] = new Nonterminal(nonterminalID++);
            }
            #endregion

            var P = new Dictionary<Nonterminal, List<Production>>();

            #region construct intermediate productions in P for each variable B
            foreach (Nonterminal B in g.variables)
            {
                var S_B = startSymbol[B];
                var W_B = W[B]; //unit closure of B
                //Bvar is private to B and is only accessed through Lookup; presumably Lookup returns the
                //nonterminal recorded for the given variable, creating it from nonterminalID on first use -- TODO confirm
                var Bvar = new Dictionary<Nonterminal, Nonterminal>();
                Stack<Nonterminal> stack = new Stack<Nonterminal>();
                HashSet<Nonterminal> visited = new HashSet<Nonterminal>();
                var S_B_list = new List<Production>();
                P[S_B] = S_B_list;
                foreach (Production p in leavesP)
                {
                    //S_B ::= p.Rhs followed by the Lookup result for p.Lhs
                    S_B_list.Add(new Production(S_B, p.Rhs, Lookup(Bvar, p.Lhs, ref nonterminalID)));
                    if (visited.Add(p.Lhs))
                        stack.Push(p.Lhs);
                    //when p.Lhs is in the unit closure of B, p.Rhs alone is also a production of S_B
                    if (W_B.Contains(p.Lhs))
                        S_B_list.Add(new Production(S_B, p.Rhs));
                }

                //worklist over the variables reached from the left-hand sides of the leaf productions via revP
                while (stack.Count > 0)
                {
                    Nonterminal C = stack.Pop();
                    Nonterminal C_B = Lookup(Bvar, C, ref nonterminalID);
                    List<Production> C_B_list;
                    if (!P.TryGetValue(C_B, out C_B_list))
                    {
                        C_B_list = new List<Production>();
                        P[C_B] = C_B_list;
                    }
                    //each pair t comes from a production D ::= C t.First of g
                    foreach (var t in revP[C])
                    {
                        Nonterminal D = t.Second;
                        Nonterminal D_B = Lookup(Bvar, D, ref nonterminalID);
                        C_B_list.Add(new Production(C_B, t.First, D_B));
                        //the variant without D_B is added only for a nonempty t.First and D in the unit closure of B
                        if (t.First.Length > 0 && W_B.Contains(D))
                            C_B_list.Add(new Production(C_B, t.First));
                        if (visited.Add(D))
                            stack.Push(D);
                    }
                }
            }
            #endregion

            //produce the union of P and g.productionMap in H
            //and replace each production 'A ::= B alpha' by 'A ::= S_B alpha'

            var Hprods = new Dictionary<Nonterminal, List<Production>>();
            #region compute Hprods
            //productions of g: a leading nonterminal of a non-unit production is replaced by its temporary start symbol
            foreach (Nonterminal A in g.variables)
            {
                var A_prods = new List<Production>();
                Hprods[A] = A_prods;
                foreach (Production p in g.productionMap[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                        A_prods.Add(p);
                }
            }
            //productions of P: same replacement as above
            foreach (Nonterminal A in P.Keys)
            {
                var A_prods = new List<Production>();
                Hprods[A] = A_prods;
                foreach (Production p in P[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                        A_prods.Add(p);
                }
            }
            #endregion
            ContextFreeGrammar H = new ContextFreeGrammar(new List<Nonterminal>(Hprods.Keys), g.startSymbol, Hprods);

            //Console.WriteLine("--------- H:");
            //H.Display(Console.Out);

            //eliminate useless symbols from H
            //this may dramatically decrease the number of productions
            ContextFreeGrammar H1 = H.RemoveUselessSymbols();

            //Console.WriteLine("---------- H1:");
            //H1.Display(Console.Out);

            //worklist state for the final pass: only variables reached from the start symbol of H1 are emitted
            List<Nonterminal> egnfVars = new List<Nonterminal>();
            Dictionary<Nonterminal, List<Production>> egnfProds = new Dictionary<Nonterminal, List<Production>>();
            Stack<Nonterminal> egnfStack = new Stack<Nonterminal>();
            HashSet<Nonterminal> egnfVisited = new HashSet<Nonterminal>();
            egnfStack.Push(H1.startSymbol);
            egnfVisited.Add(H1.startSymbol);
            egnfVars.Add(H1.startSymbol);
            egnfProds[H1.startSymbol] = new List<Production>();

            #region eliminate temp start symbols and produce the EGNF form
            while (egnfStack.Count > 0)
            {
                var A = egnfStack.Pop();
                List<Production> A_prods = egnfProds[A];
                foreach (Production p in H1.productionMap[A])
                {
                    //productions that do not start with a nonterminal, and unit productions, are kept as they are
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        A_prods.Add(p);
                        foreach (Nonterminal x in p.GetVariables())
                            if (egnfVisited.Add(x))
                            {
                                egnfStack.Push(x);
                                egnfVars.Add(x);
                                egnfProds[x] = new List<Production>();
                            }
                    }
                    else
                    {
                        Nonterminal S_B = (Nonterminal)p.First; //here we know that S_B is a temp start symbol
                        //substitute every right-hand side of S_B for the leading S_B of p
                        foreach (Production t in H1.productionMap[S_B])
                        {
                            int k = t.Rhs.Length;
                            //rhs = t.Rhs followed by p.Rhs without its first symbol
                            GrammarSymbol[] rhs = new GrammarSymbol[k + p.Rhs.Length - 1];
                            for (int i = 0; i < k; i++)
                                rhs[i] = t.Rhs[i];
                            for (int i = 1; i < p.Rhs.Length; i++)
                                rhs[k + i - 1] = p.Rhs[i];
                            Production q = new Production(A, rhs);
                            A_prods.Add(q);
                            foreach (Nonterminal x in q.GetVariables())
                                if (egnfVisited.Add(x))
                                {
                                    egnfStack.Push(x);
                                    egnfVars.Add(x);
                                    egnfProds[x] = new List<Production>();
                                }
                        }
                    }
                }
            }
            #endregion

            ContextFreeGrammar egnf = new ContextFreeGrammar(egnfVars, H1.startSymbol, egnfProds);
            return egnf;
        }
        /// <summary>
        /// Produces the CNF (Chomsky Normal Form) for the grammar g.
        /// If removeEpsilonsUselessSymbolsUnitsProductions is true, epsilons, useless symbols
        /// and unit productions are eliminated first; otherwise g is assumed to contain none of them.
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsUselessSymbolsUnitsProductions">if true, clean up g before the conversion</param>
        /// <returns>a new grammar with the start symbol of g, the variables of g plus the fresh ones introduced here, and the converted productions</returns>
        public static ContextFreeGrammar MkCNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            if (removeEpsilonsUselessSymbolsUnitsProductions)
            {
                g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            }

            var liftedProds = new Dictionary<Nonterminal, List<Production>>();
            var liftedVars = new List<Nonterminal>(g.variables);
            foreach (Nonterminal head in g.variables)
            {
                liftedProds[head] = new List<Production>();
            }

            //counter for every fresh nonterminal created by either step below
            int freshId = 0;

            //Implements algo in Theorem 4.5, page 92-93, in Hopcroft-Ullman

            #region make productions of the form V --> V0...Vn or V --> a
            //one fresh variable per distinct non-nonterminal symbol, shared by all productions
            var standInFor = new Dictionary<GrammarSymbol, Nonterminal>();
            foreach (Nonterminal head in g.variables)
            {
                foreach (Production prod in g.productionMap[head])
                {
                    if (prod.ContainsNoExprinals || prod.IsCNF)
                    {
                        //already of the required shape: keep as is
                        liftedProds[head].Add(prod);
                        continue;
                    }

                    var liftedRhs = new GrammarSymbol[prod.Rhs.Length];
                    for (int pos = 0; pos < liftedRhs.Length; pos++)
                    {
                        GrammarSymbol symbol = prod.Rhs[pos];
                        if (symbol is Nonterminal)
                        {
                            liftedRhs[pos] = symbol;
                            continue;
                        }

                        Nonterminal standIn;
                        if (!standInFor.TryGetValue(symbol, out standIn))
                        {
                            //first occurrence of this symbol: introduce standIn --> symbol
                            standIn = new Nonterminal(freshId++);
                            standInFor[symbol] = standIn;
                            liftedVars.Add(standIn);
                            var standInProds = new List<Production>();
                            standInProds.Add(new Production(standIn, symbol));
                            liftedProds[standIn] = standInProds;
                        }
                        liftedRhs[pos] = standIn;
                    }
                    liftedProds[head].Add(new Production(head, liftedRhs));
                }
            }
            #endregion

            var cnfProds = new Dictionary<Nonterminal, List<Production>>();
            var cnfVars = new List<Nonterminal>(liftedVars);
            foreach (Nonterminal head in cnfVars)
            {
                cnfProds[head] = new List<Production>();
            }

            #region replace V --> V0V1...Vn (n > 2), by V --> V0U0, U0 --> V1U1, ..., Un-2 --> Vn-1Vn
            foreach (Nonterminal head in liftedVars)
            {
                foreach (Production prod in liftedProds[head])
                {
                    if (prod.IsCNF)
                    {
                        cnfProds[head].Add(prod);
                        continue;
                    }

                    //chain the right-hand side through fresh variables, two symbols at a time
                    Nonterminal from = head;
                    Nonterminal to = new Nonterminal(freshId++);
                    cnfVars.Add(to);
                    cnfProds[to] = new List<Production>();

                    int lastLink = prod.Rhs.Length - 3;
                    for (int pos = 0; pos <= lastLink; pos++)
                    {
                        cnfProds[from].Add(new Production(from, prod.Rhs[pos], to));
                        if (pos < lastLink)
                        {
                            from = to;
                            to = new Nonterminal(freshId++);
                            cnfVars.Add(to);
                            cnfProds[to] = new List<Production>();
                        }
                    }

                    //the last fresh variable derives the final two symbols
                    cnfProds[to].Add(new Production(to, prod.Rhs[prod.Rhs.Length - 2], prod.Rhs[prod.Rhs.Length - 1]));
                }
            }
            #endregion

            return new ContextFreeGrammar(cnfVars, g.startSymbol, cnfProds);
        }
Beispiel #9
0
        /// <summary>
        /// Reads the grammar from the lexer token by token. The first nonterminal becomes
        /// startvar. A production is appended to productions at every OR, ARR and EOS token.
        /// On ARR the most recently read nonterminal is taken as the left-hand side of the
        /// next production rather than as part of the current right-hand side.
        /// Terminal tokens are expanded through terminalMap, which is filled on demand from
        /// the automaton returned by parseRegex.
        /// </summary>
        /// <exception cref="ParseException">on an ARR token that does not directly follow a nonterminal of the current right-hand side, or on any other token type</exception>
        private void Parse()
        {
            Token current  = null;
            Token previous = null;

            Grammars.Nonterminal lhs = ExpectNT();
            startvar = lhs;

            ExpectArrow();
            var rhs = new List <Grammars.GrammarSymbol>();

            while (true)
            {
                previous = current;
                current  = lexer.Next();

                switch (current.t)
                {
                case TokenType.NT:
                    rhs.Add(Grammars.Nonterminal.CreateByParser(current.content));
                    break;

                case TokenType.T:
                {
                    GrammarSymbol[] expansion;
                    if (!terminalMap.TryGetValue(current.content, out expansion))
                    {
                        var aut = parseRegex(current.content);
                        #region parse this terminal-regex as an automaton and compute expansion or set expansion to top nonterminal
                        int sequenceLength = -1;
                        if (aut.IsEpsilon)
                        {
                            //the empty word contributes no symbols
                            expansion = new GrammarSymbol[0];
                        }
                        else if (aut.InitialStateIsSource && aut.HasSingleFinalSink && aut.MoveCount == 1)
                        {
                            //just a single terminal
                            expansion = new GrammarSymbol[] { new Terminal <T>(aut.GetMoveFrom(aut.InitialState).Label) };
                        }
                        else if (aut.CheckIfSequence(out sequenceLength) && aut.HasSingleFinalSink && aut.IsEpsilonFree)
                        {
                            //inline the automaton as a sequence of terminals, one per move,
                            //following the moves from the initial state to the final state
                            expansion = new GrammarSymbol[sequenceLength];
                            int state = aut.InitialState;
                            for (int pos = 0; !aut.IsFinalState(state); pos++)
                            {
                                var step = aut.GetMoveFrom(state);
                                state          = step.TargetState;
                                expansion[pos] = new Terminal <T>(step.Label);
                            }
                        }
                        else
                        {
                            //introduce new nonterminal for the automaton
                            int regexId = __regexId++;
                            var regexNonterminal = Nonterminal.MkNonterminalForRegex(regexId);
                            parsedRegexes[regexNonterminal] = aut;
                            expansion = new GrammarSymbol[] { regexNonterminal };
                        }
                        terminalMap[current.content] = expansion;
                        #endregion
                    }
                    rhs.AddRange(expansion);
                    break;
                }

                case TokenType.OR:
                    //alternative finished: same left-hand side, new right-hand side
                    productions.Add(new Grammars.Production(lhs, rhs.ToArray()));
                    rhs.Clear();
                    break;

                case TokenType.ARR:
                {
                    //the arrow must directly follow a nonterminal
                    if (rhs.Count < 1 || previous.t != TokenType.NT)
                    {
                        throw new ParseException();
                    }

                    // downcast :(
                    int lastIndex = rhs.Count - 1;
                    Grammars.Nonterminal nextLhs = (Grammars.Nonterminal)rhs[lastIndex];
                    rhs.RemoveAt(lastIndex);
                    productions.Add(new Grammars.Production(lhs, rhs.ToArray()));
                    rhs.Clear();
                    lhs = nextLhs;
                    break;
                }

                case TokenType.EOS:
                    //end of input: flush the pending production and stop
                    productions.Add(new Grammars.Production(lhs, rhs.ToArray()));
                    rhs.Clear();
                    return;

                default:
                    throw new ParseException();
                }
            }
        }
        /// <summary>
        /// Produces the EGNF (Extended Greibach Normal Form) for the grammar g.
        /// Implements a variation of the Blum-Koch algorithm.
        /// (Inf. and Comp. vol.150, pp.112-118, 1999)
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsAndUselessSymbols">if true, first removes epsilons and useless symbols, otherwise assumes that epsilons do not occur</param>
        /// <returns>Extended Greibach Normal Form of g</returns>
        public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
        {
            if (removeEpsilonsAndUselessSymbols)
            {
                g = g.RemoveEpsilonsAndUselessSymbols();
            }

            if (g.IsInGNF())
            {
                return(g);
            }

            var leavesP = new List <Production>();
            var revP    = new Dictionary <Nonterminal, List <Tuple <GrammarSymbol[], Nonterminal> > >();

            int nonterminalID = 0;

            #region compute leavesP and revP
            foreach (Nonterminal v in g.variables)
            {
                revP[v] = new List <Tuple <GrammarSymbol[], Nonterminal> >();
            }

            foreach (Production p in g.GetProductions())
            {
                if (!(p.First is Nonterminal))
                {
                    leavesP.Add(p);
                }
                else
                {
                    revP[(Nonterminal)p.First].Add(new Tuple <GrammarSymbol[], Nonterminal>(p.Rest, p.Lhs));
                }
            }
            #endregion

            var W           = new Dictionary <Nonterminal, HashSet <Nonterminal> >();
            var startSymbol = new Dictionary <Nonterminal, Nonterminal>();

            #region create new start symbols and compute unit closures
            foreach (Nonterminal v in g.variables)
            {
                W[v]           = g.GetUnitClosure(v);
                startSymbol[v] = new Nonterminal(nonterminalID++);
            }
            #endregion

            var P = new Dictionary <Nonterminal, List <Production> >();

            #region construct intermediate productions in P for each variable B
            foreach (Nonterminal B in g.variables)
            {
                var S_B  = startSymbol[B];
                var W_B  = W[B]; //unit closure of B
                var Bvar = new Dictionary <Nonterminal, Nonterminal>();
                Stack <Nonterminal>   stack   = new Stack <Nonterminal>();
                HashSet <Nonterminal> visited = new HashSet <Nonterminal>();
                var S_B_list = new List <Production>();
                P[S_B] = S_B_list;
                foreach (Production p in leavesP)
                {
                    S_B_list.Add(new Production(S_B, p.Rhs, Lookup(Bvar, p.Lhs, ref nonterminalID)));
                    if (visited.Add(p.Lhs))
                    {
                        stack.Push(p.Lhs);
                    }
                    if (W_B.Contains(p.Lhs))
                    {
                        S_B_list.Add(new Production(S_B, p.Rhs));
                    }
                }

                while (stack.Count > 0)
                {
                    Nonterminal       C   = stack.Pop();
                    Nonterminal       C_B = Lookup(Bvar, C, ref nonterminalID);
                    List <Production> C_B_list;
                    if (!P.TryGetValue(C_B, out C_B_list))
                    {
                        C_B_list = new List <Production>();
                        P[C_B]   = C_B_list;
                    }
                    foreach (var t in revP[C])
                    {
                        Nonterminal D   = t.Item2;
                        Nonterminal D_B = Lookup(Bvar, D, ref nonterminalID);
                        C_B_list.Add(new Production(C_B, t.Item1, D_B));
                        if (t.Item1.Length > 0 && W_B.Contains(D))
                        {
                            C_B_list.Add(new Production(C_B, t.Item1));
                        }
                        if (visited.Add(D))
                        {
                            stack.Push(D);
                        }
                    }
                }
            }
            #endregion


            //produce the union of P and g.productionMap in H
            //and replace each production 'A ::= B alpha' by 'A ::= S_B alpha"

            var Hprods = new Dictionary <Nonterminal, List <Production> >();
            #region compute Hprods
            foreach (Nonterminal A in g.variables)
            {
                var A_prods = new List <Production>();
                Hprods[A] = A_prods;
                foreach (Production p in g.productionMap[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                    {
                        A_prods.Add(p);
                    }
                }
            }
            foreach (Nonterminal A in P.Keys)
            {
                var A_prods = new List <Production>();
                Hprods[A] = A_prods;
                foreach (Production p in P[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                    {
                        A_prods.Add(p);
                    }
                }
            }
            #endregion
            ContextFreeGrammar H = new ContextFreeGrammar(new List <Nonterminal>(Hprods.Keys), g.startSymbol, Hprods);

            //Console.WriteLine("--------- H:");
            //H.Display(Console.Out);

            //eliminate useless symbols from H
            //this may dramatically decrease the number of productions
            ContextFreeGrammar H1 = H.RemoveUselessSymbols();

            //Console.WriteLine("---------- H1:");
            //H1.Display(Console.Out);


            List <Nonterminal> egnfVars = new List <Nonterminal>();
            Dictionary <Nonterminal, List <Production> > egnfProds = new Dictionary <Nonterminal, List <Production> >();
            Stack <Nonterminal>   egnfStack   = new Stack <Nonterminal>();
            HashSet <Nonterminal> egnfVisited = new HashSet <Nonterminal>();
            egnfStack.Push(H1.startSymbol);
            egnfVisited.Add(H1.startSymbol);
            egnfVars.Add(H1.startSymbol);
            egnfProds[H1.startSymbol] = new List <Production>();

            #region eliminate temp start symbols and produce the EGNF form
            while (egnfStack.Count > 0)
            {
                var A = egnfStack.Pop();
                List <Production> A_prods = egnfProds[A];
                foreach (Production p in H1.productionMap[A])
                {
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        A_prods.Add(p);
                        foreach (Nonterminal x in p.GetVariables())
                        {
                            if (egnfVisited.Add(x))
                            {
                                egnfStack.Push(x);
                                egnfVars.Add(x);
                                egnfProds[x] = new List <Production>();
                            }
                        }
                    }
                    else
                    {
                        Nonterminal S_B = (Nonterminal)p.First; //here we know that S_B is a temp start symbol
                        foreach (Production t in H1.productionMap[S_B])
                        {
                            int             k   = t.Rhs.Length;
                            GrammarSymbol[] rhs = new GrammarSymbol[k + p.Rhs.Length - 1];
                            for (int i = 0; i < k; i++)
                            {
                                rhs[i] = t.Rhs[i];
                            }
                            for (int i = 1; i < p.Rhs.Length; i++)
                            {
                                rhs[k + i - 1] = p.Rhs[i];
                            }
                            Production q = new Production(A, rhs);
                            A_prods.Add(q);
                            foreach (Nonterminal x in q.GetVariables())
                            {
                                if (egnfVisited.Add(x))
                                {
                                    egnfStack.Push(x);
                                    egnfVars.Add(x);
                                    egnfProds[x] = new List <Production>();
                                }
                            }
                        }
                    }
                }
            }
            #endregion

            ContextFreeGrammar egnf = new ContextFreeGrammar(egnfVars, H1.startSymbol, egnfProds);
            return(egnf);
        }
        /// <summary>
        /// Produces the GNF (Greibach Normal Form) for the grammar g.
        /// If g is not already in GNF, first makes CNF.
        /// Implements a variation of the Koch-Blum algorithm. (STACS 97, pp. 47-54)
        /// </summary>
        /// <param name="g">the grammar to convert</param>
        /// <param name="removeEpsilonsUselessSymbolsUnitsProductions">when true, g is first replaced by
        /// g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions() before anything else is done</param>
        /// <returns>g itself (after the optional cleanup) when g.IsInGNF() holds; otherwise a new grammar
        /// built from the start symbol of the intermediate CNF grammar and the productions collected below</returns>
        public static ContextFreeGrammar MkGNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            if (removeEpsilonsUselessSymbolsUnitsProductions)
            {
                g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            }
            if (g.IsInGNF())
            {
                return(g);
            }

            // the cleanup (if it was requested) has already been applied above, so MkCNF is told not to repeat it
            ContextFreeGrammar cnf = MkCNF(g, false);
            var Vars = cnf.variables;

            // counter used to number the start symbols of the intermediate grammars G_[B]
            int nonterminalID = 0;

            var M = new Dictionary <Nonterminal, Automaton <GrammarSymbol> >();

            #region construct the automata M[B] for all variables B
            // 'id' is a single state counter shared by all automata constructed in this region
            int id            = 0;
            var initStateMap  = new Dictionary <Nonterminal, int>();
            var finalStateMap = new Dictionary <Nonterminal, int>();
            foreach (Nonterminal B in Vars)
            {
                // every variable B gets its own initial state and its own final state
                initStateMap[B]  = id++;
                finalStateMap[B] = id++;
            }

            var movesOfM = new Dictionary <Nonterminal, List <Move <GrammarSymbol> > >();

            foreach (Nonterminal B in Vars)
            {
                movesOfM[B] = new List <Move <GrammarSymbol> >();
            }

            #region construct the moves of the automata
            foreach (Nonterminal B in Vars)
            {
                // maps each variable visited from B to the state that represents it in M[B]
                var variableToStateMap    = new Dictionary <Nonterminal, int>();
                Stack <Nonterminal> stack = new Stack <Nonterminal>();
                stack.Push(B);
                int initState = initStateMap[B];
                // B itself is represented by the final state of M[B]
                variableToStateMap[B] = finalStateMap[B];
                while (stack.Count > 0)
                {
                    Nonterminal C = stack.Pop();
                    foreach (Production p in cnf.GetProductions(C))
                    {
                        if (p.IsSingleExprinal)
                        {
                            // for this kind of production: move from the initial state to the state of C, labeled by p.First
                            movesOfM[B].Add(Move <GrammarSymbol> .Create(initState, variableToStateMap[C], p.First));
                        }
                        else
                        {
                            Nonterminal D = (Nonterminal)p.First; //using the fact that the grammar is in CNF
                            if (!variableToStateMap.ContainsKey(D))
                            {
                                //visit all variables reachable that have not already been visited
                                variableToStateMap.Add(D, id++);
                                stack.Push(D);
                            }
                            // for C --> D E: move from the state of D to the state of C, labeled by E
                            GrammarSymbol E = p.Rhs[1];
                            movesOfM[B].Add(Move <GrammarSymbol> .Create(variableToStateMap[D], variableToStateMap[C], E));
                        }
                    }
                }
            }
            #endregion

            foreach (Nonterminal B in Vars)
            {
                // M[B] has the single final state finalStateMap[B]
                // NOTE(review): the meaning of the first argument (null) of Automaton.Create is not visible here - confirm
                M[B] = Automaton <GrammarSymbol> .Create(null, initStateMap[B], new int[] { finalStateMap[B] }, movesOfM[B]);
            }
            #endregion

            var G_ = new Dictionary <Nonterminal, ContextFreeGrammar>();

            #region construct corresponding intermediate grammars G_[B] corresponding to M[B]
            foreach (Nonterminal B in Vars)
            {
                var         MB = M[B];
                // FinalStateHasVariableMoves is defined elsewhere; presumably true when the final state of MB
                // has outgoing moves - verify against its definition
                bool        MBfinalStateHasVariableMoves = FinalStateHasVariableMoves(MB);
                var         productions = new Dictionary <Nonterminal, List <Production> >();
                Nonterminal startSymbol = new Nonterminal(nonterminalID++);
                var         vars        = new List <Nonterminal>();
                vars.Add(startSymbol);
                productions[startSymbol] = new List <Production>();

                // moves leaving the initial state give the productions of the start symbol
                foreach (var move in MB.GetMovesFrom(MB.InitialState))
                {
                    if (move.TargetState == MB.FinalState)
                    {
                        // a move into the final state may end the derivation: S --> label
                        productions[startSymbol].Add(new Production(startSymbol, move.Label));
                    }
                    if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                    {
                        // the derivation may continue from the target state: S --> label Q<target>
                        var C = new Nonterminal("Q" + move.TargetState);
                        productions[startSymbol].Add(new Production(startSymbol, move.Label, C));
                        if (!productions.ContainsKey(C))
                        {
                            productions[C] = new List <Production>();
                            vars.Add(C);
                        }
                    }
                }

                // moves leaving every other state give the productions of the nonterminal named after that state
                foreach (int state in MB.States)
                {
                    if (state != MB.InitialState)
                    {
                        foreach (Move <GrammarSymbol> move in MB.GetMovesFrom(state))
                        {
                            Nonterminal D = new Nonterminal("Q" + state);
                            Nonterminal C = new Nonterminal("Q" + move.TargetState);
                            if (!productions.ContainsKey(D))
                            {
                                productions[D] = new List <Production>();
                                vars.Add(D);
                            }
                            // moves from non-initial states were created above with label p.Rhs[1] of a
                            // non-terminal-only CNF production; the cast relies on that symbol being a Nonterminal
                            Nonterminal E = (Nonterminal)move.Label;
                            if (move.TargetState == MB.FinalState)
                            {
                                productions[D].Add(new Production(D, E));
                            }
                            if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                            {
                                productions[D].Add(new Production(D, E, C));
                                //we pretend here that E is a terminal
                                if (!productions.ContainsKey(C))
                                {
                                    productions[C] = new List <Production>();
                                    vars.Add(C);
                                }
                            }
                        }
                    }
                }
                G_[B] = new ContextFreeGrammar(vars, startSymbol, productions);
            }
            #endregion

            var G = new Dictionary <Nonterminal, ContextFreeGrammar>();

            #region construct the corresponding temporary G[B]'s
            foreach (Nonterminal B in Vars)
            {
                var G_B         = G_[B];
                var productions = new Dictionary <Nonterminal, List <Production> >();
                //var vars = new List<Variable>();
                Nonterminal startSymbol = G_B.startSymbol;
                // the productions of the start symbol are taken over unchanged (same list instance)
                productions[startSymbol] = G_B.productionMap[startSymbol];
                foreach (Nonterminal D in G_B.variables)
                {
                    if (!D.Equals(startSymbol))
                    {
                        var productions_D = new List <Production>();
                        productions[D] = productions_D;
                        foreach (Production p in G_B.productionMap[D])
                        {
                            // p is D --> E or D --> E C; the leading E is replaced by each
                            // right-hand side of the start symbol of G_[E]
                            Nonterminal E   = (Nonterminal)p.First;
                            var         G_E = G_[E];
                            if (p.IsUnit)
                            {
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    productions_D.Add(new Production(D, q.Rhs));
                                }
                            }
                            else
                            {
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    // right-hand side of q followed by the second symbol of p
                                    GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                                    Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                                    symbols[q.Rhs.Length] = p.Rhs[1];
                                    productions_D.Add(new Production(D, symbols));
                                }
                            }
                        }
                    }
                }
                //ignore the variable list, it is not used
                G[B] = new ContextFreeGrammar(null, startSymbol, productions);
            }
            #endregion

            #region construct the final GNF from the G[B]'s
            var productionsGNF = new List <Production>();
            foreach (Nonterminal A in cnf.variables)
            {
                foreach (Production p in cnf.productionMap[A])
                {
                    if (p.IsSingleExprinal)
                    {
                        // kept as it is
                        productionsGNF.Add(p);
                    }
                    else
                    {
                        // p is A --> B C: B is replaced by each right-hand side of the start symbol of G[B]
                        Nonterminal B  = (Nonterminal)p.Rhs[0];
                        Nonterminal C  = (Nonterminal)p.Rhs[1];
                        var         GB = G[B];
                        foreach (Production q in GB.productionMap[GB.startSymbol])
                        {
                            GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                            Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                            symbols[q.Rhs.Length] = C;
                            productionsGNF.Add(new Production(A, symbols));
                        }
                    }
                }
            }
            // add the productions of all non-start variables of every G[B];
            // the start-symbol productions were only used for the substitution above
            foreach (Nonterminal B in Vars)
            {
                var GB = G[B];
                foreach (var kv in GB.productionMap)
                {
                    if (!kv.Key.Equals(GB.startSymbol))
                    {
                        productionsGNF.AddRange(kv.Value);
                    }
                }
            }
            #endregion

            ContextFreeGrammar gnf = new ContextFreeGrammar(cnf.startSymbol, productionsGNF);
            return(gnf);
        }
        /// <summary>
        /// Converts the grammar g to CNF (Chomsky Normal Form).
        /// If removeEpsilonsUselessSymbolsUnitsProductions is true, epsilons, useless symbols
        /// and unit productions are eliminated first; otherwise g is used as given and is
        /// assumed to contain no epsilons, useless symbols or unit productions.
        /// </summary>
        /// <param name="g">the grammar to convert</param>
        /// <param name="removeEpsilonsUselessSymbolsUnitsProductions">when true, g is first replaced by
        /// g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions()</param>
        /// <returns>a new grammar with the start symbol of g, built by the two rewriting passes below</returns>
        public static ContextFreeGrammar MkCNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            if (removeEpsilonsUselessSymbolsUnitsProductions)
            {
                g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            }

            var liftedProds = new Dictionary<Nonterminal, List<Production>>();
            var liftedVars  = new List<Nonterminal>(g.variables);
            foreach (Nonterminal head in g.variables)
            {
                liftedProds[head] = new List<Production>();
            }

            // a single counter numbers every fresh nonterminal introduced by either pass
            int freshCounter = 0;

            //Implements algo in Theorem 4.5, page 92-93, in Hopcroft-Ullman

            #region pass 1: make productions of the form V --> V0...Vn or V --> a
            // remembers the fresh variable already introduced for a given non-variable symbol
            var standIns = new Dictionary<GrammarSymbol, Nonterminal>();
            foreach (Nonterminal head in g.variables)
            {
                foreach (Production prod in g.productionMap[head])
                {
                    if (prod.ContainsNoExprinals || prod.IsCNF)
                    {
                        // nothing to replace in this production
                        liftedProds[head].Add(prod);
                        continue;
                    }

                    var lifted = new GrammarSymbol[prod.Rhs.Length];
                    for (int pos = 0; pos < lifted.Length; pos++)
                    {
                        GrammarSymbol sym = prod.Rhs[pos];
                        if (!(sym is Nonterminal))
                        {
                            // swap the symbol for a variable U with the single production U --> symbol
                            Nonterminal standIn;
                            if (!standIns.TryGetValue(sym, out standIn))
                            {
                                standIn = new Nonterminal(freshCounter++);
                                standIns[sym] = standIn;
                                liftedVars.Add(standIn);
                                liftedProds[standIn] = new List<Production> { new Production(standIn, sym) };
                            }
                            sym = standIn;
                        }
                        lifted[pos] = sym;
                    }
                    liftedProds[head].Add(new Production(head, lifted));
                }
            }
            #endregion

            var cnfProds = new Dictionary<Nonterminal, List<Production>>();
            var cnfVars  = new List<Nonterminal>(liftedVars);
            foreach (Nonterminal head in cnfVars)
            {
                cnfProds[head] = new List<Production>();
            }

            #region pass 2: replace V --> V0V1...Vn (n > 2), by V --> V0U0, U0 --> V1U1, ..., Un-2 --> Vn-1Vn
            foreach (Nonterminal head in liftedVars)
            {
                foreach (Production prod in liftedProds[head])
                {
                    if (prod.IsCNF)
                    {
                        cnfProds[head].Add(prod);
                        continue;
                    }

                    // index of the first of the two symbols that end up in the last link of the chain
                    int tail = prod.Rhs.Length - 2;

                    Nonterminal current = head;
                    Nonterminal next    = new Nonterminal(freshCounter++);
                    cnfVars.Add(next);
                    cnfProds[next] = new List<Production>();

                    int pos = 0;
                    while (pos < tail)
                    {
                        cnfProds[current].Add(new Production(current, prod.Rhs[pos], next));
                        pos++;
                        if (pos < tail)
                        {
                            // more than two symbols remain, so another link is needed
                            current = next;
                            next    = new Nonterminal(freshCounter++);
                            cnfVars.Add(next);
                            cnfProds[next] = new List<Production>();
                        }
                    }
                    cnfProds[next].Add(new Production(next, prod.Rhs[tail], prod.Rhs[tail + 1]));
                }
            }
            #endregion

            return new ContextFreeGrammar(cnfVars, g.startSymbol, cnfProds);
        }
Beispiel #13
0
 /// <summary>
 /// Returns true iff some symbol of Rhs equals the given symbol.
 /// </summary>
 /// <param name="symbol">the symbol to look for; it is passed to Equals of each right-hand-side symbol</param>
 /// <returns>true if a right-hand-side symbol reports equality with symbol, false otherwise</returns>
 internal bool RhsContainsSymbol(GrammarSymbol symbol)
 {
     GrammarSymbol[] body = Rhs;
     for (int i = 0; i < body.Length; i++)
     {
         if (body[i].Equals(symbol))
         {
             return true;
         }
     }
     return false;
 }