GetUnitClosure() private method

private GetUnitClosure ( Nonterminal v ) : HashSet<Nonterminal>
v Nonterminal
return HashSet<Nonterminal>
        /// <summary>
        /// Builds the Extended Greibach Normal Form (EGNF) of the grammar g,
        /// following a variation of the Blum-Koch algorithm
        /// (Inf. and Comp. vol.150, pp.112-118, 1999).
        /// If the (optionally cleaned) grammar already reports IsInGNF(), it is returned directly.
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsAndUselessSymbols">if true, epsilons and useless symbols are removed first; otherwise g is assumed to contain no epsilons</param>
        /// <returns>Extended Greibach Normal Form of g</returns>
        public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
        {
            if (removeEpsilonsAndUselessSymbols)
            {
                g = g.RemoveEpsilonsAndUselessSymbols();
            }

            if (g.IsInGNF())
            {
                return g;
            }

            // ids of the nonterminals created in this method are drawn from this counter
            int freshId = 0;

            #region collect terminal-led productions and reversed nonterminal-led productions
            var terminalLed = new List<Production>();
            var reversed = new Dictionary<Nonterminal, List<Pair<GrammarSymbol[], Nonterminal>>>();

            foreach (Nonterminal variable in g.variables)
            {
                reversed[variable] = new List<Pair<GrammarSymbol[], Nonterminal>>();
            }

            foreach (Production production in g.GetProductions())
            {
                if (production.First is Nonterminal)
                {
                    // 'Lhs ::= First Rest' is filed under First as the pair (Rest, Lhs)
                    var bucket = reversed[(Nonterminal)production.First];
                    bucket.Add(new Pair<GrammarSymbol[], Nonterminal>(production.Rest, production.Lhs));
                }
                else
                {
                    terminalLed.Add(production);
                }
            }
            #endregion

            #region unit closure and a fresh start symbol for every variable
            var unitClosures = new Dictionary<Nonterminal, HashSet<Nonterminal>>();
            var freshStart = new Dictionary<Nonterminal, Nonterminal>();
            foreach (Nonterminal variable in g.variables)
            {
                unitClosures[variable] = g.GetUnitClosure(variable);
                freshStart[variable] = new Nonterminal(freshId++);
            }
            #endregion

            #region build the intermediate productions, one pass per variable B
            var intermediate = new Dictionary<Nonterminal, List<Production>>();
            foreach (Nonterminal B in g.variables)
            {
                Nonterminal startOfB = freshStart[B];
                HashSet<Nonterminal> closureOfB = unitClosures[B];
                // per-B table handed to Lookup; presumably maps a variable to its B-specific copy
                var copies = new Dictionary<Nonterminal, Nonterminal>();
                var pending = new Stack<Nonterminal>();
                var seen = new HashSet<Nonterminal>();
                var startRules = new List<Production>();
                intermediate[startOfB] = startRules;

                foreach (Production leaf in terminalLed)
                {
                    startRules.Add(new Production(startOfB, leaf.Rhs, Lookup(copies, leaf.Lhs, ref freshId)));
                    if (seen.Add(leaf.Lhs))
                    {
                        pending.Push(leaf.Lhs);
                    }
                    if (closureOfB.Contains(leaf.Lhs))
                    {
                        // the left-hand side is in the unit closure of B, so the derivation may stop here
                        startRules.Add(new Production(startOfB, leaf.Rhs));
                    }
                }

                while (pending.Count > 0)
                {
                    Nonterminal C = pending.Pop();
                    Nonterminal copyOfC = Lookup(copies, C, ref freshId);
                    List<Production> rulesOfC;
                    if (!intermediate.TryGetValue(copyOfC, out rulesOfC))
                    {
                        rulesOfC = new List<Production>();
                        intermediate[copyOfC] = rulesOfC;
                    }

                    foreach (var edge in reversed[C])
                    {
                        var tail = edge.First;
                        Nonterminal D = edge.Second;
                        Nonterminal copyOfD = Lookup(copies, D, ref freshId);
                        rulesOfC.Add(new Production(copyOfC, tail, copyOfD));
                        if (tail.Length > 0 && closureOfB.Contains(D))
                        {
                            rulesOfC.Add(new Production(copyOfC, tail));
                        }
                        if (seen.Add(D))
                        {
                            pending.Push(D);
                        }
                    }
                }
            }
            #endregion

            // H is the union of the intermediate productions and g.productionMap,
            // where every non-unit production 'A ::= B alpha' becomes 'A ::= S_B alpha'
            #region compute the productions of H
            var combined = new Dictionary<Nonterminal, List<Production>>();
            foreach (Nonterminal A in g.variables)
            {
                var rewritten = new List<Production>();
                combined[A] = rewritten;
                foreach (Production p in g.productionMap[A])
                {
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        rewritten.Add(p);
                    }
                    else
                    {
                        var rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = freshStart[(Nonterminal)p.First];
                        for (int i = 1; i < rhs.Length; i++)
                        {
                            rhs[i] = p.Rhs[i];
                        }
                        rewritten.Add(new Production(p.Lhs, rhs));
                    }
                }
            }
            foreach (Nonterminal A in intermediate.Keys)
            {
                var rewritten = new List<Production>();
                combined[A] = rewritten;
                foreach (Production p in intermediate[A])
                {
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        rewritten.Add(p);
                    }
                    else
                    {
                        var rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = freshStart[(Nonterminal)p.First];
                        for (int i = 1; i < rhs.Length; i++)
                        {
                            rhs[i] = p.Rhs[i];
                        }
                        rewritten.Add(new Production(p.Lhs, rhs));
                    }
                }
            }
            #endregion

            // dropping useless symbols from H may dramatically decrease the number of productions
            ContextFreeGrammar H1 =
                new ContextFreeGrammar(new List<Nonterminal>(combined.Keys), g.startSymbol, combined).RemoveUselessSymbols();

            var resultVars = new List<Nonterminal>();
            var resultProds = new Dictionary<Nonterminal, List<Production>>();
            var worklist = new Stack<Nonterminal>();
            var reached = new HashSet<Nonterminal>();
            worklist.Push(H1.startSymbol);
            reached.Add(H1.startSymbol);
            resultVars.Add(H1.startSymbol);
            resultProds[H1.startSymbol] = new List<Production>();

            #region inline the temp start symbols while walking the variables reachable from the start symbol
            while (worklist.Count > 0)
            {
                var A = worklist.Pop();
                List<Production> emitted = resultProds[A];
                foreach (Production p in H1.productionMap[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        // the leading symbol is a temp start symbol S_B here:
                        // emit one production per S_B production, followed by the remainder of p
                        Nonterminal S_B = (Nonterminal)p.First;
                        foreach (Production t in H1.productionMap[S_B])
                        {
                            int prefixLength = t.Rhs.Length;
                            var rhs = new GrammarSymbol[prefixLength + p.Rhs.Length - 1];
                            int cursor = 0;
                            for (int i = 0; i < prefixLength; i++)
                            {
                                rhs[cursor++] = t.Rhs[i];
                            }
                            for (int i = 1; i < p.Rhs.Length; i++)
                            {
                                rhs[cursor++] = p.Rhs[i];
                            }

                            Production q = new Production(A, rhs);
                            emitted.Add(q);
                            foreach (Nonterminal x in q.GetVariables())
                            {
                                if (reached.Add(x))
                                {
                                    worklist.Push(x);
                                    resultVars.Add(x);
                                    resultProds[x] = new List<Production>();
                                }
                            }
                        }
                    }
                    else
                    {
                        // productions not led by a nonterminal, and unit productions, are copied as they are
                        emitted.Add(p);
                        foreach (Nonterminal x in p.GetVariables())
                        {
                            if (reached.Add(x))
                            {
                                worklist.Push(x);
                                resultVars.Add(x);
                                resultProds[x] = new List<Production>();
                            }
                        }
                    }
                }
            }
            #endregion

            return new ContextFreeGrammar(resultVars, H1.startSymbol, resultProds);
        }
        /// <summary>
        /// Normalizes the grammar g into EGNF (Extended Greibach Normal Form)
        /// by means of a variation of the Blum-Koch algorithm
        /// (Inf. and Comp. vol.150, pp.112-118, 1999).
        /// A grammar that reports IsInGNF() after the optional cleanup is returned without further work.
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsAndUselessSymbols">if true, first removes epsilons and useless symbols; otherwise epsilons are assumed not to occur</param>
        /// <returns>Extended Greibach Normal Form of g</returns>
        public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
        {
            if (removeEpsilonsAndUselessSymbols)
            {
                g = g.RemoveEpsilonsAndUselessSymbols();
            }

            if (g.IsInGNF())
            {
                return g;
            }

            // source of ids for all nonterminals introduced below
            int nextId = 0;

            // leaves: productions whose first symbol is not a nonterminal
            // incoming[X]: for each production 'L ::= X rest', the tuple (rest, L)
            var leaves = new List<Production>();
            var incoming = new Dictionary<Nonterminal, List<Tuple<GrammarSymbol[], Nonterminal>>>();

            #region fill leaves and incoming
            foreach (Nonterminal nt in g.variables)
            {
                incoming[nt] = new List<Tuple<GrammarSymbol[], Nonterminal>>();
            }

            foreach (Production prod in g.GetProductions())
            {
                if (!(prod.First is Nonterminal))
                {
                    leaves.Add(prod);
                    continue;
                }

                var targets = incoming[(Nonterminal)prod.First];
                targets.Add(new Tuple<GrammarSymbol[], Nonterminal>(prod.Rest, prod.Lhs));
            }
            #endregion

            var closureOf = new Dictionary<Nonterminal, HashSet<Nonterminal>>();
            var tempStartOf = new Dictionary<Nonterminal, Nonterminal>();

            #region one unit closure and one temp start symbol per variable
            foreach (Nonterminal nt in g.variables)
            {
                closureOf[nt] = g.GetUnitClosure(nt);
                tempStartOf[nt] = new Nonterminal(nextId++);
            }
            #endregion

            var helperProds = new Dictionary<Nonterminal, List<Production>>();

            #region generate the helper productions for each variable B
            foreach (Nonterminal B in g.variables)
            {
                Nonterminal S_B = tempStartOf[B];
                HashSet<Nonterminal> W_B = closureOf[B]; // unit closure of B
                // table passed to Lookup for this B; presumably yields the B-indexed copy of a variable
                var indexed = new Dictionary<Nonterminal, Nonterminal>();
                var todo = new Stack<Nonterminal>();
                var done = new HashSet<Nonterminal>();
                var fromStart = new List<Production>();
                helperProds[S_B] = fromStart;

                foreach (Production leaf in leaves)
                {
                    fromStart.Add(new Production(S_B, leaf.Rhs, Lookup(indexed, leaf.Lhs, ref nextId)));
                    if (done.Add(leaf.Lhs))
                    {
                        todo.Push(leaf.Lhs);
                    }
                    if (W_B.Contains(leaf.Lhs))
                    {
                        fromStart.Add(new Production(S_B, leaf.Rhs));
                    }
                }

                while (todo.Count > 0)
                {
                    Nonterminal C = todo.Pop();
                    Nonterminal C_B = Lookup(indexed, C, ref nextId);

                    List<Production> fromC;
                    if (!helperProds.TryGetValue(C_B, out fromC))
                    {
                        fromC = new List<Production>();
                        helperProds[C_B] = fromC;
                    }

                    foreach (var link in incoming[C])
                    {
                        GrammarSymbol[] suffix = link.Item1;
                        Nonterminal D = link.Item2;
                        Nonterminal D_B = Lookup(indexed, D, ref nextId);

                        fromC.Add(new Production(C_B, suffix, D_B));
                        if (suffix.Length > 0 && W_B.Contains(D))
                        {
                            fromC.Add(new Production(C_B, suffix));
                        }
                        if (done.Add(D))
                        {
                            todo.Push(D);
                        }
                    }
                }
            }
            #endregion

            // H holds the union of the helper productions and g.productionMap;
            // each non-unit production 'A ::= B alpha' is replaced by 'A ::= S_B alpha'
            var unionProds = new Dictionary<Nonterminal, List<Production>>();

            #region fill unionProds
            foreach (Nonterminal A in g.variables)
            {
                var target = new List<Production>();
                unionProds[A] = target;
                foreach (Production p in g.productionMap[A])
                {
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        target.Add(p);
                        continue;
                    }

                    var rhs = new GrammarSymbol[p.Rhs.Length];
                    rhs[0] = tempStartOf[(Nonterminal)p.First];
                    for (int i = 1; i < rhs.Length; i++)
                    {
                        rhs[i] = p.Rhs[i];
                    }
                    target.Add(new Production(p.Lhs, rhs));
                }
            }

            foreach (Nonterminal A in helperProds.Keys)
            {
                var target = new List<Production>();
                unionProds[A] = target;
                foreach (Production p in helperProds[A])
                {
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        target.Add(p);
                        continue;
                    }

                    var rhs = new GrammarSymbol[p.Rhs.Length];
                    rhs[0] = tempStartOf[(Nonterminal)p.First];
                    for (int i = 1; i < rhs.Length; i++)
                    {
                        rhs[i] = p.Rhs[i];
                    }
                    target.Add(new Production(p.Lhs, rhs));
                }
            }
            #endregion

            var H = new ContextFreeGrammar(new List<Nonterminal>(unionProds.Keys), g.startSymbol, unionProds);

            // removing useless symbols from H may dramatically decrease the number of productions
            ContextFreeGrammar H1 = H.RemoveUselessSymbols();

            var outVars = new List<Nonterminal>();
            var outProds = new Dictionary<Nonterminal, List<Production>>();
            var frontier = new Stack<Nonterminal>();
            var known = new HashSet<Nonterminal>();

            frontier.Push(H1.startSymbol);
            known.Add(H1.startSymbol);
            outVars.Add(H1.startSymbol);
            outProds[H1.startSymbol] = new List<Production>();

            #region substitute the temp start symbols away, visiting variables from the start symbol
            while (frontier.Count > 0)
            {
                var A = frontier.Pop();
                List<Production> collected = outProds[A];

                foreach (Production p in H1.productionMap[A])
                {
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        // kept verbatim; only its variables still need to be scheduled
                        collected.Add(p);
                        foreach (Nonterminal x in p.GetVariables())
                        {
                            if (known.Add(x))
                            {
                                frontier.Push(x);
                                outVars.Add(x);
                                outProds[x] = new List<Production>();
                            }
                        }
                        continue;
                    }

                    // here the leading nonterminal is known to be a temp start symbol
                    Nonterminal S_B = (Nonterminal)p.First;
                    foreach (Production t in H1.productionMap[S_B])
                    {
                        int headLength = t.Rhs.Length;
                        var rhs = new GrammarSymbol[headLength + p.Rhs.Length - 1];

                        // right-hand side of t first ...
                        for (int i = 0; i < headLength; i++)
                        {
                            rhs[i] = t.Rhs[i];
                        }
                        // ... then everything of p after its leading symbol
                        for (int j = 1; j < p.Rhs.Length; j++)
                        {
                            rhs[headLength + j - 1] = p.Rhs[j];
                        }

                        Production q = new Production(A, rhs);
                        collected.Add(q);
                        foreach (Nonterminal x in q.GetVariables())
                        {
                            if (known.Add(x))
                            {
                                frontier.Push(x);
                                outVars.Add(x);
                                outProds[x] = new List<Production>();
                            }
                        }
                    }
                }
            }
            #endregion

            ContextFreeGrammar result = new ContextFreeGrammar(outVars, H1.startSymbol, outProds);
            return result;
        }