/// <summary>
        /// Builds a grammar for the prefix closure of <paramref name="g"/> whose
        /// prefix part has been normalised with <c>getEquivalentCNF</c>.
        /// </summary>
        /// <param name="g">the original grammar (may be null)</param>
        /// <returns>
        /// null if <paramref name="g"/> is null or its CNF conversion yields null; otherwise a
        /// grammar that starts at the start symbol of the normalised prefix closure and contains
        /// the productions of the (CNF) input followed by those of the normalised prefix closure
        /// </returns>
        public static ContextFreeGrammar getCNFPrefixClosure(ContextFreeGrammar g)
        {
            // Work on a CNF version of the input; a null input is passed through untouched.
            ContextFreeGrammar normalised = (g == null || g.IsInCNF()) ? g : getEquivalentCNF(g);
            if (normalised == null)
            {
                return null;
            }

            // The conversion back to CNF may drop productions of the original grammar,
            // which is why they are re-added below.
            ContextFreeGrammar closure = getEquivalentCNF(getPrefixClosure(normalised));

            var merged = normalised.GetProductions().Concat(closure.GetProductions());

            // NOTE(review): the result is a freshly constructed grammar; whether it keeps the
            // empty-word acceptance that getPrefixClosure switches on is not visible here - confirm.
            return new ContextFreeGrammar(closure.StartSymbol, merged);
        }
        /// <summary>
        /// Grades a derivation that is supposed to derive <paramref name="word"/> in <paramref name="grammar"/>.
        /// The grading is all-or-nothing: the result is either <paramref name="maxGrade"/> or 0.
        /// </summary>
        /// <param name="grammar">the grammar the derivation has to follow</param>
        /// <param name="word">the word the last derivation step has to equal</param>
        /// <param name="derivation">the derivation steps; the first step has to be the start symbol. A null list is treated like an empty one.</param>
        /// <param name="maxGrade">the grade awarded for a fully correct derivation</param>
        /// <param name="derivationType">the required derivation type (one of the <c>Derivation</c> constants)</param>
        /// <returns>the grade together with the feedback messages for the student</returns>
        public static Tuple <int, IEnumerable <String> > gradeFindDerivation(ContextFreeGrammar grammar, String word, List <GrammarSymbol[]> derivation, int maxGrade, int derivationType = Derivation.DERIVATION_ALL)
        {
            List <String> feedback = new List <String>();

            if (derivation == null || derivation.Count == 0) //empty (or missing) derivation
            {
                feedback.Add("The derivation was empty... The first step is always the start symbol.");
                return(Tuple.Create(0, (IEnumerable <String>)feedback));
            }

            var comp = new DerivationComparator();

            if (!comp.Equals(derivation[0], new GrammarSymbol[] { grammar.StartSymbol }))
            {
                feedback.Add("The first step of the derivation has to be the start symbol!");
                return(Tuple.Create(0, (IEnumerable <String>)feedback));
            }

            bool correct     = true;
            var  productions = grammar.GetProductions(); //fetched once, used for every step

            for (int i = 1; i < derivation.Count; i++)
            {
                if (Derivation.isValidDerivationStep(productions, derivation[i - 1], derivation[i], derivationType))
                {
                    continue;
                }

                //only the first invalid step is reported
                correct = false;
                feedback.Add(String.Format("There is no rule that leads from '{0}' to '{1}'", Derivation.partialWordToString(derivation[i - 1]).Replace(" ", ""), Derivation.partialWordToString(derivation[i]).Replace(" ", "")));

                //the step is fine in general, it just has the wrong derivation type
                if (derivationType != Derivation.DERIVATION_ALL && Derivation.isValidDerivationStep(productions, derivation[i - 1], derivation[i]))
                {
                    feedback.Add("Make sure to give a derivation of the correct type (e.g. leftmost / rightmost)!");
                }
                break;
            }

            // NOTE(review): the messages above strip blanks from partialWordToString's output while
            // this comparison does not - confirm that 'word' arrives in the same format.
            String lastStep = Derivation.partialWordToString(derivation[derivation.Count - 1]);

            if (!lastStep.Equals(word))
            {
                correct = false;
                feedback.Add(String.Format("The last step should be '{0}'", word));
            }

            if (correct) //perfect
            {
                feedback.Add("Correct!");
                return(Tuple.Create(maxGrade, (IEnumerable <String>)feedback));
            }

            return(Tuple.Create(0, (IEnumerable <String>)feedback));
        }
        /// <summary>
        /// Generates warnings for useless variables, i.e. variables that are
        /// unproductive or that cannot be reached from the start symbol.
        /// </summary>
        /// <param name="g">the grammar; for null no warnings are produced</param>
        /// <returns>at most two warning messages (unproductive variables, unreachable variables); empty if nothing was found</returns>
        public static List <string> getGrammarWarnings(ContextFreeGrammar g)
        {
            List <string> res = new List <string>();

            if (g == null)
            {
                return(res);
            }

            HashSet <string> variables = new HashSet <string>();

            foreach (var n in g.Variables)
            {
                variables.Add(n.ToString());
            }

            //materialize once: the set is both tested and printed
            var productiv   = g.GetUsefulNonterminals(true);
            var unproductiv = variables.Except(productiv).ToList();

            if (unproductiv.Any())
            {
                res.Add(string.Format("Warning: There are unproductive variables! ({0})", string.Join(", ", unproductiv)));
            }

            //collect everything reachable from the start symbol with a work-list
            //Lemma 4.2, p. 89, Hopcroft-Ullman
            var reachable = new HashSet <string>();
            Stack <Nonterminal> stack = new Stack <Nonterminal>();

            stack.Push(g.StartSymbol);
            reachable.Add(g.StartSymbol.ToString());
            while (stack.Count > 0)
            {
                Nonterminal v = stack.Pop();
                foreach (Production p in g.GetProductions(v))
                {
                    foreach (Nonterminal u in p.GetVariables())
                    {
                        //Add returns false for variables that were seen before
                        if (reachable.Add(u.ToString()))
                        {
                            stack.Push(u);
                        }
                    }
                }
            }
            var unreachable = variables.Except(reachable).ToList();

            //the warning depends on the unreachable variables (not on the unproductive ones)
            if (unreachable.Any())
            {
                res.Add(string.Format("Warning: There are unreachable variables! ({0})", string.Join(", ", unreachable)));
            }

            return(res);
        }
        /// <summary>
        /// Builds a CFG for the prefix closure of a grammar. The grammar is brought into
        /// CNF first; every variable X then gets a companion variable named X + "PREFIX".
        /// </summary>
        /// <param name="g">the original grammar (may be null)</param>
        /// <returns>the prefix closure, or null if the grammar or its CNF is null</returns>
        public static ContextFreeGrammar getPrefixClosure(ContextFreeGrammar g)
        {
            if (g == null)
            {
                return null;
            }

            ContextFreeGrammar cnf = g.IsInCNF() ? g : getEquivalentCNF(g);
            if (cnf == null)
            {
                return null;
            }

            // maps a variable to its companion "prefix" variable
            Func <Nonterminal, Nonterminal> toPrefix = nt => new Nonterminal(nt.Name + "PREFIX");

            Nonterminal closureStart = toPrefix(cnf.StartSymbol);
            var         rules        = new List <Production>();

            foreach (Production rule in cnf.GetProductions())
            {
                // the original production is always kept
                rules.Add(rule);

                Nonterminal head = toPrefix(rule.Lhs);
                if (rule.Rhs.Length != 2)
                {
                    // X -> a   becomes   X' -> a
                    rules.Add(new Production(head, new GrammarSymbol[] { rule.Rhs[0] }));
                    continue;
                }

                // X -> A B   becomes   X' -> A B' | A'
                rules.Add(new Production(head, new GrammarSymbol[] { rule.Rhs[0], toPrefix((Nonterminal)rule.Rhs[1]) }));
                rules.Add(new Production(head, new GrammarSymbol[] { toPrefix((Nonterminal)rule.Rhs[0]) }));
            }

            var closure = new ContextFreeGrammar(closureStart, rules);

            // the empty word is always accepted by the result
            closure.setAcceptanceForEmptyString(true);

            return closure;
        }
        /// <summary>
        /// Computes the words of exactly the given length that the start symbol produces,
        /// using <paramref name="dp"/> as memo table (variable -> length -> words).
        /// A call with length 1 creates the table entries for all variables; a call with a
        /// larger length reads the entries of all shorter lengths and adds the entry for this length.
        /// </summary>
        /// <param name="cnf">the grammar (productions X->a and X->AB are used); null yields no words</param>
        /// <param name="length">the requested word length</param>
        /// <param name="dp">the memo table shared between calls</param>
        /// <returns>the words of the requested length produced by the start symbol</returns>
        private static HashSet <string> generateWordsWithLength(ContextFreeGrammar cnf, int length, Dictionary <Nonterminal, Dictionary <int, HashSet <string> > > dp)
        {
            HashSet <string> words = new HashSet <string>();

            // no grammar -> no words
            if (cnf == null)
            {
                return words;
            }

            // length 0: only the empty word is possible
            if (length == 0)
            {
                if (cnf.acceptsEmptyString())
                {
                    words.Add("");
                }
                return words;
            }

            // length 1: set up the memo table and collect the terminals of X->a productions
            if (length == 1)
            {
                foreach (Nonterminal variable in cnf.Variables)
                {
                    var byLength = new Dictionary <int, HashSet <string> >();
                    dp.Add(variable, byLength);

                    var singles = new HashSet <string>();
                    foreach (Production rule in cnf.GetProductions(variable))
                    {
                        if (rule.IsSingleExprinal)
                        {
                            singles.Add(rule.Rhs[0].ToString());
                        }
                    }
                    byLength.Add(1, singles);

                    if (variable.Equals(cnf.StartSymbol))
                    {
                        words = singles;
                    }
                }
                return words;
            }

            // any other length: combine shorter words along the productions X->AB
            foreach (var entry in dp)
            {
                Nonterminal head     = entry.Key;
                var         combined = new HashSet <string>();
                entry.Value.Add(length, combined);

                if (head.Equals(cnf.StartSymbol))
                {
                    words = combined;
                }

                foreach (Production rule in cnf.GetProductions(head))
                {
                    if (rule.Rhs.Length == 2)
                    {
                        Dictionary <int, HashSet <string> > leftTable = null;
                        dp.TryGetValue((Nonterminal)rule.Rhs[0], out leftTable);

                        Dictionary <int, HashSet <string> > rightTable = null;
                        dp.TryGetValue((Nonterminal)rule.Rhs[1], out rightTable);

                        // try every split of the word into a non-empty left and right part
                        for (int leftLen = 1, rightLen = length - 1; leftLen < length; leftLen++, rightLen--)
                        {
                            HashSet <string> leftWords = null;
                            leftTable.TryGetValue(leftLen, out leftWords);
                            HashSet <string> rightWords = null;
                            rightTable.TryGetValue(rightLen, out rightWords);

                            foreach (string l in leftWords)
                            {
                                foreach (string r in rightWords)
                                {
                                    combined.Add(l + r);
                                }
                            }
                        }
                    }
                }
            }

            return words;
        }
        /// <summary>
        /// Performs the CYK-algorithm.
        /// </summary>
        /// <param name="grammar">the grammar (in CNF)</param>
        /// <param name="word">the word (not null)</param>
        /// <returns>
        /// the filled table of the CYK-algorithm; entry [i][j] belongs to the subword of length i + 1
        /// that starts at index j. For the empty word the table has no rows.
        /// </returns>
        public static Tuple <HashSet <Nonterminal>, List <Tuple <Production, int> > >[][] cyk(ContextFreeGrammar grammar, string word)
        {
            /*
             * Every entry in the table consists of 2 parts:
             *      1. The HashSet of all Nonterminals that produce the corresponding subword
             *      2. All possible subtrees encoded as pair (p,x)
             *          where p is the applicable production and
             *          x is the length of the word produced by the first grammar symbol on the right hand side of p
             */

            //prepare CYK table (row i holds the n - i subwords of length i + 1)
            int n = word.Length;

            var table = new Tuple <HashSet <Nonterminal>, List <Tuple <Production, int> > > [n][];
            for (int i = 0; i < n; i++)
            {
                table[i] = new Tuple <HashSet <Nonterminal>, List <Tuple <Production, int> > > [n - i];
                for (int j = 0; j < n - i; j++)
                {
                    table[i][j] = new Tuple <HashSet <Nonterminal>, List <Tuple <Production, int> > >(new HashSet <Nonterminal>(), new List <Tuple <Production, int> >());
                }
            }

            //prepare lookups (productions for a given terminal name or pair of NTs)
            var lookupNT = new Dictionary <Tuple <Nonterminal, Nonterminal>, HashSet <Production> >();
            var lookupT  = new Dictionary <string, HashSet <Production> >();

            foreach (Production p in grammar.GetProductions())
            {
                HashSet <Production> bucket = null;
                if (p.IsSingleExprinal) //form: X -> a
                {
                    if (!lookupT.TryGetValue(p.Rhs[0].Name, out bucket))
                    {
                        bucket = new HashSet <Production>();
                        lookupT.Add(p.Rhs[0].Name, bucket);
                    }
                    bucket.Add(p);
                }
                else if (p.Rhs.Length == 2) //form: X -> A B
                {
                    var pair = new Tuple <Nonterminal, Nonterminal>((Nonterminal)p.Rhs[0], (Nonterminal)p.Rhs[1]);
                    if (!lookupNT.TryGetValue(pair, out bucket))
                    {
                        bucket = new HashSet <Production>();
                        lookupNT.Add(pair, bucket);
                    }
                    bucket.Add(p);
                }
                //productions of any other form are ignored
            }

            //CYK algorithm
            //first row (check for Productions X -> a)
            for (int i = 0; i < n; i++)
            {
                HashSet <Production> applicable = null;
                if (lookupT.TryGetValue(word.Substring(i, 1), out applicable))
                {
                    foreach (Production p in applicable)
                    {
                        table[0][i].Item1.Add(p.Lhs);
                        table[0][i].Item2.Add(new Tuple <Production, int>(p, 1));
                    }
                }
            }

            //fill rest: row 'length' holds the subwords of length + 1 characters
            for (int length = 1; length < n; length++)
            {
                for (int start = 0; start + length < n; start++)
                {
                    var target = table[length][start];

                    //split the subword: the left part has part1 + 1 characters, the right part the rest
                    for (int part1 = 0; part1 < length; part1++)
                    {
                        var left  = table[part1][start].Item1;
                        var right = table[length - 1 - part1][start + 1 + part1].Item1;
                        if (left.Count == 0 || right.Count == 0)
                        {
                            continue;
                        }

                        foreach (Nonterminal leftNT in left)
                        {
                            foreach (Nonterminal rightNT in right)
                            {
                                //one key per pair (it was allocated twice before)
                                var key = new Tuple <Nonterminal, Nonterminal>(leftNT, rightNT);
                                HashSet <Production> applicable = null;
                                if (lookupNT.TryGetValue(key, out applicable))
                                {
                                    foreach (Production p in applicable)
                                    {
                                        target.Item1.Add(p.Lhs);
                                        target.Item2.Add(new Tuple <Production, int>(p, part1 + 1));
                                    }
                                }
                            }
                        }
                    }
                }
            }

            return(table);
        }
        /// <summary>
        /// Produces the EGNF (Extended Greibach Normal Form) for the grammar g.
        /// Implements a variation of the Blum-Koch algorithm.
        /// (Inf. and Comp. vol.150, pp.112-118, 1999)
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsAndUselessSymbols">if true, first removes epsilons and useless symbols, otherwise assumes that epsilons do not occur</param>
        /// <returns>Extended Greibach Normal Form of g; g itself (after the optional cleanup) if it already is in GNF</returns>
        public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
        {
            if (removeEpsilonsAndUselessSymbols)
            {
                g = g.RemoveEpsilonsAndUselessSymbols();
            }

            //nothing to do for a grammar that already is in GNF
            if (g.IsInGNF())
            {
                return(g);
            }

            //leavesP: the productions whose right-hand side does not start with a nonterminal
            //revP[X]: for every production 'A ::= X alpha' the pair (p.Rest, A)
            //         (p.Rest is presumably alpha, the right-hand side without its first symbol - confirm)
            var leavesP = new List <Production>();
            var revP    = new Dictionary <Nonterminal, List <Pair <GrammarSymbol[], Nonterminal> > >();

            //counter for fresh nonterminals; also handed to Lookup by reference
            int nonterminalID = 0;

            #region compute leavesP and revP
            foreach (Nonterminal v in g.variables)
            {
                revP[v] = new List <Pair <GrammarSymbol[], Nonterminal> >();
            }

            foreach (Production p in g.GetProductions())
            {
                if (!(p.First is Nonterminal))
                {
                    leavesP.Add(p);
                }
                else
                {
                    revP[(Nonterminal)p.First].Add(new Pair <GrammarSymbol[], Nonterminal>(p.Rest, p.Lhs));
                }
            }
            #endregion

            //W[v]: unit closure of v; startSymbol[v]: fresh temporary start symbol S_v
            var W           = new Dictionary <Nonterminal, HashSet <Nonterminal> >();
            var startSymbol = new Dictionary <Nonterminal, Nonterminal>();

            #region create new start symbols and compute unit closures
            foreach (Nonterminal v in g.variables)
            {
                W[v]           = g.GetUnitClosure(v);
                startSymbol[v] = new Nonterminal(nonterminalID++);
            }
            #endregion

            //P: the intermediate productions, grouped by their left-hand side
            var P = new Dictionary <Nonterminal, List <Production> >();

            #region construct intermediate productions in P for each variable B
            foreach (Nonterminal B in g.variables)
            {
                var S_B  = startSymbol[B];
                var W_B  = W[B]; //unit closure of B
                //Bvar is the per-B map that Lookup works on
                //(presumably original variable -> its B-specific copy, created on first use - confirm against Lookup)
                var Bvar = new Dictionary <Nonterminal, Nonterminal>();
                Stack <Nonterminal>   stack   = new Stack <Nonterminal>();
                HashSet <Nonterminal> visited = new HashSet <Nonterminal>();
                var S_B_list = new List <Production>();
                P[S_B] = S_B_list;
                //every leaf production 'A ::= alpha' yields a production of S_B built from
                //alpha and the looked-up variable of A, plus 'S_B ::= alpha' when A is in the unit closure of B
                foreach (Production p in leavesP)
                {
                    S_B_list.Add(new Production(S_B, p.Rhs, Lookup(Bvar, p.Lhs, ref nonterminalID)));
                    if (visited.Add(p.Lhs))
                    {
                        stack.Push(p.Lhs);
                    }
                    if (W_B.Contains(p.Lhs))
                    {
                        S_B_list.Add(new Production(S_B, p.Rhs));
                    }
                }

                //work-list over all variables reached via revP, starting from the left-hand sides of the leaf productions
                while (stack.Count > 0)
                {
                    Nonterminal       C   = stack.Pop();
                    Nonterminal       C_B = Lookup(Bvar, C, ref nonterminalID);
                    List <Production> C_B_list;
                    if (!P.TryGetValue(C_B, out C_B_list))
                    {
                        C_B_list = new List <Production>();
                        P[C_B]   = C_B_list;
                    }
                    //t = (alpha, D) stands for an original production 'D ::= C alpha'
                    foreach (var t in revP[C])
                    {
                        Nonterminal D   = t.Second;
                        Nonterminal D_B = Lookup(Bvar, D, ref nonterminalID);
                        C_B_list.Add(new Production(C_B, t.First, D_B));
                        //the variant without D_B is only added for a non-empty alpha and D in the unit closure of B
                        if (t.First.Length > 0 && W_B.Contains(D))
                        {
                            C_B_list.Add(new Production(C_B, t.First));
                        }
                        if (visited.Add(D))
                        {
                            stack.Push(D);
                        }
                    }
                }
            }
            #endregion


            //produce the union of P and g.productionMap in H
            //and replace each production 'A ::= B alpha' by 'A ::= S_B alpha'

            var Hprods = new Dictionary <Nonterminal, List <Production> >();
            #region compute Hprods
            //productions of the original grammar
            foreach (Nonterminal A in g.variables)
            {
                var A_prods = new List <Production>();
                Hprods[A] = A_prods;
                foreach (Production p in g.productionMap[A])
                {
                    //unit productions and productions starting with a terminal are taken over unchanged
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                    {
                        A_prods.Add(p);
                    }
                }
            }
            //the intermediate productions from P, rewritten in the same way
            foreach (Nonterminal A in P.Keys)
            {
                var A_prods = new List <Production>();
                Hprods[A] = A_prods;
                foreach (Production p in P[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                    {
                        A_prods.Add(p);
                    }
                }
            }
            #endregion
            ContextFreeGrammar H = new ContextFreeGrammar(new List <Nonterminal>(Hprods.Keys), g.startSymbol, Hprods);

            //Console.WriteLine("--------- H:");
            //H.Display(Console.Out);

            //eliminate useless symbols from H
            //this may dramatically decrease the number of productions
            ContextFreeGrammar H1 = H.RemoveUselessSymbols();

            //Console.WriteLine("---------- H1:");
            //H1.Display(Console.Out);


            //the result is collected by a work-list traversal of H1 that starts at its start symbol;
            //a variable is registered (visited set, variable list, empty production list) when it is first seen
            List <Nonterminal> egnfVars = new List <Nonterminal>();
            Dictionary <Nonterminal, List <Production> > egnfProds = new Dictionary <Nonterminal, List <Production> >();
            Stack <Nonterminal>   egnfStack   = new Stack <Nonterminal>();
            HashSet <Nonterminal> egnfVisited = new HashSet <Nonterminal>();
            egnfStack.Push(H1.startSymbol);
            egnfVisited.Add(H1.startSymbol);
            egnfVars.Add(H1.startSymbol);
            egnfProds[H1.startSymbol] = new List <Production>();

            #region eliminate temp start symbols and produce the EGNF form
            while (egnfStack.Count > 0)
            {
                var A = egnfStack.Pop();
                List <Production> A_prods = egnfProds[A];
                foreach (Production p in H1.productionMap[A])
                {
                    //productions that start with a terminal and unit productions are kept as they are
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        A_prods.Add(p);
                        foreach (Nonterminal x in p.GetVariables())
                        {
                            if (egnfVisited.Add(x))
                            {
                                egnfStack.Push(x);
                                egnfVars.Add(x);
                                egnfProds[x] = new List <Production>();
                            }
                        }
                    }
                    else
                    {
                        Nonterminal S_B = (Nonterminal)p.First; //here we know that S_B is a temp start symbol
                        //substitute S_B by the right-hand side of each of its productions:
                        //'A ::= S_B alpha' and 'S_B ::= beta' give 'A ::= beta alpha'
                        foreach (Production t in H1.productionMap[S_B])
                        {
                            int             k   = t.Rhs.Length;
                            GrammarSymbol[] rhs = new GrammarSymbol[k + p.Rhs.Length - 1];
                            for (int i = 0; i < k; i++)
                            {
                                rhs[i] = t.Rhs[i];
                            }
                            for (int i = 1; i < p.Rhs.Length; i++)
                            {
                                rhs[k + i - 1] = p.Rhs[i];
                            }
                            Production q = new Production(A, rhs);
                            A_prods.Add(q);
                            foreach (Nonterminal x in q.GetVariables())
                            {
                                if (egnfVisited.Add(x))
                                {
                                    egnfStack.Push(x);
                                    egnfVars.Add(x);
                                    egnfProds[x] = new List <Production>();
                                }
                            }
                        }
                    }
                }
            }
            #endregion

            ContextFreeGrammar egnf = new ContextFreeGrammar(egnfVars, H1.startSymbol, egnfProds);
            return(egnf);
        }
        /// <summary>
        /// Produces the GNF (Greibach Normal Form) for the grammar g.
        /// If g is not already in GNF, first makes CNF.
        /// Implements a variation of the Koch-Blum algorithm. (STACS 97, pp. 47-54)
        /// </summary>
        /// <param name="g"></param>
        /// <param name="removeEpsilonsUselessSymbolsUnitsProductions"></param>
        /// <returns></returns>
        public static ContextFreeGrammar MkGNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            if (removeEpsilonsUselessSymbolsUnitsProductions)
            {
                g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            }
            if (g.IsInGNF())
            {
                return(g);
            }

            ContextFreeGrammar cnf = MkCNF(g, false);
            var Vars = cnf.variables;

            int nonterminalID = 0;

            var M = new Dictionary <Nonterminal, Automaton <GrammarSymbol> >();

            #region construct the automata M[B] for all variables B
            int id            = 0;
            var initStateMap  = new Dictionary <Nonterminal, int>();
            var finalStateMap = new Dictionary <Nonterminal, int>();
            foreach (Nonterminal B in Vars)
            {
                initStateMap[B]  = id++;
                finalStateMap[B] = id++;
            }

            var movesOfM = new Dictionary <Nonterminal, List <Move <GrammarSymbol> > >();

            foreach (Nonterminal B in Vars)
            {
                movesOfM[B] = new List <Move <GrammarSymbol> >();
            }

            #region construct the moves of the automata
            foreach (Nonterminal B in Vars)
            {
                var variableToStateMap    = new Dictionary <Nonterminal, int>();
                Stack <Nonterminal> stack = new Stack <Nonterminal>();
                stack.Push(B);
                int initState = initStateMap[B];
                variableToStateMap[B] = finalStateMap[B];
                while (stack.Count > 0)
                {
                    Nonterminal C = stack.Pop();
                    foreach (Production p in cnf.GetProductions(C))
                    {
                        if (p.IsSingleExprinal)
                        {
                            movesOfM[B].Add(Move <GrammarSymbol> .Create(initState, variableToStateMap[C], p.First));
                        }
                        else
                        {
                            Nonterminal D = (Nonterminal)p.First; //using the fact that the grammar is in CNF
                            if (!variableToStateMap.ContainsKey(D))
                            {
                                //visit all variables reachable that have not already been visited
                                variableToStateMap.Add(D, id++);
                                stack.Push(D);
                            }
                            GrammarSymbol E = p.Rhs[1];
                            movesOfM[B].Add(Move <GrammarSymbol> .Create(variableToStateMap[D], variableToStateMap[C], E));
                        }
                    }
                }
            }
            #endregion

            foreach (Nonterminal B in Vars)
            {
                M[B] = Automaton <GrammarSymbol> .Create(initStateMap[B], new int[] { finalStateMap[B] }, movesOfM[B]);
            }
            #endregion

            var G_ = new Dictionary <Nonterminal, ContextFreeGrammar>();

            #region construct corresponding intermediate grammars G_[B] corresponding to M[B]
            foreach (Nonterminal B in Vars)
            {
                var         MB = M[B];
                bool        MBfinalStateHasVariableMoves = FinalStateHasVariableMoves(MB);
                var         productions = new Dictionary <Nonterminal, List <Production> >();
                Nonterminal startSymbol = new Nonterminal(nonterminalID++);
                var         vars        = new List <Nonterminal>();
                vars.Add(startSymbol);
                productions[startSymbol] = new List <Production>();

                foreach (var move in MB.GetMovesFrom(MB.InitialState))
                {
                    if (move.TargetState == MB.FinalState)
                    {
                        productions[startSymbol].Add(new Production(startSymbol, move.Label));
                    }
                    if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                    {
                        var C = new Nonterminal("Q" + move.TargetState);
                        productions[startSymbol].Add(new Production(startSymbol, move.Label, C));
                        if (!productions.ContainsKey(C))
                        {
                            productions[C] = new List <Production>();
                            vars.Add(C);
                        }
                    }
                }

                foreach (int state in MB.States)
                {
                    if (state != MB.InitialState)
                    {
                        foreach (Move <GrammarSymbol> move in MB.GetMovesFrom(state))
                        {
                            Nonterminal D = new Nonterminal("Q" + state);
                            Nonterminal C = new Nonterminal("Q" + move.TargetState);
                            if (!productions.ContainsKey(D))
                            {
                                productions[D] = new List <Production>();
                                vars.Add(D);
                            }
                            Nonterminal E = (Nonterminal)move.Label;
                            if (move.TargetState == MB.FinalState)
                            {
                                productions[D].Add(new Production(D, E));
                            }
                            if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                            {
                                productions[D].Add(new Production(D, E, C));
                                //we pretend here that E is a terminal
                                if (!productions.ContainsKey(C))
                                {
                                    productions[C] = new List <Production>();
                                    vars.Add(C);
                                }
                            }
                        }
                    }
                }
                G_[B] = new ContextFreeGrammar(vars, startSymbol, productions);
            }
            #endregion

            var G = new Dictionary <Nonterminal, ContextFreeGrammar>();

            #region construct the corresponding temporary G[B]'s
            foreach (Nonterminal B in Vars)
            {
                var G_B         = G_[B];
                var productions = new Dictionary <Nonterminal, List <Production> >();
                //var vars = new List<Variable>();
                Nonterminal startSymbol = G_B.startSymbol;
                productions[startSymbol] = G_B.productionMap[startSymbol];
                foreach (Nonterminal D in G_B.variables)
                {
                    if (!D.Equals(startSymbol))
                    {
                        var productions_D = new List <Production>();
                        productions[D] = productions_D;
                        foreach (Production p in G_B.productionMap[D])
                        {
                            Nonterminal E   = (Nonterminal)p.First;
                            var         G_E = G_[E];
                            if (p.IsUnit)
                            {
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    productions_D.Add(new Production(D, q.Rhs));
                                }
                            }
                            else
                            {
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                                    Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                                    symbols[q.Rhs.Length] = p.Rhs[1];
                                    productions_D.Add(new Production(D, symbols));
                                }
                            }
                        }
                    }
                }
                //ignore the variable list, it is not used
                G[B] = new ContextFreeGrammar(null, startSymbol, productions);
            }
            #endregion

            #region construct the final GNF from the G[B]'s
            var productionsGNF = new List <Production>();
            foreach (Nonterminal A in cnf.variables)
            {
                foreach (Production p in cnf.productionMap[A])
                {
                    if (p.IsSingleExprinal)
                    {
                        productionsGNF.Add(p);
                    }
                    else
                    {
                        Nonterminal B  = (Nonterminal)p.Rhs[0];
                        Nonterminal C  = (Nonterminal)p.Rhs[1];
                        var         GB = G[B];
                        foreach (Production q in GB.productionMap[GB.startSymbol])
                        {
                            GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                            Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                            symbols[q.Rhs.Length] = C;
                            productionsGNF.Add(new Production(A, symbols));
                        }
                    }
                }
            }
            foreach (Nonterminal B in Vars)
            {
                var GB = G[B];
                foreach (var kv in GB.productionMap)
                {
                    if (!kv.Key.Equals(GB.startSymbol))
                    {
                        productionsGNF.AddRange(kv.Value);
                    }
                }
            }
            #endregion

            ContextFreeGrammar gnf = new ContextFreeGrammar(cnf.startSymbol, productionsGNF);
            return(gnf);
        }
        /// <summary>
        /// Return the names of all useful nonterminal symbols.
        /// A nonterminal is first required to be "backward useful" (marked by the
        /// bottom-up pass of Lemma 4.1, Hopcroft-Ullman). Unless
        /// <paramref name="checkBackwardsOnly"/> is true, it must additionally be
        /// reachable from the start symbol (Lemma 4.2) in the grammar restricted to
        /// the backward-useful nonterminals.
        /// </summary>
        /// <param name="checkBackwardsOnly">
        /// If true, assume that all symbols are reachable from the start symbol and
        /// skip the forward reachability pass.
        /// </param>
        /// <returns>
        /// The set of names of the useful nonterminals; the empty set when the start
        /// symbol itself is not backward useful (the language is empty).
        /// </returns>
        public HashSet<string> GetUsefulNonterminals(bool checkBackwardsOnly)
        {
            HashSet<Nonterminal> useful_backwards = this.ComputeBackwardUsefulNonterminals();

            //returns the empty set because the language is empty
            if (!useful_backwards.Contains(this.startSymbol))
            {
                return new HashSet<string>();
            }

            HashSet<string> usefulSymbols = new HashSet<string>();

            //don't bother to check forward
            if (checkBackwardsOnly)
            {
                foreach (Nonterminal nt in useful_backwards)
                {
                    usefulSymbols.Add(nt.Name);
                }
                return usefulSymbols;
            }

            ContextFreeGrammar g1 = this.RestrictToVariables(useful_backwards);

            foreach (Nonterminal nt in ComputeForwardReachableNonterminals(g1))
            {
                // Kept from the original implementation: only report symbols that
                // passed both the backward and the forward pass.
                if (useful_backwards.Contains(nt))
                {
                    usefulSymbols.Add(nt.Name);
                }
            }
            return usefulSymbols;
        }

        /// <summary>
        /// Removes useless symbols from the grammar.
        /// Assumes that the language is nonempty.
        /// </summary>
        /// <returns>
        /// The grammar restricted first to the backward-useful nonterminals and then
        /// to the nonterminals reachable from the start symbol.
        /// </returns>
        /// <exception cref="AutomataException">
        /// Thrown with <c>AutomataExceptionKind.LanguageOfGrammarIsEmpty</c> when the
        /// start symbol is not backward useful.
        /// </exception>
        public ContextFreeGrammar RemoveUselessSymbols()
        {
            HashSet<Nonterminal> useful_backwards = this.ComputeBackwardUsefulNonterminals();

            if (!useful_backwards.Contains(this.startSymbol))
            {
                throw new AutomataException(AutomataExceptionKind.LanguageOfGrammarIsEmpty);
            }

            ContextFreeGrammar g1 = this.RestrictToVariables(useful_backwards);

            HashSet<Nonterminal> useful_forwards = ComputeForwardReachableNonterminals(g1);

            return g1.RestrictToVariables(useful_forwards);
        }

        /// <summary>
        /// Backward pass shared by <c>GetUsefulNonterminals</c> and
        /// <c>RemoveUselessSymbols</c> (Lemma 4.1, p. 88, Hopcroft-Ullman).
        /// Builds one <c>VariableNode</c> per nonterminal and one <c>ProductionNode</c>
        /// per production, starts <c>PropagateMark</c> from every production that has
        /// no nonterminal on its right-hand side, and collects the nonterminals whose
        /// node ended up marked.
        /// </summary>
        /// <returns>The nonterminals of this grammar whose node is marked after propagation.</returns>
        private HashSet<Nonterminal> ComputeBackwardUsefulNonterminals()
        {
            var variableNodeMap = new Dictionary<Nonterminal, VariableNode>();
            foreach (Nonterminal v in this.variables)
            {
                variableNodeMap[v] = new VariableNode();
            }

            // Productions without nonterminals on the right-hand side seed the marking.
            List<ProductionNode> productionLeaves = new List<ProductionNode>();

            foreach (Nonterminal v in this.variables)
            {
                VariableNode parent = variableNodeMap[v];
                foreach (Production p in this.productionMap[v])
                {
                    List<VariableNode> childList = new List<VariableNode>();
                    foreach (Nonterminal w in p.GetVariables())
                    {
                        childList.Add(variableNodeMap[w]);
                    }
                    VariableNode[] children = childList.ToArray();

                    ProductionNode pn = new ProductionNode(parent, children);
                    if (children.Length == 0)
                    {
                        productionLeaves.Add(pn);
                    }
                    else
                    {
                        // Register the production with each nonterminal it depends on.
                        // NOTE(review): presumably PropagateMark walks these parent links
                        // upward once the children are marked - confirm in ProductionNode.
                        foreach (VariableNode child in children)
                        {
                            child.parents.Add(pn);
                        }
                    }
                }
            }

            foreach (ProductionNode leaf in productionLeaves)
            {
                leaf.PropagateMark();
            }

            HashSet<Nonterminal> useful_backwards = new HashSet<Nonterminal>();
            foreach (Nonterminal v in this.variables)
            {
                if (variableNodeMap[v].isMarked)
                {
                    useful_backwards.Add(v);
                }
            }
            return useful_backwards;
        }

        /// <summary>
        /// Forward pass shared by <c>GetUsefulNonterminals</c> and
        /// <c>RemoveUselessSymbols</c> (Lemma 4.2, p. 89, Hopcroft-Ullman).
        /// Collects every nonterminal that can be reached from the start symbol of
        /// <paramref name="g"/> by following the nonterminals occurring in productions.
        /// </summary>
        /// <param name="g">The grammar to explore.</param>
        /// <returns>The reachable nonterminals, including the start symbol itself.</returns>
        private static HashSet<Nonterminal> ComputeForwardReachableNonterminals(ContextFreeGrammar g)
        {
            HashSet<Nonterminal> useful_forwards = new HashSet<Nonterminal>();
            Stack<Nonterminal> stack = new Stack<Nonterminal>();

            stack.Push(g.StartSymbol);
            useful_forwards.Add(g.StartSymbol);

            while (stack.Count > 0)
            {
                Nonterminal v = stack.Pop();
                foreach (Production p in g.GetProductions(v))
                {
                    foreach (Nonterminal u in p.GetVariables())
                    {
                        if (!useful_forwards.Contains(u))
                        {
                            useful_forwards.Add(u);
                            stack.Push(u);
                        }
                    }
                }
            }

            return useful_forwards;
        }