/// <summary>
        /// Grades an attempt grammar against a solution grammar by comparing the words found
        /// by <c>GrammarUtilities.findDifferenceWithTimelimit</c>.
        /// </summary>
        /// <param name="solution">the reference grammar</param>
        /// <param name="attempt">the grammar being graded</param>
        /// <param name="maxGrade">the grade awarded when no differing word is found</param>
        /// <param name="timelimit">time budget forwarded unchanged to the difference search</param>
        /// <returns>the grade together with the feedback messages</returns>
        public static Tuple <int, IEnumerable <String> > gradeGrammarEquality(ContextFreeGrammar solution, ContextFreeGrammar attempt, int maxGrade, long timelimit)
        {
            var messages   = new List<String>();
            var comparison = GrammarUtilities.findDifferenceWithTimelimit(solution, attempt, true, timelimit);

            long         agreed         = comparison.Item1;
            List<String> onlyInSolution = comparison.Item2;
            List<String> onlyInAttempt  = comparison.Item3;

            bool hasMissing = onlyInSolution.Count > 0;
            bool hasSurplus = onlyInAttempt.Count > 0;

            // No differing word at all: full marks.
            if (!hasMissing && !hasSurplus)
            {
                messages.Add(String.Format("All tests passed! (checked {0} words)", agreed));
                return Tuple.Create(maxGrade, (IEnumerable<String>)messages);
            }

            // At least one list is non-empty here, so the total is strictly positive.
            long total = agreed + onlyInSolution.Count + onlyInAttempt.Count;

            if (hasMissing)
            {
                double missingShare = onlyInSolution.Count * 100 / (double)total;
                messages.Add(String.Format("Your solution misses words (~{0:F2}% of checked words). One of them is \"{1}\".", missingShare, onlyInSolution[0]));
            }
            if (hasSurplus)
            {
                double surplusShare = onlyInAttempt.Count * 100 / (double)total;
                messages.Add(String.Format("Your solution accepts too many words (~{0:F2}% of checked words). One of them is \"{1}\".", surplusShare, onlyInAttempt[0]));
            }

            // Partial credit: the fraction of checked words both grammars agree on, rounded down.
            int partialGrade = (int)Math.Floor(agreed * maxGrade / (double)total);

            return Tuple.Create(partialGrade, (IEnumerable<String>)messages);
        }
        /// <summary>
        /// Builds a grammar that keeps only the variables contained in <paramref name="varSet"/>
        /// and, for each of them, only the productions for which
        /// <c>AreVariablesContainedIn(varSet)</c> holds.
        /// </summary>
        /// <param name="varSet">the variables to keep</param>
        /// <returns>the restricted grammar, using the same start symbol</returns>
        /// <exception cref="ArgumentException">the start symbol has no production after the restriction</exception>
        public ContextFreeGrammar RestrictToVariables(HashSet <Nonterminal> varSet)
        {
            // Every requested variable gets a (possibly empty) production list.
            var restricted = new Dictionary<Nonterminal, List<Production>>();
            foreach (Nonterminal requested in varSet)
            {
                restricted[requested] = new List<Production>();
            }

            // Walk the grammar's own variable list so the original variable order is kept.
            var keptVariables = new List<Nonterminal>();
            foreach (Nonterminal candidate in variables)
            {
                if (!varSet.Contains(candidate))
                {
                    continue;
                }

                keptVariables.Add(candidate);
                foreach (Production rule in productionMap[candidate])
                {
                    if (rule.AreVariablesContainedIn(varSet))
                    {
                        restricted[candidate].Add(rule);
                    }
                }
            }

            List<Production> startRules;
            bool startHasRules = restricted.TryGetValue(startSymbol, out startRules) && startRules.Count != 0;
            if (!startHasRules)
            {
                throw new ArgumentException("Start symbol is not the LHS of any production.");
            }

            return new ContextFreeGrammar(keptVariables, startSymbol, restricted);
        }
        /// <summary>
        /// Builds a grammar in CNF for the prefix closure of a given grammar.
        /// </summary>
        /// <param name="g">the original grammar; converted to CNF first when necessary</param>
        /// <returns>
        /// the prefix-closure grammar, or null when <paramref name="g"/> is null or has no
        /// equivalent CNF
        /// </returns>
        public static ContextFreeGrammar getCNFPrefixClosure(ContextFreeGrammar g)
        {
            if (g == null)
            {
                return null;
            }

            ContextFreeGrammar cnf = g.IsInCNF() ? g : getEquivalentCNF(g);
            if (cnf == null)
            {
                return null;
            }

            // ATTENTION: normalising the closure may remove productions of the original grammar,
            // which is why they are merged back in below.
            ContextFreeGrammar closure = getEquivalentCNF(getPrefixClosure(cnf));

            var merged = cnf.GetProductions().Concat(closure.GetProductions());

            return new ContextFreeGrammar(closure.StartSymbol, merged);
        }
        /// <summary>
        /// Decides with the CYK algorithm whether a word is generated by the given grammar.
        /// </summary>
        /// <param name="grammar">the grammar; converted to CNF first when necessary</param>
        /// <param name="word">the word to test</param>
        /// <returns>
        /// true if the start symbol derives the word; false otherwise, and also false when
        /// either argument is null or the grammar has no equivalent CNF
        /// </returns>
        public static bool isWordInGrammar(ContextFreeGrammar grammar, string word)
        {
            if (grammar == null || word == null)
            {
                return false;
            }

            ContextFreeGrammar cnf = grammar.IsInCNF() ? grammar : getEquivalentCNF(grammar);
            if (cnf == null)
            {
                return false;
            }

            // The CYK table has no row for the empty word; it is decided separately.
            if (word.Length == 0)
            {
                return cnf.acceptsEmptyString();
            }

            // The cell covering the whole word is the first cell of the last row.
            var table    = cyk(cnf, word);
            var fullSpan = table[word.Length - 1][0];

            return fullSpan.Item1.Contains(cnf.StartSymbol);
        }
        /// <summary>
        /// Eliminates all unit productions (A -> B with A and B variables) and afterwards
        /// removes the useless symbols.
        /// Assumes that the grammar has no epsilon productions.
        /// </summary>
        /// <returns>the grammar without unit productions and useless symbols</returns>
        public ContextFreeGrammar RemoveUnitProductions()
        {
            var rewritten = new Dictionary<Nonterminal, List<Production> >();

            foreach (Nonterminal head in variables)
            {
                rewritten[head] = new List<Production>();
            }

            // Each variable inherits every non-unit production of the variables in its unit closure.
            foreach (Nonterminal head in variables)
            {
                foreach (Nonterminal member in GetUnitClosure(head))
                {
                    foreach (Production rule in productionMap[member])
                    {
                        if (rule.IsUnit)
                        {
                            continue;
                        }
                        rewritten[head].Add(new Production(head, rule.Rhs));
                    }
                }
            }

            var withoutUnits = new ContextFreeGrammar(variables, startSymbol, rewritten);

            return withoutUnits.RemoveUselessSymbols();
        }
        /// <summary>
        /// Produces an equivalent grammar in CNF, or null if the grammar generates no word.
        /// </summary>
        /// <param name="g">the original grammar</param>
        /// <returns>
        /// <paramref name="g"/> itself when it is null or already in CNF; otherwise the converted
        /// grammar; null when the conversion fails and the grammar does not accept the empty string
        /// </returns>
        public static ContextFreeGrammar getEquivalentCNF(ContextFreeGrammar g)
        {
            // Nothing to convert: null stays null, a CNF grammar is returned as is.
            if (g == null || g.IsInCNF())
            {
                return g;
            }

            try
            {
                ContextFreeGrammar converted = ContextFreeGrammar.MkCNF(g);

                // carry the empty-string flag over from the original grammar
                converted.setAcceptanceForEmptyString(g.acceptsEmptyString());

                return converted;
            }
            catch (AutomataException)
            {
                if (!g.acceptsEmptyString())
                {
                    return null;
                }

                // The conversion failed but the empty string is accepted: return a placeholder
                // grammar whose only production is S -> S S, flagged as accepting the empty string.
                var loop        = new Production(new Nonterminal("S"), new GrammarSymbol[] { new Nonterminal("S"), new Nonterminal("S") });
                var placeholder = new ContextFreeGrammar(new Nonterminal("S"), new Production[] { loop });
                placeholder.setAcceptanceForEmptyString(true);
                return placeholder;
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs a <c>GrammarParser</c> over the given text and returns the grammar it produces.
        /// </summary>
        /// <param name="mkExprinal">
        /// callback handed to the parser; presumably turns a character into a terminal value
        /// (TODO confirm against GrammarParser)
        /// </param>
        /// <param name="buf">the text to parse</param>
        /// <returns>the grammar obtained from the parser after parsing</returns>
        public static ContextFreeGrammar Parse(Func <char, T> mkExprinal, string buf)
        {
            var parser = new GrammarParser<T>(new Lexer(buf), mkExprinal);

            parser.Parse();

            return parser.GetGrammar();
        }
        /// <summary>
        /// Grades a derivation of a word: the derivation must start with the start symbol, every
        /// step must be a valid derivation step of the requested type, and the last step must
        /// equal the word. The grade is all-or-nothing.
        /// </summary>
        /// <param name="grammar">the grammar the derivation refers to</param>
        /// <param name="word">the word the derivation has to end in</param>
        /// <param name="derivation">the derivation steps; null is treated like an empty derivation</param>
        /// <param name="maxGrade">the grade awarded for a fully correct derivation</param>
        /// <param name="derivationType">the required derivation type (one of the <c>Derivation</c> constants)</param>
        /// <returns><paramref name="maxGrade"/> or 0, together with the feedback messages</returns>
        public static Tuple <int, IEnumerable <String> > gradeFindDerivation(ContextFreeGrammar grammar, String word, List <GrammarSymbol[]> derivation, int maxGrade, int derivationType = Derivation.DERIVATION_ALL)
        {
            var comp     = new DerivationComparator();
            var feedback = new List<String>();

            if (derivation == null || derivation.Count == 0) //empty
            {
                feedback.Add("The derivation was empty... The first step is always the start symbol.");
                return Tuple.Create(0, (IEnumerable<String>)feedback);
            }

            if (!comp.Equals(derivation[0], new GrammarSymbol[] { grammar.StartSymbol }))
            {
                feedback.Add("The first step of the derivation has to be the start symbol!");
                return Tuple.Create(0, (IEnumerable<String>)feedback);
            }

            bool correct     = true;
            var  productions = grammar.GetProductions();

            for (int i = 1; i < derivation.Count; i++)
            {
                GrammarSymbol[] from = derivation[i - 1];
                GrammarSymbol[] to   = derivation[i];

                if (Derivation.isValidDerivationStep(productions, from, to, derivationType))
                {
                    continue;
                }

                correct = false;
                feedback.Add(String.Format("There is no rule that leads from '{0}' to '{1}'", Derivation.partialWordToString(from).Replace(" ", ""), Derivation.partialWordToString(to).Replace(" ", "")));

                // The step is valid in general but not for the requested derivation type: add a hint.
                if (derivationType != Derivation.DERIVATION_ALL && Derivation.isValidDerivationStep(productions, from, to))
                {
                    feedback.Add("Make sure to give a derivation of the correct type (e.g. leftmost / rightmost)!");
                }

                // only the first invalid step is reported
                break;
            }

            String lastStep = Derivation.partialWordToString(derivation[derivation.Count - 1]);

            if (!lastStep.Equals(word))
            {
                correct = false;
                feedback.Add(String.Format("The last step should be '{0}'", word));
            }

            if (correct)
            {
                feedback.Add("Correct!");
                return Tuple.Create(maxGrade, (IEnumerable<String>)feedback);
            }

            return Tuple.Create(0, (IEnumerable<String>)feedback);
        }
        /// <summary>
        /// Collects the words of the grammar for every length from 0 up to
        /// <paramref name="maxLength"/>, shortest lengths first.
        /// </summary>
        /// <param name="grammar">the grammar; it is converted to CNF first</param>
        /// <param name="maxLength">the largest word length to generate (inclusive)</param>
        /// <returns>the generated words, grouped by increasing length</returns>
        public static List <string> generateShortestWords(ContextFreeGrammar grammar, int maxLength)
        {
            // Table shared across all lengths: longer words are assembled from shorter ones.
            var memo  = new Dictionary<Nonterminal, Dictionary<int, HashSet<string> > >();
            var cnf   = getEquivalentCNF(grammar);
            var words = new List<String>();

            int currentLength = 0;
            while (currentLength <= maxLength)
            {
                words.AddRange(generateWordsWithLength(cnf, currentLength, memo));
                currentLength++;
            }

            return words;
        }
        /// <summary>
        /// Generates warnings for useless variables: variables that are unproductive and
        /// variables that cannot be reached from the start symbol.
        /// </summary>
        /// <param name="g">the grammar</param>
        /// <returns>the list of warnings; empty if there is nothing to report</returns>
        public static List <string> getGrammarWarnings(ContextFreeGrammar g)
        {
            var warnings  = new List<string>();
            var variables = new HashSet<string>();

            foreach (var n in g.Variables)
            {
                variables.Add(n.ToString());
            }

            // Unproductive = every variable not reported by GetUsefulNonterminals(true).
            // Materialized once so the lazy query is not re-evaluated for the check and the join.
            var unproductive = variables.Except(g.GetUsefulNonterminals(true)).ToList();

            if (unproductive.Count > 0)
            {
                warnings.Add(string.Format("Warning: There are unproductive variables! ({0})", string.Join(", ", unproductive)));
            }

            // Reachability by depth-first search from the start symbol
            // (Lemma 4.2, p. 89, Hopcroft-Ullman).
            var reachable = new HashSet<string>();
            var pending   = new Stack<Nonterminal>();

            pending.Push(g.StartSymbol);
            reachable.Add(g.StartSymbol.ToString());
            while (pending.Count > 0)
            {
                Nonterminal v = pending.Pop();
                foreach (Production p in g.GetProductions(v))
                {
                    foreach (Nonterminal u in p.GetVariables())
                    {
                        // HashSet.Add returns false for names that were already visited.
                        if (reachable.Add(u.ToString()))
                        {
                            pending.Push(u);
                        }
                    }
                }
            }

            var unreachable = variables.Except(reachable).ToList();

            // The warning must depend on the unreachable set, not on the unproductive one.
            if (unreachable.Count > 0)
            {
                warnings.Add(string.Format("Warning: There are unreachable variables! ({0})", string.Join(", ", unreachable)));
            }

            return warnings;
        }
        /// <summary>
        /// Determines the longest prefix of a word that is still recognized by the prefix closure
        /// of a grammar (CYK algorithm run on the prefix-closure grammar).
        /// </summary>
        /// <param name="grammar">the grammar; converted to CNF first when necessary</param>
        /// <param name="word">the word</param>
        /// <returns>
        /// -1 if an argument is null or the grammar has no equivalent CNF;
        /// -2 if the word itself is in the grammar;
        /// otherwise the length of the longest recognized prefix (0 if there is none)
        /// </returns>
        public static int longestPrefixLength(ContextFreeGrammar grammar, string word)
        {
            if (grammar == null || word == null)
            {
                return -1;
            }

            ContextFreeGrammar cnf = grammar.IsInCNF() ? grammar : getEquivalentCNF(grammar);
            if (cnf == null)
            {
                return -1;
            }

            // The empty word has no CYK table; it is either accepted as a whole or has no prefix.
            if (word.Length == 0)
            {
                return cnf.acceptsEmptyString() ? -2 : 0;
            }

            ContextFreeGrammar closure = getCNFPrefixClosure(cnf);
            var table   = cyk(closure, word);
            int lastRow = word.Length - 1;

            // The cell spanning the whole word holds the original start symbol: word is accepted.
            if (table[lastRow][0].Item1.Contains(cnf.StartSymbol))
            {
                return -2;
            }

            // Scan the first column from the longest prefix downwards for the closure's start symbol.
            int row = lastRow;
            while (row >= 0)
            {
                if (table[row][0].Item1.Contains(closure.StartSymbol))
                {
                    return row + 1;
                }
                row--;
            }

            return 0;
        }
        /// <summary>
        /// Builds a CFG for the prefix closure of a given grammar. Every nonterminal X gets a
        /// companion nonterminal named X + "PREFIX"; the companion of the start symbol becomes the
        /// new start symbol, and the result is flagged as accepting the empty string.
        /// </summary>
        /// <param name="g">the original grammar; converted to CNF first when necessary</param>
        /// <returns>
        /// the prefix-closure grammar, or null when <paramref name="g"/> is null or has no
        /// equivalent CNF
        /// </returns>
        public static ContextFreeGrammar getPrefixClosure(ContextFreeGrammar g)
        {
            if (g == null)
            {
                return null;
            }

            ContextFreeGrammar cnf = g.IsInCNF() ? g : getEquivalentCNF(g);
            if (cnf == null)
            {
                return null;
            }

            // Maps a nonterminal to its "prefix" companion.
            Func<Nonterminal, Nonterminal> companionOf = nt => new Nonterminal(nt.Name + "PREFIX");

            Nonterminal closureStart = companionOf(cnf.StartSymbol);
            var         rules        = new List<Production>();

            foreach (Production source in cnf.GetProductions())
            {
                // the original production is always kept
                rules.Add(source);

                Nonterminal companionHead = companionOf(source.Lhs);
                if (source.Rhs.Length != 2)
                {
                    // X -> a   ==>   X' -> a
                    rules.Add(new Production(companionHead, new GrammarSymbol[] { source.Rhs[0] }));
                    continue;
                }

                // X -> A B   ==>   X' -> A B' | A'
                Nonterminal rightCompanion = companionOf((Nonterminal)source.Rhs[1]);
                rules.Add(new Production(companionHead, new GrammarSymbol[] { source.Rhs[0], rightCompanion }));

                Nonterminal leftCompanion = companionOf((Nonterminal)source.Rhs[0]);
                rules.Add(new Production(companionHead, new GrammarSymbol[] { leftCompanion }));
            }

            var closure = new ContextFreeGrammar(closureStart, rules);

            closure.setAcceptanceForEmptyString(true);

            return closure;
        }
        /// <summary>
        /// Counts how many words of length up to <paramref name="length"/> are in and out of the
        /// language. Words are enumerated length by length over the grammar's non-variable
        /// symbols; the enumeration stops early once both minimum counts are reached.
        /// </summary>
        /// <param name="grammar">the grammar</param>
        /// <param name="length">the maximal word length to enumerate (inclusive)</param>
        /// <param name="minAcc">stop early once at least this many accepted words were found ...</param>
        /// <param name="minRej">... and at least this many rejected words were found</param>
        /// <returns>a pair (number of accepted words, number of rejected words)</returns>
        public static Tuple <int, int> getMinWordInAndOut(ContextFreeGrammar grammar, int length, int minAcc, int minRej)
        {
            int acc = 0, rej = 0;
            var literals = grammar.GetNonVariableSymbols();

            // Length 0 consists of exactly the empty word.
            var wordsOfEqualLength = new HashSet<string>();
            wordsOfEqualLength.Add("");

            for (int i = 0; i <= length; i++)
            {
                if (i > 0)
                {
                    // Extend every word of the previous length by every literal.
                    var cur = new HashSet<string>();
                    foreach (string word in wordsOfEqualLength)
                    {
                        foreach (GrammarSymbol literal in literals)
                        {
                            cur.Add(word + literal.ToString());
                        }
                    }
                    wordsOfEqualLength = cur;

                    // Without any literal there are no longer words. Stop here instead of
                    // re-seeding with (and re-counting) the empty word on every iteration.
                    if (wordsOfEqualLength.Count == 0)
                    {
                        break;
                    }
                }

                // Check for each word of the current length whether the grammar accepts it.
                foreach (string word in wordsOfEqualLength)
                {
                    if (isWordInGrammar(grammar, word))
                    {
                        acc++;
                    }
                    else
                    {
                        rej++;
                    }
                }

                if (acc >= minAcc && rej >= minRej)
                {
                    break;
                }
            }

            return new Tuple<int, int>(acc, rej);
        }
        /// <summary>
        /// Eliminates the epsilon productions and afterwards removes the useless symbols.
        /// Assumes that the grammar does not accept the empty string and that the language is nonempty.
        /// </summary>
        /// <returns>the grammar without epsilon productions and useless symbols</returns>
        public ContextFreeGrammar RemoveEpsilonsAndUselessSymbols()
        {
            // Epsilon elimination follows the algorithm of Theorem 4.3, p. 90-91, Hopcroft-Ullman.
            HashSet<Nonterminal> nullableVariables = GetNullables();

            var epsilonFree = new Dictionary<Nonterminal, List<Production> >();
            foreach (Nonterminal variable in variables)
            {
                epsilonFree[variable] = new List<Production>(EliminateNullables(variable, nullableVariables));
            }

            var withoutEpsilons = new ContextFreeGrammar(variables, StartSymbol, epsilonFree);

            return withoutEpsilons.RemoveUselessSymbols();
        }
        /// <summary>
        /// Returns the words of exactly the given length that the start symbol of a CNF grammar
        /// generates, using and extending a table of already generated words.
        /// </summary>
        /// <param name="cnf">the grammar in CNF; null yields an empty result</param>
        /// <param name="length">the exact word length</param>
        /// <param name="dp">
        /// table nonterminal -> (length -> words). The call with length 1 creates the entry of
        /// every variable; a call with a larger length adds that length to every entry and reads
        /// the entries of all smaller lengths, so lengths have to be requested in increasing
        /// order on the same table.
        /// </param>
        /// <returns>the words of that length derivable from the start symbol</returns>
        private static HashSet <string> generateWordsWithLength(ContextFreeGrammar cnf, int length, Dictionary <Nonterminal, Dictionary <int, HashSet <string> > > dp)
        {
            var startWords = new HashSet<string>();

            // no grammar -> no words
            if (cnf == null)
            {
                return startWords;
            }

            // length 0: only the empty word is possible
            if (length == 0)
            {
                if (cnf.acceptsEmptyString())
                {
                    startWords.Add("");
                }
                return startWords;
            }

            // length 1: initialise the table from the productions X -> a
            if (length == 1)
            {
                foreach (Nonterminal variable in cnf.Variables)
                {
                    var byLength = new Dictionary<int, HashSet<string> >();
                    dp.Add(variable, byLength);

                    var singles = new HashSet<string>();
                    foreach (Production rule in cnf.GetProductions(variable))
                    {
                        if (rule.IsSingleExprinal)
                        {
                            singles.Add(rule.Rhs[0].ToString());
                        }
                    }
                    byLength.Add(1, singles);

                    if (variable.Equals(cnf.StartSymbol))
                    {
                        startWords = singles;
                    }
                }
                return startWords;
            }

            // every other length: combine shorter words along the productions X -> A B
            foreach (KeyValuePair<Nonterminal, Dictionary<int, HashSet<string> > > slot in dp)
            {
                Nonterminal head = slot.Key;
                Dictionary<int, HashSet<string> > headTable = slot.Value;

                var combined = new HashSet<string>();
                headTable.Add(length, combined);
                if (head.Equals(cnf.StartSymbol))
                {
                    startWords = combined;
                }

                foreach (Production rule in cnf.GetProductions(head))
                {
                    // only productions of the form X -> A B contribute
                    if (rule.Rhs.Length == 2)
                    {
                        Nonterminal leftSymbol = (Nonterminal)rule.Rhs[0];
                        Dictionary<int, HashSet<string> > leftTable = null;
                        dp.TryGetValue(leftSymbol, out leftTable);

                        Nonterminal rightSymbol = (Nonterminal)rule.Rhs[1];
                        Dictionary<int, HashSet<string> > rightTable = null;
                        dp.TryGetValue(rightSymbol, out rightTable);

                        // try every split of the length into a left and a right part
                        for (int leftLength = 1; leftLength < length; leftLength++)
                        {
                            HashSet<string> leftWords = null;
                            leftTable.TryGetValue(leftLength, out leftWords);
                            HashSet<string> rightWords = null;
                            rightTable.TryGetValue(length - leftLength, out rightWords);

                            foreach (string prefix in leftWords)
                            {
                                foreach (string suffix in rightWords)
                                {
                                    combined.Add(prefix + suffix);
                                }
                            }
                        }
                    }
                }
            }

            return startWords;
        }
        /// <summary>
        /// Finds the difference of 2 grammars by comparing their words length by length
        /// (starting at length 0) until the time limit is exceeded.
        /// </summary>
        /// <param name="grammar1">the first grammar</param>
        /// <param name="grammar2">the second grammar</param>
        /// <param name="multiple">true (find all), false (stop at the first difference)</param>
        /// <param name="timelimit">
        /// the time in milliseconds after which no further word length is started (CARE: the
        /// limit is only checked between two lengths, so a long last step can overrun it)
        /// </param>
        /// <returns>a 3-Tuple, first = number of found words that are in both grammars, second = list of words that are only in grammar 1, third = list of words only in grammar 2</returns>
        public static Tuple <long, List <String>, List <String> > findDifferenceWithTimelimit(ContextFreeGrammar grammar1, ContextFreeGrammar grammar2, bool multiple, long timelimit)
        {
            var watch = new System.Diagnostics.Stopwatch();

            watch.Start();

            var cnf1 = getEquivalentCNF(grammar1);
            var cnf2 = getEquivalentCNF(grammar2);

            long correct = 0;
            var  g1extra = new List<String>();
            var  g2extra = new List<String>();

            // both empty -> nothing to compare
            if (cnf1 == null && cnf2 == null)
            {
                return Tuple.Create(correct, g1extra, g2extra);
            }

            var dp1 = new Dictionary<Nonterminal, Dictionary<int, HashSet<string> > >();
            var dp2 = new Dictionary<Nonterminal, Dictionary<int, HashSet<string> > >();

            int length = 0;

            while (watch.ElapsedMilliseconds < timelimit)
            {
                // A null CNF (only one of the two can be null here) yields an empty word set.
                var words1 = generateWordsWithLength(cnf1, length, dp1);
                var words2 = generateWordsWithLength(cnf2, length, dp2);

                foreach (string w1 in words1)
                {
                    if (words2.Contains(w1))
                    {
                        // Only the count is needed; the common words themselves are not kept.
                        correct++;
                        continue;
                    }

                    g1extra.Add(w1);
                    if (!multiple)
                    {
                        return Tuple.Create(correct, g1extra, g2extra);
                    }
                }
                foreach (string w2 in words2)
                {
                    if (!words1.Contains(w2))
                    {
                        g2extra.Add(w2);
                        if (!multiple)
                        {
                            return Tuple.Create(correct, g1extra, g2extra);
                        }
                    }
                }

                length++;
            }

            return Tuple.Create(correct, g1extra, g2extra);
        }
        /// <summary>
        /// Finds the difference of 2 grammars by comparing all their words up to a maximal length.
        /// </summary>
        /// <param name="grammar1">the first grammar</param>
        /// <param name="grammar2">the second grammar</param>
        /// <param name="multiple">true (find all), false (stop at the first difference)</param>
        /// <param name="max_length">the maximal word length to be checked</param>
        /// <returns>a 3-Tuple, first = number of found words that are in both grammars, second = list of words that are only in grammar 1, third = list of words only in grammar 2</returns>
        public static Tuple <long, List <String>, List <String> > findDifference(ContextFreeGrammar grammar1, ContextFreeGrammar grammar2, bool multiple, int max_length)
        {
            var cnf1 = getEquivalentCNF(grammar1);
            var cnf2 = getEquivalentCNF(grammar2);

            long correct = 0;
            var  g1extra = new List<String>();
            var  g2extra = new List<String>();

            // both empty -> nothing to compare
            if (cnf1 == null && cnf2 == null)
            {
                return Tuple.Create(correct, g1extra, g2extra);
            }

            // Check for the empty word. Exactly one CNF may still be null here (that grammar
            // generates no word at all); it is treated as not accepting the empty word.
            bool empty1 = cnf1 != null && cnf1.acceptsEmptyString();
            bool empty2 = cnf2 != null && cnf2.acceptsEmptyString();

            if (empty1 && !empty2)
            {
                g1extra.Add("");
                if (!multiple)
                {
                    return Tuple.Create(correct, g1extra, g2extra);
                }
            }
            else if (!empty1 && empty2)
            {
                g2extra.Add("");
                if (!multiple)
                {
                    return Tuple.Create(correct, g1extra, g2extra);
                }
            }
            else if (empty1)
            {
                // Counted only when both grammars really accept the empty word.
                correct++;
            }

            var dp1 = new Dictionary<Nonterminal, Dictionary<int, HashSet<string> > >();
            var dp2 = new Dictionary<Nonterminal, Dictionary<int, HashSet<string> > >();

            for (int length = 1; length <= max_length; length++)
            {
                // generateWordsWithLength returns an empty set for a null CNF.
                var words1 = generateWordsWithLength(cnf1, length, dp1);
                var words2 = generateWordsWithLength(cnf2, length, dp2);

                foreach (string w1 in words1)
                {
                    if (words2.Contains(w1))
                    {
                        correct++;
                        continue;
                    }

                    g1extra.Add(w1);
                    if (!multiple)
                    {
                        return Tuple.Create(correct, g1extra, g2extra);
                    }
                }
                foreach (string w2 in words2)
                {
                    if (!words1.Contains(w2))
                    {
                        g2extra.Add(w2);
                        if (!multiple)
                        {
                            return Tuple.Create(correct, g1extra, g2extra);
                        }
                    }
                }
            }

            return Tuple.Create(correct, g1extra, g2extra);
        }
        /// <summary>
        /// Performs the CYK-algorithm.
        /// </summary>
        /// <param name="grammar">the grammar (in CNF)</param>
        /// <param name="word">the word (not null)</param>
        /// <returns>
        /// the filled table of the CYK-algorithm: entry [len - 1][start] describes the subword of
        /// length len that begins at index start. For the empty word the table has no rows.
        /// </returns>
        public static Tuple <HashSet <Nonterminal>, List <Tuple <Production, int> > >[][] cyk(ContextFreeGrammar grammar, string word)
        {
            /*
             * Every entry in the table consists of 2 parts:
             *      1. The HashSet of all nonterminals that produce the corresponding subword
             *      2. All possible subtrees encoded as pairs (p,x)
             *          where p is the applicable production and
             *          x is the length of the word produced by the first grammar symbol on the right hand side of p
             */

            //prepare CYK table (row i has n - i cells)
            int n = word.Length;

            var table = new Tuple<HashSet<Nonterminal>, List<Tuple<Production, int> > >[n][];
            for (int i = 0; i < n; i++)
            {
                table[i] = new Tuple<HashSet<Nonterminal>, List<Tuple<Production, int> > >[n - i];
                for (int j = 0; j < n - i; j++)
                {
                    table[i][j] = Tuple.Create(new HashSet<Nonterminal>(), new List<Tuple<Production, int> >());
                }
            }

            //prepare lookups (productions for a given terminal name or pair of nonterminals)
            var lookupNT = new Dictionary<Tuple<Nonterminal, Nonterminal>, HashSet<Production> >();
            var lookupT  = new Dictionary<string, HashSet<Production> >();

            foreach (Production p in grammar.GetProductions())
            {
                HashSet<Production> bucket = null;
                if (p.IsSingleExprinal) //form: X -> a
                {
                    if (!lookupT.TryGetValue(p.Rhs[0].Name, out bucket))
                    {
                        bucket = new HashSet<Production>();
                        lookupT.Add(p.Rhs[0].Name, bucket);
                    }
                    bucket.Add(p);
                }
                else if (p.Rhs.Length == 2) //form: X -> A B
                {
                    var pair = new Tuple<Nonterminal, Nonterminal>((Nonterminal)p.Rhs[0], (Nonterminal)p.Rhs[1]);
                    if (!lookupNT.TryGetValue(pair, out bucket))
                    {
                        bucket = new HashSet<Production>();
                        lookupNT.Add(pair, bucket);
                    }
                    bucket.Add(p);
                }
            }

            //CYK algorithm
            //first row (check for productions X -> a)
            for (int i = 0; i < n; i++)
            {
                HashSet<Production> applicable = null;
                if (lookupT.TryGetValue(word.Substring(i, 1), out applicable))
                {
                    foreach (Production p in applicable)
                    {
                        table[0][i].Item1.Add(p.Lhs);
                        table[0][i].Item2.Add(new Tuple<Production, int>(p, 1));
                    }
                }
            }

            //fill rest: table[length][start] covers length + 1 symbols beginning at start
            for (int length = 1; length < n; length++)
            {
                for (int start = 0; start + length < n; start++)
                {
                    var target = table[length][start];

                    //split the subword into a left part of part1 + 1 symbols and the remaining right part
                    for (int part1 = 0; part1 < length; part1++)
                    {
                        var left  = table[part1][start].Item1;
                        var right = table[length - 1 - part1][start + 1 + part1].Item1;
                        if (left.Count == 0 || right.Count == 0)
                        {
                            continue;
                        }

                        foreach (Nonterminal leftNT in left)
                        {
                            foreach (Nonterminal rightNT in right)
                            {
                                //one key per pair; it is reused for the lookup
                                var pair = new Tuple<Nonterminal, Nonterminal>(leftNT, rightNT);
                                HashSet<Production> applicable = null;
                                if (lookupNT.TryGetValue(pair, out applicable))
                                {
                                    foreach (Production p in applicable)
                                    {
                                        target.Item1.Add(p.Lhs);
                                        target.Item2.Add(new Tuple<Production, int>(p, part1 + 1));
                                    }
                                }
                            }
                        }
                    }
                }
            }

            return table;
        }
        /// <summary>
        /// Produces the CNF (Chomsky Normal Form) for the grammar g.
        /// If removeEpsilonsUselessSymbolsUnitsProductions is true, it first eliminates epsilons,
        /// useless symbols, and unit productions; otherwise it assumes that g contains none of them.
        /// </summary>
        public static ContextFreeGrammar MkCNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            // Optional preprocessing; when skipped, the caller vouches that g is already free of
            // epsilons, useless symbols and unit productions.
            if (removeEpsilonsUselessSymbolsUnitsProductions)
            {
                g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            }

            // Single counter shared by both phases; every nonterminal introduced here is named from it.
            int freshId = 0;

            //Implements algo in Theorem 4.5, page 92-93, in Hopcroft-Ullman

            // ---- Phase 1: make productions of the form V --> V0...Vn or V --> a ----
            List<Nonterminal> phase1Vars = new List<Nonterminal>(g.variables);
            var phase1Prods = new Dictionary<Nonterminal, List<Production>>();
            foreach (Nonterminal v in g.variables)
            {
                phase1Prods[v] = new List<Production>();
            }

            // Maps a non-variable symbol to the fresh nonterminal standing in for it, so each
            // such symbol gets exactly one stand-in across the whole grammar.
            var standInFor = new Dictionary<GrammarSymbol, Nonterminal>();
            foreach (Nonterminal v in g.variables)
            {
                foreach (Production p in g.productionMap[v])
                {
                    if (p.ContainsNoExprinals || p.IsCNF)
                    {
                        // Nothing to replace in this production.
                        phase1Prods[v].Add(p);
                        continue;
                    }

                    int len = p.Rhs.Length;
                    GrammarSymbol[] rhs = new GrammarSymbol[len];
                    for (int i = 0; i < len; i++)
                    {
                        GrammarSymbol sym = p.Rhs[i];
                        if (sym is Nonterminal)
                        {
                            rhs[i] = sym;
                            continue;
                        }

                        Nonterminal standIn;
                        if (!standInFor.TryGetValue(sym, out standIn))
                        {
                            // First occurrence of this symbol: introduce U with the single production U --> sym.
                            standIn = new Nonterminal(freshId++);
                            standInFor[sym] = standIn;
                            phase1Vars.Add(standIn);
                            phase1Prods[standIn] = new List<Production> { new Production(standIn, sym) };
                        }
                        rhs[i] = standIn;
                    }
                    phase1Prods[v].Add(new Production(v, rhs));
                }
            }

            // ---- Phase 2: replace V --> V0V1...Vn (n > 2), by V --> V0U0, U0 --> V1U1, ..., Un-2 --> Vn-1Vn ----
            List<Nonterminal> variablesCNF = new List<Nonterminal>(phase1Vars);
            var productionsCNF = new Dictionary<Nonterminal, List<Production>>();
            foreach (Nonterminal v in variablesCNF)
            {
                productionsCNF[v] = new List<Production>();
            }

            foreach (Nonterminal v in phase1Vars)
            {
                foreach (Production p in phase1Prods[v])
                {
                    if (p.IsCNF)
                    {
                        productionsCNF[v].Add(p);
                        continue;
                    }

                    int last = p.Rhs.Length - 1;

                    // 'head' is the left-hand side of the link currently being emitted,
                    // 'tail' the fresh variable that carries the remainder of the right-hand side.
                    Nonterminal head = v;
                    Nonterminal tail = new Nonterminal(freshId++);
                    variablesCNF.Add(tail);
                    productionsCNF[tail] = new List<Production>();

                    for (int i = 0; i < last - 1; i++)
                    {
                        if (i > 0)
                        {
                            // Advance the chain: the previous tail becomes the new head.
                            head = tail;
                            tail = new Nonterminal(freshId++);
                            variablesCNF.Add(tail);
                            productionsCNF[tail] = new List<Production>();
                        }
                        productionsCNF[head].Add(new Production(head, p.Rhs[i], tail));
                    }

                    // The final link consumes the last two symbols of the original right-hand side.
                    productionsCNF[tail].Add(new Production(tail, p.Rhs[last - 1], p.Rhs[last]));
                }
            }

            return new ContextFreeGrammar(variablesCNF, g.startSymbol, productionsCNF);
        }
        /// <summary>
        /// Grades a CYK table attempt against the table computed by GrammarUtilities.cyk.
        /// The table is compared row by row, where row <c>len</c> holds the fields for all substrings
        /// of length <c>len</c>. Every field of a row is checked, but grading stops after the first
        /// row that contains a mistake; the grade is proportional to the number of fully correct
        /// rows before it.
        /// </summary>
        /// <param name="grammar">grammar handed to GrammarUtilities.cyk together with <paramref name="word"/></param>
        /// <param name="word">the word whose CYK table is graded</param>
        /// <param name="attempt">submitted table; attempt[len - 1][start] is the set for the substring of length len starting at index start</param>
        /// <param name="maxGrade">grade awarded when every row is correct</param>
        /// <param name="feedbackLevel">2 or more: messages name an example nonterminal; 1: the "missing" message names an applicable production as a hint; below 1: messages only name the field</param>
        /// <returns>
        /// Item1: floor(correctRows * maxGrade / word.Length), or maxGrade for the empty word (no rows to check).
        /// Item2: the feedback messages, or the single message "Correct!" when no mistake was found.
        /// </returns>
        public static Tuple <int, IEnumerable <String> > gradeCYK(ContextFreeGrammar grammar, String word, HashSet <Nonterminal>[][] attempt, int maxGrade, int feedbackLevel)
        {
            List <String> feedback = new List <String>();

            int  n = word.Length;
            int  checked_length    = 0;    //number of leading rows that were completely correct
            var  sol               = GrammarUtilities.cyk(grammar, word);
            bool all_correct_sofar = true;

            for (int len = 1; len <= n; len++)
            {
                for (int start = 0; start + len <= n; start++)
                {
                    HashSet <Nonterminal> must = sol[len - 1][start].Item1;
                    HashSet <Nonterminal> was  = attempt[len - 1][start];

                    Nonterminal missingExample = null;
                    Production  missingApplicableProduction = null;
                    int         missing        = 0;
                    Nonterminal tooMuchExample = null;
                    int         tooMuch        = 0;

                    //check if all must are present
                    foreach (Nonterminal nt in must)
                    {
                        if (was.Contains(nt))
                        {
                            continue;
                        }
                        missing++;
                        all_correct_sofar = false;

                        //keep counting, but only search for an example until one with a matching production is found
                        if (missingApplicableProduction != null)
                        {
                            continue;
                        }
                        missingExample = nt;
                        foreach (var applicable in sol[len - 1][start].Item2)
                        {
                            if (applicable.Item1.Lhs.Equals(nt))
                            {
                                missingApplicableProduction = applicable.Item1;
                                break;
                            }
                        }
                    }

                    //check if all given are correct (the example is the last superfluous nonterminal seen)
                    foreach (Nonterminal nt in was)
                    {
                        if (!must.Contains(nt))
                        {
                            tooMuchExample = nt;
                            tooMuch++;
                            all_correct_sofar = false;
                        }
                    }

                    //feedback: fields are reported 1-based as (first position, last position)
                    String fieldName = String.Format("({0},{1})", start + 1, start + len);
                    if (missing != 0)
                    {
                        if (feedbackLevel >= 2)
                        {
                            feedback.Add(String.Format("You are missing some nonterminals in field {0} e.g. {1}", fieldName, missingExample));
                        }
                        else if (feedbackLevel >= 1)
                        {
                            feedback.Add(String.Format("You are missing some nonterminals in field {0}... (hint: The production \"{1}\" is applicable.)", fieldName, missingApplicableProduction));
                        }
                        else
                        {
                            feedback.Add(String.Format("You are missing some nonterminals in field {0}...", fieldName));
                        }
                    }
                    if (tooMuch != 0)
                    {
                        if (feedbackLevel >= 2)
                        {
                            feedback.Add(String.Format("There are nonterminals in field {0} that don't belong there... e.g. {1}", fieldName, tooMuchExample));
                        }
                        else
                        {
                            feedback.Add(String.Format("There are nonterminals in field {0} that don't belong there...", fieldName));
                        }
                    }
                }

                //the whole row is always reported, but later rows are not looked at after a mistake
                if (!all_correct_sofar)
                {
                    break;
                }
                checked_length = len;
            }

            //grade
            //For the empty word there are no rows; 0 / 0.0 would be NaN and casting NaN to int is
            //unspecified, so the vacuously correct table gets the full grade explicitly.
            int grade = (n == 0) ? maxGrade : (int)Math.Floor(checked_length * maxGrade / (double)n);

            //all correct?
            if (feedback.Count == 0)
            {
                feedback.Add("Correct!");
            }

            return(Tuple.Create(grade, (IEnumerable <String>)feedback));
        }
        /// <summary>
        /// Produces the GNF (Greibach Normal Form) for the grammar g.
        /// If g is not already in GNF, first makes CNF.
        /// Implements a variation of the Koch-Blum algorithm. (STACS 97, pp. 47-54)
        /// </summary>
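        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsUselessSymbolsUnitsProductions">if true, first removes epsilons, useless symbols and unit productions from g</param>
        /// <returns>g itself if it is already in GNF, otherwise a new grammar constructed from the CNF of g</returns>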
        public static ContextFreeGrammar MkGNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            // Optional preprocessing of the input grammar.
            if (removeEpsilonsUselessSymbolsUnitsProductions)
            {
                g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            }
            // Nothing to do: the (possibly preprocessed) grammar itself is returned, not a copy.
            if (g.IsInGNF())
            {
                return(g);
            }

            // The construction below works on the CNF of g; 'false' because any requested
            // preprocessing has already been applied above.
            ContextFreeGrammar cnf = MkCNF(g, false);
            var Vars = cnf.variables;

            // Counter for the fresh start symbols of the intermediate grammars G_[B].
            // NOTE(review): MkCNF numbers its fresh nonterminals from 0 as well; whether the two
            // numberings can clash depends on how Nonterminal equality is defined - confirm.
            int nonterminalID = 0;

            // M[B]: one automaton per CNF variable B, with labels that are grammar symbols.
            var M = new Dictionary <Nonterminal, Automaton <GrammarSymbol> >();

            #region construct the automata M[B] for all variables B
            // 'id' is a single state counter shared by ALL automata, so state numbers (and the
            // "Q<state>" variable names derived from them below) are unique across different B.
            int id            = 0;
            var initStateMap  = new Dictionary <Nonterminal, int>();
            var finalStateMap = new Dictionary <Nonterminal, int>();
            // Reserve an initial and a final state for every variable.
            foreach (Nonterminal B in Vars)
            {
                initStateMap[B]  = id++;
                finalStateMap[B] = id++;
            }

            var movesOfM = new Dictionary <Nonterminal, List <Move <GrammarSymbol> > >();

            foreach (Nonterminal B in Vars)
            {
                movesOfM[B] = new List <Move <GrammarSymbol> >();
            }

            #region construct the moves of the automata
            foreach (Nonterminal B in Vars)
            {
                // Per-automaton map from a visited variable to its state; B itself is the final state.
                var variableToStateMap    = new Dictionary <Nonterminal, int>();
                Stack <Nonterminal> stack = new Stack <Nonterminal>();
                stack.Push(B);
                int initState = initStateMap[B];
                variableToStateMap[B] = finalStateMap[B];
                // Worklist traversal over the variables reachable from B by repeatedly taking the
                // first right-hand-side symbol of a binary production.
                while (stack.Count > 0)
                {
                    Nonterminal C = stack.Pop();
                    foreach (Production p in cnf.GetProductions(C))
                    {
                        if (p.IsSingleExprinal)
                        {
                            // C -> a : move created from (initState, state of C, a)
                            // (presumably Create takes source, target, label - confirm against Move).
                            movesOfM[B].Add(Move <GrammarSymbol> .Create(initState, variableToStateMap[C], p.First));
                        }
                        else
                        {
                            Nonterminal D = (Nonterminal)p.First; //using the fact that the grammar is in CNF
                            if (!variableToStateMap.ContainsKey(D))
                            {
                                //visit all variables reachable that have not already been visited
                                variableToStateMap.Add(D, id++);
                                stack.Push(D);
                            }
                            // C -> D E : move created from (state of D, state of C, E).
                            GrammarSymbol E = p.Rhs[1];
                            movesOfM[B].Add(Move <GrammarSymbol> .Create(variableToStateMap[D], variableToStateMap[C], E));
                        }
                    }
                }
            }
            #endregion

            // Assemble each automaton from its initial state, its single final state and its moves.
            foreach (Nonterminal B in Vars)
            {
                M[B] = Automaton <GrammarSymbol> .Create(initStateMap[B], new int[] { finalStateMap[B] }, movesOfM[B]);
            }
            #endregion

            // G_[B]: grammar read off the automaton M[B]; state s becomes the variable "Q<s>",
            // the initial state becomes a fresh start symbol.
            var G_ = new Dictionary <Nonterminal, ContextFreeGrammar>();

            #region construct corresponding intermediate grammars G_[B] corresponding to M[B]
            foreach (Nonterminal B in Vars)
            {
                var         MB = M[B];
                // FinalStateHasVariableMoves is defined elsewhere; when it is true, moves INTO the
                // final state additionally get a continuing production (X -> label Q<final>).
                bool        MBfinalStateHasVariableMoves = FinalStateHasVariableMoves(MB);
                var         productions = new Dictionary <Nonterminal, List <Production> >();
                Nonterminal startSymbol = new Nonterminal(nonterminalID++);
                var         vars        = new List <Nonterminal>();
                vars.Add(startSymbol);
                productions[startSymbol] = new List <Production>();

                // Moves leaving the initial state yield the productions of the start symbol.
                foreach (var move in MB.GetMovesFrom(MB.InitialState))
                {
                    if (move.TargetState == MB.FinalState)
                    {
                        // Reaching the final state: start -> label
                        productions[startSymbol].Add(new Production(startSymbol, move.Label));
                    }
                    if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                    {
                        // Continuing from the target state: start -> label Q<target>
                        var C = new Nonterminal("Q" + move.TargetState);
                        productions[startSymbol].Add(new Production(startSymbol, move.Label, C));
                        if (!productions.ContainsKey(C))
                        {
                            productions[C] = new List <Production>();
                            vars.Add(C);
                        }
                    }
                }

                // Moves leaving any other state yield the productions of the corresponding Q-variable.
                foreach (int state in MB.States)
                {
                    if (state != MB.InitialState)
                    {
                        foreach (Move <GrammarSymbol> move in MB.GetMovesFrom(state))
                        {
                            Nonterminal D = new Nonterminal("Q" + state);
                            Nonterminal C = new Nonterminal("Q" + move.TargetState);
                            if (!productions.ContainsKey(D))
                            {
                                productions[D] = new List <Production>();
                                vars.Add(D);
                            }
                            // The cast relies on moves from non-initial states being labelled with the
                            // second symbol of a binary CNF production (see the move construction above).
                            Nonterminal E = (Nonterminal)move.Label;
                            if (move.TargetState == MB.FinalState)
                            {
                                productions[D].Add(new Production(D, E));
                            }
                            if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                            {
                                productions[D].Add(new Production(D, E, C));
                                //we pretend here that E is a terminal
                                if (!productions.ContainsKey(C))
                                {
                                    productions[C] = new List <Production>();
                                    vars.Add(C);
                                }
                            }
                        }
                    }
                }
                G_[B] = new ContextFreeGrammar(vars, startSymbol, productions);
            }
            #endregion

            // G[B]: like G_[B], but in every non-start production the leading original variable E
            // is replaced by the right-hand sides of the start productions of G_[E].
            var G = new Dictionary <Nonterminal, ContextFreeGrammar>();

            #region construct the corresponding temporary G[B]'s
            foreach (Nonterminal B in Vars)
            {
                var G_B         = G_[B];
                var productions = new Dictionary <Nonterminal, List <Production> >();
                //var vars = new List<Variable>();
                Nonterminal startSymbol = G_B.startSymbol;
                // The start productions are shared with G_B (same list object, not a copy).
                productions[startSymbol] = G_B.productionMap[startSymbol];
                foreach (Nonterminal D in G_B.variables)
                {
                    if (!D.Equals(startSymbol))
                    {
                        var productions_D = new List <Production>();
                        productions[D] = productions_D;
                        foreach (Production p in G_B.productionMap[D])
                        {
                            // p is D -> E or D -> E C (see the construction of G_ above).
                            Nonterminal E   = (Nonterminal)p.First;
                            var         G_E = G_[E];
                            if (p.IsUnit)
                            {
                                // D -> E becomes D -> rhs(q) for every start production q of G_[E].
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    productions_D.Add(new Production(D, q.Rhs));
                                }
                            }
                            else
                            {
                                // D -> E C becomes D -> rhs(q) C for every start production q of G_[E].
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                                    Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                                    symbols[q.Rhs.Length] = p.Rhs[1];
                                    productions_D.Add(new Production(D, symbols));
                                }
                            }
                        }
                    }
                }
                //ignore the variable list, it is not used
                G[B] = new ContextFreeGrammar(null, startSymbol, productions);
            }
            #endregion

            #region construct the final GNF from the G[B]'s
            var productionsGNF = new List <Production>();
            // First, rewrite the productions of the CNF grammar itself.
            foreach (Nonterminal A in cnf.variables)
            {
                foreach (Production p in cnf.productionMap[A])
                {
                    if (p.IsSingleExprinal)
                    {
                        // A -> a is kept unchanged.
                        productionsGNF.Add(p);
                    }
                    else
                    {
                        // A -> B C becomes A -> rhs(q) C for every start production q of G[B].
                        Nonterminal B  = (Nonterminal)p.Rhs[0];
                        Nonterminal C  = (Nonterminal)p.Rhs[1];
                        var         GB = G[B];
                        foreach (Production q in GB.productionMap[GB.startSymbol])
                        {
                            GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                            Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                            symbols[q.Rhs.Length] = C;
                            productionsGNF.Add(new Production(A, symbols));
                        }
                    }
                }
            }
            // Then add the productions of all Q-variables; the temporary start symbols are left out
            // because their right-hand sides were inlined above.
            foreach (Nonterminal B in Vars)
            {
                var GB = G[B];
                foreach (var kv in GB.productionMap)
                {
                    if (!kv.Key.Equals(GB.startSymbol))
                    {
                        productionsGNF.AddRange(kv.Value);
                    }
                }
            }
            #endregion

            // The result keeps the start symbol of the CNF grammar.
            ContextFreeGrammar gnf = new ContextFreeGrammar(cnf.startSymbol, productionsGNF);
            return(gnf);
        }
 /// <summary>
 /// Produces the EGNF (Extended Greibach Normal Form) for the grammar g.
 /// The grammar g can be arbitrary. First removes epsilons and useless symbols from g.
 /// Implements a variation of the Blum-Koch algorithm.
 /// (Inf. and Comp. vol.150, pp.112-118, 1999)
 /// </summary>
 /// <param name="g">the grammar to be normalized</param>
 /// <returns>Extended Greibach Normal Form of g</returns>
 public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g)
 {
     // Convenience overload: delegate to the two-argument version with preprocessing enabled.
     const bool removeEpsilonsAndUselessSymbols = true;
     return MkEGNF(g, removeEpsilonsAndUselessSymbols);
 }
        /// <summary>
        /// Removes useless symbols from the grammar.
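        /// Assumes that the language is nonempty; throws an AutomataException of kind
        /// LanguageOfGrammarIsEmpty when the start symbol is not marked by the backward pass.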
        /// </summary>
        public ContextFreeGrammar RemoveUselessSymbols()
        {
            // ---- Pass 1, backward reachability (Lemma 4.1, p. 88, Hopcroft-Ullman) ----
            // Build a graph with one node per variable and one node per production, then let
            // marks propagate upwards starting at productions that mention no variable at all.
            // (The propagation rule itself lives in ProductionNode.PropagateMark.)
            var nodeOf = new Dictionary<Nonterminal, VariableNode>();
            foreach (Nonterminal v in this.variables)
            {
                nodeOf[v] = new VariableNode();
            }

            var leaves = new List<ProductionNode>();
            foreach (Nonterminal v in this.variables)
            {
                VariableNode owner = nodeOf[v];
                foreach (Production p in this.productionMap[v])
                {
                    // Collect the nodes of all variables occurring in this production.
                    var collected = new List<VariableNode>();
                    foreach (Nonterminal w in p.GetVariables())
                    {
                        collected.Add(nodeOf[w]);
                    }
                    VariableNode[] children = collected.ToArray();

                    ProductionNode pn = new ProductionNode(owner, children);
                    if (children.Length == 0)
                    {
                        // No variables on the right-hand side: a starting point for the marking.
                        leaves.Add(pn);
                        continue;
                    }
                    foreach (VariableNode child in children)
                    {
                        child.parents.Add(pn);
                    }
                }
            }

            foreach (ProductionNode leaf in leaves)
            {
                leaf.PropagateMark();
            }

            // Variables are inserted in grammar order, exactly as they are visited here.
            HashSet<Nonterminal> marked = new HashSet<Nonterminal>();
            foreach (Nonterminal v in this.variables)
            {
                if (nodeOf[v].isMarked)
                {
                    marked.Add(v);
                }
            }

            if (!marked.Contains(this.startSymbol))
            {
                throw new AutomataException(AutomataExceptionKind.LanguageOfGrammarIsEmpty);
            }

            ContextFreeGrammar backwardPruned = this.RestrictToVariables(marked);

            // ---- Pass 2, forward reachability (Lemma 4.2, p. 89, Hopcroft-Ullman) ----
            // Worklist search from the start symbol over the variables occurring in productions.
            HashSet<Nonterminal> reached = new HashSet<Nonterminal>();
            Stack<Nonterminal> pending = new Stack<Nonterminal>();
            pending.Push(backwardPruned.StartSymbol);
            reached.Add(backwardPruned.StartSymbol);

            while (pending.Count > 0)
            {
                Nonterminal current = pending.Pop();
                foreach (Production p in backwardPruned.GetProductions(current))
                {
                    foreach (Nonterminal next in p.GetVariables())
                    {
                        // Add returns true only for a variable seen for the first time.
                        if (reached.Add(next))
                        {
                            pending.Push(next);
                        }
                    }
                }
            }

            return backwardPruned.RestrictToVariables(reached);
        }
        /// <summary>
        /// Produces the EGNF (Extended Greibach Normal Form) for the grammar g.
        /// Implements a variation of the Blum-Koch algorithm.
        /// (Inf. and Comp. vol.150, pp.112-118, 1999)
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsAndUselessSymbols">if true, first removes epsilons and useless symbols, otherwise assumes that epsilons do not occur</param>
        /// <returns>Extended Greibach Normal Form of g</returns>
        public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
        {
            if (removeEpsilonsAndUselessSymbols)
            {
                g = g.RemoveEpsilonsAndUselessSymbols();
            }

            if (g.IsInGNF())
            {
                return(g);
            }

            var leavesP = new List <Production>();
            var revP    = new Dictionary <Nonterminal, List <Pair <GrammarSymbol[], Nonterminal> > >();

            int nonterminalID = 0;

            #region compute leavesP and revP
            foreach (Nonterminal v in g.variables)
            {
                revP[v] = new List <Pair <GrammarSymbol[], Nonterminal> >();
            }

            foreach (Production p in g.GetProductions())
            {
                if (!(p.First is Nonterminal))
                {
                    leavesP.Add(p);
                }
                else
                {
                    revP[(Nonterminal)p.First].Add(new Pair <GrammarSymbol[], Nonterminal>(p.Rest, p.Lhs));
                }
            }
            #endregion

            var W           = new Dictionary <Nonterminal, HashSet <Nonterminal> >();
            var startSymbol = new Dictionary <Nonterminal, Nonterminal>();

            #region create new start symbols and compute unit closures
            foreach (Nonterminal v in g.variables)
            {
                W[v]           = g.GetUnitClosure(v);
                startSymbol[v] = new Nonterminal(nonterminalID++);
            }
            #endregion

            var P = new Dictionary <Nonterminal, List <Production> >();

            #region construct intermediate productions in P for each variable B
            foreach (Nonterminal B in g.variables)
            {
                var S_B  = startSymbol[B];
                var W_B  = W[B]; //unit closure of B
                var Bvar = new Dictionary <Nonterminal, Nonterminal>();
                Stack <Nonterminal>   stack   = new Stack <Nonterminal>();
                HashSet <Nonterminal> visited = new HashSet <Nonterminal>();
                var S_B_list = new List <Production>();
                P[S_B] = S_B_list;
                foreach (Production p in leavesP)
                {
                    S_B_list.Add(new Production(S_B, p.Rhs, Lookup(Bvar, p.Lhs, ref nonterminalID)));
                    if (visited.Add(p.Lhs))
                    {
                        stack.Push(p.Lhs);
                    }
                    if (W_B.Contains(p.Lhs))
                    {
                        S_B_list.Add(new Production(S_B, p.Rhs));
                    }
                }

                while (stack.Count > 0)
                {
                    Nonterminal       C   = stack.Pop();
                    Nonterminal       C_B = Lookup(Bvar, C, ref nonterminalID);
                    List <Production> C_B_list;
                    if (!P.TryGetValue(C_B, out C_B_list))
                    {
                        C_B_list = new List <Production>();
                        P[C_B]   = C_B_list;
                    }
                    foreach (var t in revP[C])
                    {
                        Nonterminal D   = t.Second;
                        Nonterminal D_B = Lookup(Bvar, D, ref nonterminalID);
                        C_B_list.Add(new Production(C_B, t.First, D_B));
                        if (t.First.Length > 0 && W_B.Contains(D))
                        {
                            C_B_list.Add(new Production(C_B, t.First));
                        }
                        if (visited.Add(D))
                        {
                            stack.Push(D);
                        }
                    }
                }
            }
            #endregion


            //produce the union of P and g.productionMap in H
            //and replace each production 'A ::= B alpha' by 'A ::= S_B alpha"

            var Hprods = new Dictionary <Nonterminal, List <Production> >();
            #region compute Hprods
            foreach (Nonterminal A in g.variables)
            {
                var A_prods = new List <Production>();
                Hprods[A] = A_prods;
                foreach (Production p in g.productionMap[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                    {
                        A_prods.Add(p);
                    }
                }
            }
            foreach (Nonterminal A in P.Keys)
            {
                var A_prods = new List <Production>();
                Hprods[A] = A_prods;
                foreach (Production p in P[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                    {
                        A_prods.Add(p);
                    }
                }
            }
            #endregion
            ContextFreeGrammar H = new ContextFreeGrammar(new List <Nonterminal>(Hprods.Keys), g.startSymbol, Hprods);

            //Console.WriteLine("--------- H:");
            //H.Display(Console.Out);

            //eliminate useless symbols from H
            //this may dramatically decrease the number of productions
            ContextFreeGrammar H1 = H.RemoveUselessSymbols();

            //Console.WriteLine("---------- H1:");
            //H1.Display(Console.Out);


            List <Nonterminal> egnfVars = new List <Nonterminal>();
            Dictionary <Nonterminal, List <Production> > egnfProds = new Dictionary <Nonterminal, List <Production> >();
            Stack <Nonterminal>   egnfStack   = new Stack <Nonterminal>();
            HashSet <Nonterminal> egnfVisited = new HashSet <Nonterminal>();
            egnfStack.Push(H1.startSymbol);
            egnfVisited.Add(H1.startSymbol);
            egnfVars.Add(H1.startSymbol);
            egnfProds[H1.startSymbol] = new List <Production>();

            #region eliminate temp start symbols and produce the EGNF form
            while (egnfStack.Count > 0)
            {
                var A = egnfStack.Pop();
                List <Production> A_prods = egnfProds[A];
                foreach (Production p in H1.productionMap[A])
                {
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        A_prods.Add(p);
                        foreach (Nonterminal x in p.GetVariables())
                        {
                            if (egnfVisited.Add(x))
                            {
                                egnfStack.Push(x);
                                egnfVars.Add(x);
                                egnfProds[x] = new List <Production>();
                            }
                        }
                    }
                    else
                    {
                        Nonterminal S_B = (Nonterminal)p.First; //here we know that S_B is a temp start symbol
                        foreach (Production t in H1.productionMap[S_B])
                        {
                            int             k   = t.Rhs.Length;
                            GrammarSymbol[] rhs = new GrammarSymbol[k + p.Rhs.Length - 1];
                            for (int i = 0; i < k; i++)
                            {
                                rhs[i] = t.Rhs[i];
                            }
                            for (int i = 1; i < p.Rhs.Length; i++)
                            {
                                rhs[k + i - 1] = p.Rhs[i];
                            }
                            Production q = new Production(A, rhs);
                            A_prods.Add(q);
                            foreach (Nonterminal x in q.GetVariables())
                            {
                                if (egnfVisited.Add(x))
                                {
                                    egnfStack.Push(x);
                                    egnfVars.Add(x);
                                    egnfProds[x] = new List <Production>();
                                }
                            }
                        }
                    }
                }
            }
            #endregion

            ContextFreeGrammar egnf = new ContextFreeGrammar(egnfVars, H1.startSymbol, egnfProds);
            return(egnf);
        }
        /// <summary>
        /// Grades two lists of words against a grammar: words claimed to be in its language and words claimed not to be.
        /// </summary>
        /// <param name="grammar">The grammar the words are checked against.</param>
        /// <param name="wordsIn">Words that are supposed to be in the language of the grammar.</param>
        /// <param name="wordsOut">Words that are supposed to be outside the language of the grammar.</param>
        /// <param name="maxGrade">The grade awarded when every word is fully correct.</param>
        /// <returns>
        /// The grade (the achieved share of <paramref name="maxGrade"/>, rounded down) together with the feedback messages.
        /// A word that occurs more than once (across both lists) counts as a case but earns no points.
        /// If no words are given at all the grade is 0.
        /// </returns>
        public static Tuple <int, IEnumerable <String> > gradeWordsInGrammar(ContextFreeGrammar grammar, IEnumerable <String> wordsIn, IEnumerable <String> wordsOut, int maxGrade)
        {
            int    cases   = 0;
            double correct = 0;

            //first character of every non-variable symbol; a set keeps the per-character lookups below cheap
            var terminals = new HashSet <char>();
            foreach (GrammarSymbol s in grammar.GetNonVariableSymbols())
            {
                terminals.Add(s.ToString()[0]);
            }

            List <String>    feedback = new List <String>();
            HashSet <String> done     = new HashSet <String>(); //for duplicate checking

            foreach (String w in wordsIn)
            {
                cases++;
                //handle duplicates (Add returns false if the word was already seen)
                if (!done.Add(w))
                {
                    feedback.Add(String.Format("The word \"{0}\" was used more than once!", w));
                    continue;
                }

                //a negative result is treated as "the whole word is in the language"
                int prefixLength = GrammarUtilities.longestPrefixLength(grammar, w);

                if (prefixLength < 0)
                {
                    correct++;                   //correct
                }
                else //wrong
                {
                    feedback.Add(String.Format("The word \"{0}\" isn't in the language of the grammar! (hint: the word '{1}' is still possible prefix)", w, w.Substring(0, prefixLength)));
                }
            }

            foreach (String w in wordsOut)
            {
                cases++;
                //handle duplicates (Add returns false if the word was already seen)
                if (!done.Add(w))
                {
                    feedback.Add(String.Format("The word \"{0}\" was used more than once!", w));
                    continue;
                }

                if (GrammarUtilities.isWordInGrammar(grammar, w)) //wrong
                {
                    feedback.Add(String.Format("The word \"{0}\" is in the language of the grammar!", w));
                    continue;
                }

                //correct, but only worth full points if the word uses known terminals only
                bool allUsefull = true;
                char problem    = 'a';
                foreach (char c in w)
                {
                    if (!terminals.Contains(c))
                    {
                        allUsefull = false;
                        problem    = c;
                        break;
                    }
                }

                if (allUsefull)
                {
                    correct += 1;             //full points
                }
                else //only half the points
                {
                    correct += 0.5;
                    feedback.Add(String.Format("The word \"{0}\" uses the symbol '{1}' that is not part of the alphabet...", w, problem));
                }
            }

            //without any words the quotient would be 0/0 = NaN, and casting NaN to int yields an unspecified value
            int grade = cases == 0 ? 0 : (int)Math.Floor(correct * maxGrade / (double)cases);

            //all correct?
            if (grade == maxGrade)
            {
                feedback.Add("Correct!");
            }

            return(Tuple.Create(grade, (IEnumerable <String>)feedback));
        }
        /// <summary>
        /// Computes the names of the useful nonterminals of this grammar.
        /// With checkBackwardsOnly set, the forward pass is skipped, i.e. every symbol is assumed to be reachable from the start symbol.
        /// The result is empty when the start symbol is not marked by the backward pass.
        /// </summary>
        public HashSet <string> GetUsefulNonterminals(bool checkBackwardsOnly)
        {
            //Lemma 4.1, p. 88, Hopcroft-Ullman
            #region backward reachability
            //one node per variable, created up front so productions can refer to any variable
            var nodeOf = new Dictionary <Nonterminal, VariableNode>();
            foreach (Nonterminal variable in this.variables)
            {
                nodeOf[variable] = new VariableNode();
            }

            //productions without any variable on the right-hand side are the starting points of the marking
            var leaves = new List <ProductionNode>();
            foreach (Nonterminal head in this.variables)
            {
                VariableNode headNode = nodeOf[head];
                foreach (Production production in this.productionMap[head])
                {
                    var childList = new List <VariableNode>();
                    foreach (Nonterminal w in production.GetVariables())
                    {
                        childList.Add(nodeOf[w]);
                    }
                    VariableNode[] childNodes     = childList.ToArray();
                    ProductionNode productionNode = new ProductionNode(headNode, childNodes);

                    if (childNodes.Length == 0)
                    {
                        leaves.Add(productionNode);
                        continue;
                    }

                    foreach (VariableNode childNode in childNodes)
                    {
                        childNode.parents.Add(productionNode);
                    }
                }
            }

            foreach (ProductionNode leaf in leaves)
            {
                leaf.PropagateMark();
            }

            //collect every variable whose node ended up marked
            var markedVariables = new HashSet <Nonterminal>();
            foreach (Nonterminal variable in this.variables)
            {
                if (variableIsMarked(nodeOf, variable))
                {
                    markedVariables.Add(variable);
                }
            }
            #endregion

            //the language is empty, so there are no useful symbols at all
            if (!markedVariables.Contains(this.startSymbol))
            {
                return(new HashSet <string>());
            }

            //caller asked to skip the forward pass
            if (checkBackwardsOnly)
            {
                var backwardNames = new HashSet <string>();
                foreach (var marked in markedVariables)
                {
                    backwardNames.Add(marked.Name);
                }
                return(backwardNames);
            }

            ContextFreeGrammar restricted = this.RestrictToVariables(markedVariables);

            //Lemma 4.2, p. 89, Hopcroft-Ullman
            #region forward reachability
            var reachable = new HashSet <Nonterminal>();
            var pending   = new Stack <Nonterminal>();
            pending.Push(restricted.StartSymbol);
            reachable.Add(restricted.StartSymbol);

            while (pending.Count > 0)
            {
                Nonterminal current = pending.Pop();
                foreach (Production production in restricted.GetProductions(current))
                {
                    foreach (Nonterminal next in production.GetVariables())
                    {
                        //Add returns true only for a variable that has not been reached before
                        if (reachable.Add(next))
                        {
                            pending.Push(next);
                        }
                    }
                }
            }
            #endregion

            //useful = reached by the forward pass and marked by the backward pass
            var usefulNames = new HashSet <string>();
            foreach (var candidate in reachable)
            {
                if (!markedVariables.Contains(candidate))
                {
                    continue;
                }
                usefulNames.Add(candidate.Name);
            }
            return(usefulNames);
        }

        /// <summary>
        /// Tells whether the node belonging to the given variable has been marked.
        /// </summary>
        private static bool variableIsMarked(Dictionary <Nonterminal, VariableNode> nodeOf, Nonterminal variable)
        {
            return(nodeOf[variable].isMarked);
        }