/// <summary>
/// Creates a production whose right-hand side consists of the symbols in
/// <paramref name="rhsButLast"/> followed by <paramref name="last"/>.
/// </summary>
/// <param name="lhs">the nonterminal on the left-hand side</param>
/// <param name="rhsButLast">all right-hand side symbols except the final one</param>
/// <param name="last">the final right-hand side symbol</param>
public Production(Nonterminal lhs, GrammarSymbol[] rhsButLast, GrammarSymbol last)
 {
     this.Lhs = lhs;

     int prefixLength = rhsButLast.Length;
     var symbols      = new GrammarSymbol[prefixLength + 1];
     Array.Copy(rhsButLast, 0, symbols, 0, prefixLength);
     symbols[prefixLength] = last; // the extra slot at the end holds the final symbol
     this.Rhs = symbols;
 }
        /// <summary>
        /// Produces the replacement productions for <paramref name="v"/> in which occurrences of
        /// nullable nonterminals are optionally omitted. Productions with an empty right-hand side
        /// are skipped, and so is the variation in which every symbol was dropped.
        /// </summary>
        /// <param name="v">the nonterminal whose productions are expanded</param>
        /// <param name="nullables">the nonterminals considered nullable</param>
        /// <returns>the resulting productions, one per distinct <c>Description</c></returns>
        IEnumerable <Production> EliminateNullables(Nonterminal v, HashSet <Nonterminal> nullables)
        {
            // Keyed by description so that variations which render identically are stored only once
            // (a later duplicate overwrites the earlier entry).
            var uniqueByDescription = new Dictionary <string, Production>();

            foreach (var original in this.productionMap[v])
            {
                if (original.Rhs.Length != 0)
                {
                    var rhsList = ConsList <GrammarSymbol> .Create(original.Rhs);
                    foreach (var variation in EnumerateNullableFreeVariations(rhsList, nullables))
                    {
                        if (variation == null)
                        {
                            continue; // null is the variation where all symbols were dropped
                        }

                        var candidate = new Production(v, variation.ToArray());
                        uniqueByDescription[candidate.Description] = candidate;
                    }
                }
            }

            return uniqueByDescription.Values;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates a parser that reads tokens from <paramref name="lex"/>. The parser starts with no
 /// start variable and an empty production list.
 /// </summary>
 /// <param name="lex">the lexer supplying the token stream</param>
 /// <param name="mkExprinal">function that maps a character to a value of type T</param>
 private GrammarParser(Lexer lex, Func <char, T> mkExprinal)
 {
     // Parser state starts out empty.
     productions = new List <Production>();
     startvar    = null;

     // Collaborators supplied by the caller.
     this.mkExprinal = mkExprinal;
     lexer           = lex;
 }
        /// <summary>
        /// Lazily enumerates all variations of <paramref name="symbols"/> in which each occurrence
        /// of a nonterminal contained in <paramref name="nullables"/> is either kept or omitted.
        /// The empty list is represented by null, so null is yielded when nothing remains.
        /// </summary>
        /// <param name="symbols">the symbol list to vary; null denotes the empty list</param>
        /// <param name="nullables">the nonterminals that may be omitted</param>
        /// <returns>the variations; for a nullable head the variation without it comes first</returns>
        static IEnumerable <ConsList <GrammarSymbol> > EnumerateNullableFreeVariations(ConsList <GrammarSymbol> symbols, HashSet <Nonterminal> nullables)
        {
            if (symbols == null)
            {
                // Base case: the only variation of the empty list is the empty list itself.
                yield return null;
                yield break;
            }

            foreach (var tail in EnumerateNullableFreeVariations(symbols.Rest, nullables))
            {
                GrammarSymbol head           = symbols.First;
                Nonterminal   headAsVariable = head as Nonterminal;
                bool          headMayVanish  = !(headAsVariable == null || !nullables.Contains(headAsVariable));

                if (headMayVanish)
                {
                    // Variation with the nullable head omitted.
                    yield return tail;
                }

                // Variation with the head kept (the only one for non-nullable heads).
                yield return new ConsList <GrammarSymbol>(head, tail);
            }
        }
        /// <summary>
        /// Returns the nonterminal associated with <paramref name="key"/> in <paramref name="vars"/>.
        /// If there is none yet, a new nonterminal is created from the current value of
        /// <paramref name="nonterminalID"/> (which is then incremented), stored and returned.
        /// </summary>
        /// <param name="vars">the map from keys to their associated nonterminals; may be extended</param>
        /// <param name="key">the nonterminal to look up</param>
        /// <param name="nonterminalID">counter that supplies ids for newly created nonterminals</param>
        /// <returns>the existing or newly created nonterminal for <paramref name="key"/></returns>
        static Nonterminal Lookup(Dictionary <Nonterminal, Nonterminal> vars, Nonterminal key, ref int nonterminalID)
        {
            Nonterminal mapped;

            if (!vars.TryGetValue(key, out mapped))
            {
                // First request for this key: allocate a new nonterminal and remember it.
                mapped    = new Nonterminal(nonterminalID++);
                vars[key] = mapped;
            }

            return mapped;
        }
        /// <summary>
        /// Computes every word that results from applying a single production to a single
        /// nonterminal occurrence in <paramref name="partialWord"/>.
        /// </summary>
        /// <param name="productions">the productions that may be applied</param>
        /// <param name="partialWord">the sequence of symbols to derive from</param>
        /// <param name="type">
        /// DERIVATION_LEFTMOST restricts rewriting to the first nonterminal, DERIVATION_RIGHTMOST to
        /// the last one; with any other value every nonterminal position is rewritten.
        /// </param>
        /// <returns>the set of resulting words (duplicates are merged by <c>comparator</c>)</returns>
        public static HashSet <GrammarSymbol[]> findAllDerivations(IEnumerable <Production> productions, GrammarSymbol[] partialWord, int type = DERIVATION_ALL)
        {
            var successors = new HashSet <GrammarSymbol[]>(comparator);

            // Positions of the first and last nonterminal; both stay -1 if there is none.
            int leftmost  = -1;
            int rightmost = -1;

            for (int pos = 0; pos < partialWord.Length; pos++)
            {
                if (partialWord[pos] is Nonterminal)
                {
                    if (leftmost == -1)
                    {
                        leftmost = pos;
                    }
                    rightmost = pos;
                }
            }

            // Try every position that is allowed by the requested derivation type.
            for (int pos = 0; pos < partialWord.Length; pos++)
            {
                bool notRewritable = !(partialWord[pos] is Nonterminal)
                                     || (type == DERIVATION_LEFTMOST && pos != leftmost)
                                     || (type == DERIVATION_RIGHTMOST && pos != rightmost);
                if (notRewritable)
                {
                    continue;
                }

                Nonterminal target = (Nonterminal)partialWord[pos];
                foreach (Production candidate in productions)
                {
                    if (candidate.Lhs.Equals(target))
                    {
                        // Replace the nonterminal at pos according to the production.
                        successors.Add(applyPrduction(partialWord, pos, candidate));
                    }
                }
            }

            return successors;
        }
        /// <summary>
        /// Generates warnings for useless variables: variables that are not reported by
        /// <c>GetUsefulNonterminals(true)</c> and variables that cannot be reached from the
        /// start symbol.
        /// </summary>
        /// <param name="g">the grammar</param>
        /// <returns>
        /// A list holding at most two messages: one listing the unproductive variables (if any)
        /// and one listing the unreachable variables (if any). Empty when neither kind exists.
        /// </returns>
        public static List <string> getGrammarWarnings(ContextFreeGrammar g)
        {
            List <string>    res       = new List <string>();
            HashSet <string> variables = new HashSet <string>();

            // Variables are compared by their string representation throughout.
            foreach (var n in g.Variables)
            {
                variables.Add(n.ToString());
            }

            // Materialize the set difference once; Except is otherwise re-evaluated on every enumeration.
            // NOTE(review): the 'true' argument presumably restricts the check to productivity - confirm.
            var productive   = g.GetUsefulNonterminals(true);
            var unproductive = variables.Except(productive).ToList();

            if (unproductive.Count > 0)
            {
                res.Add(string.Format("Warning: There are unproductive variables! ({0})", string.Join(", ", unproductive)));
            }

            var reachable = new HashSet <string>();
            //Lemma 4.2, p. 89, Hopcroft-Ullman
            // Depth-first traversal from the start symbol over the variables occurring in productions.
            Stack <Nonterminal> stack = new Stack <Nonterminal>();

            stack.Push(g.StartSymbol);
            reachable.Add(g.StartSymbol.ToString());
            while (stack.Count > 0)
            {
                Nonterminal v = stack.Pop();
                foreach (Production p in g.GetProductions(v))
                {
                    foreach (Nonterminal u in p.GetVariables())
                    {
                        // Add returns false for variables that were already discovered.
                        if (reachable.Add(u.ToString()))
                        {
                            stack.Push(u);
                        }
                    }
                }
            }

            var unreachable = variables.Except(reachable).ToList();

            // Fixed: this check previously tested the unproductive set, so the unreachable warning
            // was shown (possibly with an empty list) or suppressed for the wrong reason.
            if (unreachable.Count > 0)
            {
                res.Add(string.Format("Warning: There are unreachable variables! ({0})", string.Join(", ", unreachable)));
            }

            return res;
        }
        /// <summary>
        /// Generates a CFG that accepts the prefix closure of a given grammar. The grammar is
        /// converted with <c>getEquivalentCNF</c> first if it is not already in CNF. Every variable X
        /// gets a companion variable named X + "PREFIX", and the result is marked as accepting the
        /// empty string.
        /// </summary>
        /// <param name="g">the original grammar</param>
        /// <returns>
        /// the prefix closure, or null if <paramref name="g"/> is null or the CNF conversion
        /// returned null
        /// </returns>
        public static ContextFreeGrammar getPrefixClosure(ContextFreeGrammar g)
        {
            if (g == null)
            {
                return null;
            }

            ContextFreeGrammar cnf = g.IsInCNF() ? g : getEquivalentCNF(g);
            if (cnf == null)
            {
                return null;
            }

            // Maps a variable X to its prefix companion X'.
            Func <Nonterminal, Nonterminal> toPrefixVariable = x => new Nonterminal(x.Name + "PREFIX");

            Nonterminal newStart       = toPrefixVariable(cnf.StartSymbol);
            var         allProductions = new List <Production>();

            foreach (Production p in cnf.GetProductions())
            {
                // The original production is always kept.
                allProductions.Add(p);

                Nonterminal lhsPrefix = toPrefixVariable(p.Lhs);
                if (p.Rhs.Length != 2)
                {
                    // case:  X -> a   ==>   X' -> a
                    allProductions.Add(new Production(lhsPrefix, new GrammarSymbol[] { p.Rhs[0] }));
                    continue;
                }

                // case:  X -> A B   ==>   X' -> A B' | A'
                GrammarSymbol left        = p.Rhs[0];
                Nonterminal   rightPrefix = toPrefixVariable((Nonterminal)p.Rhs[1]);
                allProductions.Add(new Production(lhsPrefix, new GrammarSymbol[] { left, rightPrefix }));

                Nonterminal leftPrefix = toPrefixVariable((Nonterminal)p.Rhs[0]);
                allProductions.Add(new Production(lhsPrefix, new GrammarSymbol[] { leftPrefix }));
            }

            var closure = new ContextFreeGrammar(newStart, allProductions);

            closure.setAcceptanceForEmptyString(true);
            return closure;
        }
        /// <summary>
        /// Renders the productions of <paramref name="v"/> on one line in the form
        /// "Name -> rhs1 | rhs2 | ...". If the variable has no productions, only its name is returned.
        /// </summary>
        /// <param name="v">the nonterminal whose productions are described</param>
        /// <returns>the textual description</returns>
        private string DescribeProductions(Nonterminal v)
        {
            var text = new StringBuilder();

            text.Append(v.Name);

            // The first alternative is introduced by the arrow, every later one by a bar.
            string separator = " -> ";
            foreach (Production alternative in productionMap[v])
            {
                text.Append(separator);
                text.Append(alternative.DescriptionOfRhs);
                separator = " | ";
            }

            return text.ToString();
        }
        /// <summary>
        /// Creates a grammar from a start symbol and a sequence of productions. The variables of
        /// the grammar are collected from the productions in order of first occurrence (the
        /// left-hand side of a production before the variables returned by its GetVariables()).
        /// </summary>
        /// <param name="startSymbol">the start symbol; must be the left-hand side of some production</param>
        /// <param name="productions">the productions of the grammar; enumerated exactly once</param>
        /// <exception cref="ArgumentNullException"><paramref name="productions"/> is null</exception>
        /// <exception cref="ArgumentException">no production has <paramref name="startSymbol"/> as its left-hand side</exception>
        public ContextFreeGrammar(Nonterminal startSymbol, IEnumerable <Production> productions)
        {
            if (productions == null)
            {
                throw new ArgumentNullException("productions");
            }

            // Snapshot the sequence: it is traversed twice below, and a lazy or non-repeatable
            // enumerable could otherwise yield different productions on the second pass.
            List <Production> productionList = new List <Production>(productions);

            HashSet <Nonterminal> vars     = new HashSet <Nonterminal>();
            List <Nonterminal>    varsList = new List <Nonterminal>();
            bool startSymbolExisted        = false;

            // First pass: collect all variables and check that the start symbol has a production.
            foreach (Production p in productionList)
            {
                if (p.Lhs.Equals(startSymbol))
                {
                    startSymbolExisted = true;
                }
                if (vars.Add(p.Lhs))
                {
                    varsList.Add(p.Lhs);
                }
                foreach (Nonterminal v in p.GetVariables())
                {
                    if (vars.Add(v))
                    {
                        varsList.Add(v);
                    }
                }
            }
            if (!startSymbolExisted)
            {
                throw new ArgumentException("Start symbol is not used as the LHS of any production.");
            }

            this.variables   = varsList;
            this.startSymbol = startSymbol;
            var prodMap = new Dictionary <Nonterminal, List <Production> >();

            // Every variable gets an entry, even if it never occurs as a left-hand side.
            foreach (Nonterminal v in varsList)
            {
                prodMap.Add(v, new List <Production>());
            }

            // Second pass: group the productions by their left-hand side.
            foreach (Production p in productionList)
            {
                prodMap[p.Lhs].Add(p);
            }
            this.productionMap = prodMap;
        }
        /// <summary>
        /// Computes the set of nonterminals reachable from <paramref name="v"/> by following unit
        /// productions (productions for which <c>IsUnit</c> is true). The set always contains
        /// <paramref name="v"/> itself.
        /// </summary>
        /// <param name="v">the nonterminal to start from</param>
        /// <returns>the unit closure of <paramref name="v"/></returns>
        HashSet <Nonterminal> GetUnitClosure(Nonterminal v)
        {
            var closure = new HashSet <Nonterminal>();
            var pending = new Stack <Nonterminal>();

            closure.Add(v);
            pending.Push(v);

            while (pending.Count > 0)
            {
                Nonterminal current = pending.Pop();
                foreach (Production p in productionMap[current])
                {
                    if (!p.IsUnit)
                    {
                        continue;
                    }

                    // Only targets not seen before are scheduled for expansion.
                    Nonterminal target = (Nonterminal)p.First;
                    if (closure.Add(target))
                    {
                        pending.Push(target);
                    }
                }
            }

            return closure;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Reads the token stream of the lexer and fills <c>productions</c>. The first nonterminal
        /// read becomes <c>startvar</c>. A production is completed at every OR, NEL and EOS token;
        /// after a NEL token a new left-hand side followed by an arrow is expected.
        /// </summary>
        /// <exception cref="ParseException">a token of an unexpected type is encountered</exception>
        private void Parse()
        {
            Nonterminal currentLhs = new Nonterminal(ExpectNT().content);

            startvar = currentLhs;

            ExpectArrow();
            var   pendingRhs = new List <GrammarSymbol>();
            Token token      = lexer.Next();

            while (true)
            {
                switch (token.t)
                {
                case TokenType.NT:
                    // A nonterminal extends the right-hand side under construction.
                    pendingRhs.Add(new Nonterminal(token.content));
                    token = lexer.Next();
                    break;

                case TokenType.T:
                    // The symbol value is built from the first character of the token content.
                    pendingRhs.Add(new Exprinal <T>(mkExprinal(token.content[0]), token.content));
                    token = lexer.Next();
                    break;

                case TokenType.OR:
                    // End of one alternative; the next alternative has the same left-hand side.
                    productions.Add(new Production(currentLhs, pendingRhs.ToArray()));
                    pendingRhs.Clear();
                    token = lexer.Next();
                    break;

                case TokenType.NEL:
                    // End of the current rule; a new rule "NT ->" is expected next.
                    productions.Add(new Production(currentLhs, pendingRhs.ToArray()));
                    pendingRhs.Clear();
                    token = ExpectNT();
                    if (token.t == TokenType.NT)
                    {
                        currentLhs = new Nonterminal(token.content);
                        ExpectArrow();
                        token = lexer.Next();
                    }
                    break;

                case TokenType.EOS:
                    // End of input: flush the last alternative and stop.
                    productions.Add(new Production(currentLhs, pendingRhs.ToArray()));
                    pendingRhs.Clear();
                    return;

                default:
                    throw new ParseException(string.Format("The grammar couldn't be parsed. Please check the syntax... ({0})", generateLocationString(token)));
                }
            }
        }
        /// <summary>
        /// Grades an attempted CYK table against the table computed by <c>GrammarUtilities.cyk</c>.
        /// The table is checked row by row (row index = substring length - 1); checking stops after
        /// the first row that contains an error. The grade is proportional to the number of leading
        /// rows that are entirely correct.
        /// </summary>
        /// <param name="grammar">the grammar the table was built for</param>
        /// <param name="word">the word the table was built for</param>
        /// <param name="attempt">the attempted table, indexed as [length - 1][start]</param>
        /// <param name="maxGrade">the grade awarded for a completely correct table</param>
        /// <param name="feedbackLevel">
        /// 2 or higher: feedback names an example nonterminal; 1: feedback for missing nonterminals
        /// contains an applicable production as a hint; lower: generic feedback only
        /// </param>
        /// <returns>
        /// the grade, floor(correctRows * maxGrade / word.Length), and the feedback messages
        /// ("Correct!" if there is nothing to report). For an empty word the grade is
        /// <paramref name="maxGrade"/>.
        /// </returns>
        public static Tuple <int, IEnumerable <String> > gradeCYK(ContextFreeGrammar grammar, String word, HashSet <Nonterminal>[][] attempt, int maxGrade, int feedbackLevel)
        {
            List <String> feedback = new List <String>();

            int  n = word.Length;
            int  checked_length    = 0;
            var  sol               = GrammarUtilities.cyk(grammar, word);
            bool all_correct_sofar = true;

            for (int len = 1; len <= n; len++)
            {
                for (int start = 0; start + len <= n; start++)
                {
                    HashSet <Nonterminal> must = sol[len - 1][start].Item1;
                    HashSet <Nonterminal> was  = attempt[len - 1][start];

                    Nonterminal missingExample = null;
                    Production  missingApplicableProduction = null;
                    bool        missing        = false;
                    Nonterminal tooMuchExample = null;
                    bool        tooMuch        = false;

                    //check if all must are present
                    foreach (Nonterminal nt in must)
                    {
                        if (was.Contains(nt))
                        {
                            continue;
                        }

                        missing           = true;
                        all_correct_sofar = false;

                        //save as example and look for corresponding applicable production for hint
                        if (missingApplicableProduction != null)
                        {
                            continue; //not needed: already found example
                        }
                        missingExample = nt;
                        foreach (var applicable in sol[len - 1][start].Item2)
                        {
                            if (applicable.Item1.Lhs.Equals(nt))
                            {
                                missingApplicableProduction = applicable.Item1;
                                break;
                            }
                        }
                    }

                    //check if all given are correct
                    foreach (Nonterminal nt in was)
                    {
                        if (!must.Contains(nt))
                        {
                            tooMuchExample    = nt;
                            tooMuch           = true;
                            all_correct_sofar = false;
                        }
                    }

                    //feedback: fields are reported 1-based as (first position, last position)
                    String fieldName = String.Format("({0},{1})", start + 1, start + len);
                    if (missing)
                    {
                        if (feedbackLevel >= 2)
                        {
                            feedback.Add(String.Format("You are missing some nonterminals in field {0} e.g. {1}", fieldName, missingExample));
                        }
                        else if (feedbackLevel >= 1)
                        {
                            feedback.Add(String.Format("You are missing some nonterminals in field {0}... (hint: The production \"{1}\" is applicable.)", fieldName, missingApplicableProduction));
                        }
                        else
                        {
                            feedback.Add(String.Format("You are missing some nonterminals in field {0}...", fieldName));
                        }
                    }
                    if (tooMuch)
                    {
                        if (feedbackLevel >= 2)
                        {
                            feedback.Add(String.Format("There are nonterminals in field {0} that don't belong there... e.g. {1}", fieldName, tooMuchExample));
                        }
                        else
                        {
                            feedback.Add(String.Format("There are nonterminals in field {0} that don't belong there...", fieldName));
                        }
                    }
                }

                //a row with errors ends the check; later rows are neither graded nor reported
                if (!all_correct_sofar)
                {
                    break;
                }
                checked_length = len;
            }

            //grade
            //An empty word has no rows, so nothing can be wrong: award the full grade instead of
            //evaluating 0 / 0.0, which is NaN and has an unspecified result when cast to int.
            int grade = n == 0
                        ? maxGrade
                        : (int)Math.Floor(checked_length * maxGrade / (double)n);

            //all correct?
            if (feedback.Count == 0)
            {
                feedback.Add("Correct!");
            }

            return Tuple.Create(grade, (IEnumerable <String>)feedback);
        }
        /// <summary>
        /// Produces the EGNF (Extended Greibach Normal Form) for the grammar g.
        /// Implements a variation of the Blum-Koch algorithm.
        /// (Inf. and Comp. vol.150, pp.112-118, 1999)
        /// If g already satisfies IsInGNF() (after the optional cleanup), it is returned as is.
        /// </summary>
        /// <param name="g">the grammar to be normalized</param>
        /// <param name="removeEpsilonsAndUselessSymbols">if true, first removes epsilons and useless symbols, otherwise assumes that epsilons do not occur</param>
        /// <returns>Extended Greibach Normal Form of g</returns>
        public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
        {
            if (removeEpsilonsAndUselessSymbols)
            {
                g = g.RemoveEpsilonsAndUselessSymbols();
            }

            // Nothing to do if the grammar is already in Greibach normal form.
            if (g.IsInGNF())
            {
                return(g);
            }

            // leavesP: productions whose first right-hand side symbol is not a nonterminal.
            // revP[X]: for every production whose right-hand side starts with the variable X,
            //          the pair (p.Rest, p.Lhs).
            // NOTE(review): p.Rest is presumably the right-hand side without its first symbol - confirm.
            var leavesP = new List <Production>();
            var revP    = new Dictionary <Nonterminal, List <Pair <GrammarSymbol[], Nonterminal> > >();

            // Counter that supplies ids for all nonterminals created by this method.
            int nonterminalID = 0;

            #region compute leavesP and revP
            foreach (Nonterminal v in g.variables)
            {
                revP[v] = new List <Pair <GrammarSymbol[], Nonterminal> >();
            }

            foreach (Production p in g.GetProductions())
            {
                if (!(p.First is Nonterminal))
                {
                    leavesP.Add(p);
                }
                else
                {
                    revP[(Nonterminal)p.First].Add(new Pair <GrammarSymbol[], Nonterminal>(p.Rest, p.Lhs));
                }
            }
            #endregion

            // W[v]: unit closure of v; startSymbol[v]: the new temporary start symbol S_v created for v.
            var W           = new Dictionary <Nonterminal, HashSet <Nonterminal> >();
            var startSymbol = new Dictionary <Nonterminal, Nonterminal>();

            #region create new start symbols and compute unit closures
            foreach (Nonterminal v in g.variables)
            {
                W[v]           = g.GetUnitClosure(v);
                startSymbol[v] = new Nonterminal(nonterminalID++);
            }
            #endregion

            // P: the intermediate productions, grouped by their (newly created) left-hand side.
            var P = new Dictionary <Nonterminal, List <Production> >();

            #region construct intermediate productions in P for each variable B
            foreach (Nonterminal B in g.variables)
            {
                var S_B  = startSymbol[B];
                var W_B  = W[B]; //unit closure of B
                // Bvar maps an original variable C to its copy C_B, created on demand by Lookup.
                var Bvar = new Dictionary <Nonterminal, Nonterminal>();
                Stack <Nonterminal>   stack   = new Stack <Nonterminal>();
                HashSet <Nonterminal> visited = new HashSet <Nonterminal>();
                var S_B_list = new List <Production>();
                P[S_B] = S_B_list;
                foreach (Production p in leavesP)
                {
                    // S_B ::= p.Rhs followed by the B-copy of p.Lhs
                    S_B_list.Add(new Production(S_B, p.Rhs, Lookup(Bvar, p.Lhs, ref nonterminalID)));
                    if (visited.Add(p.Lhs))
                    {
                        stack.Push(p.Lhs);
                    }
                    // If p.Lhs is in the unit closure of B, S_B also gets p.Rhs without a trailing copy.
                    if (W_B.Contains(p.Lhs))
                    {
                        S_B_list.Add(new Production(S_B, p.Rhs));
                    }
                }

                // Worklist over the original variables, following revP from each visited variable
                // to the left-hand sides of the productions that start with it.
                while (stack.Count > 0)
                {
                    Nonterminal       C   = stack.Pop();
                    Nonterminal       C_B = Lookup(Bvar, C, ref nonterminalID);
                    List <Production> C_B_list;
                    if (!P.TryGetValue(C_B, out C_B_list))
                    {
                        C_B_list = new List <Production>();
                        P[C_B]   = C_B_list;
                    }
                    foreach (var t in revP[C])
                    {
                        Nonterminal D   = t.Second;
                        Nonterminal D_B = Lookup(Bvar, D, ref nonterminalID);
                        // C_B ::= t.First followed by D_B
                        C_B_list.Add(new Production(C_B, t.First, D_B));
                        // The variant without D_B is added only when t.First is nonempty
                        // and D is in the unit closure of B.
                        if (t.First.Length > 0 && W_B.Contains(D))
                        {
                            C_B_list.Add(new Production(C_B, t.First));
                        }
                        if (visited.Add(D))
                        {
                            stack.Push(D);
                        }
                    }
                }
            }
            #endregion


            //produce the union of P and g.productionMap in H
            //and replace each production 'A ::= B alpha' by 'A ::= S_B alpha'
            //(unit productions and productions not starting with a nonterminal are copied unchanged)

            var Hprods = new Dictionary <Nonterminal, List <Production> >();
            #region compute Hprods
            // Productions of the original grammar.
            foreach (Nonterminal A in g.variables)
            {
                var A_prods = new List <Production>();
                Hprods[A] = A_prods;
                foreach (Production p in g.productionMap[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        // Same right-hand side, with the leading variable swapped for its temp start symbol.
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                    {
                        A_prods.Add(p);
                    }
                }
            }
            // Intermediate productions from P, treated in the same way.
            foreach (Nonterminal A in P.Keys)
            {
                var A_prods = new List <Production>();
                Hprods[A] = A_prods;
                foreach (Production p in P[A])
                {
                    if (p.First is Nonterminal && !p.IsUnit)
                    {
                        GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                        rhs[0] = startSymbol[(Nonterminal)p.First];
                        Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                        Production q = new Production(p.Lhs, rhs);
                        A_prods.Add(q);
                    }
                    else
                    {
                        A_prods.Add(p);
                    }
                }
            }
            #endregion
            ContextFreeGrammar H = new ContextFreeGrammar(new List <Nonterminal>(Hprods.Keys), g.startSymbol, Hprods);

            //Console.WriteLine("--------- H:");
            //H.Display(Console.Out);

            //eliminate useless symbols from H
            //this may dramatically decrease the number of productions
            ContextFreeGrammar H1 = H.RemoveUselessSymbols();

            //Console.WriteLine("---------- H1:");
            //H1.Display(Console.Out);


            // The result is built by a worklist traversal that starts at the start symbol of H1;
            // only variables encountered during the traversal become part of the result.
            List <Nonterminal> egnfVars = new List <Nonterminal>();
            Dictionary <Nonterminal, List <Production> > egnfProds = new Dictionary <Nonterminal, List <Production> >();
            Stack <Nonterminal>   egnfStack   = new Stack <Nonterminal>();
            HashSet <Nonterminal> egnfVisited = new HashSet <Nonterminal>();
            egnfStack.Push(H1.startSymbol);
            egnfVisited.Add(H1.startSymbol);
            egnfVars.Add(H1.startSymbol);
            egnfProds[H1.startSymbol] = new List <Production>();

            #region eliminate temp start symbols and produce the EGNF form
            while (egnfStack.Count > 0)
            {
                var A = egnfStack.Pop();
                List <Production> A_prods = egnfProds[A];
                foreach (Production p in H1.productionMap[A])
                {
                    if (!(p.First is Nonterminal) || p.IsUnit)
                    {
                        // Kept unchanged; its variables are scheduled for the traversal.
                        A_prods.Add(p);
                        foreach (Nonterminal x in p.GetVariables())
                        {
                            if (egnfVisited.Add(x))
                            {
                                egnfStack.Push(x);
                                egnfVars.Add(x);
                                egnfProds[x] = new List <Production>();
                            }
                        }
                    }
                    else
                    {
                        Nonterminal S_B = (Nonterminal)p.First; //here we know that S_B is a temp start symbol
                        // Substitute every production of S_B for the leading S_B of p.
                        foreach (Production t in H1.productionMap[S_B])
                        {
                            int             k   = t.Rhs.Length;
                            // New right-hand side: all of t.Rhs followed by p.Rhs without its first symbol.
                            GrammarSymbol[] rhs = new GrammarSymbol[k + p.Rhs.Length - 1];
                            for (int i = 0; i < k; i++)
                            {
                                rhs[i] = t.Rhs[i];
                            }
                            for (int i = 1; i < p.Rhs.Length; i++)
                            {
                                rhs[k + i - 1] = p.Rhs[i];
                            }
                            Production q = new Production(A, rhs);
                            A_prods.Add(q);
                            foreach (Nonterminal x in q.GetVariables())
                            {
                                if (egnfVisited.Add(x))
                                {
                                    egnfStack.Push(x);
                                    egnfVars.Add(x);
                                    egnfProds[x] = new List <Production>();
                                }
                            }
                        }
                    }
                }
            }
            #endregion

            ContextFreeGrammar egnf = new ContextFreeGrammar(egnfVars, H1.startSymbol, egnfProds);
            return(egnf);
        }
        /// <summary>
        /// Produces the GNF (Greibach Normal Form) for the grammar g.
        /// If g is not already in GNF, first makes CNF.
        /// Implements a variation of the Koch-Blum algorithm. (STACS 97, pp. 47-54)
        /// </summary>
        /// <param name="g"></param>
        /// <param name="removeEpsilonsUselessSymbolsUnitsProductions"></param>
        /// <returns></returns>
        public static ContextFreeGrammar MkGNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            if (removeEpsilonsUselessSymbolsUnitsProductions)
            {
                g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            }
            if (g.IsInGNF())
            {
                return(g);
            }

            ContextFreeGrammar cnf = MkCNF(g, false);
            var Vars = cnf.variables;

            int nonterminalID = 0;

            var M = new Dictionary <Nonterminal, Automaton <GrammarSymbol> >();

            #region construct the automata M[B] for all variables B
            int id            = 0;
            var initStateMap  = new Dictionary <Nonterminal, int>();
            var finalStateMap = new Dictionary <Nonterminal, int>();
            foreach (Nonterminal B in Vars)
            {
                initStateMap[B]  = id++;
                finalStateMap[B] = id++;
            }

            var movesOfM = new Dictionary <Nonterminal, List <Move <GrammarSymbol> > >();

            foreach (Nonterminal B in Vars)
            {
                movesOfM[B] = new List <Move <GrammarSymbol> >();
            }

            #region construct the moves of the automata
            foreach (Nonterminal B in Vars)
            {
                var variableToStateMap    = new Dictionary <Nonterminal, int>();
                Stack <Nonterminal> stack = new Stack <Nonterminal>();
                stack.Push(B);
                int initState = initStateMap[B];
                variableToStateMap[B] = finalStateMap[B];
                while (stack.Count > 0)
                {
                    Nonterminal C = stack.Pop();
                    foreach (Production p in cnf.GetProductions(C))
                    {
                        if (p.IsSingleExprinal)
                        {
                            movesOfM[B].Add(Move <GrammarSymbol> .Create(initState, variableToStateMap[C], p.First));
                        }
                        else
                        {
                            Nonterminal D = (Nonterminal)p.First; //using the fact that the grammar is in CNF
                            if (!variableToStateMap.ContainsKey(D))
                            {
                                //visit all variables reachable that have not already been visited
                                variableToStateMap.Add(D, id++);
                                stack.Push(D);
                            }
                            GrammarSymbol E = p.Rhs[1];
                            movesOfM[B].Add(Move <GrammarSymbol> .Create(variableToStateMap[D], variableToStateMap[C], E));
                        }
                    }
                }
            }
            #endregion

            foreach (Nonterminal B in Vars)
            {
                M[B] = Automaton <GrammarSymbol> .Create(initStateMap[B], new int[] { finalStateMap[B] }, movesOfM[B]);
            }
            #endregion

            var G_ = new Dictionary <Nonterminal, ContextFreeGrammar>();

            #region construct corresponding intermediate grammars G_[B] corresponding to M[B]
            foreach (Nonterminal B in Vars)
            {
                var         MB = M[B];
                bool        MBfinalStateHasVariableMoves = FinalStateHasVariableMoves(MB);
                var         productions = new Dictionary <Nonterminal, List <Production> >();
                Nonterminal startSymbol = new Nonterminal(nonterminalID++);
                var         vars        = new List <Nonterminal>();
                vars.Add(startSymbol);
                productions[startSymbol] = new List <Production>();

                foreach (var move in MB.GetMovesFrom(MB.InitialState))
                {
                    if (move.TargetState == MB.FinalState)
                    {
                        productions[startSymbol].Add(new Production(startSymbol, move.Label));
                    }
                    if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                    {
                        var C = new Nonterminal("Q" + move.TargetState);
                        productions[startSymbol].Add(new Production(startSymbol, move.Label, C));
                        if (!productions.ContainsKey(C))
                        {
                            productions[C] = new List <Production>();
                            vars.Add(C);
                        }
                    }
                }

                foreach (int state in MB.States)
                {
                    if (state != MB.InitialState)
                    {
                        foreach (Move <GrammarSymbol> move in MB.GetMovesFrom(state))
                        {
                            Nonterminal D = new Nonterminal("Q" + state);
                            Nonterminal C = new Nonterminal("Q" + move.TargetState);
                            if (!productions.ContainsKey(D))
                            {
                                productions[D] = new List <Production>();
                                vars.Add(D);
                            }
                            Nonterminal E = (Nonterminal)move.Label;
                            if (move.TargetState == MB.FinalState)
                            {
                                productions[D].Add(new Production(D, E));
                            }
                            if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                            {
                                productions[D].Add(new Production(D, E, C));
                                //we pretend here that E is a terminal
                                if (!productions.ContainsKey(C))
                                {
                                    productions[C] = new List <Production>();
                                    vars.Add(C);
                                }
                            }
                        }
                    }
                }
                G_[B] = new ContextFreeGrammar(vars, startSymbol, productions);
            }
            #endregion

            var G = new Dictionary <Nonterminal, ContextFreeGrammar>();

            #region construct the corresponding temporary G[B]'s
            foreach (Nonterminal B in Vars)
            {
                var G_B         = G_[B];
                var productions = new Dictionary <Nonterminal, List <Production> >();
                //var vars = new List<Variable>();
                Nonterminal startSymbol = G_B.startSymbol;
                productions[startSymbol] = G_B.productionMap[startSymbol];
                foreach (Nonterminal D in G_B.variables)
                {
                    if (!D.Equals(startSymbol))
                    {
                        var productions_D = new List <Production>();
                        productions[D] = productions_D;
                        foreach (Production p in G_B.productionMap[D])
                        {
                            Nonterminal E   = (Nonterminal)p.First;
                            var         G_E = G_[E];
                            if (p.IsUnit)
                            {
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    productions_D.Add(new Production(D, q.Rhs));
                                }
                            }
                            else
                            {
                                foreach (Production q in G_E.productionMap[G_E.startSymbol])
                                {
                                    GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                                    Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                                    symbols[q.Rhs.Length] = p.Rhs[1];
                                    productions_D.Add(new Production(D, symbols));
                                }
                            }
                        }
                    }
                }
                //ignore the variable list, it is not used
                G[B] = new ContextFreeGrammar(null, startSymbol, productions);
            }
            #endregion

            #region construct the final GNF from the G[B]'s
            var productionsGNF = new List <Production>();
            foreach (Nonterminal A in cnf.variables)
            {
                foreach (Production p in cnf.productionMap[A])
                {
                    if (p.IsSingleExprinal)
                    {
                        productionsGNF.Add(p);
                    }
                    else
                    {
                        Nonterminal B  = (Nonterminal)p.Rhs[0];
                        Nonterminal C  = (Nonterminal)p.Rhs[1];
                        var         GB = G[B];
                        foreach (Production q in GB.productionMap[GB.startSymbol])
                        {
                            GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                            Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                            symbols[q.Rhs.Length] = C;
                            productionsGNF.Add(new Production(A, symbols));
                        }
                    }
                }
            }
            foreach (Nonterminal B in Vars)
            {
                var GB = G[B];
                foreach (var kv in GB.productionMap)
                {
                    if (!kv.Key.Equals(GB.startSymbol))
                    {
                        productionsGNF.AddRange(kv.Value);
                    }
                }
            }
            #endregion

            ContextFreeGrammar gnf = new ContextFreeGrammar(cnf.startSymbol, productionsGNF);
            return(gnf);
        }
        /// <summary>
        /// Computes the names of the useful nonterminals of this grammar.
        /// A backward pass (Lemma 4.1, p. 88, Hopcroft-Ullman) marks nonterminals starting from the
        /// productions that have no nonterminal on their right-hand side; a forward pass
        /// (Lemma 4.2, p. 89, Hopcroft-Ullman) then keeps only those reachable from the start symbol.
        /// </summary>
        /// <param name="checkBackwardsOnly">
        /// When true the forward pass is skipped, i.e. all symbols are assumed to be reachable
        /// from the start symbol, and the names of all marked nonterminals are returned.
        /// </param>
        /// <returns>
        /// The names of the useful nonterminals. The set is empty when the start symbol itself
        /// is not marked by the backward pass (the language is empty).
        /// </returns>
        public HashSet<string> GetUsefulNonterminals(bool checkBackwardsOnly)
        {
            // ---- backward pass: Lemma 4.1, p. 88, Hopcroft-Ullman ----
            // One node per nonterminal of the grammar.
            var nodeOf = new Dictionary<Nonterminal, VariableNode>();
            foreach (Nonterminal nt in this.variables)
            {
                nodeOf[nt] = new VariableNode();
            }

            // One node per production, linked to the node of its left-hand side and to the
            // nodes of the nonterminals occurring on its right-hand side.
            var leaves = new List<ProductionNode>();
            foreach (Nonterminal lhs in this.variables)
            {
                VariableNode lhsNode = nodeOf[lhs];
                foreach (Production prod in this.productionMap[lhs])
                {
                    var rhsVars = new List<Nonterminal>(prod.GetVariables());
                    VariableNode[] rhsNodes = new VariableNode[rhsVars.Count];
                    for (int i = 0; i < rhsNodes.Length; i++)
                    {
                        rhsNodes[i] = nodeOf[rhsVars[i]];
                    }

                    ProductionNode prodNode = new ProductionNode(lhsNode, rhsNodes);
                    if (rhsNodes.Length == 0)
                    {
                        // no nonterminal on the right-hand side: a starting point for marking
                        leaves.Add(prodNode);
                        continue;
                    }

                    foreach (VariableNode rhsNode in rhsNodes)
                    {
                        rhsNode.parents.Add(prodNode);
                    }
                }
            }

            // PropagateMark is defined elsewhere; presumably it marks every nonterminal that
            // can derive a terminal string, as in the cited lemma.
            foreach (ProductionNode leaf in leaves)
            {
                leaf.PropagateMark();
            }

            var marked = new HashSet<Nonterminal>();
            foreach (Nonterminal nt in this.variables)
            {
                if (nodeOf[nt].isMarked)
                {
                    marked.Add(nt);
                }
            }

            // The start symbol is not marked: the language is empty, so nothing is useful.
            if (!marked.Contains(this.startSymbol))
            {
                return new HashSet<string>();
            }

            // The caller asked to skip the forward pass.
            if (checkBackwardsOnly)
            {
                var markedNames = new HashSet<string>();
                foreach (Nonterminal nt in marked)
                {
                    markedNames.Add(nt.Name);
                }
                return markedNames;
            }

            // ---- forward pass: Lemma 4.2, p. 89, Hopcroft-Ullman ----
            ContextFreeGrammar restricted = this.RestrictToVariables(marked);

            var reachable = new HashSet<Nonterminal>();
            var pending = new Stack<Nonterminal>();
            pending.Push(restricted.StartSymbol);
            reachable.Add(restricted.StartSymbol);

            while (pending.Count > 0)
            {
                Nonterminal current = pending.Pop();
                foreach (Production prod in restricted.GetProductions(current))
                {
                    foreach (Nonterminal next in prod.GetVariables())
                    {
                        // Add returns false for nonterminals that were already visited
                        if (reachable.Add(next))
                        {
                            pending.Push(next);
                        }
                    }
                }
            }

            // Useful = reachable from the start symbol and marked by the backward pass.
            var usefulNames = new HashSet<string>();
            foreach (Nonterminal nt in reachable)
            {
                if (marked.Contains(nt))
                {
                    usefulNames.Add(nt.Name);
                }
            }
            return usefulNames;
        }
        /// <summary>
        /// Produces the CNF (Chomsky Normal Form) of the grammar <paramref name="g"/>,
        /// implementing the algorithm of Theorem 4.5, pages 92-93, in Hopcroft-Ullman.
        /// </summary>
        /// <param name="g">The grammar to convert.</param>
        /// <param name="removeEpsilonsUselessSymbolsUnitsProductions">
        /// If true, epsilons, useless symbols and unit productions are eliminated from
        /// <paramref name="g"/> first. If false, the grammar is assumed to contain no epsilons,
        /// useless symbols or unit productions.
        /// </param>
        /// <returns>
        /// A new grammar built from the rewritten productions, using the start symbol of the
        /// (possibly cleaned) input grammar.
        /// </returns>
        public static ContextFreeGrammar MkCNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
        {
            if (removeEpsilonsUselessSymbolsUnitsProductions)
            {
                g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
            }

            // Counter shared by both phases for creating fresh nonterminals.
            int freshId = 0;

            // ---- phase 1: make productions of the form V --> V0...Vn or V --> a ----
            var phase1Vars = new List<Nonterminal>(g.variables);
            var phase1Prods = new Dictionary<Nonterminal, List<Production>>();
            foreach (Nonterminal v in g.variables)
            {
                phase1Prods[v] = new List<Production>();
            }

            // Maps each replaced symbol to the fresh nonterminal U (with U --> symbol) standing in for it.
            var standInFor = new Dictionary<GrammarSymbol, Nonterminal>();
            foreach (Nonterminal lhs in g.variables)
            {
                foreach (Production p in g.productionMap[lhs])
                {
                    if (p.ContainsNoExprinals || p.IsCNF)
                    {
                        // already in the required shape
                        phase1Prods[lhs].Add(p);
                        continue;
                    }

                    int len = p.Rhs.Length;
                    var newRhs = new GrammarSymbol[len];
                    for (int k = 0; k < len; k++)
                    {
                        GrammarSymbol sym = p.Rhs[k];
                        if (sym is Nonterminal)
                        {
                            newRhs[k] = sym;
                            continue;
                        }

                        Nonterminal standIn;
                        if (!standInFor.TryGetValue(sym, out standIn))
                        {
                            standIn = new Nonterminal(freshId++);
                            standInFor[sym] = standIn;
                            phase1Vars.Add(standIn);
                            phase1Prods[standIn] = new List<Production> { new Production(standIn, sym) };
                        }
                        newRhs[k] = standIn;
                    }
                    phase1Prods[lhs].Add(new Production(lhs, newRhs));
                }
            }

            // ---- phase 2: replace V --> V0V1...Vn (n > 2), by V --> V0U0, U0 --> V1U1, ..., Un-2 --> Vn-1Vn ----
            var cnfVars = new List<Nonterminal>(phase1Vars);
            var cnfProds = new Dictionary<Nonterminal, List<Production>>();
            foreach (Nonterminal v in cnfVars)
            {
                cnfProds[v] = new List<Production>();
            }

            foreach (Nonterminal lhs in phase1Vars)
            {
                foreach (Production p in phase1Prods[lhs])
                {
                    if (p.IsCNF)
                    {
                        cnfProds[lhs].Add(p);
                        continue;
                    }

                    // NOTE(review): the code below indexes the last two right-hand-side symbols,
                    // so a non-CNF production with fewer than two symbols (epsilon or unit)
                    // would fail here; this is why such productions must be absent.
                    int last = p.Rhs.Length - 1;
                    Nonterminal head = lhs;
                    Nonterminal tail = new Nonterminal(freshId++);
                    cnfVars.Add(tail);
                    cnfProds[tail] = new List<Production>();

                    // Peel off one leading symbol per step: head --> Rhs[k] tail.
                    for (int k = 0; k < last - 1; k++)
                    {
                        cnfProds[head].Add(new Production(head, p.Rhs[k], tail));
                        if (k < last - 2)
                        {
                            // more than two symbols remain: continue the chain with a new tail
                            head = tail;
                            tail = new Nonterminal(freshId++);
                            cnfVars.Add(tail);
                            cnfProds[tail] = new List<Production>();
                        }
                    }

                    // The final link of the chain carries the last two symbols.
                    cnfProds[tail].Add(new Production(tail, p.Rhs[last - 1], p.Rhs[last]));
                }
            }

            return new ContextFreeGrammar(cnfVars, g.startSymbol, cnfProds);
        }
 /// <summary>
 /// Gets the productions stored in the production map for the nonterminal <paramref name="v"/>.
 /// </summary>
 /// <param name="v">Nonterminal used as the lookup key in the production map.</param>
 /// <returns>The list stored for <paramref name="v"/>; it is returned as is, not copied.</returns>
 public IList<Production> GetProductions(Nonterminal v)
 {
     // Plain indexer lookup: a nonterminal that is not a key of the map is not handled here.
     IList<Production> productionsOfV = productionMap[v];
     return productionsOfV;
 }
 /// <summary>
 /// Creates a grammar directly from its parts. The arguments are stored as given;
 /// no copy is made and no validation is performed.
 /// </summary>
 /// <param name="variables">The nonterminals of the grammar.</param>
 /// <param name="startSymbol">The start symbol of the grammar.</param>
 /// <param name="productionMap">Maps a nonterminal to the list of its productions.</param>
 internal ContextFreeGrammar(List<Nonterminal> variables, Nonterminal startSymbol, Dictionary<Nonterminal, List<Production>> productionMap)
 {
     this.productionMap = productionMap;
     this.startSymbol = startSymbol;
     this.variables = variables;
 }
 /// <summary>
 /// Tells whether <paramref name="v"/> is a key of the production map of this grammar.
 /// </summary>
 /// <param name="v">The nonterminal to look up.</param>
 /// <returns>true if the production map contains an entry for <paramref name="v"/>; otherwise false.</returns>
 public bool IsValidVariable(Nonterminal v)
 {
     bool hasEntry = productionMap.ContainsKey(v);
     return hasEntry;
 }
        /// <summary>
        /// Generates the words of exactly <paramref name="length"/> symbols that the start symbol of
        /// the CNF grammar <paramref name="cnf"/> derives, using <paramref name="dp"/> as a table
        /// that is shared between calls.
        /// </summary>
        /// <remarks>
        /// The table is filled incrementally. The call with length 1 creates dp[X] for every
        /// nonterminal X of the grammar and stores the words given by its single-terminal productions.
        /// A call with a larger length combines, for every production X -> A B, the words already
        /// stored for A and B at the shorter lengths. Calls are therefore expected in increasing
        /// order of length (1, 2, 3, ...) on the same table. A table entry that is missing
        /// (unknown nonterminal or a length that was never generated) contributes no words.
        /// For length 1 and above the returned set is the one stored in the table, not a copy.
        /// </remarks>
        /// <param name="cnf">Grammar in Chomsky normal form; null yields the empty set.</param>
        /// <param name="length">Length of the words to generate.</param>
        /// <param name="dp">Table mapping nonterminal -> word length -> words of that length.</param>
        /// <returns>The words of the requested length derivable from the start symbol.</returns>
        private static HashSet<string> generateWordsWithLength(ContextFreeGrammar cnf, int length, Dictionary<Nonterminal, Dictionary<int, HashSet<string>>> dp)
        {
            HashSet<string> res = new HashSet<string>();

            if (cnf == null)
            {
                return res; //empty grammar -> can't generate any words
            }

            if (length == 0) //case: length = 0
            {
                if (cnf.acceptsEmptyString())
                {
                    res.Add("");
                }
            }
            else if (length == 1) //case: length = 1
            {
                foreach (Nonterminal nt in cnf.Variables)
                {
                    //init dp[nt]
                    var curDP = new Dictionary<int, HashSet<string>>();
                    dp.Add(nt, curDP);

                    //find words of length 1
                    var singles = new HashSet<string>();
                    foreach (Production p in cnf.GetProductions(nt))
                    {
                        if (p.IsSingleExprinal)
                        {
                            singles.Add(p.Rhs[0].ToString());
                        }
                    }
                    curDP.Add(1, singles);
                    if (nt.Equals(cnf.StartSymbol))
                    {
                        res = singles;
                    }
                }
            }
            else //case: length > 1
            {
                foreach (KeyValuePair<Nonterminal, Dictionary<int, HashSet<string>>> entry in dp)
                {
                    Nonterminal cur = entry.Key;
                    Dictionary<int, HashSet<string>> curDP = entry.Value;
                    var curSet = new HashSet<string>();
                    curDP.Add(length, curSet);
                    if (cur.Equals(cnf.StartSymbol))
                    {
                        res = curSet;
                    }

                    foreach (Production p in cnf.GetProductions(cur))
                    {
                        if (p.Rhs.Length != 2)
                        {
                            continue; //ignore productions that don't have form X->AB
                        }

                        Nonterminal left = (Nonterminal)p.Rhs[0];
                        Nonterminal right = (Nonterminal)p.Rhs[1];

                        // A nonterminal without a table cannot contribute any word; skipping it
                        // avoids dereferencing the null that TryGetValue leaves in the out value.
                        Dictionary<int, HashSet<string>> leftDP;
                        Dictionary<int, HashSet<string>> rightDP;
                        if (!dp.TryGetValue(left, out leftDP) || !dp.TryGetValue(right, out rightDP))
                        {
                            continue;
                        }

                        // Try every split of the word into a non-empty left and right part.
                        for (int leftPart = 1; leftPart < length; leftPart++)
                        {
                            int rightPart = length - leftPart;

                            HashSet<string> leftPossibilities;
                            HashSet<string> rightPossibilities;
                            if (!leftDP.TryGetValue(leftPart, out leftPossibilities) ||
                                !rightDP.TryGetValue(rightPart, out rightPossibilities))
                            {
                                continue; //no words recorded for this split
                            }

                            foreach (string leftString in leftPossibilities)
                            {
                                foreach (string rightString in rightPossibilities)
                                {
                                    curSet.Add(leftString + rightString);
                                }
                            }
                        }
                    }
                }
            }

            return res;
        }
 /// <summary>
 /// Creates the production <paramref name="lhs"/> --> <paramref name="rhs"/>.
 /// </summary>
 /// <param name="lhs">The nonterminal on the left-hand side.</param>
 /// <param name="rhs">
 /// The right-hand side symbols, given either as an array or as separate arguments.
 /// The array is assigned directly; it is not copied.
 /// </param>
 public Production(Nonterminal lhs, params GrammarSymbol[] rhs)
 {
     this.Rhs = rhs;
     this.Lhs = lhs;
 }
        /// <summary>
        /// Removes useless symbols from the grammar in two passes: a backward pass
        /// (Lemma 4.1, p. 88, Hopcroft-Ullman) followed by a forward reachability pass from the
        /// start symbol (Lemma 4.2, p. 89, Hopcroft-Ullman).
        /// Assumes that the language is nonempty.
        /// </summary>
        /// <returns>This grammar restricted first to the nonterminals kept by the backward pass and then to those reachable from the start symbol.</returns>
        /// <exception cref="AutomataException">
        /// Thrown with <c>AutomataExceptionKind.LanguageOfGrammarIsEmpty</c> when the start symbol
        /// is not marked by the backward pass.
        /// </exception>
        public ContextFreeGrammar RemoveUselessSymbols()
        {
            // ---- backward pass: Lemma 4.1, p. 88, Hopcroft-Ullman ----
            // Every nonterminal gets its own node.
            var nodeOf = new Dictionary<Nonterminal, VariableNode>();
            foreach (Nonterminal nt in this.variables)
            {
                nodeOf[nt] = new VariableNode();
            }

            // Every production gets a node that knows the node of its left-hand side and the
            // nodes of the nonterminals on its right-hand side.
            var startingPoints = new List<ProductionNode>();
            foreach (Nonterminal lhs in this.variables)
            {
                VariableNode lhsNode = nodeOf[lhs];
                foreach (Production prod in this.productionMap[lhs])
                {
                    var rhsVars = new List<Nonterminal>(prod.GetVariables());
                    VariableNode[] rhsNodes = new VariableNode[rhsVars.Count];
                    for (int i = 0; i < rhsNodes.Length; i++)
                    {
                        rhsNodes[i] = nodeOf[rhsVars[i]];
                    }

                    ProductionNode prodNode = new ProductionNode(lhsNode, rhsNodes);
                    if (rhsNodes.Length != 0)
                    {
                        foreach (VariableNode rhsNode in rhsNodes)
                        {
                            rhsNode.parents.Add(prodNode);
                        }
                    }
                    else
                    {
                        // right-hand side without nonterminals: marking starts here
                        startingPoints.Add(prodNode);
                    }
                }
            }

            // PropagateMark is defined elsewhere; presumably it marks every nonterminal that
            // can derive a terminal string, as in the cited lemma.
            foreach (ProductionNode start in startingPoints)
            {
                start.PropagateMark();
            }

            var marked = new HashSet<Nonterminal>();
            foreach (Nonterminal nt in this.variables)
            {
                if (nodeOf[nt].isMarked)
                {
                    marked.Add(nt);
                }
            }

            // An unmarked start symbol means the language is empty, which violates the precondition.
            if (!marked.Contains(this.startSymbol))
            {
                throw new AutomataException(AutomataExceptionKind.LanguageOfGrammarIsEmpty);
            }

            ContextFreeGrammar backwardRestricted = this.RestrictToVariables(marked);

            // ---- forward pass: Lemma 4.2, p. 89, Hopcroft-Ullman ----
            var reachable = new HashSet<Nonterminal>();
            var pending = new Stack<Nonterminal>();
            pending.Push(backwardRestricted.StartSymbol);
            reachable.Add(backwardRestricted.StartSymbol);

            while (pending.Count > 0)
            {
                Nonterminal current = pending.Pop();
                foreach (Production prod in backwardRestricted.GetProductions(current))
                {
                    foreach (Nonterminal next in prod.GetVariables())
                    {
                        // Add returns false for nonterminals that were already visited
                        if (reachable.Add(next))
                        {
                            pending.Push(next);
                        }
                    }
                }
            }

            return backwardRestricted.RestrictToVariables(reachable);
        }