/// <summary>
/// Converts the grammar g into EGNF (Extended Greibach Normal Form).
/// Implements a variation of the Blum-Koch algorithm.
/// (Inf. and Comp. vol.150, pp.112-118, 1999)
/// </summary>
/// <param name="g">the grammar to be normalized</param>
/// <param name="removeEpsilonsAndUselessSymbols">if true, epsilons and useless symbols are removed from g first; otherwise g is assumed to contain no epsilons</param>
/// <returns>Extended Greibach Normal Form of g; when g (after the optional cleanup) is already in GNF it is returned as is</returns>
public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
{
    if (removeEpsilonsAndUselessSymbols)
    {
        g = g.RemoveEpsilonsAndUselessSymbols();
    }

    if (g.IsInGNF())
    {
        return g;
    }

    // Shared counter for every fresh nonterminal created below
    // (directly here and through Lookup).
    int nextId = 0;

    // Phase 1: split the productions of g.
    //  - leafProductions: productions whose first symbol is not a nonterminal
    //  - reverseIndex[C]: one (rest, lhs) pair for every production 'lhs ::= C rest'
    var leafProductions = new List<Production>();
    var reverseIndex = new Dictionary<Nonterminal, List<Pair<GrammarSymbol[], Nonterminal>>>();
    foreach (Nonterminal variable in g.variables)
    {
        reverseIndex[variable] = new List<Pair<GrammarSymbol[], Nonterminal>>();
    }

    foreach (Production production in g.GetProductions())
    {
        if (production.First is Nonterminal)
        {
            reverseIndex[(Nonterminal)production.First].Add(
                new Pair<GrammarSymbol[], Nonterminal>(production.Rest, production.Lhs));
        }
        else
        {
            leafProductions.Add(production);
        }
    }

    // Phase 2: for every variable compute its unit closure and allocate a
    // temporary start symbol.
    var unitClosures = new Dictionary<Nonterminal, HashSet<Nonterminal>>();
    var tempStart = new Dictionary<Nonterminal, Nonterminal>();
    foreach (Nonterminal variable in g.variables)
    {
        unitClosures[variable] = g.GetUnitClosure(variable);
        tempStart[variable] = new Nonterminal(nextId++);
    }

    // Phase 3: build the intermediate productions for each variable of g.
    var intermediate = new Dictionary<Nonterminal, List<Production>>();
    foreach (Nonterminal target in g.variables)
    {
        Nonterminal targetStart = tempStart[target];
        HashSet<Nonterminal> targetClosure = unitClosures[target];

        // Per-target map handed to Lookup; ids for the copies come from nextId.
        var copies = new Dictionary<Nonterminal, Nonterminal>();
        var pending = new Stack<Nonterminal>();
        var seen = new HashSet<Nonterminal>();

        var startProductions = new List<Production>();
        intermediate[targetStart] = startProductions;

        foreach (Production leaf in leafProductions)
        {
            startProductions.Add(
                new Production(targetStart, leaf.Rhs, Lookup(copies, leaf.Lhs, ref nextId)));

            if (seen.Add(leaf.Lhs))
            {
                pending.Push(leaf.Lhs);
            }

            // A leaf whose lhs is in the unit closure of the target also
            // yields the production without a trailing copy.
            if (targetClosure.Contains(leaf.Lhs))
            {
                startProductions.Add(new Production(targetStart, leaf.Rhs));
            }
        }

        // Walk the reverse index upwards from the left-hand sides of the leaves.
        while (pending.Count > 0)
        {
            Nonterminal current = pending.Pop();
            Nonterminal currentCopy = Lookup(copies, current, ref nextId);

            List<Production> currentProductions;
            if (!intermediate.TryGetValue(currentCopy, out currentProductions))
            {
                currentProductions = new List<Production>();
                intermediate[currentCopy] = currentProductions;
            }

            foreach (var entry in reverseIndex[current])
            {
                Nonterminal parent = entry.Second;
                Nonterminal parentCopy = Lookup(copies, parent, ref nextId);

                currentProductions.Add(new Production(currentCopy, entry.First, parentCopy));

                if (entry.First.Length > 0 && targetClosure.Contains(parent))
                {
                    currentProductions.Add(new Production(currentCopy, entry.First));
                }

                if (seen.Add(parent))
                {
                    pending.Push(parent);
                }
            }
        }
    }

    // Phase 4: take the union of the productions of g and the intermediate
    // productions, replacing each non-unit production 'A ::= B alpha' by
    // 'A ::= S_B alpha' where S_B is the temporary start symbol of B.
    var combined = new Dictionary<Nonterminal, List<Production>>();
    foreach (Nonterminal variable in g.variables)
    {
        var redirected = new List<Production>();
        combined[variable] = redirected;
        foreach (Production production in g.productionMap[variable])
        {
            redirected.Add(MkEGNF_RedirectToTempStart(production, tempStart));
        }
    }

    foreach (KeyValuePair<Nonterminal, List<Production>> group in intermediate)
    {
        var redirected = new List<Production>();
        combined[group.Key] = redirected;
        foreach (Production production in group.Value)
        {
            redirected.Add(MkEGNF_RedirectToTempStart(production, tempStart));
        }
    }

    ContextFreeGrammar H = new ContextFreeGrammar(new List<Nonterminal>(combined.Keys), g.startSymbol, combined);

    // Eliminating useless symbols from H may dramatically decrease the
    // number of productions.
    ContextFreeGrammar H1 = H.RemoveUselessSymbols();

    // Phase 5: starting from the start symbol of H1, eliminate the temporary
    // start symbols and collect the variables and productions reached.
    var egnfVars = new List<Nonterminal>();
    var egnfProds = new Dictionary<Nonterminal, List<Production>>();
    var worklist = new Stack<Nonterminal>();
    var reached = new HashSet<Nonterminal>();

    worklist.Push(H1.startSymbol);
    reached.Add(H1.startSymbol);
    egnfVars.Add(H1.startSymbol);
    egnfProds[H1.startSymbol] = new List<Production>();

    while (worklist.Count > 0)
    {
        Nonterminal head = worklist.Pop();
        List<Production> headProductions = egnfProds[head];

        foreach (Production production in H1.productionMap[head])
        {
            if (production.First is Nonterminal && !production.IsUnit)
            {
                // Here the leading symbol is known to be a temp start symbol:
                // splice each of its right-hand sides in front of the
                // remainder of this production.
                Nonterminal tempStartSymbol = (Nonterminal)production.First;
                foreach (Production expansion in H1.productionMap[tempStartSymbol])
                {
                    int prefixLength = expansion.Rhs.Length;
                    int tailLength = production.Rhs.Length - 1;
                    GrammarSymbol[] rhs = new GrammarSymbol[prefixLength + tailLength];
                    Array.Copy(expansion.Rhs, 0, rhs, 0, prefixLength);
                    Array.Copy(production.Rhs, 1, rhs, prefixLength, tailLength);

                    Production spliced = new Production(head, rhs);
                    headProductions.Add(spliced);
                    MkEGNF_RegisterVariables(spliced, worklist, reached, egnfVars, egnfProds);
                }
            }
            else
            {
                headProductions.Add(production);
                MkEGNF_RegisterVariables(production, worklist, reached, egnfVars, egnfProds);
            }
        }
    }

    return new ContextFreeGrammar(egnfVars, H1.startSymbol, egnfProds);
}

/// <summary>
/// Helper of MkEGNF. Returns p itself when p does not start with a nonterminal
/// or is a unit production; otherwise returns a new production with the same
/// left-hand side whose leading nonterminal is replaced by its entry in tempStart.
/// </summary>
private static Production MkEGNF_RedirectToTempStart(Production p, Dictionary<Nonterminal, Nonterminal> tempStart)
{
    if (!(p.First is Nonterminal) || p.IsUnit)
    {
        return p;
    }

    GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
    rhs[0] = tempStart[(Nonterminal)p.First];
    Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
    return new Production(p.Lhs, rhs);
}

/// <summary>
/// Helper of MkEGNF. For every variable of the production that has not been
/// reached yet: marks it as reached, schedules it on the worklist, appends it
/// to egnfVars and gives it an empty production list in egnfProds.
/// </summary>
private static void MkEGNF_RegisterVariables(
    Production production,
    Stack<Nonterminal> worklist,
    HashSet<Nonterminal> reached,
    List<Nonterminal> egnfVars,
    Dictionary<Nonterminal, List<Production>> egnfProds)
{
    foreach (Nonterminal variable in production.GetVariables())
    {
        if (reached.Add(variable))
        {
            worklist.Push(variable);
            egnfVars.Add(variable);
            egnfProds[variable] = new List<Production>();
        }
    }
}