/// <summary>
/// Creates the production <paramref name="lhs"/> ::= <paramref name="rhsButLast"/> <paramref name="last"/>,
/// i.e. the right-hand side is a copy of <paramref name="rhsButLast"/> with <paramref name="last"/> appended.
/// </summary>
/// <param name="lhs">left-hand side nonterminal; must not be null</param>
/// <param name="rhsButLast">all right-hand side symbols except the final one; must not be null or contain null (may be empty)</param>
/// <param name="last">final right-hand side symbol; must not be null</param>
/// <exception cref="ArgumentNullException">any argument, or any element of <paramref name="rhsButLast"/>, is null</exception>
public Production(Nonterminal lhs, GrammarSymbol[] rhsButLast, GrammarSymbol last)
{
    if (lhs == null)
    {
        throw new ArgumentNullException("lhs");
    }
    if (rhsButLast == null)
    {
        throw new ArgumentNullException("rhsButLast");
    }
    if (last == null)
    {
        throw new ArgumentNullException("last");
    }
    // Reject null symbols up front: they would otherwise be stored silently and
    // only fail later, when the right-hand side is traversed.
    for (int i = 0; i < rhsButLast.Length; i++)
    {
        if (rhsButLast[i] == null)
        {
            throw new ArgumentNullException("rhsButLast", "rhsButLast must not contain null symbols");
        }
    }

    this.Lhs = lhs;
    // The caller's array is copied, so later changes to it do not affect this production.
    this.Rhs = new GrammarSymbol[rhsButLast.Length + 1];
    Array.Copy(rhsButLast, this.Rhs, rhsButLast.Length);
    this.Rhs[rhsButLast.Length] = last;
}
/// <summary>
/// Lazily enumerates the variations of <paramref name="symbols"/> in which each nonterminal
/// that is a member of <paramref name="nullables"/> is independently dropped or kept;
/// every other symbol is always kept.
/// A null list yields exactly one result, which is null.
/// For a droppable head symbol, the variation without it is produced before the one with it.
/// </summary>
static IEnumerable<ConsList<GrammarSymbol>> EnumerateNullableFreeVariations(ConsList<GrammarSymbol> symbols, HashSet<Nonterminal> nullables)
{
    if (symbols == null)
    {
        yield return null;
        yield break;
    }

    // Build the variations of the tail first, then prepend (or skip) the head symbol.
    foreach (var tail in EnumerateNullableFreeVariations(symbols.Rest, nullables))
    {
        GrammarSymbol head = symbols.First;
        Nonterminal headVariable = head as Nonterminal;
        bool droppable = headVariable != null && nullables.Contains(headVariable);

        if (droppable)
        {
            yield return tail;
        }
        yield return new ConsList<GrammarSymbol>(head, tail);
    }
}
/// <summary>
/// Reports whether some symbol of the right-hand side equals <paramref name="symbol"/>.
/// Equality is decided by calling Equals on each right-hand side symbol in order.
/// </summary>
internal bool RhsContainsSymbol(GrammarSymbol symbol)
{
    bool found = false;
    foreach (GrammarSymbol candidate in Rhs)
    {
        if (candidate.Equals(symbol))
        {
            found = true;
            break;
        }
    }
    return found;
}
/// <summary>
/// Creates the production <paramref name="lhs"/> ::= <paramref name="rhsButLast"/> <paramref name="last"/>,
/// i.e. the right-hand side is a copy of <paramref name="rhsButLast"/> with <paramref name="last"/> appended.
/// </summary>
/// <param name="lhs">left-hand side nonterminal; must not be null</param>
/// <param name="rhsButLast">all right-hand side symbols except the final one; must not be null or contain null (may be empty)</param>
/// <param name="last">final right-hand side symbol; must not be null</param>
/// <exception cref="ArgumentNullException">any argument, or any element of <paramref name="rhsButLast"/>, is null</exception>
public Production(Nonterminal lhs, GrammarSymbol[] rhsButLast, GrammarSymbol last)
{
    // Same exception type as before, but each failure now names the offending parameter.
    if (lhs == null)
    {
        throw new ArgumentNullException("lhs");
    }
    if (last == null)
    {
        throw new ArgumentNullException("last");
    }
    if (rhsButLast == null)
    {
        // Previously surfaced from Array.Exists under the unrelated parameter name "array".
        throw new ArgumentNullException("rhsButLast");
    }
    if (Array.Exists(rhsButLast, x => x == null))
    {
        throw new ArgumentNullException("rhsButLast", "rhsButLast must not contain null symbols");
    }

    this.Lhs = lhs;
    // The caller's array is copied, so later changes to it do not affect this production.
    this.Rhs = new GrammarSymbol[rhsButLast.Length + 1];
    Array.Copy(rhsButLast, this.Rhs, rhsButLast.Length);
    this.Rhs[rhsButLast.Length] = last;
}
/// <summary>
/// Produces the GNF (Greibach Normal Form) for the grammar g.
/// If g already satisfies IsInGNF() it is returned unchanged; otherwise g is first
/// converted with MkCNF(g, false) and the result is built from that CNF grammar.
/// Implements a variation of the Koch-Blum algorithm. (STACS 97, pp. 47-54)
/// </summary>
/// <param name="g">the grammar to be normalized</param>
/// <param name="removeEpsilonsUselessSymbolsUnitsProductions">if true, g is first replaced by
/// g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions()</param>
/// <returns>g itself when it is already in GNF, otherwise a new grammar with the start symbol of the CNF grammar</returns>
public static ContextFreeGrammar MkGNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
{
    if (removeEpsilonsUselessSymbolsUnitsProductions)
        g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
    if (g.IsInGNF())
        return g;
    // The flag is false here: any requested cleanup has already been applied above.
    ContextFreeGrammar cnf = MkCNF(g,false);
    var Vars = cnf.variables;
    // Numbers the fresh start symbols of the intermediate grammars G_[B].
    int nonterminalID = 0;
    var M = new Dictionary<Nonterminal, Automaton<GrammarSymbol>>();
    #region construct the automata M[B] for all variables B
    // One state counter is shared by all automata, so every state id is used by one automaton only.
    int id = 0;
    var initStateMap = new Dictionary<Nonterminal, int>();
    var finalStateMap = new Dictionary<Nonterminal, int>();
    foreach (Nonterminal B in Vars)
    {
        initStateMap[B] = id++;
        finalStateMap[B] = id++;
    }
    var movesOfM = new Dictionary<Nonterminal, List<Move<GrammarSymbol>>>();
    foreach (Nonterminal B in Vars)
        movesOfM[B] = new List<Move<GrammarSymbol>>();
    #region construct the moves of the automata
    foreach (Nonterminal B in Vars)
    {
        // Maps each variable visited from B to its state in M[B]; B itself is the final state.
        var variableToStateMap = new Dictionary<Nonterminal, int>();
        Stack<Nonterminal> stack = new Stack<Nonterminal>();
        stack.Push(B);
        int initState = initStateMap[B];
        variableToStateMap[B] = finalStateMap[B];
        while (stack.Count > 0)
        {
            Nonterminal C = stack.Pop();
            foreach (Production p in cnf.GetProductions(C))
            {
                // IsSingleExprinal productions (presumably the CNF form C ::= a) become a move
                // from the initial state to the state of C, labelled with the first symbol.
                if (p.IsSingleExprinal)
                    movesOfM[B].Add(Move<GrammarSymbol>.Create(initState, variableToStateMap[C], p.First));
                else
                {
                    //using the fact that the grammar is in CNF: the production is C ::= D E
                    Nonterminal D = (Nonterminal)p.First;
                    if (!variableToStateMap.ContainsKey(D))
                    {
                        //visit all variables reachable that have not already been visited
                        variableToStateMap.Add(D,id++);
                        stack.Push(D);
                    }
                    GrammarSymbol E = p.Rhs[1];
                    // C ::= D E becomes a move from the state of D to the state of C, labelled E.
                    movesOfM[B].Add(Move<GrammarSymbol>.Create(variableToStateMap[D], variableToStateMap[C], E));
                }
            }
        }
    }
    #endregion
    // NOTE(review): the first argument of Automaton.Create is passed as null; its meaning is not visible here.
    foreach (Nonterminal B in Vars)
        M[B] = Automaton<GrammarSymbol>.Create(null, initStateMap[B], new int[] {finalStateMap[B]}, movesOfM[B]);
    #endregion
    var G_ = new Dictionary<Nonterminal, ContextFreeGrammar>();
    #region construct corresponding intermediate grammars G_[B] corresponding to M[B]
    foreach (Nonterminal B in Vars)
    {
        var MB = M[B];
        bool MBfinalStateHasVariableMoves = FinalStateHasVariableMoves(MB);
        var productions = new Dictionary<Nonterminal, List<Production>>();
        Nonterminal startSymbol = new Nonterminal(nonterminalID++);
        var vars = new List<Nonterminal>();
        vars.Add(startSymbol);
        productions[startSymbol] = new List<Production>();
        // Moves leaving the initial state give the productions of the start symbol.
        foreach (var move in MB.GetMovesFrom(MB.InitialState))
        {
            // A move into the final state may end the derivation: start ::= label.
            if (move.TargetState == MB.FinalState)
                productions[startSymbol].Add(new Production(startSymbol, move.Label));
            // The derivation continues in the target state unless that state is final with no further moves.
            if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
            {
                // A state is represented by a nonterminal named "Q" + state id.
                // NOTE(review): a new Nonterminal object is created per use, so the dictionary lookups
                // below presumably rely on Nonterminal equality being name-based - confirm.
                var C = new Nonterminal("Q" + move.TargetState);
                productions[startSymbol].Add(new Production(startSymbol, move.Label, C));
                if (!productions.ContainsKey(C))
                {
                    productions[C] = new List<Production>();
                    vars.Add(C);
                }
            }
        }
        // Moves leaving every other state give the productions of that state's nonterminal.
        foreach (int state in MB.States)
            if (state != MB.InitialState)
                foreach (Move<GrammarSymbol> move in MB.GetMovesFrom(state))
                {
                    Nonterminal D = new Nonterminal("Q" + state);
                    Nonterminal C = new Nonterminal("Q" + move.TargetState);
                    if (!productions.ContainsKey(D))
                    {
                        productions[D] = new List<Production>();
                        vars.Add(D);
                    }
                    // Moves from non-initial states were created from C ::= D E above, so the
                    // label is the second symbol of a CNF production and is cast to a variable.
                    Nonterminal E = (Nonterminal)move.Label;
                    if (move.TargetState == MB.FinalState)
                        productions[D].Add(new Production(D, E));
                    if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                    {
                        //we pretend here that E is a terminal
                        productions[D].Add(new Production(D, E, C));
                        if (!productions.ContainsKey(C))
                        {
                            productions[C] = new List<Production>();
                            vars.Add(C);
                        }
                    }
                }
        G_[B] = new ContextFreeGrammar(vars, startSymbol, productions);
    }
    #endregion
    var G = new Dictionary<Nonterminal, ContextFreeGrammar>();
    #region construct the corresponding temporary G[B]'s
    foreach (Nonterminal B in Vars)
    {
        var G_B = G_[B];
        var productions = new Dictionary<Nonterminal, List<Production>>();
        //var vars = new List<Variable>();
        Nonterminal startSymbol = G_B.startSymbol;
        // The start productions are taken over as they are (no copy is made here).
        productions[startSymbol] = G_B.productionMap[startSymbol];
        foreach (Nonterminal D in G_B.variables)
            if (!D.Equals(startSymbol))
            {
                var productions_D = new List<Production>();
                productions[D] = productions_D;
                // Each production of D starts with a variable E; replace that leading E by the
                // right-hand side of every start production of G_[E].
                foreach (Production p in G_B.productionMap[D])
                {
                    Nonterminal E = (Nonterminal)p.First;
                    var G_E = G_[E];
                    if (p.IsUnit)
                        foreach (Production q in G_E.productionMap[G_E.startSymbol])
                            productions_D.Add(new Production(D, q.Rhs));
                    else
                        foreach (Production q in G_E.productionMap[G_E.startSymbol])
                        {
                            // q.Rhs followed by the second symbol of p
                            GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                            Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                            symbols[q.Rhs.Length] = p.Rhs[1];
                            productions_D.Add(new Production(D, symbols));
                        }
                }
            }
        //ignore the variable list, it is not used
        G[B] = new ContextFreeGrammar(null, startSymbol, productions);
    }
    #endregion
    #region construct the final GNF from the G[B]'s
    var productionsGNF = new List<Production>();
    foreach (Nonterminal A in cnf.variables)
    {
        foreach (Production p in cnf.productionMap[A])
        {
            if (p.IsSingleExprinal)
                productionsGNF.Add(p);
            else
            {
                // A ::= B C is replaced by A ::= q.Rhs C for every start production q of G[B].
                Nonterminal B = (Nonterminal)p.Rhs[0];
                Nonterminal C = (Nonterminal)p.Rhs[1];
                var GB = G[B];
                foreach (Production q in GB.productionMap[GB.startSymbol])
                {
                    GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                    Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                    symbols[q.Rhs.Length] = C;
                    productionsGNF.Add(new Production(A, symbols));
                }
            }
        }
    }
    // Add the productions of all state nonterminals; the temporary start symbols are left out.
    foreach (Nonterminal B in Vars)
    {
        var GB = G[B];
        foreach (var kv in GB.productionMap)
            if (!kv.Key.Equals(GB.startSymbol))
                productionsGNF.AddRange(kv.Value);
    }
    #endregion
    ContextFreeGrammar gnf = new ContextFreeGrammar(cnf.startSymbol, productionsGNF);
    return gnf;
}
/// <summary>
/// Produces the EGNF (Extended Greibach Normal Form) for the grammar g.
/// If g already satisfies IsInGNF() it is returned unchanged.
/// Implements a variation of the Blum-Koch algorithm.
/// (Inf. and Comp. vol.150, pp.112-118, 1999)
/// </summary>
/// <param name="g">the grammar to be normalized</param>
/// <param name="removeEpsilonsAndUselessSymbols">if true, first removes epsilons and useless symbols, otherwise assumes that epsilons do not occur</param>
/// <returns>Extended Greibach Normal Form of g</returns>
public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
{
    if (removeEpsilonsAndUselessSymbols)
        g = g.RemoveEpsilonsAndUselessSymbols();
    if (g.IsInGNF())
        return g;
    // leavesP: productions whose first right-hand side symbol is not a nonterminal.
    var leavesP = new List<Production>();
    // revP[X]: one pair (p.Rest, p.Lhs) for every production p whose first symbol is the nonterminal X.
    var revP = new Dictionary<Nonterminal, List<Pair<GrammarSymbol[], Nonterminal>>>();
    // Counter for fresh nonterminals; also advanced by Lookup through the ref parameter.
    int nonterminalID = 0;
    #region compute leavesP and revP
    foreach (Nonterminal v in g.variables)
        revP[v] = new List<Pair<GrammarSymbol[], Nonterminal>>();
    foreach (Production p in g.GetProductions())
        if (!(p.First is Nonterminal))
            leavesP.Add(p);
        else
            revP[(Nonterminal)p.First].Add(new Pair<GrammarSymbol[], Nonterminal>(p.Rest, p.Lhs));
    #endregion
    var W = new Dictionary<Nonterminal, HashSet<Nonterminal>>();
    var startSymbol = new Dictionary<Nonterminal, Nonterminal>();
    #region create new start symbols and compute unit closures
    foreach (Nonterminal v in g.variables)
    {
        W[v] = g.GetUnitClosure(v);
        startSymbol[v] = new Nonterminal(nonterminalID++);
    }
    #endregion
    var P = new Dictionary<Nonterminal, List<Production>>();
    #region construct intermediate productions in P for each variable B
    foreach (Nonterminal B in g.variables)
    {
        var S_B = startSymbol[B];
        var W_B = W[B]; //unit closure of B
        // Per-B mapping from variables of g to nonterminals, filled through Lookup.
        // NOTE(review): Lookup is defined elsewhere; presumably it returns the entry for the
        // given variable and creates a fresh nonterminal on first use - confirm.
        var Bvar = new Dictionary<Nonterminal, Nonterminal>();
        Stack<Nonterminal> stack = new Stack<Nonterminal>();
        HashSet<Nonterminal> visited = new HashSet<Nonterminal>();
        var S_B_list = new List<Production>();
        P[S_B] = S_B_list;
        foreach (Production p in leavesP)
        {
            // S_B ::= p.Rhs followed by the B-specific nonterminal for p.Lhs
            S_B_list.Add(new Production(S_B, p.Rhs, Lookup(Bvar, p.Lhs, ref nonterminalID)));
            if (visited.Add(p.Lhs))
                stack.Push(p.Lhs);
            // If p.Lhs is in the unit closure of B, p.Rhs alone is also added for S_B.
            if (W_B.Contains(p.Lhs))
                S_B_list.Add(new Production(S_B, p.Rhs));
        }
        // Worklist over the variables reached by following revP from the left-hand sides of leavesP.
        while (stack.Count > 0)
        {
            Nonterminal C = stack.Pop();
            Nonterminal C_B = Lookup(Bvar, C, ref nonterminalID);
            List<Production> C_B_list;
            if (!P.TryGetValue(C_B, out C_B_list))
            {
                C_B_list = new List<Production>();
                P[C_B] = C_B_list;
            }
            // t.First is the rest of a production 'D ::= C rest', t.Second is its left-hand side D.
            foreach (var t in revP[C])
            {
                Nonterminal D = t.Second;
                Nonterminal D_B = Lookup(Bvar, D, ref nonterminalID);
                C_B_list.Add(new Production(C_B, t.First, D_B));
                // The length check keeps an empty right-hand side from being added.
                if (t.First.Length > 0 && W_B.Contains(D))
                    C_B_list.Add(new Production(C_B, t.First));
                if (visited.Add(D))
                    stack.Push(D);
            }
        }
    }
    #endregion
    //produce the union of P and g.productionMap in H
    //and replace each production 'A ::= B alpha' by 'A ::= S_B alpha'
    var Hprods = new Dictionary<Nonterminal, List<Production>>();
    #region compute Hprods
    foreach (Nonterminal A in g.variables)
    {
        var A_prods = new List<Production>();
        Hprods[A] = A_prods;
        foreach (Production p in g.productionMap[A])
        {
            // Non-unit productions that start with a variable get that variable replaced by
            // its temporary start symbol; all other productions are kept unchanged.
            if (p.First is Nonterminal && !p.IsUnit)
            {
                GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                rhs[0] = startSymbol[(Nonterminal)p.First];
                Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                Production q = new Production(p.Lhs, rhs);
                A_prods.Add(q);
            }
            else
                A_prods.Add(p);
        }
    }
    // The same replacement is applied to the intermediate productions in P.
    foreach (Nonterminal A in P.Keys)
    {
        var A_prods = new List<Production>();
        Hprods[A] = A_prods;
        foreach (Production p in P[A])
        {
            if (p.First is Nonterminal && !p.IsUnit)
            {
                GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                rhs[0] = startSymbol[(Nonterminal)p.First];
                Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                Production q = new Production(p.Lhs, rhs);
                A_prods.Add(q);
            }
            else
                A_prods.Add(p);
        }
    }
    #endregion
    ContextFreeGrammar H = new ContextFreeGrammar(new List<Nonterminal>(Hprods.Keys), g.startSymbol, Hprods);
    //Console.WriteLine("--------- H:");
    //H.Display(Console.Out);
    //eliminate useless symbols from H
    //this may dramatically decrease the number of productions
    ContextFreeGrammar H1 = H.RemoveUselessSymbols();
    //Console.WriteLine("---------- H1:");
    //H1.Display(Console.Out);
    // Worklist state for the final pass; only variables reached from H1.startSymbol end up in the result.
    List<Nonterminal> egnfVars = new List<Nonterminal>();
    Dictionary<Nonterminal, List<Production>> egnfProds = new Dictionary<Nonterminal, List<Production>>();
    Stack<Nonterminal> egnfStack = new Stack<Nonterminal>();
    HashSet<Nonterminal> egnfVisited = new HashSet<Nonterminal>();
    egnfStack.Push(H1.startSymbol);
    egnfVisited.Add(H1.startSymbol);
    egnfVars.Add(H1.startSymbol);
    egnfProds[H1.startSymbol] = new List<Production>();
    #region eliminate temp start symbols and produce the EGNF form
    while (egnfStack.Count > 0)
    {
        var A = egnfStack.Pop();
        List<Production> A_prods = egnfProds[A];
        foreach (Production p in H1.productionMap[A])
        {
            // Productions that start with a non-variable, and unit productions, are kept as they are.
            if (!(p.First is Nonterminal) || p.IsUnit)
            {
                A_prods.Add(p);
                foreach (Nonterminal x in p.GetVariables())
                    if (egnfVisited.Add(x))
                    {
                        egnfStack.Push(x);
                        egnfVars.Add(x);
                        egnfProds[x] = new List<Production>();
                    }
            }
            else
            {
                //here we know that S_B is a temp start symbol
                Nonterminal S_B = (Nonterminal)p.First;
                // Inline S_B: for each production t of S_B emit A ::= t.Rhs followed by p.Rhs without its first symbol.
                foreach (Production t in H1.productionMap[S_B])
                {
                    int k = t.Rhs.Length;
                    GrammarSymbol[] rhs = new GrammarSymbol[k + p.Rhs.Length - 1];
                    for (int i = 0; i < k; i++)
                        rhs[i] = t.Rhs[i];
                    for (int i = 1; i < p.Rhs.Length; i++)
                        rhs[k + i - 1] = p.Rhs[i];
                    Production q = new Production(A, rhs);
                    A_prods.Add(q);
                    foreach (Nonterminal x in q.GetVariables())
                        if (egnfVisited.Add(x))
                        {
                            egnfStack.Push(x);
                            egnfVars.Add(x);
                            egnfProds[x] = new List<Production>();
                        }
                }
            }
        }
    }
    #endregion
    ContextFreeGrammar egnf = new ContextFreeGrammar(egnfVars, H1.startSymbol, egnfProds);
    return egnf;
}
/// <summary>
/// Produces the CNF (Chomsky Normal Form) for the grammar g.
/// If removeEpsilonsUselessSymbolsUnitsProductions is true, it first eliminates epsilons,
/// useless symbols, and unit productions; otherwise it assumes that g contains
/// no epsilons, useless symbols or unit productions.
/// </summary>
/// <param name="g">the grammar to be normalized</param>
/// <param name="removeEpsilonsUselessSymbolsUnitsProductions">if true, g is first replaced by
/// g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions()</param>
/// <returns>a new grammar with the start symbol of g, the variables of g and the fresh variables introduced here</returns>
public static ContextFreeGrammar MkCNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
{
    if (removeEpsilonsUselessSymbolsUnitsProductions)
        g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
    var productions = new Dictionary<Nonterminal, List<Production>>();
    List<Nonterminal> variables = new List<Nonterminal>(g.variables);
    foreach (Nonterminal v in g.variables)
        productions[v] = new List<Production>();
    // Numbers the fresh nonterminals of both phases.
    // NOTE(review): numbering starts at 0; whether these can clash with variables already in g
    // depends on how Nonterminal(int) forms its name - confirm.
    int nonterminalID = 0;
    //Implements algo in Theorem 4.5, page 92-93, in Hopcroft-Ullman
    #region make productions of the form V --> V0...Vn or V --> a
    // One fresh variable per replaced symbol, shared by all productions that use the symbol.
    var freshVarMap = new Dictionary<GrammarSymbol, Nonterminal>();
    foreach (Nonterminal v in g.variables)
        foreach (Production p in g.productionMap[v])
            if (p.ContainsNoExprinals || p.IsCNF)
                productions[v].Add(p);
            else
            {
                // Rebuild the right-hand side with every non-variable symbol replaced by a
                // variable u that has the single production u ::= symbol.
                GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                for (int i = 0; i < rhs.Length; i++)
                {
                    if (p.Rhs[i] is Nonterminal)
                        rhs[i] = p.Rhs[i];
                    else
                    {
                        Nonterminal u;
                        if (!freshVarMap.TryGetValue(p.Rhs[i], out u))
                        {
                            u = new Nonterminal(nonterminalID++);
                            freshVarMap[p.Rhs[i]] = u;
                            variables.Add(u);
                            var prods = new List<Production>();
                            prods.Add(new Production(u, p.Rhs[i]));
                            productions[u] = prods;
                        }
                        rhs[i] = u;
                    }
                }
                productions[v].Add(new Production(v, rhs));
            }
    #endregion
    var productionsCNF = new Dictionary<Nonterminal, List<Production>>();
    // variablesCNF is a separate copy, so adding to it below does not disturb the loop over 'variables'.
    List<Nonterminal> variablesCNF = new List<Nonterminal>(variables);
    foreach (Nonterminal v in variablesCNF)
        productionsCNF[v] = new List<Production>();
    #region replace V --> V0V1...Vn (n > 2), by V --> V0U0, U0 --> V1U1, ..., Un-2 --> Vn-1Vn
    foreach (Nonterminal v in variables)
        foreach (Production p in productions[v])
            if (p.IsCNF)
                productionsCNF[v].Add(p);
            else
            {
                // x is the left-hand side of the link being emitted, y the fresh variable that continues the chain.
                // NOTE(review): a non-CNF production with fewer than two right-hand side symbols would
                // index out of range at the final Add below; the method relies on epsilon and unit
                // productions being absent.
                Nonterminal x = v;
                Nonterminal y = new Nonterminal(nonterminalID++);
                variablesCNF.Add(y);
                productionsCNF[y] = new List<Production>();
                for (int i = 0; i < p.Rhs.Length - 2; i++)
                {
                    productionsCNF[x].Add(new Production(x, p.Rhs[i], y));
                    // Another fresh variable is needed only if one more link follows this one.
                    if (i < p.Rhs.Length - 3)
                    {
                        x = y;
                        y = new Nonterminal(nonterminalID++);
                        variablesCNF.Add(y);
                        productionsCNF[y] = new List<Production>();
                    }
                }
                // The last fresh variable derives the final two symbols.
                productionsCNF[y].Add(new Production(y, p.Rhs[p.Rhs.Length - 2], p.Rhs[p.Rhs.Length - 1]));
            }
    #endregion
    ContextFreeGrammar cnf = new ContextFreeGrammar(variablesCNF, g.startSymbol, productionsCNF);
    return cnf;
}
/// <summary>
/// Reads tokens from the lexer until the EOS token and appends one production to
/// <c>productions</c> per alternative. The first nonterminal read becomes <c>startvar</c>.
/// An OR token closes the current alternative and keeps the left-hand side; an ARR token
/// means the nonterminal read just before it is the left-hand side of a new rule.
/// Throws ParseException on an unexpected token or a misplaced arrow.
/// </summary>
private void Parse()
{
    bool done = false;
    Token cur = null;
    Token last = null;
    // The input must begin with a nonterminal followed by an arrow.
    Grammars.Nonterminal curlhs = ExpectNT();
    startvar = curlhs;
    ExpectArrow();
    // Right-hand side symbols collected for the alternative currently being read.
    List <Grammars.GrammarSymbol> currhs = new List <Grammars.GrammarSymbol>();
    while (!done)
    {
        last = cur;
        cur = lexer.Next();
        switch (cur.t)
        {
        case TokenType.NT:
            currhs.Add(Grammars.Nonterminal.CreateByParser(cur.content));
            break;

        case TokenType.T:
        {
            // terminalMap caches the symbols computed for each distinct terminal text.
            GrammarSymbol[] symbs;
            if (!terminalMap.TryGetValue(cur.content, out symbs))
            {
                var aut = parseRegex(cur.content);
                #region parse this terminal-regex as an automaton and compute symbs or set symbs to top nonterminal
                int seq_length = -1;
                if (aut.IsEpsilon)
                {
                    // contributes no symbols to the right-hand side
                    symbs = new GrammarSymbol[] { };
                }
                else if (aut.InitialStateIsSource && aut.HasSingleFinalSink && aut.MoveCount == 1)
                {
                    //just a single terminal
                    var move = aut.GetMoveFrom(aut.InitialState);
                    symbs = new GrammarSymbol[] { new Terminal <T>(move.Label) };
                }
                else if (aut.CheckIfSequence(out seq_length) && aut.HasSingleFinalSink && aut.IsEpsilonFree)
                {
                    //collect all the elements and map them to individual terminals
                    //inline the automaton as sequence of terminals
                    symbs = new GrammarSymbol[seq_length];
                    int q = aut.InitialState;
                    int i = 0;
                    // Follow the moves from the initial state until a final state is reached.
                    while (!aut.IsFinalState(q))
                    {
                        var move = aut.GetMoveFrom(q);
                        q = move.TargetState;
                        symbs[i] = new Terminal <T>(move.Label);
                        i += 1;
                    }
                }
                else
                {
                    //introduce new nonterminal for the automaton
                    int id = __regexId++;
                    var nt = Nonterminal.MkNonterminalForRegex(id);
                    // The automaton is recorded in parsedRegexes under the new nonterminal.
                    parsedRegexes[nt] = aut;
                    symbs = new GrammarSymbol[] { nt };
                }
                terminalMap[cur.content] = symbs;
                #endregion
            }
            currhs.AddRange(symbs);
            break;
        }

        case TokenType.OR:
            // End of one alternative; the next one has the same left-hand side.
            productions.Add(new Grammars.Production(curlhs, currhs.ToArray()));
            currhs.Clear();
            break;

        case TokenType.ARR:
            // The arrow belongs to a new rule whose left-hand side was already read as the
            // last symbol of the current alternative, so that symbol must exist ...
            if (currhs.Count < 1)
            {
                throw new ParseException();
            }
            // ... and the token just before the arrow must have been a nonterminal.
            if (last.t != TokenType.NT)
            {
                throw new ParseException();
            }
            // downcast: the element was added by the NT case for the previous token
            Grammars.Nonterminal newlhs = (Grammars.Nonterminal)currhs[currhs.Count - 1];
            currhs.RemoveAt(currhs.Count - 1);
            productions.Add(new Grammars.Production(curlhs, currhs.ToArray()));
            currhs.Clear();
            curlhs = newlhs;
            break;

        case TokenType.EOS:
            // End of input: emit the alternative in progress and stop.
            productions.Add(new Grammars.Production(curlhs, currhs.ToArray()));
            currhs.Clear();
            done = true;
            break;

        default:
            throw new ParseException();
        }
    }
}
/// <summary>
/// Produces the EGNF (Extended Greibach Normal Form) for the grammar g.
/// If g already satisfies IsInGNF() it is returned unchanged.
/// Implements a variation of the Blum-Koch algorithm.
/// (Inf. and Comp. vol.150, pp.112-118, 1999)
/// </summary>
/// <param name="g">the grammar to be normalized</param>
/// <param name="removeEpsilonsAndUselessSymbols">if true, first removes epsilons and useless symbols, otherwise assumes that epsilons do not occur</param>
/// <returns>Extended Greibach Normal Form of g</returns>
public static ContextFreeGrammar MkEGNF(ContextFreeGrammar g, bool removeEpsilonsAndUselessSymbols)
{
    if (removeEpsilonsAndUselessSymbols)
    {
        g = g.RemoveEpsilonsAndUselessSymbols();
    }
    if (g.IsInGNF())
    {
        return(g);
    }
    // leavesP: productions whose first right-hand side symbol is not a nonterminal.
    var leavesP = new List <Production>();
    // revP[X]: one tuple (p.Rest, p.Lhs) for every production p whose first symbol is the nonterminal X.
    var revP = new Dictionary <Nonterminal, List <Tuple <GrammarSymbol[], Nonterminal> > >();
    // Counter for fresh nonterminals; also advanced by Lookup through the ref parameter.
    int nonterminalID = 0;
    #region compute leavesP and revP
    foreach (Nonterminal v in g.variables)
    {
        revP[v] = new List <Tuple <GrammarSymbol[], Nonterminal> >();
    }
    foreach (Production p in g.GetProductions())
    {
        if (!(p.First is Nonterminal))
        {
            leavesP.Add(p);
        }
        else
        {
            revP[(Nonterminal)p.First].Add(new Tuple <GrammarSymbol[], Nonterminal>(p.Rest, p.Lhs));
        }
    }
    #endregion
    var W = new Dictionary <Nonterminal, HashSet <Nonterminal> >();
    var startSymbol = new Dictionary <Nonterminal, Nonterminal>();
    #region create new start symbols and compute unit closures
    foreach (Nonterminal v in g.variables)
    {
        W[v] = g.GetUnitClosure(v);
        startSymbol[v] = new Nonterminal(nonterminalID++);
    }
    #endregion
    var P = new Dictionary <Nonterminal, List <Production> >();
    #region construct intermediate productions in P for each variable B
    foreach (Nonterminal B in g.variables)
    {
        var S_B = startSymbol[B];
        var W_B = W[B]; //unit closure of B
        // Per-B mapping from variables of g to nonterminals, filled through Lookup.
        // NOTE(review): Lookup is defined elsewhere; presumably it returns the entry for the
        // given variable and creates a fresh nonterminal on first use - confirm.
        var Bvar = new Dictionary <Nonterminal, Nonterminal>();
        Stack <Nonterminal> stack = new Stack <Nonterminal>();
        HashSet <Nonterminal> visited = new HashSet <Nonterminal>();
        var S_B_list = new List <Production>();
        P[S_B] = S_B_list;
        foreach (Production p in leavesP)
        {
            // S_B ::= p.Rhs followed by the B-specific nonterminal for p.Lhs
            S_B_list.Add(new Production(S_B, p.Rhs, Lookup(Bvar, p.Lhs, ref nonterminalID)));
            if (visited.Add(p.Lhs))
            {
                stack.Push(p.Lhs);
            }
            // If p.Lhs is in the unit closure of B, p.Rhs alone is also added for S_B.
            if (W_B.Contains(p.Lhs))
            {
                S_B_list.Add(new Production(S_B, p.Rhs));
            }
        }
        // Worklist over the variables reached by following revP from the left-hand sides of leavesP.
        while (stack.Count > 0)
        {
            Nonterminal C = stack.Pop();
            Nonterminal C_B = Lookup(Bvar, C, ref nonterminalID);
            List <Production> C_B_list;
            if (!P.TryGetValue(C_B, out C_B_list))
            {
                C_B_list = new List <Production>();
                P[C_B] = C_B_list;
            }
            // t.Item1 is the rest of a production 'D ::= C rest', t.Item2 is its left-hand side D.
            foreach (var t in revP[C])
            {
                Nonterminal D = t.Item2;
                Nonterminal D_B = Lookup(Bvar, D, ref nonterminalID);
                C_B_list.Add(new Production(C_B, t.Item1, D_B));
                // The length check keeps an empty right-hand side from being added.
                if (t.Item1.Length > 0 && W_B.Contains(D))
                {
                    C_B_list.Add(new Production(C_B, t.Item1));
                }
                if (visited.Add(D))
                {
                    stack.Push(D);
                }
            }
        }
    }
    #endregion
    //produce the union of P and g.productionMap in H
    //and replace each production 'A ::= B alpha' by 'A ::= S_B alpha'
    var Hprods = new Dictionary <Nonterminal, List <Production> >();
    #region compute Hprods
    foreach (Nonterminal A in g.variables)
    {
        var A_prods = new List <Production>();
        Hprods[A] = A_prods;
        foreach (Production p in g.productionMap[A])
        {
            // Non-unit productions that start with a variable get that variable replaced by
            // its temporary start symbol; all other productions are kept unchanged.
            if (p.First is Nonterminal && !p.IsUnit)
            {
                GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                rhs[0] = startSymbol[(Nonterminal)p.First];
                Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                Production q = new Production(p.Lhs, rhs);
                A_prods.Add(q);
            }
            else
            {
                A_prods.Add(p);
            }
        }
    }
    // The same replacement is applied to the intermediate productions in P.
    foreach (Nonterminal A in P.Keys)
    {
        var A_prods = new List <Production>();
        Hprods[A] = A_prods;
        foreach (Production p in P[A])
        {
            if (p.First is Nonterminal && !p.IsUnit)
            {
                GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                rhs[0] = startSymbol[(Nonterminal)p.First];
                Array.Copy(p.Rhs, 1, rhs, 1, rhs.Length - 1);
                Production q = new Production(p.Lhs, rhs);
                A_prods.Add(q);
            }
            else
            {
                A_prods.Add(p);
            }
        }
    }
    #endregion
    ContextFreeGrammar H = new ContextFreeGrammar(new List <Nonterminal>(Hprods.Keys), g.startSymbol, Hprods);
    //Console.WriteLine("--------- H:");
    //H.Display(Console.Out);
    //eliminate useless symbols from H
    //this may dramatically decrease the number of productions
    ContextFreeGrammar H1 = H.RemoveUselessSymbols();
    //Console.WriteLine("---------- H1:");
    //H1.Display(Console.Out);
    // Worklist state for the final pass; only variables reached from H1.startSymbol end up in the result.
    List <Nonterminal> egnfVars = new List <Nonterminal>();
    Dictionary <Nonterminal, List <Production> > egnfProds = new Dictionary <Nonterminal, List <Production> >();
    Stack <Nonterminal> egnfStack = new Stack <Nonterminal>();
    HashSet <Nonterminal> egnfVisited = new HashSet <Nonterminal>();
    egnfStack.Push(H1.startSymbol);
    egnfVisited.Add(H1.startSymbol);
    egnfVars.Add(H1.startSymbol);
    egnfProds[H1.startSymbol] = new List <Production>();
    #region eliminate temp start symbols and produce the EGNF form
    while (egnfStack.Count > 0)
    {
        var A = egnfStack.Pop();
        List <Production> A_prods = egnfProds[A];
        foreach (Production p in H1.productionMap[A])
        {
            // Productions that start with a non-variable, and unit productions, are kept as they are.
            if (!(p.First is Nonterminal) || p.IsUnit)
            {
                A_prods.Add(p);
                foreach (Nonterminal x in p.GetVariables())
                {
                    if (egnfVisited.Add(x))
                    {
                        egnfStack.Push(x);
                        egnfVars.Add(x);
                        egnfProds[x] = new List <Production>();
                    }
                }
            }
            else
            {
                //here we know that S_B is a temp start symbol
                Nonterminal S_B = (Nonterminal)p.First;
                // Inline S_B: for each production t of S_B emit A ::= t.Rhs followed by p.Rhs without its first symbol.
                foreach (Production t in H1.productionMap[S_B])
                {
                    int k = t.Rhs.Length;
                    GrammarSymbol[] rhs = new GrammarSymbol[k + p.Rhs.Length - 1];
                    for (int i = 0; i < k; i++)
                    {
                        rhs[i] = t.Rhs[i];
                    }
                    for (int i = 1; i < p.Rhs.Length; i++)
                    {
                        rhs[k + i - 1] = p.Rhs[i];
                    }
                    Production q = new Production(A, rhs);
                    A_prods.Add(q);
                    foreach (Nonterminal x in q.GetVariables())
                    {
                        if (egnfVisited.Add(x))
                        {
                            egnfStack.Push(x);
                            egnfVars.Add(x);
                            egnfProds[x] = new List <Production>();
                        }
                    }
                }
            }
        }
    }
    #endregion
    ContextFreeGrammar egnf = new ContextFreeGrammar(egnfVars, H1.startSymbol, egnfProds);
    return(egnf);
}
/// <summary>
/// Produces the GNF (Greibach Normal Form) for the grammar g.
/// If g already satisfies IsInGNF() it is returned unchanged; otherwise g is first
/// converted with MkCNF(g, false) and the result is built from that CNF grammar.
/// Implements a variation of the Koch-Blum algorithm. (STACS 97, pp. 47-54)
/// </summary>
/// <param name="g">the grammar to be normalized</param>
/// <param name="removeEpsilonsUselessSymbolsUnitsProductions">if true, g is first replaced by
/// g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions()</param>
/// <returns>g itself when it is already in GNF, otherwise a new grammar with the start symbol of the CNF grammar</returns>
public static ContextFreeGrammar MkGNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
{
    if (removeEpsilonsUselessSymbolsUnitsProductions)
    {
        g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
    }
    if (g.IsInGNF())
    {
        return(g);
    }
    // The flag is false here: any requested cleanup has already been applied above.
    ContextFreeGrammar cnf = MkCNF(g, false);
    var Vars = cnf.variables;
    // Numbers the fresh start symbols of the intermediate grammars G_[B].
    int nonterminalID = 0;
    var M = new Dictionary <Nonterminal, Automaton <GrammarSymbol> >();
    #region construct the automata M[B] for all variables B
    // One state counter is shared by all automata, so every state id is used by one automaton only.
    int id = 0;
    var initStateMap = new Dictionary <Nonterminal, int>();
    var finalStateMap = new Dictionary <Nonterminal, int>();
    foreach (Nonterminal B in Vars)
    {
        initStateMap[B] = id++;
        finalStateMap[B] = id++;
    }
    var movesOfM = new Dictionary <Nonterminal, List <Move <GrammarSymbol> > >();
    foreach (Nonterminal B in Vars)
    {
        movesOfM[B] = new List <Move <GrammarSymbol> >();
    }
    #region construct the moves of the automata
    foreach (Nonterminal B in Vars)
    {
        // Maps each variable visited from B to its state in M[B]; B itself is the final state.
        var variableToStateMap = new Dictionary <Nonterminal, int>();
        Stack <Nonterminal> stack = new Stack <Nonterminal>();
        stack.Push(B);
        int initState = initStateMap[B];
        variableToStateMap[B] = finalStateMap[B];
        while (stack.Count > 0)
        {
            Nonterminal C = stack.Pop();
            foreach (Production p in cnf.GetProductions(C))
            {
                // IsSingleExprinal productions (presumably the CNF form C ::= a) become a move
                // from the initial state to the state of C, labelled with the first symbol.
                if (p.IsSingleExprinal)
                {
                    movesOfM[B].Add(Move <GrammarSymbol> .Create(initState, variableToStateMap[C], p.First));
                }
                else
                {
                    //using the fact that the grammar is in CNF: the production is C ::= D E
                    Nonterminal D = (Nonterminal)p.First;
                    if (!variableToStateMap.ContainsKey(D))
                    {
                        //visit all variables reachable that have not already been visited
                        variableToStateMap.Add(D, id++);
                        stack.Push(D);
                    }
                    GrammarSymbol E = p.Rhs[1];
                    // C ::= D E becomes a move from the state of D to the state of C, labelled E.
                    movesOfM[B].Add(Move <GrammarSymbol> .Create(variableToStateMap[D], variableToStateMap[C], E));
                }
            }
        }
    }
    #endregion
    // NOTE(review): the first argument of Automaton.Create is passed as null; its meaning is not visible here.
    foreach (Nonterminal B in Vars)
    {
        M[B] = Automaton <GrammarSymbol> .Create(null, initStateMap[B], new int[] { finalStateMap[B] }, movesOfM[B]);
    }
    #endregion
    var G_ = new Dictionary <Nonterminal, ContextFreeGrammar>();
    #region construct corresponding intermediate grammars G_[B] corresponding to M[B]
    foreach (Nonterminal B in Vars)
    {
        var MB = M[B];
        bool MBfinalStateHasVariableMoves = FinalStateHasVariableMoves(MB);
        var productions = new Dictionary <Nonterminal, List <Production> >();
        Nonterminal startSymbol = new Nonterminal(nonterminalID++);
        var vars = new List <Nonterminal>();
        vars.Add(startSymbol);
        productions[startSymbol] = new List <Production>();
        // Moves leaving the initial state give the productions of the start symbol.
        foreach (var move in MB.GetMovesFrom(MB.InitialState))
        {
            // A move into the final state may end the derivation: start ::= label.
            if (move.TargetState == MB.FinalState)
            {
                productions[startSymbol].Add(new Production(startSymbol, move.Label));
            }
            // The derivation continues in the target state unless that state is final with no further moves.
            if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
            {
                // A state is represented by a nonterminal named "Q" + state id.
                // NOTE(review): a new Nonterminal object is created per use, so the dictionary lookups
                // below presumably rely on Nonterminal equality being name-based - confirm.
                var C = new Nonterminal("Q" + move.TargetState);
                productions[startSymbol].Add(new Production(startSymbol, move.Label, C));
                if (!productions.ContainsKey(C))
                {
                    productions[C] = new List <Production>();
                    vars.Add(C);
                }
            }
        }
        // Moves leaving every other state give the productions of that state's nonterminal.
        foreach (int state in MB.States)
        {
            if (state != MB.InitialState)
            {
                foreach (Move <GrammarSymbol> move in MB.GetMovesFrom(state))
                {
                    Nonterminal D = new Nonterminal("Q" + state);
                    Nonterminal C = new Nonterminal("Q" + move.TargetState);
                    if (!productions.ContainsKey(D))
                    {
                        productions[D] = new List <Production>();
                        vars.Add(D);
                    }
                    // Moves from non-initial states were created from C ::= D E above, so the
                    // label is the second symbol of a CNF production and is cast to a variable.
                    Nonterminal E = (Nonterminal)move.Label;
                    if (move.TargetState == MB.FinalState)
                    {
                        productions[D].Add(new Production(D, E));
                    }
                    if (move.TargetState != MB.FinalState || MBfinalStateHasVariableMoves)
                    {
                        //we pretend here that E is a terminal
                        productions[D].Add(new Production(D, E, C));
                        if (!productions.ContainsKey(C))
                        {
                            productions[C] = new List <Production>();
                            vars.Add(C);
                        }
                    }
                }
            }
        }
        G_[B] = new ContextFreeGrammar(vars, startSymbol, productions);
    }
    #endregion
    var G = new Dictionary <Nonterminal, ContextFreeGrammar>();
    #region construct the corresponding temporary G[B]'s
    foreach (Nonterminal B in Vars)
    {
        var G_B = G_[B];
        var productions = new Dictionary <Nonterminal, List <Production> >();
        //var vars = new List<Variable>();
        Nonterminal startSymbol = G_B.startSymbol;
        // The start productions are taken over as they are (no copy is made here).
        productions[startSymbol] = G_B.productionMap[startSymbol];
        foreach (Nonterminal D in G_B.variables)
        {
            if (!D.Equals(startSymbol))
            {
                var productions_D = new List <Production>();
                productions[D] = productions_D;
                // Each production of D starts with a variable E; replace that leading E by the
                // right-hand side of every start production of G_[E].
                foreach (Production p in G_B.productionMap[D])
                {
                    Nonterminal E = (Nonterminal)p.First;
                    var G_E = G_[E];
                    if (p.IsUnit)
                    {
                        foreach (Production q in G_E.productionMap[G_E.startSymbol])
                        {
                            productions_D.Add(new Production(D, q.Rhs));
                        }
                    }
                    else
                    {
                        foreach (Production q in G_E.productionMap[G_E.startSymbol])
                        {
                            // q.Rhs followed by the second symbol of p
                            GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                            Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                            symbols[q.Rhs.Length] = p.Rhs[1];
                            productions_D.Add(new Production(D, symbols));
                        }
                    }
                }
            }
        }
        //ignore the variable list, it is not used
        G[B] = new ContextFreeGrammar(null, startSymbol, productions);
    }
    #endregion
    #region construct the final GNF from the G[B]'s
    var productionsGNF = new List <Production>();
    foreach (Nonterminal A in cnf.variables)
    {
        foreach (Production p in cnf.productionMap[A])
        {
            if (p.IsSingleExprinal)
            {
                productionsGNF.Add(p);
            }
            else
            {
                // A ::= B C is replaced by A ::= q.Rhs C for every start production q of G[B].
                Nonterminal B = (Nonterminal)p.Rhs[0];
                Nonterminal C = (Nonterminal)p.Rhs[1];
                var GB = G[B];
                foreach (Production q in GB.productionMap[GB.startSymbol])
                {
                    GrammarSymbol[] symbols = new GrammarSymbol[q.Rhs.Length + 1];
                    Array.Copy(q.Rhs, symbols, q.Rhs.Length);
                    symbols[q.Rhs.Length] = C;
                    productionsGNF.Add(new Production(A, symbols));
                }
            }
        }
    }
    // Add the productions of all state nonterminals; the temporary start symbols are left out.
    foreach (Nonterminal B in Vars)
    {
        var GB = G[B];
        foreach (var kv in GB.productionMap)
        {
            if (!kv.Key.Equals(GB.startSymbol))
            {
                productionsGNF.AddRange(kv.Value);
            }
        }
    }
    #endregion
    ContextFreeGrammar gnf = new ContextFreeGrammar(cnf.startSymbol, productionsGNF);
    return(gnf);
}
/// <summary>
/// Produces the CNF (Chomsky Normal Form) for the grammar g.
/// If removeEpsilonsUselessSymbolsUnitsProductions is true, it first eliminates epsilons,
/// useless symbols, and unit productions; otherwise it assumes that g contains
/// no epsilons, useless symbols or unit productions.
/// </summary>
/// <param name="g">the grammar to be normalized</param>
/// <param name="removeEpsilonsUselessSymbolsUnitsProductions">if true, g is first replaced by
/// g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions()</param>
/// <returns>a new grammar with the start symbol of g, the variables of g and the fresh variables introduced here</returns>
public static ContextFreeGrammar MkCNF(ContextFreeGrammar g, bool removeEpsilonsUselessSymbolsUnitsProductions)
{
    if (removeEpsilonsUselessSymbolsUnitsProductions)
    {
        g = g.RemoveEpsilonsAndUselessSymbols().RemoveUnitProductions();
    }
    var productions = new Dictionary <Nonterminal, List <Production> >();
    List <Nonterminal> variables = new List <Nonterminal>(g.variables);
    foreach (Nonterminal v in g.variables)
    {
        productions[v] = new List <Production>();
    }
    // Numbers the fresh nonterminals of both phases.
    // NOTE(review): numbering starts at 0; whether these can clash with variables already in g
    // depends on how Nonterminal(int) forms its name - confirm.
    int nonterminalID = 0;
    //Implements algo in Theorem 4.5, page 92-93, in Hopcroft-Ullman
    #region make productions of the form V --> V0...Vn or V --> a
    // One fresh variable per replaced symbol, shared by all productions that use the symbol.
    var freshVarMap = new Dictionary <GrammarSymbol, Nonterminal>();
    foreach (Nonterminal v in g.variables)
    {
        foreach (Production p in g.productionMap[v])
        {
            if (p.ContainsNoExprinals || p.IsCNF)
            {
                productions[v].Add(p);
            }
            else
            {
                // Rebuild the right-hand side with every non-variable symbol replaced by a
                // variable u that has the single production u ::= symbol.
                GrammarSymbol[] rhs = new GrammarSymbol[p.Rhs.Length];
                for (int i = 0; i < rhs.Length; i++)
                {
                    if (p.Rhs[i] is Nonterminal)
                    {
                        rhs[i] = p.Rhs[i];
                    }
                    else
                    {
                        Nonterminal u;
                        if (!freshVarMap.TryGetValue(p.Rhs[i], out u))
                        {
                            u = new Nonterminal(nonterminalID++);
                            freshVarMap[p.Rhs[i]] = u;
                            variables.Add(u);
                            var prods = new List <Production>();
                            prods.Add(new Production(u, p.Rhs[i]));
                            productions[u] = prods;
                        }
                        rhs[i] = u;
                    }
                }
                productions[v].Add(new Production(v, rhs));
            }
        }
    }
    #endregion
    var productionsCNF = new Dictionary <Nonterminal, List <Production> >();
    // variablesCNF is a separate copy, so adding to it below does not disturb the loop over 'variables'.
    List <Nonterminal> variablesCNF = new List <Nonterminal>(variables);
    foreach (Nonterminal v in variablesCNF)
    {
        productionsCNF[v] = new List <Production>();
    }
    #region replace V --> V0V1...Vn (n > 2), by V --> V0U0, U0 --> V1U1, ..., Un-2 --> Vn-1Vn
    foreach (Nonterminal v in variables)
    {
        foreach (Production p in productions[v])
        {
            if (p.IsCNF)
            {
                productionsCNF[v].Add(p);
            }
            else
            {
                // x is the left-hand side of the link being emitted, y the fresh variable that continues the chain.
                // NOTE(review): a non-CNF production with fewer than two right-hand side symbols would
                // index out of range at the final Add below; the method relies on epsilon and unit
                // productions being absent.
                Nonterminal x = v;
                Nonterminal y = new Nonterminal(nonterminalID++);
                variablesCNF.Add(y);
                productionsCNF[y] = new List <Production>();
                for (int i = 0; i < p.Rhs.Length - 2; i++)
                {
                    productionsCNF[x].Add(new Production(x, p.Rhs[i], y));
                    // Another fresh variable is needed only if one more link follows this one.
                    if (i < p.Rhs.Length - 3)
                    {
                        x = y;
                        y = new Nonterminal(nonterminalID++);
                        variablesCNF.Add(y);
                        productionsCNF[y] = new List <Production>();
                    }
                }
                // The last fresh variable derives the final two symbols.
                productionsCNF[y].Add(new Production(y, p.Rhs[p.Rhs.Length - 2], p.Rhs[p.Rhs.Length - 1]));
            }
        }
    }
    #endregion
    ContextFreeGrammar cnf = new ContextFreeGrammar(variablesCNF, g.startSymbol, productionsCNF);
    return(cnf);
}
/// <summary>
/// Scans the right-hand side from left to right and returns true at the first
/// symbol whose Equals accepts <paramref name="symbol"/>; returns false when none does.
/// </summary>
internal bool RhsContainsSymbol(GrammarSymbol symbol)
{
    for (int index = 0; index < Rhs.Length; index++)
    {
        GrammarSymbol current = Rhs[index];
        if (current.Equals(symbol))
        {
            return true;
        }
    }

    return false;
}