/// <summary>
/// Remove epsilon productions.
/// If the grammar derives epsilon, then only the start variable may derive epsilon, but then it
/// would not be found on the right-hand side of any production.
/// </summary>
/// <remarks>
/// The elimination is iterative: each pass collects the variables that currently own an epsilon
/// production, rewrites every right-hand side with and without those variables, and then drops
/// the epsilon productions of all non-start variables. A variable whose epsilon production has
/// been eliminated once is never given a new one; its optional occurrences were already expanded,
/// and re-adding the epsilon production would make cyclic grammars such as
/// <c>A -&gt; B, B -&gt; A | epsilon</c> loop forever.
/// </remarks>
/// <param name="grammar">target grammar</param>
/// <returns>grammar without epsilon productions</returns>
public static CFG RemoveEpsilonProductions(this CFG grammar)
{
    bool hasNullableStartSymbolInRHS = grammar.HasNullableStartSymbolInRHS();
    if (!grammar.HasEpsilonProductionsExceptStartVariable() && !hasNullableStartSymbolInRHS)
    {
        // Nothing to eliminate.
        return grammar;
    }

    if (hasNullableStartSymbolInRHS)
    {
        // Introduce a fresh start variable S' with S' -> S | epsilon so that the variable
        // allowed to keep its epsilon production does not occur on any right-hand side,
        // then run the elimination on the augmented grammar.
        OrderedHashSet<Production> augmentedProductions = new OrderedHashSet<Production>();
        foreach (Production production in grammar.productions)
        {
            augmentedProductions.Add(production);
        }

        string newStartVariable = GenerateAuxiliaryVariable(grammar.Symbols(), grammar.startVariable);
        augmentedProductions.Add(new Production(newStartVariable, grammar.startVariable));
        augmentedProductions.Add(new Production(newStartVariable));
        return RemoveEpsilonProductions(new CFG(newStartVariable, augmentedProductions));
    }

    OrderedHashSet<Production> productions = grammar.productions;

    // Non-start variables whose epsilon production has already been removed in an earlier pass.
    HashSet<string> eliminatedVariables = new HashSet<string>();

    while (true)
    {
        // nullableVariables: variables that own an epsilon production in the current set.
        // nullVariables: the subset of those that own nothing but epsilon productions.
        OrderedHashSet<string> nullableVariables = new OrderedHashSet<string>();
        OrderedHashSet<string> nullVariables = new OrderedHashSet<string>();
        foreach (Production production in productions)
        {
            if (production.IsEpsilonProduction())
            {
                nullableVariables.Add(production.lhs);
                nullVariables.Add(production.lhs);
            }
        }

        foreach (Production production in productions)
        {
            if (!production.IsEpsilonProduction())
            {
                nullVariables.Remove(production.lhs);
            }
        }

        // Done once no variable, or only the start variable, still owns an epsilon production.
        if (nullableVariables.Count == 0
            || (nullableVariables.Count == 1 && nullableVariables.Contains(grammar.startVariable)))
        {
            return verifyConsistency(grammar, new CFG(grammar.startVariable, productions).Simplify());
        }

        OrderedHashSet<Production> newProductions = new OrderedHashSet<Production>();
        foreach (Production production in productions)
        {
            // Build every variant of the right-hand side, symbol by symbol.
            OrderedHashSet<List<string>> rhss = new OrderedHashSet<List<string>>();
            rhss.Add(new List<string>());
            foreach (string symbol in production.rhs)
            {
                // A null variable is always dropped, a nullable one is both dropped and kept,
                // any other symbol is always kept. (Every null variable is also nullable.)
                bool symbolIsNull = nullVariables.Contains(symbol);
                bool symbolIsNullable = nullableVariables.Contains(symbol);

                OrderedHashSet<List<string>> newRHSs = new OrderedHashSet<List<string>>();
                foreach (List<string> rhs in rhss)
                {
                    if (symbolIsNull || symbolIsNullable)
                    {
                        newRHSs.AddUnique(rhs);
                    }

                    if (!symbolIsNull)
                    {
                        List<string> extendedRHS = new List<string>();
                        extendedRHS.AddRange(rhs);
                        extendedRHS.Add(symbol);
                        newRHSs.Add(extendedRHS);
                    }
                }

                rhss = newRHSs;
            }

            foreach (List<string> newRHS in rhss)
            {
                // Do not resurrect the epsilon production of a variable that was already
                // eliminated; doing so is redundant and prevents termination on cyclic
                // nullable unit productions.
                if (newRHS.Count == 0 && eliminatedVariables.Contains(production.lhs))
                {
                    continue;
                }

                newProductions.Add(new Production(production.lhs, newRHS.ToArray()));
            }
        }

        // Drop the epsilon productions handled in this pass; the start variable keeps its own.
        foreach (string nullableVariable in nullableVariables)
        {
            if (nullableVariable != grammar.startVariable)
            {
                newProductions.Remove(new Production(nullableVariable));
                eliminatedVariables.Add(nullableVariable);
            }
        }

        productions = newProductions;
    }
}
/// <summary>
/// Removes left-recursive productions from the grammar.
/// Implementation of Paull's algorithm.
/// </summary>
/// <remarks>
/// The grammar is first simplified and stripped of epsilon productions. The variables are then
/// processed in a fixed order: for variable <c>vi</c>, every production <c>vi -&gt; vj γ</c> with
/// <c>vj</c> earlier in the order is replaced by <c>vi -&gt; δ γ</c> for each <c>vj -&gt; δ</c>, after
/// which the direct left recursion of <c>vi</c> is removed. An epsilon production of the start
/// variable is set aside during the transformation and restored at the end.
/// </remarks>
/// <param name="grammar">target grammar</param>
/// <returns>grammar without left recursion</returns>
public static CFG RemoveLeftRecursion(this CFG grammar)
{
    if (!grammar.HasLeftRecursion())
    {
        return grammar;
    }

    grammar = grammar.Simplify().RemoveEpsilonProductions();
    if (!grammar.HasLeftRecursion())
    {
        return grammar;
    }

    Production startEpsilonProduction = new Production(grammar.startVariable);
    bool epsilonIsDerived = grammar.productions.Contains(startEpsilonProduction);

    // Work on a copy so the productions of the input grammar are left untouched.
    OrderedHashSet<Production> productions = new OrderedHashSet<Production>();
    foreach (Production production in grammar.productions)
    {
        productions.Add(production);
    }

    if (epsilonIsDerived)
    {
        // Set aside while rewriting; restored before returning.
        productions.Remove(startEpsilonProduction);
    }

    List<string> variables = new List<string>(grammar.Variables());

    // Symbols of the grammar, handed to RemoveDirectLeftRecursion together with the
    // productions of the variable being processed.
    OrderedHashSet<string> usedSymbols = new OrderedHashSet<string>();
    foreach (string symbol in grammar.Symbols())
    {
        usedSymbols.Add(symbol);
    }

    for (int i = 0; i < variables.Count; ++i)
    {
        string vi = variables[i];
        for (int j = 0; j < i; ++j)
        {
            string vj = variables[j];

            // Changes are buffered because 'productions' is being enumerated.
            OrderedHashSet<Production> toRemove = new OrderedHashSet<Production>();
            OrderedHashSet<Production> toAdd = new OrderedHashSet<Production>();
            foreach (Production production in productions)
            {
                bool startsWithVj = production.lhs == vi
                    && production.rhs.Length > 0
                    && production.rhs[0] == vj;
                if (!startsWithVj)
                {
                    continue;
                }

                // Every production vi -> vj γ must be substituted, not only the first one
                // found; otherwise left recursion through vj can survive.
                toRemove.Add(production);
                foreach (Production otherProduction in productions)
                {
                    if (otherProduction.lhs == vj)
                    {
                        List<string> newRHS = new List<string>();
                        newRHS.AddRange(otherProduction.rhs);
                        newRHS.AddRange(production.rhs.Skip(1));
                        toAdd.Add(new Production(vi, newRHS.ToArray()));
                    }
                }
            }

            foreach (Production addedProduction in toAdd)
            {
                productions.Add(addedProduction);
            }

            foreach (Production removedProduction in toRemove)
            {
                productions.Remove(removedProduction);
            }
        }

        // Split off the productions of vi, remove their direct left recursion and merge the
        // result back with the productions of all other variables.
        OrderedHashSet<Production> productionsWithoutViDirectLeftRecursion = new OrderedHashSet<Production>();
        OrderedHashSet<Production> newViProductions = new OrderedHashSet<Production>();
        foreach (Production production in productions)
        {
            if (production.lhs != vi)
            {
                productionsWithoutViDirectLeftRecursion.Add(production);
            }
            else
            {
                newViProductions.Add(production);
            }
        }

        foreach (Production newViProduction in RemoveDirectLeftRecursion(usedSymbols, newViProductions))
        {
            productionsWithoutViDirectLeftRecursion.Add(newViProduction);
        }

        productions = productionsWithoutViDirectLeftRecursion;
    }

    if (epsilonIsDerived)
    {
        productions.Add(startEpsilonProduction);
    }

    return verifyConsistency(grammar, new CFG(grammar.startVariable, productions));
}