/// <summary>
/// Sanity-checks a transformed grammar against the grammar it was produced from and hands the
/// transformed grammar back unchanged when the checks pass.
/// </summary>
/// <param name="oldGrammar">grammar before the transformation</param>
/// <param name="newGrammar">grammar after the transformation</param>
/// <returns><paramref name="newGrammar"/> itself</returns>
/// <exception cref="Exception">
/// a variable of the old grammar shows up as a terminal of the new grammar, or a variable of the
/// new grammar is not the left-hand side of any of its productions
/// </exception>
private static CFG verifyConsistency(CFG oldGrammar, CFG newGrammar)
{
    // True when at least one production of the new grammar has the given symbol as left-hand side.
    bool HasProductionFor(string symbol)
    {
        foreach (Production candidate in newGrammar.productions)
        {
            if (candidate.lhs == symbol)
            {
                return true;
            }
        }

        return false;
    }

    // A former variable that is now listed as a terminal has lost all of its productions.
    OrderedHashSet<string> formerVariables = oldGrammar.Variables();
    foreach (string symbol in newGrammar.Terminals())
    {
        if (formerVariables.Contains(symbol))
        {
            throw new Exception($"grammar is malformed: variable {symbol} cannot be derived");
        }
    }

    foreach (string symbol in newGrammar.Variables())
    {
        if (HasProductionFor(symbol))
        {
            continue;
        }

        throw new Exception($"grammar is malformed: variable {symbol} cannot be derived");
    }

    return newGrammar;
}
/// <summary>
/// Simplify a context-free grammar: keep only the productions whose left-hand side is reachable
/// from the start variable, and drop productions of the form V ::= V.
/// </summary>
/// <param name="grammar">target grammar</param>
/// <returns>simplified grammar</returns>
public static CFG Simplify(this CFG grammar)
{
    OrderedHashSet<Production> keptProductions = new OrderedHashSet<Production>();
    OrderedHashSet<string> frontier = new OrderedHashSet<string>();
    OrderedHashSet<string> visited = new OrderedHashSet<string>();

    // Breadth-first traversal over symbols, starting at the start variable.
    frontier.Add(grammar.startVariable);
    while (frontier.Count() > 0)
    {
        foreach (string reached in frontier)
        {
            visited.Add(reached);
        }

        OrderedHashSet<string> discovered = new OrderedHashSet<string>();
        foreach (Production production in grammar.productions)
        {
            // Only productions of symbols reached in this round are considered.
            if (!frontier.Contains(production.lhs))
            {
                continue;
            }

            // V ::= V contributes nothing and is dropped.
            if (production.rhs.Length == 1 && production.rhs[0] == production.lhs)
            {
                continue;
            }

            keptProductions.Add(production);
            foreach (string rhsSymbol in production.rhs)
            {
                if (!visited.Contains(rhsSymbol))
                {
                    discovered.Add(rhsSymbol);
                }
            }
        }

        frontier = discovered;
    }

    CFG simplified = new CFG(grammar.startVariable, keptProductions);
    return verifyConsistency(grammar, simplified);
}
/// <summary>
/// Applies the given grammar options to a grammar. The only option handled here is
/// <c>GrammarOption.Reversed</c>, which yields the reversed grammar; otherwise the grammar is
/// returned as is.
/// </summary>
/// <param name="grammar">target grammar</param>
/// <param name="options">set of options</param>
/// <returns>application of options to grammar</returns>
public static CFG ApplyOptions(this CFG grammar, OrderedHashSet<GrammarOption> options)
{
    bool reverse = options.Contains(GrammarOption.Reversed);
    return reverse ? Reversed(grammar) : grammar;
}
/// <summary>
/// Generates a symbol name of the form <paramref name="variableName"/> followed by a positive
/// index that is not contained in <paramref name="usedSymbols"/>.
/// </summary>
/// <param name="usedSymbols">symbols that are already taken</param>
/// <param name="variableName">prefix of the generated name</param>
/// <param name="indexHint">
/// first index to try; a hint below 1 is treated as 1, so that a hint parsed from a name such as
/// "S0" yields "S1" instead of failing
/// </param>
/// <returns>the first unused name found when counting upwards from the starting index</returns>
/// <exception cref="Exception">no unused name was found before the index left the positive range</exception>
private static string GenerateAuxiliaryVariable(OrderedHashSet<string> usedSymbols, string variableName, int indexHint)
{
    // Previously a non-positive hint skipped the search loop altogether and went straight to the
    // throw below, although plenty of unused names were available.
    int firstIndex = indexHint > 0 ? indexHint : 1;

    // The "index > 0" bound ends the search should the counter ever wrap around past
    // int.MaxValue (assuming the default unchecked arithmetic context).
    for (int index = firstIndex; index > 0; ++index)
    {
        string name = variableName + index;
        if (!usedSymbols.Contains(name))
        {
            return name;
        }
    }

    throw new Exception("grammar transformation error");
}
/// <summary>
/// Derives an unused symbol name from <paramref name="sourceVariable"/>.
/// The name itself is returned when it is not taken. Otherwise the longest suffix accepted by
/// <c>int.TryParse</c> is split off and used as the starting index for the indexed overload;
/// when there is no such suffix, numbering starts at 2 appended to the full name.
/// </summary>
/// <param name="usedSymbols">symbols that are already taken</param>
/// <param name="sourceVariable">name the new symbol is modelled on</param>
/// <returns>a symbol name not contained in <paramref name="usedSymbols"/></returns>
private static string GenerateAuxiliaryVariable(OrderedHashSet<string> usedSymbols, string sourceVariable)
{
    bool alreadyTaken = usedSymbols.Contains(sourceVariable);
    if (!alreadyTaken)
    {
        return sourceVariable;
    }

    // Scan split positions from left to right so the longest parsable suffix wins.
    int splitAt = 0;
    while (splitAt < sourceVariable.Length)
    {
        string suffix = sourceVariable.Substring(splitAt);
        if (int.TryParse(suffix, out int parsedIndex))
        {
            string prefix = sourceVariable.Substring(0, splitAt);
            return GenerateAuxiliaryVariable(usedSymbols, prefix, parsedIndex);
        }

        ++splitAt;
    }

    return GenerateAuxiliaryVariable(usedSymbols, sourceVariable, 2);
}
/// <summary>
/// Remove epsilon productions.
/// If the grammar derives epsilon, then only the start variable may derive epsilon, but then it
/// would not be found on the right-hand side of any production.
/// </summary>
/// <remarks>
/// Epsilon productions are eliminated in rounds. In each round every production is replaced by
/// the variants obtained by optionally dropping the variables that currently have an epsilon
/// production, after which those epsilon productions (except the start variable's) are deleted.
/// A variable whose epsilon production has been deleted never receives it again: its nullability
/// has already been propagated, and re-adding the production would make grammars with cyclic unit
/// productions (for example A ::= B, B ::= A | epsilon) alternate between two states forever.
/// Consequently each round eliminates at least one variable that was not eliminated before, and
/// the loop terminates.
/// </remarks>
/// <param name="grammar">target grammar</param>
/// <returns>grammar without epsilon productions</returns>
/// <exception cref="Exception">the resulting grammar fails the consistency check</exception>
public static CFG RemoveEpsilonProductions(this CFG grammar)
{
    bool hasNullableStartSymbolInRHS = grammar.HasNullableStartSymbolInRHS();
    if (!grammar.HasEpsilonProductionsExceptStartVariable() && !hasNullableStartSymbolInRHS)
    {
        // Nothing to do.
        return grammar;
    }

    if (hasNullableStartSymbolInRHS)
    {
        // Introduce a fresh start variable S' with S' ::= S and S' ::= epsilon, then process the
        // wrapped grammar, in which the old start variable is an ordinary variable.
        OrderedHashSet<Production> wrappedProductions = new OrderedHashSet<Production>();
        foreach (Production production in grammar.productions)
        {
            wrappedProductions.Add(production);
        }

        string newStartVariable = GenerateAuxiliaryVariable(grammar.Symbols(), grammar.startVariable);
        wrappedProductions.Add(new Production(newStartVariable, grammar.startVariable));
        wrappedProductions.Add(new Production(newStartVariable));
        return RemoveEpsilonProductions(new CFG(newStartVariable, wrappedProductions));
    }

    OrderedHashSet<Production> productions = grammar.productions;

    // Variables whose epsilon production was deleted in an earlier round.
    OrderedHashSet<string> eliminatedVariables = new OrderedHashSet<string>();

    while (true)
    {
        // nullableVariables: variables with an epsilon production.
        // nullVariables: the subset that has no other production at all.
        OrderedHashSet<string> nullableVariables = new OrderedHashSet<string>();
        OrderedHashSet<string> nullVariables = new OrderedHashSet<string>();
        foreach (Production production in productions)
        {
            if (production.IsEpsilonProduction())
            {
                nullableVariables.Add(production.lhs);
                nullVariables.Add(production.lhs);
            }
        }

        foreach (Production production in productions)
        {
            if (!production.IsEpsilonProduction())
            {
                nullVariables.Remove(production.lhs);
            }
        }

        // Done once no variable other than the start variable has an epsilon production.
        if (nullableVariables.Count == 0
            || nullableVariables.Count == 1 && nullableVariables.Contains(grammar.startVariable))
        {
            return verifyConsistency(grammar, new CFG(grammar.startVariable, productions).Simplify());
        }

        OrderedHashSet<Production> newProductions = new OrderedHashSet<Production>();
        foreach (Production production in productions)
        {
            // Build all right-hand-side variants symbol by symbol, starting from the empty one.
            OrderedHashSet<List<string>> rhss = new OrderedHashSet<List<string>>();
            rhss.Add(new List<string>());
            foreach (string symbol in production.rhs)
            {
                OrderedHashSet<List<string>> newRHSs = new OrderedHashSet<List<string>>();
                foreach (var rhs in rhss)
                {
                    if (nullVariables.Contains(symbol))
                    {
                        // The symbol derives nothing but epsilon: drop it from every variant.
                        newRHSs.AddUnique(rhs);
                    }
                    else if (!nullableVariables.Contains(symbol))
                    {
                        // Ordinary symbol: it stays in every variant.
                        List<string> newRHS = new List<string>();
                        newRHS.AddRange(rhs);
                        newRHS.Add(symbol);
                        newRHSs.Add(newRHS);
                    }
                    else
                    {
                        // Nullable symbol: keep one variant without it and one with it.
                        newRHSs.AddUnique(rhs);
                        List<string> newRHS = new List<string>();
                        newRHS.AddRange(rhs);
                        newRHS.Add(symbol);
                        newRHSs.Add(newRHS);
                    }
                }

                rhss = newRHSs;
            }

            foreach (var newRHS in rhss)
            {
                // Do not resurrect an epsilon production that an earlier round already removed;
                // doing so is what made cyclic unit productions loop forever.
                if (newRHS.Count == 0 && eliminatedVariables.Contains(production.lhs))
                {
                    continue;
                }

                newProductions.Add(new Production(production.lhs, newRHS.ToArray()));
            }
        }

        foreach (string nullableVariable in nullableVariables)
        {
            if (nullableVariable != grammar.startVariable)
            {
                newProductions.Remove(new Production(nullableVariable));
                eliminatedVariables.Add(nullableVariable);
            }
        }

        productions = newProductions;
    }
}