/// <summary>
/// Builds the FIRST sets for every nonterminal of the grammar by repeating a sweep
/// over all production rules until a sweep adds nothing new.
/// </summary>
/// <param name="nullable">
/// Nonterminals regarded as nullable. A production keeps contributing symbols
/// past a nonterminal only while that nonterminal is contained in this set.
/// </param>
/// <returns>The terminal set populated with the FIRST terminals per nonterminal.</returns>
private TerminalSet<T> CalculateFirst(ISet<NonTerminal<T>> nullable)
{
    var first = new TerminalSet<T>(grammar);

    // Fixed-point iteration: a leading terminal goes straight into the owner's
    // set; a leading nonterminal donates whatever is currently known for it.
    bool changed = true;
    while (changed)
    {
        changed = false;

        foreach (var owner in grammar.AllSymbols.OfType<NonTerminal<T>>())
        {
            foreach (var rule in owner.ProductionRules)
            {
                foreach (var current in rule.Symbols)
                {
                    if (current is Terminal<T>)
                    {
                        // A terminal ends the scan of this production.
                        changed |= first.Add(owner, (Terminal<T>)current);
                        break;
                    }

                    if (!(current is NonTerminal<T>))
                    {
                        // Neither terminal nor nonterminal: nothing to contribute.
                        continue;
                    }

                    var inner = (NonTerminal<T>)current;

                    // Copy over everything known so far for the inner nonterminal.
                    foreach (var candidate in first[inner])
                    {
                        changed |= first.Add(owner, candidate);
                    }

                    // Only look further into the production when the inner
                    // nonterminal is nullable.
                    if (!nullable.Contains(inner))
                    {
                        break;
                    }
                }
            }
        }
    }

    return first;
}
/// <summary>
/// Computes the LR(1) closure of a set of items: the items themselves plus, for
/// every item with a symbol to the right of the dot, a new item (dot at position 0)
/// for each grammar production whose result symbol is that symbol.
/// </summary>
/// <param name="items">The seed items; all of them are added to the closure first.</param>
/// <param name="first">FIRST sets, indexed by nonterminal, used to derive lookaheads.</param>
/// <param name="nullable">Nonterminals regarded as nullable when scanning for lookaheads.</param>
/// <returns>The item set containing the seeds and every item generated from them.</returns>
private Lr1ItemSet<T> Closure(IEnumerable<Lr1Item<T>> items, TerminalSet<T> first, ISet<NonTerminal<T>> nullable)
{
    // The items themselves are always in their own closure set
    Lr1ItemSet<T> closure = new Lr1ItemSet<T>();
    foreach (Lr1Item<T> lr1Item in items)
    {
        closure.Add(lr1Item);
    }

    // This needs to be a normal for loop since we add to the underlying collection
    // as we go along. Items appended below are picked up by later iterations.
    for (int currentItem = 0; currentItem < closure.Count(); ++currentItem)
    {
        Lr1Item<T> item = closure[currentItem];
        ISymbol<T> symbolRightOfDot = item.SymbolRightOfDot;
        if (symbolRightOfDot == null)
        {
            // Nothing to the right of the dot, so nothing to expand.
            continue;
        }

        // Generate the lookaheads from the symbols that follow the symbol right of the dot.
        HashSet<Terminal<T>> lookaheads = new HashSet<Terminal<T>>();
        bool nonNullableFound = false;

        for (int i = item.DotLocation + 1; i < item.ProductionRule.Symbols.Length; ++i)
        {
            ISymbol<T> symbol = item.ProductionRule.Symbols[i];

            // If symbol is terminal, just add it
            if (symbol is Terminal<T>)
            {
                lookaheads.Add((Terminal<T>)symbol);

                // Terminals are not nullable, break out of loop
                nonNullableFound = true;
                break;
            }

            // Anything that is not a terminal is treated as a nonterminal here.
            NonTerminal<T> nonTerminal = (NonTerminal<T>)symbol;

            foreach (Terminal<T> terminal in first[nonTerminal])
            {
                lookaheads.Add(terminal);
            }

            // Query the set through the typed reference so the set's own Contains
            // is used. Passing the ISymbol<T> cannot bind to that method and
            // resolves to the LINQ Contains extension, a linear scan.
            if (!nullable.Contains(nonTerminal))
            {
                nonNullableFound = true;
                break;
            }
        }

        if (!nonNullableFound)
        {
            // Everything after the symbol could vanish (or there was nothing), so the
            // lookaheads of the generating item carry over to the new lookahead set.
            foreach (Terminal<T> lookahead in item.Lookaheads)
            {
                lookaheads.Add(lookahead);
            }
        }

        // Create new Lr1 items from all rules where the resulting symbol of the production rule
        // matches the symbol that was to the right of the dot.
        foreach (Lr1Item<T> lr1Item in
                 from f in grammar.ProductionRules
                 where f.ResultSymbol == symbolRightOfDot
                 select new Lr1Item<T>(f, 0, lookaheads))
        {
            closure.Add(lr1Item);
        }
    }

    return closure;
}
/// <summary>
/// Builds the LR item sets and goto transitions for the grammar, turns them into a
/// parse table and wraps that table in an <see cref="LRParser{T}"/>.
/// </summary>
/// <returns>A parser driven by the generated parse table.</returns>
/// <exception cref="InvalidCastException">
/// Thrown when the grammar's error token is not a <see cref="Terminal{T}"/>.
/// </exception>
internal IParser<T> CreateParser()
{
    // First order of business is to create the canonical list of LR1 states, or at least we are going to go through
    // them as we merge the sets together.
    // This starts with the grammar's start production; the item sets are derived from that.
    IProductionRule<T> start = grammar.Start;

    // Get the nullable and first sets for all nonterminal symbols
    ISet<NonTerminal<T>> nullable = CalculateNullable();
    TerminalSet<T> first = CalculateFirst(nullable);

    // Calculate the LR1 closure for the start production with the dot at the left hand
    // side and the end-of-input terminal as its only lookahead. This is the initial item set.
    List<Lr1ItemSet<T>> itemSets = new List<Lr1ItemSet<T>>
    {
        Closure(
            new List<Lr1Item<T>>
            {
                new Lr1Item<T>(start, 0, new HashSet<Terminal<T>> { grammar.EndOfInputTerminal })
            },
            first,
            nullable)
    };
    List<GotoSetTransition> gotoSetTransitions = new List<GotoSetTransition>();

    // Repeat until nothing gets added any more
    // This is necessary since we are merging sets as we go, which changes things around.
    bool added;
    do
    {
        added = false;

        // Index loop: itemSets grows while we walk it.
        for (int i = 0; i < itemSets.Count; ++i)
        {
            Lr1ItemSet<T> itemSet = itemSets[i];

            foreach (ISymbol<T> symbol in grammar.AllSymbols)
            {
                // Calculate the item set reached by goto for each symbol in the grammar
                Lr1ItemSet<T> gotoSet = Goto(itemSet, symbol);

                // If there is anything found in the set
                if (gotoSet.Any())
                {
                    // Do a closure on the goto set and see if a set with an equal core is already
                    // present in the sets of items that we have. If that is not the case, add it.
                    gotoSet = Closure(gotoSet, first, nullable);

                    Lr1ItemSet<T> oldGotoSet = itemSets.Find(f => f.CoreEquals(gotoSet));

                    if (oldGotoSet == null)
                    {
                        // Add goto set to itemsets
                        itemSets.Add(gotoSet);

                        // Add a transition
                        gotoSetTransitions.Add(new GotoSetTransition
                        {
                            From = itemSet,
                            OnSymbol = symbol,
                            To = gotoSet
                        });
                        added = true;
                    }
                    else
                    {
                        // Already found the set
                        // Merge the lookaheads for all rules
                        oldGotoSet.MergeLookaheads(gotoSet);

                        // Add a transition if it already isn't there
                        GotoSetTransition nt = new GotoSetTransition
                        {
                            From = itemSet,
                            OnSymbol = symbol,
                            To = oldGotoSet
                        };

                        if (!gotoSetTransitions.Any(a => (a.From == nt.From) && (a.OnSymbol == nt.OnSymbol) && (a.To == nt.To)))
                        {
                            gotoSetTransitions.Add(nt);
                        }
                    }
                }
            }
        }
    } while (added);

    LRParseTable<T> parseTable = CreateParseTable(itemSets, gotoSetTransitions);

    // Create a new parser using that parse table and some additional information that needs
    // to be available for the runtime parsing to work.
    // The error token is cast directly: a token of the wrong type fails at the cast with an
    // InvalidCastException instead of a NullReferenceException on the member access.
    return new LRParser<T>(
        parseTable,
        ((Terminal<T>)grammar.ErrorToken).TokenNumber,
        grammar.EndOfInputTerminal.TokenNumber,
        grammar.AllSymbols.OfType<Terminal<T>>().Select(f => f.DebugName).ToArray());
}