/// <summary>
/// Unions the lookaheads of every item in <paramref name="other"/> into the item of this
/// set that has the same production rule and dot location.
/// </summary>
/// <param name="other">
/// The item set to take lookaheads from. It must contain a counterpart for every item in
/// this set; <c>First</c> throws if one is missing.
/// </param>
public void MergeLookaheads(Lr1ItemSet<T> other)
{
    foreach (var target in Items)
    {
        // Locate the item in the other set that shares this item's core (rule + dot)
        var incoming = other.First(candidate =>
            candidate.ProductionRule == target.ProductionRule &&
            candidate.DotLocation == target.DotLocation).Lookaheads;

        target.Lookaheads.UnionWith(incoming);
    }
}
/// <summary>
/// Compares this item set with <paramref name="other"/> on production rule and dot
/// location only; lookaheads are not compared.
/// </summary>
/// <param name="other">The item set to compare against.</param>
/// <returns>
/// <c>true</c> when both sets hold the same number of items and every item in this set
/// has an item in <paramref name="other"/> with the same production rule and dot location.
/// </returns>
public bool CoreEquals(Lr1ItemSet<T> other)
{
    // Sets of different size can never share a core
    if (Items.Count != other.Items.Count)
    {
        return false;
    }

    foreach (var mine in Items)
    {
        bool hasCounterpart = other.Any(theirs =>
            theirs.ProductionRule == mine.ProductionRule &&
            mine.DotLocation == theirs.DotLocation);

        if (!hasCounterpart)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Computes the closure of a collection of LR(1) items: the items themselves plus, for
/// every item with a symbol to the right of its dot, a new item (dot at position 0) for
/// each grammar rule whose result symbol is that symbol.
/// </summary>
/// <param name="items">The seed items; they are always part of the result.</param>
/// <param name="first">Lookup of the terminals that can start each nonterminal.</param>
/// <param name="nullable">The set of nullable nonterminals.</param>
/// <returns>The item set holding the seeds and every item generated from them.</returns>
private Lr1ItemSet<T> Closure(IEnumerable<Lr1Item<T>> items, TerminalSet<T> first, ISet<NonTerminal<T>> nullable)
{
    Lr1ItemSet<T> result = new Lr1ItemSet<T>();
    foreach (Lr1Item<T> seed in items)
    {
        result.Add(seed);
    }

    // Index-based on purpose: the set grows while we walk it, and walking by index means
    // every item (including the ones appended below) is expanded exactly once.
    for (int index = 0; index < result.Count(); ++index)
    {
        Lr1Item<T> current = result[index];
        ISymbol<T> expected = current.SymbolRightOfDot;
        if (expected == null)
        {
            // Dot is at the end of the rule, nothing to expand
            continue;
        }

        HashSet<Terminal<T>> lookaheads = LookaheadsForClosure(current, first, nullable);

        // Every rule producing the symbol right of the dot contributes a fresh item
        foreach (var rule in grammar.ProductionRules)
        {
            if (rule.ResultSymbol == expected)
            {
                result.Add(new Lr1Item<T>(rule, 0, lookaheads));
            }
        }
    }

    return result;
}

/// <summary>
/// Collects the lookaheads for items generated from <paramref name="item"/>: the terminals
/// that can begin whatever follows the symbol right of the dot, plus the item's own
/// lookaheads when everything that follows is nullable (or nothing follows).
/// </summary>
private HashSet<Terminal<T>> LookaheadsForClosure(Lr1Item<T> item, TerminalSet<T> first, ISet<NonTerminal<T>> nullable)
{
    HashSet<Terminal<T>> lookaheads = new HashSet<Terminal<T>>();
    var symbols = item.ProductionRule.Symbols;
    bool tailCanVanish = true;

    int position = item.DotLocation + 1;
    while (position < symbols.Length)
    {
        ISymbol<T> symbol = symbols[position];

        if (symbol is Terminal<T>)
        {
            // A terminal is never nullable, so the scan stops here
            lookaheads.Add((Terminal<T>)symbol);
            tailCanVanish = false;
            break;
        }

        foreach (Terminal<T> terminal in first[(NonTerminal<T>)symbol])
        {
            lookaheads.Add(terminal);
        }

        if (!nullable.Contains(symbol))
        {
            tailCanVanish = false;
            break;
        }

        ++position;
    }

    if (tailCanVanish)
    {
        // Nothing non-nullable follows, so the generating item's lookaheads carry over
        foreach (Terminal<T> inherited in item.Lookaheads)
        {
            lookaheads.Add(inherited);
        }
    }

    return lookaheads;
}
/// <summary>
/// Builds the LR(1) item sets for the grammar, merging sets whose cores are equal as they
/// are discovered, then turns them into a parse table and wraps it in an
/// <c>LRParser&lt;T&gt;</c>.
/// </summary>
/// <returns>A parser driven by the table produced by <c>CreateParseTable</c>.</returns>
internal IParser<T> CreateParser()
{
    // The construction starts from grammar.Start, which (per the original author's note)
    // is expected to be the rule of the augmented accept symbol.
    IProductionRule<T> start = grammar.Start;

    // Nullable and first sets are needed by Closure to compute lookaheads
    ISet<NonTerminal<T>> nullable = CalculateNullable();
    TerminalSet<T> first = CalculateFirst(nullable);

    // The initial item set is the closure of the start rule with the dot at the far left
    // and end-of-input as its only lookahead.
    List<Lr1ItemSet<T>> itemSets = new List<Lr1ItemSet<T>>
    {
        Closure(new List<Lr1Item<T>>
        {
            new Lr1Item<T>(start, 0, new HashSet<Terminal<T>> { grammar.EndOfInputTerminal })
        }, first, nullable)
    };
    List<GotoSetTransition> gotoSetTransitions = new List<GotoSetTransition>();

    // Repeat until a full pass changes nothing. A pass counts as a change when it adds a
    // new item set OR when merging grows the lookaheads of an existing one: a set that was
    // already visited in this pass must be visited again so the new lookaheads reach the
    // sets derived from it.
    bool changed;
    do
    {
        changed = false;
        for (int i = 0; i < itemSets.Count; ++i)
        {
            Lr1ItemSet<T> itemSet = itemSets[i];
            foreach (ISymbol<T> symbol in grammar.AllSymbols)
            {
                // Calculate the item set reached by goto for each symbol in the grammar
                Lr1ItemSet<T> gotoSet = Goto(itemSet, symbol);
                if (!gotoSet.Any())
                {
                    continue;
                }

                // Close the goto set and look for an existing set with the same core
                gotoSet = Closure(gotoSet, first, nullable);
                Lr1ItemSet<T> oldGotoSet = itemSets.Find(f => f.CoreEquals(gotoSet));

                if (oldGotoSet == null)
                {
                    // Unseen core: register the set and the transition leading to it
                    itemSets.Add(gotoSet);
                    gotoSetTransitions.Add(new GotoSetTransition
                    {
                        From = itemSet,
                        OnSymbol = symbol,
                        To = gotoSet
                    });
                    changed = true;
                }
                else
                {
                    // Known core: merge the lookaheads for all rules. MergeLookaheads only
                    // unions sets, so a different total count means something was added.
                    int lookaheadsBefore = oldGotoSet.Sum(item => item.Lookaheads.Count);
                    oldGotoSet.MergeLookaheads(gotoSet);
                    if (oldGotoSet.Sum(item => item.Lookaheads.Count) != lookaheadsBefore)
                    {
                        changed = true;
                    }

                    // Add a transition if it isn't there already
                    GotoSetTransition nt = new GotoSetTransition
                    {
                        From = itemSet,
                        OnSymbol = symbol,
                        To = oldGotoSet
                    };
                    if (!gotoSetTransitions.Any(a => (a.From == nt.From) && (a.OnSymbol == nt.OnSymbol) && (a.To == nt.To)))
                    {
                        gotoSetTransitions.Add(nt);
                    }
                }
            }
        }
    } while (changed);

    LRParseTable<T> parseTable = CreateParseTable(itemSets, gotoSetTransitions);

    // Create a new parser using that parse table and the additional information that
    // needs to be available for the runtime parsing to work: the error token number, the
    // end-of-input token number and the debug names of all terminals.
    return new LRParser<T>(
        parseTable,
        (grammar.ErrorToken as Terminal<T>).TokenNumber,
        grammar.EndOfInputTerminal.TokenNumber,
        grammar.AllSymbols.OfType<Terminal<T>>().Select(f => f.DebugName).ToArray());
}
/// <summary>
/// Converts the item sets and the transitions between them into an
/// <c>LRParseTable&lt;T&gt;</c>: an action table (shift / reduce / accept per state and
/// terminal), a goto table (per state and nonterminal) and the list of reduction rules.
/// </summary>
/// <param name="itemSets">All item sets; the index of a set in this list is its state number.</param>
/// <param name="gotoSetTransitions">The transitions between the item sets.</param>
/// <returns>The populated parse table.</returns>
/// <remarks>
/// A <c>ReduceReduceConflictException&lt;T&gt;</c> raised by <c>SetActionTable</c> is
/// rethrown after being given the two conflicting result symbols.
/// </remarks>
private LRParseTable<T> CreateParseTable(List<Lr1ItemSet<T>> itemSets, List<GotoSetTransition> gotoSetTransitions)
{
    LRParseTable<T> table = new LRParseTable<T>();

    // Counting the terminals enumerates every symbol of the grammar, so do it once here
    // instead of once per table cell.
    int terminalCount = grammar.AllSymbols.OfType<Terminal<T>>().Count();

    // Create a temporary uncompressed action table. This is what we will use to create
    // the compressed action table later on. This could probably be improved upon to save
    // memory if needed. Every cell starts out as short.MinValue (presumably the
    // "no action" marker; confirm against SetActionTable / CompressedTable).
    short[,] uncompressedActionTable = new short[itemSets.Count, terminalCount];
    for (int i = 0; i < itemSets.Count; ++i)
    {
        for (int j = 0; j < terminalCount; ++j)
        {
            uncompressedActionTable[i, j] = short.MinValue;
        }
    }

    // Nonterminal token numbers are stored relative to the first nonterminal
    int firstNonTerminalTokenNumber = grammar.AllSymbols.OfType<NonTerminal<T>>().First().TokenNumber;
    List<GotoTable.GotoTableValue> gotos = new List<GotoTable.GotoTableValue>();

    for (int i = 0; i < itemSets.Count; ++i)
    {
        Lr1ItemSet<T> itemSet = itemSets[i];
        foreach (Lr1Item<T> lr1Item in itemSet)
        {
            // Fill the action table first
            if (lr1Item.SymbolRightOfDot != null)
            {
                // If the symbol after the dot is a terminal, add SHIFT j where j is the
                // state reached from this state on that terminal.
                if (lr1Item.SymbolRightOfDot is Terminal<T>)
                {
                    // Look for a transition in the gotoSetTransitions,
                    // there should always be one.
                    GotoSetTransition transition = gotoSetTransitions.First(t => t.From == itemSet && t.OnSymbol == lr1Item.SymbolRightOfDot);
                    int transitionIndex = itemSets.IndexOf(transition.To);
                    int tokenNumber = ((Terminal<T>)lr1Item.SymbolRightOfDot).TokenNumber;

                    SetActionTable(uncompressedActionTable, i, tokenNumber, LRParseTable<T>.Shift(transitionIndex));
                }
            }
            else
            {
                // The dot is at the end. Add a reduce action to the parse table for all
                // lookaheads of the item. Do NOT do this if the resulting symbol is the
                // accept symbol.
                if (lr1Item.ProductionRule.ResultSymbol != grammar.AcceptSymbol)
                {
                    // Find the index of the reduction rule for this production, if any
                    int numReductionRules = reductionRules.Count;
                    int reductionRule = 0;
                    for (; reductionRule < numReductionRules; ++reductionRule)
                    {
                        if (reductionRules[reductionRule].Item1 == lr1Item.ProductionRule)
                        {
                            break; // Found it, it's already created
                        }
                    }

                    if (numReductionRules == reductionRule)
                    {
                        // Need to create a new reduction rule; it lands at index reductionRule
                        reductionRules.Add(new Tuple<IProductionRule<T>, ReductionRule<T>>(
                            lr1Item.ProductionRule,
                            new ReductionRule<T>
                            {
                                NumTokensToPop = lr1Item.ProductionRule.Symbols.Length,
                                OnReduce = lr1Item.ProductionRule.ReduceAction,
                                TokenToPush = ((Symbol<T>)lr1Item.ProductionRule.ResultSymbol).TokenNumber - firstNonTerminalTokenNumber
                            }));
                    }

                    foreach (Terminal<T> lookahead in lr1Item.Lookaheads)
                    {
                        try
                        {
                            SetActionTable(uncompressedActionTable, i, lookahead.TokenNumber, LRParseTable<T>.Reduce(reductionRule));
                        }
                        catch (ReduceReduceConflictException<T> e)
                        {
                            // Augment exception with correct symbols for the poor user.
                            // -(1 + PreviousValue) presumably decodes the stored reduce
                            // action back into a reduction rule index (confirm against
                            // LRParseTable.Reduce).
                            e.PreviousReduceSymbol = reductionRules[-(1 + e.PreviousValue)].Item1.ResultSymbol;
                            e.NewReduceSymbol = reductionRules[reductionRule].Item1.ResultSymbol;
                            throw;
                        }
                    }
                }
                else
                {
                    // This production rule has the start symbol with the dot at the
                    // rightmost end in it, add ACCEPT to action for end of input character.
                    SetActionTable(uncompressedActionTable, i, grammar.EndOfInputTerminal.TokenNumber, LRParseTable<T>.Accept());
                }
            }
        }

        // Fill the goto table with the state IDs of all states that have been originally
        // produced by the GOTO operation from this state
        foreach (GotoSetTransition gotoTransition in gotoSetTransitions.Where(f => f.From == itemSet && f.OnSymbol is NonTerminal<T>))
        {
            gotos.Add(new GotoTable.GotoTableValue
            {
                NewState = itemSets.IndexOf(gotoTransition.To),
                State = i,
                // Direct cast, as for ResultSymbol above: an unexpected symbol type fails
                // with an InvalidCastException instead of a bare null dereference.
                Token = ((Symbol<T>)gotoTransition.OnSymbol).TokenNumber - firstNonTerminalTokenNumber
            });
        }
    }

    // Move the reduction rules to the table. No need for the impromptu dictionary
    // anymore.
    table.ReductionRules = reductionRules.Select(f => f.Item2).ToArray();
    table.Action = new CompressedTable(uncompressedActionTable);
    table.Goto = new GotoTable(gotos);
    table.StateCount = itemSets.Count;

    // Useful point to look at the table, and everything the builder has generated, since
    // after this point the grammar is pretty much destroyed.
    //string gotoGraph = gotoSetTransitions.AsDotNotation(itemSets);
    //string debugTable = table.ToDebugString(grammar, itemSets.Count);

    return table;
}