Beispiel #1
0
        /// <summary>
        /// Builds the LR(1) item sets for the grammar, merging sets that share the same core as
        /// they are discovered, then creates the parse table and wraps it in a parser.
        /// </summary>
        /// <returns>An <c>LRParser</c> driven by the parse table generated from the grammar.</returns>
        /// <exception cref="InvalidCastException">
        /// Thrown if the grammar's error token is not a <c>Terminal</c> instance.
        /// </exception>
        internal IParser <T> CreateParser()
        {
            // First order of business is to create the canonical list of LR1 states, or at least we are going to go through
            // them as we merge the sets together.
            // This starts with augmenting the grammar with an accept symbol, then we derive the
            // grammar from that
            IProductionRule <T> start = grammar.Start;

            // Get the nullable and first sets; both are needed by every Closure call below
            ISet <NonTerminal <T> > nullable = CalculateNullable();
            TerminalSet <T>         first    = CalculateFirst(nullable);

            // So, we are going to calculate the LR1 closure for the start symbol, which should
            // be the augmented accept state of the grammar.
            // The closure is all states which are accessible by the dot at the left hand side of the
            // item. The only lookahead for the start item is the end of input terminal.
            List <Lr1ItemSet <T> > itemSets = new List <Lr1ItemSet <T> >
            {
                Closure(new List <Lr1Item <T> >
                {
                    new Lr1Item <T>(start, 0, new HashSet <Terminal <T> > {
                        grammar.EndOfInputTerminal
                    })
                }, first, nullable)
            };
            List <GotoSetTransition> gotoSetTransitions = new List <GotoSetTransition>();

            // Repeat until nothing gets added any more
            // This is necessary since we are merging sets as we go, which changes things around.
            // NOTE(review): only a newly added item set triggers another pass; merging lookaheads
            // into an existing set does not set the flag. Confirm that lookaheads merged into an
            // already visited set during the final pass are fully propagated.
            bool added;

            do
            {
                added = false;

                // Index based loop on purpose: item sets are appended to the list while iterating
                for (int i = 0; i < itemSets.Count; ++i)
                {
                    Lr1ItemSet <T> itemSet = itemSets[i];

                    foreach (ISymbol <T> symbol in grammar.AllSymbols)
                    {
                        // Calculate the itemset for by goto for each symbol in the grammar
                        Lr1ItemSet <T> gotoSet = Goto(itemSet, symbol);

                        // If there is anything found in the set
                        if (gotoSet.Any())
                        {
                            // Do a closure on the goto set and see if it's already present in the sets of items that we have
                            // if that is not the case add it to the item set
                            gotoSet = Closure(gotoSet, first, nullable);

                            // Sets are matched on their core only, so sets differing only in lookaheads are merged
                            Lr1ItemSet <T> oldGotoSet = itemSets.Find(f => f.CoreEquals(gotoSet));

                            if (oldGotoSet == null)
                            {
                                // Add goto set to itemsets
                                itemSets.Add(gotoSet);

                                // Add a transition
                                gotoSetTransitions.Add(new GotoSetTransition
                                {
                                    From     = itemSet,
                                    OnSymbol = symbol,
                                    To       = gotoSet
                                });
                                added = true;
                            }
                            else
                            {
                                // Already found the set
                                // Merge the lookaheads for all rules
                                oldGotoSet.MergeLookaheads(gotoSet);

                                // Add a transition if it already isn't there
                                GotoSetTransition nt = new GotoSetTransition
                                {
                                    From     = itemSet,
                                    OnSymbol = symbol,
                                    To       = oldGotoSet
                                };

                                if (!gotoSetTransitions.Any(a => (a.From == nt.From) && (a.OnSymbol == nt.OnSymbol) && (a.To == nt.To)))
                                {
                                    gotoSetTransitions.Add(nt);
                                }
                            }
                        }
                    }
                }
            } while (added);

            LRParseTable <T> parseTable = CreateParseTable(itemSets, gotoSetTransitions);

            // Create a new parser using that parse table and some additional information that needs
            // to be available for the runtime parsing to work.
            // The error token is cast directly so that an unexpected token type fails with an
            // InvalidCastException instead of a NullReferenceException on the member access.
            return(new LRParser <T>(
                       parseTable,
                       ((Terminal <T>)grammar.ErrorToken).TokenNumber,
                       grammar.EndOfInputTerminal.TokenNumber,
                       grammar.AllSymbols.OfType <Terminal <T> >().Select(f => f.DebugName).ToArray()
                       ));
        }
Beispiel #2
0
        /// <summary>
        /// Creates the parse table (action table, goto table and reduction rules) from the
        /// item sets and the transitions between them.
        /// </summary>
        /// <param name="itemSets">The item sets; the index of a set in this list is used as its state number.</param>
        /// <param name="gotoSetTransitions">The transitions between the item sets, per symbol.</param>
        /// <returns>The populated parse table.</returns>
        private LRParseTable <T> CreateParseTable(List <Lr1ItemSet <T> > itemSets, List <GotoSetTransition> gotoSetTransitions)
        {
            LRParseTable <T> table = new LRParseTable <T>();

            // Counting the terminals enumerates the whole symbol list, so do it once up front
            // instead of in every iteration of the fill loop below.
            int terminalCount = grammar.AllSymbols.OfType <Terminal <T> >().Count();

            // Create a temporary uncompressed action table. This is what we will use to create
            // the compressed action table later on. This could probably be improved upon to save
            // memory if needed.
            short[,] uncompressedActionTable = new short[itemSets.Count, terminalCount];

            // Every cell starts out as short.MinValue until an action is set for it
            for (int i = 0; i < itemSets.Count; ++i)
            {
                for (int j = 0; j < terminalCount; ++j)
                {
                    uncompressedActionTable[i, j] = short.MinValue;
                }
            }

            // Nonterminal token numbers are stored relative to the first nonterminal
            int firstNonTerminalTokenNumber       = grammar.AllSymbols.OfType <NonTerminal <T> >().First().TokenNumber;
            List <GotoTable.GotoTableValue> gotos = new List <GotoTable.GotoTableValue>();

            for (int i = 0; i < itemSets.Count; ++i)
            {
                Lr1ItemSet <T> itemSet = itemSets[i];

                foreach (Lr1Item <T> lr1Item in itemSet)
                {
                    // Fill the action table first

                    // If the next symbol in the LR0 item is a terminal (symbol
                    // found after the dot, add a SHIFT j IF GOTO(lr0Item, nextSymbol) == j
                    if (lr1Item.SymbolRightOfDot != null)
                    {
                        if (lr1Item.SymbolRightOfDot is Terminal <T> )
                        {
                            // Look for a transition in the gotoSetTransitions
                            // there should always be one.
                            GotoSetTransition transition = gotoSetTransitions.First(t => t.From == itemSet && t.OnSymbol == lr1Item.SymbolRightOfDot);
                            int transitionIndex          = itemSets.IndexOf(transition.To);
                            int tokenNumber = ((Terminal <T>)lr1Item.SymbolRightOfDot).TokenNumber;

                            SetActionTable(uncompressedActionTable, i, tokenNumber, LRParseTable <T> .Shift(transitionIndex));
                        }
                    }
                    else
                    {
                        // The dot is at the end. Add reduce action to the parse table for
                        // all lookaheads for the resulting symbol
                        // Do NOT do this if the resulting symbol is the start symbol
                        if (lr1Item.ProductionRule.ResultSymbol != grammar.AcceptSymbol)
                        {
                            // Look for an existing reduction rule for this production. If the loop
                            // runs to the end without a match, reductionRule equals the count, which
                            // is exactly the index the new rule gets when it is appended below.
                            int numReductionRules = reductionRules.Count;
                            int reductionRule     = 0;

                            for (; reductionRule < numReductionRules; ++reductionRule)
                            {
                                if (reductionRules[reductionRule].Item1 == lr1Item.ProductionRule)
                                {
                                    break; // Found it, it's already created
                                }
                            }
                            if (numReductionRules == reductionRule)
                            {
                                // Need to create a new reduction rule
                                reductionRules.Add(new Tuple <IProductionRule <T>, ReductionRule <T> >(lr1Item.ProductionRule,
                                                                                                       new ReductionRule <T>
                                {
                                    NumTokensToPop = lr1Item.ProductionRule.Symbols.Length,
                                    OnReduce       = lr1Item.ProductionRule.ReduceAction,
                                    TokenToPush    = ((Symbol <T>)lr1Item.ProductionRule.ResultSymbol).TokenNumber - firstNonTerminalTokenNumber
                                }));
                            }

                            foreach (Terminal <T> lookahead in lr1Item.Lookaheads)
                            {
                                try
                                {
                                    SetActionTable(uncompressedActionTable, i, lookahead.TokenNumber, LRParseTable <T> .Reduce(reductionRule));
                                }
                                catch (ReduceReduceConflictException <T> e)
                                {
                                    // Augment exception with correct symbols for the poor user
                                    // NOTE(review): the index calculation assumes reduce actions are stored
                                    // as -(ruleIndex + 1); confirm against LRParseTable.Reduce.
                                    e.PreviousReduceSymbol = reductionRules[-(1 + e.PreviousValue)].Item1.ResultSymbol;
                                    e.NewReduceSymbol      = reductionRules[reductionRule].Item1.ResultSymbol;

                                    throw;
                                }
                            }
                        }
                        else
                        {
                            // This production rule has the start symbol with the dot at the rightmost end in it, add ACCEPT to action for end of input character.
                            SetActionTable(uncompressedActionTable, i, grammar.EndOfInputTerminal.TokenNumber, LRParseTable <T> .Accept());
                        }
                    }
                }

                // Fill the goto table with the state IDs of all states that have been originally
                // produced by the GOTO operation from this state
                foreach (GotoSetTransition gotoTransition in gotoSetTransitions.Where(f => f.From == itemSet && f.OnSymbol is NonTerminal <T>))
                {
                    gotos.Add(new GotoTable.GotoTableValue
                    {
                        NewState = itemSets.IndexOf(gotoTransition.To),
                        State    = i,
                        // Direct cast, same as for the reduction rules above, so that an unexpected
                        // symbol type fails with an InvalidCastException rather than a NullReferenceException
                        Token    = ((Symbol <T>)gotoTransition.OnSymbol).TokenNumber - firstNonTerminalTokenNumber
                    });
                }
            }

            // Move the reduction rules to the table. No need for the impromptu dictionary
            // anymore.
            table.ReductionRules = reductionRules.Select(f => f.Item2).ToArray();
            table.Action         = new CompressedTable(uncompressedActionTable);
            table.Goto           = new GotoTable(gotos);
            table.StateCount     = itemSets.Count;

            // Useful point to look at the table, and everything the builder has generated, since after this point the grammar is pretty much destroyed.
            //string gotoGraph = gotoSetTransitions.AsDotNotation(itemSets);
            //string debugTable = table.ToDebugString(grammar, itemSets.Count);
            return(table);
        }