Example #1
0
        /// <summary>
        /// Computes the FIRST terminals of every nonterminal in the grammar by sweeping over
        /// all production rules repeatedly until a complete sweep adds nothing new.
        /// </summary>
        /// <param name="nullable">
        /// The nonterminals regarded as nullable. When a rule symbol is a nullable nonterminal
        /// the scan moves on to the next symbol of that rule; otherwise the rule is finished.
        /// </param>
        /// <returns>The terminal set holding the collected FIRST terminals per nonterminal.</returns>
        private TerminalSet <T> CalculateFirst(ISet <NonTerminal <T> > nullable)
        {
            var  firstSets = new TerminalSet <T>(grammar);
            bool changed   = true;

            // Keep sweeping for as long as the previous sweep managed to add something.
            while (changed)
            {
                changed = false;

                foreach (var owner in grammar.AllSymbols.OfType <NonTerminal <T> >())
                {
                    foreach (var rule in owner.ProductionRules)
                    {
                        foreach (var candidate in rule.Symbols)
                        {
                            if (candidate is Terminal <T> )
                            {
                                // A terminal goes straight into the owner's set and ends this rule.
                                if (firstSets.Add(owner, (Terminal <T>)candidate))
                                {
                                    changed = true;
                                }

                                break;
                            }

                            if (!(candidate is NonTerminal <T> ))
                            {
                                // Neither a terminal nor a nonterminal: contributes nothing,
                                // carry on with the next symbol of the rule.
                                continue;
                            }

                            var inner = (NonTerminal <T>)candidate;

                            // Everything currently known to start 'inner' can also start 'owner'.
                            foreach (var terminal in firstSets[inner])
                            {
                                if (firstSets.Add(owner, terminal))
                                {
                                    changed = true;
                                }
                            }

                            // Only a nullable nonterminal lets the following symbol contribute.
                            if (!nullable.Contains(inner))
                            {
                                break;
                            }
                        }
                    }
                }
            }

            return firstSets;
        }
Example #2
0
        /// <summary>
        /// Expands a collection of LR(1) items into a closure set. Every item that has a symbol
        /// right of its dot contributes one new item (dot at position 0) for each grammar
        /// production rule whose result symbol is that symbol.
        /// </summary>
        /// <param name="items">The seed items; each of them is added to the returned set first.</param>
        /// <param name="first">FIRST terminals, looked up per nonterminal when building lookaheads.</param>
        /// <param name="nullable">The nonterminals regarded as nullable.</param>
        /// <returns>The item set containing the seed items and everything added while expanding them.</returns>
        private Lr1ItemSet <T> Closure(IEnumerable <Lr1Item <T> > items, TerminalSet <T> first, ISet <NonTerminal <T> > nullable)
        {
            // The items themselves are always in their own closure set
            Lr1ItemSet <T> closure = new Lr1ItemSet <T>();

            foreach (Lr1Item <T> lr1Item in items)
            {
                closure.Add(lr1Item);
            }

            // Index-based loop on purpose: the set is appended to while it is being walked and the
            // bound is re-evaluated on every iteration, so items added below get visited as well.
            for (int currentItem = 0; currentItem < closure.Count(); ++currentItem)
            {
                Lr1Item <T> item             = closure[currentItem];
                ISymbol <T> symbolRightOfDot = item.SymbolRightOfDot;

                if (symbolRightOfDot == null)
                {
                    // Nothing right of the dot, so this item generates no further items.
                    continue;
                }

                // Collect the lookaheads for the generated items from the symbols located after
                // index DotLocation in the rule.
                HashSet <Terminal <T> > lookaheads = new HashSet <Terminal <T> >();
                bool nonNullableFound = false;
                var  ruleSymbols      = item.ProductionRule.Symbols;

                for (int i = item.DotLocation + 1; i < ruleSymbols.Length; ++i)
                {
                    ISymbol <T> symbol = ruleSymbols[i];

                    // If symbol is terminal, just add it
                    if (symbol is Terminal <T> )
                    {
                        lookaheads.Add((Terminal <T>)symbol);

                        // Terminals are not nullable, break out of loop
                        nonNullableFound = true;

                        break;
                    }

                    // Cast once and reuse the typed reference. Passing the ISymbol-typed variable
                    // to nullable.Contains would bind to the LINQ extension method (a linear scan)
                    // rather than the set's own Contains, which is what CalculateFirst uses.
                    NonTerminal <T> nonTerminal = (NonTerminal <T>)symbol;

                    foreach (Terminal <T> terminal in first[nonTerminal])
                    {
                        lookaheads.Add(terminal);
                    }

                    if (!nullable.Contains(nonTerminal))
                    {
                        nonNullableFound = true;

                        break;
                    }
                }

                if (!nonNullableFound)
                {
                    // Every symbol inspected above was nullable (or there was none), so the
                    // lookaheads of the generating item carry over to the new lookahead set.
                    foreach (Terminal <T> lookahead in item.Lookaheads)
                    {
                        lookaheads.Add(lookahead);
                    }
                }

                // Create new Lr1 items from all rules where the resulting symbol of the production rule
                // matches the symbol that was to the right of the dot.
                // NOTE(review): every item created here is handed the same 'lookaheads' instance;
                // whether Lr1Item copies it is not visible from this method - confirm.
                foreach (Lr1Item <T> lr1Item in grammar.ProductionRules
                         .Where(f => f.ResultSymbol == symbolRightOfDot)
                         .Select(f => new Lr1Item <T>(f, 0, lookaheads)))
                {
                    closure.Add(lr1Item);
                }
            }

            return closure;
        }
Example #3
0
        /// <summary>
        /// Builds a parser for the grammar: computes the nullable and FIRST sets, grows the list of
        /// LR(1) item sets (merging lookaheads into an existing set when the cores are equal),
        /// records the goto transitions between sets, creates the parse table from both and wraps
        /// it in an LRParser.
        /// </summary>
        /// <returns>The parser constructed from the generated parse table.</returns>
        internal IParser <T> CreateParser()
        {
            // First order of business is to create the canonical list of LR1 states, or at least we are going to go through
            // them as we merge the sets together.
            // This starts with augmenting the grammar with an accept symbol, then we derive the
            // grammar from that
            IProductionRule <T> start = grammar.Start;

            // Get the nullable and first sets for all nonterminal symbols
            ISet <NonTerminal <T> > nullable = CalculateNullable();
            TerminalSet <T>         first    = CalculateFirst(nullable);

            // The initial item set is the closure of the start rule with the dot at position 0
            // and end-of-input as its only lookahead.
            List <Lr1ItemSet <T> > itemSets = new List <Lr1ItemSet <T> >
            {
                Closure(new List <Lr1Item <T> >
                {
                    new Lr1Item <T>(start, 0, new HashSet <Terminal <T> > {
                        grammar.EndOfInputTerminal
                    })
                }, first, nullable)
            };
            List <GotoSetTransition> gotoSetTransitions = new List <GotoSetTransition>();

            // Repeat until nothing gets added any more
            // This is necessary since we are merging sets as we go, which changes things around.
            // NOTE(review): only a brand-new item set flips 'added'; merging lookaheads into an
            // existing set does not. Confirm that this still reaches the intended fixed point.
            bool added;

            do
            {
                added = false;

                // Index-based loop: itemSets grows while it is being walked.
                for (int i = 0; i < itemSets.Count; ++i)
                {
                    Lr1ItemSet <T> itemSet = itemSets[i];

                    foreach (ISymbol <T> symbol in grammar.AllSymbols)
                    {
                        // Calculate the item set reached by goto for each symbol in the grammar
                        Lr1ItemSet <T> gotoSet = Goto(itemSet, symbol);

                        if (!gotoSet.Any())
                        {
                            // No transition on this symbol from the current set.
                            continue;
                        }

                        // Do a closure on the goto set and see if a set with the same core is
                        // already present in the list of item sets.
                        Lr1ItemSet <T> closedSet  = Closure(gotoSet, first, nullable);
                        Lr1ItemSet <T> oldGotoSet = itemSets.Find(f => f.CoreEquals(closedSet));

                        if (oldGotoSet == null)
                        {
                            // Unknown core: register the set and the transition leading to it.
                            itemSets.Add(closedSet);
                            gotoSetTransitions.Add(new GotoSetTransition
                            {
                                From     = itemSet,
                                OnSymbol = symbol,
                                To       = closedSet
                            });
                            added = true;

                            continue;
                        }

                        // Already found the set
                        // Merge the lookaheads for all rules
                        oldGotoSet.MergeLookaheads(closedSet);

                        // Add a transition if it already isn't there
                        GotoSetTransition nt = new GotoSetTransition
                        {
                            From     = itemSet,
                            OnSymbol = symbol,
                            To       = oldGotoSet
                        };

                        if (!gotoSetTransitions.Any(a => (a.From == nt.From) && (a.OnSymbol == nt.OnSymbol) && (a.To == nt.To)))
                        {
                            gotoSetTransitions.Add(nt);
                        }
                    }
                }
            } while (added);

            LRParseTable <T> parseTable = CreateParseTable(itemSets, gotoSetTransitions);

            // Create a new parser using that parse table and some additional information that needs
            // to be available for the runtime parsing to work.
            // The error token is cast directly instead of with 'as': an error token of the wrong
            // type then fails with an InvalidCastException rather than a NullReferenceException
            // on the member access.
            return new LRParser <T>(
                       parseTable,
                       ((Terminal <T>)grammar.ErrorToken).TokenNumber,
                       grammar.EndOfInputTerminal.TokenNumber,
                       grammar.AllSymbols.OfType <Terminal <T> >().Select(f => f.DebugName).ToArray()
                       );
        }