示例#1
0
        /// <summary>
        /// Parses input tokens supplied by the <i>lexer</i> and returns the result.
        /// </summary>
        /// <param name="lexer">The ILexer which will supply the Parser with tokens.</param>
        /// <param name="userObject">An arbitrary object that is handed unchanged, as the last argument,
        /// to every reduction action invoked during parsing.</param>
        /// <returns>An object containing the value returned for the root nonterminal.</returns>
        /// <exception cref="ParsingException">when the lexer's output doesn't conform to the grammar's rules or
        /// when the lexer throws a ParsingException.</exception>
        public object Parse(ILexer lexer, object userObject)
        {
            Stack <PartialResult> resultStack = new Stack <PartialResult>();
            Stack <int>           stateStack  = new Stack <int>();

            // Initial state.
            stateStack.Push(0);

            bool  done      = false;
            Token nextToken = lexer.GetNextToken();
            int   state     = stateStack.Peek();

            // Keep going while the lexer still reports tokens, or - once it does not - for as long as
            // the parse table still has a non-failing action for the token currently held.
            while (!done && (lexer.HasTokens || ((state >= 0) && (parseTable[state, nextToken.SymbolCode].ActionType != ParserActionType.Fail))))
            {
                ParserAction nextAction = parseTable[state, nextToken.SymbolCode];

                switch (nextAction.ActionType)
                {
                case ParserActionType.Shift:
                    // Move the token onto the result stack, enter the target state and fetch the next token.
                    resultStack.Push(new PartialResult(nextToken.Value, symbolNames[nextToken.SymbolCode],
                                                       nextToken.LineNumber, nextToken.ColumnNumber));
                    stateStack.Push(nextAction.Argument);
                    state     = stateStack.Peek();
                    nextToken = lexer.GetNextToken();
                    break;

                case ParserActionType.Reduce:
                    // Using the information about the relevant production, remove the appropriate
                    // number of elements from both stacks. Pushing them onto a temporary stack
                    // restores their left-to-right order when they are popped again below.
                    ProductionOutline     production   = productions[nextAction.Argument];
                    Stack <PartialResult> constituents = new Stack <PartialResult>();
                    for (int i = 0; i < production.NumRHSSymbols; i++)
                    {
                        constituents.Push(resultStack.Pop());
                        stateStack.Pop();
                    }
                    state = stateStack.Peek();

                    // We take the values of the constituents and compute the value of the composite
                    // element using the relevant action. This new result replaces the old ones on the
                    // result stack; the accompanying state for the state stack is found in the goto
                    // table of the parser. The line and column number for the new result are taken from
                    // the first constituent. If the nonterminal has no constituents, we take the position
                    // of the upcoming token.
                    int      numConstituents    = constituents.Count;
                    object[] constituentValues  = new object[numConstituents];
                    int[]    constituentLines   = new int[numConstituents];
                    int[]    constituentColumns = new int[numConstituents];
                    for (int i = 0; i < numConstituents; i++)
                    {
                        PartialResult constituent = constituents.Pop();
                        constituentValues[i]  = constituent.Value;
                        constituentLines[i]   = constituent.LineNumber;
                        constituentColumns[i] = constituent.ColumnNumber;
                    }
                    object result = actions[nextAction.Argument](constituentValues, constituentLines, constituentColumns, userObject);
                    if (numConstituents > 0)
                    {
                        resultStack.Push(new PartialResult(result, symbolNames[production.LHSSymbol],
                                                           constituentLines[0], constituentColumns[0]));
                    }
                    else
                    {
                        resultStack.Push(new PartialResult(result, symbolNames[production.LHSSymbol],
                                                           nextToken.LineNumber, nextToken.ColumnNumber));
                    }

                    // The goto table is indexed by nonterminal, hence the offset by numTerminals.
                    stateStack.Push(gotoTable[state, production.LHSSymbol - numTerminals]);
                    state = stateStack.Peek();

                    // Production 0 is always $start ::= <start-symbol> $end, and performing a
                    // reduction by this production means that we have read $end as well and managed
                    // to parse the input, so we are finished. Should the lexer try to hand us any
                    // further tokens, that is reported after the loop.
                    if (nextAction.Argument == 0)
                    {
                        done = true;
                    }
                    break;

                case ParserActionType.Fail:
                    // Gather the names of all terminals for which the current state has a
                    // non-failing action, so the error message can list what was expected.
                    List <string> expectedTerminals = new List <string>();
                    for (int terminal = 0; terminal < numTerminals; terminal++)
                    {
                        if (parseTable[state, terminal].ActionType != ParserActionType.Fail)
                        {
                            expectedTerminals.Add(symbolNames[terminal]);
                        }
                    }

                    // The list may be empty; joining (rather than trimming a leading separator off a
                    // StringBuilder) keeps that case from raising ArgumentOutOfRangeException and
                    // masking the real parsing error.
                    string expectedList = (expectedTerminals.Count > 0)
                                          ? string.Join(", ", expectedTerminals.ToArray())
                                          : "(none)";

                    throw new ParsingException(string.Format("Unexpected terminal {0}({1}) encountered by the parser, expected one of the following terminals: {2}.",
                                                             symbolNames[nextToken.SymbolCode], nextToken.Value, expectedList),
                                               nextToken.LineNumber, nextToken.ColumnNumber);
                }
            }

            if (lexer.HasTokens)
            {
                // The lexer still has something to say, but we are already done. The original author
                // notes this should never happen with the project's own lexer, since it offers no
                // further tokens after returning $end.
                nextToken = lexer.GetNextToken();
                throw new ParsingException(string.Format("There are additional symbols in the input string starting with terminal {0}({1}).",
                                                         symbolNames[nextToken.SymbolCode], nextToken.Value), nextToken.LineNumber, nextToken.ColumnNumber);
            }
            else if ((resultStack.Count == 1) && (resultStack.Peek().SymbolName == "$start"))
            {
                // Everything is as it should be.
                return(resultStack.Pop().Value);
            }
            else if (resultStack.Count == 0)
            {
                throw new ParsingException("There were no symbols in the input.",
                                           nextToken.LineNumber, nextToken.ColumnNumber);
            }
            else
            {
                // This means the parser was not yet satisfied with the input and expects more symbols.
                // The stack is enumerated in reverse so the symbols are listed bottom-to-top.
                List <string> symbolsOnStack = new List <string>();
                foreach (PartialResult stackItem in resultStack.Reverse())
                {
                    symbolsOnStack.Add(stackItem.SymbolName);
                }

                throw new ParsingException("The entire input was reduced to more than one symbol: "
                                           + string.Join(", ", symbolsOnStack.ToArray()) +
                                           ". Input text was probably incomplete.", nextToken.LineNumber, nextToken.ColumnNumber);
            }
        }
示例#2
0
        /// <summary>
        /// Computes the ParseTable and GotoTable of a Grammar's ParserData, logging the automaton's graph
        /// to a logfile should the <i>grammar</i> prove to be non-LALR(1) or should the caller explicitly
        /// state he wants a log. Any reports generated by the processor will be sent to the <i>reportOutput</i>
        /// TextWriter instance.
        /// </summary>
        /// <param name="grammar">The Grammar whose tables are to be computed. GrammarDefinition ought to be
        /// set and filled with appropriate data and ParserData should be initialized.</param>
        /// <param name="logfileName">The name of the file to which the automaton is to be logged; <b>null</b>
        /// if logging should be disabled.</param>
        /// <param name="explicitLogging">A Boolean value determining whether the automaton should be
        /// written to the logfile even though there are no inconsistencies. Has no effect when
        /// <i>logfileName</i> is <b>null</b>.</param>
        /// <param name="reportOutput">The TextWriter to which the report should be written; <b>null</b>
        /// if reporting should be disabled.</param>
        /// <exception cref="GrammarException">when reduce/reduce conflicts remain after the LALR(1)
        /// lookahead sets have been computed.</exception>
        public void ComputeTables(Grammar grammar, string logfileName, bool explicitLogging, TextWriter reportOutput)
        {
            // INITIALIZATION

            this.grammar = grammar;

            // Initialization and computation of productionsByRHSNonterminals.
            // NOTE(review): this array is sized by grammar.GrammarDefinition.NumNonterminals while it is
            // indexed relative to grammar.NumTerminals and the sibling array below is sized by
            // grammar.NumNonterminals - confirm the two counts are always equal.
            productionsByRHSNonterminals = new List <Production> [grammar.GrammarDefinition.NumNonterminals];
            for (int nonterminal = 0; nonterminal < productionsByRHSNonterminals.Length; nonterminal++)
            {
                productionsByRHSNonterminals[nonterminal] = new List <Production>();
            }

            foreach (Production production in grammar.Productions)
            {
                foreach (int rhsSymbol in production.RHSSymbols)
                {
                    // Symbol codes at or above NumTerminals denote nonterminals.
                    if (rhsSymbol >= grammar.NumTerminals)
                    {
                        productionsByRHSNonterminals[rhsSymbol - grammar.NumTerminals].Add(production);
                    }
                }
            }

            // Initialization of transitionsByNonterminals; the lists are filled later by
            // exploreTransitions, which at the same time builds the LR(0) automaton.
            transitionsByNonterminals = new List <NonterminalTransition> [grammar.NumNonterminals];
            for (int nonterminal = 0; nonterminal < grammar.NumNonterminals; nonterminal++)
            {
                transitionsByNonterminals[nonterminal] = new List <NonterminalTransition>();
            }

            numNonterminalTransitions = 0;

            conflictingItems = new List <List <Item> >();
            lookaheadSets    = new List <List <BitVectorSet> >();


            parserStates = new List <State>();

            // CONSTRUCTION OF THE LR(0) AUTOMATON

            // Create the initial ItemSet and kick off the recursive exploreTransitions.

            Item    startItem = new Item(grammar.Productions[0], 0);
            ItemSet startIS   = new ItemSet();

            startIS.Add(startItem);
            startIS.CloseItemSet(grammar);

            State initialState = new State(0, startIS);

            parserStates.Add(initialState);

            // Per the original author: computes parserStates, the edges between them, the number of
            // nonterminal transitions and transitionsByNonterminals.
            exploreTransitions(initialState);


            // This bit of initialization had to wait until the automaton's states were computed.
            // An index of -1 marks a state that has no entry in conflictingItems / lookaheadSets.
            stateLookaheadIndex = new int[parserStates.Count];
            for (int i = 0; i < parserStates.Count; i++)
            {
                stateLookaheadIndex[i] = -1;
            }

            // Every entry is assigned explicitly in the loop below.
            stateResolvedAt = new LookaheadComplexity[parserStates.Count];


            // RESOLVING NONDETERMINISTIC STATES (CONFLICTS)

            numInconsistentStates = 0;

            foreach (State state in parserStates)
            {
                List <Item> finalItems = new List <Item>();
                stateResolvedAt[state.StateNumber] = LookaheadComplexity.LR0;

                foreach (Item item in state.ItemSet)
                {
                    if (item.IsFinal)
                    {
                        finalItems.Add(item);
                    }
                }

                if (finalItems.Count >= 2)
                {
                    // Two or more final items: the state cannot pick a reduction without lookahead.
                    stateLookaheadIndex[state.StateNumber] = numInconsistentStates;
                    stateResolvedAt[state.StateNumber]     = LookaheadComplexity.Unresolved;
                    numInconsistentStates++;
                    conflictingItems.Add(finalItems);
                }
                else if (finalItems.Count >= 1)
                {
                    // Exactly one final item: the state is inconsistent only if it can also read
                    // a terminal.
                    bool canRead = false;
                    foreach (Transition trans in state.Transitions)
                    {
                        if (trans is TerminalTransition)
                        {
                            canRead = true;
                            break;
                        }
                    }
                    if (canRead)
                    {
                        stateLookaheadIndex[state.StateNumber] = numInconsistentStates;
                        stateResolvedAt[state.StateNumber]     = LookaheadComplexity.Unresolved;
                        numInconsistentStates++;
                        conflictingItems.Add(finalItems);
                    }
                }
            }

            if (numInconsistentStates > 0)
            {
                // The input grammar is not LR(0), so lookahead sets have to be computed for the
                // inconsistent states. We use the approach of DeRemer and Pennello: each inconsistent
                // state is first attempted with SLR(1) lookahead sets, and only afterwards, if needed,
                // do we move on to computing LALR(1) lookaheads.

                // Step 1. Determine which nonterminals are nullable.

                computeNullableNonterminals();

                // Step 2. Compute SLR(1) lookaheads.
                // We prepare for computing the Read and SLR-Follow sets and try to resolve the
                // conflicts using SLR(1) lookaheads only.


                // Direct Read set for each nonterminal transition.
                initDR =
                    (trans =>
                {
                    BitVectorSet set = new BitVectorSet(grammar.NumTerminals);
                    foreach (Transition nextTrans in trans.Destination.Transitions)
                    {
                        if (nextTrans is TerminalTransition)
                        {
                            set.Add(nextTrans.TransitionSymbol);
                        }
                    }
                    return(set);
                });

                read    = new BitVectorSet[numNonterminalTransitions];
                N_reads = new int[numNonterminalTransitions];
                reads   = new ReadsOracle(this);

                // When forceLalr1 is set, the SLR(1) stage is skipped entirely.
                if (!forceLalr1)
                {
                    getNontermIndex = (nonterm => nonterm - grammar.NumTerminals);

                    // The initial value for a nonterminal is the union of the Read sets of all
                    // transitions labelled with that nonterminal; this follows almost directly from
                    // the definition of how Follow sets are computed for SLR(1) parsers.
                    initSLR = (nonterm =>
                    {
                        BitVectorSet set = new BitVectorSet(grammar.NumTerminals);
                        foreach (NonterminalTransition trans in transitionsByNonterminals[nonterm - grammar.NumTerminals])
                        {
                            // A zero entry in N_reads means the transition has not been traversed yet.
                            if (N_reads[getTransNumber(trans)] == 0)
                            {
                                digraphTraverse <NonterminalTransition>(trans, N_reads, read, reads, initDR, getTransNumber);
                            }
                            set.UnionWith(read[getTransNumber(trans)]);
                        }
                        return(set);
                    });

                    slr_follow  = new BitVectorSet[grammar.NumNonterminals];
                    N_slr       = new int[grammar.NumNonterminals];
                    slr_follows = new SLROracle(this);

                    foreach (State state in parserStates)
                    {
                        if (stateResolvedAt[state.StateNumber] == LookaheadComplexity.Unresolved)
                        {
                            List <BitVectorSet> stateLookaheads = new List <BitVectorSet>();

                            foreach (Item conflictItem in conflictingItems[stateLookaheadIndex[state.StateNumber]])
                            {
                                if (N_slr[getNontermIndex(conflictItem.Production.LHSSymbol)] == 0)
                                {
                                    digraphTraverse <int>(conflictItem.Production.LHSSymbol, N_slr, slr_follow, slr_follows, initSLR, getNontermIndex);
                                }

                                stateLookaheads.Add(slr_follow[getNontermIndex(conflictItem.Production.LHSSymbol)]);
                            }

                            // Unresolved states are visited in the same order in which their indices
                            // were handed out above, so appending keeps lookaheadSets aligned with
                            // stateLookaheadIndex.
                            lookaheadSets.Add(stateLookaheads);
                        }
                    }
                }

                // Step 3. Compute LALR(1) lookaheads.
                // If the SLR(1) lookaheads did not resolve all conflicts, we compute LALR(1)
                // lookaheads for the states left unresolved.

                if (forceLalr1 || checkForConflicts(LookaheadComplexity.SLR1))
                {
                    initRead = (trans =>
                    {
                        if (N_reads[getTransNumber(trans)] == 0)
                        {
                            digraphTraverse <NonterminalTransition>(trans, N_reads, read, reads, initDR, getTransNumber);
                        }
                        // A copy is returned so the follow computation does not share the Read set instance.
                        return(new BitVectorSet(read[getTransNumber(trans)]));
                    });

                    follow     = new BitVectorSet[numNonterminalTransitions];
                    N_includes = new int[numNonterminalTransitions];
                    includes   = new IncludesOracle(this);

                    foreach (State state in parserStates)
                    {
                        if (stateResolvedAt[state.StateNumber] == LookaheadComplexity.Unresolved)
                        {
                            List <BitVectorSet> stateLookaheads = new List <BitVectorSet>();

                            foreach (Item conflictItem in conflictingItems[stateLookaheadIndex[state.StateNumber]])
                            {
                                BitVectorSet lookaheadSet = new BitVectorSet(grammar.NumTerminals);

                                // The item's lookahead set is the union of the follow sets of all
                                // transitions returned by lookback for this state and item.
                                foreach (NonterminalTransition trans in lookback(state, conflictItem))
                                {
                                    if (N_includes[getTransNumber(trans)] == 0)
                                    {
                                        digraphTraverse <NonterminalTransition>(trans, N_includes, follow, includes, initRead, getTransNumber);
                                    }

                                    lookaheadSet.UnionWith(follow[getTransNumber(trans)]);
                                }

                                stateLookaheads.Add(lookaheadSet);
                            }

                            // If this is our first time computing lookahead sets, we have to create a
                            // new entry for the state in lookaheadSets; otherwise we overwrite the one
                            // created during the previous (SLR) computation.
                            if (forceLalr1)
                            {
                                lookaheadSets.Add(stateLookaheads);
                            }
                            else
                            {
                                lookaheadSets[stateLookaheadIndex[state.StateNumber]] = stateLookaheads;
                            }
                        }
                    }

                    // Step 4. Verify the parser.
                    // If the parser still contains conflicts, we write the state automaton to the log
                    // for the user and mark the conflicts in it. If the parser contains no conflicts,
                    // we write the findings into the tables and we are done.

                    bool reduceReduceConflicts;
                    bool conflicts = checkForConflicts(LookaheadComplexity.LALR1, out reduceReduceConflicts);
                    if (reduceReduceConflicts)
                    {
                        if (logfileName != null)
                        {
                            printAutomatonStates(logfileName);
                            throw new GrammarException(string.Format("Reduce/reduce conflicts detected in the resulting parser.\r\nThe grammar isn't LALR(1).\r\nCheck the log file {0} for details.", logfileName));
                        }
                        else
                        {
                            throw new GrammarException("Reduce/reduce conflicts detected in the resulting parser.\r\nThe grammar isn't LALR(1).");
                        }
                    }
                    else if (conflicts)
                    {
                        // Remaining (shift/reduce) conflicts are only reported; table construction
                        // below still proceeds.
                        if (reportOutput != null)
                        {
                            printShiftReduceConflicts(reportOutput);
                        }
                    }
                }
            }

            // TABLE CONSTRUCTION

            ParserAction[,] parseTable = new ParserAction[parserStates.Count, grammar.NumTerminals];
            int[,] gotoTable           = new int[parserStates.Count, grammar.NumNonterminals];
            // -1 marks a goto entry for which the state has no nonterminal transition.
            for (int i = 0; i < parserStates.Count; i++)
            {
                for (int j = 0; j < grammar.NumNonterminals; j++)
                {
                    gotoTable[i, j] = -1;
                }
            }

            for (int stateNumber = 0; stateNumber < parserStates.Count; stateNumber++)
            {
                if (stateLookaheadIndex[stateNumber] >= 0)
                {
                    // Inconsistent state: each final item reduces only on the terminals in its
                    // computed lookahead set.
                    for (int i = 0; i < conflictingItems[stateLookaheadIndex[stateNumber]].Count; i++)
                    {
                        ParserAction action = new ParserAction();
                        action.ActionType = ParserActionType.Reduce;
                        action.Argument   = conflictingItems[stateLookaheadIndex[stateNumber]][i].Production.ProductionCode;
                        foreach (int symbol in lookaheadSets[stateLookaheadIndex[stateNumber]][i])
                        {
                            parseTable[stateNumber, symbol] = action;
                        }
                    }
                }
                else
                {
                    // State without a lookahead entry: a final item reduces on every terminal.
                    foreach (Item item in parserStates[stateNumber].ItemSet)
                    {
                        if (item.IsFinal)
                        {
                            ParserAction action = new ParserAction();
                            action.ActionType = ParserActionType.Reduce;
                            action.Argument   = item.Production.ProductionCode;
                            for (int symbol = 0; symbol < grammar.NumTerminals; symbol++)
                            {
                                parseTable[stateNumber, symbol] = action;
                            }
                        }
                    }
                }

                // Transitions are written after the reductions, so a shift overwrites any reduce
                // entry already stored for the same terminal.
                foreach (Transition trans in parserStates[stateNumber].Transitions)
                {
                    if (trans is TerminalTransition)
                    {
                        parseTable[stateNumber, trans.TransitionSymbol].ActionType = ParserActionType.Shift;
                        parseTable[stateNumber, trans.TransitionSymbol].Argument   = trans.Destination.StateNumber;
                    }
                    else
                    {
                        gotoTable[stateNumber, trans.TransitionSymbol - grammar.NumTerminals] = trans.Destination.StateNumber;
                    }
                }
            }

            grammar.ParserData.ParseTable = parseTable;
            grammar.ParserData.GotoTable  = gotoTable;

            // A null logfileName means logging is disabled, so explicit logging is skipped in that
            // case - the same guard the reduce/reduce branch above applies before writing the log.
            if (explicitLogging && (logfileName != null))
            {
                printAutomatonStates(logfileName);
            }

            if (reportOutput != null)
            {
                printSuccessReport(reportOutput);
            }
        }