/// <summary>
/// Parses input tokens supplied by the <i>lexer</i> and returns the result.
/// </summary>
/// <param name="lexer">The ILexer which will supply the Parser with tokens.</param>
/// <param name="userObject">An arbitrary object that is handed, unchanged, as the last argument
/// to every reduction action invoked during parsing.</param>
/// <returns>An object containing the value returned for the root nonterminal.</returns>
/// <exception cref="ParsingException">when the lexer's output doesn't conform to the grammar's rules or
/// when the lexer throws a ParsingException.</exception>
public object Parse(ILexer lexer, object userObject)
{
    Stack<PartialResult> resultStack = new Stack<PartialResult>();
    Stack<int> stateStack = new Stack<int>();

    // initial state
    stateStack.Push(0);

    bool done = false;
    Token nextToken = lexer.GetNextToken();
    int state = stateStack.Peek();

    // Keep going while we have not accepted and either the lexer still offers tokens or the
    // current (valid) state has a non-failing action for the token we are holding.
    while (!done && (lexer.HasTokens || ((state >= 0) && (parseTable[state, nextToken.SymbolCode].ActionType != ParserActionType.Fail))))
    {
        ParserAction nextAction = parseTable[state, nextToken.SymbolCode];

        switch (nextAction.ActionType)
        {
            case ParserActionType.Shift:
                resultStack.Push(new PartialResult(nextToken.Value, symbolNames[nextToken.SymbolCode], nextToken.LineNumber, nextToken.ColumnNumber));
                stateStack.Push(nextAction.Argument);
                state = stateStack.Peek();
                nextToken = lexer.GetNextToken();
                break;

            case ParserActionType.Reduce:
                // Based on the information about the relevant production, pop the
                // corresponding number of elements off both stacks.
                ProductionOutline production = productions[nextAction.Argument];
                Stack<PartialResult> constituents = new Stack<PartialResult>();
                for (int i = 0; i < production.NumRHSSymbols; i++)
                {
                    constituents.Push(resultStack.Pop());
                    stateStack.Pop();
                }
                state = stateStack.Peek();

                // We take the values of the constituents and compute the value of the composite
                // element using the relevant action. This new result replaces the old ones on the
                // result stack; the accompanying state for the state stack is found in the goto
                // table of the parser. The line and column number for the new result are taken from
                // the first constituent. If the nonterminal has no constituents, we take the position
                // of the upcoming token.
                int numConstituents = constituents.Count;
                object[] constituentValues = new object[numConstituents];
                int[] constituentLines = new int[numConstituents];
                int[] constituentColumns = new int[numConstituents];
                for (int i = 0; i < numConstituents; i++)
                {
                    // Popping the temporary stack restores the original left-to-right order.
                    PartialResult constituent = constituents.Pop();
                    constituentValues[i] = constituent.Value;
                    constituentLines[i] = constituent.LineNumber;
                    constituentColumns[i] = constituent.ColumnNumber;
                }

                object result = actions[nextAction.Argument](constituentValues, constituentLines, constituentColumns, userObject);

                if (numConstituents > 0)
                {
                    resultStack.Push(new PartialResult(result, symbolNames[production.LHSSymbol], constituentLines[0], constituentColumns[0]));
                }
                else
                {
                    resultStack.Push(new PartialResult(result, symbolNames[production.LHSSymbol], nextToken.LineNumber, nextToken.ColumnNumber));
                }

                stateStack.Push(gotoTable[state, production.LHSSymbol - numTerminals]);
                state = stateStack.Peek();

                // Production 0 is always $start ::= <start-symbol> $end, and reducing by it
                // means that we have read $end as well and managed to parse the input, so we
                // finish; should the lexer try to push more tokens at us, we report it below.
                if (nextAction.Argument == 0)
                {
                    done = true;
                }
                break;

            case ParserActionType.Fail:
                // Collect the names of all terminals that would have been acceptable here.
                // string.Join is used instead of trimming a ", "-prefixed StringBuilder so
                // that a state with no acceptable terminal yields an empty list rather than
                // an ArgumentOutOfRangeException that would hide the real parsing error.
                List<string> expectedTerminals = new List<string>();
                for (int terminal = 0; terminal < numTerminals; terminal++)
                {
                    if (parseTable[state, terminal].ActionType != ParserActionType.Fail)
                    {
                        expectedTerminals.Add(symbolNames[terminal]);
                    }
                }

                throw new ParsingException(string.Format("Unexpected terminal {0}({1}) encountered by the parser, expected one of the following terminals: {2}.",
                                                         symbolNames[nextToken.SymbolCode],
                                                         nextToken.Value,
                                                         string.Join(", ", expectedTerminals.ToArray())),
                                           nextToken.LineNumber, nextToken.ColumnNumber);
        }
    }

    if (lexer.HasTokens)
    {
        // The lexer still has something to say although we are already done; this should never
        // happen with our lexer, because it offers no more tokens once it has returned $end.
        nextToken = lexer.GetNextToken();
        throw new ParsingException(string.Format("There are additional symbols in the input string starting with terminal {0}({1}).",
                                                 symbolNames[nextToken.SymbolCode], nextToken.Value),
                                   nextToken.LineNumber, nextToken.ColumnNumber);
    }
    else if ((resultStack.Count == 1) && (resultStack.Peek().SymbolName == "$start"))
    {
        // everything is as it should be
        return(resultStack.Pop().Value);
    }
    else if (resultStack.Count == 0)
    {
        throw new ParsingException("There were no symbols in the input.", nextToken.LineNumber, nextToken.ColumnNumber);
    }
    else
    {
        // This means the parser was not yet satisfied with the input and expects more symbols.
        // The stack holds at least one element here, so trimming the leading ", " is safe.
        StringBuilder symbolsOnStack = new StringBuilder();
        foreach (PartialResult stackItem in resultStack.Reverse())
        {
            symbolsOnStack.Append(", ");
            symbolsOnStack.Append(stackItem.SymbolName);
        }

        throw new ParsingException("The entire input was reduced to more than one symbol: " + symbolsOnStack.ToString(2, symbolsOnStack.Length - 2) +
                                   ". Input text was probably incomplete.", nextToken.LineNumber, nextToken.ColumnNumber);
    }
}
/// <summary>
/// Computes the ParseTable and GotoTable of a Grammar's ParserData, logging the automata's graph
/// to a logfile should the <i>grammar</i> prove to be non-LALR(1) or should the caller explicitly
/// state he wants a log. Any reports generated by the processor will be sent to the <i>reportOutput</i>
/// TextWriter instance.
/// </summary>
/// <param name="grammar">The Grammar whose tables are to be computed. GrammarDefinition ought to be
/// set and filled with appropriate data and ParserData should be initialized.</param>
/// <param name="logfileName">The name of the file to which the automaton is to be logged; <b>null</b>
/// if logging should be disabled.</param>
/// <param name="explicitLogging">A Boolean value determining whether the automaton should be
/// written to the logfile even though there are no inconsistencies. Has no effect when
/// <i>logfileName</i> is <b>null</b>.</param>
/// <param name="reportOutput">The TextWriter to which the report should be written; <b>null</b>
/// if reporting should be disabled.</param>
/// <exception cref="GrammarException">when reduce/reduce conflicts remain after the lookahead
/// sets have been computed.</exception>
public void ComputeTables(Grammar grammar, string logfileName, bool explicitLogging, TextWriter reportOutput)
{
    // INITIALIZATION

    this.grammar = grammar;

    // initialize and compute productionsByRHSNonterminals
    productionsByRHSNonterminals = new List<Production>[grammar.GrammarDefinition.NumNonterminals];
    for (int nonterminal = 0; nonterminal < productionsByRHSNonterminals.Length; nonterminal++)
    {
        productionsByRHSNonterminals[nonterminal] = new List<Production>();
    }

    foreach (Production production in grammar.Productions)
    {
        foreach (int rhsSymbol in production.RHSSymbols)
        {
            // symbol codes at or above NumTerminals are treated as nonterminals
            if (rhsSymbol >= grammar.NumTerminals)
            {
                productionsByRHSNonterminals[rhsSymbol - grammar.NumTerminals].Add(production);
            }
        }
    }

    // Initialize transitionsByNonterminals; the lists are filled in later by
    // exploreTransitions, which also builds the LR(0) automaton.
    transitionsByNonterminals = new List<NonterminalTransition>[grammar.NumNonterminals];
    for (int nonterminal = 0; nonterminal < grammar.NumNonterminals; nonterminal++)
    {
        transitionsByNonterminals[nonterminal] = new List<NonterminalTransition>();
    }

    numNonterminalTransitions = 0;

    conflictingItems = new List<List<Item>>();
    lookaheadSets = new List<List<BitVectorSet>>();

    parserStates = new List<State>();

    // CONSTRUCTION OF THE LR(0) AUTOMATON

    // Create the initial ItemSet and kick off the recursive exploreTransitions.
    Item startItem = new Item(grammar.Productions[0], 0);
    ItemSet startIS = new ItemSet();
    startIS.Add(startItem);
    startIS.CloseItemSet(grammar);
    State initialState = new State(0, startIS);
    parserStates.Add(initialState);

    // Computes parserStates, the edges between them, nonterminalTransitions (the number of
    // nonterminal edges) and transitionsByNonterminals.
    exploreTransitions(initialState);

    // This bit of initialization had to wait until the automaton's states were computed.
    // -1 marks a state that has no entry in conflictingItems / lookaheadSets.
    stateLookaheadIndex = new int[parserStates.Count];
    for (int i = 0; i < parserStates.Count; i++)
    {
        stateLookaheadIndex[i] = -1;
    }

    // every state is assigned LookaheadComplexity.LR0 in the loop below before being inspected
    stateResolvedAt = new LookaheadComplexity[parserStates.Count];

    // RESOLVING NONDETERMINISTIC STATES (CONFLICTS)

    numInconsistentStates = 0;

    foreach (State state in parserStates)
    {
        List<Item> finalItems = new List<Item>();
        stateResolvedAt[state.StateNumber] = LookaheadComplexity.LR0;

        foreach (Item item in state.ItemSet)
        {
            if (item.IsFinal)
            {
                finalItems.Add(item);
            }
        }

        if (finalItems.Count >= 2)
        {
            // two or more final items in one state: the state needs lookaheads
            stateLookaheadIndex[state.StateNumber] = numInconsistentStates;
            stateResolvedAt[state.StateNumber] = LookaheadComplexity.Unresolved;
            numInconsistentStates++;
            conflictingItems.Add(finalItems);
        }
        else if (finalItems.Count >= 1)
        {
            // a single final item only needs lookaheads when the state can also read a terminal
            bool canRead = false;
            foreach (Transition trans in state.Transitions)
            {
                if (trans is TerminalTransition)
                {
                    canRead = true;
                    break;
                }
            }

            if (canRead)
            {
                stateLookaheadIndex[state.StateNumber] = numInconsistentStates;
                stateResolvedAt[state.StateNumber] = LookaheadComplexity.Unresolved;
                numInconsistentStates++;
                conflictingItems.Add(finalItems);
            }
        }
    }

    if (numInconsistentStates > 0)
    {
        // The input grammar is not LR(0), so lookahead sets have to be computed for the
        // inconsistent states. We follow the approach of DeRemer and Pennello: we first try to
        // resolve every inconsistent state using SLR(1) lookahead sets and only afterwards,
        // if needed, move on to computing LALR(1) lookaheads.

        // Step 1. Determine which nonterminals are nullable.
        computeNullableNonterminals();

        // Step 2. Compute SLR(1) lookaheads.
        // Prepare for computing the Read and SLR-Follow sets and try to resolve the conflicts
        // using SLR(1) lookaheads only.

        // Direct Read set for each nonterminal edge
        initDR = (trans =>
        {
            BitVectorSet set = new BitVectorSet(grammar.NumTerminals);
            foreach (Transition nextTrans in trans.Destination.Transitions)
            {
                if (nextTrans is TerminalTransition)
                {
                    set.Add(nextTrans.TransitionSymbol);
                }
            }
            return(set);
        });

        read = new BitVectorSet[numNonterminalTransitions];
        N_reads = new int[numNonterminalTransitions];

        reads = new ReadsOracle(this);

        if (!forceLalr1)
        {
            getNontermIndex = (nonterm => nonterm - grammar.NumTerminals);

            // The initial value for a nonterminal is the union of the Read sets of all edges
            // labelled with that nonterminal; this follows almost directly from the definition
            // of how Follow sets of SLR(1) parsers are computed.
            initSLR = (nonterm =>
            {
                BitVectorSet set = new BitVectorSet(grammar.NumTerminals);
                foreach (NonterminalTransition trans in transitionsByNonterminals[nonterm - grammar.NumTerminals])
                {
                    // N_reads == 0 means the Read set of this edge has not been computed yet
                    if (N_reads[getTransNumber(trans)] == 0)
                    {
                        digraphTraverse<NonterminalTransition>(trans, N_reads, read, reads, initDR, getTransNumber);
                    }

                    set.UnionWith(read[getTransNumber(trans)]);
                }
                return(set);
            });

            slr_follow = new BitVectorSet[grammar.NumNonterminals];
            N_slr = new int[grammar.NumNonterminals];

            slr_follows = new SLROracle(this);

            foreach (State state in parserStates)
            {
                if (stateResolvedAt[state.StateNumber] == LookaheadComplexity.Unresolved)
                {
                    List<BitVectorSet> stateLookaheads = new List<BitVectorSet>();

                    foreach (Item conflictItem in conflictingItems[stateLookaheadIndex[state.StateNumber]])
                    {
                        if (N_slr[getNontermIndex(conflictItem.Production.LHSSymbol)] == 0)
                        {
                            digraphTraverse<int>(conflictItem.Production.LHSSymbol, N_slr, slr_follow, slr_follows, initSLR, getNontermIndex);
                        }

                        stateLookaheads.Add(slr_follow[getNontermIndex(conflictItem.Production.LHSSymbol)]);
                    }

                    // States are visited in the same order in which stateLookaheadIndex was
                    // assigned, so appending keeps lookaheadSets aligned with conflictingItems.
                    lookaheadSets.Add(stateLookaheads);
                }
            }
        }

        // Step 3. Compute LALR(1) lookaheads.
        // If the SLR(1) lookaheads did not resolve all conflicts, compute LALR(1) lookaheads
        // for the states that are still unresolved.
        if (forceLalr1 || checkForConflicts(LookaheadComplexity.SLR1))
        {
            initRead = (trans =>
            {
                if (N_reads[getTransNumber(trans)] == 0)
                {
                    digraphTraverse<NonterminalTransition>(trans, N_reads, read, reads, initDR, getTransNumber);
                }

                // hand out a copy of the Read set
                return(new BitVectorSet(read[getTransNumber(trans)]));
            });

            follow = new BitVectorSet[numNonterminalTransitions];
            N_includes = new int[numNonterminalTransitions];

            includes = new IncludesOracle(this);

            foreach (State state in parserStates)
            {
                if (stateResolvedAt[state.StateNumber] == LookaheadComplexity.Unresolved)
                {
                    List<BitVectorSet> stateLookaheads = new List<BitVectorSet>();

                    foreach (Item conflictItem in conflictingItems[stateLookaheadIndex[state.StateNumber]])
                    {
                        // the lookahead set of an item is the union of the Follow sets of all
                        // nonterminal edges returned by lookback for that item
                        BitVectorSet lookaheadSet = new BitVectorSet(grammar.NumTerminals);

                        foreach (NonterminalTransition trans in lookback(state, conflictItem))
                        {
                            if (N_includes[getTransNumber(trans)] == 0)
                            {
                                digraphTraverse<NonterminalTransition>(trans, N_includes, follow, includes, initRead, getTransNumber);
                            }

                            lookaheadSet.UnionWith(follow[getTransNumber(trans)]);
                        }

                        stateLookaheads.Add(lookaheadSet);
                    }

                    // If this is our first computation of lookahead sets, we have to create a
                    // new entry for the state in lookaheadSets; otherwise we overwrite the one
                    // created during the previous computation.
                    if (forceLalr1)
                    {
                        lookaheadSets.Add(stateLookaheads);
                    }
                    else
                    {
                        lookaheadSets[stateLookaheadIndex[state.StateNumber]] = stateLookaheads;
                    }
                }
            }

            // Step 4. Verify the parser.
            // If the parser still contains conflicts, we write the state automaton to the log
            // for the user and mark the conflicts in it. If the parser contains no conflicts,
            // we write our findings into the tables and we are done.
            bool reduceReduceConflicts;
            bool conflicts = checkForConflicts(LookaheadComplexity.LALR1, out reduceReduceConflicts);

            if (reduceReduceConflicts)
            {
                if (logfileName != null)
                {
                    printAutomatonStates(logfileName);
                    throw new GrammarException(string.Format("Reduce/reduce conflicts detected in the resulting parser.\r\nThe grammar isn't LALR(1).\r\nCheck the log file {0} for details.", logfileName));
                }
                else
                {
                    throw new GrammarException("Reduce/reduce conflicts detected in the resulting parser.\r\nThe grammar isn't LALR(1).");
                }
            }
            else if (conflicts)
            {
                // only shift/reduce conflicts remain; they are reported, not treated as fatal
                if (reportOutput != null)
                {
                    printShiftReduceConflicts(reportOutput);
                }
            }
        }
    }

    // FILLING IN THE TABLES

    ParserAction[,] parseTable = new ParserAction[parserStates.Count, grammar.NumTerminals];
    int[,] gotoTable = new int[parserStates.Count, grammar.NumNonterminals];

    // -1 marks "no goto entry"
    for (int i = 0; i < parserStates.Count; i++)
    {
        for (int j = 0; j < grammar.NumNonterminals; j++)
        {
            gotoTable[i, j] = -1;
        }
    }

    for (int stateNumber = 0; stateNumber < parserStates.Count; stateNumber++)
    {
        if (stateLookaheadIndex[stateNumber] >= 0)
        {
            // state with lookahead sets: reduce only on the terminals in each item's set
            for (int i = 0; i < conflictingItems[stateLookaheadIndex[stateNumber]].Count; i++)
            {
                ParserAction action = new ParserAction();
                action.ActionType = ParserActionType.Reduce;
                action.Argument = conflictingItems[stateLookaheadIndex[stateNumber]][i].Production.ProductionCode;

                foreach (int symbol in lookaheadSets[stateLookaheadIndex[stateNumber]][i])
                {
                    parseTable[stateNumber, symbol] = action;
                }
            }
        }
        else
        {
            // state without lookahead sets: a final item reduces on every terminal
            foreach (Item item in parserStates[stateNumber].ItemSet)
            {
                if (item.IsFinal)
                {
                    ParserAction action = new ParserAction();
                    action.ActionType = ParserActionType.Reduce;
                    action.Argument = item.Production.ProductionCode;

                    for (int symbol = 0; symbol < grammar.NumTerminals; symbol++)
                    {
                        parseTable[stateNumber, symbol] = action;
                    }
                }
            }
        }

        // Transitions are written after the reductions, so a shift overwrites any reduce
        // entered for the same terminal.
        foreach (Transition trans in parserStates[stateNumber].Transitions)
        {
            if (trans is TerminalTransition)
            {
                parseTable[stateNumber, trans.TransitionSymbol].ActionType = ParserActionType.Shift;
                parseTable[stateNumber, trans.TransitionSymbol].Argument = trans.Destination.StateNumber;
            }
            else
            {
                gotoTable[stateNumber, trans.TransitionSymbol - grammar.NumTerminals] = trans.Destination.StateNumber;
            }
        }
    }

    grammar.ParserData.ParseTable = parseTable;
    grammar.ParserData.GotoTable = gotoTable;

    // A null logfileName means logging is disabled, so explicit logging is skipped in that
    // case, mirroring the null check in the reduce/reduce branch above.
    if (explicitLogging && (logfileName != null))
    {
        printAutomatonStates(logfileName);
    }

    if (reportOutput != null)
    {
        printSuccessReport(reportOutput);
    }
}