/// <summary>
/// Wraps every production action of <paramref name="grammar"/> in a generated static method,
/// compiles the generated source with the C# compiler and stores the bytes of the resulting
/// assembly in <c>grammar.ParserData.ActionAssemblyBytes</c>.
/// </summary>
/// <param name="grammar">The grammar whose <c>GrammarCode</c> (header code, production actions,
/// nonterminal types and user object type) is compiled.</param>
/// <param name="compilerOptions">Extra options handed to the compiler through
/// <c>CompilerParameters.CompilerOptions</c>.</param>
/// <exception cref="InvalidSpecificationException">The generated code produced at least one
/// compiler error; the exception carries the text of every error (warnings are dropped).</exception>
public void CompileGrammarCode(Grammar grammar, string compilerOptions)
{
    // First we wrap the user's actions into methods which supply the user's
    // code with correctly typed arguments and information about line and column
    // locations of symbols. This wrapper code also exposes a handy accessor function
    // which gives us delegates to all the actions in one handy array.
    StringBuilder codeBuilder = new StringBuilder();

    // First goes the user's header code with the possible "using" statements.
    codeBuilder.Append(grammar.GrammarCode.HeaderCode);

    codeBuilder.Append(@" namespace YetAnotherParserGenerator.UserGenerated { class ActionCollection { public static System.Func<object[], int[], int[], object, object>[] RetrieveActions() { return new System.Func<object[], int[], int[], object, object>[] { ");

    // The array initializer lists the wrapper methods Action0, Action1, ... in production order.
    for (int i = 0; i < grammar.GrammarCode.ProductionActions.Length; i++)
    {
        if (i > 0)
        {
            codeBuilder.Append(", ");
        }
        codeBuilder.Append(string.Format("Action{0}", i));
    }
    codeBuilder.AppendLine("}; }");

    // One wrapper method per production: it declares the locals _1, _2, ... (values),
    // _line1, ... and _column1, ... (positions) and _state, then runs the user's action code.
    for (int i = 0; i < grammar.GrammarCode.ProductionActions.Length; i++)
    {
        codeBuilder.AppendLine("public static object Action" + i.ToString() +
            "(object[] __args, int[] __lines, int[] __columns, object __state) {");

        for (int j = 0; j < grammar.Productions[i].RHSSymbols.Count; j++)
        {
            // A terminal's value is always the string it spans in the input.
            if (grammar.Productions[i].RHSSymbols[j] < grammar.NumTerminals)
            {
                codeBuilder.AppendLine(string.Format("string _{0} = (string) __args[{1}];", j + 1, j));
            }
            else
            {
                // The nonterminal value is either interpreted as the user-specified type
                // or as a generic object if no type was further specified by the user.
                string nonterminalType = grammar.GrammarCode.NonterminalTypes
                    [grammar.Productions[i].RHSSymbols[j] - grammar.NumTerminals];
                if (nonterminalType != null)
                {
                    codeBuilder.AppendLine(string.Format("{0} _{1} = ({0}) __args[{2}];", nonterminalType, j + 1, j));
                }
                else
                {
                    codeBuilder.AppendLine(string.Format("object _{0} = __args[{1}];", j + 1, j));
                }
            }

            codeBuilder.AppendLine(string.Format("int _line{0} = __lines[{1}];", j + 1, j));
            codeBuilder.AppendLine(string.Format("int _column{0} = __columns[{1}];", j + 1, j));
        }

        if (grammar.GrammarCode.UserObjectType != null)
        {
            codeBuilder.AppendLine(string.Format("{0} _state = ({0}) __state;", grammar.GrammarCode.UserObjectType));
        }
        else
        {
            codeBuilder.AppendLine("object _state = __state;");
        }

        codeBuilder.Append(grammar.GrammarCode.ProductionActions[i]);
        codeBuilder.AppendLine("}");
    }

    // We close our wrapper class and namespace.
    codeBuilder.Append("} }");

    CompilerParameters cp = new CompilerParameters();
    cp.GenerateExecutable = false;
    cp.CompilerOptions = compilerOptions;

    // The provider is disposable; the CompilerResults it returns stay usable after it is disposed.
    CompilerResults cr;
    using (CSharpCodeProvider compiler = new CSharpCodeProvider())
    {
        cr = compiler.CompileAssemblyFromSource(cp, codeBuilder.ToString());
    }

    List<string> errors = new List<string>();
    foreach (CompilerError error in cr.Errors)
    {
        if (!error.IsWarning)
        {
            errors.Add(error.ToString());
        }
    }

    if (errors.Count > 0)
    {
        throw new InvalidSpecificationException(errors);
    }

    // Only the bytes of the assembly are kept; the file itself is removed even if reading it fails.
    string assemblyPath = cr.PathToAssembly;
    try
    {
        grammar.ParserData.ActionAssemblyBytes = File.ReadAllBytes(assemblyPath);
    }
    finally
    {
        File.Delete(assemblyPath);
    }
}
/// <summary>
/// Reads the grammar specification stored in the file <paramref name="specificationPath"/> and
/// builds the corresponding <see cref="Grammar"/>.
/// </summary>
/// <remarks>
/// When the BOOTSTRAP symbol is defined the specification is processed by the hand-written
/// token-by-token parser below. Otherwise it is processed by a <c>Parser</c> created from the
/// runtime data stored in the <c>SpecificationGrammar</c> resource.
/// </remarks>
/// <param name="specificationPath">Path of the specification file; its whole content is read as text.</param>
/// <param name="warningMessages">In the BOOTSTRAP build, receives the list of warnings (non-reducible
/// nonterminals, unused nonterminals and terminals). In the other build it is assigned by the
/// <c>GrammarParserLocals</c> constructor.</param>
/// <returns>The grammar described by the specification.</returns>
/// <exception cref="InvalidSpecificationException">(BOOTSTRAP build) At least one error was found in
/// the specification; the exception carries all errors followed by all warnings.</exception>
public static Grammar ParseGrammar(string specificationPath, out IList<string> warningMessages)
{
#if BOOTSTRAP
    GrammarLexer lexer = new GrammarLexer();
    lexer.SourceString = File.ReadAllText(specificationPath);

    Token token = lexer.GetNextToken();

    // Errors and warnings are collected here. When an error occurs we keep reading and only at the
    // very end throw an exception carrying all errors and warnings; if there were no errors, the
    // list of warnings is handed back to the caller.
    List<string> errorMessages = new List<string>();
    warningMessages = new List<string>();

    // The code to be inserted at the start of the generated code.
    string headerCode = null;

    // Names of all symbols; serves as the lookup table from a symbol's code to its name.
    List<string> symbolNames = new List<string>();
    // The inverse of symbolNames: maps a symbol's name to its code.
    Dictionary<string, int> symbolCodes = new Dictionary<string, int>();

    // The regular expressions defining the terminal symbols. They are kept separately instead of
    // being merged right away, so that when compiling the combined regex fails we can easily test
    // which expressions are to blame.
    List<string> regexes = new List<string>();
    // For every expression in regexes, the position where it was found (for detailed messages).
    List<int> regexLines = new List<int>();
    List<int> regexColumns = new List<int>();
    // For every expression in regexes, the code of the symbol it describes, or -1 when the
    // expression matches strings that are to be ignored; runtime data used directly by the lexer.
    List<int> groupSymbolCodes = new List<int>();

    // The resulting regular expression the lexer scans tokens with; the other part of the lexer's
    // runtime data.
    Regex regex = null;

    // Name of the pseudo-terminal whose tokens are discarded instead of being sent to the parser.
    string nullTerminal = "";

    // Global options for the .NET regex engine (case insensitive, multiline...).
    string regexOpts = null;

    if (token.SymbolCode == CODE_HEADER)
    {
        token = lexer.GetNextToken();
        headerCode = token.Value;
        token = lexer.GetNextToken();
    }

    // skipping LEXER
    token = lexer.GetNextToken();

    if (token.SymbolCode == CODE_NULL)
    {
        token = lexer.GetNextToken();
        nullTerminal = token.Value;
        token = lexer.GetNextToken();
    }

    if (token.SymbolCode == CODE_REGEXOPTS)
    {
        regexOpts = token.Value;
        token = lexer.GetNextToken();
    }

    symbolNames.Add("$end");
    symbolCodes["$end"] = 0;

    while (token.SymbolCode == CODE_IDENTIFIER)
    {
        string symbol = token.Value;
        token = lexer.GetNextToken();

        // skipping EQUALS
        token = lexer.GetNextToken();

        // Remember the position of the regex token itself before moving on, so that a later
        // "invalid regular expression" message points at the expression and not at what follows it.
        string capturingRegex = token.Value;
        int regexLine = token.LineNumber;
        int regexColumn = token.ColumnNumber;
        token = lexer.GetNextToken();

        if (symbol == nullTerminal)
        {
            groupSymbolCodes.Add(-1);
        }
        else
        {
            if (!symbolCodes.ContainsKey(symbol))
            {
                symbolNames.Add(symbol);
                symbolCodes[symbol] = symbolNames.Count - 1;
            }
            groupSymbolCodes.Add(symbolCodes[symbol]);
        }

        regexes.Add(capturingRegex);
        regexLines.Add(regexLine);
        regexColumns.Add(regexColumn);
    }

    StringBuilder pattern = new StringBuilder();
    if (regexOpts != null)
    {
        pattern.Append(regexOpts);
    }

    for (int i = 0; i < regexes.Count; i++)
    {
        // The user's expressions are joined with '|' in the order they were given (in the .NET
        // regex engine the leftmost alternative takes precedence) and each one is put in a capture
        // group named __i, where i is the zero-based index of the expression.
        if (i != 0)
        {
            pattern.Append('|');
        }
        pattern.AppendFormat("(?<{0}>{1})", "__" + i.ToString(), regexes[i]);
    }

    try
    {
        regex = new Regex(pattern.ToString(), RegexOptions.Compiled);
    }
    catch (ArgumentException combinedError)
    {
        int errorsBeforeDiagnosis = errorMessages.Count;

        // Only test the options when there are some: new Regex(null) throws ArgumentNullException,
        // which is an ArgumentException and would be misreported as invalid options.
        if (regexOpts != null)
        {
            try
            {
                new Regex(regexOpts);
            }
            catch (ArgumentException)
            {
                // FIXME: We no longer have the line and column data on regexOpts.
                errorMessages.Add(string.Format("{0},{1}: The RegEx options are invalid.", -1, -1));
            }
        }

        for (int i = 0; i < regexes.Count; i++)
        {
            try
            {
                new Regex(regexes[i]);
            }
            catch (ArgumentException)
            {
                errorMessages.Add(string.Format("{0},{1}: This regular expression is invalid.",
                    regexLines[i], regexColumns[i]));
            }
        }

        // Never swallow the failure: if no single part could be blamed, report the combined
        // pattern itself so that we do not go on with a null regex.
        if (errorMessages.Count == errorsBeforeDiagnosis)
        {
            errorMessages.Add(string.Format("{0},{1}: The combined lexer regular expression is invalid: {2}",
                -1, -1, combinedError.Message));
        }
    }

    int numTerminals = symbolNames.Count;

    // skipping PARSER
    token = lexer.GetNextToken();

    // Nonterminals that appeared on the left-hand side of some rule.
    HashSet<int> reducibleNonterminals = new HashSet<int>();
    // Nonterminals that appeared on the right-hand side of some rule.
    HashSet<int> usedNonterminals = new HashSet<int>();

    bool[] terminalUsed = new bool[numTerminals];

    List<ProductionWithAction> productions = new List<ProductionWithAction>();

    symbolNames.Add("$start");
    symbolCodes["$start"] = numTerminals;

    // <$start> is put here as an exception, because we do not want anybody to put it on the
    // right-hand side of a rule anyway.
    usedNonterminals.Add(symbolCodes["$start"]);

    // skipping START
    token = lexer.GetNextToken();
    // skipping LANGLE
    token = lexer.GetNextToken();

    string startSymbol = token.Value;
    if (symbolCodes.ContainsKey(startSymbol) && symbolCodes[startSymbol] < numTerminals)
    {
        errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
            token.LineNumber, token.ColumnNumber, startSymbol));
    }
    token = lexer.GetNextToken();

    // skipping RANGLE
    token = lexer.GetNextToken();

    symbolNames.Add(startSymbol);
    symbolCodes[startSymbol] = symbolNames.Count - 1;

    string userObjectType = null;

    if (token.SymbolCode == CODE_USEROBJECT)
    {
        token = lexer.GetNextToken();

        if (token.SymbolCode == CODE_QUOTED)
        {
            // Strip the first and last character (the quotes) of the token.
            userObjectType = token.Value.Substring(1, token.Value.Length - 2);
            token = lexer.GetNextToken();
        }
        else
        {
            // A dotted name: identifier (DOT identifier)*
            StringBuilder typeBuilder = new StringBuilder(token.Value);
            token = lexer.GetNextToken();
            while (token.SymbolCode == CODE_DOT)
            {
                typeBuilder.Append(".");
                token = lexer.GetNextToken();
                typeBuilder.Append(token.Value);
                token = lexer.GetNextToken();
            }
            userObjectType = typeBuilder.ToString();
        }
    }

    // Our rule number 0, which describes the way we augment the grammar.
    productions.Add(new ProductionWithAction(new Production(
        symbolCodes["$start"], new int[] { symbolCodes[startSymbol], symbolCodes["$end"] }),
        "{ return _1; }"));
    reducibleNonterminals.Add(symbolCodes["$start"]);
    usedNonterminals.Add(symbolCodes[startSymbol]);
    terminalUsed[symbolCodes["$end"]] = true;

    var typeMappings = new Dictionary<string, string>();

    // Processing of the rules.
    while (token.SymbolCode != CODE_END)
    {
        if (token.SymbolCode == CODE_TYPE)
        {
            token = lexer.GetNextToken();

            // skipping LANGLE
            token = lexer.GetNextToken();

            string nonterminal = token.Value;
            if (symbolCodes.ContainsKey(nonterminal) && symbolCodes[nonterminal] < numTerminals)
            {
                errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                    token.LineNumber, token.ColumnNumber, nonterminal));
            }
            if (!symbolCodes.ContainsKey(nonterminal))
            {
                symbolNames.Add(nonterminal);
                symbolCodes[nonterminal] = symbolNames.Count - 1;
            }
            token = lexer.GetNextToken();

            // skipping RANGLE
            token = lexer.GetNextToken();

            if (token.SymbolCode == CODE_QUOTED)
            {
                // QUOTED
                typeMappings.Add(nonterminal, token.Value.Substring(1, token.Value.Length - 2));
                token = lexer.GetNextToken();
            }
            else
            {
                StringBuilder typeBuilder = new StringBuilder(token.Value);
                token = lexer.GetNextToken();
                while (token.SymbolCode == CODE_DOT)
                {
                    typeBuilder.Append(".");
                    token = lexer.GetNextToken();
                    typeBuilder.Append(token.Value);
                    token = lexer.GetNextToken();
                }
                typeMappings.Add(nonterminal, typeBuilder.ToString());
            }
        }
        else
        {
            // Extract the symbol on the left-hand side and process it.

            // skipping LANGLE
            token = lexer.GetNextToken();

            string lhsSymbol = token.Value;
            if (symbolCodes.ContainsKey(lhsSymbol) && symbolCodes[lhsSymbol] < numTerminals)
            {
                errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                    token.LineNumber, token.ColumnNumber, lhsSymbol));
            }
            if (!symbolCodes.ContainsKey(lhsSymbol))
            {
                symbolNames.Add(lhsSymbol);
                symbolCodes[lhsSymbol] = symbolNames.Count - 1;
            }
            // HashSet.Add is a no-op for an element that is already present.
            reducibleNonterminals.Add(symbolCodes[lhsSymbol]);
            token = lexer.GetNextToken();

            // skipping RANGLE
            token = lexer.GetNextToken();

            int lhsSymbolCode = symbolCodes[lhsSymbol];

            // Process the right-hand side, which may consist of several symbol lists separated
            // by '|'. Each of those lists forms one rule of its own.
            while ((token.SymbolCode == CODE_DERIVES) || (token.SymbolCode == CODE_OR))
            {
                token = lexer.GetNextToken();

                List<int> rhsSymbols = new List<int>();

                while (token.SymbolCode != CODE_CODE)
                {
                    int rhsSymbolCode = -1;

                    if (token.SymbolCode == CODE_LANGLE)
                    {
                        // skipping LANGLE
                        token = lexer.GetNextToken();

                        string rhsSymbol = token.Value;
                        if (symbolCodes.ContainsKey(rhsSymbol) && symbolCodes[rhsSymbol] < numTerminals)
                        {
                            errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                token.LineNumber, token.ColumnNumber, rhsSymbol));
                        }
                        if (!symbolCodes.ContainsKey(rhsSymbol))
                        {
                            symbolNames.Add(rhsSymbol);
                            symbolCodes[rhsSymbol] = symbolNames.Count - 1;
                        }
                        usedNonterminals.Add(symbolCodes[rhsSymbol]);
                        token = lexer.GetNextToken();

                        // skipping RANGLE
                        token = lexer.GetNextToken();

                        rhsSymbolCode = symbolCodes[rhsSymbol];
                    }
                    else
                    {
                        string rhsSymbol = token.Value;
                        int terminalCode;
                        // A bare name must denote a terminal. A name that is unknown, or that is
                        // known only as a nonterminal (code >= numTerminals), is reported; indexing
                        // terminalUsed with a nonterminal code would run past the end of the array.
                        if (!symbolCodes.TryGetValue(rhsSymbol, out terminalCode) || terminalCode >= numTerminals)
                        {
                            errorMessages.Add(string.Format("{0},{1}: The terminal '{2}' is used but not defined.",
                                token.LineNumber, token.ColumnNumber, rhsSymbol));
                        }
                        else
                        {
                            rhsSymbolCode = terminalCode;
                            terminalUsed[rhsSymbolCode] = true;
                        }
                        token = lexer.GetNextToken();
                    }

                    rhsSymbols.Add(rhsSymbolCode);
                }

                string code = token.Value;
                token = lexer.GetNextToken();

                productions.Add(new ProductionWithAction(new Production(lhsSymbolCode, rhsSymbols), code));
            }
        }
    }

    // ToArray forces the lazy Intersect to be evaluated now, so that the sets it reads are not
    // modified by the following statements while it is still being enumerated.
    int[] theGoodOnes = usedNonterminals.Intersect(reducibleNonterminals).ToArray();
    usedNonterminals.ExceptWith(theGoodOnes);
    reducibleNonterminals.ExceptWith(theGoodOnes);

    foreach (int nonterminal in usedNonterminals)
    {
        warningMessages.Add(string.Format("Warning: The nonterminal <{0}> isn't reducible.", symbolNames[nonterminal]));
    }

    foreach (int nonterminal in reducibleNonterminals)
    {
        warningMessages.Add(string.Format("Warning: The nonterminal <{0}> is defined but never used.", symbolNames[nonterminal]));
    }

    for (int terminal = 0; terminal < numTerminals; terminal++)
    {
        if (!terminalUsed[terminal])
        {
            warningMessages.Add(string.Format("Warning: The terminal '{0}' is defined but never used.", symbolNames[terminal]));
        }
    }

    if (errorMessages.Count > 0)
    {
        throw new InvalidSpecificationException(errorMessages.Concat(warningMessages));
    }

    // Everything has been read and checked. Now we only sort the rules by their left-hand side,
    // renumber them and compute, for every nonterminal, the index at which its rules start.
    Production[] productionsArray = new Production[productions.Count];
    productionsArray[0] = productions[0].Production;
    string[] actions = new string[productions.Count];
    actions[0] = productions[0].Action;

    IEnumerable<ProductionWithAction> sortedProductions =
        productions.GetRange(1, productions.Count - 1).OrderBy((prod => prod.Production.LHSSymbol));

    int k = 1;
    foreach (ProductionWithAction productionWithAction in sortedProductions)
    {
        productionsArray[k] = productionWithAction.Production;
        productionsArray[k].ProductionCode = k;
        actions[k] = productionWithAction.Action;
        k++;
    }

    int numNonterminals = symbolCodes.Count - numTerminals;
    int[] nonterminalProductionOffset = new int[numNonterminals + 1];

    int offset = 0;
    for (int nonterminal = 0; nonterminal < numNonterminals; nonterminal++)
    {
        nonterminalProductionOffset[nonterminal] = offset;
        while ((offset < productionsArray.Length) &&
               (productionsArray[offset].LHSSymbol == numTerminals + nonterminal))
        {
            offset++;
        }
    }
    // The extra last entry holds the offset reached after the last nonterminal.
    nonterminalProductionOffset[nonterminalProductionOffset.Length - 1] = offset;

    string[] nonterminalTypes = new string[numNonterminals];
    foreach (var typeMapping in typeMappings)
    {
        nonterminalTypes[symbolCodes[typeMapping.Key] - numTerminals] = typeMapping.Value;
    }

    // And now we just wrap it all up and return it.
    GrammarDefinition grammarDefinition = new GrammarDefinition(symbolNames.ToArray(), productionsArray,
        nonterminalProductionOffset, numTerminals);
    LexerData lexerData = new LexerData(regex, groupSymbolCodes);
    GrammarCode grammarCode = new GrammarCode(headerCode, actions, nonterminalTypes, userObjectType);

    Grammar grammar = new Grammar(grammarDefinition, lexerData, grammarCode);

    return(grammar);
#else
    LexerData lexerData;
    ParserData parserData;
    Grammar.ReadRuntimeDataFromStream(
        new MemoryStream(YetAnotherParserGenerator.Properties.Resources.SpecificationGrammar),
        out lexerData, out parserData);

    GrammarLexer lexer = new GrammarLexer();
    Parser parser = new Parser(parserData);

    GrammarParserLocals locals = new GrammarParserLocals(specificationPath, out warningMessages);

    lexer.SourceString = File.ReadAllText(specificationPath);

    return((Grammar)parser.Parse(lexer, locals));
#endif
}
/// <summary>
/// Computes the ParseTable and GotoTable of a Grammar's ParserData, logging the automata's graph
/// to a logfile should the <i>grammar</i> prove to be non-LALR(1) or should the caller explicitly
/// state he wants a log. Any reports generated by the processor will be sent to the <i>reportOutput</i>
/// TextWriter instance.
/// </summary>
/// <param name="grammar">The Grammar whose tables are to be computed. GrammarDefinition ought to be
/// set and filled with appropriate data and ParserData should be initialized.</param>
/// <param name="logfileName">The name of the file to which the automaton is to be logged; <b>null</b>
/// if logging should be disabled (nothing is logged then, even when <i>explicitLogging</i> is set).</param>
/// <param name="explicitLogging">A Boolean value determining whether the automaton should be
/// written to the logfile even though there are no inconsistencies.</param>
/// <param name="reportOutput">The TextWriter to which the report should be written; <b>null</b>
/// if reporting should be disabled.</param>
/// <exception cref="GrammarException">Reduce/reduce conflicts remain after the LALR(1) lookahead
/// sets have been computed.</exception>
public void ComputeTables(Grammar grammar, string logfileName, bool explicitLogging, TextWriter reportOutput)
{
    // INITIALIZATION

    this.grammar = grammar;

    // Allocate and fill productionsByRHSNonterminals: for every nonterminal, the productions
    // that contain it on their right-hand side (once per occurrence).
    productionsByRHSNonterminals = new List<Production>[grammar.GrammarDefinition.NumNonterminals];
    for (int nonterminal = 0; nonterminal < productionsByRHSNonterminals.Length; nonterminal++)
    {
        productionsByRHSNonterminals[nonterminal] = new List<Production>();
    }

    foreach (Production production in grammar.Productions)
    {
        foreach (int rhsSymbol in production.RHSSymbols)
        {
            if (rhsSymbol >= grammar.NumTerminals)
            {
                productionsByRHSNonterminals[rhsSymbol - grammar.NumTerminals].Add(production);
            }
        }
    }

    // Allocate transitionsByNonterminals; the lists are filled later by exploreTransitions,
    // which also builds the LR(0) automaton.
    transitionsByNonterminals = new List<NonterminalTransition>[grammar.NumNonterminals];
    for (int nonterminal = 0; nonterminal < grammar.NumNonterminals; nonterminal++)
    {
        transitionsByNonterminals[nonterminal] = new List<NonterminalTransition>();
    }

    numNonterminalTransitions = 0;
    conflictingItems = new List<List<Item>>();
    lookaheadSets = new List<List<BitVectorSet>>();
    parserStates = new List<State>();

    // BUILDING THE LR(0) AUTOMATON

    // Create the initial ItemSet and start the recursive exploreTransitions.
    Item startItem = new Item(grammar.Productions[0], 0);
    ItemSet startIS = new ItemSet();
    startIS.Add(startItem);
    startIS.CloseItemSet(grammar);

    State initialState = new State(0, startIS);
    parserStates.Add(initialState);

    // Computes parserStates, the edges between them, numNonterminalTransitions (the number of
    // nonterminal edges) and transitionsByNonterminals.
    exploreTransitions(initialState);

    // This piece of initialization had to wait until the states of the automaton were known.
    // -1 marks a state that has no entry in conflictingItems / lookaheadSets.
    stateLookaheadIndex = new int[parserStates.Count];
    for (int i = 0; i < parserStates.Count; i++)
    {
        stateLookaheadIndex[i] = -1;
    }
    // Every state is marked LR0 in the loop below before it is examined.
    stateResolvedAt = new LookaheadComplexity[parserStates.Count];

    // RESOLVING NONDETERMINISTIC STATES (CONFLICTS)

    numInconsistentStates = 0;

    foreach (State state in parserStates)
    {
        List<Item> finalItems = new List<Item>();
        stateResolvedAt[state.StateNumber] = LookaheadComplexity.LR0;

        foreach (Item item in state.ItemSet)
        {
            if (item.IsFinal)
            {
                finalItems.Add(item);
            }
        }

        if (finalItems.Count >= 2)
        {
            // Two or more final items: the state is inconsistent.
            stateLookaheadIndex[state.StateNumber] = numInconsistentStates;
            stateResolvedAt[state.StateNumber] = LookaheadComplexity.Unresolved;
            numInconsistentStates++;
            conflictingItems.Add(finalItems);
        }
        else if (finalItems.Count >= 1)
        {
            // Exactly one final item: inconsistent only if the state also has a terminal transition.
            bool canRead = false;
            foreach (Transition trans in state.Transitions)
            {
                if (trans is TerminalTransition)
                {
                    canRead = true;
                    break;
                }
            }

            if (canRead)
            {
                stateLookaheadIndex[state.StateNumber] = numInconsistentStates;
                stateResolvedAt[state.StateNumber] = LookaheadComplexity.Unresolved;
                numInconsistentStates++;
                conflictingItems.Add(finalItems);
            }
        }
    }

    if (numInconsistentStates > 0)
    {
        // The input grammar is not LR(0), so lookahead sets have to be computed for the
        // inconsistent states. We follow the approach of DeRemer and Pennello: each inconsistent
        // state is first attempted with SLR(1) lookahead sets and only afterwards, if needed,
        // with LALR(1) lookaheads.

        // Step 1. Determine which nonterminals are nullable.
        computeNullableNonterminals();

        // Step 2. Compute the SLR(1) lookaheads.
        // Prepare for computing the Read and SLR-Follow sets and try to resolve the conflicts
        // with SLR(1) lookaheads only.

        // The Direct Read set of a nonterminal edge: the symbols of the terminal transitions
        // leaving the edge's destination state.
        initDR = (trans =>
        {
            BitVectorSet set = new BitVectorSet(grammar.NumTerminals);
            foreach (Transition nextTrans in trans.Destination.Transitions)
            {
                if (nextTrans is TerminalTransition)
                {
                    set.Add(nextTrans.TransitionSymbol);
                }
            }
            return(set);
        });

        read = new BitVectorSet[numNonterminalTransitions];
        N_reads = new int[numNonterminalTransitions];
        reads = new ReadsOracle(this);

        if (!forceLalr1)
        {
            getNontermIndex = (nonterm => nonterm - grammar.NumTerminals);

            // The initial value for a nonterminal is the union of the Read sets of all edges
            // labelled with that nonterminal; this follows almost directly from the definition
            // of the Follow sets of SLR(1) parsers.
            initSLR = (nonterm =>
            {
                BitVectorSet set = new BitVectorSet(grammar.NumTerminals);
                foreach (NonterminalTransition trans in transitionsByNonterminals[nonterm - grammar.NumTerminals])
                {
                    // A zero in N_reads means the Read set of this edge has not been computed yet.
                    if (N_reads[getTransNumber(trans)] == 0)
                    {
                        digraphTraverse<NonterminalTransition>(trans, N_reads, read, reads, initDR, getTransNumber);
                    }
                    set.UnionWith(read[getTransNumber(trans)]);
                }
                return(set);
            });

            slr_follow = new BitVectorSet[grammar.NumNonterminals];
            N_slr = new int[grammar.NumNonterminals];
            slr_follows = new SLROracle(this);

            foreach (State state in parserStates)
            {
                if (stateResolvedAt[state.StateNumber] == LookaheadComplexity.Unresolved)
                {
                    List<BitVectorSet> stateLookaheads = new List<BitVectorSet>();

                    foreach (Item conflictItem in conflictingItems[stateLookaheadIndex[state.StateNumber]])
                    {
                        if (N_slr[getNontermIndex(conflictItem.Production.LHSSymbol)] == 0)
                        {
                            digraphTraverse<int>(conflictItem.Production.LHSSymbol, N_slr, slr_follow,
                                slr_follows, initSLR, getNontermIndex);
                        }
                        stateLookaheads.Add(slr_follow[getNontermIndex(conflictItem.Production.LHSSymbol)]);
                    }

                    // States are visited in order, so the entry lands at index
                    // stateLookaheadIndex[state.StateNumber].
                    lookaheadSets.Add(stateLookaheads);
                }
            }
        }

        // Step 3. Compute the LALR(1) lookaheads.
        // If the SLR(1) lookaheads did not resolve all conflicts, LALR(1) lookaheads are computed
        // for the states that remain unresolved.
        if (forceLalr1 || checkForConflicts(LookaheadComplexity.SLR1))
        {
            initRead = (trans =>
            {
                if (N_reads[getTransNumber(trans)] == 0)
                {
                    digraphTraverse<NonterminalTransition>(trans, N_reads, read, reads, initDR, getTransNumber);
                }
                // Hand out a copy of the Read set.
                return(new BitVectorSet(read[getTransNumber(trans)]));
            });

            follow = new BitVectorSet[numNonterminalTransitions];
            N_includes = new int[numNonterminalTransitions];
            includes = new IncludesOracle(this);

            foreach (State state in parserStates)
            {
                if (stateResolvedAt[state.StateNumber] == LookaheadComplexity.Unresolved)
                {
                    List<BitVectorSet> stateLookaheads = new List<BitVectorSet>();

                    foreach (Item conflictItem in conflictingItems[stateLookaheadIndex[state.StateNumber]])
                    {
                        BitVectorSet lookaheadSet = new BitVectorSet(grammar.NumTerminals);

                        foreach (NonterminalTransition trans in lookback(state, conflictItem))
                        {
                            if (N_includes[getTransNumber(trans)] == 0)
                            {
                                digraphTraverse<NonterminalTransition>(trans, N_includes, follow, includes,
                                    initRead, getTransNumber);
                            }
                            lookaheadSet.UnionWith(follow[getTransNumber(trans)]);
                        }

                        stateLookaheads.Add(lookaheadSet);
                    }

                    // If this is our first computation of lookahead sets, a new entry has to be
                    // created for the state in lookaheadSets; otherwise we overwrite the one that
                    // was created during the previous (SLR) computation.
                    if (forceLalr1)
                    {
                        lookaheadSets.Add(stateLookaheads);
                    }
                    else
                    {
                        lookaheadSets[stateLookaheadIndex[state.StateNumber]] = stateLookaheads;
                    }
                }
            }

            // Step 4. Verify the parser.
            // If the parser still contains reduce/reduce conflicts, the automaton is written to
            // the log (when a logfile was given) and an exception is thrown. Remaining
            // shift/reduce conflicts are only reported. Otherwise we go on to fill the tables.
            bool reduceReduceConflicts;
            bool conflicts = checkForConflicts(LookaheadComplexity.LALR1, out reduceReduceConflicts);

            if (reduceReduceConflicts)
            {
                if (logfileName != null)
                {
                    printAutomatonStates(logfileName);
                    throw new GrammarException(string.Format("Reduce/reduce conflicts detected in the resulting parser.\r\nThe grammar isn't LALR(1).\r\nCheck the log file {0} for details.", logfileName));
                }
                else
                {
                    throw new GrammarException("Reduce/reduce conflicts detected in the resulting parser.\r\nThe grammar isn't LALR(1).");
                }
            }
            else if (conflicts)
            {
                if (reportOutput != null)
                {
                    printShiftReduceConflicts(reportOutput);
                }
            }
        }
    }

    // FILLING THE TABLES

    ParserAction[,] parseTable = new ParserAction[parserStates.Count, grammar.NumTerminals];
    int[,] gotoTable = new int[parserStates.Count, grammar.NumNonterminals];

    // -1 in the goto table means "no transition".
    for (int i = 0; i < parserStates.Count; i++)
    {
        for (int j = 0; j < grammar.NumNonterminals; j++)
        {
            gotoTable[i, j] = -1;
        }
    }

    for (int stateNumber = 0; stateNumber < parserStates.Count; stateNumber++)
    {
        if (stateLookaheadIndex[stateNumber] >= 0)
        {
            // Inconsistent state: each final item reduces only on the symbols of its lookahead set.
            for (int i = 0; i < conflictingItems[stateLookaheadIndex[stateNumber]].Count; i++)
            {
                ParserAction action = new ParserAction();
                action.ActionType = ParserActionType.Reduce;
                action.Argument = conflictingItems[stateLookaheadIndex[stateNumber]][i].Production.ProductionCode;

                foreach (int symbol in lookaheadSets[stateLookaheadIndex[stateNumber]][i])
                {
                    parseTable[stateNumber, symbol] = action;
                }
            }
        }
        else
        {
            // Consistent state: a final item reduces on every terminal.
            foreach (Item item in parserStates[stateNumber].ItemSet)
            {
                if (item.IsFinal)
                {
                    ParserAction action = new ParserAction();
                    action.ActionType = ParserActionType.Reduce;
                    action.Argument = item.Production.ProductionCode;

                    for (int symbol = 0; symbol < grammar.NumTerminals; symbol++)
                    {
                        parseTable[stateNumber, symbol] = action;
                    }
                }
            }
        }

        // Transitions are written after the reductions, so a shift overwrites a reduce entry
        // stored for the same terminal.
        foreach (Transition trans in parserStates[stateNumber].Transitions)
        {
            if (trans is TerminalTransition)
            {
                parseTable[stateNumber, trans.TransitionSymbol].ActionType = ParserActionType.Shift;
                parseTable[stateNumber, trans.TransitionSymbol].Argument = trans.Destination.StateNumber;
            }
            else
            {
                gotoTable[stateNumber, trans.TransitionSymbol - grammar.NumTerminals] = trans.Destination.StateNumber;
            }
        }
    }

    grammar.ParserData.ParseTable = parseTable;
    grammar.ParserData.GotoTable = gotoTable;

    // A null logfileName disables logging; this mirrors the null check on the reduce/reduce path.
    if (explicitLogging && logfileName != null)
    {
        printAutomatonStates(logfileName);
    }

    if (reportOutput != null)
    {
        printSuccessReport(reportOutput);
    }
}
/// <summary>
/// Computes the ParseTable and GotoTable of a Grammar's ParserData without producing a report.
/// Forwards to the four-argument overload, passing <b>null</b> as the report writer.
/// </summary>
/// <param name="grammar">The Grammar whose tables are to be computed; passed on unchanged.</param>
/// <param name="logfileName">The name of the file to which the automaton is to be logged; passed on unchanged.</param>
/// <param name="explicitLogging">Whether the automaton should be written to the logfile even
/// though there are no inconsistencies; passed on unchanged.</param>
public void ComputeTables(Grammar grammar, string logfileName, bool explicitLogging)
{
    // This overload requests no report.
    TextWriter noReportOutput = null;
    ComputeTables(grammar, logfileName, explicitLogging, noReportOutput);
}
/// <summary>
/// Computes the ParseTable and GotoTable of a Grammar's ParserData with explicit logging turned off.
/// Forwards to the three-argument overload, passing <b>false</b> for its explicitLogging argument.
/// </summary>
/// <param name="grammar">The Grammar whose tables are to be computed; passed on unchanged.</param>
/// <param name="logfileName">The name of the logfile; passed on unchanged.</param>
public void ComputeTables(Grammar grammar, string logfileName)
{
    // Explicit logging is never requested by this overload.
    const bool explicitLogging = false;
    ComputeTables(grammar, logfileName, explicitLogging);
}
/// <summary>
/// Computes the ParseTable and GotoTable of a Grammar's ParserData.
/// Forwards to the two-argument overload, passing <b>null</b> as the logfile name.
/// </summary>
/// <param name="grammar">The Grammar whose tables are to be computed; passed on unchanged.</param>
public void ComputeTables(Grammar grammar)
{
    // No logfile is supplied by this overload.
    string noLogfileName = null;
    ComputeTables(grammar, noLogfileName);
}