示例#1
0
        /// <summary>
        /// Wraps the user's production actions into a generated <c>ActionCollection</c> class,
        /// compiles that class with the C# CodeDOM compiler and stores the bytes of the resulting
        /// assembly in <c>grammar.ParserData.ActionAssemblyBytes</c>. The assembly file written by
        /// the compiler is deleted afterwards.
        /// </summary>
        /// <param name="grammar">The grammar whose GrammarCode supplies the header code, the production
        /// actions, the nonterminal types and the user object type, and whose ParserData receives the
        /// compiled assembly bytes.</param>
        /// <param name="compilerOptions">Additional options handed to the C# compiler through
        /// <c>CompilerParameters.CompilerOptions</c>.</param>
        /// <exception cref="InvalidSpecificationException">Thrown with the list of compiler errors
        /// (warnings are filtered out) when the generated code fails to compile.</exception>
        public void CompileGrammarCode(Grammar grammar, string compilerOptions)
        {
            // First we wrap the user's actions into methods which supply the user's
            // code with correctly typed arguments and information about line and column
            // locations of symbols. This wrapper code also exposes a handy accessor function
            // which gives us delegates to all the actions in one handy array.
            StringBuilder codeBuilder = new StringBuilder();

            // First goes the user's header code with the possible "using" statements.
            codeBuilder.Append(grammar.GrammarCode.HeaderCode);

            codeBuilder.Append(@"
				namespace YetAnotherParserGenerator.UserGenerated
				{
				class ActionCollection
				{
				public static System.Func<object[], int[], int[], object, object>[] RetrieveActions()
				{
					return new System.Func<object[], int[], int[], object, object>[] { ");

            // The accessor returns the wrappers Action0, Action1, ... in production order.
            for (int i = 0; i < grammar.GrammarCode.ProductionActions.Length; i++)
            {
                if (i > 0)
                {
                    codeBuilder.Append(", ");
                }
                codeBuilder.Append(string.Format("Action{0}", i));
            }

            codeBuilder.AppendLine("}; }");

            // One wrapper method per production action.
            for (int i = 0; i < grammar.GrammarCode.ProductionActions.Length; i++)
            {
                codeBuilder.AppendLine("public static object Action" + i.ToString() + "(object[] __args, int[] __lines, int[] __columns, object __state) {");

                // Expose every right-hand-side symbol as _1, _2, ... together with its
                // _lineN / _columnN position.
                for (int j = 0; j < grammar.Productions[i].RHSSymbols.Count; j++)
                {
                    // A terminal's value is always the string it spans in the input.
                    if (grammar.Productions[i].RHSSymbols[j] < grammar.NumTerminals)
                    {
                        codeBuilder.AppendLine(string.Format("string _{0} = (string) __args[{1}];", j + 1, j));
                    }
                    else
                    {
                        // The nonterminal value is either interpreted as the user-specified type
                        // or as a generic object if no type was further specified by the user.
                        string nonterminalType = grammar.GrammarCode.NonterminalTypes
                                                 [grammar.Productions[i].RHSSymbols[j] - grammar.NumTerminals];
                        if (nonterminalType != null)
                        {
                            codeBuilder.AppendLine(string.Format("{0} _{1} = ({0}) __args[{2}];", nonterminalType, j + 1, j));
                        }
                        else
                        {
                            codeBuilder.AppendLine(string.Format("object _{0} = __args[{1}];", j + 1, j));
                        }
                    }

                    codeBuilder.AppendLine(string.Format("int _line{0} = __lines[{1}];", j + 1, j));
                    codeBuilder.AppendLine(string.Format("int _column{0} = __columns[{1}];", j + 1, j));
                }

                // The user state object is exposed as _state, typed if the user declared a type.
                if (grammar.GrammarCode.UserObjectType != null)
                {
                    codeBuilder.AppendLine(string.Format("{0} _state = ({0}) __state;", grammar.GrammarCode.UserObjectType));
                }
                else
                {
                    codeBuilder.AppendLine("object _state = __state;");
                }

                codeBuilder.Append(grammar.GrammarCode.ProductionActions[i]);

                codeBuilder.AppendLine("}");
            }

            // We close our wrapper class and namespace.
            codeBuilder.Append("} }");

            CompilerResults cr;

            // The code provider is disposable, so release it as soon as the compilation is done.
            using (CSharpCodeProvider compiler = new CSharpCodeProvider())
            {
                CompilerParameters cp = new CompilerParameters();

                cp.GenerateExecutable = false;
                cp.CompilerOptions    = compilerOptions;
                cr = compiler.CompileAssemblyFromSource(cp, codeBuilder.ToString());
            }

            // Only genuine errors invalidate the specification; warnings are dropped.
            List <string> errors = new List <string>();

            foreach (CompilerError error in cr.Errors)
            {
                if (!error.IsWarning)
                {
                    errors.Add(error.ToString());
                }
            }

            if (errors.Count > 0)
            {
                throw new InvalidSpecificationException(errors);
            }

            // Keep only the bytes of the compiled assembly; the file on disk is removed even
            // when reading it fails.
            string assemblyPath = cr.PathToAssembly;
            try
            {
                grammar.ParserData.ActionAssemblyBytes = File.ReadAllBytes(assemblyPath);
            }
            finally
            {
                File.Delete(assemblyPath);
            }
        }
示例#2
0
        /// <summary>
        /// Reads the grammar specification stored in the file <paramref name="specificationPath"/> and
        /// builds a <c>Grammar</c> from it. When compiled with BOOTSTRAP the specification is processed
        /// by the hand-written token-by-token reader below; otherwise the runtime data embedded in the
        /// resources is loaded and the specification is parsed by the generated parser.
        /// </summary>
        /// <param name="specificationPath">Path of the grammar specification file to read.</param>
        /// <param name="warningMessages">Receives the warnings produced while processing the
        /// specification.</param>
        /// <returns>The grammar described by the specification.</returns>
        /// <exception cref="InvalidSpecificationException">In the BOOTSTRAP build, thrown with all
        /// collected errors followed by the warnings when the specification contains errors.</exception>
        public static Grammar ParseGrammar(string specificationPath, out IList <string> warningMessages)
        {
#if BOOTSTRAP
            GrammarLexer lexer = new GrammarLexer();
            lexer.SourceString = File.ReadAllText(specificationPath);
            Token token = lexer.GetNextToken();

            //errors and warnings are collected here; if an error occurs we keep reading and only
            //at the end throw an exception carrying all the errors and warnings; if everything goes
            //through without serious errors, the list of warnings is handed back to the caller
            List <string> errorMessages = new List <string>();
            warningMessages = new List <string>();

            //the code to be inserted at the start of the generated code
            string headerCode = null;

            //list of the names of all symbols; later serves as a lookup table from a symbol's code to its name
            List <string> symbolNames = new List <string>();
            //the "inverse table" of symbolNames, which gives the code for a symbol's name
            Dictionary <string, int> symbolCodes = new Dictionary <string, int>();

            //list of the regular expressions defining terminal symbols; the lexer's final regex is not
            //built from them right away, they are kept separately so that when compiling the combined
            //regex fails we can easily test which expressions are to blame
            List <string> regexes = new List <string>();
            //for every expression in regexes we remember the position where we found it, so that we can
            //issue a more detailed message
            List <int> regexLines   = new List <int>();
            List <int> regexColumns = new List <int>();
            //for every expression in regexes we also remember the code of the symbol it describes;
            //if the expression is meant to match strings we want to ignore, this list holds -1;
            //this is runtime data which is then used directly by our lexer
            List <int> groupSymbolCodes = new List <int>();
            //the resulting regular expression the lexer scans tokens with; the second part of the runtime
            //data for our lexer
            Regex regex = null;

            //name of the pseudoterminal whose tokens are not sent to the parser but thrown away
            string nullTerminal = "";
            //global options for the .NET regex engine (case insensitive, multiline...)
            string regexOpts = null;

            if (token.SymbolCode == CODE_HEADER)
            {
                token      = lexer.GetNextToken();
                headerCode = token.Value;
                token      = lexer.GetNextToken();
            }

            // skipping LEXER
            token = lexer.GetNextToken();

            if (token.SymbolCode == CODE_NULL)
            {
                token        = lexer.GetNextToken();
                nullTerminal = token.Value;
                token        = lexer.GetNextToken();
            }

            if (token.SymbolCode == CODE_REGEXOPTS)
            {
                regexOpts = token.Value;
                token     = lexer.GetNextToken();
            }

            symbolNames.Add("$end");
            symbolCodes["$end"] = 0;

            while (token.SymbolCode == CODE_IDENTIFIER)
            {
                string symbol = token.Value;
                token = lexer.GetNextToken();
                // skipping EQUALS
                token = lexer.GetNextToken();
                string capturingRegex = token.Value;
                // The position has to be taken from the regex token itself, i.e. before the lexer
                // is advanced, otherwise error messages would point at the following token.
                int regexLine   = token.LineNumber;
                int regexColumn = token.ColumnNumber;
                token = lexer.GetNextToken();

                if (symbol == nullTerminal)
                {
                    groupSymbolCodes.Add(-1);
                }
                else
                {
                    if (!symbolCodes.ContainsKey(symbol))
                    {
                        symbolNames.Add(symbol);
                        symbolCodes[symbol] = symbolNames.Count - 1;
                    }
                    groupSymbolCodes.Add(symbolCodes[symbol]);
                }

                regexes.Add(capturingRegex);
                regexLines.Add(regexLine);
                regexColumns.Add(regexColumn);
            }

            StringBuilder pattern = new StringBuilder();

            if (regexOpts != null)
            {
                pattern.Append(regexOpts);
            }

            for (int i = 0; i < regexes.Count; i++)
            {
                //all of the user's regular expressions are separated by alternation bars and written in
                //the order in which they were given (in the .NET Regex engine the alternatives further to
                //the left take precedence) and every expression is put into a capture group named __i,
                //where i is the ordinal of the expression, counted from 0
                if (i != 0)
                {
                    pattern.Append('|');
                }
                pattern.AppendFormat("(?<{0}>{1})", "__" + i.ToString(), regexes[i]);
            }

            try
            {
                regex = new Regex(pattern.ToString(), RegexOptions.Compiled);
            }
            catch (ArgumentException)
            {
                // The combined pattern is invalid; test the pieces one by one to find the culprits.
                // The options are only tested when some were given: constructing a Regex from null
                // throws an ArgumentNullException, which would be misreported as invalid options.
                if (regexOpts != null)
                {
                    try
                    {
                        new Regex(regexOpts);
                    }
                    catch (ArgumentException)
                    {
                        // FIXME: We no longer have the line and column data on regexOpts.
                        errorMessages.Add(string.Format("{0},{1}: The RegEx options are invalid.", -1, -1));
                    }
                }

                for (int i = 0; i < regexes.Count; i++)
                {
                    try
                    {
                        new Regex(regexes[i]);
                    }
                    catch (ArgumentException)
                    {
                        errorMessages.Add(string.Format("{0},{1}: This regular expression is invalid.",
                                                        regexLines[i], regexColumns[i]));
                    }
                }
            }

            int numTerminals = symbolNames.Count;

            // skipping PARSER
            token = lexer.GetNextToken();

            //nonterminals which appeared on the left-hand side of some rule
            HashSet <int> reducibleNonterminals = new HashSet <int>();
            //nonterminals which appeared on the right-hand side of some rule
            HashSet <int> usedNonterminals = new HashSet <int>();

            bool[] terminalUsed = new bool[numTerminals];

            List <ProductionWithAction> productions = new List <ProductionWithAction>();

            symbolNames.Add("$start");
            symbolCodes["$start"] = numTerminals;
            //<$start> is put in here as an exception, because we do not even want
            //anybody to put it on the right-hand side of a rule
            usedNonterminals.Add(symbolCodes["$start"]);

            // skipping START
            token = lexer.GetNextToken();
            // skipping LANGLE
            token = lexer.GetNextToken();

            string startSymbol = token.Value;

            if (symbolCodes.ContainsKey(startSymbol) && symbolCodes[startSymbol] < numTerminals)
            {
                errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                                token.LineNumber, token.ColumnNumber, startSymbol));
            }

            token = lexer.GetNextToken();

            // skipping RANGLE
            token = lexer.GetNextToken();

            symbolNames.Add(startSymbol);
            symbolCodes[startSymbol] = symbolNames.Count - 1;


            string userObjectType = null;

            if (token.SymbolCode == CODE_USEROBJECT)
            {
                token = lexer.GetNextToken();

                if (token.SymbolCode == CODE_QUOTED)
                {
                    // Strip the first and last character (the quotes) from the quoted type name.
                    userObjectType = token.Value.Substring(1, token.Value.Length - 2);
                    token          = lexer.GetNextToken();
                }
                else
                {
                    // An unquoted type name is a sequence of tokens joined by dots.
                    StringBuilder typeBuilder = new StringBuilder(token.Value);
                    token = lexer.GetNextToken();
                    while (token.SymbolCode == CODE_DOT)
                    {
                        typeBuilder.Append(".");
                        token = lexer.GetNextToken();
                        typeBuilder.Append(token.Value);
                        token = lexer.GetNextToken();
                    }

                    userObjectType = typeBuilder.ToString();
                }
            }

            //our rule number 0, which aptly describes the way in which we augment the grammar
            productions.Add(new ProductionWithAction(new Production(
                                                         symbolCodes["$start"], new int[] { symbolCodes[startSymbol], symbolCodes["$end"] }), "{ return _1; }"));

            reducibleNonterminals.Add(symbolCodes["$start"]);
            usedNonterminals.Add(symbolCodes[startSymbol]);
            terminalUsed[symbolCodes["$end"]] = true;

            var typeMappings = new Dictionary <string, string>();

            //processing of the rules
            while (token.SymbolCode != CODE_END)
            {
                if (token.SymbolCode == CODE_TYPE)
                {
                    token = lexer.GetNextToken();

                    // skipping LANGLE
                    token = lexer.GetNextToken();
                    string nonterminal = token.Value;

                    if (symbolCodes.ContainsKey(nonterminal) && symbolCodes[nonterminal] < numTerminals)
                    {
                        errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                                        token.LineNumber, token.ColumnNumber, nonterminal));
                    }

                    if (!symbolCodes.ContainsKey(nonterminal))
                    {
                        symbolNames.Add(nonterminal);
                        symbolCodes[nonterminal] = symbolNames.Count - 1;
                    }

                    token = lexer.GetNextToken();
                    // skipping RANGLE
                    token = lexer.GetNextToken();

                    if (token.SymbolCode == CODE_QUOTED)
                    {
                        // QUOTED
                        typeMappings.Add(nonterminal, token.Value.Substring(1, token.Value.Length - 2));
                        token = lexer.GetNextToken();
                    }
                    else
                    {
                        StringBuilder typeBuilder = new StringBuilder(token.Value);
                        token = lexer.GetNextToken();
                        while (token.SymbolCode == CODE_DOT)
                        {
                            typeBuilder.Append(".");
                            token = lexer.GetNextToken();
                            typeBuilder.Append(token.Value);
                            token = lexer.GetNextToken();
                        }

                        typeMappings.Add(nonterminal, typeBuilder.ToString());
                    }
                }
                else
                {
                    //extract the symbol on the left-hand side and process it
                    // skipping LANGLE
                    token = lexer.GetNextToken();

                    string lhsSymbol = token.Value;

                    if (symbolCodes.ContainsKey(lhsSymbol) && symbolCodes[lhsSymbol] < numTerminals)
                    {
                        errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                                        token.LineNumber, token.ColumnNumber, lhsSymbol));
                    }

                    if (!symbolCodes.ContainsKey(lhsSymbol))
                    {
                        symbolNames.Add(lhsSymbol);
                        symbolCodes[lhsSymbol] = symbolNames.Count - 1;
                    }

                    if (!reducibleNonterminals.Contains(symbolCodes[lhsSymbol]))
                    {
                        reducibleNonterminals.Add(symbolCodes[lhsSymbol]);
                    }

                    token = lexer.GetNextToken();

                    //skipping RANGLE
                    token = lexer.GetNextToken();


                    int lhsSymbolCode = symbolCodes[lhsSymbol];

                    //Process the expression on the right-hand side, which may consist of several lists of
                    //symbols separated by alternation bars. Each of these lists then forms one rule
                    //without alternation.
                    while ((token.SymbolCode == CODE_DERIVES) || (token.SymbolCode == CODE_OR))
                    {
                        token = lexer.GetNextToken();
                        List <int> rhsSymbols = new List <int>();

                        while (token.SymbolCode != CODE_CODE)
                        {
                            int rhsSymbolCode = -1;
                            if (token.SymbolCode == CODE_LANGLE)
                            {
                                //skipping LANGLE
                                token = lexer.GetNextToken();

                                string rhsSymbol = token.Value;

                                if (symbolCodes.ContainsKey(rhsSymbol) && symbolCodes[rhsSymbol] < numTerminals)
                                {
                                    errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                                                    token.LineNumber, token.ColumnNumber, rhsSymbol));
                                }

                                if (!symbolCodes.ContainsKey(rhsSymbol))
                                {
                                    symbolNames.Add(rhsSymbol);
                                    symbolCodes[rhsSymbol] = symbolNames.Count - 1;
                                }

                                if (!usedNonterminals.Contains(symbolCodes[rhsSymbol]))
                                {
                                    usedNonterminals.Add(symbolCodes[rhsSymbol]);
                                }

                                token = lexer.GetNextToken();

                                //skipping RANGLE
                                token = lexer.GetNextToken();

                                rhsSymbolCode = symbolCodes[rhsSymbol];
                            }
                            else
                            {
                                string rhsSymbol = token.Value;

                                // A bare name only counts as a defined terminal when its code lies in
                                // the terminal range. A name that is known only as a nonterminal has a
                                // code >= numTerminals and must not be used to index terminalUsed.
                                int terminalCode;
                                if (symbolCodes.TryGetValue(rhsSymbol, out terminalCode) && terminalCode < numTerminals)
                                {
                                    rhsSymbolCode = terminalCode;
                                    terminalUsed[rhsSymbolCode] = true;
                                }
                                else
                                {
                                    errorMessages.Add(string.Format("{0},{1}: The terminal '{2}' is used but not defined.",
                                                                    token.LineNumber, token.ColumnNumber, rhsSymbol));
                                }

                                token = lexer.GetNextToken();
                            }

                            // An undefined terminal is recorded as -1; the collected error makes the
                            // method throw before the productions are used.
                            rhsSymbols.Add(rhsSymbolCode);
                        }

                        string code = token.Value;
                        token = lexer.GetNextToken();

                        productions.Add(new ProductionWithAction(new Production(lhsSymbolCode, rhsSymbols), code));
                    }
                }
            }

            //ToArray is called so that the lazy Intersect method gets evaluated and executing the
            //next statement does not cause an error
            int[] theGoodOnes = usedNonterminals.Intersect(reducibleNonterminals).ToArray();
            usedNonterminals.ExceptWith(theGoodOnes);
            reducibleNonterminals.ExceptWith(theGoodOnes);

            foreach (int nonterminal in usedNonterminals)
            {
                warningMessages.Add(string.Format("Warning: The nonterminal <{0}> isn't reducible.",
                                                  symbolNames[nonterminal]));
            }
            foreach (int nonterminal in reducibleNonterminals)
            {
                warningMessages.Add(string.Format("Warning: The nonterminal <{0}> is defined but never used.", symbolNames[nonterminal]));
            }

            for (int terminal = 0; terminal < numTerminals; terminal++)
            {
                if (!terminalUsed[terminal])
                {
                    warningMessages.Add(string.Format("Warning: The terminal '{0}' is defined but never used.", symbolNames[terminal]));
                }
            }


            if (errorMessages.Count > 0)
            {
                throw new InvalidSpecificationException(errorMessages.Concat(warningMessages));
            }


            //everything has been read and checked; now we only sort the rules by their left-hand side,
            //renumber them and for every nonterminal compute the index at which the rules with that
            //nonterminal begin
            Production[] productionsArray = new Production[productions.Count];
            productionsArray[0] = productions[0].Production;
            string[] actions = new string[productions.Count];
            actions[0] = productions[0].Action;
            IEnumerable <ProductionWithAction> sortedProductions =
                productions.GetRange(1, productions.Count - 1).OrderBy((prod => prod.Production.LHSSymbol));

            int k = 1;
            foreach (ProductionWithAction productionWithAction in sortedProductions)
            {
                productionsArray[k] = productionWithAction.Production;
                productionsArray[k].ProductionCode = k;
                actions[k] = productionWithAction.Action;
                k++;
            }

            int numNonterminals = symbolCodes.Count - numTerminals;

            // One extra trailing slot holds the offset just past the last production.
            int[] nonterminalProductionOffset = new int[numNonterminals + 1];

            int offset = 0;
            for (int nonterminal = 0; nonterminal < numNonterminals; nonterminal++)
            {
                nonterminalProductionOffset[nonterminal] = offset;
                while ((offset < productionsArray.Length) &&
                       (productionsArray[offset].LHSSymbol == numTerminals + nonterminal))
                {
                    offset++;
                }
            }
            nonterminalProductionOffset[nonterminalProductionOffset.Length - 1] = offset;

            // Nonterminals without a declared type keep a null entry.
            string[] nonterminalTypes = new string[numNonterminals];
            foreach (var typeMapping in typeMappings)
            {
                nonterminalTypes[symbolCodes[typeMapping.Key] - numTerminals] = typeMapping.Value;
            }

            //and now we just wrap it all up and send it off
            GrammarDefinition grammarDefinition = new GrammarDefinition(symbolNames.ToArray(), productionsArray, nonterminalProductionOffset, numTerminals);
            LexerData         lexerData         = new LexerData(regex, groupSymbolCodes);
            GrammarCode       grammarCode       = new GrammarCode(headerCode, actions, nonterminalTypes, userObjectType);
            Grammar           grammar           = new Grammar(grammarDefinition, lexerData, grammarCode);

            return(grammar);
#else
            // Load the runtime data of the specification grammar from the embedded resource
            // and let the generated parser process the specification file.
            LexerData  lexerData;
            ParserData parserData;
            Grammar.ReadRuntimeDataFromStream(
                new MemoryStream(YetAnotherParserGenerator.Properties.Resources.SpecificationGrammar),
                out lexerData, out parserData);

            GrammarLexer lexer  = new GrammarLexer();
            Parser       parser = new Parser(parserData);

            GrammarParserLocals locals = new GrammarParserLocals(specificationPath, out warningMessages);
            lexer.SourceString = File.ReadAllText(specificationPath);
            return((Grammar)parser.Parse(lexer, locals));
#endif
        }
示例#3
0
        /// <summary>
        /// Computes the ParseTable and GotoTable of a Grammar's ParserData, logging the automata's graph
        /// to a logfile should the <i>grammar</i> prove to be non-LALR(1) or should the caller explicitly
        /// state he wants a log. Any reports generated by the processor will be sent to the <i>reportOutput</i>
        /// TextWriter instance.
        /// </summary>
        /// <param name="grammar">The Grammar whose tables are to be computed. GrammarDefinition ought to be
        /// set and filled with appropriate data and ParserData should be initialized.</param>
        /// <param name="logfileName">The name of the file to which the automaton is to be logged; <b>null</b>
        /// if logging should be disabled.</param>
        /// <param name="explicitLogging">A Boolean value determining whether the automaton should be
        /// written to the logfile even though there are no inconsistencies.</param>
        /// <param name="reportOutput">The TextWriter to which the report should be written; <b>null</b>
        /// if reporting should be disabled.</param>
        public void ComputeTables(Grammar grammar, string logfileName, bool explicitLogging, TextWriter reportOutput)
        {
            // INICIALIZACE

            this.grammar = grammar;

            //inicializace a výpočet productionsByRHSNonterminals
            productionsByRHSNonterminals = new List <Production> [grammar.GrammarDefinition.NumNonterminals];
            for (int nonterminal = 0; nonterminal < productionsByRHSNonterminals.Length; nonterminal++)
            {
                productionsByRHSNonterminals[nonterminal] = new List <Production>();
            }

            foreach (Production production in grammar.Productions)
            {
                foreach (int rhsSymbol in production.RHSSymbols)
                {
                    if (rhsSymbol >= grammar.NumTerminals)
                    {
                        productionsByRHSNonterminals[rhsSymbol - grammar.NumTerminals].Add(production);
                    }
                }
            }

            //inicializace transitionsByNonterminals, hodnoty jsou do seznamů posléze nasázeny ve funkci
            //exploreTransitions, která zároveň vyrábí LR(0) automat
            transitionsByNonterminals = new List <NonterminalTransition> [grammar.NumNonterminals];
            for (int nonterminal = 0; nonterminal < grammar.NumNonterminals; nonterminal++)
            {
                transitionsByNonterminals[nonterminal] = new List <NonterminalTransition>();
            }

            numNonterminalTransitions = 0;

            conflictingItems = new List <List <Item> >();
            lookaheadSets    = new List <List <BitVectorSet> >();


            parserStates = new List <State>();

            // TVORBA LR(0) AUTOMATU

            //vytvoříme počáteční ItemSet a nastartujeme rekurzivní
            //exploreTransitions

            Item    startItem = new Item(grammar.Productions[0], 0);
            ItemSet startIS   = new ItemSet();

            startIS.Add(startItem);
            startIS.CloseItemSet(grammar);

            State initialState = new State(0, startIS);

            parserStates.Add(initialState);

            //spočítá nám parserStates, hrany mezi nimi, nonterminalTransitions (počet neterminálních hran)
            //a transitionsByNonterminals
            exploreTransitions(initialState);


            //tenhle kousek inicializace si musel počkat na dopočítání stavů automatu
            stateLookaheadIndex = new int[parserStates.Count];
            for (int i = 0; i < parserStates.Count; i++)
            {
                stateLookaheadIndex[i] = -1;
            }

            //původní hodnota Look
            stateResolvedAt = new LookaheadComplexity[parserStates.Count];


            // ŘEŠENÍ NEDETERMINISTICKÝCH STAVŮ (KONFLIKTŮ)

            numInconsistentStates = 0;

            foreach (State state in parserStates)
            {
                List <Item> finalItems = new List <Item>();
                stateResolvedAt[state.StateNumber] = LookaheadComplexity.LR0;

                foreach (Item item in state.ItemSet)
                {
                    if (item.IsFinal)
                    {
                        finalItems.Add(item);
                    }
                }

                if (finalItems.Count >= 2)
                {
                    stateLookaheadIndex[state.StateNumber] = numInconsistentStates;
                    stateResolvedAt[state.StateNumber]     = LookaheadComplexity.Unresolved;
                    numInconsistentStates++;
                    conflictingItems.Add(finalItems);
                }
                else if (finalItems.Count >= 1)
                {
                    bool canRead = false;
                    foreach (Transition trans in state.Transitions)
                    {
                        if (trans is TerminalTransition)
                        {
                            canRead = true;
                            break;
                        }
                    }
                    if (canRead)
                    {
                        stateLookaheadIndex[state.StateNumber] = numInconsistentStates;
                        stateResolvedAt[state.StateNumber]     = LookaheadComplexity.Unresolved;
                        numInconsistentStates++;
                        conflictingItems.Add(finalItems);
                    }
                }
            }

            if (numInconsistentStates > 0)
            {
                //Vstupní gramatika není LR(0), bude tedy třeba spočítat lookahead množiny pro nekonzistentní
                //stavy. Použijeme postup DeRemera a Pennella, kdy se pokusíme každý nekonzistení stav nejdříve
                //vyřešit pomocí SLR(1) lookahead množin a až poté případně přikročíme k výpočtu LALR(1) lookaheadů.

                //Krok 1. Určit, které neterminály jsou nulovatelné.

                computeNullableNonterminals();

                //Krok 2. Spočítat SLR(1) lookaheady.
                //Připravíme se na počítání Read a SLR-Follow množin a pokusíme se vyřešit konflikty
                //pouze pomocí SLR(1) lookaheadů.


                //Direct Read množina pro každou neterminální hranu
                initDR =
                    (trans =>
                {
                    BitVectorSet set = new BitVectorSet(grammar.NumTerminals);
                    foreach (Transition nextTrans in trans.Destination.Transitions)
                    {
                        if (nextTrans is TerminalTransition)
                        {
                            set.Add(nextTrans.TransitionSymbol);
                        }
                    }
                    return(set);
                });

                read    = new BitVectorSet[numNonterminalTransitions];
                N_reads = new int[numNonterminalTransitions];
                reads   = new ReadsOracle(this);

                if (!forceLalr1)
                {
                    getNontermIndex = (nonterm => nonterm - grammar.NumTerminals);

                    //původní hodnota pro nějaký neterminál bude sjednocení Read množin všech hran označených
                    //tímto neterminálem; vyplývá téměř přímo z definice výpočtu Follow množin SLR(1) parserů
                    initSLR = (nonterm =>
                    {
                        BitVectorSet set = new BitVectorSet(grammar.NumTerminals);
                        foreach (NonterminalTransition trans in transitionsByNonterminals[nonterm - grammar.NumTerminals])
                        {
                            if (N_reads[getTransNumber(trans)] == 0)
                            {
                                digraphTraverse <NonterminalTransition>(trans, N_reads, read, reads, initDR, getTransNumber);
                            }
                            set.UnionWith(read[getTransNumber(trans)]);
                        }
                        return(set);
                    });

                    slr_follow  = new BitVectorSet[grammar.NumNonterminals];
                    N_slr       = new int[grammar.NumNonterminals];
                    slr_follows = new SLROracle(this);

                    foreach (State state in parserStates)
                    {
                        if (stateResolvedAt[state.StateNumber] == LookaheadComplexity.Unresolved)
                        {
                            List <BitVectorSet> stateLookaheads = new List <BitVectorSet>();

                            foreach (Item conflictItem in conflictingItems[stateLookaheadIndex[state.StateNumber]])
                            {
                                if (N_slr[getNontermIndex(conflictItem.Production.LHSSymbol)] == 0)
                                {
                                    digraphTraverse <int>(conflictItem.Production.LHSSymbol, N_slr, slr_follow, slr_follows, initSLR, getNontermIndex);
                                }

                                stateLookaheads.Add(slr_follow[getNontermIndex(conflictItem.Production.LHSSymbol)]);
                            }

                            lookaheadSets.Add(stateLookaheads);
                        }
                    }
                }

                //Krok 3. Spočítat LALR(1) lookaheady.
                //Pokud SLR(1) lookaheady nevyřešily všechny konflikty, spočteme pro nedořešené stavy
                //LALR(1) lookaheady.

                if (forceLalr1 || checkForConflicts(LookaheadComplexity.SLR1))
                {
                    initRead = (trans =>
                    {
                        if (N_reads[getTransNumber(trans)] == 0)
                        {
                            digraphTraverse <NonterminalTransition>(trans, N_reads, read, reads, initDR, getTransNumber);
                        }
                        return(new BitVectorSet(read[getTransNumber(trans)]));
                    });

                    follow     = new BitVectorSet[numNonterminalTransitions];
                    N_includes = new int[numNonterminalTransitions];
                    includes   = new IncludesOracle(this);

                    foreach (State state in parserStates)
                    {
                        if (stateResolvedAt[state.StateNumber] == LookaheadComplexity.Unresolved)
                        {
                            List <BitVectorSet> stateLookaheads = new List <BitVectorSet>();

                            foreach (Item conflictItem in conflictingItems[stateLookaheadIndex[state.StateNumber]])
                            {
                                BitVectorSet lookaheadSet = new BitVectorSet(grammar.NumTerminals);

                                foreach (NonterminalTransition trans in lookback(state, conflictItem))
                                {
                                    if (N_includes[getTransNumber(trans)] == 0)
                                    {
                                        digraphTraverse <NonterminalTransition>(trans, N_includes, follow, includes, initRead, getTransNumber);
                                    }

                                    lookaheadSet.UnionWith(follow[getTransNumber(trans)]);
                                }

                                stateLookaheads.Add(lookaheadSet);
                            }

                            //v případě, že je tohle naše první počítání lookahead množin, tak musíme
                            //založit pro stav novou položku v seznamu lookaheadSets; v opačném případě
                            //přepíšeme tu, kterou jsme vytvořili při počítání minulém
                            if (forceLalr1)
                            {
                                lookaheadSets.Add(stateLookaheads);
                            }
                            else
                            {
                                lookaheadSets[stateLookaheadIndex[state.StateNumber]] = stateLookaheads;
                            }
                        }
                    }

                    //Krok 4. Ověřit parser
                    //Pokud parser stále obsahuje konflikty, vypíšeme uživateli do logu podobu stavového
                    //automatu a vyznačíme v ní konflikty. Pokud parser konflikty neobsahuje, zapíšeme
                    //poznatky do tabulek a máme hotovo.

                    bool reduceReduceConflicts;
                    bool conflicts = checkForConflicts(LookaheadComplexity.LALR1, out reduceReduceConflicts);
                    if (reduceReduceConflicts)
                    {
                        if (logfileName != null)
                        {
                            printAutomatonStates(logfileName);
                            throw new GrammarException(string.Format("Reduce/reduce conflicts detected in the resulting parser.\r\nThe grammar isn't LALR(1).\r\nCheck the log file {0} for details.", logfileName));
                        }
                        else
                        {
                            throw new GrammarException("Reduce/reduce conflicts detected in the resulting parser.\r\nThe grammar isn't LALR(1).");
                        }
                    }
                    else if (conflicts)
                    {
                        if (reportOutput != null)
                        {
                            printShiftReduceConflicts(reportOutput);
                        }
                    }
                }
            }

            ParserAction[,] parseTable = new ParserAction[parserStates.Count, grammar.NumTerminals];
            int[,] gotoTable           = new int[parserStates.Count, grammar.NumNonterminals];
            for (int i = 0; i < parserStates.Count; i++)
            {
                for (int j = 0; j < grammar.NumNonterminals; j++)
                {
                    gotoTable[i, j] = -1;
                }
            }

            for (int stateNumber = 0; stateNumber < parserStates.Count; stateNumber++)
            {
                if (stateLookaheadIndex[stateNumber] >= 0)
                {
                    for (int i = 0; i < conflictingItems[stateLookaheadIndex[stateNumber]].Count; i++)
                    {
                        ParserAction action = new ParserAction();
                        action.ActionType = ParserActionType.Reduce;
                        action.Argument   = conflictingItems[stateLookaheadIndex[stateNumber]][i].Production.ProductionCode;
                        foreach (int symbol in lookaheadSets[stateLookaheadIndex[stateNumber]][i])
                        {
                            parseTable[stateNumber, symbol] = action;
                        }
                    }
                }
                else
                {
                    foreach (Item item in parserStates[stateNumber].ItemSet)
                    {
                        if (item.IsFinal)
                        {
                            ParserAction action = new ParserAction();
                            action.ActionType = ParserActionType.Reduce;
                            action.Argument   = item.Production.ProductionCode;
                            for (int symbol = 0; symbol < grammar.NumTerminals; symbol++)
                            {
                                parseTable[stateNumber, symbol] = action;
                            }
                        }
                    }
                }

                foreach (Transition trans in parserStates[stateNumber].Transitions)
                {
                    if (trans is TerminalTransition)
                    {
                        parseTable[stateNumber, trans.TransitionSymbol].ActionType = ParserActionType.Shift;
                        parseTable[stateNumber, trans.TransitionSymbol].Argument   = trans.Destination.StateNumber;
                    }
                    else
                    {
                        gotoTable[stateNumber, trans.TransitionSymbol - grammar.NumTerminals] = trans.Destination.StateNumber;
                    }
                }
            }

            grammar.ParserData.ParseTable = parseTable;
            grammar.ParserData.GotoTable  = gotoTable;

            if (explicitLogging)
            {
                printAutomatonStates(logfileName);
            }

            if (reportOutput != null)
            {
                printSuccessReport(reportOutput);
            }
        }
示例#4
0
 /// <summary>
 /// Builds the ParseTable and GotoTable held in a Grammar's ParserData. The automaton's graph is
 /// written to a logfile when the <i>grammar</i> turns out not to be LALR(1), or when the caller
 /// explicitly asks for a log.
 /// </summary>
 /// <param name="grammar">The Grammar whose tables are to be built. Its GrammarDefinition is expected
 /// to be set and populated, and its ParserData is expected to be initialized.</param>
 /// <param name="logfileName">Name of the file the automaton is logged to.</param>
 /// <param name="explicitLogging">When <c>true</c>, the automaton is written to the logfile even if
 /// no inconsistencies were found.</param>
 /// <remarks>Forwards to the four-argument overload, passing <c>null</c> as its last argument.</remarks>
 public void ComputeTables(Grammar grammar, string logfileName, bool explicitLogging)
     => ComputeTables(grammar, logfileName, explicitLogging, null);
示例#5
0
 /// <summary>
 /// Builds the ParseTable and GotoTable held in a Grammar's ParserData, writing the automaton's
 /// graph to a logfile if the <i>grammar</i> turns out not to be LALR(1).
 /// </summary>
 /// <param name="grammar">The Grammar whose tables are to be built. Its GrammarDefinition is expected
 /// to be set and populated, and its ParserData is expected to be initialized.</param>
 /// <param name="logfileName">Name of the file that receives the nondeterministic automaton
 /// when the <i>grammar</i> is not LALR(1).</param>
 /// <remarks>Forwards to the three-argument overload with explicit logging turned off.</remarks>
 public void ComputeTables(Grammar grammar, string logfileName)
     => ComputeTables(grammar, logfileName, false);
示例#6
0
 /// <summary>
 /// Builds the ParseTable and GotoTable held in a Grammar's ParserData.
 /// </summary>
 /// <param name="grammar">The Grammar whose tables are to be built. Its GrammarDefinition is expected
 /// to be set and populated, and its ParserData is expected to be initialized.</param>
 /// <remarks>Forwards to the two-argument overload, passing <c>null</c> as the second argument.</remarks>
 public void ComputeTables(Grammar grammar)
     => ComputeTables(grammar, null);