示例#1
0
文件: Lexer.cs 项目: jirkamarsik/yapg
        /// <summary>
        /// Fetches the next token from the source string.
        /// </summary>
        /// <remarks>
        /// The lexer is context sensitive: the token that follows a HEADER keyword is read as raw
        /// header code up to the end-of-header marker, and the token that follows an EQUALS sign
        /// is read as a regular expression running to the end of the line. Everything else is
        /// dispatched on the first character. Tokens whose symbol code is -1 are not returned;
        /// the lexer moves on to the next token instead. Once the input is exhausted, a token
        /// with the code <c>GrammarParser.CODE_END</c> and an empty value is returned.
        /// </remarks>
        /// <returns>The next token discovered in the input string.</returns>
        public Token GetNextToken()
        {
            if (position < input.Length)
            {
                int matchLength = 0;
                int symbolCode = -2;
                int endOfLine;

                if (previousSymbolCode == GrammarParser.CODE_HEADER) {
                    // HEADER_CODE, the code that is to be put at the beginning of the
                    // generated source file; it runs up to the end-of-header marker
                    // or, failing that, to the end of the input.
                    symbolCode = GrammarParser.CODE_HEADERCODE;
                    var endOfHeaderMatch = re_endOfHeader.Match(input, position);
                    if (!endOfHeaderMatch.Success)
                        matchLength = input.Length - position;
                    else
                        matchLength = endOfHeaderMatch.Index - position;
                } else if (previousSymbolCode == GrammarParser.CODE_EQUALS) {
                    // REGEX, a regular expression defining a token; it takes the rest
                    // of the line (the newline itself is not included).
                    symbolCode = GrammarParser.CODE_REGEX;
                    endOfLine = input.IndexOf('\n', position);
                    if (endOfLine == -1)
                        matchLength = input.Length - position;
                    else
                        matchLength = endOfLine - position;
                } else switch (input[position]) {
                case '<':
                    symbolCode = GrammarParser.CODE_LANGLE;
                    matchLength = 1;
                    break;
                case '>':
                    symbolCode = GrammarParser.CODE_RANGLE;
                    matchLength = 1;
                    break;
                case '.':
                    symbolCode = GrammarParser.CODE_DOT;
                    matchLength = 1;
                    break;
                case '|':
                    symbolCode = GrammarParser.CODE_OR;
                    matchLength = 1;
                    break;
                case '=':
                    symbolCode = GrammarParser.CODE_EQUALS;
                    matchLength = 1;
                    break;
                case ':':
                    // The only token starting with ':' is "::=". The lookahead is bounds
                    // checked so that a ':' near the end of the input is reported as a
                    // parsing error instead of raising an index-out-of-range exception.
                    if ((position + 2 < input.Length) && (input[position + 1] == ':') && (input[position + 2] == '=')) {
                        symbolCode = GrammarParser.CODE_DERIVES;
                        matchLength = 3;
                    } else if (position + 1 < input.Length) {
                        reportParsingError(string.Format("Unexpected character '{0}' following ':'.", input[position + 1]));
                    } else {
                        reportParsingError("Unexpected end of input following ':'.");
                    }
                    break;
                case '"':
                    // QUOTED runs up to and including the closing quote, or to the end
                    // of the input when the quote is never closed.
                    symbolCode = GrammarParser.CODE_QUOTED;
                    int endOfQuotes = input.IndexOf('"', position + 1);
                    if (endOfQuotes == -1)
                        matchLength = input.Length - position;
                    else
                        matchLength = (endOfQuotes + 1) - position;
                    break;
                case '(':
                    var match = re_regexOpts.Match(input, position);
                    // A failed Match reports Index 0, so Success has to be checked too;
                    // otherwise a failure at position 0 would be mistaken for a hit.
                    if (match.Success && (match.Index == position)) {
                        symbolCode = GrammarParser.CODE_REGEXOPTS;
                        matchLength = match.Length;
                    } else {
                        // Quote the remainder of the current line as the offending text.
                        int endOfOffendingLine = input.IndexOf('\n', position);
                        string offendingText = (endOfOffendingLine == -1)
                            ? input.Substring(position)
                            : input.Substring(position, endOfOffendingLine - position);
                        reportParsingError(string.Format("\"{0}\" is not a proper regular expression option setter.",
                                                         offendingText.TrimEnd('\r')));
                    }
                    break;
                case '#':
                    // A comment; skipped up to and including the end of the line.
                    endOfLine = input.IndexOf('\n', position + 1);
                    symbolCode = GrammarParser.CODE_SKIP;
                    if (endOfLine == -1)
                        matchLength = input.Length - position;
                    else
                        matchLength = (endOfLine + 1) - position;
                    break;
                case '%':
                    var keyword = re_identifier.Match(input, position + 1);
                    // Only an identifier that starts immediately after the '%' counts as the
                    // keyword; an identifier found further on in the input must not be used.
                    string keywordName = (keyword.Success && (keyword.Index == position + 1)) ? keyword.Value : "";
                    switch (keywordName) {
                    case "header":
                        symbolCode = GrammarParser.CODE_HEADER;
                        break;
                    case "lexer":
                        symbolCode = GrammarParser.CODE_LEXER;
                        break;
                    case "null":
                        symbolCode = GrammarParser.CODE_NULL;
                        break;
                    case "parser":
                        symbolCode = GrammarParser.CODE_PARSER;
                        break;
                    case "start":
                        symbolCode = GrammarParser.CODE_START;
                        break;
                    case "type":
                        symbolCode = GrammarParser.CODE_TYPE;
                        break;
                    case "userobject":
                        symbolCode = GrammarParser.CODE_USEROBJECT;
                        break;
                    default:
                        reportParsingError(string.Format("Unknown keyword \"%{0}\".", keywordName));
                        break;
                    }
                    matchLength = 1 + keywordName.Length;
                    break;
                case '{':
                    // CODE, a brace-delimited block of user code. Nested braces are counted;
                    // braces inside comments and string/character literals are ignored.
                    int blockDepth = 1;
                    int blockPosition = position + 1;
                    while ((blockDepth > 0) && (blockPosition < input.Length)) {
                        switch (input[blockPosition]) {
                        case '{':
                            blockDepth++;
                            blockPosition++;
                            break;
                        case '}':
                            blockDepth--;
                            blockPosition++;
                            break;
                        case '/':
                            // '\0' stands in for "no following character" when the slash
                            // is the very last character of the input.
                            char following = (blockPosition + 1 < input.Length) ? input[blockPosition + 1] : '\0';
                            if (following == '/') {
                                endOfLine = input.IndexOf('\n', blockPosition + 2);
                                if (endOfLine == -1) {
                                    blockPosition = input.Length;
                                } else {
                                    blockPosition = endOfLine + 1;
                                }
                            } else if (following == '*') {
                                int endOfComment = input.IndexOf("*/", blockPosition + 2);
                                if (endOfComment == -1) {
                                    blockPosition = input.Length;
                                } else {
                                    blockPosition = endOfComment + 2;
                                }
                            } else {
                                blockPosition++;
                            }
                            break;
                        case '\'':
                        case '"':
                            char quote = input[blockPosition];
                            Regex quoteContents = quote == '"' ? re_doubleQuoteContents : re_singleQuoteContents;
                            int literalLength = quoteContents.Match(input, blockPosition + 1).Length;
                            // Skip the opening quote, the contents and the closing quote.
                            blockPosition += 1 + literalLength + 1;
                            // An unterminated literal has no closing quote to skip; do not
                            // run past the end of the input (Substring below would throw).
                            if (blockPosition > input.Length) {
                                blockPosition = input.Length;
                            }
                            break;
                        default:
                            blockPosition++;
                            break;
                        }
                    }
                    symbolCode = GrammarParser.CODE_CODE;
                    matchLength = blockPosition - position;
                    break;
                default:
                    // Whitespace is skipped; anything else has to be an identifier.
                    // Success is checked because a failed Match reports Index 0.
                    var whitespace_match = re_whitespace.Match(input, position);
                    if (whitespace_match.Success && (whitespace_match.Index == position)) {
                        symbolCode = GrammarParser.CODE_SKIP;
                        matchLength = whitespace_match.Length;
                    } else {
                        var identifier_match = re_identifier.Match(input, position);
                        if (identifier_match.Success && (identifier_match.Index == position)) {
                            symbolCode = GrammarParser.CODE_IDENTIFIER;
                            matchLength = identifier_match.Length;
                        } else {
                            reportParsingError(string.Format("Unexpected character '{0}'.", input[position]));
                        }
                    }
                    break;
                }

                // NOTE(review): the code below assumes reportParsingError does not return
                // (presumably it throws); otherwise an empty token with code -2 is produced
                // and the position does not advance - confirm against its definition.
                Token token = new Token();
                token.LineNumber = lineNumber;
                token.ColumnNumber = columnNumber;
                token.SymbolCode = symbolCode;
                token.Value = input.Substring(position, matchLength);

                // Recompute the human-readable position in the text (a matched string
                // may span several lines).
                int lastNewLine = token.Value.LastIndexOf('\n');
                if (lastNewLine == -1)
                {
                    columnNumber += matchLength;
                }
                else
                {
                    columnNumber = token.Value.Length - lastNewLine;
                    foreach (char c in token.Value)
                        if (c == '\n')
                            lineNumber++;
                }

                position += matchLength;
                previousSymbolCode = symbolCode;

                // Tokens with the code -1 are dropped and the following token is returned
                // instead (presumably -1 is the value of GrammarParser.CODE_SKIP - TODO confirm).
                if (token.SymbolCode == -1) {
                    return GetNextToken();
                } else {
                    return token;
                }
            }
            else
            {
                // The input is exhausted: hand out the $end token.
                Token finalToken = new Token();
                finalToken.LineNumber = lineNumber;
                finalToken.ColumnNumber = columnNumber;
                finalToken.SymbolCode = GrammarParser.CODE_END;
                finalToken.Value = "";

                hasTokens = false;

                previousSymbolCode = 0;
                return finalToken;
            }
        }
示例#2
0
文件: Lexer.cs 项目: jirkamarsik/yapg
        /// <summary>
        /// Fetches the next token from the source string.
        /// </summary>
        /// <remarks>
        /// The combined lexer regex is matched at the current position; the first named group
        /// <c>__i</c> that took part in the match selects the symbol code from
        /// <c>groupSymbolCodes</c>. Tokens whose symbol code is -1 are discarded and scanning
        /// continues with the next token. Once the input is exhausted, a token with the symbol
        /// code 0 ($end) and an empty value is returned.
        /// </remarks>
        /// <returns>The next token discovered in the input string.</returns>
        /// <exception cref="ParsingException">
        /// No token definition matches at the current position, or a token definition matched
        /// an empty string so that the lexer could not advance.
        /// </exception>
        public Token GetNextToken()
        {
            // Discarded tokens are skipped by looping rather than by recursing, so that a long
            // run of them cannot exhaust the call stack.
            while (position < input.Length)
            {
                Match match = regex.Match(input, position);

                // If the regex does not match a string starting at the current position, we
                // have hit something that no token definition describes; report it.
                if (!match.Success || (match.Index > position))
                {
                    string unexpectedCharacter;
                    switch (input[position])
                    {
                        case '\n':
                            unexpectedCharacter = "\\n";
                            break;
                        case '\r':
                            unexpectedCharacter = "\\r";
                            break;
                        case '\t':
                            unexpectedCharacter = "\\t";
                            break;
                        default:
                            unexpectedCharacter = input[position].ToString();
                            break;
                    }
                    throw new ParsingException(string.Format("Unexpected character '{0}' encountered by the lexer.",
                                                             unexpectedCharacter), lineNumber, columnNumber);
                }

                // An empty match would leave the position unchanged, so the very same match
                // would be produced over and over again; fail loudly instead of spinning.
                if (match.Length == 0)
                {
                    throw new ParsingException(
                        "A token definition matched an empty string; the lexer cannot make progress.",
                        lineNumber, columnNumber);
                }

                Token token = new Token();
                token.LineNumber = lineNumber;
                token.ColumnNumber = columnNumber;

                // The regex contains capture groups named __0, __1, ...; they are ordered so
                // that lower numbers take precedence (alternatives of the regex | operator are
                // tried left to right). The first group that matched tells us, through
                // groupSymbolCodes, which terminal we are looking at.
                for (int i = 0; i < groupSymbolCodes.Count; i++)
                {
                    if (match.Groups["__" + i.ToString()].Success)
                    {
                        token.SymbolCode = groupSymbolCodes[i];
                        token.Value = match.Value;
                        break;
                    }
                }

                position += match.Length;

                // Recompute the human-readable position in the text (a matched string may
                // span several lines).
                int lastNewLine = match.Value.LastIndexOf('\n');
                if (lastNewLine == -1)
                {
                    columnNumber += match.Length;
                }
                else
                {
                    columnNumber = match.Length - lastNewLine;
                    foreach (char c in match.Value)
                        if (c == '\n')
                            lineNumber++;
                }

                // Symbol code -1 marks a token that is to be thrown away.
                if (token.SymbolCode != -1)
                    return token;
            }

            // The input is exhausted: hand out the $end token (symbol code 0).
            Token finalToken = new Token();
            finalToken.LineNumber = lineNumber;
            finalToken.ColumnNumber = columnNumber;
            finalToken.SymbolCode = 0;
            finalToken.Value = "";

            hasTokens = false;

            return finalToken;
        }
示例#3
0
        /// <summary>
        /// Reads a grammar specification file and builds the <see cref="Grammar"/> it describes.
        /// </summary>
        /// <remarks>
        /// When compiled with the BOOTSTRAP symbol the specification is parsed by the hand-written
        /// code below, which assumes a syntactically well-formed specification (structural tokens
        /// are skipped without being checked). Otherwise the work is delegated to a parser driven
        /// by the runtime data stored in the SpecificationGrammar resource.
        /// </remarks>
        /// <param name="specificationPath">Path of the grammar specification file to read.</param>
        /// <param name="warningMessages">Receives the warnings produced while processing the specification.</param>
        /// <returns>The grammar described by the specification.</returns>
        /// <exception cref="InvalidSpecificationException">
        /// Thrown by the BOOTSTRAP implementation when errors were found; it carries the error
        /// messages followed by the warning messages.
        /// </exception>
        public static Grammar ParseGrammar(string specificationPath, out IList<string> warningMessages)
        {
#if BOOTSTRAP
            GrammarLexer lexer = new GrammarLexer();
            lexer.SourceString = File.ReadAllText(specificationPath);
            Token token = lexer.GetNextToken();

            // Errors and warnings are collected here; when an error occurs we keep reading and
            // only at the very end throw an exception carrying all errors and warnings. If no
            // error is found, the list of warnings is handed back to the caller.
            List<string> errorMessages = new List<string>();
            warningMessages = new List<string>();

            // the code to be inserted at the start of the generated code
            string headerCode = null;

            // Names of all symbols; doubles as the lookup table from a symbol code to its name.
            List<string> symbolNames = new List<string>();
            // The inverse of symbolNames: maps a symbol name to its code.
            Dictionary<string, int> symbolCodes = new Dictionary<string, int>();

            // Regular expressions defining the terminal symbols. They are kept separately rather
            // than merged into the lexer's regex straight away, so that when the combined regex
            // fails to compile we can easily find out which expressions are to blame.
            List<string> regexes = new List<string>();
            // For every expression in regexes, the position where it was found; used for
            // more detailed error messages.
            List<int> regexLines   = new List<int>();
            List<int> regexColumns = new List<int>();
            // For every expression in regexes, the code of the symbol it describes, or -1 when
            // the expression matches strings that are to be ignored. This is runtime data used
            // directly by the lexer.
            List<int> groupSymbolCodes = new List<int>();
            // The resulting regular expression the lexer scans tokens with; the other part of
            // the lexer's runtime data.
            Regex regex = null;

            // Name of the pseudo-terminal whose tokens are discarded instead of being sent
            // to the parser.
            string nullTerminal = "";
            // Global options for the .NET regex engine (case insensitive, multiline...).
            string regexOpts = null;

            if (token.SymbolCode == CODE_HEADER)
            {
                token      = lexer.GetNextToken();
                headerCode = token.Value;
                token      = lexer.GetNextToken();
            }

            // skipping LEXER
            token = lexer.GetNextToken();

            if (token.SymbolCode == CODE_NULL)
            {
                token        = lexer.GetNextToken();
                nullTerminal = token.Value;
                token        = lexer.GetNextToken();
            }

            if (token.SymbolCode == CODE_REGEXOPTS)
            {
                regexOpts = token.Value;
                token     = lexer.GetNextToken();
            }

            symbolNames.Add("$end");
            symbolCodes["$end"] = 0;

            while (token.SymbolCode == CODE_IDENTIFIER)
            {
                string symbol = token.Value;
                token = lexer.GetNextToken();
                // skipping EQUALS
                token = lexer.GetNextToken();
                string capturingRegex = token.Value;
                // Remember where the regex itself starts; once the next token has been read,
                // its position would be reported in the error message instead.
                int regexLine   = token.LineNumber;
                int regexColumn = token.ColumnNumber;
                token = lexer.GetNextToken();

                if (symbol == nullTerminal)
                {
                    groupSymbolCodes.Add(-1);
                }
                else
                {
                    if (!symbolCodes.ContainsKey(symbol))
                    {
                        symbolNames.Add(symbol);
                        symbolCodes[symbol] = symbolNames.Count - 1;
                    }
                    groupSymbolCodes.Add(symbolCodes[symbol]);
                }

                regexes.Add(capturingRegex);
                regexLines.Add(regexLine);
                regexColumns.Add(regexColumn);
            }

            StringBuilder pattern = new StringBuilder();

            if (regexOpts != null)
            {
                pattern.Append(regexOpts);
            }

            for (int i = 0; i < regexes.Count; i++)
            {
                // The user's regular expressions are joined with '|' in the order they were
                // given (in the .NET regex engine the alternatives further to the left take
                // precedence) and each of them is wrapped in a capture group named __i, where
                // i is the zero-based ordinal of the expression.
                if (i != 0)
                {
                    pattern.Append('|');
                }
                pattern.AppendFormat("(?<{0}>{1})", "__" + i.ToString(), regexes[i]);
            }

            try
            {
                regex = new Regex(pattern.ToString(), RegexOptions.Compiled);
            }
            catch (ArgumentException)
            {
                // The combined pattern is invalid; test the parts one by one to find the culprits.
                int errorsBefore = errorMessages.Count;

                // Without the null check, new Regex(null) would throw ArgumentNullException
                // (an ArgumentException) and invalid options would be reported although
                // none were given.
                if (regexOpts != null)
                {
                    try
                    {
                        new Regex(regexOpts);
                    }
                    catch (ArgumentException)
                    {
                        // FIXME: We no longer have the line and column data on regexOpts.
                        errorMessages.Add(string.Format("{0},{1}: The RegEx options are invalid.", -1, -1));
                    }
                }

                for (int i = 0; i < regexes.Count; i++)
                {
                    try
                    {
                        new Regex(regexes[i]);
                    }
                    catch (ArgumentException)
                    {
                        errorMessages.Add(string.Format("{0},{1}: This regular expression is invalid.",
                                                        regexLines[i], regexColumns[i]));
                    }
                }

                // Every part may compile on its own and still break the combined pattern (for
                // instance an (?x) comment swallowing the group's closing parenthesis). Make
                // sure the failure is reported rather than carrying on with a null regex.
                if (errorMessages.Count == errorsBefore)
                {
                    errorMessages.Add("The token definitions could not be combined into a single valid regular expression.");
                }
            }

            int numTerminals = symbolNames.Count;

            // skipping PARSER
            token = lexer.GetNextToken();

            // Nonterminals that appeared on the left-hand side of some rule.
            HashSet<int> reducibleNonterminals = new HashSet<int>();
            // Nonterminals that appeared on the right-hand side of some rule.
            HashSet<int> usedNonterminals = new HashSet<int>();

            bool[] terminalUsed = new bool[numTerminals];

            List<ProductionWithAction> productions = new List<ProductionWithAction>();

            symbolNames.Add("$start");
            symbolCodes["$start"] = numTerminals;
            // <$start> is put here as an exception, since we do not even want anybody to
            // place it on the right-hand side of a rule.
            usedNonterminals.Add(symbolCodes["$start"]);

            // skipping START
            token = lexer.GetNextToken();
            // skipping LANGLE
            token = lexer.GetNextToken();

            string startSymbol = token.Value;

            if (symbolCodes.ContainsKey(startSymbol) && symbolCodes[startSymbol] < numTerminals)
            {
                errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                                token.LineNumber, token.ColumnNumber, startSymbol));
            }

            token = lexer.GetNextToken();

            // skipping RANGLE
            token = lexer.GetNextToken();

            symbolNames.Add(startSymbol);
            symbolCodes[startSymbol] = symbolNames.Count - 1;


            string userObjectType = null;

            if (token.SymbolCode == CODE_USEROBJECT)
            {
                token = lexer.GetNextToken();

                if (token.SymbolCode == CODE_QUOTED)
                {
                    // Strip the surrounding quotes.
                    userObjectType = token.Value.Substring(1, token.Value.Length - 2);
                    token          = lexer.GetNextToken();
                }
                else
                {
                    // A dotted type name: identifier (DOT identifier)*
                    StringBuilder typeBuilder = new StringBuilder(token.Value);
                    token = lexer.GetNextToken();
                    while (token.SymbolCode == CODE_DOT)
                    {
                        typeBuilder.Append(".");
                        token = lexer.GetNextToken();
                        typeBuilder.Append(token.Value);
                        token = lexer.GetNextToken();
                    }

                    userObjectType = typeBuilder.ToString();
                }
            }

            // Our rule number 0, $start -> <start symbol> $end, which captures the way the
            // grammar is augmented.
            productions.Add(new ProductionWithAction(new Production(
                                                         symbolCodes["$start"], new int[] { symbolCodes[startSymbol], symbolCodes["$end"] }), "{ return _1; }"));

            reducibleNonterminals.Add(symbolCodes["$start"]);
            usedNonterminals.Add(symbolCodes[startSymbol]);
            terminalUsed[symbolCodes["$end"]] = true;

            var typeMappings = new Dictionary<string, string>();

            // Processing of the rules.
            while (token.SymbolCode != CODE_END)
            {
                if (token.SymbolCode == CODE_TYPE)
                {
                    token = lexer.GetNextToken();

                    // skipping LANGLE
                    token = lexer.GetNextToken();
                    string nonterminal = token.Value;

                    if (symbolCodes.ContainsKey(nonterminal) && symbolCodes[nonterminal] < numTerminals)
                    {
                        errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                                        token.LineNumber, token.ColumnNumber, nonterminal));
                    }

                    if (!symbolCodes.ContainsKey(nonterminal))
                    {
                        symbolNames.Add(nonterminal);
                        symbolCodes[nonterminal] = symbolNames.Count - 1;
                    }

                    token = lexer.GetNextToken();
                    // skipping RANGLE
                    token = lexer.GetNextToken();

                    if (token.SymbolCode == CODE_QUOTED)
                    {
                        // QUOTED; strip the surrounding quotes.
                        typeMappings.Add(nonterminal, token.Value.Substring(1, token.Value.Length - 2));
                        token = lexer.GetNextToken();
                    }
                    else
                    {
                        // A dotted type name: identifier (DOT identifier)*
                        StringBuilder typeBuilder = new StringBuilder(token.Value);
                        token = lexer.GetNextToken();
                        while (token.SymbolCode == CODE_DOT)
                        {
                            typeBuilder.Append(".");
                            token = lexer.GetNextToken();
                            typeBuilder.Append(token.Value);
                            token = lexer.GetNextToken();
                        }

                        typeMappings.Add(nonterminal, typeBuilder.ToString());
                    }
                }
                else
                {
                    // Extract the symbol on the left-hand side and process it.
                    // skipping LANGLE
                    token = lexer.GetNextToken();

                    string lhsSymbol = token.Value;

                    if (symbolCodes.ContainsKey(lhsSymbol) && symbolCodes[lhsSymbol] < numTerminals)
                    {
                        errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                                        token.LineNumber, token.ColumnNumber, lhsSymbol));
                    }

                    if (!symbolCodes.ContainsKey(lhsSymbol))
                    {
                        symbolNames.Add(lhsSymbol);
                        symbolCodes[lhsSymbol] = symbolNames.Count - 1;
                    }

                    // HashSet.Add is a no-op for elements that are already present.
                    reducibleNonterminals.Add(symbolCodes[lhsSymbol]);

                    token = lexer.GetNextToken();

                    // skipping RANGLE
                    token = lexer.GetNextToken();


                    int lhsSymbolCode = symbolCodes[lhsSymbol];

                    // Process the right-hand side, which may consist of several symbol lists
                    // separated by '|'. Each of the lists yields one rule without alternatives.
                    while ((token.SymbolCode == CODE_DERIVES) || (token.SymbolCode == CODE_OR))
                    {
                        token = lexer.GetNextToken();
                        List<int> rhsSymbols = new List<int>();

                        // Every alternative is terminated by its action code block.
                        while (token.SymbolCode != CODE_CODE)
                        {
                            int rhsSymbolCode = -1;
                            if (token.SymbolCode == CODE_LANGLE)
                            {
                                // skipping LANGLE
                                token = lexer.GetNextToken();

                                string rhsSymbol = token.Value;

                                if (symbolCodes.ContainsKey(rhsSymbol) && symbolCodes[rhsSymbol] < numTerminals)
                                {
                                    errorMessages.Add(string.Format("{0},{1}: The nonterminal <{2}> shares it's name with a terminal symbol.",
                                                                    token.LineNumber, token.ColumnNumber, rhsSymbol));
                                }

                                if (!symbolCodes.ContainsKey(rhsSymbol))
                                {
                                    symbolNames.Add(rhsSymbol);
                                    symbolCodes[rhsSymbol] = symbolNames.Count - 1;
                                }

                                // HashSet.Add is a no-op for elements that are already present.
                                usedNonterminals.Add(symbolCodes[rhsSymbol]);

                                token = lexer.GetNextToken();

                                // skipping RANGLE
                                token = lexer.GetNextToken();

                                rhsSymbolCode = symbolCodes[rhsSymbol];
                            }
                            else
                            {
                                string rhsSymbol = token.Value;

                                if (!symbolCodes.ContainsKey(rhsSymbol))
                                {
                                    // The code stays -1; the recorded error makes the method
                                    // throw before the productions are put to any use.
                                    errorMessages.Add(string.Format("{0},{1}: The terminal '{2}' is used but not defined.",
                                                                    token.LineNumber, token.ColumnNumber, rhsSymbol));
                                }
                                else
                                {
                                    rhsSymbolCode = symbolCodes[rhsSymbol];
                                    terminalUsed[rhsSymbolCode] = true;
                                }

                                token = lexer.GetNextToken();
                            }

                            rhsSymbols.Add(rhsSymbolCode);
                        }

                        string code = token.Value;
                        token = lexer.GetNextToken();

                        productions.Add(new ProductionWithAction(new Production(lhsSymbolCode, rhsSymbols), code));
                    }
                }
            }

            // ToArray forces the lazy Intersect to be evaluated now; otherwise it would be
            // enumerated while the sets it reads from are being modified below.
            int[] theGoodOnes = usedNonterminals.Intersect(reducibleNonterminals).ToArray();
            usedNonterminals.ExceptWith(theGoodOnes);
            reducibleNonterminals.ExceptWith(theGoodOnes);

            foreach (int nonterminal in usedNonterminals)
            {
                warningMessages.Add(string.Format("Warning: The nonterminal <{0}> isn't reducible.",
                                                  symbolNames[nonterminal]));
            }
            foreach (int nonterminal in reducibleNonterminals)
            {
                warningMessages.Add(string.Format("Warning: The nonterminal <{0}> is defined but never used.", symbolNames[nonterminal]));
            }

            for (int terminal = 0; terminal < numTerminals; terminal++)
            {
                if (!terminalUsed[terminal])
                {
                    warningMessages.Add(string.Format("Warning: The terminal '{0}' is defined but never used.", symbolNames[terminal]));
                }
            }


            if (errorMessages.Count > 0)
            {
                throw new InvalidSpecificationException(errorMessages.Concat(warningMessages));
            }


            // Everything has been read and checked. What remains is to sort the rules by their
            // left-hand side, renumber them and compute, for every nonterminal, the index at
            // which the rules of that nonterminal begin.
            Production[] productionsArray = new Production[productions.Count];
            productionsArray[0] = productions[0].Production;
            string[] actions = new string[productions.Count];
            actions[0] = productions[0].Action;
            IEnumerable<ProductionWithAction> sortedProductions =
                productions.GetRange(1, productions.Count - 1).OrderBy((prod => prod.Production.LHSSymbol));

            int k = 1;
            foreach (ProductionWithAction productionWithAction in sortedProductions)
            {
                productionsArray[k] = productionWithAction.Production;
                productionsArray[k].ProductionCode = k;
                actions[k] = productionWithAction.Action;
                k++;
            }

            int numNonterminals = symbolCodes.Count - numTerminals;

            // One extra slot at the end holds the offset just past the last rule.
            int[] nonterminalProductionOffset = new int[numNonterminals + 1];

            int offset = 0;
            for (int nonterminal = 0; nonterminal < numNonterminals; nonterminal++)
            {
                nonterminalProductionOffset[nonterminal] = offset;
                while ((offset < productionsArray.Length) &&
                       (productionsArray[offset].LHSSymbol == numTerminals + nonterminal))
                {
                    offset++;
                }
            }
            nonterminalProductionOffset[nonterminalProductionOffset.Length - 1] = offset;

            string[] nonterminalTypes = new string[numNonterminals];
            foreach (var typeMapping in typeMappings)
            {
                nonterminalTypes[symbolCodes[typeMapping.Key] - numTerminals] = typeMapping.Value;
            }

            // Now just wrap everything up and hand it over.
            GrammarDefinition grammarDefinition = new GrammarDefinition(symbolNames.ToArray(), productionsArray, nonterminalProductionOffset, numTerminals);
            LexerData         lexerData         = new LexerData(regex, groupSymbolCodes);
            GrammarCode       grammarCode       = new GrammarCode(headerCode, actions, nonterminalTypes, userObjectType);
            Grammar           grammar           = new Grammar(grammarDefinition, lexerData, grammarCode);

            return grammar;
#else
            // Load the lexer and parser tables of the specification language from the
            // SpecificationGrammar resource and let the table-driven parser do the work.
            LexerData  lexerData;
            ParserData parserData;
            Grammar.ReadRuntimeDataFromStream(
                new MemoryStream(YetAnotherParserGenerator.Properties.Resources.SpecificationGrammar),
                out lexerData, out parserData);

            GrammarLexer lexer  = new GrammarLexer();
            Parser       parser = new Parser(parserData);

            GrammarParserLocals locals = new GrammarParserLocals(specificationPath, out warningMessages);
            lexer.SourceString = File.ReadAllText(specificationPath);
            return (Grammar)parser.Parse(lexer, locals);
#endif
        }