Inheritance: Irony.Parsing.Terminal
Example #1
0
        /// <summary>
        /// Builds the search-query grammar: declares the terminals and non-terminals,
        /// wires the production rules rooted at the "Expression" non-terminal, and then
        /// registers operator precedence, brace pairs and the symbols that must not be
        /// listed as "expected" in error messages.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the <c>false</c> handed to the base constructor is presumably the
        /// base grammar's case-sensitivity flag - confirm against the base class.
        /// </remarks>
        public SearchGrammar() : base(false)
        {
            GrammarComments =
                "Google-to-SQL full-text query format converter. Based on original project by Michael Coles.\r\n" +
                "http://www.sqlservercentral.com/articles/Full-Text+Search+(2008)/64248/ \r\n" +
                "Slightly revised to work with latest version of Irony. ";

            // ---- Terminals ----
            var term = CreateTerm("Term");
            var phrase = new StringLiteral("Phrase", "\"");
            // Stands in for the AND operator when no explicit operator is written between two expressions.
            var impliedAnd = new ImpliedSymbolTerminal("ImpliedAnd");

            // ---- Non-terminals (all declared up front so rules can reference each other freely) ----
            var binaryExpr = new NonTerminal("BinaryExpression");
            var binaryOp = new NonTerminal("BinaryOp");
            var expr = new NonTerminal("Expression");
            var primaryExpr = new NonTerminal("PrimaryExpression");
            var thesaurusExpr = new NonTerminal("ThesaurusExpression");
            var thesaurusOp = new NonTerminal("ThesaurusOperator");
            var exactExpr = new NonTerminal("ExactExpression");
            var parenExpr = new NonTerminal("ParenthesizedExpression");
            var proximityExpr = new NonTerminal("ProximityExpression");
            var proximityList = new NonTerminal("ProximityList");

            // ---- Production rules ----
            Root = expr;
            expr.Rule = primaryExpr | binaryExpr;
            binaryExpr.Rule = expr + binaryOp + expr;
            binaryOp.Rule = impliedAnd | "and" | "&" | "-" | "or" | "|";
            primaryExpr.Rule = term
                             | thesaurusExpr
                             | exactExpr
                             | parenExpr
                             | phrase
                             | proximityExpr;
            thesaurusExpr.Rule = "~" + term;
            exactExpr.Rule = "+" + term | "+" + phrase;
            parenExpr.Rule = "(" + expr + ")";
            proximityExpr.Rule = "<" + proximityList + ">";
            // One or more terms between the angle brackets.
            MakePlusRule(proximityList, term);

            // ---- Parse-tree shaping ----
            MarkTransient(primaryExpr, expr, proximityExpr, parenExpr, binaryOp);
            MarkPunctuation("<", ">", "(", ")");

            // ---- Operator precedence: "or" forms (10) are registered below the "and" forms (20) ----
            RegisterOperators(10, "or", "|");
            RegisterOperators(20, "and", "&", "-");
            RegisterOperators(20, impliedAnd);

            // Brace pairs are registered to improve error reporting.
            RegisterBracePair("(", ")");
            RegisterBracePair("<", ">");

            // ImpliedAnd is not a real symbol, so it must not be reported as an expected symbol...
            AddToNoReportGroup(impliedAnd);
            // ...and neither should the braces.
            AddToNoReportGroup("(", ")", "<", ">");

            LanguageFlags = LanguageFlags | LanguageFlags.CanRunSample;
        }
        /// <summary>
        /// Builds the Excel formula grammar. The constructor proceeds in the order of its regions:
        /// terminals (symbols/operators, literals, function-name tokens, reference and name tokens,
        /// punctuation), non-terminal declarations, production rules rooted at <c>Start</c>, and
        /// finally the operator precedence table.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the <c>false</c> handed to the base constructor is presumably the
        /// base grammar's case-sensitivity flag - confirm against the base class.
        /// </remarks>
        public ExcelFormulaGrammar() : base(false)
        {
            #region 1-Terminals

            #region Symbols and operators
            var comma = ToTerm(",");
            var colon = ToTerm(":");
            var semicolon = ToTerm(";");
            var OpenParen = ToTerm("(");
            var CloseParen = ToTerm(")");
            var CloseSquareParen = ToTerm("]");
            var OpenSquareParen = ToTerm("[");
            var exclamationMark = ToTerm("!");
            var CloseCurlyParen = ToTerm("}");
            var OpenCurlyParen = ToTerm("{");

            var mulop = ToTerm("*");
            var plusop = ToTerm("+");
            var divop = ToTerm("/");
            var minop = ToTerm("-");
            var concatop = ToTerm("&");
            var expop = ToTerm("^");
            // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
            // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
            var intersectop = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

            var percentop = ToTerm("%");

            var gtop = ToTerm(">");
            var eqop = ToTerm("=");
            var ltop = ToTerm("<");
            var neqop = ToTerm("<>");
            var gteop = ToTerm(">=");
            var lteop = ToTerm("<=");
            #endregion

            #region Literals
            var BoolToken = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE");
            BoolToken.Priority = TerminalPriority.Bool;

            var NumberToken = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None);
            // Integer literals are tried as Int32, then Int64, then big integer.
            NumberToken.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt };

            var TextToken = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak);

            // #REF! is deliberately not part of ErrorToken: it gets its own terminal because
            // the rules below accept it as a reference item (RefError), not only as a constant.
            var ErrorToken = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
            var RefErrorToken = ToTerm("#REF!", GrammarNames.TokenRefError);
            #endregion

            #region Functions

            // Every function-name regex below ends with an escaped '(' so the opening parenthesis is
            // consumed as part of the token; the call rules therefore only append CloseParen.

            // User-defined function: optional "_xll." prefix, then letters/digits/underscores/dots.
            var UDFToken = new RegexBasedTerminal(GrammarNames.TokenUDF, "(_xll\\.)?[a-zA-Z0-9_.]+\\(");
            UDFToken.Priority = TerminalPriority.UDF;

            var ExcelRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(");
            ExcelRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;
            
            // Shares the ExcelRefFunction priority with the token above.
            var ExcelConditionalRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(");
            ExcelConditionalRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;

            // excelFunctionList is declared outside this constructor; its entries are joined into one alternation.
            var ExcelFunction = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + String.Join("|", excelFunctionList)  +")\\(");
            ExcelFunction.Priority = TerminalPriority.ExcelFunction;

            // Using this instead of Empty allows more accurate trees
            var EmptyArgumentToken = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

            #endregion

            #region References and names

            // Column range such as A:B and row range such as 1:10; each side may carry a leading '$'.
            var VRangeToken = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}");
            var HRangeToken = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
            
            // Cell address: optional '$', 1-4 capital letters, optional '$', row number without leading zero.
            const string CellTokenRegex = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*";
            var CellToken = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex);
            CellToken.Priority = TerminalPriority.CellToken;

            // First character: a letter, a backslash or an underscore; then any run of word characters and dots.
            const string NamedRangeRegex = @"[A-Za-z\\_][\w\.]*";
            var NamedRangeToken = new RegexBasedTerminal(GrammarNames.TokenNamedRange, NamedRangeRegex);
            NamedRangeToken.Priority = TerminalPriority.NamedRange;

            // To prevent e.g. "A1A1" being parsed as 2 celltokens
            var NamedRangeCombinationToken = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, "(TRUE|FALSE|" + CellTokenRegex + ")" + NamedRangeRegex);
            NamedRangeCombinationToken.Priority = TerminalPriority.NamedRangeCombination;

            // Characters accepted inside a single-quoted name (used as the body of a regex character class).
            const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
            // Sheet prefix: either an unquoted run of word characters/dots, or a single-quoted name
            // that may contain doubled single quotes (''); in both cases followed by '!'.
            const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";

            var SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, sheetRegEx);
            SheetToken.Priority = TerminalPriority.SheetToken;

            // Multiple-sheet prefix: an alphanumeric first sheet name and ':' in front of a regular sheet prefix.
            const string firstSheetName = "[a-zA-Z0-9]+:";
            var MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, firstSheetName + sheetRegEx);
            MultipleSheetsToken.Priority = TerminalPriority.MultipleSheetsToken;

            // Numeric file index; the File rule below wraps it in square brackets.
            var FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, "[0-9]+");
            FileToken.Priority = TerminalPriority.FileToken;;

            // Quoted prefix holding both a bracketed numeric file index and a sheet name, ending in '!.
            var QuotedFileSheetToken = new RegexBasedTerminal(GrammarNames.TokenFileSheetQuoted, @"'\[\d+\]([" + singleQuotedContent + @"]|'')+'!");
            QuotedFileSheetToken.Priority = TerminalPriority.QuotedFileToken;

            var ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+");
            ReservedNameToken.Priority = TerminalPriority.ReservedName;

            // Single-quoted DDE item; unlike the sheet names above it also admits '[' and ']'.
            var DDEToken = new RegexBasedTerminal(GrammarNames.TokenDDE, @"'([\[\]" + singleQuotedContent + @"]|'')+'");

            #endregion

            #region Punctuation
            MarkPunctuation(exclamationMark);
            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenSquareParen, CloseSquareParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
            #endregion
            #endregion

            #region 2-NonTerminals
            // Most nonterminals are first defined here, so they can be used anywhere in the rules
            // Otherwise you can only use nonterminals that have been defined previously

            var Argument = new NonTerminal(GrammarNames.Argument);
            var Arguments = new NonTerminal(GrammarNames.Arguments);
            var ArrayColumns = new NonTerminal(GrammarNames.ArrayColumns);
            var ArrayConstant = new NonTerminal(GrammarNames.ArrayConstant);
            var ArrayFormula = new NonTerminal(GrammarNames.ArrayFormula);
            var ArrayRows = new NonTerminal(GrammarNames.ArrayRows);
            var Bool = new NonTerminal(GrammarNames.Bool);
            var Cell = new NonTerminal(GrammarNames.Cell);
            var Constant = new NonTerminal(GrammarNames.Constant);
            var ConstantArray = new NonTerminal(GrammarNames.ConstantArray);
            var DynamicDataExchange = new NonTerminal(GrammarNames.DynamicDataExchange);
            var EmptyArgument = new NonTerminal(GrammarNames.EmptyArgument);
            var Error = new NonTerminal(GrammarNames.Error);
            var File = new NonTerminal(GrammarNames.File);
            var Formula = new NonTerminal(GrammarNames.Formula);
            var FormulaWithEq = new NonTerminal(GrammarNames.FormulaWithEq);
            var FunctionCall = new NonTerminal(GrammarNames.FunctionCall);
            var FunctionName = new NonTerminal(GrammarNames.FunctionName);
            var HRange = new NonTerminal(GrammarNames.HorizontalRange);
            var InfixOp = new NonTerminal(GrammarNames.TransientInfixOp);
            var MultipleSheets = new NonTerminal(GrammarNames.MultipleSheets);
            var NamedRange = new NonTerminal(GrammarNames.NamedRange);
            var Number = new NonTerminal(GrammarNames.Number);
            var PostfixOp = new NonTerminal(GrammarNames.TransientPostfixOp);
            var Prefix = new NonTerminal(GrammarNames.Prefix);
            var PrefixOp = new NonTerminal(GrammarNames.TransientPrefixOp);
            var QuotedFileSheet = new NonTerminal(GrammarNames.QuotedFileSheet);
            var Reference = new NonTerminal(GrammarNames.Reference);
            //var ReferenceFunction = new NonTerminal(GrammarNames.ReferenceFunction);
            var ReferenceItem = new NonTerminal(GrammarNames.TransientReferenceItem);
            var ReferenceFunctionCall = new NonTerminal(GrammarNames.ReferenceFunctionCall);
            var RefError = new NonTerminal(GrammarNames.RefError);
            var RefFunctionName = new NonTerminal(GrammarNames.RefFunctionName);
            var ReservedName = new NonTerminal(GrammarNames.ReservedName);
            var Sheet = new NonTerminal(GrammarNames.Sheet);
            var Start = new NonTerminal(GrammarNames.TransientStart);
            var Text = new NonTerminal(GrammarNames.Text);
            var UDFName = new NonTerminal(GrammarNames.UDFName);
            var UDFunctionCall = new NonTerminal(GrammarNames.UDFunctionCall);
            var Union = new NonTerminal(GrammarNames.Union);
            var VRange = new NonTerminal(GrammarNames.VerticalRange);
            #endregion


            #region 3-Rules

            #region Base rules
            Root = Start;

            // Accepted inputs: "=formula", a bare formula, or an array formula "{=formula}".
            Start.Rule = FormulaWithEq
                         | Formula
                         | ArrayFormula
                         ;
            MarkTransient(Start);

            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                Reference
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen
                | ReservedName
                ;
            //MarkTransient(Formula);

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule = Number
                            | Text
                            | Bool
                            | Error
                            ;

            // Each literal token is wrapped in a non-terminal of its own.
            Text.Rule = TextToken;
            Number.Rule = NumberToken;
            Bool.Rule = BoolToken;
            Error.Rule = ErrorToken;
            RefError.Rule = RefErrorToken;
            #endregion

            #region Functions

            // Operator applications (prefix, postfix, infix) are modelled as FunctionCall alternatives,
            // next to the regular "NAME(args)" form.
            FunctionCall.Rule =
                  FunctionName + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
                ;
                
            FunctionName.Rule = ExcelFunction;

            // Zero or more comma-separated arguments.
            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
            //Arguments.Rule = Argument | Argument + comma + Arguments;

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule = Formula | EmptyArgument;
            //MarkTransient(Argument);

            PrefixOp.Rule =
                ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                  expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);
            #endregion

            #region References

            Reference.Rule = ReferenceItem
                | ReferenceFunctionCall
                | OpenParen + Reference + PreferShiftHere() + CloseParen
                | Prefix + ReferenceItem
                | DynamicDataExchange
                ;

            // Range (':'), intersection (implied space), parenthesized union and
            // reference-returning function calls all produce a ReferenceFunctionCall node.
            ReferenceFunctionCall.Rule =
                  Reference + colon + Reference
                | Reference + intersectop + Reference
                | OpenParen + Union + CloseParen
                | RefFunctionName + Arguments + CloseParen
                //| ConditionalRefFunctionName + Arguments + CloseParen
                ;

            RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

            // One or more comma-separated references.
            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                Cell
                | NamedRange
                | VRange
                | HRange
                | RefError
                | UDFunctionCall
                ;
            MarkTransient(ReferenceItem);

            UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
            UDFName.Rule = UDFToken;

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;
            
            //ConditionalRefFunctionName.Rule = ExcelConditionalRefFunctionToken;

            QuotedFileSheet.Rule = QuotedFileSheetToken;
            Sheet.Rule = SheetToken;
            MultipleSheets.Rule = MultipleSheetsToken;

            Cell.Rule = CellToken;

            // File reference: numeric index in square brackets.
            File.Rule = OpenSquareParen + FileToken + CloseSquareParen;

            DynamicDataExchange.Rule = File + exclamationMark + DDEToken;

            NamedRange.Rule = NamedRangeToken | NamedRangeCombinationToken;

            // All accepted forms of the location prefix that may precede a reference item.
            Prefix.Rule =
                Sheet
                | File + Sheet
                | File + exclamationMark
                | QuotedFileSheet
                | MultipleSheets
                | File + MultipleSheets;

            #endregion

            #region Arrays
            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

            // Inside the braces, ';' separates ArrayRows entries and ',' separates the constants within each entry.
            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

            // Array elements: a constant, a signed number, or #REF!.
            ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
            #endregion

            #endregion

            #region 5-Operator Precedence            
            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
            RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
            RegisterOperators(Precedence.Union, Associativity.Left, comma);
            RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
            RegisterOperators(Precedence.Range, Associativity.Left, colon);

            //RegisterOperators(Precedence.ParameterSeparator, comma);

            #endregion
        }