Example #1
0
        /// <summary>
        /// Builds the search-query grammar described in <c>GrammarComments</c>: declares the
        /// terminals and non-terminals, defines the production rules, and registers operator
        /// precedence, punctuation, brace pairs and error-reporting exclusions.
        /// </summary>
        /// <remarks>
        /// <c>false</c> is passed to the base constructor.
        /// NOTE(review): presumably this is the grammar's case-sensitivity flag, which would make
        /// keywords such as "and"/"or" case-insensitive - confirm against the base class.
        /// </remarks>
        public SearchGrammar() : base(false)
        {
            this.GrammarComments =
                "Google-to-SQL full-text query format converter. Based on original project by Michael Coles.\r\n" +
                "http://www.sqlservercentral.com/articles/Full-Text+Search+(2008)/64248/ \r\n" +
                "Slightly revised to work with latest version of Irony. ";

            // Terminals
            var Term       = CreateTerm("Term");                      // built by a helper defined elsewhere in this class
            var Phrase     = new StringLiteral("Phrase", "\"");       // double-quoted phrase
            var ImpliedAnd = new ImpliedSymbolTerminal("ImpliedAnd"); // operator with no text of its own in the input

            // NonTerminals
            var BinaryExpression        = new NonTerminal("BinaryExpression");
            var BinaryOp                = new NonTerminal("BinaryOp");
            var Expression              = new NonTerminal("Expression");
            var PrimaryExpression       = new NonTerminal("PrimaryExpression");
            var ThesaurusExpression     = new NonTerminal("ThesaurusExpression");
            var ExactExpression         = new NonTerminal("ExactExpression");
            var ParenthesizedExpression = new NonTerminal("ParenthesizedExpression");
            var ProximityExpression     = new NonTerminal("ProximityExpression");
            var ProximityList           = new NonTerminal("ProximityList");

            // Rules
            this.Root              = Expression;
            Expression.Rule        = PrimaryExpression | BinaryExpression;
            BinaryExpression.Rule  = Expression + BinaryOp + Expression;
            BinaryOp.Rule          = ImpliedAnd | "and" | "&" | "-" | "or" | "|";
            PrimaryExpression.Rule = Term
                                     | ThesaurusExpression
                                     | ExactExpression
                                     | ParenthesizedExpression
                                     | Phrase
                                     | ProximityExpression;
            ThesaurusExpression.Rule     = "~" + Term;                // ~term
            ExactExpression.Rule         = "+" + Term | "+" + Phrase; // +term or +"phrase"
            ParenthesizedExpression.Rule = "(" + Expression + ")";
            ProximityExpression.Rule     = "<" + ProximityList + ">"; // <term term ...>
            MakePlusRule(ProximityList, Term);                        // one or more terms

            // Wrapper non-terminals are marked transient and the bracket symbols are marked as punctuation.
            MarkTransient(PrimaryExpression, Expression, ProximityExpression, ParenthesizedExpression, BinaryOp);
            MarkPunctuation("<", ">", "(", ")");

            // The "or" operators are registered with precedence 10; the "and"-group operators
            // (including "-" and the implied and) are registered with the higher value 20.
            RegisterOperators(10, "or", "|");
            RegisterOperators(20, "and", "&", "-");
            RegisterOperators(20, ImpliedAnd);

            // Register brace pairs to improve error reporting
            RegisterBracePair("(", ")");
            RegisterBracePair("<", ">");

            // Do not report ImpliedAnd as an expected symbol - it is not really a symbol
            this.AddToNoReportGroup(ImpliedAnd);
            // Also do not report braces as expected
            this.AddToNoReportGroup("(", ")", "<", ">");

            LanguageFlags |= LanguageFlags.CanRunSample;
        }
Example #2
0
        /// <summary>
        /// Sets up the full-text search query grammar: terminals, non-terminals, production
        /// rules, operator precedence, punctuation and brace pairs.
        /// The base constructor is called with <c>false</c> to make the grammar case insensitive.
        /// </summary>
        private FtsGrammarHelper() : base(false)
        {
            // --- Terminals ---
            var term       = CreateTerm("Term");
            var phrase     = new StringLiteral("Phrase", "\"");
            var impliedAnd = new ImpliedSymbolTerminal("ImpliedAnd");

            // --- Non-terminals ---
            var binaryExpr   = new NonTerminal("BinaryExpression");
            var binaryOp     = new NonTerminal("BinaryOp");
            var expr         = new NonTerminal("Expression");
            var primaryExpr  = new NonTerminal("PrimaryExpression");
            var thesaurus    = new NonTerminal("ThesaurusExpression");
            var exact        = new NonTerminal("ExactExpression");
            var parenthesized = new NonTerminal("ParenthesizedExpression");
            var proximity    = new NonTerminal("ProximityExpression");
            var proximityList = new NonTerminal("ProximityList");

            // --- Production rules ---
            this.Root = expr;

            expr.Rule       = primaryExpr | binaryExpr;
            binaryExpr.Rule = expr + binaryOp + expr;
            binaryOp.Rule   = impliedAnd | "and" | "&" | "-" | "or" | "|";

            primaryExpr.Rule = term | thesaurus | exact | parenthesized | phrase | proximity;

            thesaurus.Rule     = "~" + term;
            exact.Rule         = "+" + term | "+" + phrase;
            parenthesized.Rule = "(" + expr + ")";
            proximity.Rule     = "<" + proximityList + ">";
            this.MakePlusRule(proximityList, term);

            // --- Tree shaping: transient wrapper nodes and bracket punctuation ---
            this.MarkTransient(primaryExpr, expr, proximity, parenthesized, binaryOp);
            this.MarkPunctuation("<", ">", "(", ")");

            // --- Operator precedence: "or" group at 10, "and" group (incl. implied and) at 20 ---
            this.RegisterOperators(10, "or", "|");
            this.RegisterOperators(20, "and", "&", "-");
            this.RegisterOperators(20, impliedAnd);

            // --- Error reporting ---
            // Brace pairs are registered to improve error reporting.
            this.RegisterBracePair("(", ")");
            this.RegisterBracePair("<", ">");

            // ImpliedAnd is not really a symbol, so it is kept out of the "expected" list;
            // the braces are kept out of it as well.
            this.AddToNoReportGroup(impliedAnd);
            this.AddToNoReportGroup("(", ")", "<", ">");
        }
Example #3
0
        /// <summary>
        /// Builds the Excel formula grammar in four steps, mirrored by the regions below:
        /// terminals (symbols, literals, function tokens, reference tokens), non-terminals,
        /// production rules, and operator precedence/associativity.
        /// </summary>
        /// <remarks>
        /// <c>false</c> is passed to the base constructor.
        /// NOTE(review): presumably this is the grammar's case-sensitivity flag - confirm against the base class.
        /// </remarks>
        public ExcelFormulaGrammar() : base(false)
        {
            #region 1-Terminals

            #region Symbols and operators
            var comma            = ToTerm(",");
            var colon            = ToTerm(":");
            var semicolon        = ToTerm(";");
            var OpenParen        = ToTerm("(");
            var CloseParen       = ToTerm(")");
            var CloseSquareParen = ToTerm("]");
            var OpenSquareParen  = ToTerm("[");
            var exclamationMark  = ToTerm("!");
            var CloseCurlyParen  = ToTerm("}");
            var OpenCurlyParen   = ToTerm("{");

            var mulop    = ToTerm("*");
            var plusop   = ToTerm("+");
            var divop    = ToTerm("/");
            var minop    = ToTerm("-");
            var concatop = ToTerm("&");
            var expop    = ToTerm("^");
            // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
            // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
            var intersectop = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

            var percentop = ToTerm("%");

            var gtop  = ToTerm(">");
            var eqop  = ToTerm("=");
            var ltop  = ToTerm("<");
            var neqop = ToTerm("<>");
            var gteop = ToTerm(">=");
            var lteop = ToTerm("<=");
            #endregion

            #region Literals
            var BoolToken = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE");
            BoolToken.Priority = TerminalPriority.Bool;

            var NumberToken = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None);
            NumberToken.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt };

            var TextToken = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak);

            var ErrorToken    = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
            // #REF! is a separate terminal because it is used as a reference item (see RefError below), not as a constant
            var RefErrorToken = ToTerm("#REF!", GrammarNames.TokenRefError);
            #endregion

            #region Functions

            // All function tokens include the opening parenthesis, so the rules that use them
            // only append the arguments and the closing parenthesis.
            var UDFToken = new RegexBasedTerminal(GrammarNames.TokenUDF, "(_xll\\.)?[a-zA-Z0-9_.]+\\(");
            UDFToken.Priority = TerminalPriority.UDF;

            var ExcelRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(");
            ExcelRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;

            // Alternation of every name in excelFunctionList (declared outside this constructor)
            var ExcelFunction = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + String.Join("|", excelFunctionList) + ")\\(");
            ExcelFunction.Priority = TerminalPriority.ExcelFunction;

            // Using this instead of Empty allows more accurate trees
            var EmptyArgumentToken = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

            #endregion

            #region References and names

            var VRangeToken = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}");
            var HRangeToken = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");

            const string CellTokenRegex = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*";
            var          CellToken      = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex);
            CellToken.Priority = TerminalPriority.CellToken;

            // Verbatim string: "\\" is a regex-escaped backslash, so a name may start with a letter, a backslash or an underscore
            const string NamedRangeRegex = @"[A-Za-z\\_][\w\.]*";
            var          NamedRangeToken = new RegexBasedTerminal(GrammarNames.TokenNamedRange, NamedRangeRegex);
            NamedRangeToken.Priority = TerminalPriority.NamedRange;

            // To prevent e.g. "A1A1" being parsed as 2 celltokens
            var NamedRangeCombinationToken = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, "(TRUE|FALSE|" + CellTokenRegex + ")" + NamedRangeRegex);
            NamedRangeCombinationToken.Priority = TerminalPriority.NamedRangeCombination;

            // Characters allowed between single quotes; a literal quote inside is written as '' (handled in the regexes below)
            const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
            // Sheet name, either bare or single-quoted, followed by '!'
            const string sheetRegEx          = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";

            var SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, sheetRegEx);
            SheetToken.Priority = TerminalPriority.SheetToken;

            const string firstSheetName      = "[a-zA-Z0-9]+:";
            var          MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, firstSheetName + sheetRegEx);
            MultipleSheetsToken.Priority = TerminalPriority.MultipleSheetsToken;

            var FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, "[0-9]+");
            FileToken.Priority = TerminalPriority.FileToken;

            var QuotedFileSheetToken = new RegexBasedTerminal(GrammarNames.TokenFileSheetQuoted, @"'\[\d+\]([" + singleQuotedContent + @"]|'')+'!");
            QuotedFileSheetToken.Priority = TerminalPriority.QuotedFileToken;

            var ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+");
            ReservedNameToken.Priority = TerminalPriority.ReservedName;

            var DDEToken = new RegexBasedTerminal(GrammarNames.TokenDDE, @"'([\[\]" + singleQuotedContent + @"]|'')+'");

            #endregion

            #region Punctuation
            MarkPunctuation(exclamationMark);
            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenSquareParen, CloseSquareParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
            #endregion
            #endregion

            #region 2-NonTerminals
            // Most nonterminals are first defined here, so they can be used anywhere in the rules
            // Otherwise you can only use nonterminals that have been defined previously

            var Argument            = new NonTerminal(GrammarNames.Argument);
            var Arguments           = new NonTerminal(GrammarNames.Arguments);
            var ArrayColumns        = new NonTerminal(GrammarNames.ArrayColumns);
            var ArrayConstant       = new NonTerminal(GrammarNames.ArrayConstant);
            var ArrayFormula        = new NonTerminal(GrammarNames.ArrayFormula);
            var ArrayRows           = new NonTerminal(GrammarNames.ArrayRows);
            var Bool                = new NonTerminal(GrammarNames.Bool);
            var Cell                = new NonTerminal(GrammarNames.Cell);
            var Constant            = new NonTerminal(GrammarNames.Constant);
            var ConstantArray       = new NonTerminal(GrammarNames.ConstantArray);
            var DynamicDataExchange = new NonTerminal(GrammarNames.DynamicDataExchange);
            var EmptyArgument       = new NonTerminal(GrammarNames.EmptyArgument);
            var Error               = new NonTerminal(GrammarNames.Error);
            var File                = new NonTerminal(GrammarNames.File);
            var Formula             = new NonTerminal(GrammarNames.Formula);
            var FormulaWithEq       = new NonTerminal(GrammarNames.FormulaWithEq);
            var Function            = new NonTerminal(GrammarNames.Function);
            var FunctionCall        = new NonTerminal(GrammarNames.FunctionCall);
            var HRange              = new NonTerminal(GrammarNames.HorizontalRange);
            var InfixOp             = new NonTerminal(GrammarNames.TransientInfixOp);
            var MultipleSheets      = new NonTerminal(GrammarNames.MultipleSheets);
            var NamedRange          = new NonTerminal(GrammarNames.NamedRange);
            var Number              = new NonTerminal(GrammarNames.Number);
            var PostfixOp           = new NonTerminal(GrammarNames.TransientPostfixOp);
            var Prefix              = new NonTerminal(GrammarNames.Prefix);
            var PrefixOp            = new NonTerminal(GrammarNames.TransientPrefixOp);
            var QuotedFileSheet     = new NonTerminal(GrammarNames.QuotedFileSheet);
            var Reference           = new NonTerminal(GrammarNames.Reference);
            var ReferenceFunction   = new NonTerminal(GrammarNames.ReferenceFunction);
            var ReferenceItem       = new NonTerminal(GrammarNames.TransientReferenceItem);
            var RefError            = new NonTerminal(GrammarNames.RefError);
            var ReservedName        = new NonTerminal(GrammarNames.ReservedName);
            var Sheet               = new NonTerminal(GrammarNames.Sheet);
            var Start               = new NonTerminal(GrammarNames.TransientStart);
            var Text                = new NonTerminal(GrammarNames.Text);
            var Union               = new NonTerminal(GrammarNames.Union);
            var VRange              = new NonTerminal(GrammarNames.VerticalRange);
            #endregion


            #region 3-Rules

            #region Base rules
            Root = Start;

            // A formula may appear with a leading '=', without it, or as an array formula "{=...}"
            Start.Rule = FormulaWithEq
                         | Formula
                         | ArrayFormula
            ;
            MarkTransient(Start);

            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                Reference
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen
                | ReservedName
            ;
            //MarkTransient(Formula);

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule = Number
                            | Text
                            | Bool
                            | Error
            ;

            Text.Rule     = TextToken;
            Number.Rule   = NumberToken;
            Bool.Rule     = BoolToken;
            Error.Rule    = ErrorToken;
            RefError.Rule = RefErrorToken;
            #endregion

            #region Functions

            // Operator applications (prefix, postfix, infix) are modelled as function calls too
            FunctionCall.Rule =
                Function + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
            ;

            Function.Rule = ExcelFunction | UDFToken;

            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
            //Arguments.Rule = Argument | Argument + comma + Arguments;

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule      = Formula | EmptyArgument;
            //MarkTransient(Argument);

            PrefixOp.Rule =
                ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);
            #endregion

            #region References

            Reference.Rule = ReferenceItem
                             | Reference + colon + Reference
                             | Reference + intersectop + Reference
                             | OpenParen + Union + CloseParen
                             | OpenParen + Reference + PreferShiftHere() + CloseParen
                             | Prefix + ReferenceItem
                             | Prefix + UDFToken + Arguments + CloseParen
                             | DynamicDataExchange
            ;

            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                Cell
                | NamedRange
                | ReferenceFunction
                | VRange
                | HRange
                | RefError
            ;
            MarkTransient(ReferenceItem);

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;

            ReferenceFunction.Rule =
                ExcelRefFunctionToken + Arguments + CloseParen;

            QuotedFileSheet.Rule = QuotedFileSheetToken;
            Sheet.Rule           = SheetToken;
            MultipleSheets.Rule  = MultipleSheetsToken;

            Cell.Rule = CellToken;

            // File reference: a numeric file index between square brackets
            File.Rule = OpenSquareParen + FileToken + CloseSquareParen;

            DynamicDataExchange.Rule = File + exclamationMark + DDEToken;

            NamedRange.Rule = NamedRangeToken | NamedRangeCombinationToken;

            Prefix.Rule =
                Sheet
                | File + Sheet
                | File + exclamationMark
                | QuotedFileSheet
                | MultipleSheets
                | File + MultipleSheets;

            #endregion

            #region Arrays
            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

            // Semicolon-separated groups, each a comma-separated list of array constants
            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule    = MakePlusRule(ArrayRows, comma, ArrayConstant);

            ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
            #endregion

            #endregion

            #region 5-Operator Precedence
            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
            RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
            // The reference operators (intersection, range and the union comma) all share Precedence.Reference
            RegisterOperators(Precedence.Reference, Associativity.Left, intersectop, colon);
            RegisterOperators(Precedence.Reference, Associativity.Left, comma);

            //RegisterOperators(Precedence.ParameterSeparator, comma);

            #endregion
        }