/// <summary>
/// Builds the search-query grammar: terminals, non-terminals, production rules,
/// operator precedence levels and error-reporting hints.
/// </summary>
/// <remarks>
/// Passes <c>false</c> to the base grammar constructor
/// (presumably the case-sensitivity flag - confirm against the base class).
/// </remarks>
public SearchGrammar() : base(false) {
    GrammarComments =
        "Google-to-SQL full-text query format converter. Based on original project by Michael Coles.\r\n" +
        "http://www.sqlservercentral.com/articles/Full-Text+Search+(2008)/64248/ \r\n" +
        "Slightly revised to work with latest version of Irony. ";

    // ---- Terminals ----
    var termToken = CreateTerm("Term");
    var phraseToken = new StringLiteral("Phrase", "\"");
    var impliedAndToken = new ImpliedSymbolTerminal("ImpliedAnd");

    // ---- Non-terminals (declared up front so rules may reference each other) ----
    var binaryExpr = new NonTerminal("BinaryExpression");
    var binaryOp = new NonTerminal("BinaryOp");
    var expr = new NonTerminal("Expression");
    var primaryExpr = new NonTerminal("PrimaryExpression");
    var thesaurusExpr = new NonTerminal("ThesaurusExpression");
    // Not referenced by any rule below; kept so the constructor does exactly what it did before.
    var thesaurusOp = new NonTerminal("ThesaurusOperator");
    var exactExpr = new NonTerminal("ExactExpression");
    var parenExpr = new NonTerminal("ParenthesizedExpression");
    var proximityExpr = new NonTerminal("ProximityExpression");
    var proximityList = new NonTerminal("ProximityList");

    // ---- Rules ----
    Root = expr;

    expr.Rule =
          primaryExpr
        | binaryExpr;

    binaryExpr.Rule = expr + binaryOp + expr;

    binaryOp.Rule =
          impliedAndToken
        | "and"
        | "&"
        | "-"
        | "or"
        | "|";

    primaryExpr.Rule =
          termToken
        | thesaurusExpr
        | exactExpr
        | parenExpr
        | phraseToken
        | proximityExpr;

    thesaurusExpr.Rule = "~" + termToken;

    exactExpr.Rule =
          "+" + termToken
        | "+" + phraseToken;

    parenExpr.Rule = "(" + expr + ")";
    proximityExpr.Rule = "<" + proximityList + ">";

    // A proximity list is one or more terms.
    MakePlusRule(proximityList, termToken);

    // ---- Tree shaping ----
    MarkTransient(primaryExpr, expr, proximityExpr, parenExpr, binaryOp);
    MarkPunctuation("<", ">", "(", ")");

    // ---- Operator precedence: the OR forms at level 10; the AND forms, "-" and the implied AND at level 20 ----
    RegisterOperators(10, "or", "|");
    RegisterOperators(20, "and", "&", "-");
    RegisterOperators(20, impliedAndToken);

    // ---- Error-reporting hints ----
    // Brace pairs are registered to improve error reporting.
    RegisterBracePair("(", ")");
    RegisterBracePair("<", ">");

    // The implied AND is not a real symbol, so keep it out of "expected symbol" messages.
    AddToNoReportGroup(impliedAndToken);

    // Braces are likewise not reported as expected symbols.
    AddToNoReportGroup("(", ")", "<", ">");

    LanguageFlags |= LanguageFlags.CanRunSample;
}
/// <summary>
/// Builds the Excel formula grammar: terminals (symbols, literals, function and
/// reference tokens), non-terminals, production rules and operator precedence.
/// </summary>
/// <remarks>
/// Passes <c>false</c> to the base grammar constructor
/// (presumably the case-sensitivity flag - confirm against the base class).
/// </remarks>
public ExcelFormulaGrammar() : base(false) {
    #region 1-Terminals

    #region Symbols and operators
    var commaTerm = ToTerm(",");
    var colonTerm = ToTerm(":");
    var semicolonTerm = ToTerm(";");
    var openParen = ToTerm("(");
    var closeParen = ToTerm(")");
    var closeBracket = ToTerm("]");
    var openBracket = ToTerm("[");
    var bang = ToTerm("!");
    var closeBrace = ToTerm("}");
    var openBrace = ToTerm("{");

    var multiplyOp = ToTerm("*");
    var addOp = ToTerm("+");
    var divideOp = ToTerm("/");
    var subtractOp = ToTerm("-");
    var concatOp = ToTerm("&");
    var powerOp = ToTerm("^");

    // The intersect operator is a single space, which cannot be tokenized normally,
    // hence an ImpliedSymbolTerminal.
    // Caution: ImpliedSymbolTerminal seems to break when given a priority, and its default priority is low.
    var intersectOp = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

    var percentOp = ToTerm("%");

    var greaterOp = ToTerm(">");
    var equalOp = ToTerm("=");
    var lessOp = ToTerm("<");
    var notEqualOp = ToTerm("<>");
    var greaterEqualOp = ToTerm(">=");
    var lessEqualOp = ToTerm("<=");
    #endregion

    #region Literals
    var boolToken = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") {
        Priority = TerminalPriority.Bool
    };

    var numberToken = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) {
        DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
    };

    var textToken = new StringLiteral(
        GrammarNames.TokenText,
        "\"",
        StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak);

    var errorToken = new RegexBasedTerminal(
        GrammarNames.TokenError,
        "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");

    // #REF! gets its own terminal, separate from the other error values.
    var refErrorToken = ToTerm("#REF!", GrammarNames.TokenRefError);
    #endregion

    #region Functions
    // All function tokens include the opening parenthesis.
    var udfToken = new RegexBasedTerminal(
        GrammarNames.TokenUDF,
        "(_xll\\.)?[a-zA-Z0-9_.]+\\(") {
        Priority = TerminalPriority.UDF
    };

    var refFunctionToken = new RegexBasedTerminal(
        GrammarNames.TokenExcelRefFunction,
        "(INDEX|OFFSET|INDIRECT)\\(") {
        Priority = TerminalPriority.ExcelRefFunction
    };

    var conditionalRefFunctionToken = new RegexBasedTerminal(
        GrammarNames.TokenExcelConditionalRefFunction,
        "(IF|CHOOSE)\\(") {
        Priority = TerminalPriority.ExcelRefFunction
    };

    var builtinFunctionToken = new RegexBasedTerminal(
        GrammarNames.ExcelFunction,
        "(" + String.Join("|", excelFunctionList) + ")\\(") {
        Priority = TerminalPriority.ExcelFunction
    };

    // An implied terminal (rather than Empty) for omitted arguments gives more accurate trees.
    var emptyArgumentToken = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
    #endregion

    #region References and names
    // Letters-only range (e.g. "A:B") and digits-only range (e.g. "1:2").
    var columnRangeToken = new RegexBasedTerminal(
        GrammarNames.TokenVRange,
        "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}");
    var rowRangeToken = new RegexBasedTerminal(
        GrammarNames.TokenHRange,
        "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");

    const string cellPattern = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*";
    var cellToken = new RegexBasedTerminal(GrammarNames.TokenCell, cellPattern) {
        Priority = TerminalPriority.CellToken
    };

    const string namedRangePattern = @"[A-Za-z\\_][\w\.]*";
    var namedRangeToken = new RegexBasedTerminal(GrammarNames.TokenNamedRange, namedRangePattern) {
        Priority = TerminalPriority.NamedRange
    };

    // Stops input such as "A1A1" from being read as two cell tokens.
    var namedRangeCombinationToken = new RegexBasedTerminal(
        GrammarNames.TokenNamedRangeCombination,
        "(TRUE|FALSE|" + cellPattern + ")" + namedRangePattern) {
        Priority = TerminalPriority.NamedRangeCombination
    };

    // Character-class content allowed inside a single-quoted name.
    const string quotedNameChars = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";

    // Either a bare name or a single-quoted name (with '' as an escaped quote), followed by '!'.
    const string sheetPattern = @"(([\w\.]+)|('([" + quotedNameChars + @"]|'')+'))!";
    var sheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, sheetPattern) {
        Priority = TerminalPriority.SheetToken
    };

    const string firstSheetPattern = "[a-zA-Z0-9]+:";
    var multipleSheetsToken = new RegexBasedTerminal(
        GrammarNames.TokenMultipleSheets,
        firstSheetPattern + sheetPattern) {
        Priority = TerminalPriority.MultipleSheetsToken
    };

    var fileNumberToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, "[0-9]+") {
        Priority = TerminalPriority.FileToken
    };

    var quotedFileSheetToken = new RegexBasedTerminal(
        GrammarNames.TokenFileSheetQuoted,
        @"'\[\d+\]([" + quotedNameChars + @"]|'')+'!") {
        Priority = TerminalPriority.QuotedFileToken
    };

    var reservedNameToken = new RegexBasedTerminal(
        GrammarNames.TokenReservedName,
        @"_xlnm\.[a-zA-Z_]+") {
        Priority = TerminalPriority.ReservedName
    };

    var ddeToken = new RegexBasedTerminal(
        GrammarNames.TokenDDE,
        @"'([\[\]" + quotedNameChars + @"]|'')+'");
    #endregion

    #region Punctuation
    MarkPunctuation(bang);
    MarkPunctuation(openParen, closeParen);
    MarkPunctuation(openBracket, closeBracket);
    MarkPunctuation(openBrace, closeBrace);
    #endregion

    #endregion

    #region 2-NonTerminals
    // Non-terminals are created before any rule is written so that every rule can
    // refer to any of them, regardless of the order in which the rules appear.
    var argument = new NonTerminal(GrammarNames.Argument);
    var arguments = new NonTerminal(GrammarNames.Arguments);
    var arrayColumns = new NonTerminal(GrammarNames.ArrayColumns);
    var arrayConstant = new NonTerminal(GrammarNames.ArrayConstant);
    var arrayFormula = new NonTerminal(GrammarNames.ArrayFormula);
    var arrayRows = new NonTerminal(GrammarNames.ArrayRows);
    var boolean = new NonTerminal(GrammarNames.Bool);
    var cell = new NonTerminal(GrammarNames.Cell);
    var constant = new NonTerminal(GrammarNames.Constant);
    var constantArray = new NonTerminal(GrammarNames.ConstantArray);
    var dde = new NonTerminal(GrammarNames.DynamicDataExchange);
    var emptyArgument = new NonTerminal(GrammarNames.EmptyArgument);
    var error = new NonTerminal(GrammarNames.Error);
    var fileRef = new NonTerminal(GrammarNames.File);
    var formula = new NonTerminal(GrammarNames.Formula);
    var formulaWithEq = new NonTerminal(GrammarNames.FormulaWithEq);
    var functionCall = new NonTerminal(GrammarNames.FunctionCall);
    var functionName = new NonTerminal(GrammarNames.FunctionName);
    var rowRange = new NonTerminal(GrammarNames.HorizontalRange);
    var infixOp = new NonTerminal(GrammarNames.TransientInfixOp);
    var multipleSheets = new NonTerminal(GrammarNames.MultipleSheets);
    var namedRange = new NonTerminal(GrammarNames.NamedRange);
    var number = new NonTerminal(GrammarNames.Number);
    var postfixOp = new NonTerminal(GrammarNames.TransientPostfixOp);
    var prefix = new NonTerminal(GrammarNames.Prefix);
    var prefixOp = new NonTerminal(GrammarNames.TransientPrefixOp);
    var quotedFileSheet = new NonTerminal(GrammarNames.QuotedFileSheet);
    var reference = new NonTerminal(GrammarNames.Reference);
    var referenceItem = new NonTerminal(GrammarNames.TransientReferenceItem);
    var referenceFunctionCall = new NonTerminal(GrammarNames.ReferenceFunctionCall);
    var refError = new NonTerminal(GrammarNames.RefError);
    var refFunctionName = new NonTerminal(GrammarNames.RefFunctionName);
    var reservedName = new NonTerminal(GrammarNames.ReservedName);
    var sheet = new NonTerminal(GrammarNames.Sheet);
    var start = new NonTerminal(GrammarNames.TransientStart);
    var text = new NonTerminal(GrammarNames.Text);
    var udfName = new NonTerminal(GrammarNames.UDFName);
    var udfCall = new NonTerminal(GrammarNames.UDFunctionCall);
    var union = new NonTerminal(GrammarNames.Union);
    var columnRange = new NonTerminal(GrammarNames.VerticalRange);
    #endregion

    #region 3-Rules

    #region Base rules
    Root = start;

    start.Rule =
          formulaWithEq
        | formula
        | arrayFormula;
    MarkTransient(start);

    arrayFormula.Rule = openBrace + equalOp + formula + closeBrace;

    formulaWithEq.Rule = equalOp + formula;

    // Formula is not marked transient.
    formula.Rule =
          reference
        | constant
        | functionCall
        | constantArray
        | openParen + formula + closeParen
        | reservedName;

    reservedName.Rule = reservedNameToken;

    constant.Rule =
          number
        | text
        | boolean
        | error;

    text.Rule = textToken;
    number.Rule = numberToken;
    boolean.Rule = boolToken;
    error.Rule = errorToken;
    refError.Rule = refErrorToken;
    #endregion

    #region Functions
    // Operator applications are modelled as function calls alongside named calls.
    functionCall.Rule =
          functionName + arguments + closeParen
        | prefixOp + formula
        | formula + postfixOp
        | formula + infixOp + formula;

    functionName.Rule = builtinFunctionToken;

    // Zero or more comma-separated arguments.
    arguments.Rule = MakeStarRule(arguments, commaTerm, argument);

    emptyArgument.Rule = emptyArgumentToken;

    // Argument is not marked transient.
    argument.Rule =
          formula
        | emptyArgument;

    prefixOp.Rule =
          ImplyPrecedenceHere(Precedence.UnaryPreFix) + addOp
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + subtractOp;
    MarkTransient(prefixOp);

    infixOp.Rule =
          powerOp
        | multiplyOp
        | divideOp
        | addOp
        | subtractOp
        | concatOp
        | greaterOp
        | equalOp
        | lessOp
        | notEqualOp
        | greaterEqualOp
        | lessEqualOp;
    MarkTransient(infixOp);

    // ImplyPrecedenceHere(Precedence.UnaryPostFix) does not seem to work for this rule.
    // Postfix binds so tightly that preferring shift is nearly always the correct action.
    postfixOp.Rule = PreferShiftHere() + percentOp;
    MarkTransient(postfixOp);
    #endregion

    #region References
    reference.Rule =
          referenceItem
        | referenceFunctionCall
        | openParen + reference + PreferShiftHere() + closeParen
        | prefix + referenceItem
        | dde;

    // Range (":"), intersection (implied space), parenthesized union, and reference-returning functions.
    referenceFunctionCall.Rule =
          reference + colonTerm + reference
        | reference + intersectOp + reference
        | openParen + union + closeParen
        | refFunctionName + arguments + closeParen;

    refFunctionName.Rule =
          refFunctionToken
        | conditionalRefFunctionToken;

    // One or more comma-separated references.
    union.Rule = MakePlusRule(union, commaTerm, reference);

    referenceItem.Rule =
          cell
        | namedRange
        | columnRange
        | rowRange
        | refError
        | udfCall;
    MarkTransient(referenceItem);

    udfCall.Rule = udfName + arguments + closeParen;
    udfName.Rule = udfToken;

    columnRange.Rule = columnRangeToken;
    rowRange.Rule = rowRangeToken;

    quotedFileSheet.Rule = quotedFileSheetToken;
    sheet.Rule = sheetToken;
    multipleSheets.Rule = multipleSheetsToken;

    cell.Rule = cellToken;

    fileRef.Rule = openBracket + fileNumberToken + closeBracket;

    dde.Rule = fileRef + bang + ddeToken;

    namedRange.Rule =
          namedRangeToken
        | namedRangeCombinationToken;

    prefix.Rule =
          sheet
        | fileRef + sheet
        | fileRef + bang
        | quotedFileSheet
        | multipleSheets
        | fileRef + multipleSheets;
    #endregion

    #region Arrays
    constantArray.Rule = openBrace + arrayColumns + closeBrace;

    // Semicolon-separated entries, each of which is a comma-separated list of array constants.
    arrayColumns.Rule = MakePlusRule(arrayColumns, semicolonTerm, arrayRows);
    arrayRows.Rule = MakePlusRule(arrayRows, commaTerm, arrayConstant);

    arrayConstant.Rule =
          constant
        | prefixOp + number
        | refError;
    #endregion

    #endregion

    #region 4-Operator Precedence
    // Everything is registered as left-associative, even operators that are really
    // neutral-associative. That keeps the parse tree shape consistent, which matters
    // because a lot of code is hard-coded against that specific tree structure.
    RegisterOperators(Precedence.Comparison, Associativity.Left,
        equalOp, lessOp, greaterOp, lessEqualOp, greaterEqualOp, notEqualOp);
    RegisterOperators(Precedence.Concatenation, Associativity.Left, concatOp);
    RegisterOperators(Precedence.Addition, Associativity.Left, addOp, subtractOp);
    RegisterOperators(Precedence.Multiplication, Associativity.Left, multiplyOp, divideOp);
    RegisterOperators(Precedence.Exponentiation, Associativity.Left, powerOp);
    RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentOp);
    RegisterOperators(Precedence.Union, Associativity.Left, commaTerm);
    RegisterOperators(Precedence.Intersection, Associativity.Left, intersectOp);
    RegisterOperators(Precedence.Range, Associativity.Left, colonTerm);
    #endregion
}