/// <summary>
/// Appends a sub-rule to this rule's list of child rules. Used when
/// building SeqRule or ChildRule rules.
/// </summary>
/// <param name="r">The sub-rule to append; asserted to be non-null.</param>
public void AddRule(Rule r)
{
    // A failing assertion here most often means the caller referenced a
    // rule that has not been initialized yet.
    bool isInitialized = r != null;
    Trace.Assert(isInitialized);

    rules.Add(r);
}
// Rule operators are functions that take rules as arguments and produce new rules.
#region Rule operators

/// <summary>
/// Wraps Rule x in an OptRule: the resulting rule attempts to match x but
/// always reports success. Equivalent to the expression "x | Nothing()"
/// and represented by the unary operator "?".
/// </summary>
/// <param name="x">The rule whose match is optional.</param>
/// <returns>A new OptRule wrapping x.</returns>
public static Rule Opt(Rule x)
{
    Rule optional = new OptRule(x);
    return optional;
}
/// <summary>
/// Wraps Rule x in a NoFailRule, a wrapper that throws an exception when
/// the underlying rule fails.
/// </summary>
/// <param name="x">The rule that is required to succeed.</param>
/// <returns>A new NoFailRule wrapping x.</returns>
public static Rule NoFail(Rule x)
{
    Rule mandatory = new NoFailRule(x);
    return mandatory;
}
/// <summary>
/// Creates a rule that matches a begin/end delimited span, consuming all
/// nested begin/end pairs inside it as well.
/// </summary>
/// <param name="begin">Rule matching the opening delimiter.</param>
/// <param name="end">Rule matching the closing delimiter.</param>
/// <returns>
/// A rule of the shape: begin, then any number of (nested span | one
/// character that starts neither end nor begin), then end.
/// </returns>
public Rule Nested(Rule begin, Rule end)
{
    // The recursive alternative refers back to the very rule being built
    // here. The lambda is only invoked at match time, after 'nested' has
    // been assigned, so a single rule tree is built per call to Nested
    // rather than a fresh tree every time the recursive branch is tried.
    Rule nested = null;
    RecursiveRule recursive = new RecursiveRule(() => nested);

    // Either a complete nested span, or one character that begins neither
    // the closing nor the opening delimiter ('+' binds tighter than '|').
    Rule content = recursive | Not(end) + Not(begin) + Anything();

    // NOTE(review): NoFailSeq is defined elsewhere; presumably it turns a
    // missing closing delimiter into a hard parse error - confirm.
    nested = begin + NoFailSeq(Star(content) + end);
    return nested;
}
/// <summary>
/// Constructor: initializes the public rule fields of the base C++ grammar
/// (identifiers, numbers, whitespace/comments, keywords and operators,
/// literals, pre-processor directives and symbols), then calls
/// InitializeRules for this grammar type.
/// </summary>
/// <remarks>
/// Assignment order matters: a rule expression reads the fields it refers
/// to at construction time, so every field must be assigned before it is
/// used (the only deferred reference is the Recursive lambda for
/// identifier). Alternatives joined with '|' are written most-specific
/// first, on the assumption that choice is ordered (first match wins).
/// </remarks>
public CppBaseGrammar()
{
    #region identifiers
    digit = CharRange('0', '9');
    lower_case_letter = CharRange('a', 'z');
    upper_case_letter = CharRange('A', 'Z');
    letter = lower_case_letter | upper_case_letter;
    ident_first_char = CharSet("_") | letter;
    ident_next_char = ident_first_char | digit;
    // identifier is assigned on the next line, hence the deferred lookup.
    identifier_extension = CharSeq("::") + Recursive(() => identifier);
    identifier = Leaf(ident_first_char + Star(ident_next_char) + Star(identifier_extension));
    #endregion

    #region numbers
    octal_digit = CharRange('0', '7');
    nonzero_digit = CharRange('1', '9');
    hex_digit = digit | CharRange('a', 'f') | CharRange('A', 'F');
    sign = CharSet("+-");
    #endregion numbers

    #region whitespace
    tab = CharSeq("\t");
    space = CharSeq(" ");
    simple_ws = tab | space;
    eol = Opt(CharSeq("\r")) + CharSeq("\n");
    // A backslash, optional trailing blanks and a line break: a continued line.
    ext_line = CharSeq("\\") + Star(simple_ws) + eol;
    multiline_ws = simple_ws | eol;
    until_eol = Star(ext_line | AnythingBut(eol));
    line_comment_content = until_eol;
    line_comment = CharSeq("//") + NoFailSeq(line_comment_content + eol);
    full_comment_content = Until(CharSeq("*/"));
    full_comment = CharSeq("/*") + NoFailSeq(full_comment_content + CharSeq("*/"));
    comment = line_comment | full_comment;
    ws = Eat(multiline_ws | comment);
    #endregion

    #region keyword rules
    COND = Word("?");
    DOT = Word(".");
    COLON = Word(":");
    AMP = Word("&");
    PLUS = Word("+");
    MINUS = Word("-");
    STAR = Word("*");
    SLASH = Word("/");
    MOD = Word("%");
    NOT = Word("!");
    TILDE = Word("~");
    CARET = Word("^");
    PIPE = Word("|");
    EQ = Word("=");
    COMMA = Word(",");
    SIZEOF = Word("sizeof");
    PTR_OP = Word("->");
    INC_OP = Word("++");
    DEC_OP = Word("--");
    LEFT_OP = Word("<<");
    RIGHT_OP = Word(">>");
    LT_OP = Word("<");
    GT_OP = Word(">");
    LE_OP = Word("<=");
    GE_OP = Word(">=");
    EQ_OP = Word("==");
    NE_OP = Word("!=");
    AND_OP = Word("&&");
    OR_OP = Word("||");
    MUL_ASSIGN = Word("*=");
    DIV_ASSIGN = Word("/=");
    MOD_ASSIGN = Word("%=");
    ADD_ASSIGN = Word("+=");
    SUB_ASSIGN = Word("-=");
    LEFT_ASSIGN = Word("<<=");
    RIGHT_ASSIGN = Word(">>=");
    AND_ASSIGN = Word("&=");
    XOR_ASSIGN = Word("^=");
    OR_ASSIGN = Word("|=");
    TYPEDEF = Word("typedef");
    EXTERN = Word("extern");
    STATIC = Word("static");
    AUTO = Word("auto");
    REGISTER = Word("register");
    CHAR = Word("char");
    SHORT = Word("short");
    INT = Word("int");
    LONG = Word("long");
    SIGNED = Word("signed");
    UNSIGNED = Word("unsigned");
    FLOAT = Word("float");
    DOUBLE = Word("double");
    CONST = Word("const");
    VOLATILE = Word("volatile");
    VOID = Word("void");
    STRUCT = Word("struct");
    UNION = Word("union");
    ENUM = Word("enum");
    ELLIPSIS = Word("...");
    CASE = Word("case");
    DEFAULT = Word("default");
    IF = Word("if");
    ELSE = Word("else");
    SWITCH = Word("switch");
    WHILE = Word("while");
    DO = Word("do");
    FOR = Word("for");
    GOTO = Word("goto");
    CONTINUE = Word("continue");
    BREAK = Word("break");
    RETURN = Word("return");
    CLASS = Word("class");
    TYPENAME = Word("typename");
    TYPEID = Word("typeid");
    TEMPLATE = Word("template");
    PUBLIC = Word("public");
    PROTECTED = Word("protected");
    PRIVATE = Word("private");
    VIRTUAL = Word("virtual");
    OPERATOR = Word("operator");
    USING = Word("using");
    #endregion

    #region literals
    dot = CharSeq(".");
    dbl_quote = CharSeq("\"");
    quote = CharSeq("\'");
    simple_escape = CharSeq("\\") + CharSet("abfnrtv'\"?\\");
    octal_escape = CharSeq("\\") + octal_digit + Opt(octal_digit + Opt(octal_digit));
    hex_escape = CharSeq("\\x") + Star(hex_digit);
    escape_sequence = simple_escape | octal_escape | hex_escape;
    c_char = escape_sequence | Not(quote) + Anything();
    s_char = escape_sequence | Not(dbl_quote) + Anything();
    long_suffix = CharSet("Ll");
    unsigned_suffix = CharSet("Uu");
    digit_sequence = Plus(digit);
    exponent = Opt(sign) + digit_sequence;
    exponent_prefix = CharSet("Ee");
    exponent_part = exponent_prefix + exponent;
    float_suffix = CharSet("LlFf");
    simple_float = CharSeq(".") + digit_sequence | digit_sequence + dot + Opt(digit_sequence);
    exponential_float = digit_sequence + exponent_part | simple_float + exponent_part;
    // Exponential form first: otherwise "1.5e10" is accepted as the
    // simple float "1.5" and the exponent is left unconsumed.
    unsigned_float = exponential_float | simple_float;
    hex_prefix = CharSeq("0X") | CharSeq("0x");
    hex_literal = hex_prefix + Plus(hex_digit);
    octal_literal = CharSeq("0") + Star(octal_digit);
    decimal_literal = nonzero_digit + Star(digit);
    // Hex first: otherwise octal_literal accepts the leading "0" of
    // "0x1F" and the rest of the hex literal is never consumed.
    unsigned_literal = hex_literal | decimal_literal | octal_literal;
    // Two-character suffixes first, so that "UL" / "LU" are consumed whole.
    integer_suffix = unsigned_suffix + long_suffix | long_suffix + unsigned_suffix | long_suffix | unsigned_suffix;
    int_literal = unsigned_literal + Not(dot) + Opt(integer_suffix);
    float_literal = unsigned_float + Opt(float_suffix);
    char_literal = Opt(CharSeq("L")) + quote + Star(c_char) + quote;
    string_literal = Opt(CharSeq("L")) + dbl_quote + Star(s_char) + dbl_quote;
    boolean_literal = Word("true") | Word("false");
    // float_literal is tried before int_literal: a float always needs a
    // '.' or an exponent, so it cannot steal a plain integer, whereas
    // int_literal would otherwise accept just the "1" of "1e10".
    literal = (float_literal | int_literal | char_literal | string_literal | boolean_literal)
        //+ NoFail(Not(ident_next_char)) //0x000u error
        + ws;
    #endregion

    #region pre-processor directives
    pragma = Word("#") + Word("pragma") + until_eol;
    included_file = string_literal | CharSeq("<") + Star(Not(CharSeq(">")) + Anything()) + CharSeq(">");
    include = Word("#") + Word("include") + included_file;
    ifdef_macro = Word("#") + Word("if") + until_eol + eol;
    endif_macro = Word("#") + Word("endif") + until_eol + eol;
    elif_macro = Word("#") + Word("elif") + until_eol + eol;
    else_macro = Word("#") + Word("else") + until_eol + eol;
    #endregion

    #region symbols
    semicolon = CharSeq(";");
    eos = Word(";");
    #endregion

    InitializeRules<CppBaseGrammar>();
}
/// <summary>
/// Creates a SkipRule with a single sub-rule.
/// </summary>
/// <param name="x">The sub-rule, registered through AddRule.</param>
public SkipRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Creates a PlusRule with a single sub-rule.
/// </summary>
/// <param name="x">The sub-rule, registered through AddRule.</param>
public PlusRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Creates a StoreRule with a single sub-rule.
/// </summary>
/// <param name="x">The sub-rule, registered through AddRule.</param>
public StoreRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Creates a StarRule with a single sub-rule.
/// </summary>
/// <param name="x">The sub-rule, registered through AddRule.</param>
public StarRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Creates a SeqRule from two sub-rules.
/// </summary>
/// <param name="a">The sub-rule registered first.</param>
/// <param name="b">The sub-rule registered second.</param>
public SeqRule(Rule a, Rule b)
{
    // Registration order is significant: a is added before b.
    AddRule(a);
    AddRule(b);
}
/// <summary>
/// Creates a ChoiceRule from two alternative sub-rules.
/// </summary>
/// <param name="a">The alternative registered first.</param>
/// <param name="b">The alternative registered second.</param>
public ChoiceRule(Rule a, Rule b)
{
    // Registration order is significant: a is added before b.
    AddRule(a);
    AddRule(b);
}
/// <summary>
/// Constructor: assigns the structural grammar's rule fields (bracketed
/// groups, type declarations, labels, comments, pre-processor directives,
/// declarations and the top-level file rule), then calls InitializeRules.
/// </summary>
/// <remarks>
/// Statement order matters: each rule expression reads the fields it uses
/// when it is constructed, so those fields must already be assigned. The
/// one forward reference (declaration) is deferred through Recursive.
/// </remarks>
public CppStructuralGrammar()
{
    // 'declaration' is assigned near the end of this constructor, so it is
    // looked up lazily through the lambda.
    declaration_list = Recursive(() => Star(declaration));

    // Delimited groups: once the opening delimiter has matched, the
    // closing delimiter is wrapped in NoFail.
    bracketed_group = Delimiter("[") + declaration_list + NoFail(Delimiter("]"));
    paran_group = Delimiter("(") + declaration_list + NoFail(Delimiter(")"));
    brace_group = Delimiter("{") + declaration_list + NoFail(Delimiter("}"));

    // A single punctuation character that does not start a comment.
    symbol = Not(CharSeq("/*") | CharSeq("//")) + CharSet("~!@%^&*-+=|:<>.?/,") + multiline_ws;

    // Type-introducing keywords, each with an optional name.
    template_decl = TEMPLATE + NoFail(Nested("<", ">")) + ws;
    typedef_decl = TYPEDEF + multiline_ws;
    class_decl = CLASS + Opt(identifier) + multiline_ws;
    struct_decl = STRUCT + Opt(identifier) + multiline_ws;
    union_decl = UNION + Opt(identifier) + multiline_ws;
    enum_decl = ENUM + Opt(identifier) + multiline_ws;

    label = identifier + ws + COLON + multiline_ws;

    // Comments and pre-processor directives.
    comment_set = Star(comment + multiline_ws) + multiline_ws;
    same_line_comment = simple_ws + comment;
    pp_directive = CharSeq("#") + NoFailSeq(ws + identifier + simple_ws + until_eol + eol);

    type_decl = Opt(template_decl) + (class_decl | struct_decl | union_decl | enum_decl);

    // One element of a declaration; alternatives are listed in the order
    // they were written in the original grammar.
    node = bracketed_group | paran_group | brace_group | type_decl | typedef_decl | literal | symbol | label | identifier;
    declaration_content = Plus(node + multiline_ws);

    // A declaration is one of: a pre-processor directive, a lone
    // semicolon, or a run of nodes with an optional terminating semicolon.
    // Each alternative starts with comment_set.
    declaration = comment_set + pp_directive + multiline_ws
        | comment_set + semicolon + Opt(same_line_comment) + multiline_ws
        | comment_set + declaration_content + Opt(semicolon) + Opt(same_line_comment) + multiline_ws;

    // Top-level rule: declarations, trailing whitespace, then end of input.
    file = declaration_list + ws + NoFail(EndOfInput());

    //===============================================================================================
    // Tidy up the grammar, and assign rule names from the field names.
    InitializeRules<CppStructuralGrammar>();
}
/// <summary>
/// Constructor: assigns the structural grammar's rule fields (bracketed
/// groups, function groups, type declarations, labels, comments,
/// pre-processor directives, declarations and the top-level file rule),
/// then calls InitializeRules.
/// </summary>
/// <remarks>
/// Statement order matters: each rule expression reads the fields it uses
/// when it is constructed, so those fields must already be assigned (for
/// example comment_set before brace_group, and brace_group before
/// function_group). The one forward reference (declaration) is deferred
/// through Recursive.
/// </remarks>
public CppStructuralGrammar()
{
    // 'declaration' is assigned near the end of this constructor, so it is
    // looked up lazily through the lambda.
    declaration_list = Recursive(() => Star(declaration));

    // Delimited groups: once the opening delimiter has matched, the
    // closing delimiter is wrapped in NoFail.
    bracketed_group = Delimiter("[") + declaration_list + NoFail(Delimiter("]"));
    paran_group = Delimiter("(") + declaration_list + NoFail(Delimiter(")"));

    // A single punctuation character that does not start a comment.
    symbol = Not(CharSeq("/*") | CharSeq("//")) + CharSet("~!@%^&*-+=|:<>.?/,") + Eat(multiline_ws);

    // Type-introducing keywords, each with an optional name.
    template_decl = TEMPLATE + NoFail(Nested("<", ">")) + ws;
    typedef_decl = TYPEDEF + Eat(multiline_ws);
    class_decl = CLASS + Opt(identifier) + Eat(multiline_ws);
    struct_decl = STRUCT + Opt(identifier) + Eat(multiline_ws);
    union_decl = UNION + Opt(identifier) + Eat(multiline_ws);
    enum_decl = ENUM + Opt(identifier) + Eat(multiline_ws);

    label = identifier + ws + COLON + Eat(multiline_ws);
    comment_set = Star(comment + Eat(multiline_ws)) + Eat(multiline_ws);

    // A brace group may carry comments just before its closing brace.
    brace_group = Delimiter("{") + declaration_list + Opt(comment_set) + NoFail(Delimiter("}"));

    // Two identifiers, a parenthesised group and a brace group, with no
    // semicolon between the parentheses and the braces.
    // NOTE(review): presumably return type + function name - confirm.
    function_group = Opt(EXTERN) + Opt(STATIC) + Eat(multiline_ws) + identifier + Eat(multiline_ws) + identifier + paran_group + Not(semicolon) + brace_group;

    same_line_comment = Eat(simple_ws) + comment;
    pp_directive = CharSeq("#") + NoFailSeq(ws + identifier + Eat(simple_ws) + until_eol);
    type_decl = Opt(template_decl) + (class_decl | struct_decl | union_decl | enum_decl);

    // One element of a declaration; alternatives are listed in the order
    // they were written in the original grammar (function_group is placed
    // ahead of brace_group).
    node = bracketed_group | paran_group | function_group | brace_group | type_decl | typedef_decl | literal | symbol | label | identifier;
    declaration_content = Plus(node + Eat(multiline_ws));

    // A declaration is one of: a pre-processor directive, a lone
    // semicolon, or a run of nodes with an optional terminating semicolon
    // and trailing comments. Each alternative starts with comment_set.
    declaration = comment_set + pp_directive + Eat(multiline_ws)
        | comment_set + semicolon + Opt(same_line_comment) + Eat(multiline_ws)
        | comment_set + declaration_content + Opt(semicolon) + Opt(same_line_comment) + Eat(multiline_ws) + comment_set;

    // Top-level rule: declarations, trailing whitespace, then end of input.
    file = declaration_list + ws + NoFail(EndOfInput());

    //===============================================================================================
    // Tidy up the grammar, and assign rule names from the field names.
    InitializeRules <CppStructuralGrammar>();
}
/// <summary>
/// Builds a rule for a delimited group: the opening text, multiline_ws,
/// zero or more repetitions of r, the closing text, then multiline_ws.
/// </summary>
/// <param name="begin">Literal text that opens the group.</param>
/// <param name="r">Rule repeated (via Star) between the delimiters.</param>
/// <param name="close">Literal text that closes the group.</param>
/// <returns>The combined sequence rule.</returns>
public static Rule DelimitedGroup(string begin, Rule r, string close)
{
    // Built one operand at a time, left to right, so the sequence is
    // assembled exactly as the single chained '+' expression would be.
    Rule group = CharSeq(begin) + multiline_ws;
    group = group + Star(r);
    group = group + CharSeq(close);
    group = group + multiline_ws;
    return group;
}
/// <summary>
/// Wraps Rule x in a PlusRule. Like Star, the resulting rule matches x as
/// many times as possible, but it reports failure unless x matched at
/// least once. Represented by the unary operator "+".
/// </summary>
/// <param name="x">The rule to repeat one or more times.</param>
/// <returns>A new PlusRule wrapping x.</returns>
public static Rule Plus(Rule x)
{
    Rule oneOrMore = new PlusRule(x);
    return oneOrMore;
}
/// <summary>
/// Creates a rule that consumes single characters for as long as Rule x
/// does not match at the current position. The text matched by x itself
/// is not consumed; combine with x (as UntilPast does) to include it.
/// </summary>
/// <param name="x">The termination rule.</param>
/// <returns>Star applied to AnythingBut(x).</returns>
public static Rule Until(Rule x)
{
    Rule nonTerminator = AnythingBut(x);
    return Star(nonTerminator);
}
/// <summary>
/// Creates a NotRule with a single sub-rule.
/// </summary>
/// <param name="x">The sub-rule, registered through AddRule.</param>
public NotRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Creates an OptRule with a single sub-rule.
/// </summary>
/// <param name="x">The sub-rule, registered through AddRule.</param>
public OptRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Obtains the target rule by invoking func, then delegates the match to it.
/// </summary>
/// <param name="p">The parser state passed on to the target rule's Match.</param>
/// <returns>Whatever the target rule's Match returns.</returns>
protected override bool InternalMatch(ParserState p)
{
    // func is invoked on every match attempt; the rule it yields does the work.
    return func().Match(p);
}
/// <summary>
/// Captures where a parse failure happened: the named parent node and its
/// rule, the rule that failed, and the row, column and text of the input
/// line at the parser's index, plus a pointer line with a caret under the
/// failing column.
/// </summary>
/// <param name="parent">Node being built when the failure occurred; may be null.</param>
/// <param name="rule">The rule that failed.</param>
/// <param name="ps">Parser state supplying the input text and the failure index.</param>
public ParsingException(ParseNode parent, Rule rule, ParserState ps)
{
    // Store the parent node (replaced by its named parent), the failed
    // rule, and the rule associated with the named parent.
    parentNode = parent;
    if (parentNode != null)
        parentNode = parentNode.GetNamedParent();
    failedRule = rule;
    if (parentNode != null)
        parentRule = parentNode.GetRule();

    // Set the main text variables.
    text = ps.text;

    // Clamp the index into the text. The lower bound also covers empty
    // input, where "Length - 1" would otherwise leave the index at -1.
    index = ps.index;
    if (index >= text.Length)
        index = text.Length - 1;
    if (index < 0)
        index = 0;

    lineStart = 0;
    col = 0;
    row = 0;
    int i = 0;

    // Compute the column, row, and lineStart by scanning up to the index.
    for (; i < index; ++i)
    {
        if (text[i] == '\n')
        {
            lineStart = i + 1;
            col = 0;
            ++row;
        }
        else
        {
            ++col;
        }
    }

    // Compute the line end: one past the terminating '\n', or the end of
    // the text when the last line has no terminator.
    bool endsWithNewLine = false;
    while (i < text.Length)
    {
        if (text[i++] == '\n')
        {
            endsWithNewLine = true;
            break;
        }
    }
    lineEnd = i;

    // Compute the line length (includes the '\n' when there is one).
    lineLength = lineEnd - lineStart;

    // Get the line text without the new line. Only drop a character when
    // a '\n' was actually found; otherwise the final character of an
    // unterminated last line would be cut off, and empty input would ask
    // for a negative length.
    int visibleLength = endsWithNewLine ? lineLength - 1 : lineLength;
    line = text.Substring(lineStart, visibleLength);

    // Assume tabs of length four. Built with an explicit count so the
    // width cannot be silently altered by whitespace reformatting.
    string tab = new string(' ', 4);

    // Compute the pointer (^) line. It is based on the expanded prefix,
    // because tabs in the displayed line are replaced with spaces too.
    string tmp = line.Substring(0, col);
    tmp = tmp.Replace("\t", tab);
    ptr = new string(' ', tmp.Length) + "^";

    // Replace tabs with spaces in the displayed line.
    line = line.Replace("\t", tab);
}
/// <summary>
/// Creates a NoFailRule with a single sub-rule.
/// </summary>
/// <param name="x">The sub-rule, registered through AddRule.</param>
public NoFailRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Creates a rule that matches Rule r one or more times, with successive
/// occurrences delimited by COMMA.
/// </summary>
/// <param name="r">The rule for a single list element.</param>
/// <returns>r followed by zero or more (COMMA, r) pairs.</returns>
public Rule CommaList(Rule r)
{
    Rule moreElements = Star(COMMA + r);
    return r + moreElements;
}
/// <summary>
/// Creates a rule that matches any single character, provided that Rule x
/// does not match at the current position.
/// </summary>
/// <param name="x">The rule that must not match.</param>
/// <returns>The sequence Not(x) followed by Anything().</returns>
public static Rule AnythingBut(Rule x)
{
    Rule guard = Not(x);
    Rule anyCharacter = Anything();
    return guard + anyCharacter;
}
/// <summary>
/// Wraps Rule x in a StoreRule, which parses the rule and creates a parse node.
/// </summary>
/// <param name="x">The rule whose match should produce a parse node.</param>
/// <returns>A new StoreRule wrapping x.</returns>
public static Rule Store(Rule x)
{
    Rule stored = new StoreRule(x);
    return stored;
}
/// <summary>
/// Wraps Rule x in a StarRule: the resulting rule attempts to match x as
/// many times as possible and always returns true. Represented by the
/// unary operator "*".
/// </summary>
/// <param name="x">The rule to repeat zero or more times.</param>
/// <returns>A new StarRule wrapping x.</returns>
public static Rule Star(Rule x)
{
    Rule zeroOrMore = new StarRule(x);
    return zeroOrMore;
}
/// <summary>
/// Creates a LeafRule with a single sub-rule.
/// </summary>
/// <param name="x">The sub-rule, registered through AddRule.</param>
public LeafRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Wraps Rule x in a NotRule: the resulting rule returns true if x
/// returns false, and false otherwise. It never advances the parser
/// index. Represented by the unary operator "^".
/// </summary>
/// <param name="x">The rule whose result is negated.</param>
/// <returns>A new NotRule wrapping x.</returns>
public static Rule Not(Rule x)
{
    Rule negated = new NotRule(x);
    return negated;
}
/// <summary>
/// Creates a rule that reads everything up to and including the text
/// matched by Rule x.
/// </summary>
/// <param name="x">The termination rule, consumed as part of the match.</param>
/// <returns>The sequence Until(x) followed by x.</returns>
public static Rule UntilPast(Rule x)
{
    Rule leadingText = Until(x);
    return leadingText + x;
}