/// <summary>
/// Builds a rule for a delimited pair: <paramref name="begin"/>, followed by any
/// number of nested pairs or other input, followed by <paramref name="end"/>.
/// Nested pairs are handled by a <c>RecursiveRule</c> whose delegate calls
/// <c>Nested</c> again with the same delimiters.
/// </summary>
/// <param name="begin">Rule that matches the opening delimiter.</param>
/// <param name="end">Rule that matches the closing delimiter.</param>
/// <returns>
/// <paramref name="begin"/> sequenced with a <c>NoFailSeq</c> wrapping the
/// repeated content and <paramref name="end"/>.
/// </returns>
public Rule Nested(Rule begin, Rule end)
{
    // The nested rule is produced by the delegate rather than built eagerly here.
    var inner = new RecursiveRule(() => Nested(begin, end));

    // One element of non-delimiter content: anything that starts neither
    // the closing nor the opening delimiter.
    var filler = Not(end) + Not(begin) + Anything();

    // Content (nested pairs tried before plain filler), then the closing delimiter.
    var remainder = Star(inner | filler) + end;

    return begin + NoFailSeq(remainder);
}
/// <summary>
/// Constructor: initializes the public rule fields.
/// </summary>
/// <remarks>
/// The assignments are order dependent: each rule is composed from fields assigned
/// earlier in this constructor. The one forward reference
/// (<c>identifier_extension</c> to <c>identifier</c>) is deferred through a
/// <c>Recursive</c> delegate. Keep the statement order when editing.
/// </remarks>
public CppBaseGrammar()
{
    #region numbers
    digit = CharRange('0', '9');
    octal_digit = CharRange('0', '7');
    nonzero_digit = CharRange('1', '9');
    hex_digit = digit | CharRange('a', 'f') | CharRange('A', 'F');
    sign = CharSet("+-");
    #endregion numbers

    #region identifiers
    lower_case_letter = CharRange('a', 'z');
    upper_case_letter = CharRange('A', 'Z');
    letter = lower_case_letter | upper_case_letter;
    ident_first_char = CharSet("_") | letter;
    ident_next_char = ident_first_char | digit;
    // 'identifier' is not assigned yet, so it is referenced lazily through a delegate.
    identifier_extension = CharSeq("::") + Recursive(() => Opt(identifier));
    identifier = Leaf(ident_first_char + Star(ident_next_char) + Star(identifier_extension));
    #endregion

    #region whitespace
    tab = CharSeq("\t");
    space = CharSeq(" ");
    simple_ws = tab | space;
    eol = Opt(CharSeq("\r")) + CharSeq("\n");
    // Backslash, optional trailing blanks, then a line break.
    ext_line = CharSeq("\\") + Star(simple_ws) + eol;
    multiline_ws = simple_ws | eol;
    until_eol = Star(ext_line | AnythingBut(eol | EndOfInput()));
    line_comment_content = until_eol;
    line_comment = CharSeq("//") + NoFailSeq(line_comment_content + (eol | EndOfInput()));
    full_comment_content = Until(CharSeq("*/"));
    full_comment = CharSeq("/*") + NoFailSeq(full_comment_content + CharSeq("*/"));
    comment = line_comment | full_comment;
    ws = Eat(multiline_ws | comment);
    #endregion

    #region keyword rules
    COND = Word("?");
    DOT = Word(".");
    COLON = Word(":");
    AMP = Word("&");
    PLUS = Word("+");
    MINUS = Word("-");
    STAR = Word("*");
    SLASH = Word("/");
    MOD = Word("%");
    NOT = Word("!");
    TILDE = Word("~");
    CARET = Word("^");
    PIPE = Word("|");
    EQ = Word("=");
    COMMA = Word(",");
    SIZEOF = Word("sizeof");
    PTR_OP = Word("->");
    INC_OP = Word("++");
    DEC_OP = Word("--");
    LEFT_OP = Word("<<");
    RIGHT_OP = Word(">>");
    LT_OP = Word("<");
    GT_OP = Word(">");
    LE_OP = Word("<=");
    GE_OP = Word(">=");
    EQ_OP = Word("==");
    NE_OP = Word("!=");
    AND_OP = Word("&&");
    OR_OP = Word("||");
    MUL_ASSIGN = Word("*=");
    DIV_ASSIGN = Word("/=");
    MOD_ASSIGN = Word("%=");
    ADD_ASSIGN = Word("+=");
    SUB_ASSIGN = Word("-=");
    LEFT_ASSIGN = Word("<<=");
    RIGHT_ASSIGN = Word(">>=");
    AND_ASSIGN = Word("&=");
    XOR_ASSIGN = Word("^=");
    OR_ASSIGN = Word("|=");
    TYPEDEF = Word("typedef");
    EXTERN = Word("extern");
    STATIC = Word("static");
    AUTO = Word("auto");
    REGISTER = Word("register");
    CHAR = Word("char");
    SHORT = Word("short");
    INT = Word("int");
    LONG = Word("long");
    SIGNED = Word("signed");
    UNSIGNED = Word("unsigned");
    FLOAT = Word("float");
    DOUBLE = Word("double");
    CONST = Word("const");
    VOLATILE = Word("volatile");
    VOID = Word("void");
    STRUCT = Word("struct");
    UNION = Word("union");
    ENUM = Word("enum");
    ELLIPSIS = Word("...");
    CASE = Word("case");
    DEFAULT = Word("default");
    IF = Word("if");
    ELSE = Word("else");
    SWITCH = Word("switch");
    WHILE = Word("while");
    DO = Word("do");
    FOR = Word("for");
    GOTO = Word("goto");
    CONTINUE = Word("continue");
    BREAK = Word("break");
    RETURN = Word("return");
    CLASS = Word("class");
    TYPENAME = Word("typename");
    TYPEID = Word("typeid");
    TEMPLATE = Word("template");
    PUBLIC = Word("public");
    PROTECTED = Word("protected");
    PRIVATE = Word("private");
    VIRTUAL = Word("virtual");
    OPERATOR = Word("operator");
    USING = Word("using");
    #endregion

    #region literals
    dot = CharSeq(".");
    dbl_quote = CharSeq("\"");
    quote = CharSeq("\'");
    simple_escape = CharSeq("\\") + CharSet("abfnrtv'\"?\\");
    octal_escape = CharSeq("\\") + octal_digit + Opt(octal_digit + Opt(octal_digit));
    hex_escape = CharSeq("\\x") + Star(hex_digit);
    escape_sequence = simple_escape | octal_escape | hex_escape;
    c_char = escape_sequence | Not(quote) + Anything();
    s_char = escape_sequence | Not(dbl_quote) + Anything();
    long_suffix = CharSet("Ll");
    unsigned_suffix = CharSet("Uu");
    digit_sequence = Plus(digit);
    exponent = Opt(sign) + digit_sequence;
    exponent_prefix = CharSet("Ee");
    exponent_part = exponent_prefix + exponent;
    float_suffix = CharSet("LlFf");
    simple_float = CharSeq(".") + digit_sequence | digit_sequence + dot + Opt(digit_sequence);
    exponential_float = digit_sequence + exponent_part | simple_float + exponent_part;
    // FIX: try the exponential form first. Assuming '|' is PEG-style ordered choice,
    // 'simple_float | exponential_float' always committed to simple_float on input
    // such as "1.5e3", leaving "e3" unconsumed so the literal could never match.
    unsigned_float = exponential_float | simple_float;
    hex_prefix = CharSeq("0X") | CharSeq("0x");
    hex_literal = hex_prefix + Plus(hex_digit);
    octal_literal = CharSeq("0") + Star(octal_digit);
    decimal_literal = nonzero_digit + Star(digit);
    unsigned_literal = hex_literal | octal_literal | decimal_literal;
    integer_suffix = unsigned_suffix + Opt(long_suffix) + Opt(long_suffix) | long_suffix + Opt(long_suffix) + Opt(unsigned_suffix);
    // Not(dot) keeps the integer rule from claiming the integral part of "1.5".
    int_literal = unsigned_literal + Not(dot) + Opt(integer_suffix);
    float_literal = unsigned_float + Opt(float_suffix);
    char_literal = Opt(CharSeq("L")) + quote + Star(c_char) + quote;
    string_literal = Opt(CharSeq("L")) + dbl_quote + Star(s_char) + dbl_quote;
    boolean_literal = Word("true") | Word("false");
    // FIX: float_literal is tried before int_literal. With the integer rule first,
    // "1e5" matched "1" as an integer and then failed the Not(ident_next_char) guard
    // on 'e'. float_literal needs a '.' or an exponent, so plain integers still fall
    // through to int_literal unchanged.
    literal = (float_literal | int_literal | char_literal | string_literal | boolean_literal) + NoFail(Not(ident_next_char)) + ws;
    #endregion

    #region pre-processor directives
    pragma = Word("#") + Word("pragma") + until_eol;
    included_file = string_literal | CharSeq("<") + Star(Not(CharSeq(">")) + Anything()) + CharSeq(">");
    include = Word("#") + Word("include") + included_file;
    ifdef_macro = Word("#") + Word("if") + until_eol + eol;
    endif_macro = Word("#") + Word("endif") + until_eol + eol;
    elif_macro = Word("#") + Word("elif") + until_eol + eol;
    else_macro = Word("#") + Word("else") + until_eol + eol;
    #endregion

    #region symbols
    semicolon = CharSeq(";");
    eos = Word(";");
    #endregion

    #region operators
    operator_char = CharSet("=<>!-+*/[]%&|^~?");
    // Zero or more "name::" qualifiers in front of the 'operator' keyword.
    operator_prefix = Leaf(Star(ident_first_char + Star(ident_next_char) + CharSeq("::")));
    operator_seq = Leaf(operator_char + Opt(operator_char));
    @operator = operator_prefix + OPERATOR + operator_seq;
    #endregion

    InitializeRules <CppBaseGrammar>();
}
/// <summary>
/// Constructs a <c>LeafRule</c> and hands the given rule to <c>AddRule</c>.
/// </summary>
/// <param name="x">The rule passed to <c>AddRule</c>.</param>
public LeafRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Builds a rule for one occurrence of <paramref name="r"/> followed by zero or
/// more repetitions of <c>COMMA</c> and <paramref name="r"/>.
/// </summary>
/// <param name="r">The rule for a single list element.</param>
/// <returns>The sequence <c>r + Star(COMMA + r)</c>.</returns>
public Rule CommaList(Rule r)
{
    // Every element after the first must be introduced by a comma.
    var moreItems = Star(COMMA + r);
    return r + moreItems;
}
/// <summary>
/// Constructs a <c>NoFailRule</c> and hands the given rule to <c>AddRule</c>.
/// </summary>
/// <param name="x">The rule passed to <c>AddRule</c>.</param>
public NoFailRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Constructs a <c>SkipRule</c> and hands the given rule to <c>AddRule</c>.
/// </summary>
/// <param name="x">The rule passed to <c>AddRule</c>.</param>
public SkipRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Constructs an <c>OptRule</c> and hands the given rule to <c>AddRule</c>.
/// </summary>
/// <param name="x">The rule passed to <c>AddRule</c>.</param>
public OptRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Invokes <c>func</c> to obtain a rule and matches that rule against the
/// supplied parser state.
/// </summary>
/// <param name="p">The parser state forwarded to the obtained rule's <c>Match</c>.</param>
/// <returns>The result of <c>Match</c> on the rule returned by <c>func</c>.</returns>
protected override bool InternalMatch(ParserState p)
{
    // The delegate is invoked on every call; its result is not stored here.
    return func().Match(p);
}
/// <summary>
/// Constructs a <c>NotRule</c> and hands the given rule to <c>AddRule</c>.
/// </summary>
/// <param name="x">The rule passed to <c>AddRule</c>.</param>
public NotRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Constructs a <c>PlusRule</c> and hands the given rule to <c>AddRule</c>.
/// </summary>
/// <param name="x">The rule passed to <c>AddRule</c>.</param>
public PlusRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Constructs a <c>StarRule</c> and hands the given rule to <c>AddRule</c>.
/// </summary>
/// <param name="x">The rule passed to <c>AddRule</c>.</param>
public StarRule(Rule x)
{
    AddRule(x);
}
/// <summary>
/// Constructs a <c>SeqRule</c> from two rules, passing them to <c>AddRule</c>
/// in argument order: <paramref name="a"/> first, then <paramref name="b"/>.
/// </summary>
/// <param name="a">The first rule passed to <c>AddRule</c>.</param>
/// <param name="b">The second rule passed to <c>AddRule</c>.</param>
public SeqRule(Rule a, Rule b)
{
    AddRule(a);
    AddRule(b);
}
/// <summary>
/// Constructs a <c>ChoiceRule</c> from two rules, passing them to <c>AddRule</c>
/// in argument order: <paramref name="a"/> first, then <paramref name="b"/>.
/// </summary>
/// <param name="a">The first rule passed to <c>AddRule</c>.</param>
/// <param name="b">The second rule passed to <c>AddRule</c>.</param>
public ChoiceRule(Rule a, Rule b)
{
    AddRule(a);
    AddRule(b);
}