/// <summary>
/// Builds the colouring regex. Matches are tagged through the named groups
/// "escape" and "name"; character classes are wrapped in an unnamed group.
/// </summary>
/// <param name="options">Engine options; only the <c>u</c> flag is read here.</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateColouringRegex(WebView2RegexOptions options)
{
    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");
    escapes.Add(@"\\c[A-Za-z]");            // \cX control character
    escapes.Add(@"\\x[0-9a-fA-F]{1,2}");    // \x followed by one or two hex digits
    escapes.Add(@"\\u[0-9a-fA-F]{1,4}");    // \u followed by one to four hex digits

    if (options.u)
    {
        // language=regex
        escapes.Add(@"\\u\{[0-9a-fA-F]+(\}|$)");    // \u{...}; terminated by '}' or '$'
        // language=regex
        escapes.Add(@"\\(p|P)\{.*?(\}|$)");         // \p{...} and \P{...}
    }

    escapes.Add(@"\\.");    // any other escaped character
    escapes.EndGroup();

    var builder = new PatternBuilder();

    // [...]: a ']' directly after '[' is taken as part of the class
    builder.AddGroup(null, $@"\[\]?({escapes.ToPattern()} |.)*?(\]|$)");
    // language=regex
    builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");     // (?<name>, but not (?<= or (?<!
    // language=regex
    builder.Add(@"(?'name'\\k<.*?(>|$))");              // \k<name>
    builder.Add(escapes.ToPattern());

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. Brackets are tagged through the named groups
/// left_par / right_par, left_brace / right_brace and left_bracket / right_bracket;
/// the remaining alternatives carry no named group.
/// </summary>
/// <param name="options">Engine options; only <c>UREGEX_COMMENTS</c> is read here.</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateHighlightingRegex(IcuRegexOptions options)
{
    string posixBracket = @"(\[:.*?(:\]|$))"; // [:...:]

    var builder = new PatternBuilder();

    builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment

    if (options.UREGEX_COMMENTS)
    {
        builder.Add(@"\#.*?(\n|$)");    // '#' line comment
    }

    builder.Add(@"\\Q.*?(\\E|$)");                                  // \Q...\E quoted part
    builder.Add(@"(?'left_par'\()");                                // '('
    builder.Add(@"(?'right_par'\))");                               // ')'
    builder.Add(@"\\[NpPx]\{.*?(\}|$)");                            // \N{..}, \p{..}, \P{..}, \x{..}: no brace tagging
    builder.Add(@"(?'left_brace'\{).*?((?'right_brace'\})|$)");     // '{...}'

    // Character class with nesting: the balancing group 'c' counts inner '['
    // and ']' and (?(c)(?!)) rejects an unbalanced count. The last alternative
    // tags a lone ']'.
    builder.Add($@" (?'left_bracket'\[) \]? (?> {posixBracket} | (?'left_bracket'\[)(?<c>) | (\\. | [^\[\]])+ | (?'right_bracket'\])(?<-c>))* (?(c)(?!)) (?'right_bracket'\])? | (?'right_bracket'\]) ");

    builder.Add(@"\\.");    // any other escaped character

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex for the given grammar. Parentheses, braces and
/// square brackets are tagged through the named groups left_par / right_par,
/// left_brace / right_brace and left_bracket / right_bracket.
/// </summary>
/// <param name="grammar">Grammar that decides whether delimiters are plain or backslash-escaped.</param>
/// <returns>The regex produced by the pattern builder.</returns>
static Regex CreateHighlightingRegex(GrammarEnum grammar)
{
    bool plainDelimiters =
        grammar == GrammarEnum.extended ||
        grammar == GrammarEnum.ECMAScript ||
        grammar == GrammarEnum.egrep ||
        grammar == GrammarEnum.awk;

    bool escapedDelimiters =
        grammar == GrammarEnum.basic ||
        grammar == GrammarEnum.grep;

    var builder = new PatternBuilder();

    if (plainDelimiters)
    {
        builder.Add(@"(?'left_par'\()");                                // '('
        builder.Add(@"(?'right_par'\))");                               // ')'
        builder.Add(@"(?'left_brace'\{).*?((?'right_brace'\})|$)");     // '{...}'
    }

    if (escapedDelimiters)
    {
        builder.Add(@"(?'left_par'\\\()");                              // '\('
        builder.Add(@"(?'right_par'\\\))");                             // '\)'
        builder.Add(@"(?'left_brace'\\{).*?((?'right_brace'\\})|$)");   // '\{...\}'
    }

    // [...], with [:...:] and escaped characters skipped inside the class
    builder.Add(@"((?'left_bracket'\[) ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )");
    builder.Add(@"\\.");    // any other escaped character

    return builder.ToRegex();
}
//public static implicit operator TokenFragment(PatternBuilder builder)
//{
//    return new TokenPattern(builder.CreateArray());
//}

/// <summary>
/// Concatenates two builders: the result holds every entry of
/// <paramref name="op1"/> followed by every entry of <paramref name="op2"/>.
/// Neither operand is modified.
/// </summary>
/// <param name="op1">Builder whose entries come first.</param>
/// <param name="op2">Builder whose entries come second.</param>
/// <returns>A new builder containing the combined entries.</returns>
public static PatternBuilder operator +(PatternBuilder op1, PatternBuilder op2)
{
    var combined = new PatternBuilder();

    // Copy the left operand completely before touching the right one.
    foreach (var source in new[] { op1, op2 })
    {
        for (var index = 0; index < source._entryCount; index++)
        {
            combined.Add(source[index]);
        }
    }

    return combined;
}
/// <summary>
/// Builds the colouring regex. Matches are tagged through the named groups
/// "escape", "comment" and "name"; the character-class alternative is untagged.
/// </summary>
/// <param name="isVerbose">When true, '#' line comments are recognised as well.</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateColouringRegex(bool isVerbose)
{
    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");
    foreach (string escapePattern in new[]
    {
        @"\\x[0-9a-fA-F]{1,2}",     // hex, one or two digits
        @"\\0[0-7]+",               // octal, after '0'
        @"\\[1-7][0-7]{2,}",        // octal, three or more digits
        @"\\N\{.+?(\} | $)",        // Unicode name, e.g. \N{DIGIT ONE}
        @"\\.",                     // any other escaped character
    })
    {
        escapes.Add(escapePattern);
    }
    escapes.EndGroup();

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");
    builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment
    if (isVerbose)
    {
        builder.Add(@"\#.*?(\n|$)");    // '#' line comment
    }
    builder.EndGroup();

    builder.Add(@"\(\?P(?'name'<.*?(>|$))");                // (?P<name>
    builder.Add(@"\(\?P=(?'name'.*?(\)|$))");               // (?P=name)
    builder.Add(@"(?'name'\\[1-9][0-9]?(?![0-9]))");        // \1 .. \99, not followed by another digit

    builder.Add(escapes.ToPattern());

    string charGroup = @"( \[ \]? .*? (\]|$) )";            // [...]
    builder.Add(charGroup);

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. Brackets are tagged through the named groups
/// left_par / right_par, left_brace / right_brace and left_bracket / right_bracket.
/// </summary>
/// <param name="options">Engine options (not read by this method).</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateHighlightingRegex(DRegexOptions options)
{
    // POSIX brackets are not supported here; the pattern would be @"(\[:.*?(:\]|$))" for [:...:]
    string posixBracket = "";
    // The alternative (with its trailing " |") is emitted only when a POSIX pattern is configured.
    string posixAlternative = posixBracket.Length == 0 ? "" : posixBracket + " |";

    var builder = new PatternBuilder();

    builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment
    builder.Add(@"(?'left_par'\()");    // '('
    builder.Add(@"(?'right_par'\))");   // ')'
    builder.Add(@"\\[pP]\{.*?(\} | $)");    // property: no brace tagging
    builder.Add(@"(?'left_brace'\{) (\d+(,\d*)? | ,\d+) ((?'right_brace'\})|$)");   // '{n}', '{n,}', '{n,m}', '{,m}'

    // Character class with nesting: the balancing group 'c' counts inner '['
    // and ']' and (?(c)(?!)) rejects an unbalanced count. The last alternative
    // tags a lone ']'.
    builder.Add($@" (?'left_bracket'\[) \]? (?> {posixAlternative} (?'left_bracket'\[)(?<c>) | (\\. | [^\[\]])+ | (?'right_bracket'\])(?<-c>))* (?(c)(?!)) (?'right_bracket'\])? | (?'right_bracket'\]) ");

    builder.Add(@"\\.");    // any other escaped character

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex. Matches are tagged through the named groups
/// "escape", "comment" and "name"; the character-class alternative is untagged
/// apart from the escapes it contains.
/// </summary>
/// <param name="options">Engine options (not read by this method).</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateColouringRegex(DRegexOptions options)
{
    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");
    foreach (string escapePattern in new[]
    {
        @"\\c[A-Za-z]",             // \cC control character
        @"\\x[0-9a-fA-F]{0,2}",     // \xXX, zero to two hex digits accepted
        @"\\u[0-9a-fA-F]{0,4}",     // \uXXXX, zero to four hex digits accepted
        @"\\U[0-9a-fA-F]{0,8}",     // \UYYYYYYYY, zero to eight hex digits accepted
        @"\\[pP]\{.*?(\}|$)",       // \p{...} / \P{...}
        @"\\[pP].?",                // \p or \P with an optional single character
        @"\\.",                     // any other escaped character
    })
    {
        escapes.Add(escapePattern);
    }
    escapes.EndGroup();

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");
    builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment
    builder.EndGroup();

    builder.Add(@"\(\?P(?'name'<.*?(>|$))");    // (?P<name>

    // (nested groups: https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses)
    // POSIX brackets are not supported here; the pattern would be
    // @"(?'escape'\[:.*?(:\]|$))" for [:...:], using the escape colour.
    string posixBracket = "";
    string posixAlternative = posixBracket.Length == 0 ? "" : posixBracket + " |";

    builder.Add($@" \[ \]? (?> {posixAlternative} \[(?<c>) | ({escapes.ToPattern()} | [^\[\]])+ | \](?<-c>))* (?(c)(?!)) \] ");

    builder.Add(escapes.ToPattern());

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex. When the "literal" option is selected the
/// always-failing regex is returned. Otherwise matches are tagged through the
/// named groups "escape", "class" and "name".
/// </summary>
/// <returns>The regex produced by the pattern builder, or <c>PatternBuilder.AlwaysFailsRegex</c>.</returns>
Regex CreateColouringRegex()
{
    if (OptionsControl.IsOptionSelected("literal"))
    {
        return PatternBuilder.AlwaysFailsRegex;
    }

    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");
    foreach (string escapePattern in new[]
    {
        @"\\[pP][A-Za-z]",              // Unicode character class (one-letter name)
        @"\\[pP]\{.*?(\}|$)",           // Unicode character class
        @"\\0[0-7]{1,2}",               // octal, up to two digits after 0
        @"\\[0-7]{1,3}",                // octal, up to three digits
        @"\\x[0-9a-fA-F]{1,2}",         // hex, up to two digits
        @"\\x\{[0-9a-fA-F]*(\}|$)",     // hex in braces (engine reports an error if empty)
        @"\\Q.*?(\\E|$)",               // quoted sequence, \Q...\E
        @"\\.",                         // any other escaped character
    })
    {
        escapes.Add(escapePattern);
    }
    escapes.EndGroup();

    // only [: :], no [= =], no [. .]
    var classes = new PatternBuilder().AddGroup("class", @"\[(?'c'[:]) .*? (\k<c>\] | $)");

    var builder = new PatternBuilder();

    builder.Add(escapes.ToPattern());

    // TODO: check 'escape' part
    builder.AddGroup(null, $@"\[ \]? ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$)");

    builder.Add(@"\(\?P(?'name'<.*?>)");    // (?P<name>

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex that tags parentheses through the named
/// groups left_par and right_par.
/// </summary>
/// <returns>The regex produced by the pattern builder.</returns>
static Regex CreateHighlightingRegex()
{
    var builder = new PatternBuilder();

    // Escaped characters carry no named group. NOTE(review): presumably listed
    // first so that an escaped parenthesis is consumed here and not tagged --
    // confirm that the builder joins entries as an ordered alternation.
    builder.Add(@"\\.");

    // AddGroup supplies the group name itself, so the inner pattern must not
    // name the group again: the previous form produced two nested groups with
    // the same name and therefore two captures per parenthesis.
    builder.AddGroup("left_par", @"\(");    // '('
    builder.AddGroup("right_par", @"\)");   // ')'

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. Brackets are tagged through the named groups
/// left_par / right_par, left_brace / right_brace and left_bracket / right_bracket.
/// </summary>
/// <param name="isXorXX">When true, '#' line comments are recognised as well.</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateHighlightingRegex(bool isXorXX)
{
    var builder = new PatternBuilder();

    builder.Add(@"(\(\?\#.*?(\)|$))");  // (?#...) comment

    if (isXorXX)
    {
        builder.Add(@"(\#[^\n]*)");     // '#' line comment
    }

    foreach (string pattern in new[]
    {
        @"\\Q.*?(\\E|$)",               // quoted sequence, \Q...\E
        @"\\[xNopPbBgk]\{.*?(\}|$)",    // escapes that take a {...} argument: no brace tagging
        @"(?'left_par'\()",             // '('
        @"(?'right_par'\))",            // ')'
        @"(?'left_brace'\{) \s* \d+ \s* (,\s*\d*)? \s* ((?'right_brace'\})|$)",                     // '{...}'
        @"((?'left_bracket'\[) \]? ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )",      // [...]
        @"\\.",                         // any other escaped character
    })
    {
        builder.Add(pattern);
    }

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex for the given grammar. Matches are tagged through
/// the named groups "escape" and "class".
/// </summary>
/// <param name="grammar">Grammar that decides which escape forms are recognised.</param>
/// <returns>The regex produced by the pattern builder.</returns>
static Regex CreateColouringRegex(GrammarEnum grammar)
{
    bool isEcmaScript = grammar == GrammarEnum.ECMAScript;
    bool isAwk = grammar == GrammarEnum.awk;
    bool isBasicOrGrep = grammar == GrammarEnum.basic || grammar == GrammarEnum.grep;

    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");

    if (isEcmaScript)
    {
        escapes.Add(@"\\c[A-Za-z]");            // \cX control character
        escapes.Add(@"\\x[0-9A-Fa-f]{1,2}");    // \x; the pattern accepts one or two hex digits
        escapes.Add(@"\\u[0-9A-Fa-f]{1,4}");    // \u; the pattern accepts one to four hex digits
    }
    else if (isAwk)
    {
        escapes.Add(@"\\[0-7]{1,3}");           // octal code
    }

    if (isBasicOrGrep)
    {
        // Any escaped character except the escaped delimiters \( \) \{ \}
        escapes.Add(@"(?!\\\( | \\\) | \\\{ | \\\})\\.");
    }
    else
    {
        escapes.Add(@"\\.");
    }

    escapes.EndGroup();

    // [:...:], [=...=] and [....]; the opening and closing marker must agree
    var classes = new PatternBuilder().AddGroup("class", @"\[(?'c'[:=.]) .*? (\k<c>\] | $)");

    var builder = new PatternBuilder();

    builder.Add(escapes.ToPattern());
    builder.AddGroup(null, $@"( \[ ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$) )");

    // (group names and comments are not supported by C++ Regex)

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. When PCRE2_LITERAL is selected the
/// always-failing regex is returned. Otherwise brackets are tagged through the
/// named groups left_par / right_par, left_brace / right_brace and
/// left_bracket / right_bracket.
/// </summary>
/// <returns>The regex produced by the pattern builder, or <c>PatternBuilder.AlwaysFailsRegex</c>.</returns>
Regex CreateHighlightingRegex()
{
    if (OptionsControl.IsCompileOptionSelected("PCRE2_LITERAL"))
    {
        return PatternBuilder.AlwaysFailsRegex;
    }

    bool extended = OptionsControl.IsCompileOptionSelected("PCRE2_EXTENDED");
    bool emptyClassAllowed = OptionsControl.IsCompileOptionSelected("PCRE2_ALLOW_EMPTY_CLASS");

    // With PCRE2_ALLOW_EMPTY_CLASS a ']' right after '[' closes the class;
    // otherwise that ']' is taken as part of the class.
    string bracketPattern = emptyClassAllowed
        ? @"((?'left_bracket'\[) ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )"
        : @"((?'left_bracket'\[) \]? ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )";

    var builder = new PatternBuilder();

    builder.Add(@"(\(\?\#.*?(\)|$))");  // (?#...) comment

    if (extended)
    {
        builder.Add(@"(\#[^\n]*)");     // '#' line comment
    }

    builder.Add(@"\\Q.*?(\\E|$)");              // quoted sequence, \Q...\E
    builder.Add(@"\\[oNxupP]\{.*?(\}|$)");      // escapes that take a {...} argument: no brace tagging
    builder.Add(@"(?'left_par'\()");            // '('
    builder.Add(@"(?'right_par'\))");           // ')'
    builder.Add(@"(?'left_brace'\{) \d+ (,\d*)? ((?'right_brace'\})|$)");   // '{...}'
    builder.Add(bracketPattern);                // [...]
    builder.Add(@"\\.");                        // any other escaped character

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. Brackets are tagged through the named groups
/// left_par / right_par, left_brace / right_brace and left_bracket / right_bracket.
/// </summary>
/// <param name="isVerbose">When true, '#' line comments are recognised as well.</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateHighlightingRegex(bool isVerbose)
{
    var builder = new PatternBuilder();

    builder.Add(@"(\(\?\#.*?(\)|$))");  // (?#...) comment

    if (isVerbose)
    {
        builder.Add(@"(\#[^\n]*)");     // '#' line comment
    }

    foreach (string pattern in new[]
    {
        @"\\[N]\{.*?(\}|$)",            // \N{...}: no brace tagging
        @"(?'left_par'\()",             // '('
        @"(?'right_par'\))",            // ')'
        @"(?'left_brace'\{) (\d+ | \d*,\d*) ((?'right_brace'\})|$)",                // '{...}'
        @"((?'left_bracket'\[) ]? (\\. | .)*? ((?'right_bracket'\])|$) )",          // [...]
        @"\\.",                         // any other escaped character
    })
    {
        builder.Add(pattern);
    }

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. Brackets are tagged through the named groups
/// left_par / right_par, left_brace / right_brace and left_bracket / right_bracket.
/// </summary>
/// <param name="options">Regex options; only <c>IgnorePatternWhitespace</c> is examined.</param>
/// <returns>The regex produced by the pattern builder.</returns>
static Regex CreateHighlightingRegex(RegexOptions options)
{
    bool lineComments =
        (options & RegexOptions.IgnorePatternWhitespace) == RegexOptions.IgnorePatternWhitespace;

    var builder = new PatternBuilder();

    builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment

    if (lineComments)
    {
        builder.Add(@"\#[^\n]*");       // '#' line comment
    }

    foreach (string pattern in new[]
    {
        @"\\[pP]\{.*?(\}|$)",           // \p{...} / \P{...}: no brace tagging
        @"(?'left_par'\()",             // '('
        @"(?'right_par'\))",            // ')'
        @"(?'left_brace'\{) \d+(,(\d+)?)? ((?'right_brace'\})|$)",              // '{...}'
        @"(?'left_bracket'\[) \]? (\\.|.)*? ((?'right_bracket'\])|$)",          // '[...]'
        @"\\.",                         // any other escaped character: no tagging
    })
    {
        builder.Add(pattern);
    }

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. Brackets are tagged through the named groups
/// left_par / right_par, left_brace / right_brace and left_bracket / right_bracket.
/// </summary>
/// <param name="options">Engine options (not read by this method).</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateHighlightingRegex(WebView2RegexOptions options)
{
    // Entries are added in exactly this order.
    string[] patterns =
    {
        @"(?'left_par'\()",                                             // '('
        @"(?'right_par'\))",                                            // ')'
        @"\\[pPu]\{.*?(\}|$)",                                          // \p{..}, \P{..}, \u{..}: no brace tagging
        @"(?'left_brace'\{).*?((?'right_brace'\})|$)",                  // '{...}'
        @"(?'left_bracket'\[) \]? (\\.|.)*? ((?'right_bracket'\])|$)",  // '[...]'
        @"\\.",                                                         // any other escaped character: no tagging
    };

    var builder = new PatternBuilder();

    foreach (string pattern in patterns)
    {
        builder.Add(pattern);
    }

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. Brackets are tagged through the named groups
/// left_par / right_par, left_brace / right_brace and left_bracket / right_bracket.
/// </summary>
/// <param name="options">
/// Engine options; <c>@struct</c>, <c>ignore_whitespace</c> and <c>unicode</c> are read.
/// The latter two only take effect when <c>@struct</c> is "RegexBuilder".
/// </param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateHighlightingRegex(RustRegexOptions options)
{
    bool usesBuilder = options.@struct == "RegexBuilder";

    string posixBracket = @"(\[:.*?(:\]|$))"; // [:...:]
    // The alternative (with its trailing " |") is emitted only when a POSIX pattern is configured.
    string posixAlternative = posixBracket.Length == 0 ? "" : posixBracket + " |";

    var builder = new PatternBuilder();

    if (usesBuilder && options.ignore_whitespace)
    {
        builder.Add(@"\#.*?(\n|$)");    // '#' line comment
    }

    builder.Add(@"(?'left_par'\()");    // '('
    builder.Add(@"(?'right_par'\))");   // ')'
    builder.Add(@"\\[xuU]\{.*?(\}|$)"); // \x{7HHHHHHH ...} etc.

    if (usesBuilder && options.unicode)
    {
        builder.Add(@"\\[pP]\{.*?(\} | $)");    // property
    }

    builder.Add(@"(?'left_brace'\{) (\d+(,\d*)? | ,\d+) ((?'right_brace'\})|$)");   // '{...}'

    // Character class with nesting: the balancing group 'c' counts inner '['
    // and ']' and (?(c)(?!)) rejects an unbalanced count. The last alternative
    // tags a lone ']'.
    builder.Add($@" (?'left_bracket'\[) \]? (?> {posixAlternative} (?'left_bracket'\[)(?<c>) | (\\. | [^\[\]])+ | (?'right_bracket'\])(?<-c>))* (?(c)(?!)) (?'right_bracket'\])? | (?'right_bracket'\]) ");

    builder.Add(@"\\.");    // any other escaped character

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex. Matches are tagged through the named groups
/// "escape", "comment" and "name"; POSIX brackets inside a character class
/// reuse the "escape" group.
/// </summary>
/// <param name="options">Engine options; only <c>UREGEX_COMMENTS</c> is read here.</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateColouringRegex(IcuRegexOptions options)
{
    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");
    foreach (string escapePattern in new[]
    {
        @"\\c[A-Za-z]",                 // \cX control character
        @"\\[NpP]\{.*?(\} | $)",        // named character, property
        @"\\[uUx][0-9a-fA-F]+",         // hexadecimal character
        @"\\x\{[0-9a-fA-F]+(\}|$)",     // hexadecimal character in braces
        @"\\0[0-7]+",                   // octal
        @"\\Q.*?(\\E|$)",               // quoted part
        @"\\.",                         // any other escaped character
    })
    {
        escapes.Add(escapePattern);
    }
    escapes.EndGroup();

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");
    builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment
    if (options.UREGEX_COMMENTS)
    {
        builder.Add(@"\#.*?(\n|$)");    // '#' line comment
    }
    builder.EndGroup();

    builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");     // (?<name>, but not (?<= or (?<!
    builder.Add(@"(?'name'\\k<.*?(>|$))");              // \k<name>

    string posixBracket = @"(?'escape'\[:.*?(:\]|$))"; // [:...:], use escape colour

    // Character class with nesting, balanced through the group 'c'.
    builder.Add($@" \[ \]? (?> {posixBracket} | \[(?<c>) | ({escapes.ToPattern()} | [^\[\]])+ | \](?<-c>))* (?(c)(?!)) \] ");

    builder.Add(escapes.ToPattern());

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex. When the "literal" option is selected the
/// always-failing regex is returned. Otherwise brackets are tagged through the
/// named groups left_par / right_par, left_brace / right_brace and
/// left_bracket / right_bracket.
/// </summary>
/// <returns>The regex produced by the pattern builder, or <c>PatternBuilder.AlwaysFailsRegex</c>.</returns>
Regex CreateHighlightingRegex()
{
    if (OptionsControl.IsOptionSelected("literal"))
    {
        return PatternBuilder.AlwaysFailsRegex;
    }

    // Entries are added in exactly this order.
    string[] patterns =
    {
        @"\\Q.*?(\\E|$)",           // quoted sequence, \Q...\E
        @"\\[pPx]\{.*?(\}|$)",      // \p{..}, \P{..}, \x{..}: no brace tagging
        @"(?'left_par'\()",         // '('
        @"(?'right_par'\))",        // ')'
        @"(?'left_brace'\{) \d+ (,\d*)* ((?'right_brace'\})|$)",                                // '{...}'
        @"((?'left_bracket'\[) \]? ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )",  // [...]
        @"\\.",                     // any other escaped character
    };

    var builder = new PatternBuilder();

    foreach (string pattern in patterns)
    {
        builder.Add(pattern);
    }

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex. Matches are tagged through the named groups
/// "comment", "escape" and "name". Several patterns deliberately accept
/// incomplete constructs (they may end at '$' instead of their closing token).
/// </summary>
/// <param name="options">Regex options; only <c>IgnorePatternWhitespace</c> is examined.</param>
/// <returns>The regex produced by the pattern builder.</returns>
static Regex CreateCachedColouringRegex(RegexOptions options)
{
    bool lineComments = options.HasFlag(RegexOptions.IgnorePatternWhitespace);

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");
    builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment
    if (lineComments)
    {
        builder.Add(@"\#[^\n]*");       // '#' line comment
    }
    builder.EndGroup();

    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");
    foreach (string escapePattern in new[]
    {
        @"\\[0-7]{2,3}",            // octal, two or three digits
        @"\\x[0-9A-Fa-f]{1,2}",     // \x, one or two hex digits
        @"\\c[A-Za-z]",             // \cX control character
        @"\\u[0-9A-Fa-f]{1,4}",     // \u, one to four hex digits
        @"\\(p|P)\{.*?(\}|$)",      // \p{...} / \P{...}
        @"\\k<([A-Za-z]+>)?",       // \k< with an optional letters-only name and '>'
        @"\\.",                     // any other escaped character
    })
    {
        escapes.Add(escapePattern);
    }
    escapes.EndGroup();

    // [...]: a ']' directly after '[' is taken as part of the class
    builder.AddGroup(null, $@"\[\]?({escapes.ToPattern()} |.)*?(\]|$)");

    builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");     // (?<name> (balancing groups covered too)
    builder.Add(@"\(\?(?'name''.*?('|$))");             // (?'name'
    builder.Add(@"(?'name'\\k<.*?(>|$))");              // \k<name>
    builder.Add(@"(?'name'\\k'.*?('|$))");              // \k'name'

    builder.Add(escapes.ToPattern());

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex. Matches are tagged through the named groups
/// "escape", "comment" and "name"; POSIX brackets inside a character class
/// reuse the "escape" group.
/// </summary>
/// <param name="options">
/// Engine options; <c>@struct</c>, <c>unicode</c>, <c>octal</c> and
/// <c>ignore_whitespace</c> are read.
/// </param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateColouringRegex(RustRegexOptions options)
{
    bool isRegex = options.@struct == "Regex";
    bool isRegexBuilder = options.@struct == "RegexBuilder";

    // Unicode classes: always for "Regex", for "RegexBuilder" only with the unicode option.
    bool unicodeClasses = isRegex || (isRegexBuilder && options.unicode);
    // Octal escapes: only for "RegexBuilder" with the octal option.
    bool octalEscapes = isRegexBuilder && options.octal;

    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");

    if (unicodeClasses)
    {
        escapes.Add(@"\\[pP]\{.*?(\}|$)");  // Unicode character class (general category or script)
        escapes.Add(@"\\[pP].?");           // one-letter name Unicode character class
    }

    if (octalEscapes)
    {
        escapes.Add(@"\\[0-7]{1,3}");       // octal character code (up to three digits)
    }

    // Hex forms; the patterns also accept fewer digits and a missing '}'.
    escapes.Add(@"\\x\{[0-9a-fA-F]*(\}|$)?");   // \x{...}
    escapes.Add(@"\\x[0-9a-fA-F]{0,2}");        // \x, up to two digits
    escapes.Add(@"\\u\{[0-9a-fA-F]*(\}|$)?");   // \u{...}
    escapes.Add(@"\\u[0-9a-fA-F]{0,4}");        // \u, up to four digits
    escapes.Add(@"\\U\{[0-9a-fA-F]*(\}|$)?");   // \U{...}
    escapes.Add(@"\\U[0-9a-fA-F]{0,8}");        // \U, up to eight digits

    // Catch-all escape, excluding \p / \P and \0..\7 when those forms are not enabled.
    string anyEscape =
        (unicodeClasses ? "" : @"(?!\\[pP])") +
        (octalEscapes ? "" : @"(?!\\[0-7])") +
        @"\\.";

    escapes.Add(anyEscape);
    escapes.EndGroup();

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");
    if (isRegexBuilder && options.ignore_whitespace)
    {
        builder.Add(@"\#.*?(\n|$)");    // '#' line comment
    }
    builder.EndGroup();

    builder.Add(@"\(\?P(?'name'<.*?(>|$))");    // (?P<name>

    // (nested groups: https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses)
    string posixBracket = @"(?'escape'\[:.*?(:\]|$))"; // [:...:], use escape colour
    string posixAlternative = posixBracket.Length == 0 ? "" : posixBracket + " |";

    builder.Add($@" \[ \]? (?> {posixAlternative} \[(?<c>) | ({escapes.ToPattern()} | [^\[\]])+ | \](?<-c>))* (?(c)(?!)) \] ");

    builder.Add(escapes.ToPattern());

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex. When PCRE2_LITERAL is selected the
/// always-failing regex is returned. Otherwise matches are tagged through the
/// named groups "escape", "class", "comment" and "name".
/// </summary>
/// <returns>The regex produced by the pattern builder, or <c>PatternBuilder.AlwaysFailsRegex</c>.</returns>
Regex CreateColouringRegex()
{
    if (OptionsControl.IsCompileOptionSelected("PCRE2_LITERAL"))
    {
        return PatternBuilder.AlwaysFailsRegex;
    }

    bool extended = OptionsControl.IsCompileOptionSelected("PCRE2_EXTENDED");
    bool emptyClassAllowed = OptionsControl.IsCompileOptionSelected("PCRE2_ALLOW_EMPTY_CLASS");

    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");
    escapes.Add(@"\\c[A-Za-z]");                    // ASCII escape
    escapes.Add(@"\\0[0-7]{1,2}");                  // octal, up to two digits after 0
    escapes.Add(@"\\[0-7]{1,3}");                   // octal, up to three digits
    escapes.Add(@"\\o\{[0-9]+(\} | $)");            // octal; bad values give error
    escapes.Add(@"\\N\{U\+[0-9a-fA-F]+(\} | $)");   // hex, error if no 'PCRE2_UTF'
    escapes.Add(@"\\x[0-9a-fA-F]{1,2}");            // hex, up to two digits
    escapes.Add(@"\\x\{[0-9a-fA-F]*(\} | $)");      // hex, error if empty
    escapes.Add(@"\\u[0-9a-fA-F]{1,4}");            // hex, up to four digits, error if no 'PCRE2_ALT_BSUX', 'PCRE2_EXTRA_ALT_BSUX'
    escapes.Add(@"\\u\{[0-9a-fA-F]*(\} | $)");      // hex, error if empty or no 'PCRE2_ALT_BSUX', 'PCRE2_EXTRA_ALT_BSUX'
    escapes.Add(@"\\[pP]\{.*?(\} | $)");            // property
    escapes.Add(@"\\Q.*?(\\E|$)");                  // quoted sequence, \Q...\E

    // backreferences
    escapes.Add(@"\\[0-9]+");                       // unambiguous
    // see also named groups

    escapes.Add(@"\\.");                            // any other escaped character
    escapes.EndGroup();

    // [:...:], [=...=] and [....]; the opening and closing marker must agree.
    // The literal is kept on one line: it previously contained a raw line
    // break in the middle of the pattern, unlike the same literal elsewhere.
    var classes = new PatternBuilder().AddGroup("class", @"\[(?'c'[:=.]) .*? (\k<c>\] | $)");

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");
    builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment
    if (extended)
    {
        builder.Add(@"\#.*?(\n|$)");    // '#' line comment
    }
    builder.EndGroup();

    builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");         // (?<name>, but not (?<= or (?<!
    builder.Add(@"\(\?(?'name''.*?('|$))");                 // (?'name'
    builder.Add(@"\(\?P(?'name'<.*?(>|$))");                // (?P<name>
    builder.Add(@"(?'name'\\g[+]?[0-9]+)");                 // \g followed by an optional '+' and digits
    builder.Add(@"(?'name'\\g\{[+]?[0-9]*(\} | $))");       // \g{...} with an optional '+' and digits
    builder.Add(@"(?'name'\\[gk]<.*?(>|$))");               // \g<...>, \k<...>
    builder.Add(@"(?'name'\\[gk]'.*?('|$))");               // \g'...', \k'...'
    builder.Add(@"(?'name'\\[gk]\{.*?(\}|$))");             // \g{...}, \k{...}
    builder.Add(@"(?'name'\(\?P=.*?(\)|$))");               // (?P=name)

    builder.Add(escapes.ToPattern());

    // With PCRE2_ALLOW_EMPTY_CLASS a ']' right after '[' closes the class;
    // otherwise that ']' is taken as part of the class.
    string charGroup = emptyClassAllowed
        ? $@"\[ ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$)"
        : $@"\[ \]? ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$)";

    builder.Add(charGroup);

    // TODO: add support for '(*...)' constructs

    return builder.ToRegex();
}
/// <summary>
/// Builds the highlighting regex according to the syntax flags exposed by
/// <paramref name="helper"/>. With ONIG_SYNTAX_ASIS the always-failing regex is
/// returned. Brackets are tagged through the named groups left_par / right_par,
/// left_brace / right_brace and left_bracket / right_bracket.
/// </summary>
/// <param name="helper">Source of the syntax and option flags.</param>
/// <returns>The regex produced by the pattern builder, or <c>PatternBuilder.AlwaysFailsRegex</c>.</returns>
Regex CreateHighlightingRegex(OnigurumaRegexInterop.OnigurumaHelper helper)
{
    if (helper.IsONIG_SYNTAX_ASIS)
    {
        return PatternBuilder.AlwaysFailsRegex;
    }

    var builder = new PatternBuilder();

    if (helper.IsONIG_SYN_OP2_QMARK_GROUP_EFFECT)
    {
        builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment
    }

    if (helper.IsONIG_OPTION_EXTEND)
    {
        builder.Add(@"\#.*?(\n|$)");        // '#' line comment
    }

    if (helper.IsONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)
    {
        builder.Add(@"\\Q.*?(\\E|$)");      // quoted part
    }

    if (helper.IsONIG_SYN_OP_LPAREN_SUBEXP)
    {
        builder.Add(@"(?'left_par'\()");    // '('
        builder.Add(@"(?'right_par'\))");   // ')'
    }

    if (helper.IsONIG_SYN_OP_ESC_LPAREN_SUBEXP)
    {
        builder.Add(@"(?'left_par'\\\()");  // '\('
        builder.Add(@"(?'right_par'\\\))"); // '\)'
    }

    if (helper.IsONIG_SYN_OP_ESC_O_BRACE_OCTAL)
    {
        builder.Add(@"\\o\{.*?(\}|$)");     // \o{17777777777 ...} wide octal chars
    }

    if (helper.IsONIG_SYN_OP_ESC_X_BRACE_HEX8)
    {
        builder.Add(@"\\x\{.*?(\}|$)");     // \x{7HHHHHHH ...} wide hexadecimal chars
    }

    if (helper.IsONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY || helper.IsONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)
    {
        builder.Add(@"\\[pP]\{.*?(\} | $)");    // property
    }

    if (helper.IsONIG_SYN_OP_BRACE_INTERVAL)
    {
        builder.Add(@"(?'left_brace'\{) (\d+(,\d*)? | ,\d+) ((?'right_brace'\})|$)");   // '{...}'
    }

    if (helper.IsONIG_SYN_OP_ESC_BRACE_INTERVAL)
    {
        builder.Add(@"(?'left_brace'\\{).*?((?'right_brace'\\})|$)");   // '\{...\}'
    }

    // [:...:] is recognised inside a class only when POSIX brackets are enabled.
    string posixBracket = helper.IsONIG_SYN_OP_POSIX_BRACKET ? @"(\[:.*?(:\]|$))" : "";
    string posixAlternative = posixBracket.Length == 0 ? "" : posixBracket + " |";

    if (helper.IsONIG_SYN_OP_BRACKET_CC)
    {
        // Character class with nesting: the balancing group 'c' counts inner
        // '[' and ']' and (?(c)(?!)) rejects an unbalanced count. The last
        // alternative tags a lone ']'.
        builder.Add($@" (?'left_bracket'\[) \]? (?> {posixAlternative} (?'left_bracket'\[)(?<c>) | (\\. | [^\[\]])+ | (?'right_bracket'\])(?<-c>))* (?(c)(?!)) (?'right_bracket'\])? | (?'right_bracket'\]) ");
    }

    builder.Add(@"\\.");    // any other escaped character

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex according to the syntax flags exposed by
/// <paramref name="helper"/>. With ONIG_SYNTAX_ASIS the always-failing regex is
/// returned. Otherwise matches are tagged through the named groups "escape",
/// "comment" and "name"; POSIX brackets inside a character class reuse the
/// "escape" group.
/// </summary>
/// <param name="helper">Source of the syntax and option flags.</param>
/// <returns>The regex produced by the pattern builder, or <c>PatternBuilder.AlwaysFailsRegex</c>.</returns>
Regex CreateColouringRegex(OnigurumaRegexInterop.OnigurumaHelper helper)
{
    if (helper.IsONIG_SYNTAX_ASIS)
    {
        return PatternBuilder.AlwaysFailsRegex;
    }

    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");

    escapes.Add(@"\\0[0-7]{1,2}");  // octal, up to two digits after 0
    escapes.Add(@"\\[0-7]{1,3}");   // octal, up to three digits

    if (helper.IsONIG_SYN_OP_ESC_O_BRACE_OCTAL)
    {
        escapes.Add(@"\\o\{[0-7]+ (\s+ [0-7]+)* (\}|$)");   // \o{17777777777 ...} wide octal chars
    }

    escapes.Add(@"\\u[0-9a-fA-F]+");    // \uHHHH wide hexadecimal char

    if (helper.IsONIG_SYN_OP_ESC_X_HEX2)
    {
        escapes.Add(@"\\x[0-9a-fA-F]+");    // \xHH hexadecimal char
    }

    if (helper.IsONIG_SYN_OP_ESC_X_BRACE_HEX8)
    {
        escapes.Add(@"\\x\{[0-9a-fA-F]+ (\s+ [0-9a-fA-F]+)* (\}|$)");   // \x{7HHHHHHH ...} wide hexadecimal chars
    }

    if (helper.IsONIG_SYN_OP_ESC_C_CONTROL)
    {
        escapes.Add(@"\\c[A-Za-z]");        // \cx control char
        escapes.Add(@"\\C-([A-Za-z])?");    // \C-x control char
    }

    // NOTE(review): the first \M- pattern also matches a bare "\M-", so the
    // "\M-\C-x" form below it is probably never selected as a whole; both
    // parts still end up in the 'escape' group -- confirm whether that matters.
    escapes.Add(@"\\M-([A-Za-z])?");            // \M-x meta (x|0x80)
    escapes.Add(@"\\M-(\\C-([A-Za-z])?)?");     // \M-x meta control char

    escapes.Add(@"\\[pP]\{.*?(\} | $)");        // property

    // An earlier draft (marked "probably not useful") considered excluding
    // \*, \+, \? and \{ \} here when the matching ONIG_SYN_OP_ESC_* operators
    // (ASTERISK_ZERO_INF, PLUS_ONE_INF, QMARK_ZERO_ONE, BRACE_INTERVAL) are enabled.

    // The quoted part must be added before the catch-all below: every other
    // specific escape precedes "\\.", and with the catch-all first "\Q" was
    // consumed as a plain two-character escape, so this alternative could
    // never match (assumes entries are tried in the order they are added).
    if (helper.IsONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)
    {
        escapes.Add(@"\\Q.*?(\\E|$)");  // quoted part; use 'escape' name to take its colour
    }

    escapes.Add(@"\\.");    // any other escaped character

    escapes.EndGroup();

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");

    if (helper.IsONIG_SYN_OP2_QMARK_GROUP_EFFECT)
    {
        builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment
    }

    if (helper.IsONIG_OPTION_EXTEND)
    {
        builder.Add(@"\#.*?(\n|$)");        // '#' line comment
    }

    builder.EndGroup();

    if (helper.IsONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)
    {
        builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");     // (?<name>, but not (?<= or (?<!
        builder.Add(@"\(\?(?'name''.*?('|$))");             // (?'name'
    }

    if (helper.IsONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)
    {
        builder.Add(@"\(\?@(?'name'<.*?(>|$))");            // (?@<name>
        builder.Add(@"\(\?@(?'name''.*?('|$))");            // (?@'name'
    }

    if (helper.IsONIG_SYN_OP2_ESC_K_NAMED_BACKREF)
    {
        builder.Add(@"(?'name'\\k<.*?(>|$))");              // \k<name>
        builder.Add(@"(?'name'\\k'.*?('|$))");              // \k'name'
    }

    if (helper.IsONIG_SYN_OP2_ESC_G_SUBEXP_CALL)
    {
        builder.Add(@"(?'name'\\g<.*?(>|$))");              // \g<name>
        builder.Add(@"(?'name'\\g'.*?('|$))");              // \g'name'
    }

    // (nested groups: https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses)

    string posixBracket = "";

    if (helper.IsONIG_SYN_OP_POSIX_BRACKET)
    {
        posixBracket = @"(?'escape'\[:.*?(:\]|$))"; // [:...:], use escape colour
    }

    builder.Add($@" \[ \]? (?> {posixBracket}{(posixBracket.Length == 0 ? "" : " |")} \[(?<c>) | ({escapes.ToPattern()} | [^\[\]])+ | \](?<-c>))* (?(c)(?!)) \] ");

    if (helper.IsONIG_SYN_OP_ESC_LPAREN_SUBEXP)
    {
        builder.Add(@"\\\( | \\\)");    // escaped parentheses are operators here: not coloured
    }

    if (helper.IsONIG_SYN_OP_ESC_BRACE_INTERVAL)
    {
        builder.Add(@"\\\{ | \\\}");    // escaped braces are operators here: not coloured
    }

    builder.Add(escapes.ToPattern());

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex. Matches are tagged through the named groups
/// "escape", "class", "comment" and "name".
/// </summary>
/// <param name="isXorXx">When true, '#' line comments are recognised as well.</param>
/// <returns>The regex produced by the pattern builder.</returns>
Regex CreateColouringRegex(bool isXorXx)
{
    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");
    foreach (string escapePattern in new[]
    {
        @"\\c[A-Za-z]",                 // control char
        @"\\x[0-9a-fA-F]{1,2}",         // hex, up to two digits
        @"\\x\{[0-9a-fA-F]*(\} | $)",   // hex, error if empty
        @"\\N\{.*?(\} | $)",            // Unicode name or hex
        @"\\0[0-7]{1,2}",               // octal, up to two digits after 0
        @"\\[0-7]{1,3}",                // octal, up to three digits
        @"\\o\{[0-9]+(\} | $)",         // octal
        @"\\[pP]([a-zA-Z] | $)",        // property, one-letter name
        @"\\[pP]\{.*?(\} | $)",         // property
        @"\\Q.*?(\\E|$)",               // quoted sequence, \Q...\E
        @"\\[bB]\{.*?(\} | $)",         // Unicode boundary
        @"\\.",                         // any other escaped character
    })
    {
        escapes.Add(escapePattern);
    }
    escapes.EndGroup();

    // [: ... :]
    var classes = new PatternBuilder().AddGroup("class", @"\[(?'c'[:]) .*? (\k<c>\] | $)");

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");
    builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment
    if (isXorXx)
    {
        builder.Add(@"\#.*?(\n|$)");    // '#' line comment
    }
    builder.EndGroup();

    foreach (string namePattern in new[]
    {
        @"\(\?(?'name'<(?![=!]).*?(>|$))",      // (?<name>, but not (?<= or (?<!
        @"\(\?(?'name''.*?('|$))",              // (?'name'
        @"\(\?P(?'name'<.*?(>|$))",             // (?P<name>
        @"\(\?P(?'name'[=>].*?(\)|$))",         // (?P=name) and (?P>name)
        @"(?'name'\\g[0-9]+)",                  // \g followed by digits
        @"(?'name'\\[gk]\{.*?(\}|$))",          // \g{...}, \k{...}
        @"(?'name'\\[gk]<.*?(>|$))",            // \g<...>, \k<...>
        @"(?'name'\\k'.*?('|$))",               // \k'...'
    })
    {
        builder.Add(namePattern);
    }

    builder.Add(escapes.ToPattern());

    string charGroup = $@"( \[ \]? ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$) )";
    builder.Add(charGroup);

    return builder.ToRegex();
}
/// <summary>
/// Builds the colouring regex for the given grammar. Matches are tagged through
/// the named groups "escape", "class", "comment" and "name".
/// </summary>
/// <param name="grammar">Grammar that decides which constructs are recognised.</param>
/// <param name="modX">When true (and the grammar is Perl-like), '#' line comments are recognised.</param>
/// <returns>The regex produced by the pattern builder.</returns>
static Regex CreateColouringRegex(GrammarEnum grammar, bool modX)
{
    bool isPerl =
        grammar == GrammarEnum.perl ||
        grammar == GrammarEnum.ECMAScript ||
        grammar == GrammarEnum.normal ||
        grammar == GrammarEnum.JavaScript ||
        grammar == GrammarEnum.JScript;
    bool isPosixExtended =
        grammar == GrammarEnum.extended ||
        grammar == GrammarEnum.egrep ||
        grammar == GrammarEnum.awk;
    bool isPosixBasic =
        grammar == GrammarEnum.basic ||
        grammar == GrammarEnum.sed ||
        grammar == GrammarEnum.grep ||
        grammar == GrammarEnum.emacs;
    bool isEmacs = grammar == GrammarEnum.emacs;

    bool isPerlOrExtended = isPerl || isPosixExtended;
    bool isAnyKnownGrammar = isPerlOrExtended || isPosixBasic;

    var escapes = new PatternBuilder();

    escapes.BeginGroup("escape");

    if (isAnyKnownGrammar)
    {
        escapes.Add(@"\\[1-9]");                        // back reference
    }

    if (isPerlOrExtended)
    {
        escapes.Add(@"\\c[A-Za-z]");                    // ASCII escape
        escapes.Add(@"\\x[0-9A-Fa-f]{1,2}");            // hex, up to two digits
        escapes.Add(@"\\x\{[0-9A-Fa-f]+(\}|$)");        // hex in braces
        escapes.Add(@"\\0[0-7]{1,3}");                  // octal, up to three digits after 0
        escapes.Add(@"\\N\{.*?(\}|$)");                 // symbolic name
        escapes.Add(@"\\[pP]\{.*?(\}|$)");              // property
        escapes.Add(@"\\[pP].");                        // property, short name
        escapes.Add(@"\\Q.*?(\\E|$)");                  // quoted sequence
    }

    if (isEmacs)
    {
        escapes.Add(@"\\[sS].");                        // syntax group
    }

    if (isPerlOrExtended)
    {
        escapes.Add(@"\\.");                            // various
    }

    if (isPosixBasic)
    {
        // Any escaped character except the escaped delimiters \( \) \{ \}
        escapes.Add(@"(?!\\\( | \\\) | \\\{ | \\\})\\.");
    }

    escapes.EndGroup();

    var classes = new PatternBuilder();

    classes.BeginGroup("class");

    if (isAnyKnownGrammar)
    {
        // [:...:], [=...=] and [....]; the opening and closing marker must agree.
        // The literal is kept on one line: it previously contained a raw line
        // break in the middle of the pattern, unlike the same literal elsewhere.
        classes.Add(@"\[(?'c'[:=.]) .*? (\k<c>\] | $)");
    }

    classes.EndGroup();

    var builder = new PatternBuilder();

    builder.BeginGroup("comment");

    if (isPerl)
    {
        builder.Add(@"\(\?\#.*?(\)|$)");    // (?#...) comment
    }

    if (isPerl && modX)
    {
        builder.Add(@"\#.*?(\n|$)");        // '#' line comment
    }

    builder.EndGroup();

    if (isPerl)
    {
        builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$)) | \(\?(?'name''.*?('|$))");       // (?<name> and (?'name'
        builder.Add(@"(?'name'\\g-?[1-9]) | (?'name'\\g\{.*?(\}|$))");                 // back reference
        builder.Add(@"(?'name'\\[gk]<.*?(>|$)) | (?'name'\\[gk]'.*?('|$))");           // back reference
    }

    if (isAnyKnownGrammar)
    {
        // [...]: a ']' directly after '[' is taken as part of the class
        builder.AddGroup(null, $@"\[ \]? ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$)");
    }

    builder.Add(escapes.ToPattern());

    return builder.ToRegex();
}
/// <summary>
/// Forwards the pair to <c>pattern.Add</c>.
/// </summary>
/// <param name="input">First argument passed to <c>pattern.Add</c>.</param>
/// <param name="internal_">Second argument passed to <c>pattern.Add</c>.</param>
internal void AddSpecial(string input, string internal_) => pattern.Add(input, internal_);
/// <summary>
/// Builds the highlighting regex for the given grammar. Brackets are tagged
/// through the named groups left_par / right_par, left_brace / right_brace and
/// left_bracket / right_bracket.
/// </summary>
/// <param name="grammar">Grammar that decides whether delimiters are plain or backslash-escaped.</param>
/// <param name="modX">When true (and the grammar is Perl-like), '#' line comments are recognised.</param>
/// <returns>The regex produced by the pattern builder.</returns>
static Regex CreateHighlightingRegex(GrammarEnum grammar, bool modX)
{
    bool perlLike =
        grammar == GrammarEnum.perl ||
        grammar == GrammarEnum.ECMAScript ||
        grammar == GrammarEnum.normal ||
        grammar == GrammarEnum.JavaScript ||
        grammar == GrammarEnum.JScript;
    bool posixExtended =
        grammar == GrammarEnum.extended ||
        grammar == GrammarEnum.egrep ||
        grammar == GrammarEnum.awk;
    bool posixBasic =
        grammar == GrammarEnum.basic ||
        grammar == GrammarEnum.sed ||
        grammar == GrammarEnum.grep ||
        grammar == GrammarEnum.emacs;

    bool perlOrExtended = perlLike || posixExtended;

    var builder = new PatternBuilder();

    if (perlLike)
    {
        builder.Add(@"(\(\?\#.*?(\)|$))");  // (?#...) comment
    }

    if (perlLike && modX)
    {
        builder.Add(@"(\#[^\n]*)");         // '#' line comment
    }

    if (perlOrExtended)
    {
        builder.Add(@"\\Q.*?(\\E|$)");              // skip \Q...\E
        builder.Add(@"\\[xNpPgk]\{.*?(\}|$)");      // escapes that take a {...} argument: no brace tagging

        builder.AddGroup("left_par", @"\(");        // '('
        builder.AddGroup("right_par", @"\)");       // ')'
        builder.Add(@"(?'left_brace'\{) \s* \d+ \s* (, \s* \d*)? \s* ((?'right_brace'\})|$)");  // '{...}' (spaces are allowed)
    }

    if (posixBasic)
    {
        builder.AddGroup("left_par", @"\\\(");      // '\('
        builder.AddGroup("right_par", @"\\\)");     // '\)'
        builder.Add(@"(?'left_brace'\\{).*?((?'right_brace'\\})|$)");   // '\{...\}'
    }

    if (perlOrExtended || posixBasic)
    {
        builder.Add(@"((?'left_bracket'\[) \]? ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )");    // [...]
        builder.Add(@"\\.");                        // any other escaped character
    }

    return builder.ToRegex();
}