Example #1
0
        /// <summary>
        /// Builds the colouring regex: bracketed character classes, named-group declarations,
        /// named back-references and escape sequences (escapes are captured in the "escape" group).
        /// </summary>
        /// <param name="options">Only <c>options.u</c> is read; when set, the brace forms
        /// <c>\u{...}</c> and <c>\p{...}</c>/<c>\P{...}</c> are added to the escape alternatives.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateColouringRegex(WebView2RegexOptions options)
        {
            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");
            escapes.Add(@"\\c[A-Za-z]");            // control character, \cX
            escapes.Add(@"\\x[0-9a-fA-F]{1,2}");    // \x followed by one or two hex digits
            escapes.Add(@"\\u[0-9a-fA-F]{1,4}");    // \u followed by one to four hex digits

            if (options.u)
            {
                // language=regex
                escapes.Add(@"\\u\{[0-9a-fA-F]+(\}|$)");    // \u{hex}, terminated by '}' or by '$'
                // language=regex
                escapes.Add(@"\\(p|P)\{.*?(\}|$)");         // \p{...} or \P{...}, terminated by '}' or by '$'
            }

            escapes.Add(@"\\.");    // fallback: a backslash and one more character
            escapes.EndGroup();

            var builder = new PatternBuilder();

            // '[' ... ']' (or '$' when unterminated); a ']' directly after '[' belongs to the class.
            string characterClass = $@"\[\]?({escapes.ToPattern()} |.)*?(\]|$)";
            builder.AddGroup(null, characterClass);

            // "(?<name>" — the look-ahead excludes the look-behind forms "(?<=" and "(?<!".
            // language=regex
            builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");
            // "\k<name>" back-reference.
            // language=regex
            builder.Add(@"(?'name'\\k<.*?(>|$))");

            // Escapes that appear outside a character class.
            builder.Add(escapes.ToPattern());

            return builder.ToRegex();
        }
Example #2
0
        /// <summary>
        /// Builds the regex that locates parentheses, braces and square brackets for highlighting.
        /// Comments, quoted parts and escapes are consumed by alternatives without named groups.
        /// </summary>
        /// <param name="options">Only <c>options.UREGEX_COMMENTS</c> is read; when set,
        /// <c>#</c> line-comments are consumed as well.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex(IcuRegexOptions options)
        {
            var builder = new PatternBuilder();

            builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment, "(?#...)"

            if (options.UREGEX_COMMENTS)
            {
                builder.Add(@"\#.*?(\n|$)");    // '#' up to the end of the line
            }

            builder.Add(@"\\Q.*?(\\E|$)");      // quoted part, "\Q...\E"

            builder.Add(@"(?'left_par'\()");    // opening parenthesis
            builder.Add(@"(?'right_par'\))");   // closing parenthesis
            builder.Add(@"\\[NpPx]\{.*?(\}|$)");                        // \N{..}, \p{..}, \P{..}, \x{..}: consumed, no named groups
            builder.Add(@"(?'left_brace'\{).*?((?'right_brace'\})|$)"); // '{' ... '}' (or '$' when unterminated)

            string posixClass = @"(\[:.*?(:\]|$))";    // "[:...:]"

            // Square brackets, possibly nested. The 'c' group is pushed on every inner '[' and
            // popped on its ']'; "(?(c)(?!))" rejects the alternative while 'c' is non-empty.
            // The last alternative picks up a lone ']'.
            builder.Add($@"
						(?'left_bracket'\[)
						\]?
						(?> {posixClass} | (?'left_bracket'\[)(?<c>) | (\\. | [^\[\]])+ | (?'right_bracket'\])(?<-c>))*
						(?(c)(?!))
						(?'right_bracket'\])?
						|
						(?'right_bracket'\])
						");

            builder.Add(@"\\.");    // any other escaped character

            return builder.ToRegex();
        }
Example #3
0
        /// <summary>
        /// Builds the bracket-highlighting regex for the given grammar. Grammars listed in the first
        /// test use plain <c>( ) { }</c>; <c>basic</c> and <c>grep</c> use the backslash-prefixed forms.
        /// </summary>
        /// <param name="grammar">Grammar that selects which delimiter patterns are added.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        static Regex CreateHighlightingRegex(GrammarEnum grammar)
        {
            bool usesPlainDelimiters =
                grammar == GrammarEnum.extended ||
                grammar == GrammarEnum.ECMAScript ||
                grammar == GrammarEnum.egrep ||
                grammar == GrammarEnum.awk;

            bool usesEscapedDelimiters =
                grammar == GrammarEnum.basic ||
                grammar == GrammarEnum.grep;

            var builder = new PatternBuilder();

            if (usesPlainDelimiters)
            {
                builder.Add(@"(?'left_par'\()");                            // opening parenthesis
                builder.Add(@"(?'right_par'\))");                           // closing parenthesis
                builder.Add(@"(?'left_brace'\{).*?((?'right_brace'\})|$)"); // '{' ... '}' (or '$' when unterminated)
            }

            if (usesEscapedDelimiters)
            {
                builder.Add(@"(?'left_par'\\\()");                            // opening "\("
                builder.Add(@"(?'right_par'\\\))");                           // closing "\)"
                builder.Add(@"(?'left_brace'\\{).*?((?'right_brace'\\})|$)"); // "\{" ... "\}" (or '$' when unterminated)
            }

            // '[' ... ']' with "[:...:]" and escaped characters consumed inside.
            builder.Add(@"((?'left_bracket'\[) ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )");
            // Any other escaped character.
            builder.Add(@"\\.");

            return builder.ToRegex();
        }
Example #4
0
            //public static implicit operator TokenFragment(PatternBuilder builder)
            //{
            //	return new TokenPattern(builder.CreateArray());
            //}

            /// <summary>
            /// Concatenates two builders into a new one: the entries of <paramref name="op1"/>
            /// are added first, followed by the entries of <paramref name="op2"/>.
            /// Neither operand is modified by this method.
            /// </summary>
            /// <param name="op1">Builder whose entries come first.</param>
            /// <param name="op2">Builder whose entries are appended afterwards.</param>
            /// <returns>A newly created builder holding the entries of both operands.</returns>
            public static PatternBuilder operator +(PatternBuilder op1, PatternBuilder op2)
            {
                var combined = new PatternBuilder();

                // Walk the operands in order; each contributes its entries by index.
                foreach (var source in new[] { op1, op2 })
                {
                    var index = 0;

                    while (index < source._entryCount)
                    {
                        combined.Add(source[index]);
                        index++;
                    }
                }

                return combined;
            }
Example #5
0
        /// <summary>
        /// Builds the colouring regex: comments ("comment" group), group names and numeric
        /// back-references ("name" group), escapes ("escape" group) and bracketed character classes.
        /// </summary>
        /// <param name="isVerbose">When true, <c>#</c> line-comments are added to the "comment" group.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateColouringRegex(bool isVerbose)
        {
            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");
            escapes.Add(@"\\x[0-9a-fA-F]{1,2}");     // \x followed by one or two hex digits
            escapes.Add(@"\\0[0-7]+");               // \0 followed by octal digits
            escapes.Add(@"\\[1-7][0-7]{2,}");        // \1..\7 followed by two or more octal digits
            escapes.Add(@"\\N\{.+?(\} | $)");        // \N{...}, e.g. \N{DIGIT ONE}
            escapes.Add(@"\\.");                     // fallback: a backslash and one more character
            escapes.EndGroup();

            var builder = new PatternBuilder();

            // Comments.
            builder.BeginGroup("comment");
            builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment, "(?#...)"

            if (isVerbose)
            {
                builder.Add(@"\#.*?(\n|$)");    // '#' up to the end of the line
            }

            builder.EndGroup();

            // Names and numbered back-references.
            builder.Add(@"\(\?P(?'name'<.*?(>|$))");            // "(?P<name>"
            builder.Add(@"\(\?P=(?'name'.*?(\)|$))");           // "(?P=name)"
            builder.Add(@"(?'name'\\[1-9][0-9]?(?![0-9]))");    // \1 .. \99, not followed by another digit

            // Escapes.
            builder.Add(escapes.ToPattern());

            // '[' ... ']' (or '$' when unterminated); a ']' directly after '[' belongs to the class.
            string characterClass = @"( \[ \]? .*? (\]|$) )";
            builder.Add(characterClass);

            return builder.ToRegex();
        }
Example #6
0
        /// <summary>
        /// Builds the regex that locates parentheses, quantifier braces and square brackets for
        /// highlighting. Inline comments, properties and escapes are consumed without named groups.
        /// </summary>
        /// <param name="options">Not read by this method.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex(DRegexOptions options)
        {
            var builder = new PatternBuilder();

            builder.Add(@"\(\?\#.*?(\)|$)");        // inline comment, "(?#...)"

            builder.Add(@"(?'left_par'\()");        // opening parenthesis
            builder.Add(@"(?'right_par'\))");       // closing parenthesis

            builder.Add(@"\\[pP]\{.*?(\} | $)");    // \p{...} / \P{...}

            // "{n}", "{n,}", "{n,m}" or "{,m}"; the right brace may be missing at '$'.
            builder.Add(@"(?'left_brace'\{) (\d+(,\d*)? | ,\d+) ((?'right_brace'\})|$)");

            // POSIX brackets are not supported here: @"(\[:.*?(:\]|$))"; // [:...:]
            string posixClass = "";
            string separator = posixClass.Length == 0 ? "" : " |";

            // Square brackets, possibly nested. The 'c' group is pushed on every inner '[' and
            // popped on its ']'; "(?(c)(?!))" rejects the alternative while 'c' is non-empty.
            // The last alternative picks up a lone ']'.
            builder.Add($@"
						(?'left_bracket'\[)
						\]?
						(?> {posixClass}{separator} (?'left_bracket'\[)(?<c>) | (\\. | [^\[\]])+ | (?'right_bracket'\])(?<-c>))*
						(?(c)(?!))
						(?'right_bracket'\])?
						|
						(?'right_bracket'\])
						");

            builder.Add(@"\\.");    // any other escaped character

            return builder.ToRegex();
        }
Example #7
0
        /// <summary>
        /// Builds the colouring regex: inline comments ("comment" group), group names ("name" group),
        /// bracketed character classes with nesting, and escapes ("escape" group).
        /// </summary>
        /// <param name="options">Not read by this method.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateColouringRegex(DRegexOptions options)
        {
            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");
            escapes.Add(@"\\c[A-Za-z]");            // control character, \cX
            escapes.Add(@"\\x[0-9a-fA-F]{0,2}");    // \x followed by up to two hex digits
            escapes.Add(@"\\u[0-9a-fA-F]{0,4}");    // \u followed by up to four hex digits
            escapes.Add(@"\\U[0-9a-fA-F]{0,8}");    // \U followed by up to eight hex digits
            escapes.Add(@"\\[pP]\{.*?(\}|$)");      // \p{...} / \P{...}
            escapes.Add(@"\\[pP].?");               // \p or \P with at most one following character
            escapes.Add(@"\\.");                    // fallback: a backslash and one more character
            escapes.EndGroup();

            var builder = new PatternBuilder();

            builder.BeginGroup("comment");
            builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment, "(?#...)"
            builder.EndGroup();

            builder.Add(@"\(\?P(?'name'<.*?(>|$))");    // "(?P<name>" — the name part goes to the 'name' group

            // Nested brackets are matched with a balancing group, see
            // https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses

            // POSIX brackets are not supported here: @"(?'escape'\[:.*?(:\]|$))"; // [:...:], use escape colour
            string posixClass = "";
            string separator = posixClass.Length == 0 ? "" : " |";

            builder.Add($@"
						\[ 
						\]?
						(?> {posixClass}{separator} \[(?<c>) | ({escapes.ToPattern()} | [^\[\]])+ | \](?<-c>))*
						(?(c)(?!))
						\]
						");

            // Escapes that appear outside a character class.
            builder.Add(escapes.ToPattern());

            return builder.ToRegex();
        }
Example #8
0
        /// <summary>
        /// Builds the colouring regex: escapes ("escape" group), bracketed character classes with
        /// "[:...:]" items ("class" group) and group names ("name" group).
        /// </summary>
        /// <returns><c>PatternBuilder.AlwaysFailsRegex</c> when the "literal" option is selected;
        /// otherwise the regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateColouringRegex( )
        {
            // Nothing is coloured in literal mode.
            if (OptionsControl.IsOptionSelected("literal"))
            {
                return PatternBuilder.AlwaysFailsRegex;
            }

            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");
            escapes.Add(@"\\[pP][A-Za-z]");             // \pX / \PX, one-letter class name
            escapes.Add(@"\\[pP]\{.*?(\}|$)");          // \p{...} / \P{...}
            escapes.Add(@"\\0[0-7]{1,2}");              // \0 followed by one or two octal digits
            escapes.Add(@"\\[0-7]{1,3}");               // one to three octal digits
            escapes.Add(@"\\x[0-9a-fA-F]{1,2}");        // \x followed by one or two hex digits
            escapes.Add(@"\\x\{[0-9a-fA-F]*(\}|$)");    // \x{hex}; the digits may be missing
            escapes.Add(@"\\Q.*?(\\E|$)");              // quoted sequence, "\Q...\E"
            escapes.Add(@"\\.");                        // fallback: a backslash and one more character
            escapes.EndGroup();

            // Only "[: :]" is recognised here, not "[= =]" or "[. .]".
            var classes = new PatternBuilder().AddGroup("class", @"\[(?'c'[:]) .*? (\k<c>\] | $)");

            var builder = new PatternBuilder();

            builder.Add(escapes.ToPattern());

            // '[' ... ']' (or '$' when unterminated). TODO: check 'escape' part
            builder.AddGroup(null, $@"\[ \]? ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$)");

            // "(?P<name>". NOTE(review): unlike the other alternatives, this one requires the closing '>'.
            builder.Add(@"\(\?P(?'name'<.*?>)");

            return builder.ToRegex();
        }
Example #9
0
        /// <summary>
        /// Builds the regex that locates parentheses for highlighting: an opening parenthesis is
        /// captured in 'left_par' and a closing one in 'right_par'.
        /// </summary>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        static Regex CreateHighlightingRegex( )
        {
            var pb = new PatternBuilder( );

            // Escaped characters come first, so that "\(" and "\)" are consumed here
            // and never reach the parenthesis alternatives.
            pb.Add(@"\\.");

            // The patterns already declare their named groups, so they are added with Add
            // (as in the sibling highlighting builders) rather than AddGroup with the same
            // name, which would nest two groups called alike and capture each parenthesis twice.
            pb.Add(@"(?'left_par'\()");                // '('
            pb.Add(@"(?'right_par'\))");               // ')'

            return(pb.ToRegex( ));
        }
Example #10
0
        /// <summary>
        /// Builds the regex that locates parentheses, quantifier braces and square brackets for
        /// highlighting. Comments, quoted sequences and escapes are consumed without named groups.
        /// </summary>
        /// <param name="isXorXX">When true, <c>#</c> line comments are consumed as well.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex(bool isXorXX)
        {
            var builder = new PatternBuilder();

            builder.Add(@"(\(\?\#.*?(\)|$))");    // inline comment, "(?#...)"

            if (isXorXX)
            {
                builder.Add(@"(\#[^\n]*)");       // '#' up to the end of the line
            }

            builder.Add(@"\\Q.*?(\\E|$)");                  // quoted sequence, "\Q...\E"
            builder.Add(@"\\[xNopPbBgk]\{.*?(\}|$)");       // backslash-letter followed by "{...}": consumed, no named groups
            builder.Add(@"(?'left_par'\()");                // opening parenthesis
            builder.Add(@"(?'right_par'\))");               // closing parenthesis

            // "{n}", "{n,}" or "{n,m}", with optional whitespace around the parts.
            builder.Add(@"(?'left_brace'\{) \s* \d+ \s* (,\s*\d*)? \s* ((?'right_brace'\})|$)");

            // '[' ... ']' with "[:...:]" and escaped characters consumed inside.
            builder.Add(@"((?'left_bracket'\[) \]? ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )");

            builder.Add(@"\\.");    // any other escaped character

            return builder.ToRegex();
        }
Example #11
0
        /// <summary>
        /// Builds the colouring regex for the given grammar: escapes ("escape" group) and bracketed
        /// character classes with "[:...:]", "[=...=]" and "[....]" items ("class" group).
        /// </summary>
        /// <param name="grammar">Grammar that selects which escape patterns are added.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        static Regex CreateColouringRegex(GrammarEnum grammar)
        {
            bool isEcmaScript = grammar == GrammarEnum.ECMAScript;
            bool isAwk = grammar == GrammarEnum.awk;
            bool isBasicOrGrep = grammar == GrammarEnum.basic || grammar == GrammarEnum.grep;

            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");

            if (isEcmaScript)
            {
                escapes.Add(@"\\c[A-Za-z]");            // control character, \cX
                escapes.Add(@"\\x[0-9A-Fa-f]{1,2}");    // \x followed by one or two hex digits
            }

            if (isAwk)
            {
                escapes.Add(@"\\[0-7]{1,3}");           // octal code, one to three digits
            }

            if (isEcmaScript)
            {
                escapes.Add(@"\\u[0-9A-Fa-f]{1,4}");    // \u followed by one to four hex digits
            }

            // In 'basic' and 'grep' the sequences "\(", "\)", "\{" and "\}" are excluded
            // from the generic escape by the look-ahead.
            escapes.Add(isBasicOrGrep
                ? @"(?!\\\( | \\\) | \\\{ | \\\})\\."
                : @"\\.");

            escapes.EndGroup();

            var classes = new PatternBuilder().AddGroup("class", @"\[(?'c'[:=.]) .*? (\k<c>\] | $)");

            var builder = new PatternBuilder();

            builder.Add(escapes.ToPattern());

            // '[' ... ']' (or '$' when unterminated).
            builder.AddGroup(null, $@"( \[ ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$) )");

            // (group names and comments are not supported by C++ Regex)

            return builder.ToRegex();
        }
Example #12
0
        /// <summary>
        /// Builds the regex that locates parentheses, quantifier braces and square brackets for
        /// highlighting. Comments, quoted sequences and escapes are consumed without named groups.
        /// Reads the compile options "PCRE2_LITERAL", "PCRE2_EXTENDED" and "PCRE2_ALLOW_EMPTY_CLASS".
        /// </summary>
        /// <returns><c>PatternBuilder.AlwaysFailsRegex</c> when "PCRE2_LITERAL" is selected;
        /// otherwise the regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex( )
        {
            // Nothing is highlighted in literal mode.
            if (OptionsControl.IsCompileOptionSelected("PCRE2_LITERAL"))
            {
                return PatternBuilder.AlwaysFailsRegex;
            }

            bool extendedMode      = OptionsControl.IsCompileOptionSelected("PCRE2_EXTENDED");
            bool emptyClassAllowed = OptionsControl.IsCompileOptionSelected("PCRE2_ALLOW_EMPTY_CLASS");

            var builder = new PatternBuilder();

            builder.Add(@"(\(\?\#.*?(\)|$))");    // inline comment, "(?#...)"

            if (extendedMode)
            {
                builder.Add(@"(\#[^\n]*)");       // '#' up to the end of the line
            }

            builder.Add(@"\\Q.*?(\\E|$)");            // quoted sequence, "\Q...\E"
            builder.Add(@"\\[oNxupP]\{.*?(\}|$)");    // backslash-letter followed by "{...}": consumed, no named groups

            builder.Add(@"(?'left_par'\()");          // opening parenthesis
            builder.Add(@"(?'right_par'\))");         // closing parenthesis
            builder.Add(@"(?'left_brace'\{) \d+ (,\d*)? ((?'right_brace'\})|$)");    // "{n}", "{n,}" or "{n,m}"

            // '[' ... ']': when empty classes are allowed, a ']' directly after '[' closes the class;
            // otherwise that ']' is taken as part of the class.
            string bracketPattern = emptyClassAllowed
                ? @"((?'left_bracket'\[)     ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )"
                : @"((?'left_bracket'\[) \]? ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )";

            builder.Add(bracketPattern);

            builder.Add(@"\\.");    // any other escaped character

            return builder.ToRegex();
        }
Example #13
0
        /// <summary>
        /// Builds the regex that locates parentheses, quantifier braces and square brackets for
        /// highlighting. Comments, <c>\N{...}</c> and escapes are consumed without named groups.
        /// </summary>
        /// <param name="isVerbose">When true, <c>#</c> line comments are consumed as well.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex(bool isVerbose)
        {
            var builder = new PatternBuilder();

            builder.Add(@"(\(\?\#.*?(\)|$))");    // inline comment, "(?#...)"

            if (isVerbose)
            {
                builder.Add(@"(\#[^\n]*)");       // '#' up to the end of the line
            }

            builder.Add(@"\\[N]\{.*?(\}|$)");     // \N{...}: consumed, no named groups
            builder.Add(@"(?'left_par'\()");      // opening parenthesis
            builder.Add(@"(?'right_par'\))");     // closing parenthesis

            // "{n}" or "{n,m}" where either bound of the comma form may be missing.
            builder.Add(@"(?'left_brace'\{) (\d+ | \d*,\d*) ((?'right_brace'\})|$)");

            // '[' ... ']' (or '$' when unterminated); a ']' directly after '[' belongs to the class.
            builder.Add(@"((?'left_bracket'\[) ]? (\\. | .)*? ((?'right_bracket'\])|$) )");

            builder.Add(@"\\.");    // any other escaped character

            return builder.ToRegex();
        }
Example #14
0
        /// <summary>
        /// Builds the regex that locates parentheses, quantifier braces and square brackets for
        /// highlighting. Comments, <c>\p{...}</c>/<c>\P{...}</c> and escapes are consumed without named groups.
        /// </summary>
        /// <param name="options">When <see cref="RegexOptions.IgnorePatternWhitespace"/> is set,
        /// <c>#</c> line comments are consumed as well.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        static Regex CreateHighlightingRegex(RegexOptions options)
        {
            bool hasLineComments = options.HasFlag(RegexOptions.IgnorePatternWhitespace);

            var builder = new PatternBuilder();

            builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment, "(?#...)"

            if (hasLineComments)
            {
                builder.Add(@"\#[^\n]*");       // '#' up to the end of the line
            }

            builder.Add(@"\\[pP]\{.*?(\}|$)");    // \p{...} / \P{...}: consumed, no named groups
            builder.Add(@"(?'left_par'\()");      // opening parenthesis
            builder.Add(@"(?'right_par'\))");     // closing parenthesis

            // "{n}", "{n,}" or "{n,m}".
            builder.Add(@"(?'left_brace'\{) \d+(,(\d+)?)? ((?'right_brace'\})|$)");

            // '[' ... ']' (or '$' when unterminated); a ']' directly after '[' belongs to the class.
            builder.Add(@"(?'left_bracket'\[) \]? (\\.|.)*? ((?'right_bracket'\])|$)");

            builder.Add(@"\\.");    // any other escaped character: consumed, no named groups

            return builder.ToRegex();
        }
Example #15
0
        /// <summary>
        /// Builds the regex that locates parentheses, braces and square brackets for highlighting.
        /// <c>\p{...}</c>, <c>\P{...}</c>, <c>\u{...}</c> and other escapes are consumed without named groups.
        /// </summary>
        /// <param name="options">Not read by this method.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex(WebView2RegexOptions options)
        {
            var builder = new PatternBuilder();

            builder.Add(@"(?'left_par'\()");       // opening parenthesis
            builder.Add(@"(?'right_par'\))");      // closing parenthesis
            builder.Add(@"\\[pPu]\{.*?(\}|$)");    // backslash-letter followed by "{...}": consumed, no named groups

            // '{' ... '}' (or '$' when unterminated).
            builder.Add(@"(?'left_brace'\{).*?((?'right_brace'\})|$)");

            // '[' ... ']' (or '$' when unterminated); a ']' directly after '[' belongs to the class.
            builder.Add(@"(?'left_bracket'\[) \]? (\\.|.)*? ((?'right_bracket'\])|$)");

            builder.Add(@"\\.");    // any other escaped character: consumed, no named groups

            return builder.ToRegex();
        }
Example #16
0
        /// <summary>
        /// Builds the regex that locates parentheses, quantifier braces and square brackets for
        /// highlighting. Comments, brace escapes and other escapes are consumed without named groups.
        /// </summary>
        /// <param name="options">Reads <c>@struct</c>, <c>ignore_whitespace</c> and <c>unicode</c>;
        /// the latter two only take effect when <c>@struct</c> equals "RegexBuilder".</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex(RustRegexOptions options)
        {
            bool usesBuilder = options.@struct == "RegexBuilder";

            var builder = new PatternBuilder();

            if (usesBuilder && options.ignore_whitespace)
            {
                builder.Add(@"\#.*?(\n|$)");      // '#' up to the end of the line
            }

            builder.Add(@"(?'left_par'\()");      // opening parenthesis
            builder.Add(@"(?'right_par'\))");     // closing parenthesis

            builder.Add(@"\\[xuU]\{.*?(\}|$)");   // \x{...}, \u{...}, \U{...}

            if (usesBuilder && options.unicode)
            {
                builder.Add(@"\\[pP]\{.*?(\} | $)");    // \p{...} / \P{...}
            }

            // "{n}", "{n,}", "{n,m}" or "{,m}"; the right brace may be missing at '$'.
            builder.Add(@"(?'left_brace'\{) (\d+(,\d*)? | ,\d+) ((?'right_brace'\})|$)");

            string posixClass = @"(\[:.*?(:\]|$))";    // "[:...:]"
            string separator = posixClass.Length == 0 ? "" : " |";

            // Square brackets, possibly nested. The 'c' group is pushed on every inner '[' and
            // popped on its ']'; "(?(c)(?!))" rejects the alternative while 'c' is non-empty.
            // The last alternative picks up a lone ']'.
            builder.Add($@"
						(?'left_bracket'\[)
						\]?
						(?> {posixClass}{separator} (?'left_bracket'\[)(?<c>) | (\\. | [^\[\]])+ | (?'right_bracket'\])(?<-c>))*
						(?(c)(?!))
						(?'right_bracket'\])?
						|
						(?'right_bracket'\])
						");

            builder.Add(@"\\.");    // any other escaped character

            return builder.ToRegex();
        }
Example #17
0
        /// <summary>
        /// Builds the colouring regex: comments ("comment" group), group names and named
        /// back-references ("name" group), bracketed character classes with nesting, and
        /// escapes ("escape" group; "[:...:]" inside a class is put into "escape" too).
        /// </summary>
        /// <param name="options">Only <c>options.UREGEX_COMMENTS</c> is read; when set,
        /// <c>#</c> line-comments are added to the "comment" group.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateColouringRegex(IcuRegexOptions options)
        {
            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");
            escapes.Add(@"\\c[A-Za-z]");             // control character, \cX
            escapes.Add(@"\\[NpP]\{.*?(\} | $)");    // \N{...}, \p{...}, \P{...}
            escapes.Add(@"\\[uUx][0-9a-fA-F]+");     // \u, \U or \x followed by hex digits
            escapes.Add(@"\\x\{[0-9a-fA-F]+(\}|$)"); // \x{hex}
            escapes.Add(@"\\0[0-7]+");               // \0 followed by octal digits
            escapes.Add(@"\\Q.*?(\\E|$)");           // quoted part, "\Q...\E"
            escapes.Add(@"\\.");                     // fallback: a backslash and one more character
            escapes.EndGroup();

            var builder = new PatternBuilder();

            builder.BeginGroup("comment");
            builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment, "(?#...)"

            if (options.UREGEX_COMMENTS)
            {
                builder.Add(@"\#.*?(\n|$)");    // '#' up to the end of the line
            }

            builder.EndGroup();

            builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");    // "(?<name>", but not "(?<=" or "(?<!"
            builder.Add(@"(?'name'\\k<.*?(>|$))");             // "\k<name>"

            string posixClass = @"(?'escape'\[:.*?(:\]|$))";   // "[:...:]", coloured like an escape

            // Bracketed class with nesting: 'c' is pushed on every inner '[' and popped on its ']';
            // "(?(c)(?!))" rejects the match while 'c' is non-empty.
            builder.Add($@"
						\[
						\]?
						(?> {posixClass} | \[(?<c>) | ({escapes.ToPattern()} | [^\[\]])+ | \](?<-c>))*
						(?(c)(?!))
						\]
						");

            // Escapes that appear outside a character class.
            builder.Add(escapes.ToPattern());

            return builder.ToRegex();
        }
Example #18
0
        /// <summary>
        /// Builds the regex that locates parentheses, quantifier braces and square brackets for
        /// highlighting. Quoted sequences, brace escapes and other escapes are consumed without
        /// named groups.
        /// </summary>
        /// <returns><c>PatternBuilder.AlwaysFailsRegex</c> when the "literal" option is selected;
        /// otherwise the regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex( )
        {
            bool is_literal = OptionsControl.IsOptionSelected("literal");

            // Nothing is highlighted in literal mode.
            if (is_literal)
            {
                return(PatternBuilder.AlwaysFailsRegex);
            }

            var pb = new PatternBuilder( );

            pb.Add(@"\\Q.*?(\\E|$)");                                        // quoted sequence, \Q...\E
            pb.Add(@"\\[pPx]\{.*?(\}|$)");                                   // (skip)
            pb.Add(@"(?'left_par'\()");                                      // '('
            pb.Add(@"(?'right_par'\))");                                     // ')'

            // '{n}', '{n,}' or '{n,m}'. The comma part is optional and occurs at most once
            // ('?', as in the sibling highlighting builders); a repeated comma part would
            // wrongly accept text such as '{1,2,3}' as a quantifier.
            pb.Add(@"(?'left_brace'\{) \d+ (,\d*)? ((?'right_brace'\})|$)"); // '{...}'

            // '[' ... ']' with '[:...:]' and escaped characters consumed inside.
            pb.Add(@"((?'left_bracket'\[) \]? ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )");
            pb.Add(@"\\.");                                                  // '\...'

            return(pb.ToRegex( ));
        }
Example #19
0
        /// <summary>
        /// Builds the colouring regex: comments ("comment" group), bracketed character classes,
        /// group names and named back-references ("name" group) and escapes ("escape" group).
        /// Several alternatives also accept constructs that are not yet terminated.
        /// </summary>
        /// <param name="options">When <see cref="RegexOptions.IgnorePatternWhitespace"/> is set,
        /// <c>#</c> line comments are added to the "comment" group.</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        static Regex CreateCachedColouringRegex(RegexOptions options)
        {
            var builder = new PatternBuilder();

            builder.BeginGroup("comment");
            builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment, "(?#...)"

            if (options.HasFlag(RegexOptions.IgnorePatternWhitespace))
            {
                builder.Add(@"\#[^\n]*");       // '#' up to the end of the line
            }

            builder.EndGroup();

            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");
            escapes.Add(@"\\[0-7]{2,3}");            // two or three octal digits
            escapes.Add(@"\\x[0-9A-Fa-f]{1,2}");     // \x followed by one or two hex digits
            escapes.Add(@"\\c[A-Za-z]");             // control character, \cX
            escapes.Add(@"\\u[0-9A-Fa-f]{1,4}");     // \u followed by one to four hex digits
            escapes.Add(@"\\(p|P)\{.*?(\}|$)");      // \p{...} / \P{...}
            escapes.Add(@"\\k<([A-Za-z]+>)?");       // "\k<", optionally followed by "letters>"
            escapes.Add(@"\\.");                     // fallback: a backslash and one more character
            escapes.EndGroup();

            // '[' ... ']' (or '$' when unterminated); a ']' directly after '[' belongs to the class.
            string characterClass = $@"\[\]?({escapes.ToPattern()} |.)*?(\]|$)";
            builder.AddGroup(null, characterClass);

            builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");    // "(?<name>" (balancing groups covered too)
            builder.Add(@"\(\?(?'name''.*?('|$))");            // "(?'name'"
            builder.Add(@"(?'name'\\k<.*?(>|$))");             // "\k<name>"
            builder.Add(@"(?'name'\\k'.*?('|$))");             // "\k'name'"

            // Escapes that appear outside a character class.
            builder.Add(escapes.ToPattern());

            return builder.ToRegex();
        }
Example #20
0
        /// <summary>
        /// Builds the colouring regex: line comments ("comment" group), group names ("name" group),
        /// bracketed character classes with nesting, and escapes ("escape" group; "[:...:]" inside
        /// a class is put into "escape" too).
        /// </summary>
        /// <param name="options">Reads <c>@struct</c> ("Regex" or "RegexBuilder"), <c>unicode</c>,
        /// <c>octal</c> and <c>ignore_whitespace</c>; the three flags only take effect for "RegexBuilder".</param>
        /// <returns>The regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateColouringRegex(RustRegexOptions options)
        {
            bool isPlainRegex = options.@struct == "Regex";
            bool usesBuilder  = options.@struct == "RegexBuilder";

            bool propertiesEnabled = isPlainRegex || (usesBuilder && options.unicode);
            bool octalEnabled      = usesBuilder && options.octal;

            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");

            if (propertiesEnabled)
            {
                escapes.Add(@"\\[pP]\{.*?(\}|$)");    // \p{...} / \P{...}
                escapes.Add(@"\\[pP].?");             // \p or \P with at most one following character
            }

            if (octalEnabled)
            {
                escapes.Add(@"\\[0-7]{1,3}");         // octal code, one to three digits
            }

            escapes.Add(@"\\x\{[0-9a-fA-F]*(\}|$)?");    // \x{hex}; digits and closing brace may be missing
            escapes.Add(@"\\x[0-9a-fA-F]{0,2}");         // \x followed by up to two hex digits

            // (only 2 digits if no 'options.unicode'
            escapes.Add(@"\\u\{[0-9a-fA-F]*(\}|$)?");    // \u{hex}; digits and closing brace may be missing
            escapes.Add(@"\\u[0-9a-fA-F]{0,4}");         // \u followed by up to four hex digits
            escapes.Add(@"\\U\{[0-9a-fA-F]*(\}|$)?");    // \U{hex}; digits and closing brace may be missing
            escapes.Add(@"\\U[0-9a-fA-F]{0,8}");         // \U followed by up to eight hex digits

            // Generic escape; look-aheads exclude the forms that are disabled above.
            string genericEscape = "";

            if (!propertiesEnabled)
            {
                genericEscape += @"(?!\\[pP])";
            }

            if (!octalEnabled)
            {
                genericEscape += @"(?!\\[0-7])";
            }

            genericEscape += @"\\.";

            escapes.Add(genericEscape);

            escapes.EndGroup();

            var builder = new PatternBuilder();

            builder.BeginGroup("comment");

            if (usesBuilder && options.ignore_whitespace)
            {
                builder.Add(@"\#.*?(\n|$)");    // '#' up to the end of the line
            }

            builder.EndGroup();

            builder.Add(@"\(\?P(?'name'<.*?(>|$))");    // "(?P<name>"

            // Nested brackets are matched with a balancing group, see
            // https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses

            string posixClass = @"(?'escape'\[:.*?(:\]|$))";    // "[:...:]", coloured like an escape
            string separator = posixClass.Length == 0 ? "" : " |";

            builder.Add($@"
						\[ 
						\]?
						(?> {posixClass}{separator} \[(?<c>) | ({escapes.ToPattern()} | [^\[\]])+ | \](?<-c>))*
						(?(c)(?!))
						\]
						");

            // Escapes that appear outside a character class.
            builder.Add(escapes.ToPattern());

            return builder.ToRegex();
        }
Example #21
0
        /// <summary>
        /// Builds the colouring regex: comments ("comment" group), group names and references
        /// ("name" group), escapes ("escape" group) and bracketed character classes with
        /// "[:...:]", "[=...=]" and "[....]" items ("class" group).
        /// Reads the compile options "PCRE2_LITERAL", "PCRE2_EXTENDED" and "PCRE2_ALLOW_EMPTY_CLASS".
        /// </summary>
        /// <returns><c>PatternBuilder.AlwaysFailsRegex</c> when "PCRE2_LITERAL" is selected;
        /// otherwise the regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateColouringRegex( )
        {
            // Nothing is coloured in literal mode.
            if (OptionsControl.IsCompileOptionSelected("PCRE2_LITERAL"))
            {
                return PatternBuilder.AlwaysFailsRegex;
            }

            bool extendedMode      = OptionsControl.IsCompileOptionSelected("PCRE2_EXTENDED");
            bool emptyClassAllowed = OptionsControl.IsCompileOptionSelected("PCRE2_ALLOW_EMPTY_CLASS");

            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");

            escapes.Add(@"\\c[A-Za-z]");                  // control character, \cX
            escapes.Add(@"\\0[0-7]{1,2}");                // \0 followed by one or two octal digits
            escapes.Add(@"\\[0-7]{1,3}");                 // one to three octal digits
            escapes.Add(@"\\o\{[0-9]+(\} | $)");          // \o{digits}; bad values give error
            escapes.Add(@"\\N\{U\+[0-9a-fA-F]+(\} | $)"); // \N{U+hex}, error if no 'PCRE2_UTF'
            escapes.Add(@"\\x[0-9a-fA-F]{1,2}");          // \x followed by one or two hex digits
            escapes.Add(@"\\x\{[0-9a-fA-F]*(\} | $)");    // \x{hex}, error if empty
            escapes.Add(@"\\u[0-9a-fA-F]{1,4}");          // \u + hex digits, error if no 'PCRE2_ALT_BSUX', 'PCRE2_EXTRA_ALT_BSUX'
            escapes.Add(@"\\u\{[0-9a-fA-F]*(\} | $)");    // \u{hex}, error if empty or no 'PCRE2_ALT_BSUX', 'PCRE2_EXTRA_ALT_BSUX'
            escapes.Add(@"\\[pP]\{.*?(\} | $)");          // \p{...} / \P{...}
            escapes.Add(@"\\Q.*?(\\E|$)");                // quoted sequence, "\Q...\E"

            // Numeric back-references (unambiguous form); named forms are handled further below.
            escapes.Add(@"\\[0-9]+");

            escapes.Add(@"\\.");                          // fallback: a backslash and one more character

            escapes.EndGroup();

            var classes = new PatternBuilder().AddGroup("class", @"\[(?'c'[:=.]) .*? (\k<c>\] | $)");

            var builder = new PatternBuilder();

            // Comments.
            builder.BeginGroup("comment");
            builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment, "(?#...)"

            if (extendedMode)
            {
                builder.Add(@"\#.*?(\n|$)");    // '#' up to the end of the line
            }

            builder.EndGroup();

            // Group names and references.
            builder.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");       // "(?<name>", but not "(?<=" or "(?<!"
            builder.Add(@"\(\?(?'name''.*?('|$))");               // "(?'name'"
            builder.Add(@"\(\?P(?'name'<.*?(>|$))");              // "(?P<name>"
            builder.Add(@"(?'name'\\g[+]?[0-9]+)");               // "\gN" / "\g+N"
            builder.Add(@"(?'name'\\g\{[+]?[0-9]*(\} | $))");     // "\g{N}" / "\g{+N}"
            builder.Add(@"(?'name'\\[gk]<.*?(>|$))");             // "\g<...>" / "\k<...>"
            builder.Add(@"(?'name'\\[gk]'.*?('|$))");             // "\g'...'" / "\k'...'"
            builder.Add(@"(?'name'\\[gk]\{.*?(\}|$))");           // "\g{...}" / "\k{...}"
            builder.Add(@"(?'name'\(\?P=.*?(\)|$))");             // "(?P=name)"

            // Escapes.
            builder.Add(escapes.ToPattern());

            // '[' ... ']': when empty classes are allowed, a ']' directly after '[' closes the class;
            // otherwise that ']' is taken as part of the class.
            string characterClass;

            if (emptyClassAllowed)
            {
                characterClass = $@"\[     ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$)";
            }
            else
            {
                characterClass = $@"\[ \]? ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$)";
            }

            builder.Add(characterClass);

            // TODO: add support for '(*...)' constructs

            return builder.ToRegex();
        }
Example #22
0
        /// <summary>
        /// Builds the regex that locates parentheses, braces and square brackets for highlighting.
        /// Each alternative is added only when the corresponding syntax flag of
        /// <paramref name="helper"/> is set; the generic escape alternative is always added last.
        /// </summary>
        /// <param name="helper">Source of the <c>IsONIG_*</c> flags that select the alternatives.</param>
        /// <returns><c>PatternBuilder.AlwaysFailsRegex</c> when <c>helper.IsONIG_SYNTAX_ASIS</c> is set;
        /// otherwise the regex produced by the builder's <c>ToRegex</c>.</returns>
        Regex CreateHighlightingRegex(OnigurumaRegexInterop.OnigurumaHelper helper)
        {
            // Nothing is highlighted with the as-is syntax.
            if (helper.IsONIG_SYNTAX_ASIS)
            {
                return PatternBuilder.AlwaysFailsRegex;
            }

            var builder = new PatternBuilder();

            if (helper.IsONIG_SYN_OP2_QMARK_GROUP_EFFECT)
            {
                builder.Add(@"\(\?\#.*?(\)|$)");    // inline comment, "(?#...)"
            }

            if (helper.IsONIG_OPTION_EXTEND)
            {
                builder.Add(@"\#.*?(\n|$)");        // '#' up to the end of the line
            }

            if (helper.IsONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)
            {
                builder.Add(@"\\Q.*?(\\E|$)");      // quoted part, "\Q...\E"
            }

            if (helper.IsONIG_SYN_OP_LPAREN_SUBEXP)
            {
                builder.Add(@"(?'left_par'\()");    // opening parenthesis
                builder.Add(@"(?'right_par'\))");   // closing parenthesis
            }

            if (helper.IsONIG_SYN_OP_ESC_LPAREN_SUBEXP)
            {
                builder.Add(@"(?'left_par'\\\()");    // opening "\("
                builder.Add(@"(?'right_par'\\\))");   // closing "\)"
            }

            if (helper.IsONIG_SYN_OP_ESC_O_BRACE_OCTAL)
            {
                builder.Add(@"\\o\{.*?(\}|$)");    // \o{17777777777 ...} wide octal chars
            }

            if (helper.IsONIG_SYN_OP_ESC_X_BRACE_HEX8)
            {
                builder.Add(@"\\x\{.*?(\}|$)");    // \x{7HHHHHHH ...} wide hexadecimal chars
            }

            bool hasProperties =
                helper.IsONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ||
                helper.IsONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT;

            if (hasProperties)
            {
                builder.Add(@"\\[pP]\{.*?(\} | $)");    // \p{...} / \P{...}
            }

            if (helper.IsONIG_SYN_OP_BRACE_INTERVAL)
            {
                // "{n}", "{n,}", "{n,m}" or "{,m}"; the right brace may be missing at '$'.
                builder.Add(@"(?'left_brace'\{) (\d+(,\d*)? | ,\d+) ((?'right_brace'\})|$)");
            }

            if (helper.IsONIG_SYN_OP_ESC_BRACE_INTERVAL)
            {
                // "\{" ... "\}" (or '$' when unterminated).
                builder.Add(@"(?'left_brace'\\{).*?((?'right_brace'\\})|$)");
            }

            // "[:...:]" is recognised inside brackets only when POSIX brackets are enabled.
            string posixClass = helper.IsONIG_SYN_OP_POSIX_BRACKET ? @"(\[:.*?(:\]|$))" : "";

            if (helper.IsONIG_SYN_OP_BRACKET_CC)
            {
                string separator = posixClass.Length == 0 ? "" : " |";

                // Square brackets, possibly nested. The 'c' group is pushed on every inner '[' and
                // popped on its ']'; "(?(c)(?!))" rejects the alternative while 'c' is non-empty.
                // The last alternative picks up a lone ']'.
                builder.Add($@"
						(?'left_bracket'\[)
						\]?
						(?> {posixClass}{separator} (?'left_bracket'\[)(?<c>) | (\\. | [^\[\]])+ | (?'right_bracket'\])(?<-c>))*
						(?(c)(?!))
						(?'right_bracket'\])?
						|
						(?'right_bracket'\])
						");
            }

            builder.Add(@"\\.");    // any other escaped character

            return builder.ToRegex();
        }
Example #23
0
        /// <summary>
        /// Builds the regular expression used to colour an Oniguruma pattern.
        /// The produced regex exposes the named groups <c>escape</c>, <c>comment</c> and <c>name</c>;
        /// bracketed character classes are matched by an unnamed alternative.
        /// </summary>
        /// <param name="helper">Exposes the <c>IsONIG_*</c> flags that decide which alternatives are added.</param>
        /// <returns>
        /// <see cref="PatternBuilder.AlwaysFailsRegex"/> when <c>IsONIG_SYNTAX_ASIS</c> is set;
        /// otherwise the regex produced by the assembled <see cref="PatternBuilder"/>.
        /// </returns>
        Regex CreateColouringRegex(OnigurumaRegexInterop.OnigurumaHelper helper)
        {
            if (helper.IsONIG_SYNTAX_ASIS)
            {
                return PatternBuilder.AlwaysFailsRegex;
            }

            // Escapes; this group is reused both inside character classes and at top level.
            var pb_escape = new PatternBuilder( );

            pb_escape.BeginGroup("escape");

            pb_escape.Add(@"\\0[0-7]{1,2}");              // octal, two digits after 0
            pb_escape.Add(@"\\[0-7]{1,3}");               // octal, three digits

            if (helper.IsONIG_SYN_OP_ESC_O_BRACE_OCTAL)
            {
                pb_escape.Add(@"\\o\{[0-7]+ (\s+ [0-7]+)* (\}|$)"); // \o{17777777777 ...} wide octal chars
            }
            pb_escape.Add(@"\\u[0-9a-fA-F]+");                      // \uHHHH wide hexadecimal char
            if (helper.IsONIG_SYN_OP_ESC_X_HEX2)
            {
                pb_escape.Add(@"\\x[0-9a-fA-F]+");                  // \xHH hexadecimal char
            }
            if (helper.IsONIG_SYN_OP_ESC_X_BRACE_HEX8)
            {
                pb_escape.Add(@"\\x\{[0-9a-fA-F]+ (\s+ [0-9a-fA-F]+)* (\}|$)"); // \x{7HHHHHHH ...} wide hexadecimal chars
            }
            if (helper.IsONIG_SYN_OP_ESC_C_CONTROL)
            {
                pb_escape.Add(@"\\c[A-Za-z]");                   // \cx control char
                pb_escape.Add(@"\\C-([A-Za-z])?");               // \C-x control char
            }

            pb_escape.Add(@"\\M-([A-Za-z])?");               // \M-x meta  (x|0x80)
            pb_escape.Add(@"\\M-(\\C-([A-Za-z])?)?");        // \M-x meta control char
            pb_escape.Add(@"\\[pP]\{.*?(\} | $)");           // property

            /*
             * Probably not useful
             *
             * if( helper.IsONIG_SYN_OP_ESC_ASTERISK_ZERO_INF )
             * {
             *      pb_escape.Add( @"(?!\\\*)");
             * }
             *
             * if( helper.IsONIG_SYN_OP_ESC_PLUS_ONE_INF )
             * {
             *      pb_escape.Add( @"(?!\\\+)");
             * }
             *
             * if( helper.IsONIG_SYN_OP_ESC_QMARK_ZERO_ONE )
             * {
             *      pb_escape.Add( @"(?!\\\?)");
             * }
             *
             * if( helper.IsONIG_SYN_OP_ESC_BRACE_INTERVAL )
             * {
             *      pb_escape.Add( @"(?!\\[{}])");
             * }
             */

            // Catch-all: a backslash followed by any character. It matches every
            // two-character escape, so anything more specific has to be listed before it.
            pb_escape.Add(@"\\.");

            pb_escape.EndGroup( );

            var pb = new PatternBuilder( );

            pb.BeginGroup("comment");

            if (helper.IsONIG_SYN_OP2_QMARK_GROUP_EFFECT)
            {
                pb.Add(@"\(\?\#.*?(\)|$)");               // comment
            }
            if (helper.IsONIG_OPTION_EXTEND)
            {
                pb.Add(@"\#.*?(\n|$)");                   // line-comment
            }
            pb.EndGroup( );

            if (helper.IsONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)
            {
                pb.Add(@"\(\?(?'name'<(?![=!]).*?(>|$))");
                pb.Add(@"\(\?(?'name''.*?('|$))");
            }
            if (helper.IsONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)
            {
                pb.Add(@"\(\?@(?'name'<.*?(>|$))");
                pb.Add(@"\(\?@(?'name''.*?('|$))");
            }
            if (helper.IsONIG_SYN_OP2_ESC_K_NAMED_BACKREF)
            {
                pb.Add(@"(?'name'\\k<.*?(>|$))");
                pb.Add(@"(?'name'\\k'.*?('|$))");
            }
            if (helper.IsONIG_SYN_OP2_ESC_G_SUBEXP_CALL)
            {
                pb.Add(@"(?'name'\\g<.*?(>|$))");
                pb.Add(@"(?'name'\\g'.*?('|$))");
            }

            // (nested groups: https://stackoverflow.com/questions/546433/regular-expression-to-match-balanced-parentheses)

            string posix_bracket = "";

            if (helper.IsONIG_SYN_OP_POSIX_BRACKET)
            {
                posix_bracket = @"(?'escape'\[:.*?(:\]|$))";     // [:...:], use escape colour
            }

            // Character class with balanced nested brackets: every inner '[' pushes onto
            // group 'c', every inner ']' pops it, and '(?(c)(?!))' rejects unbalanced input.
            pb.Add($@"
						\[ 
						\]?
						(?> {posix_bracket}{( posix_bracket.Length == 0 ? "" : " |" )} \[(?<c>) | ({pb_escape.ToPattern( )} | [^\[\]])+ | \](?<-c>))*
						(?(c)(?!))
						\]
						"                        );

            if (helper.IsONIG_SYN_OP_ESC_LPAREN_SUBEXP)
            {
                pb.Add(@"\\\( | \\\)");                   // (skip)
            }

            if (helper.IsONIG_SYN_OP_ESC_BRACE_INTERVAL)
            {
                pb.Add(@"\\\{ | \\\}");                   // (skip)
            }

            if (helper.IsONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)
            {
                // Quoted part; uses the 'escape' name to take its colour.
                // It used to be listed inside the shared escape group *after* '\\.', where
                // '\\.' matched '\Q' first and the quoted text was never coloured as a unit
                // (assuming the builder joins alternatives with '|' in insertion order).
                // It is added here, ahead of the generic escapes and outside the shared
                // group, so that scanning inside character classes stays exactly as before.
                pb.Add(@"(?'escape'\\Q.*?(\\E|$))");
            }

            pb.Add(pb_escape.ToPattern( ));

            return pb.ToRegex( );
        }
Example #24
0
        /// <summary>
        /// Builds the colouring regex with the named groups <c>escape</c>, <c>class</c>,
        /// <c>comment</c> and <c>name</c>, plus an alternative for bracketed character groups.
        /// </summary>
        /// <param name="isXorXx">When <c>true</c>, the <c>#</c>-to-end-of-line comment alternative is added as well.</param>
        /// <returns>The regex produced by the assembled <see cref="PatternBuilder"/>.</returns>
        Regex CreateColouringRegex(bool isXorXx)
        {
            // Escape alternatives, in matching priority order (the catch-all comes last).
            string[] escapeAlternatives =
            {
                @"\\c[A-Za-z]",               // control char
                @"\\x[0-9a-fA-F]{1,2}",       // hexa, two digits
                @"\\x\{[0-9a-fA-F]*(\} | $)", // hexa, error if empty
                @"\\N\{.*?(\} | $)",          // Unicode name or hexa
                @"\\0[0-7]{1,2}",             // octal, two digits after 0
                @"\\[0-7]{1,3}",              // octal, three digits
                @"\\o\{[0-9]+(\} | $)",       // octal
                @"\\[pP]([a-zA-Z] | $)",      // property
                @"\\[pP]\{.*?(\} | $)",       // property
                @"\\Q.*?(\\E|$)",             // quoted sequence, \Q...\E
                @"\\[bB]\{.*?(\} | $)",       // Unicode boundary
                @"\\.",                       // any other escaped character
            };

            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");
            foreach (string alternative in escapeAlternatives)
            {
                escapes.Add(alternative);
            }
            escapes.EndGroup();

            // [: ... :]
            var posixClass = new PatternBuilder().AddGroup("class", @"\[(?'c'[:]) .*? (\k<c>\] | $)");

            var result = new PatternBuilder();

            // Comments: always '(?#...)'; '#...' up to the end of line only on request.
            result.BeginGroup("comment");
            result.Add(@"\(\?\#.*?(\)|$)");
            if (isXorXx)
            {
                result.Add(@"\#.*?(\n|$)");
            }
            result.EndGroup();

            // Group names and named/numbered references.
            string[] nameAlternatives =
            {
                @"\(\?(?'name'<(?![=!]).*?(>|$))",
                @"\(\?(?'name''.*?('|$))",
                @"\(\?P(?'name'<.*?(>|$))",
                @"\(\?P(?'name'[=>].*?(\)|$))",
                @"(?'name'\\g[0-9]+)",
                @"(?'name'\\[gk]\{.*?(\}|$))",
                @"(?'name'\\[gk]<.*?(>|$))",
                @"(?'name'\\k'.*?('|$))",
            };

            foreach (string alternative in nameAlternatives)
            {
                result.Add(alternative);
            }

            result.Add(escapes.ToPattern());

            // Bracketed character group, whose content may hold classes and escapes.
            result.Add($@"( \[ \]? ({posixClass.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$) )");

            return result.ToRegex();
        }
Example #25
0
        /// <summary>
        /// Builds the colouring regex for the given grammar, with the named groups
        /// <c>escape</c>, <c>class</c>, <c>comment</c> and <c>name</c>.
        /// </summary>
        /// <param name="grammar">Selects which families of alternatives are added.</param>
        /// <param name="modX">When <c>true</c> and the grammar is perl-like, <c>#</c> line comments are recognised too.</param>
        /// <returns>The regex produced by the assembled <see cref="PatternBuilder"/>.</returns>
        static Regex CreateColouringRegex(GrammarEnum grammar, bool modX)
        {
            bool perlLike =
                grammar == GrammarEnum.perl
                || grammar == GrammarEnum.ECMAScript
                || grammar == GrammarEnum.normal
                || grammar == GrammarEnum.JavaScript
                || grammar == GrammarEnum.JScript;

            bool posixExtended =
                grammar == GrammarEnum.extended
                || grammar == GrammarEnum.egrep
                || grammar == GrammarEnum.awk;

            bool posixBasic =
                grammar == GrammarEnum.basic
                || grammar == GrammarEnum.sed
                || grammar == GrammarEnum.grep
                || grammar == GrammarEnum.emacs;

            bool emacs = grammar == GrammarEnum.emacs;

            bool perlOrExtended = perlLike || posixExtended;
            bool anyFamily = perlOrExtended || posixBasic;

            // --- escapes -------------------------------------------------------

            var escapes = new PatternBuilder();

            escapes.BeginGroup("escape");

            if (anyFamily)
            {
                escapes.Add(@"\\[1-9]");                    // back reference
            }

            if (perlOrExtended)
            {
                escapes.Add(@"\\c[A-Za-z]");                // ASCII escape
                escapes.Add(@"\\x[0-9A-Fa-f]{1,2}");        // hex, two digits
                escapes.Add(@"\\x\{[0-9A-Fa-f]+(\}|$)");    // hex in braces
                escapes.Add(@"\\0[0-7]{1,3}");              // octal, three digits
                escapes.Add(@"\\N\{.*?(\}|$)");             // symbolic name
                escapes.Add(@"\\[pP]\{.*?(\}|$)");          // property
                escapes.Add(@"\\[pP].");                    // property, short name
                escapes.Add(@"\\Q.*?(\\E|$)");              // quoted sequence
            }

            if (emacs)
            {
                escapes.Add(@"\\[sS].");                    // syntax group
            }

            if (perlOrExtended)
            {
                escapes.Add(@"\\.");                        // various
            }

            if (posixBasic)
            {
                // various, except the escaped parentheses and braces
                escapes.Add(@"(?!\\\( | \\\) | \\\{ | \\\})\\.");
            }

            escapes.EndGroup();

            // --- bracket classes such as [:...:], [=...=], [....] --------------

            var classes = new PatternBuilder();

            classes.BeginGroup("class");
            if (anyFamily)
            {
                classes.Add(@"\[(?'c'[:=.]) .*? (\k<c>\] | $)");
            }
            classes.EndGroup();

            // --- final pattern -------------------------------------------------

            var result = new PatternBuilder();

            result.BeginGroup("comment");
            if (perlLike)
            {
                result.Add(@"\(\?\#.*?(\)|$)");             // comment

                if (modX)
                {
                    result.Add(@"\#.*?(\n|$)");             // line comment
                }
            }
            result.EndGroup();

            if (perlLike)
            {
                // group names
                result.Add(@"\(\?(?'name'<(?![=!]).*?(>|$)) | \(\?(?'name''.*?('|$))");
                // back references
                result.Add(@"(?'name'\\g-?[1-9]) | (?'name'\\g\{.*?(\}|$))");
                result.Add(@"(?'name'\\[gk]<.*?(>|$)) | (?'name'\\[gk]'.*?('|$))");
            }

            if (anyFamily)
            {
                // bracketed character group; its content may hold classes and escapes
                result.AddGroup(null, $@"\[ \]? ({classes.ToPattern()} | {escapes.ToPattern()} | . )*? (\]|$)");
            }

            result.Add(escapes.ToPattern());

            return result.ToRegex();
        }
Example #26
0
 /// <summary>
 /// Forwards both arguments, unchanged and in the same order, to <c>pattern.Add</c>.
 /// </summary>
 /// <param name="input">First argument handed to <c>pattern.Add</c>.</param>
 /// <param name="internal_">Second argument handed to <c>pattern.Add</c>.</param>
 internal void AddSpecial(string input, string internal_) => pattern.Add(input, internal_);
Example #27
0
        /// <summary>
        /// Builds the regex that locates parentheses, interval braces and brackets for highlighting.
        /// Matches expose the named groups <c>left_par</c>, <c>right_par</c>, <c>left_brace</c>,
        /// <c>right_brace</c>, <c>left_bracket</c> and <c>right_bracket</c>; comments, quoted
        /// sequences and escapes are matched without these names so they are skipped over.
        /// </summary>
        /// <param name="grammar">Selects which families of alternatives are added.</param>
        /// <param name="modX">When <c>true</c> and the grammar is perl-like, <c>#</c> line comments are skipped too.</param>
        /// <returns>The regex produced by the assembled <see cref="PatternBuilder"/>.</returns>
        static Regex CreateHighlightingRegex(GrammarEnum grammar, bool modX)
        {
            bool is_perl =
                grammar == GrammarEnum.perl ||
                grammar == GrammarEnum.ECMAScript ||
                grammar == GrammarEnum.normal ||
                grammar == GrammarEnum.JavaScript ||
                grammar == GrammarEnum.JScript;

            bool is_POSIX_extended =
                grammar == GrammarEnum.extended ||
                grammar == GrammarEnum.egrep ||
                grammar == GrammarEnum.awk;

            // emacs is part of the basic family; nothing in this method treats it
            // separately, so no dedicated flag is kept for it.
            bool is_POSIX_basic =
                grammar == GrammarEnum.basic ||
                grammar == GrammarEnum.sed ||
                grammar == GrammarEnum.grep ||
                grammar == GrammarEnum.emacs;

            var pb = new PatternBuilder( );

            if (is_perl)
            {
                pb.Add(@"(\(\?\#.*?(\)|$))");               // comment
            }
            if (is_perl && modX)
            {
                pb.Add(@"(\#[^\n]*)");                      // line comment
            }

            if (is_perl || is_POSIX_extended)
            {
                pb.Add(@"\\Q.*?(\\E|$)");                   // skip \Q...\E
                pb.Add(@"\\[xNpPgk]\{.*?(\}|$)");           // (skip)

                pb.AddGroup("left_par", @"\(");                                                   // '('
                pb.AddGroup("right_par", @"\)");                                                  // ')'
                pb.Add(@"(?'left_brace'\{) \s* \d+ \s* (, \s* \d*)? \s* ((?'right_brace'\})|$)"); // '{...}' (spaces are allowed)
            }

            if (is_POSIX_basic)
            {
                pb.AddGroup("left_par", @"\\\(");                        // '\('
                pb.AddGroup("right_par", @"\\\)");                       // '\)'
                pb.Add(@"(?'left_brace'\\{).*?((?'right_brace'\\})|$)"); // '\{...\}'
            }

            if (is_perl || is_POSIX_extended || is_POSIX_basic)
            {
                pb.Add(@"((?'left_bracket'\[) \]? ((\[:.*? (:\]|$)) | \\. | .)*? ((?'right_bracket'\])|$) )"); // [...]
                pb.Add(@"\\.");                                                                                // '\...'
            }

            return pb.ToRegex( );
        }