Example #1
        /// <summary>
        /// Gets the state transition rules for the lexer. Each time a regex is matched,
        /// the internal state machine can be bumped to a new state which determines what
        /// regexes become valid again
        /// </summary>
        /// <returns></returns>
        protected override IDictionary <string, StateRule[]> GetStateRules()
        {
            var rules   = new Dictionary <string, StateRule[]>();
            var builder = new StateRuleBuilder();

            rules["root"] = builder.NewRuleSet()
                            .Add(@"[^<&]+", TokenTypes.Text)
                            .Add(@"&\S*?;", TokenTypes.Name.Entity)
                            .Add(@"\<\!\[CDATA\[.*?\]\]\>", TokenTypes.Comment.Preproc)
                            .Add(@"<!--", TokenTypes.Comment, "comment")
                            .Add(@"<\?.*?\?>", TokenTypes.Comment.Preproc)
                            .Add(@"<![^>]*>", TokenTypes.Comment.Preproc)
                            .Add(@"<\s*[\w:.-]+", TokenTypes.Name.Tag, "tag")
                            .Add(@"<\s*/\s*[\w:.-]+\s*>'", TokenTypes.Name.Tag)
                            .Build();

            rules["comment"] = builder.NewRuleSet()
                               .Add(@"[^-]+", TokenTypes.Text)
                               .Add(@"-->", TokenTypes.Comment, "#pop")
                               .Add(@"-", TokenTypes.Comment)
                               .Build();

            rules["tag"] = builder.NewRuleSet()
                           .Add(@"\s+", TokenTypes.Text)
                           .Add(@"[\w.:-]+\s*=", TokenTypes.Name.Attribute, "attr")
                           .Add(@"/?\s*>", TokenTypes.Name.Tag, "#pop")
                           .Build();

            rules["attr"] = builder.NewRuleSet()
                            .Add(@"\s+", TokenTypes.Text)
                            .Add(@""".*?""", TokenTypes.String, "#pop")
                            .Add(@".*?'", TokenTypes.String, "#pop")
                            .Add(@"[^\s>]+", TokenTypes.String, "#pop")
                            .Build();

            return(rules);
        }
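A minimal, self-contained sketch of how a rule table like the one above could drive tokenization (the real StateRule/StateRuleBuilder types are richer; SimpleRule and MiniLexer here are stand-ins): the lexer keeps a state stack, tries the current state's rules in order, emits the first match as a token, and pushes or pops states as the matching rule directs.

    using System;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    // Stand-in for StateRule: a pattern, the token type it emits, and an optional
    // state transition ("#pop" or the name of a state to push).
    record SimpleRule(Regex Pattern, string TokenType, string NextState = null);

    static class MiniLexer
    {
        public static IEnumerable<(string Type, string Value)> Tokenize(
            string text, IDictionary<string, SimpleRule[]> rules)
        {
            var states = new Stack<string>();
            states.Push("root");
            int pos = 0;

            while (pos < text.Length)
            {
                bool matched = false;

                foreach (var rule in rules[states.Peek()])
                {
                    var m = rule.Pattern.Match(text, pos);
                    if (!m.Success || m.Index != pos || m.Length == 0) continue;

                    yield return (rule.TokenType, m.Value);
                    pos += m.Length;

                    if (rule.NextState == "#pop") states.Pop();
                    else if (rule.NextState != null) states.Push(rule.NextState);

                    matched = true;
                    break;
                }

                // No rule matched: emit a single character as an error token and move on.
                if (!matched) yield return ("Error", text[pos++].ToString());
            }
        }
    }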
Example #2
        /// <summary>
        /// Gets the state transition rules for the lexer. Each time a regex is matched,
        /// the internal state machine can be bumped to a new state which determines what
        /// regexes become valid again
        /// </summary>
        /// <returns></returns>
        protected override IDictionary <string, StateRule[]> GetStateRules()
        {
            var rules    = new Dictionary <string, StateRule[]>();
            var cs_ident = CSharpLexerLevel.Full;
            var builder  = new StateRuleBuilder();

            rules["root"] = builder.NewRuleSet()
                            .ByGroups(@"^([ \t]*(?:" + cs_ident + @"(?:\[\])?\s+)+?)" + // return type
                                      @"(" + cs_ident + @")" +                          // method name
                                      @"(\s*)(\()",                                     // signature start
                                      new LexerGroupProcessor(this),
                                      new TokenGroupProcessor(TokenTypes.Name.Function),
                                      new TokenGroupProcessor(TokenTypes.Text),
                                      new TokenGroupProcessor(TokenTypes.Punctuation))

                            .Add(@"^\s*\[.*?\]", TokenTypes.Name.Attribute)
                            .Add(@"[^\S\n]+", TokenTypes.Text)
                            .Add(@"\\\n", TokenTypes.Text) //line continuation
                            .Add(@"//.*?\n", TokenTypes.Comment.Single)
                            .Add(@"/[*].*?[*]/", TokenTypes.Comment.Multiline)
                            .Add(@"\n", TokenTypes.Text)
                            .Add(@"[~!%^&*()+=|\[\]:;,.<>/?-]", TokenTypes.Punctuation)
                            .Add(@"[{}]", TokenTypes.Punctuation)
                            .Add(@"@""(""""|[^""])*""", TokenTypes.String)
                            .Add(@"""(\\\\|\\""|[^""\n])*[""\n]", TokenTypes.String)
                            .Add(@"'\\.'|'[^\\]'", TokenTypes.String.Char)
                            .Add(@"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?" +
                                 @"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", TokenTypes.Number)
                            .Add(@"#[ \t]*(if|endif|else|elif|define|undef|" +
                                 @"line|error|warning|region|endregion|pragma)\b.*?\n", TokenTypes.Comment.Preproc)
                            .ByGroups(@"'\b(extern)(\s+)(alias)\b",
                                      new TokenGroupProcessor(TokenTypes.Keyword),
                                      new TokenGroupProcessor(TokenTypes.Text),
                                      new TokenGroupProcessor(TokenTypes.Keyword))
                            .Add(@"(abstract|as|async|await|base|break|case|catch|" +
                                 @"checked|const|continue|default|delegate|" +
                                 @"do|else|enum|event|explicit|extern|false|finally|" +
                                 @"fixed|for|foreach|goto|if|implicit|in|interface|" +
                                 @"internal|is|lock|new|null|operator|" +
                                 @"out|override|params|private|protected|public|readonly|" +
                                 @"ref|return|sealed|sizeof|stackalloc|static|" +
                                 @"switch|this|throw|true|try|typeof|" +
                                 @"unchecked|unsafe|virtual|void|while|" +
                                 @"get|set|new|partial|yield|add|remove|value|alias|ascending|" +
                                 @"descending|from|group|into|orderby|select|where|" +
                                 @"join|equals)\b", TokenTypes.Keyword)
                            .ByGroups(@"(global)(::)",
                                      new TokenGroupProcessor(TokenTypes.Keyword),
                                      new TokenGroupProcessor(TokenTypes.Punctuation))
                            .Add(@"(bool|byte|char|decimal|double|dynamic|float|int|long|object|" +
                                 @"sbyte|short|string|uint|ulong|ushort|var)\b\??", TokenTypes.Keyword.Type)
                            .ByGroups(@"(class|struct)(\s+)", "class",
                                      new TokenGroupProcessor(TokenTypes.Keyword),
                                      new TokenGroupProcessor(TokenTypes.Text))
                            .ByGroups(@"(namespace|using)(\s+)", "namespace",
                                      new TokenGroupProcessor(TokenTypes.Keyword),
                                      new TokenGroupProcessor(TokenTypes.Text))
                            .Add(cs_ident, TokenTypes.Name)
                            .Build();

            rules["class"] = builder.NewRuleSet()
                             .Add(cs_ident, TokenTypes.Name.Class, "#pop")
                             .Default("#pop")
                             .Build();

            rules["namespace"] = builder.NewRuleSet()
                                 .Add(@"(?=\()", TokenTypes.Text, "#pop") // using resource
                                 .Add(@"(" + cs_ident + @"|\.)+", TokenTypes.Name.Namespace, "#pop")
                                 .Build();

            return(rules);
        }
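The ByGroups rules above hand each regex capture group to its own processor. A quick standalone illustration of that idea (the group-to-token mapping is hard-coded here; the real TokenGroupProcessor/LexerGroupProcessor types do more):

    using System;
    using System.Text.RegularExpressions;

    static class ByGroupsSketch
    {
        public static void Main()
        {
            // One token type per capture group, mirroring .ByGroups(@"(class|struct)(\s+)", ...).
            var pattern     = new Regex(@"(class|struct)(\s+)");
            var groupTokens = new[] { "Keyword", "Text" };

            var m = pattern.Match("class Foo");
            for (int i = 1; i < m.Groups.Count; i++)      // group 0 is the whole match
                Console.WriteLine($"{groupTokens[i - 1],-8} -> \"{m.Groups[i].Value}\"");

            // Output:
            // Keyword  -> "class"
            // Text     -> " "
        }
    }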
Example #3
        /// <summary>
        /// Gets the state transition rules for the lexer. Each time a regex is matched,
        /// the internal state machine can be bumped to a new state which determines what
        /// regexes become valid again
        /// </summary>
        /// <returns></returns>
        protected override IDictionary <string, StateRule[]> GetStateRules()
        {
            var builder = new StateRuleBuilder();

            builder.DefaultRegexOptions = RegexOptions.IgnoreCase;

            var rules = new Dictionary <string, StateRule[]>();

            rules["root"] = builder.NewRuleSet()
                            .Add(@"[^<&]+", TokenTypes.Text)
                            .Add(@"&\S*?;", TokenTypes.Name.Entity)
                            .Add(@"\<\!\[CDATA\[.*?\]\]\>", TokenTypes.Comment.Preproc)
                            .Add(@"<!--", TokenTypes.Comment, "comment")
                            .Add(@"<\?.*?\?>", TokenTypes.Comment.Preproc)
                            .Add(@"<![^>]*>", TokenTypes.Comment.Preproc)
                            .ByGroups(@"(<)(\s*)(script)(\s*)", new[] { "script-content", "tag" },
                                      new TokenGroupProcessor(TokenTypes.Punctuation),
                                      new TokenGroupProcessor(TokenTypes.Text),
                                      new TokenGroupProcessor(TokenTypes.Name.Tag),
                                      new TokenGroupProcessor(TokenTypes.Text))
                            .ByGroups(@"(<)(\s*)(style)(\s*)", new[] { "style-content", "tag" },
                                      new TokenGroupProcessor(TokenTypes.Punctuation),
                                      new TokenGroupProcessor(TokenTypes.Text),
                                      new TokenGroupProcessor(TokenTypes.Name.Tag),
                                      new TokenGroupProcessor(TokenTypes.Text))
                            .ByGroups(@"(<)(\s*)([\w:.-]+)", "tag",
                                      new TokenGroupProcessor(TokenTypes.Punctuation),
                                      new TokenGroupProcessor(TokenTypes.Text),
                                      new TokenGroupProcessor(TokenTypes.Name.Tag))
                            .ByGroups(@"(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)",
                                      new TokenGroupProcessor(TokenTypes.Punctuation),
                                      new TokenGroupProcessor(TokenTypes.Text),
                                      new TokenGroupProcessor(TokenTypes.Punctuation),
                                      new TokenGroupProcessor(TokenTypes.Text),
                                      new TokenGroupProcessor(TokenTypes.Name.Tag),
                                      new TokenGroupProcessor(TokenTypes.Text),
                                      new TokenGroupProcessor(TokenTypes.Punctuation))
                            .Build();

            rules["comment"] = builder.NewRuleSet()
                               .Add(@"[^-]+", TokenTypes.Comment, "#pop")
                               .Add(@"-->", TokenTypes.Comment)
                               .Add(@"-", TokenTypes.Comment)
                               .Build();

            rules["tag"] = builder.NewRuleSet()
                           .Add(@"\s+", TokenTypes.Text)
                           .ByGroups(@"([\w:-]+\s*)(=)(\s*)", "attr",
                                     new TokenGroupProcessor(TokenTypes.Name.Attribute),
                                     new TokenGroupProcessor(TokenTypes.Operator),
                                     new TokenGroupProcessor(TokenTypes.Text))
                           .Add(@"[\w:-]+", TokenTypes.Name.Attribute)
                           .ByGroups(@"(/?)(\s*)(>)", "#pop",
                                     new TokenGroupProcessor(TokenTypes.Punctuation),
                                     new TokenGroupProcessor(TokenTypes.Text),
                                     new TokenGroupProcessor(TokenTypes.Punctuation))
                           .Build();

            rules["script-content"] = builder.NewRuleSet()
                                      .ByGroups(@"(<)(\s*)(/)(\s*)(script)(\s*)(>)", "#pop",
                                                new TokenGroupProcessor(TokenTypes.Punctuation),
                                                new TokenGroupProcessor(TokenTypes.Text),
                                                new TokenGroupProcessor(TokenTypes.Punctuation),
                                                new TokenGroupProcessor(TokenTypes.Text),
                                                new TokenGroupProcessor(TokenTypes.Name.Tag),
                                                new TokenGroupProcessor(TokenTypes.Text),
                                                new TokenGroupProcessor(TokenTypes.Punctuation))
                                      .Using <JavascriptLexer>(@".+?(?=<\s*/\s*script\s*>)")
                                      .Build();


            rules["style-content"] = builder.NewRuleSet()
                                     .ByGroups(@"(<)(\s*)(/)(\s*)(style)(\s*)(>)", "#pop",
                                               new TokenGroupProcessor(TokenTypes.Punctuation),
                                               new TokenGroupProcessor(TokenTypes.Text),
                                               new TokenGroupProcessor(TokenTypes.Punctuation),
                                               new TokenGroupProcessor(TokenTypes.Text),
                                               new TokenGroupProcessor(TokenTypes.Name.Tag),
                                               new TokenGroupProcessor(TokenTypes.Text),
                                               new TokenGroupProcessor(TokenTypes.Punctuation))
                                     .Using <CssLexer>(@".+?(?=<\s*/\s*style\s*>)")
                                     .Build();

            rules["attr"] = builder.NewRuleSet()
                            .Add(@""".*?""", TokenTypes.String, "#pop")
                            .Add(@"'.*?'", TokenTypes.String, "#pop")
                            .Add(@"[^\s>]+", TokenTypes.String, "#pop")
                            .Build();

            return(rules);
        }
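The script and style rules above name two target states at once. Assuming the builder follows the usual convention (states are pushed in the order listed, so the last one is entered first), the state stack behaves roughly like this:

    using System;
    using System.Collections.Generic;

    static class MultiStatePushSketch
    {
        public static void Main()
        {
            var stack = new Stack<string>();
            stack.Push("root");

            // "(<)(\s*)(script)(\s*)" matched: push "script-content", then "tag".
            stack.Push("script-content");
            stack.Push("tag");
            Console.WriteLine(stack.Peek());   // tag -- the attributes of <script ...> are lexed here

            // "(/?)(\s*)(>)" matched inside "tag": "#pop".
            stack.Pop();
            Console.WriteLine(stack.Peek());   // script-content -- raw JS until </script>

            // "(<)(\s*)(/)(\s*)(script)(\s*)(>)" matched: "#pop" back to root.
            stack.Pop();
            Console.WriteLine(stack.Peek());   // root
        }
    }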
Example #4
        /// <summary>
        /// Gets the state transition rules for the lexer. Each time a regex is matched,
        /// the internal state machine can be bumped to a new state which determines what
        /// regexes become valid again
        /// </summary>
        /// <returns></returns>
        protected override IDictionary <string, StateRule[]> GetStateRules()
        {
            var rules = new Dictionary <string, StateRule[]>();

            string JS_IDENT_START = "(?:[$_" + RegexUtil.Combine("Lu", "Ll", "Lt", "Lm", "Lo", "Nl") + "]|\\\\u[a-fA-F0-9]{4})";
            string JS_IDENT_PART  = "(?:[$" + RegexUtil.Combine("Lu", "Ll", "Lt", "Lm", "Lo", "Nl", "Mn", "Mc", "Nd", "Pc") + "\u200c\u200d]|\\\\u[a-fA-F0-9]{4})";
            string JS_IDENT       = JS_IDENT_START + "(?:" + JS_IDENT_PART + ")*";

            var builder = new StateRuleBuilder();

            builder.DefaultRegexOptions = RegexOptions.Multiline;

            rules["commentsandwhitespace"] = builder.NewRuleSet()
                                             .Add(@"\s+", TokenTypes.Text)
                                             .Add(@"<!--", TokenTypes.Comment)
                                             .Add(@"//.*?\n", TokenTypes.Comment.Single)
                                             .Add(@"/\*.*?\*/", TokenTypes.Comment.Multiline)
                                             .Build();

            rules["slashstartsregex"] = builder.NewRuleSet()
                                        .Include(rules["commentsandwhitespace"])
                                        .Add(@"/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/" + @"([gim]+\b|\B)", TokenTypes.String.Regex, "#pop")
                                        .Add(@"(?=/)", TokenTypes.Text, "#pop", "badregex")
                                        .Default("#pop")
                                        .Build();

            rules["badregex"] = builder.NewRuleSet()
                                .Add(@"\n", TokenTypes.Text, "#pop")
                                .Build();

            rules["root"] = builder.NewRuleSet()
                            .Add(@"\A#! ?/.*?\n", TokenTypes.Comment.Hashbang)
                            .Add(@"^(?=\s|/|<!--)", TokenTypes.Text, "slashstartsregex")
                            .Include(rules["commentsandwhitespace"])
                            .Add(@"\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|(<<|>>>?|=>|==?|!=?|[-<>+*%&|^/])=?", TokenTypes.Operator, "slashstartsregex")
                            .Add(@"\.\.\.", TokenTypes.Punctuation)
                            .Add(@"[{(\[;,]", TokenTypes.Punctuation, "slashstartsregex")
                            .Add(@"[})\].]", TokenTypes.Punctuation)
                            .Add(@"(for|in|while|do|break|return|continue|switch|case|default|if|else|throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|this|of)\b", TokenTypes.Keyword, "slashstartsregex")
                            .Add(@"(var|let|with|function)\b", TokenTypes.Keyword.Declaration, "slashstartsregex")
                            .Add(@"(abstract|boolean|byte|char|class|const|debugger|double|enum|export|extends|final|float|goto|implements|import|int|interface|long|native|package|private|protected|public|short|static|super|synchronized|throws|transient|volatile)\b", TokenTypes.Keyword.Reserved)
                            .Add(@"(true|false|null|NaN|Infinity|undefined)\b", TokenTypes.Keyword.Constant)
                            .Add(@"(Array|Boolean|Date|Error|Function|Math|netscape|Number|Object|Packages|RegExp|String|Promise|Proxy|sun|decodeURI|decodeURIComponent|encodeURI|encodeURIComponent|Error|eval|isFinite|isNaN|isSafeInteger|parseFloat|parseInt|document|this|window)\b", TokenTypes.Name.Builtin)
                            .Add(JS_IDENT, TokenTypes.Name.Other)
                            .Add(@"[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?", TokenTypes.Number.Float)
                            .Add(@"0b[01]+", TokenTypes.Number.Bin)
                            .Add(@"0o[0-7]+", TokenTypes.Number.Oct)
                            .Add(@"0x[0-9a-fA-F]+", TokenTypes.Number.Hex)
                            .Add(@"[0-9]+'", TokenTypes.Number.Integer)
                            .Add(@"""(\\\\|\\""|[^""])*""", TokenTypes.String.Double)
                            .Add(@"'(\\\\|\\'|[^'])*'", TokenTypes.String.Single)
                            .Add(@"`", TokenTypes.String.Backtick, "interp")
                            .Build();

            rules["interp"] = builder.NewRuleSet()
                              .Add(@"`", TokenTypes.String.Backtick, "#pop")
                              .Add(@"\\\\", TokenTypes.String.Backtick)
                              .Add(@"\\`", TokenTypes.String.Backtick)
                              .Add(@"\${", TokenTypes.String.Interpol, "interp-inside")
                              .Add(@"\$", TokenTypes.String.Backtick)
                              .Add(@"[^`\\$]+'", TokenTypes.String.Backtick)
                              .Build();

            rules["interp-inside"] = builder.NewRuleSet()
                                     .Add(@"}", TokenTypes.String.Interpol, "#pop")
                                     .Include(rules["root"])
                                     .Build();

            return(rules);
        }
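Include(rules["commentsandwhitespace"]) reuses an already-built rule array inside another state. Presumably it just splices those rules in at that position so they are tried before anything added after the Include call; a rough standalone picture of that (patterns only, for brevity):

    using System;
    using System.Linq;

    static class IncludeSketch
    {
        public static void Main()
        {
            // Already-built shared rules.
            var commentsAndWhitespace = new[] { @"\s+", @"<!--", @"//.*?\n", @"/\*.*?\*/" };

            // .Include(...) presumably amounts to concatenating the shared rules in place.
            var slashStartsRegex = commentsAndWhitespace
                .Concat(new[]
                {
                    @"/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/([gim]+\b|\B)",  // regex literal
                    @"(?=/)",                                                // otherwise: bad regex
                })
                .ToArray();

            Console.WriteLine(string.Join(Environment.NewLine, slashStartsRegex));
        }
    }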
Example #5
        /// <summary>
        /// Gets the state transition rules for the lexer. Each time a regex is matched,
        /// the internal state machine can be bumped to a new state which determines what
        /// regexes become valid again
        /// </summary>
        /// <returns></returns>
        protected override IDictionary <string, StateRule[]> GetStateRules()
        {
            // SQL keywords are case-insensitive, so every rule below is built with RegexOptions.IgnoreCase.

            var builder = new StateRuleBuilder();

            builder.DefaultRegexOptions = RegexOptions.IgnoreCase;

            var rules = new Dictionary <string, StateRule[]>();

            rules["root"] = builder.NewRuleSet()
                            .Add(@"\s+", TokenTypes.Text)
                            .Add(@"--.*?\n", TokenTypes.Comment.Single)
                            .Add(@"/\*", TokenTypes.Comment.Multiline, "multiline-comments")
                            .Add(RegexUtil.Words(new []
            {
                "ABORT", "ABS", "ABSOLUTE", "ACCESS", "ADA", "ADD", "ADMIN", "AFTER", "AGGREGATE",
                "ALIAS", "ALL", "ALLOCATE", "ALTER", "ANALYSE", "ANALYZE", "AND", "ANY", "ARE", "AS",
                "ASC", "ASENSITIVE", "ASSERTION", "ASSIGNMENT", "ASYMMETRIC", "AT", "ATOMIC",
                "AUTHORIZATION", "AVG", "BACKWARD", "BEFORE", "BEGIN", "BETWEEN", "BITVAR",
                "BIT_LENGTH", "BOTH", "BREADTH", "BY", "C", "CACHE", "CALL", "CALLED", "CARDINALITY",
                "CASCADE", "CASCADED", "CASE", "CAST", "CATALOG", "CATALOG_NAME", "CHAIN",
                "CHARACTERISTICS", "CHARACTER_LENGTH", "CHARACTER_SET_CATALOG",
                "CHARACTER_SET_NAME", "CHARACTER_SET_SCHEMA", "CHAR_LENGTH", "CHECK",
                "CHECKED", "CHECKPOINT", "CLASS", "CLASS_ORIGIN", "CLOB", "CLOSE", "CLUSTER",
                "COALSECE", "COBOL", "COLLATE", "COLLATION", "COLLATION_CATALOG",
                "COLLATION_NAME", "COLLATION_SCHEMA", "COLUMN", "COLUMN_NAME",
                "COMMAND_FUNCTION", "COMMAND_FUNCTION_CODE", "COMMENT", "COMMIT",
                "COMMITTED", "COMPLETION", "CONDITION_NUMBER", "CONNECT", "CONNECTION",
                "CONNECTION_NAME", "CONSTRAINT", "CONSTRAINTS", "CONSTRAINT_CATALOG",
                "CONSTRAINT_NAME", "CONSTRAINT_SCHEMA", "CONSTRUCTOR", "CONTAINS",
                "CONTINUE", "CONVERSION", "CONVERT", "COPY", "CORRESPONTING", "COUNT",
                "CREATE", "CREATEDB", "CREATEUSER", "CROSS", "CUBE", "CURRENT", "CURRENT_DATE",
                "CURRENT_PATH", "CURRENT_ROLE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
                "CURRENT_USER", "CURSOR", "CURSOR_NAME", "CYCLE", "DATA", "DATABASE",
                "DATETIME_INTERVAL_CODE", "DATETIME_INTERVAL_PRECISION", "DAY",
                "DEALLOCATE", "DECLARE", "DEFAULT", "DEFAULTS", "DEFERRABLE", "DEFERRED",
                "DEFINED", "DEFINER", "DELETE", "DELIMITER", "DELIMITERS", "DEREF", "DESC",
                "DESCRIBE", "DESCRIPTOR", "DESTROY", "DESTRUCTOR", "DETERMINISTIC",
                "DIAGNOSTICS", "DICTIONARY", "DISCONNECT", "DISPATCH", "DISTINCT", "DO",
                "DOMAIN", "DROP", "DYNAMIC", "DYNAMIC_FUNCTION", "DYNAMIC_FUNCTION_CODE",
                "EACH", "ELSE", "ENCODING", "ENCRYPTED", "END", "END-EXEC", "EQUALS", "ESCAPE", "EVERY",
                "EXCEPTION", "EXCEPT", "EXCLUDING", "EXCLUSIVE", "EXEC", "EXECUTE", "EXISTING",
                "EXISTS", "EXPLAIN", "EXTERNAL", "EXTRACT", "FALSE", "FETCH", "FINAL", "FIRST", "FOR",
                "FORCE", "FOREIGN", "FORTRAN", "FORWARD", "FOUND", "FREE", "FREEZE", "FROM", "FULL",
                "FUNCTION", "G", "GENERAL", "GENERATED", "GET", "GLOBAL", "GO", "GOTO", "GRANT", "GRANTED",
                "GROUP", "GROUPING", "HANDLER", "HAVING", "HIERARCHY", "HOLD", "HOST", "IDENTITY",
                "IGNORE", "ILIKE", "IMMEDIATE", "IMMUTABLE", "IMPLEMENTATION", "IMPLICIT", "IN",
                "INCLUDING", "INCREMENT", "INDEX", "INDITCATOR", "INFIX", "INHERITS", "INITIALIZE",
                "INITIALLY", "INNER", "INOUT", "INPUT", "INSENSITIVE", "INSERT", "INSTANTIABLE",
                "INSTEAD", "INTERSECT", "INTO", "INVOKER", "IS", "ISNULL", "ISOLATION", "ITERATE", "JOIN",
                "KEY", "KEY_MEMBER", "KEY_TYPE", "LANCOMPILER", "LANGUAGE", "LARGE", "LAST",
                "LATERAL", "LEADING", "LEFT", "LENGTH", "LESS", "LEVEL", "LIKE", "LIMIT", "LISTEN", "LOAD",
                "LOCAL", "LOCALTIME", "LOCALTIMESTAMP", "LOCATION", "LOCATOR", "LOCK", "LOWER",
                "MAP", "MATCH", "MAX", "MAXVALUE", "MESSAGE_LENGTH", "MESSAGE_OCTET_LENGTH",
                "MESSAGE_TEXT", "METHOD", "MIN", "MINUTE", "MINVALUE", "MOD", "MODE", "MODIFIES",
                "MODIFY", "MONTH", "MORE", "MOVE", "MUMPS", "NAMES", "NATIONAL", "NATURAL", "NCHAR",
                "NCLOB", "NEW", "NEXT", "NO", "NOCREATEDB", "NOCREATEUSER", "NONE", "NOT", "NOTHING",
                "NOTIFY", "NOTNULL", "NULL", "NULLABLE", "NULLIF", "OBJECT", "OCTET_LENGTH", "OF", "OFF",
                "OFFSET", "OIDS", "OLD", "ON", "ONLY", "OPEN", "OPERATION", "OPERATOR", "OPTION", "OPTIONS",
                "OR", "ORDER", "ORDINALITY", "OUT", "OUTER", "OUTPUT", "OVERLAPS", "OVERLAY", "OVERRIDING",
                "OWNER", "PAD", "PARAMETER", "PARAMETERS", "PARAMETER_MODE", "PARAMATER_NAME",
                "PARAMATER_ORDINAL_POSITION", "PARAMETER_SPECIFIC_CATALOG",
                "PARAMETER_SPECIFIC_NAME", "PARAMATER_SPECIFIC_SCHEMA", "PARTIAL",
                "PASCAL", "PENDANT", "PLACING", "PLI", "POSITION", "POSTFIX", "PRECISION", "PREFIX",
                "PREORDER", "PREPARE", "PRESERVE", "PRIMARY", "PRIOR", "PRIVILEGES", "PROCEDURAL",
                "PROCEDURE", "PUBLIC", "READ", "READS", "RECHECK", "RECURSIVE", "REF", "REFERENCES",
                "REFERENCING", "REINDEX", "RELATIVE", "RENAME", "REPEATABLE", "REPLACE", "RESET",
                "RESTART", "RESTRICT", "RESULT", "RETURN", "RETURNED_LENGTH",
                "RETURNED_OCTET_LENGTH", "RETURNED_SQLSTATE", "RETURNS", "REVOKE", "RIGHT",
                "ROLE", "ROLLBACK", "ROLLUP", "ROUTINE", "ROUTINE_CATALOG", "ROUTINE_NAME",
                "ROUTINE_SCHEMA", "ROW", "ROWS", "ROW_COUNT", "RULE", "SAVE_POINT", "SCALE", "SCHEMA",
                "SCHEMA_NAME", "SCOPE", "SCROLL", "SEARCH", "SECOND", "SECURITY", "SELECT", "SELF",
                "SENSITIVE", "SERIALIZABLE", "SERVER_NAME", "SESSION", "SESSION_USER", "SET",
                "SETOF", "SETS", "SHARE", "SHOW", "SIMILAR", "SIMPLE", "SIZE", "SOME", "SOURCE", "SPACE",
                "SPECIFIC", "SPECIFICTYPE", "SPECIFIC_NAME", "SQL", "SQLCODE", "SQLERROR",
                "SQLEXCEPTION", "SQLSTATE", "SQLWARNINIG", "STABLE", "START", "STATE", "STATEMENT",
                "STATIC", "STATISTICS", "STDIN", "STDOUT", "STORAGE", "STRICT", "STRUCTURE", "STYPE",
                "SUBCLASS_ORIGIN", "SUBLIST", "SUBSTRING", "SUM", "SYMMETRIC", "SYSID", "SYSTEM",
                "SYSTEM_USER", "TABLE", "TABLE_NAME", " TEMP", "TEMPLATE", "TEMPORARY", "TERMINATE",
                "THAN", "THEN", "TIMESTAMP", "TIMEZONE_HOUR", "TIMEZONE_MINUTE", "TO", "TOAST",
                "TRAILING", "TRANSATION", "TRANSACTIONS_COMMITTED",
                "TRANSACTIONS_ROLLED_BACK", "TRANSATION_ACTIVE", "TRANSFORM",
                "TRANSFORMS", "TRANSLATE", "TRANSLATION", "TREAT", "TRIGGER", "TRIGGER_CATALOG",
                "TRIGGER_NAME", "TRIGGER_SCHEMA", "TRIM", "TRUE", "TRUNCATE", "TRUSTED", "TYPE",
                "UNCOMMITTED", "UNDER", "UNENCRYPTED", "UNION", "UNIQUE", "UNKNOWN", "UNLISTEN",
                "UNNAMED", "UNNEST", "UNTIL", "UPDATE", "UPPER", "USAGE", "USER",
                "USER_DEFINED_TYPE_CATALOG", "USER_DEFINED_TYPE_NAME",
                "USER_DEFINED_TYPE_SCHEMA", "USING", "VACUUM", "VALID", "VALIDATOR", "VALUES",
                "VARIABLE", "VERBOSE", "VERSION", "VIEW", "VOLATILE", "WHEN", "WHENEVER", "WHERE",
                "WITH", "WITHOUT", "WORK", "WRITE", "YEAR", "ZONE"
            }, suffix: @"\b"), TokenTypes.Keyword)
                            .Add(RegexUtil.Words(new []
            {
                "ARRAY", "BIGINT", "BINARY", "BIT", "BLOB", "BOOLEAN", "CHAR", "CHARACTER", "DATE",
                "DEC", "DECIMAL", "FLOAT", "INT", "INTEGER", "INTERVAL", "NUMBER", "NUMERIC", "REAL",
                "SERIAL", "SMALLINT", "VARCHAR", "VARYING", "INT8", "SERIAL8", "TEXT"
            }, suffix: @"\b"), TokenTypes.Name.Builtin)
                            .Add(@"[+*/<>=~!@#%^&|`?-]", TokenTypes.Operator)
                            .Add(@"[0-9]+", TokenTypes.Number.Integer)
                            .Add(@"'(''|[^'])*'", TokenTypes.String.Single)
                            .Add(@"""(""""|[^""])*""", TokenTypes.String.Single)
                            .Add(@"[a-z_][\w$]*", TokenTypes.Name)
                            .Add(@"[;:()\[\],.]", TokenTypes.Punctuation)
                            .Build();

            rules["multiline-comments"] = builder.NewRuleSet()
                                          .Add(@"/\*", TokenTypes.Comment.Multiline, "multiline-comments")
                                          .Add(@"\*/", TokenTypes.Comment.Multiline, "#pop")
                                          .Add(@"[^/*]+", TokenTypes.Comment.Multiline)
                                          .Add(@"[/*]", TokenTypes.Comment.Multiline)
                                          .Build();

            return(rules);
        }
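RegexUtil.Words presumably collapses a keyword list into a single alternation with the given suffix (here \b), so keywords only match as whole words. The actual helper may differ, but the shape of the pattern is roughly this:

    using System;
    using System.Linq;
    using System.Text.RegularExpressions;

    static class WordsSketch
    {
        // A guess at what RegexUtil.Words produces: escape each word, join with "|",
        // wrap the alternation in a group, and append the suffix.
        public static string Words(string[] words, string suffix = "") =>
            "(" + string.Join("|", words.Select(Regex.Escape)) + ")" + suffix;

        public static void Main()
        {
            var keyword = new Regex(Words(new[] { "SELECT", "FROM", "WHERE" }, suffix: @"\b"),
                                    RegexOptions.IgnoreCase);

            Console.WriteLine(keyword.IsMatch("select * from users"));  // True
            Console.WriteLine(keyword.IsMatch("selection"));            // False -- \b blocks partial words
        }
    }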
Example #6
        /// <summary>
        /// Gets the state transition rules for the lexer. Each time a regex is matched,
        /// the internal state machine can be bumped to a new state which determines what
        /// regexes become valid again
        /// </summary>
        /// <returns></returns>
        protected override IDictionary <string, StateRule[]> GetStateRules()
        {
            var rules   = new Dictionary <string, StateRule[]>();
            var builder = new StateRuleBuilder();

            rules["basics"] = builder.NewRuleSet()
                              .Add(@"\s+", TokenTypes.Text)
                              .Add(@"/\*(?:.|\n)*?\*/", TokenTypes.Comment)
                              .Add(@"\{", TokenTypes.Punctuation, "content")
                              .Add(@"\:[\w-]+", TokenTypes.Name.Decorator)
                              .Add(@"\.[\w-]+", TokenTypes.Name.Class)
                              .Add(@"\#[\w-]+", TokenTypes.Name.Namespace)
                              .Add(@"@[\w-]+", TokenTypes.Keyword, "atrule")
                              .Add(@"[\w-]+", TokenTypes.Name.Tag)
                              .Add(@"[~^*!%&$\[\]()<>|+=@:;,./?-]", TokenTypes.Operator)
                              .Add(@"""(\\\\|\\""|[^""])*""", TokenTypes.String.Double)
                              .Add(@"'(\\\\|\\'|[^'])*'", TokenTypes.String.Single)
                              .Build();

            rules["root"] = builder.NewRuleSet()
                            .Include(rules["basics"])
                            .Build();

            rules["atrule"] = builder.NewRuleSet()
                              .Add(@"\{", TokenTypes.Punctuation, "atcontent")
                              .Add(@";", TokenTypes.Punctuation, "#pop")
                              .Include(rules["basics"])
                              .Build();

            rules["atcontent"] = builder.NewRuleSet()
                                 .Include(rules["basics"])
                                 .Add(@"\}", TokenTypes.Punctuation, "#pop", "#pop")
                                 .Build();

            rules["content"] = builder.NewRuleSet()
                               .Add(@"\s+", TokenTypes.Text)
                               .Add(@"\}", TokenTypes.Punctuation, "#pop")
                               .Add(@"url\(.*?\)", TokenTypes.String.Other)
                               .Add(@"^@.*?$", TokenTypes.Comment.Preproc)
                               .Add(RegexUtil.Words(new []
            {
                "azimuth", "background-attachment", "background-color",
                "background-image", "background-position", "background-repeat",
                "background", "border-bottom-color", "border-bottom-style",
                "border-bottom-width", "border-left-color", "border-left-style",
                "border-left-width", "border-right", "border-right-color",
                "border-right-style", "border-right-width", "border-top-color",
                "border-top-style", "border-top-width", "border-bottom",
                "border-collapse", "border-left", "border-width", "border-color",
                "border-spacing", "border-style", "border-top", "border", "caption-side",
                "clear", "clip", "color", "content", "counter-increment", "counter-reset",
                "cue-after", "cue-before", "cue", "cursor", "direction", "display",
                "elevation", "empty-cells", "float", "font-family", "font-size",
                "font-size-adjust", "font-stretch", "font-style", "font-variant",
                "font-weight", "font", "height", "letter-spacing", "line-height",
                "list-style-type", "list-style-image", "list-style-position",
                "list-style", "margin-bottom", "margin-left", "margin-right",
                "margin-top", "margin", "marker-offset", "marks", "max-height", "max-width",
                "min-height", "min-width", "opacity", "orphans", "outline-color",
                "outline-style", "outline-width", "outline", "overflow", "overflow-x",
                "overflow-y", "padding-bottom", "padding-left", "padding-right", "padding-top",
                "padding", "page", "page-break-after", "page-break-before", "page-break-inside",
                "pause-after", "pause-before", "pause", "pitch-range", "pitch",
                "play-during", "position", "quotes", "richness", "right", "size",
                "speak-header", "speak-numeral", "speak-punctuation", "speak",
                "speech-rate", "stress", "table-layout", "text-align", "text-decoration",
                "text-indent", "text-shadow", "text-transform", "top", "unicode-bidi",
                "vertical-align", "visibility", "voice-family", "volume", "white-space",
                "widows", "width", "word-spacing", "z-index", "bottom",
                "above", "absolute", "always", "armenian", "aural", "auto", "avoid", "baseline",
                "behind", "below", "bidi-override", "blink", "block", "bolder", "bold", "both",
                "capitalize", "center-left", "center-right", "center", "circle",
                "cjk-ideographic", "close-quote", "collapse", "condensed", "continuous",
                "crop", "crosshair", "cross", "cursive", "dashed", "decimal-leading-zero",
                "decimal", "default", "digits", "disc", "dotted", "double", "e-resize", "embed",
                "extra-condensed", "extra-expanded", "expanded", "fantasy", "far-left",
                "far-right", "faster", "fast", "fixed", "georgian", "groove", "hebrew", "help",
                "hidden", "hide", "higher", "high", "hiragana-iroha", "hiragana", "icon",
                "inherit", "inline-table", "inline", "inset", "inside", "invert", "italic",
                "justify", "katakana-iroha", "katakana", "landscape", "larger", "large",
                "left-side", "leftwards", "left", "level", "lighter", "line-through", "list-item",
                "loud", "lower-alpha", "lower-greek", "lower-roman", "lowercase", "ltr",
                "lower", "low", "medium", "message-box", "middle", "mix", "monospace",
                "n-resize", "narrower", "ne-resize", "no-close-quote", "no-open-quote",
                "no-repeat", "none", "normal", "nowrap", "nw-resize", "oblique", "once",
                "open-quote", "outset", "outside", "overline", "pointer", "portrait", "px",
                "relative", "repeat-x", "repeat-y", "repeat", "rgb", "ridge", "right-side",
                "rightwards", "s-resize", "sans-serif", "scroll", "se-resize",
                "semi-condensed", "semi-expanded", "separate", "serif", "show", "silent",
                "slower", "slow", "small-caps", "small-caption", "smaller", "soft", "solid",
                "spell-out", "square", "static", "status-bar", "super", "sw-resize",
                "table-caption", "table-cell", "table-column", "table-column-group",
                "table-footer-group", "table-header-group", "table-row",
                "table-row-group", "text-bottom", "text-top", "text", "thick", "thin",
                "transparent", "ultra-condensed", "ultra-expanded", "underline",
                "upper-alpha", "upper-latin", "upper-roman", "uppercase", "url",
                "visible", "w-resize", "wait", "wider", "x-fast", "x-high", "x-large", "x-loud",
                "x-low", "x-small", "x-soft", "xx-large", "xx-small", "yes"
            }, suffix: @"\b"), TokenTypes.Name.Builtin)
                               .Add(RegexUtil.Words(new []
            {
                "indigo", "gold", "firebrick", "indianred", "yellow", "darkolivegreen",
                "darkseagreen", "mediumvioletred", "mediumorchid", "chartreuse",
                "mediumslateblue", "black", "springgreen", "crimson", "lightsalmon", "brown",
                "turquoise", "olivedrab", "cyan", "silver", "skyblue", "gray", "darkturquoise",
                "goldenrod", "darkgreen", "darkviolet", "darkgray", "lightpink", "teal",
                "darkmagenta", "lightgoldenrodyellow", "lavender", "yellowgreen", "thistle",
                "violet", "navy", "orchid", "blue", "ghostwhite", "honeydew", "cornflowerblue",
                "darkblue", "darkkhaki", "mediumpurple", "cornsilk", "red", "bisque", "slategray",
                "darkcyan", "khaki", "wheat", "deepskyblue", "darkred", "steelblue", "aliceblue",
                "gainsboro", "mediumturquoise", "floralwhite", "coral", "purple", "lightgrey",
                "lightcyan", "darksalmon", "beige", "azure", "lightsteelblue", "oldlace",
                "greenyellow", "royalblue", "lightseagreen", "mistyrose", "sienna",
                "lightcoral", "orangered", "navajowhite", "lime", "palegreen", "burlywood",
                "seashell", "mediumspringgreen", "fuchsia", "papayawhip", "blanchedalmond",
                "peru", "aquamarine", "white", "darkslategray", "ivory", "dodgerblue",
                "lemonchiffon", "chocolate", "orange", "forestgreen", "slateblue", "olive",
                "mintcream", "antiquewhite", "darkorange", "cadetblue", "moccasin",
                "limegreen", "saddlebrown", "darkslateblue", "lightskyblue", "deeppink",
                "plum", "aqua", "darkgoldenrod", "maroon", "sandybrown", "magenta", "tan",
                "rosybrown", "pink", "lightblue", "palevioletred", "mediumseagreen",
                "dimgray", "powderblue", "seagreen", "snow", "mediumblue", "midnightblue",
                "paleturquoise", "palegoldenrod", "whitesmoke", "darkorchid", "salmon",
                "lightslategray", "lawngreen", "lightgreen", "tomato", "hotpink",
                "lightyellow", "lavenderblush", "linen", "mediumaquamarine", "green",
                "blueviolet", "peachpuff"
            }, suffix: @"\b"), TokenTypes.Name.Builtin)
                               .Add(@"\!important", TokenTypes.Comment.Preproc)
                               .Add(@"/\*(?:.|\n)*?\*/", TokenTypes.Comment)
                               .Add(@"\#[a-zA-Z0-9]{1,6}", TokenTypes.Number)
                               .Add(@"[.-]?[0-9]*[.]?[0-9]+(em|px|pt|pc|in|mm|cm|ex|s)\b", TokenTypes.Number)
                               .Add(@"[.-]?[0-9]*[.]?[0-9]+%", TokenTypes.Number)
                               .Add(@"-?[0-9]+", TokenTypes.Number)
                               .Add(@"[~^*!%&<>|+=@:,./?-]+", TokenTypes.Operator)
                               .Add(@"[\[\]();]+", TokenTypes.Punctuation)
                               .Add(@"""(\\\\|\\""|[^""])*""", TokenTypes.String.Double)
                               .Add(@"'(\\\\|\\'|[^'])*'", TokenTypes.String.Single)
                               .Add(@"a-zA-Z_]\w*", TokenTypes.Name)
                               .Build();

            return(rules);
        }
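Setting builder.DefaultRegexOptions = RegexOptions.IgnoreCase suggests the builder folds a default option set into every rule it compiles. A tiny stand-in (not the real StateRuleBuilder) to show the idea:

    using System;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    // Stand-in for the builder: each Add() compiles its pattern with whatever
    // DefaultRegexOptions is set on the builder at the time.
    class TinyRuleBuilder
    {
        public RegexOptions DefaultRegexOptions { get; set; } = RegexOptions.None;

        private readonly List<(Regex Pattern, string TokenType)> _rules = new();

        public TinyRuleBuilder Add(string pattern, string tokenType)
        {
            _rules.Add((new Regex(pattern, DefaultRegexOptions), tokenType));
            return this;
        }

        public (Regex Pattern, string TokenType)[] Build() => _rules.ToArray();
    }

    static class DefaultOptionsSketch
    {
        public static void Main()
        {
            var builder = new TinyRuleBuilder { DefaultRegexOptions = RegexOptions.IgnoreCase };
            var rules   = builder.Add(@"url\(.*?\)", "String.Other").Build();

            Console.WriteLine(rules[0].Pattern.IsMatch("URL(logo.png)"));   // True, thanks to IgnoreCase
        }
    }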
Example #7
        /// <summary>
        /// Gets the state transition rules for the lexer. Each time a regex is matched,
        /// the internal state machine can be bumped to a new state which determines what
        /// regexes become valid again
        /// </summary>
        /// <returns></returns>
        protected override IDictionary <string, StateRule[]> GetStateRules()
        {
            var rules   = new Dictionary <string, StateRule[]>();
            var builder = new StateRuleBuilder();

            rules["basic"] = builder.NewRuleSet()
                             .ByGroups(@"\b(if|fi|else|while|do|done|for|then|return|function|case|elect|continue|until|esac|elif)(\s*)\b",
                                       TokenTypes.Keyword, TokenTypes.Text)
                             .Add(@"\b(alias|bg|bind|break|builtin|caller|cd|command|compgen|complete|declare|dirs|disown|echo|enable|eval|exec|exit|export|false|fc|fg|getopts|hash|help|history|jobs|kill|let|local|logout|popd|printf|pushd|pwd|read|readonly|set|shift|shopt|source|suspend|test|time|times|trap|true|type|typeset|ulimit|umask|unalias|unset|wait)(?=[\s)`])",
                                  TokenTypes.Name.Builtin)
                             .Add(@"\A#!.+\n", TokenTypes.Comment.Hashbang)
                             .Add(@"#.*\n", TokenTypes.Comment.Single)
                             .Add(@"\\[\w\W]", TokenTypes.String.Escape)
                             .ByGroups(@"(\b\w+)(\s*)(=)", TokenTypes.Name.Variable, TokenTypes.Text, TokenTypes.Operator)
                             .Add(@"[\[\]{}()=]", TokenTypes.Operator)
                             .Add(@"<<<", TokenTypes.Operator)
                             .Add(@"<<-?\s*(\'?)\\?(\w+)[\w\W]+?\2", TokenTypes.String)
                             .Add(@"&&|\|\|", TokenTypes.Operator)
                             .Build();

            rules["data"] = builder.NewRuleSet()
                            .Add(@"(?s)\$?""(\\\\|\\[0-7]+|\\.|[^""\\$])*""", TokenTypes.String.Double)
                            .Add(@"""", TokenTypes.String.Double, "string")
                            .Add(@"(?s)\$'(\\\\|\\[0-7]+|\\.|[^'\\])*'", TokenTypes.String.Single)
                            .Add(@"(?s)'.*?'", TokenTypes.String.Single)
                            .Add(@";", TokenTypes.Punctuation)
                            .Add(@"&", TokenTypes.Punctuation)
                            .Add(@"\|", TokenTypes.Punctuation)
                            .Add(@"\s+", TokenTypes.Text)
                            .Add(@"\d+(?= |\Z)", TokenTypes.Number)
                            .Add(@"[^=\s\[\]{}()$""\'`\\<&|;]+", TokenTypes.Text)
                            .Add(@"<", TokenTypes.Text)
                            .Build();

            rules["interp"] = builder.NewRuleSet()
                              .Add(@"\$\(\(", TokenTypes.Keyword, "math")
                              .Add(@"\$\(", TokenTypes.Keyword, "paren")
                              .Add(@"\$\{#?", TokenTypes.String.Interpol, "curly")
                              .Add(@"\$[a-zA-Z_][a-zA-Z0-9_]*", TokenTypes.Name.Variable)
                              .Add(@"\$(?:\d+|[#$?!_*@-])", TokenTypes.Name.Variable)
                              .Add(@"\$", TokenTypes.Text)
                              .Build();

            rules["root"] = builder.NewRuleSet()
                            .Include(rules["basic"])
                            .Add(@"`", TokenTypes.String.Backtick, "backticks")
                            .Include(rules["data"])
                            .Include(rules["interp"])
                            .Build();

            rules["string"] = builder.NewRuleSet()
                              .Add(@"""", TokenTypes.String.Double, "#pop")
                              .Add(@"(?s)(\\\\|\\[0-7]+|\\.|[^""\\$])+", TokenTypes.String.Double)
                              .Include(rules["interp"])
                              .Build();

            rules["curly"] = builder.NewRuleSet()
                             .Add(@"\}", TokenTypes.String.Interpol, "#pop")
                             .Add(@":-", TokenTypes.Keyword)
                             .Add(@"\w+", TokenTypes.Name.Variable)
                             .Add(@"[^}:""\'`$\\]+", TokenTypes.Punctuation)
                             .Add(@":", TokenTypes.Punctuation)
                             .Include(rules["root"])
                             .Build();

            rules["paren"] = builder.NewRuleSet()
                             .Add(@"\)", TokenTypes.Keyword, "#pop")
                             .Include(rules["root"])
                             .Build();

            rules["math"] = builder.NewRuleSet()
                            .Add(@"\)\)", TokenTypes.Keyword, "#pop")
                            .Add(@"[-+*/%^|&]|\*\*|\|\|", TokenTypes.Operator)
                            .Add(@"\d+#\d+", TokenTypes.Number)
                            .Add(@"\d+#(?! )", TokenTypes.Number)
                            .Add(@"\d+", TokenTypes.Number)
                            .Include(rules["root"])
                            .Build();

            rules["backticks"] = builder.NewRuleSet()
                                 .Add(@"`", TokenTypes.String.Backtick, "#pop")
                                 .Include(rules["root"])
                                 .Build();

            return(rules);
        }
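The heredoc rule above leans on a backreference: group 2 captures the delimiter word after <<, and \2 requires the same word to close the document, so the whole here-document comes out as a single String token. Checking that pattern in isolation:

    using System;
    using System.Text.RegularExpressions;

    static class HeredocSketch
    {
        public static void Main()
        {
            // Same shape as the rule in the "basic" state (quote handling simplified).
            var heredoc = new Regex(@"<<-?\s*('?)\\?(\w+)[\w\W]+?\2");
            string script = "cat <<EOF\nhello\nworld\nEOF\n";

            var m = heredoc.Match(script);
            Console.WriteLine(m.Success);          // True
            Console.WriteLine(m.Groups[2].Value);  // EOF -- the delimiter the backreference matched
        }
    }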