Example #1
        static Lexer()
        {
            Rules = new LexerRules<MathTokenType>
            {
                {"+", MathTokenType.Plus},
				{"-", MathTokenType.Minus},
				{"*", MathTokenType.Asterisk},
				{"/", MathTokenType.Slash},
				{"^", MathTokenType.Caret},
				{"(", MathTokenType.LeftParen},
				{")", MathTokenType.RightParen},
				{"++", MathTokenType.Increment},
				{"--", MathTokenType.Decrement},
				{"%", MathTokenType.Modulo},
				{"=", MathTokenType.Equals},
                {"$=", MathTokenType.Swap},
                {"+=", MathTokenType.AddAssign},
                {"-=", MathTokenType.SubAssign},
                {"*=", MathTokenType.MulAssign},
                {"/=", MathTokenType.DivAssign},
                {"%=", MathTokenType.ModAssign},
                {"^=", MathTokenType.PowAssign},
                {new Regex(@"-?(\d+(\.\d+)?|\.\d+)"), MathTokenType.Number},
                {new Regex(@"[a-zA-Z_][a-zA-Z0-9_]*"), MathTokenType.Name}
            };
            Rules.AddEndToken(MathTokenType.End);
        }
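As a quick sanity check, the regex rules above can be exercised on their own. A standalone sketch (only System.Text.RegularExpressions needed) showing what the Number rule accepts:

    // Standalone check of the Number rule's pattern: signed integers,
    // decimals, and bare fractional forms all match.
    var number = new Regex(@"-?(\d+(\.\d+)?|\.\d+)");
    Console.WriteLine(number.Match("-3.14").Value); // "-3.14"
    Console.WriteLine(number.Match(".5").Value);    // ".5"
    Console.WriteLine(number.Match("42").Value);    // "42"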
Example #2
 static Lexer()
 {
     Rules = new LexerRules<TokenType>
     {
         {EscapeRegex, TokenType.EscapeSequence},
         {RegexRegex, TokenType.Regex},
         {ConstantLiteralRegex, TokenType.ConstantLiteral},
         {"[", TokenType.LeftSquare}, {"]", TokenType.RightSquare},
         {"{", TokenType.LeftCurly}, {"}", TokenType.RightCurly},
         {"(", TokenType.LeftParen}, {")", TokenType.RightParen},
         {"<", TokenType.LeftAngle}, {">", TokenType.RightAngle},
         {"|", TokenType.Pipe},
         {";", TokenType.Semicolon},
         {":", TokenType.Colon},
         {"@", TokenType.At},
         {"?", TokenType.Question},
         {"::", TokenType.DoubleColon},
         {"?!", TokenType.Without},
         {"-", TokenType.Hyphen},
         {"!", TokenType.Exclamation},
         {"$", TokenType.Dollar},
         {CommentRegex, TokenType.Ignore, 3},
         {BlackspaceRegex, TokenType.Ignore, 2},
         {WhitespaceRegex, TokenType.Whitespace}
     };
     Rules.AddUndefinedCaptureRule(TokenType.Text, TruncatePadding);
     Rules.AddEndToken(TokenType.EOF);
     Rules.IgnoreRules.Add(TokenType.Ignore);
 }
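TruncatePadding itself is not shown in this example. Judging by how Example #13 applies the undefined-capture rule (it passes the captured span through rules.UndefinedCaptureRule.Item1), it is a Stringe-to-Stringe filter. A hypothetical stand-in, assuming the Stringe type exposes a Trim():

    // Hypothetical stand-in for TruncatePadding (the real implementation is
    // not shown here): strips surrounding padding from an unmatched span
    // before it is emitted as a TokenType.Text token.
    private static Stringe TruncatePadding(Stringe input) => input.Trim();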
Example #3
 static RantLexer()
 {
     Rules = new LexerRules <R>
     {
         { EscapeRegex, R.EscapeSequence },
         { RegexRegex, R.Regex },
         { ConstantLiteralRegex, R.ConstantLiteral },
         { "[", R.LeftSquare }, { "]", R.RightSquare },
         { "{", R.LeftCurly }, { "}", R.RightCurly },
         { "(", R.LeftParen }, { ")", R.RightParen },
         { "<", R.LeftAngle }, { ">", R.RightAngle },
         { "|", R.Pipe },
         { ";", R.Semicolon },
         { ":", R.Colon },
         { "@", R.At },
         { "?", R.Question },
         { "::", R.DoubleColon },
         { "?!", R.Without },
         { "-", R.Hyphen },
         { "!", R.Exclamation },
         { "$", R.Dollar },
         { "=", R.Equal },
         { "&", R.Ampersand },
         { "%", R.Percent },
         { "+", R.Plus },
         { "^", R.Caret },
         { WeightRegex, R.Weight },
         { CommentRegex, R.Ignore, 3 },
         { BlackspaceRegex, R.Ignore, 2 },
         { WhitespaceRegex, R.Whitespace }
     };
     Rules.AddUndefinedCaptureRule(R.Text, TruncatePadding);
     Rules.AddEndToken(R.EOF);
     Rules.IgnoreRules.Add(R.Ignore);
 }
Example #4
 static Lexer()
 {
     Rules = new LexerRules <RMathToken>
     {
         { "+", RMathToken.Plus },
         { "-", RMathToken.Minus },
         { "*", RMathToken.Asterisk },
         { "/", RMathToken.Slash },
         { "^", RMathToken.Caret },
         { "(", RMathToken.LeftParen },
         { ")", RMathToken.RightParen },
         { "++", RMathToken.Increment },
         { "--", RMathToken.Decrement },
         { "%", RMathToken.Modulo },
         { "=", RMathToken.Equals },
         { "$=", RMathToken.Swap },
         { "+=", RMathToken.AddAssign },
         { "-=", RMathToken.SubAssign },
         { "*=", RMathToken.MulAssign },
         { "/=", RMathToken.DivAssign },
         { "%=", RMathToken.ModAssign },
         { "^=", RMathToken.PowAssign },
         { "|", RMathToken.Pipe },
         { new Regex(@"(\d+(\.\d+)?|\.\d+)"), RMathToken.Number },
         { new Regex(@"[a-zA-Z_][a-zA-Z0-9_]*"), RMathToken.Name }
     };
     Rules.AddEndToken(RMathToken.End);
 }
Example #5
 static Lexer()
 {
     Rules = new LexerRules <TokenType>
     {
         { EscapeRegex, TokenType.EscapeSequence },
         { RegexRegex, TokenType.Regex },
         { ConstantLiteralRegex, TokenType.ConstantLiteral },
         { "[", TokenType.LeftSquare }, { "]", TokenType.RightSquare },
         { "{", TokenType.LeftCurly }, { "}", TokenType.RightCurly },
         { "(", TokenType.LeftParen }, { ")", TokenType.RightParen },
         { "<", TokenType.LeftAngle }, { ">", TokenType.RightAngle },
         { "|", TokenType.Pipe },
         { ";", TokenType.Semicolon },
         { ":", TokenType.Colon },
         { "@", TokenType.At },
         { "?", TokenType.Question },
         { "::", TokenType.DoubleColon },
         { "?!", TokenType.Without },
         { "-", TokenType.Hyphen },
         { "!", TokenType.Exclamation },
         { "$", TokenType.Dollar },
         { CommentRegex, TokenType.Ignore, 3 },
         { BlackspaceRegex, TokenType.Ignore, 2 },
         { WhitespaceRegex, TokenType.Whitespace }
     };
     Rules.AddUndefinedCaptureRule(TokenType.Text, TruncatePadding);
     Rules.AddEndToken(TokenType.EOF);
     Rules.IgnoreRules.Add(TokenType.Ignore);
 }
Example #6
 static Dic2Lexer()
 {
     Rules = new LexerRules <DicTokenType>
     {
         { new Regex(@"\#\s*(?<value>.*?)[\s\r]*(?=\#|\||\>|\@|$)", DicRegexOptions), DicTokenType.Directive, 2 },
         { new Regex(@"\|\s*(?<value>.*?)[\s\r]*(?=\#|\||\>|\@|$)", DicRegexOptions), DicTokenType.Property, 2 },
         { new Regex(@"\>\s*(?<value>.*?)[\s\r]*(?=\#|\||\>|\@|$)", DicRegexOptions), DicTokenType.Entry, 2 },
         { new Regex(@"\@.*?$", DicRegexOptions | RegexOptions.Multiline), DicTokenType.Ignore, 2 },
         { new Regex(@"\s+"), DicTokenType.Ignore }
     };
     Rules.AddEndToken(DicTokenType.EOF);
     Rules.IgnoreRules.Add(DicTokenType.Ignore);
 }
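Each rule captures a named group called value, and the ReadToken implementation in Example #13 narrows the returned token to that group when it matches, so the leading #, |, or > marker and the trailing padding never reach the consumer. The directive rule can be checked in isolation (DicRegexOptions is omitted here to keep the sketch standalone):

    // Standalone check of the directive rule's named capture group.
    var directive = new Regex(@"\#\s*(?<value>.*?)[\s\r]*(?=\#|\||\>|\@|$)");
    var match = directive.Match("#class noun");
    Console.WriteLine(match.Groups["value"].Value); // "class noun"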
Example #7
        private static Parser CreateParser()
        {
            // Create the object tree without DI Framework
            var expressionParser = new ExpressionParser();
            var factorParser     = new FactorParser(expressionParser);
            var termParser       = new TermParser(factorParser);

            expressionParser.TermParser = termParser;
            var lexerRules  = new LexerRules();
            var tokenizer   = new Tokenizer(lexerRules, s => new LexerReader(s), lexems => new LinePositionCalculator(lexems));
            var tokenWalker = new TokenWalker(tokenizer, () => new EpsilonToken(), lexems => new LinePositionCalculator(lexems));

            return(new Parser(tokenWalker, expressionParser));
        }
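The factory is only shown being built, not exercised. A sketch of the same pipeline driven directly, borrowing the Scan/Pop pattern from the test in Example #12 (the exact token types produced depend on this project's LexerRules):

    // Drive the tokenizer pipeline directly, as the test in Example #12 does.
    var rules = new LexerRules();
    var tokenizer = new Tokenizer(rules, s => new LexerReader(s), lexems => new LinePositionCalculator(lexems));
    var walker = new TokenWalker(tokenizer, () => new EpsilonToken(), lexems => new LinePositionCalculator(lexems));
    walker.Scan("1 + 2");
    var first = walker.Pop().Token; // first lexem of "1 + 2"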
Example #8
        public static Parser Create()
        {
            // Create the object tree without DI Framework
            var expressionParser  = new ExpressionParser();
            var applicationParser = new ApplicationParser(expressionParser);

            expressionParser.ApplicationParser = applicationParser;

            var lexerRules  = new LexerRules();
            var tokenizer   = new Tokenizer(lexerRules, s => new LexerReader(s), lexems => new LinePositionCalculator(lexems));
            var tokenWalker = new TokenWalker(tokenizer, () => new EpsilonToken(), lexems => new LinePositionCalculator(lexems));

            return(new Parser(tokenWalker, applicationParser));
        }
Example #9
		public Lexer(string text)
		{
			this.text = text;
			//Console.WriteLine(text);

			RegexOptions defaultOptions = RegexOptions.Compiled;

			Regex whitespaceRegex = new Regex(@"\s+", defaultOptions);
			Regex commentRegex = new Regex(@"\/\/.*", defaultOptions | RegexOptions.Multiline);

			Regex numberRegex = new Regex(@"-?\d+(\.\d+)?", defaultOptions);
			Regex stringRegex = new Regex(@""".*?(?<!\\)\""", defaultOptions);

			lexerRules = new LexerRules<TokenType>
			{
				{"(", TokenType.LeftParen},
				{")", TokenType.RightParen},
				{",", TokenType.Comma},
				{new Regex(@"\bis\b", defaultOptions), TokenType.Assign},
				{new Regex(@"\bplus\b", defaultOptions), TokenType.Plus},
				{new Regex(@"\bminus\b", defaultOptions), TokenType.Minus},
				{new Regex(@"\bmultiplied by\b", defaultOptions), TokenType.Multiply},
				{new Regex(@"\bdivided by\b", defaultOptions), TokenType.Divide},
				{@"\", TokenType.ForwardSlash},
				{new Regex(@"\bto the power of\b", defaultOptions), TokenType.Exponent},
				{new Regex(@"\bnot\b", defaultOptions), TokenType.Exclam},
				{new Regex(@"\bequals\b", defaultOptions), TokenType.Equals},
				{"[", TokenType.LeftSquare},
				{"]", TokenType.RightSquare},
				{numberRegex, TokenType.Numeral},
				{stringRegex, TokenType.String},
				{new Regex(@"\b(true|false)\b", defaultOptions | RegexOptions.IgnoreCase), TokenType.Boolean},
				{new Regex(@"\bfunction\b", defaultOptions), TokenType.Function},
				{new Regex(@"\bend\b", defaultOptions), TokenType.End},
				{new Regex(@"\bif\b", defaultOptions), TokenType.If},
				{new Regex(@"\bthen\b", defaultOptions), TokenType.Then},
				{new Regex(@"\btelse\b", defaultOptions), TokenType.Else},
				{new Regex(@"\breturn\b", defaultOptions), TokenType.Return},

				{commentRegex, TokenType.Ignore},
				{whitespaceRegex, TokenType.Ignore}
			};
			lexerRules.AddEndToken(TokenType.EOF);
			lexerRules.AddUndefinedCaptureRule(TokenType.Name, TruncatePadding);

			lexerRules.IgnoreRules.Add(TokenType.Ignore);
		}
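The word-based operator rules rely on \b boundaries, so they fire only on whole words in the source text. One of them checked in isolation:

    // Standalone check of a word-operator rule from above.
    var multiply = new Regex(@"\bmultiplied by\b");
    Console.WriteLine(multiply.IsMatch("x is 2 multiplied by 3")); // True
    Console.WriteLine(multiply.IsMatch("unmultiplied bytes"));     // False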
Example #10
 static RantLexer()
 {
     Rules = new LexerRules <R>
     {
         { EscapeRegex, R.EscapeSequence },
         { RegexRegex, R.Regex },
         { ConstantLiteralRegex, R.ConstantLiteral },
         { "[", R.LeftSquare }, { "]", R.RightSquare },
         { "{", R.LeftCurly }, { "}", R.RightCurly },
         { "<", R.LeftAngle }, { ">", R.RightAngle },
         { "|", R.Pipe },
         { ";", R.Semicolon },
         { ":", R.Colon },
         { "@", R.At },
         { "?", R.Question },
         { "::", R.DoubleColon },
         { "?!", R.Without },
         { "-", R.Hyphen },
         { SymbolCodes.EnDash, R.Text },
         { SymbolCodes.EmDash, R.Text },
         { SymbolCodes.Copyright, R.Text, true },
         { SymbolCodes.RegisteredTM, R.Text, true },
         { SymbolCodes.Trademark, R.Text, true },
         { SymbolCodes.Eszett, R.Text, true },
         { SymbolCodes.Bullet, R.Text, true },
         { "!", R.Exclamation },
         { "$", R.Dollar },
         { "=", R.Equal },
         { "&", R.Ampersand },
         { "%", R.Percent },
         { "+", R.Plus },
         { "^", R.Caret },
         { "`", R.Backtick },
         { SyllableRangeRegex, R.RangeLiteral },
         { WeightRegex, R.Weight },
         { CommentRegex, R.Ignore, 3 },
         { BlackspaceRegex, R.Ignore, 2 },
         { WhitespaceRegex, R.Whitespace }
     };
     Rules.AddUndefinedCaptureRule(R.Text, TruncatePadding);
     Rules.AddEndToken(R.EOF);
     Rules.IgnoreRules.Add(R.Ignore);
 }
Example #11
        public static SolutionParser Create()
        {
            // Create the object tree without DI Framework
            var lexerRules = new LexerRules();
            var tokenizer  = new Tokenizer(
                lexerRules: lexerRules,
                newLexerReader: s => new LexerReader(s),
                newLinePositionCalculator: l => new LinePositionCalculator(l));
            var tokenWalker = new TokenWalker(
                tokenizer: tokenizer,
                newEpsilonToken: () => new EpsilonToken(),
                newLinePositionCalculator: l => new LinePositionCalculator(l));
            var variableParser      = new VariableParser();
            var headerParser        = new HeaderParser(variableParser);
            var projectParser       = new ProjectParser();
            var globalSectionParser = new GlobalSectionParser();

            return(new SolutionParser(tokenWalker, headerParser, projectParser, globalSectionParser));
        }
Example #12
        public void GivenLambdaCalculusSourceTheTokenizerGivesAUsefulTokenStream()
        {
            var lexerRules  = new LexerRules();
            var tokenizer   = new Tokenizer(lexerRules, s => new LexerReader(s), lexems => new LinePositionCalculator(lexems));
            var tokenWalker = new TokenWalker(tokenizer, () => new EpsilonToken(), lexems => new LinePositionCalculator(lexems));

            tokenWalker.Scan(@"Ī»s.(Ī»z.(s z))");

            Assert.IsType <LambdaToken>(tokenWalker.Pop().Token);
            Assert.IsType <IdentifierToken>(tokenWalker.Pop().Token);
            Assert.IsType <DotToken>(tokenWalker.Pop().Token);
            Assert.IsType <OpenParenthesisToken>(tokenWalker.Pop().Token);
            Assert.IsType <LambdaToken>(tokenWalker.Pop().Token);
            Assert.IsType <IdentifierToken>(tokenWalker.Pop().Token);
            Assert.IsType <DotToken>(tokenWalker.Pop().Token);
            Assert.IsType <OpenParenthesisToken>(tokenWalker.Pop().Token);
            Assert.IsType <IdentifierToken>(tokenWalker.Pop().Token);
            Assert.IsType <WhiteSpaceToken>(tokenWalker.Pop().Token);
            Assert.IsType <IdentifierToken>(tokenWalker.Pop().Token);
            Assert.IsType <ClosedParenthesisToken>(tokenWalker.Pop().Token);
            Assert.IsType <ClosedParenthesisToken>(tokenWalker.Pop().Token);
        }
Example #13
        /// <summary>
        /// Reads the next token from the current position, then advances the position past it.
        /// </summary>
        /// <typeparam name="T">The token identifier type to use.</typeparam>
        /// <param name="rules">The lexer rules to use.</param>
        /// <returns>The next token read from the input.</returns>
        public Token <T> ReadToken <T>(LexerRules <T> rules) where T : struct
        {
readStart:

            if (EndOfStringe)
            {
                if (rules.EndToken != null && !rules.IgnoreRules.Contains(rules.EndToken.Item2))
                {
                    return(new Token <T>(rules.EndToken.Item2, _stringe.Substringe(_pos, 0)));
                }

                throw new InvalidOperationException("Unexpected end of input.");
            }

            // Indicates if undefined tokens should be created
            bool captureUndef = rules.UndefinedCaptureRule != null;

            // Tracks the beginning of the undefined token content
            int u = _pos;

            do
            {
                // If we've reached the end, return undefined token, if present.
                if (EndOfStringe && captureUndef && u < _pos)
                {
                    if (rules.IgnoreRules.Contains(rules.UndefinedCaptureRule.Item2))
                    {
                        goto readStart;
                    }
                    return(new Token <T>(rules.UndefinedCaptureRule.Item2, rules.UndefinedCaptureRule.Item1(_stringe.Slice(u, _pos))));
                }

                // Check high priority symbol rules
                foreach (var t in rules.HighSymbols.Where(t => IsNext(t.Item1, t.Item3)))
                {
                    // Return undefined token if present
                    if (captureUndef && u < _pos)
                    {
                        if (rules.IgnoreRules.Contains(rules.UndefinedCaptureRule.Item2))
                        {
                            goto readStart;
                        }
                        return(new Token <T>(rules.UndefinedCaptureRule.Item2, rules.UndefinedCaptureRule.Item1(_stringe.Slice(u, _pos))));
                    }

                    // Return symbol token
                    var c = _stringe.Substringe(_pos, t.Item1.Length);
                    _pos += t.Item1.Length;
                    if (rules.IgnoreRules.Contains(t.Item2))
                    {
                        goto readStart;
                    }
                    return(new Token <T>(t.Item2, c));
                }

                const string tokenGroupName = "value";

                // Check regex rules
                if (rules.RegexList.Any())
                {
                    Match longestMatch = null;
                    var   id           = default(T);

                    // Find the longest match, if any.
                    foreach (var re in rules.RegexList)
                    {
                        var match = re.Item1.Match(_stringe.Value, _pos);
                        if (match.Success && match.Index == _pos && (longestMatch == null || match.Length > longestMatch.Length))
                        {
                            longestMatch = match;
                            id           = re.Item2.GetValue(match);
                        }
                    }

                    // If there was a match, generate a token.
                    if (longestMatch != null)
                    {
                        // Return undefined token if present
                        if (captureUndef && u < _pos)
                        {
                            if (rules.IgnoreRules.Contains(rules.UndefinedCaptureRule.Item2))
                            {
                                goto readStart;
                            }
                            return(new Token <T>(rules.UndefinedCaptureRule.Item2, rules.UndefinedCaptureRule.Item1(_stringe.Slice(u, _pos))));
                        }

                        // Return longest match, narrow down to <value> group if available.
                        var group = longestMatch.Groups[tokenGroupName];
                        _pos += longestMatch.Length;

                        if (group.Success)
                        {
                            if (rules.IgnoreRules.Contains(id))
                            {
                                goto readStart;
                            }
                            return(new Token <T>(id, _stringe.Substringe(group.Index, group.Length)));
                        }

                        if (rules.IgnoreRules.Contains(id))
                        {
                            goto readStart;
                        }
                        return(new Token <T>(id, _stringe.Substringe(longestMatch.Index, longestMatch.Length)));
                    }
                }

                // Check normal priority symbol rules
                foreach (var t in rules.NormalSymbols.Where(t => IsNext(t.Item1, t.Item3)))
                {
                    // Return undefined token if present
                    if (captureUndef && u < _pos)
                    {
                        if (rules.IgnoreRules.Contains(rules.UndefinedCaptureRule.Item2))
                        {
                            goto readStart;
                        }
                        return(new Token <T>(rules.UndefinedCaptureRule.Item2, rules.UndefinedCaptureRule.Item1(_stringe.Slice(u, _pos))));
                    }

                    // Return symbol token
                    var c = _stringe.Substringe(_pos, t.Item1.Length);
                    _pos += t.Item1.Length;
                    if (rules.IgnoreRules.Contains(t.Item2))
                    {
                        goto readStart;
                    }
                    return(new Token <T>(t.Item2, c));
                }

                _pos++;

                if (!captureUndef)
                {
                    var bad = _stringe.Slice(u, _pos);
                    throw new InvalidOperationException(String.Concat("(Ln ", bad.Line, ", Col ", bad.Column, ") Invalid token '", bad, "'"));
                }
            } while (captureUndef);

            throw new InvalidOperationException("This should never happen.");
        }
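A minimal driver sketch for this method, assuming the enclosing lexer type can be constructed over the input text and exposes the static Rules from Example #1 (both assumptions; only fragments of that class are shown above), and that Token<T> exposes its identifier as ID (a property name assumed here):

    // Hypothetical driver: pull tokens until the configured end token appears.
    var lexer = new Lexer("1 + 2 * 3");
    Token<MathTokenType> token;
    do
    {
        token = lexer.ReadToken(Lexer.Rules);
        Console.WriteLine(token); // prints the token's content
    } while (!token.ID.Equals(MathTokenType.End));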