static Lexer()
{
    Rules = new LexerRules<MathTokenType>
    {
        { "+", MathTokenType.Plus },
        { "-", MathTokenType.Minus },
        { "*", MathTokenType.Asterisk },
        { "/", MathTokenType.Slash },
        { "^", MathTokenType.Caret },
        { "(", MathTokenType.LeftParen },
        { ")", MathTokenType.RightParen },
        { "++", MathTokenType.Increment },
        { "--", MathTokenType.Decrement },
        { "%", MathTokenType.Modulo },
        { "=", MathTokenType.Equals },
        { "$=", MathTokenType.Swap },
        { "+=", MathTokenType.AddAssign },
        { "-=", MathTokenType.SubAssign },
        { "*=", MathTokenType.MulAssign },
        { "/=", MathTokenType.DivAssign },
        { "%=", MathTokenType.ModAssign },
        { "^=", MathTokenType.PowAssign },
        { new Regex(@"-?(\d+(\.\d+)?|\.\d+)"), MathTokenType.Number },
        { new Regex(@"[a-zA-Z_][a-zA-Z0-9_]*"), MathTokenType.Name }
    };
    Rules.AddEndToken(MathTokenType.End);
}
static Lexer()
{
    Rules = new LexerRules<TokenType>
    {
        { EscapeRegex, TokenType.EscapeSequence },
        { RegexRegex, TokenType.Regex },
        { ConstantLiteralRegex, TokenType.ConstantLiteral },
        { "[", TokenType.LeftSquare },
        { "]", TokenType.RightSquare },
        { "{", TokenType.LeftCurly },
        { "}", TokenType.RightCurly },
        { "(", TokenType.LeftParen },
        { ")", TokenType.RightParen },
        { "<", TokenType.LeftAngle },
        { ">", TokenType.RightAngle },
        { "|", TokenType.Pipe },
        { ";", TokenType.Semicolon },
        { ":", TokenType.Colon },
        { "@", TokenType.At },
        { "?", TokenType.Question },
        { "::", TokenType.DoubleColon },
        { "?!", TokenType.Without },
        { "-", TokenType.Hyphen },
        { "!", TokenType.Exclamation },
        { "$", TokenType.Dollar },
        { CommentRegex, TokenType.Ignore, 3 },
        { BlackspaceRegex, TokenType.Ignore, 2 },
        { WhitespaceRegex, TokenType.Whitespace }
    };
    Rules.AddUndefinedCaptureRule(TokenType.Text, TruncatePadding);
    Rules.AddEndToken(TokenType.EOF);
    Rules.IgnoreRules.Add(TokenType.Ignore);
}
static RantLexer()
{
    Rules = new LexerRules<R>
    {
        { EscapeRegex, R.EscapeSequence },
        { RegexRegex, R.Regex },
        { ConstantLiteralRegex, R.ConstantLiteral },
        { "[", R.LeftSquare },
        { "]", R.RightSquare },
        { "{", R.LeftCurly },
        { "}", R.RightCurly },
        { "(", R.LeftParen },
        { ")", R.RightParen },
        { "<", R.LeftAngle },
        { ">", R.RightAngle },
        { "|", R.Pipe },
        { ";", R.Semicolon },
        { ":", R.Colon },
        { "@", R.At },
        { "?", R.Question },
        { "::", R.DoubleColon },
        { "?!", R.Without },
        { "-", R.Hyphen },
        { "!", R.Exclamation },
        { "$", R.Dollar },
        { "=", R.Equal },
        { "&", R.Ampersand },
        { "%", R.Percent },
        { "+", R.Plus },
        { "^", R.Caret },
        { WeightRegex, R.Weight },
        { CommentRegex, R.Ignore, 3 },
        { BlackspaceRegex, R.Ignore, 2 },
        { WhitespaceRegex, R.Whitespace }
    };
    Rules.AddUndefinedCaptureRule(R.Text, TruncatePadding);
    Rules.AddEndToken(R.EOF);
    Rules.IgnoreRules.Add(R.Ignore);
}
static Lexer()
{
    Rules = new LexerRules<RMathToken>
    {
        { "+", RMathToken.Plus },
        { "-", RMathToken.Minus },
        { "*", RMathToken.Asterisk },
        { "/", RMathToken.Slash },
        { "^", RMathToken.Caret },
        { "(", RMathToken.LeftParen },
        { ")", RMathToken.RightParen },
        { "++", RMathToken.Increment },
        { "--", RMathToken.Decrement },
        { "%", RMathToken.Modulo },
        { "=", RMathToken.Equals },
        { "$=", RMathToken.Swap },
        { "+=", RMathToken.AddAssign },
        { "-=", RMathToken.SubAssign },
        { "*=", RMathToken.MulAssign },
        { "/=", RMathToken.DivAssign },
        { "%=", RMathToken.ModAssign },
        { "^=", RMathToken.PowAssign },
        { "|", RMathToken.Pipe },
        { new Regex(@"(\d+(\.\d+)?|\.\d+)"), RMathToken.Number },
        { new Regex(@"[a-zA-Z_][a-zA-Z0-9_]*"), RMathToken.Name }
    };
    Rules.AddEndToken(RMathToken.End);
}
static Dic2Lexer()
{
    Rules = new LexerRules<DicTokenType>
    {
        // Each marker (#, |, >) captures the text following it, up to the
        // next marker or the end of input, into the named group "value".
        { new Regex(@"\#\s*(?<value>.*?)[\s\r]*(?=\#|\||\>|\@|$)", DicRegexOptions), DicTokenType.Directive, 2 },
        { new Regex(@"\|\s*(?<value>.*?)[\s\r]*(?=\#|\||\>|\@|$)", DicRegexOptions), DicTokenType.Property, 2 },
        { new Regex(@"\>\s*(?<value>.*?)[\s\r]*(?=\#|\||\>|\@|$)", DicRegexOptions), DicTokenType.Entry, 2 },
        // @-comments and bare whitespace are dropped from the token stream.
        { new Regex(@"\@.*?$", DicRegexOptions | RegexOptions.Multiline), DicTokenType.Ignore, 2 },
        { new Regex(@"\s+"), DicTokenType.Ignore }
    };
    Rules.AddEndToken(DicTokenType.EOF);
    Rules.IgnoreRules.Add(DicTokenType.Ignore);
}
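To make the capture behaviour concrete, here is an illustrative fragment. The input text is synthetic, and both the reader type (StringeReader, inferred from the ReadToken method shown at the end of this listing) and the visibility of Rules are assumptions rather than anything this snippet states:

// Synthetic sample shaped after the four regexes above; illustrative only.
const string sample = "#class noun\n>apple |weight 1\n@ this line is skipped";
var reader = new StringeReader(sample);
// Roughly the expected stream: Directive("class noun"), Entry("apple"),
// Property("weight 1"), then EOF. The @-comment and whitespace never reach
// the caller because DicTokenType.Ignore is registered in IgnoreRules.
var first = reader.ReadToken(Dic2Lexer.Rules);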
private static Parser CreateParser()
{
    // Create the object tree without DI framework
    var expressionParser = new ExpressionParser();
    var factorParser = new FactorParser(expressionParser);
    var termParser = new TermParser(factorParser);
    expressionParser.TermParser = termParser;

    var lexerRules = new LexerRules();
    var tokenizer = new Tokenizer(lexerRules, s => new LexerReader(s), lexems => new LinePositionCalculator(lexems));
    var tokenWalker = new TokenWalker(tokenizer, () => new EpsilonToken(), lexems => new LinePositionCalculator(lexems));

    return new Parser(tokenWalker, expressionParser);
}
public static Parser Create()
{
    // Create the object tree without DI framework
    var expressionParser = new ExpressionParser();
    var applicationParser = new ApplicationParser(expressionParser);
    expressionParser.ApplicationParser = applicationParser;

    var lexerRules = new LexerRules();
    var tokenizer = new Tokenizer(lexerRules, s => new LexerReader(s), lexems => new LinePositionCalculator(lexems));
    var tokenWalker = new TokenWalker(tokenizer, () => new EpsilonToken(), lexems => new LinePositionCalculator(lexems));

    return new Parser(tokenWalker, applicationParser);
}
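Independently of the Parser facade, the same wiring can be exercised by hand. This sketch reuses only the members that the tokenizer test near the end of this listing demonstrates (Scan and Pop):

// Sketch: drives a freshly wired TokenWalker directly, bypassing the Parser.
var lexerRules = new LexerRules();
var tokenizer = new Tokenizer(lexerRules, s => new LexerReader(s), lexems => new LinePositionCalculator(lexems));
var tokenWalker = new TokenWalker(tokenizer, () => new EpsilonToken(), lexems => new LinePositionCalculator(lexems));

tokenWalker.Scan(@"λx.x");
var lambda = tokenWalker.Pop().Token;     // LambdaToken
var parameter = tokenWalker.Pop().Token;  // IdentifierToken
var dot = tokenWalker.Pop().Token;        // DotToken
var body = tokenWalker.Pop().Token;       // IdentifierToken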
public Lexer(string text)
{
    this.text = text;

    RegexOptions defaultOptions = RegexOptions.Compiled;
    Regex whitespaceRegex = new Regex(@"\s+", defaultOptions);
    Regex commentRegex = new Regex(@"\/\/.*", defaultOptions | RegexOptions.Multiline);
    Regex numberRegex = new Regex(@"-?\d+(\.\d+)?", defaultOptions); // integer with optional decimal part
    Regex stringRegex = new Regex(@""".*?(?<!\\)\""", defaultOptions);

    lexerRules = new LexerRules<TokenType>
    {
        { "(", TokenType.LeftParen },
        { ")", TokenType.RightParen },
        { ",", TokenType.Comma },
        { new Regex(@"\bis\b", defaultOptions), TokenType.Assign },
        { new Regex(@"\bplus\b", defaultOptions), TokenType.Plus },
        { new Regex(@"\bminus\b", defaultOptions), TokenType.Minus },
        { new Regex(@"\bmultiplied by\b", defaultOptions), TokenType.Multiply },
        { new Regex(@"\bdivided by\b", defaultOptions), TokenType.Divide },
        { @"\", TokenType.ForwardSlash },
        { new Regex(@"\bto the power of\b", defaultOptions), TokenType.Exponent },
        { new Regex(@"\bnot\b", defaultOptions), TokenType.Exclam },
        { new Regex(@"\bequals\b", defaultOptions), TokenType.Equals },
        { "[", TokenType.LeftSquare },
        { "]", TokenType.RightSquare },
        { numberRegex, TokenType.Numeral },
        { stringRegex, TokenType.String },
        { new Regex(@"\b(true|false)\b", defaultOptions | RegexOptions.IgnoreCase), TokenType.Boolean },
        { new Regex(@"\bfunction\b", defaultOptions), TokenType.Function },
        { new Regex(@"\bend\b", defaultOptions), TokenType.End },
        { new Regex(@"\bif\b", defaultOptions), TokenType.If },
        { new Regex(@"\bthen\b", defaultOptions), TokenType.Then },
        { new Regex(@"\belse\b", defaultOptions), TokenType.Else },
        { new Regex(@"\breturn\b", defaultOptions), TokenType.Return },
        { commentRegex, TokenType.Ignore },
        { whitespaceRegex, TokenType.Ignore }
    };
    lexerRules.AddEndToken(TokenType.EOF);
    lexerRules.AddUndefinedCaptureRule(TokenType.Name, TruncatePadding);
    lexerRules.IgnoreRules.Add(TokenType.Ignore);
}
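Given those rules, a short input in this English-like syntax resolves as follows. This is an illustration of the rule table only, since the snippet does not show how the class exposes its tokens:

// Illustration only: the token stream in the comment follows from the rule
// table above, not from any API shown in the snippet.
var lexer = new Lexer("answer is 2 multiplied by 21");
// Expected stream: Name("answer"), Assign("is"), Numeral("2"),
// Multiply("multiplied by"), Numeral("21"), EOF.
// "answer" matches no rule, so it falls back to TokenType.Name via the
// undefined-capture rule; whitespace maps to Ignore and is dropped.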
static RantLexer()
{
    Rules = new LexerRules<R>
    {
        { EscapeRegex, R.EscapeSequence },
        { RegexRegex, R.Regex },
        { ConstantLiteralRegex, R.ConstantLiteral },
        { "[", R.LeftSquare },
        { "]", R.RightSquare },
        { "{", R.LeftCurly },
        { "}", R.RightCurly },
        { "<", R.LeftAngle },
        { ">", R.RightAngle },
        { "|", R.Pipe },
        { ";", R.Semicolon },
        { ":", R.Colon },
        { "@", R.At },
        { "?", R.Question },
        { "::", R.DoubleColon },
        { "?!", R.Without },
        { "-", R.Hyphen },
        { SymbolCodes.EnDash, R.Text },
        { SymbolCodes.EmDash, R.Text },
        { SymbolCodes.Copyright, R.Text, true },
        { SymbolCodes.RegisteredTM, R.Text, true },
        { SymbolCodes.Trademark, R.Text, true },
        { SymbolCodes.Eszett, R.Text, true },
        { SymbolCodes.Bullet, R.Text, true },
        { "!", R.Exclamation },
        { "$", R.Dollar },
        { "=", R.Equal },
        { "&", R.Ampersand },
        { "%", R.Percent },
        { "+", R.Plus },
        { "^", R.Caret },
        { "`", R.Backtick },
        { SyllableRangeRegex, R.RangeLiteral },
        { WeightRegex, R.Weight },
        { CommentRegex, R.Ignore, 3 },
        { BlackspaceRegex, R.Ignore, 2 },
        { WhitespaceRegex, R.Whitespace }
    };
    Rules.AddUndefinedCaptureRule(R.Text, TruncatePadding);
    Rules.AddEndToken(R.EOF);
    Rules.IgnoreRules.Add(R.Ignore);
}
public static SolutionParser Create()
{
    // Create the object tree without DI framework
    var lexerRules = new LexerRules();
    var tokenizer = new Tokenizer(
        lexerRules: lexerRules,
        newLexerReader: s => new LexerReader(s),
        newLinePositionCalculator: l => new LinePositionCalculator(l));
    var tokenWalker = new TokenWalker(
        tokenizer: tokenizer,
        newEpsilonToken: () => new EpsilonToken(),
        newLinePositionCalculator: l => new LinePositionCalculator(l));

    var variableParser = new VariableParser();
    var headerParser = new HeaderParser(variableParser);
    var projectParser = new ProjectParser();
    var globalSectionParser = new GlobalSectionParser();

    return new SolutionParser(tokenWalker, headerParser, projectParser, globalSectionParser);
}
public void GivenLambdaCalculusSourceTheTokenizerGivesAUsefulTokenStream()
{
    var lexerRules = new LexerRules();
    var tokenizer = new Tokenizer(lexerRules, s => new LexerReader(s), lexems => new LinePositionCalculator(lexems));
    var tokenWalker = new TokenWalker(tokenizer, () => new EpsilonToken(), lexems => new LinePositionCalculator(lexems));

    tokenWalker.Scan(@"λs.(λz.(s z))");

    Assert.IsType<LambdaToken>(tokenWalker.Pop().Token);
    Assert.IsType<IdentifierToken>(tokenWalker.Pop().Token);
    Assert.IsType<DotToken>(tokenWalker.Pop().Token);
    Assert.IsType<OpenParenthesisToken>(tokenWalker.Pop().Token);
    Assert.IsType<LambdaToken>(tokenWalker.Pop().Token);
    Assert.IsType<IdentifierToken>(tokenWalker.Pop().Token);
    Assert.IsType<DotToken>(tokenWalker.Pop().Token);
    Assert.IsType<OpenParenthesisToken>(tokenWalker.Pop().Token);
    Assert.IsType<IdentifierToken>(tokenWalker.Pop().Token);
    Assert.IsType<WhiteSpaceToken>(tokenWalker.Pop().Token);
    Assert.IsType<IdentifierToken>(tokenWalker.Pop().Token);
    Assert.IsType<ClosedParenthesisToken>(tokenWalker.Pop().Token);
    Assert.IsType<ClosedParenthesisToken>(tokenWalker.Pop().Token);
}
/// <summary>
/// Reads the next token from the current position, then advances the position past it.
/// </summary>
/// <typeparam name="T">The token identifier type to use.</typeparam>
/// <param name="rules">The lexer rules to use.</param>
/// <returns>The next token in the stream.</returns>
public Token<T> ReadToken<T>(LexerRules<T> rules) where T : struct
{
readStart:
    if (EndOfStringe)
    {
        if (rules.EndToken != null && !rules.IgnoreRules.Contains(rules.EndToken.Item2))
        {
            return new Token<T>(rules.EndToken.Item2, _stringe.Substringe(_pos, 0));
        }
        throw new InvalidOperationException("Unexpected end of input.");
    }

    // Indicates if undefined tokens should be created
    bool captureUndef = rules.UndefinedCaptureRule != null;

    // Tracks the beginning of the undefined token content
    int u = _pos;

    do
    {
        // If we've reached the end, return undefined token, if present.
        if (EndOfStringe && captureUndef && u < _pos)
        {
            if (rules.IgnoreRules.Contains(rules.UndefinedCaptureRule.Item2)) goto readStart;
            return new Token<T>(rules.UndefinedCaptureRule.Item2, rules.UndefinedCaptureRule.Item1(_stringe.Slice(u, _pos)));
        }

        // Check high priority symbol rules
        foreach (var t in rules.HighSymbols.Where(t => IsNext(t.Item1, t.Item3)))
        {
            // Return undefined token if present
            if (captureUndef && u < _pos)
            {
                if (rules.IgnoreRules.Contains(rules.UndefinedCaptureRule.Item2)) goto readStart;
                return new Token<T>(rules.UndefinedCaptureRule.Item2, rules.UndefinedCaptureRule.Item1(_stringe.Slice(u, _pos)));
            }

            // Return symbol token
            var c = _stringe.Substringe(_pos, t.Item1.Length);
            _pos += t.Item1.Length;
            if (rules.IgnoreRules.Contains(t.Item2)) goto readStart;
            return new Token<T>(t.Item2, c);
        }

        const string tokenGroupName = "value";

        // Check regex rules
        if (rules.RegexList.Any())
        {
            Match longestMatch = null;
            var id = default(T);

            // Find the longest match, if any.
            foreach (var re in rules.RegexList)
            {
                var match = re.Item1.Match(_stringe.Value, _pos);
                if (match.Success && match.Index == _pos && (longestMatch == null || match.Length > longestMatch.Length))
                {
                    longestMatch = match;
                    id = re.Item2.GetValue(match);
                }
            }

            // If there was a match, generate a token.
            if (longestMatch != null)
            {
                // Return undefined token if present
                if (captureUndef && u < _pos)
                {
                    if (rules.IgnoreRules.Contains(rules.UndefinedCaptureRule.Item2)) goto readStart;
                    return new Token<T>(rules.UndefinedCaptureRule.Item2, rules.UndefinedCaptureRule.Item1(_stringe.Slice(u, _pos)));
                }

                // Return longest match, narrow down to <value> group if available.
                var group = longestMatch.Groups[tokenGroupName];
                _pos += longestMatch.Length;
                if (group.Success)
                {
                    if (rules.IgnoreRules.Contains(id)) goto readStart;
                    return new Token<T>(id, _stringe.Substringe(group.Index, group.Length));
                }
                if (rules.IgnoreRules.Contains(id)) goto readStart;
                return new Token<T>(id, _stringe.Substringe(longestMatch.Index, longestMatch.Length));
            }
        }

        // Check normal priority symbol rules
        foreach (var t in rules.NormalSymbols.Where(t => IsNext(t.Item1, t.Item3)))
        {
            // Return undefined token if present
            if (captureUndef && u < _pos)
            {
                if (rules.IgnoreRules.Contains(rules.UndefinedCaptureRule.Item2)) goto readStart;
                return new Token<T>(rules.UndefinedCaptureRule.Item2, rules.UndefinedCaptureRule.Item1(_stringe.Slice(u, _pos)));
            }

            // Return symbol token
            var c = _stringe.Substringe(_pos, t.Item1.Length);
            _pos += t.Item1.Length;
            if (rules.IgnoreRules.Contains(t.Item2)) goto readStart;
            return new Token<T>(t.Item2, c);
        }

        // No rule matched here; consume the character as undefined content.
        _pos++;

        if (!captureUndef)
        {
            var bad = _stringe.Slice(u, _pos);
            throw new InvalidOperationException(String.Concat("(Ln ", bad.Line, ", Col ", bad.Column, ") Invalid token '", bad, "'"));
        }
    } while (captureUndef);

    throw new InvalidOperationException("This should never happen.");
}
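A minimal driving loop for ReadToken might look like the sketch below. It assumes the enclosing reader type is Stringes' StringeReader, that the static math rules from the first example are publicly reachable as Lexer.Rules, and that Token<T> exposes its identifier as ID; all three are inferences rather than something these snippets state:

// Sketch, not the library's own front end: StringeReader, Lexer.Rules and
// Token<T>.ID are assumptions (see the lead-in above).
var reader = new StringeReader("2^(3+4)"); // no spaces: these math rules define no whitespace rule
var tokens = new List<Token<MathTokenType>>();
while (true)
{
    var token = reader.ReadToken(Lexer.Rules);
    tokens.Add(token);
    // AddEndToken registered MathTokenType.End, so ReadToken reports it
    // (instead of throwing) once the input is exhausted.
    if (token.ID == MathTokenType.End) break;
}
// tokens: Number(2), Caret, LeftParen, Number(3), Plus, Number(4), RightParen, End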