private readonly List<TokenType> _skipList; // tokens to be skipped

/// <summary>
/// Creates the scanner and registers one compiled regular expression per
/// token type. Every registered type is also appended to the list of
/// tokens that is scanned when no expected tokens are supplied.
/// WHITESPACE is the only token type placed on the skip list.
/// </summary>
public Scanner()
{
    Patterns = new Dictionary<TokenType, Regex>();
    _tokens = new List<TokenType>();
    _lookAheadToken = null;
    Skipped = new List<Token>();

    _skipList = new List<TokenType> { TokenType.WHITESPACE };

    AddPattern(TokenType.QUESTION, @"\?");
    AddPattern(TokenType.COLON, ":");
    AddPattern(TokenType.ANY, "[a-z]+");
    AddPattern(TokenType.OR, @"\|\||or");
    AddPattern(TokenType.AND, @"&&|and");

    // Two-character operators must be listed before their one-character
    // prefixes: regex alternation takes the first alternative that succeeds,
    // so "<|<=" would match only "<" on the input "<=" and leave "=" behind.
    AddPattern(TokenType.COMPARISON, @"<=|>=|==|!=|<|>");

    AddPattern(TokenType.ADDITIVE, "[-+&]");
    AddPattern(TokenType.MULTIPLICATIVE, @"[*/%]|mod|div");
    AddPattern(TokenType.UNARY, "-|!|not");
    AddPattern(TokenType.DOT, @"\.");
    AddPattern(TokenType.IDENTIFIER, "[a-zA-Z_][a-zA-Z_0-9]*");
    AddPattern(TokenType.SEMICOLONNAME, "[a-zA-Z_][a-zA-Z_0-9]*:");
    AddPattern(TokenType.ASNAME, "as ([a-zA-Z_][a-zA-Z_0-9]*)");

    // NOTE(review): the character classes exclude only the double quote, so a
    // single-quoted literal can run on to a later single quote on the same
    // line - confirm whether that is intended before changing it.
    AddPattern(TokenType.STRING, @"([""'])[^""\\\r\n]*(?:\\.[^""\\\r\n]*)*\1");

    AddPattern(TokenType.INTEGER, @"[0-9]+");
    AddPattern(TokenType.REAL, @"[0-9]*\.[0-9]+");
    AddPattern(TokenType.OPENPAREN, @"\(");
    AddPattern(TokenType.CLOSEPAREN, @"\)");
    AddPattern(TokenType.OPENCURLY, @"\{");
    AddPattern(TokenType.CLOSECURLY, @"}");
    AddPattern(TokenType.IN, "in");
    AddPattern(TokenType.COMMA, ",");
    AddPattern(TokenType.OPENSQUARE, @"\[");
    AddPattern(TokenType.CLOSESQUARE, @"]");
    AddPattern(TokenType.NEW, "new");

    // Matches only when the remaining input is empty.
    AddPattern(TokenType.EOF, @"^$");
    AddPattern(TokenType.WHITESPACE, @"\s+");
}

/// <summary>
/// Compiles <paramref name="pattern"/> and registers it for
/// <paramref name="type"/> in both the pattern table and the token list.
/// </summary>
/// <param name="type">Token type the pattern recognises.</param>
/// <param name="pattern">Regular expression source for the token.</param>
private void AddPattern(TokenType type, string pattern)
{
    Patterns.Add(type, new Regex(pattern, RegexOptions.Compiled));
    _tokens.Add(type);
}
/// <summary>
/// Widens the current [StartPos, EndPos] range so that it also covers the
/// range of <paramref name="token"/>. The range is never narrowed.
/// </summary>
/// <param name="token">Token whose start and end positions must be covered.</param>
public void UpdateRange(Token token)
{
    // Pull the start back when the token begins earlier.
    if (StartPos > token.StartPos)
    {
        StartPos = token.StartPos;
    }

    // Push the end forward when the token finishes later.
    if (EndPos < token.EndPos)
    {
        EndPos = token.EndPos;
    }
}
/// <summary>
/// Performs a lookahead for the next token and then advances the scan
/// position of the input string past it.
/// </summary>
/// <param name="expectedtokens">Token types passed on unchanged to the lookahead.</param>
/// <returns>The token returned by the lookahead.</returns>
public Token Scan(params TokenType[] expectedtokens)
{
    Token next = LookAhead(expectedtokens);

    // Clear the cached lookahead so the following call scans again.
    _lookAheadToken = null;

    // Continue scanning from the end of the token just taken.
    int resumeAt = next.EndPos;
    StartPos = resumeAt;
    EndPos = resumeAt;

    // The current line moves on by the number of line feeds inside the token text.
    int lineFeeds = 0;
    foreach (char c in next.Text)
    {
        if (c == '\n')
        {
            lineFeeds++;
        }
    }

    CurrentLine = next.Line + lineFeeds;
    CurrentFile = next.File;
    return next;
}
/// <summary>
/// Returns the next non-skipped token without advancing the scanner.
/// Among the candidate patterns that match at the current position the
/// longest match wins; equal lengths are resolved in favour of the smaller
/// token type value. Tokens on the skip list are collected and attached to
/// the returned token.
/// </summary>
/// <param name="expectedtokens">
/// Token types to try. When empty, every registered token type is tried;
/// otherwise the skip list is added to the given types.
/// </param>
/// <returns>The token found, which is also cached as the lookahead token.</returns>
public Token LookAhead(params TokenType[] expectedtokens)
{
    int scanStart = StartPos;
    int scanEnd = EndPos;
    int lineNumber = CurrentLine;
    string fileName = CurrentFile;

    // A previously determined lookahead token is reused to avoid rescanning.
    bool cachedIsUsable = _lookAheadToken != null
                          && _lookAheadToken.Type != TokenType._UNDETERMINED_
                          && _lookAheadToken.Type != TokenType._NONE_;
    if (cachedIsUsable)
    {
        return _lookAheadToken;
    }

    // No explicit expectations means "scan for everything" (backward compatible).
    List<TokenType> candidates;
    if (expectedtokens.Length != 0)
    {
        candidates = new List<TokenType>(expectedtokens);
        candidates.AddRange(_skipList);
    }
    else
    {
        candidates = _tokens;
    }

    Token found;
    bool isSkipped;
    do
    {
        int bestLength = -1;
        TokenType bestType = (TokenType) int.MaxValue;
        string remaining = Input.Substring(scanStart);
        found = new Token(scanStart, scanEnd);

        foreach (TokenType candidate in candidates)
        {
            Match match = Patterns[candidate].Match(remaining);

            // Only matches anchored at the current position count.
            if (!match.Success || match.Index != 0)
            {
                continue;
            }

            bool isLonger = match.Length > bestLength;
            bool isTieWithLowerType = candidate < bestType && match.Length == bestLength;
            if (isLonger || isTieWithLowerType)
            {
                bestLength = match.Length;
                bestType = candidate;
            }
        }

        if (bestType >= 0 && bestLength >= 0)
        {
            found.EndPos = scanStart + bestLength;
            found.Text = Input.Substring(found.StartPos, bestLength);
            found.Type = bestType;
        }
        else if (found.StartPos == found.EndPos)
        {
            // Nothing matched: report the offending character, or "EOF" past the end.
            if (found.StartPos < Input.Length)
            {
                found.Text = Input.Substring(found.StartPos, 1);
            }
            else
            {
                found.Text = "EOF";
            }
        }

        // Record file, line and column for error reporting.
        found.File = fileName;
        found.Line = lineNumber;
        if (found.StartPos < Input.Length)
        {
            found.Column = found.StartPos - Input.LastIndexOf('\n', found.StartPos);
        }

        isSkipped = _skipList.Contains(found.Type);
        if (isSkipped)
        {
            // Step over the skipped token and remember it for the next real token.
            scanStart = found.EndPos;
            scanEnd = found.EndPos;

            int lineFeeds = found.Text.Length - found.Text.Replace("\n", "").Length;
            lineNumber = found.Line + lineFeeds;
            fileName = found.File;
            Skipped.Add(found);
        }
        else
        {
            // Only non-skipped tokens receive the skips collected so far.
            found.Skipped = Skipped;
            Skipped = new List<Token>();
        }
    }
    while (isSkipped);

    _lookAheadToken = found;
    return found;
}
/// <summary>
/// Creates a token of the given type spanning the scanner's current
/// StartPos and EndPos.
/// </summary>
/// <param name="type">Type to assign to the new token.</param>
/// <returns>The newly created token.</returns>
public Token GetToken(TokenType type)
{
    Token created = new Token(StartPos, EndPos);
    created.Type = type;
    return created;
}
/// <summary>
/// Prepares the scanner for a new input: stores the text and file name,
/// rewinds all positions to the beginning and clears the cached lookahead.
/// </summary>
/// <param name="input">Text to be scanned.</param>
/// <param name="fileName">File name recorded as the current file.</param>
public void Init(string input, string fileName)
{
    // What is being scanned.
    Input = input;
    CurrentFile = fileName;

    // Rewind the scan range and position.
    StartPos = EndPos = 0;
    CurrentPosition = 0;

    // Line and column numbering starts at 1.
    CurrentLine = 1;
    CurrentColumn = 1;

    // Force the next lookahead to scan from scratch.
    _lookAheadToken = null;
}
/// <summary>
/// Parses one property definition. Three forms are handled, in this order:
/// an identifier followed by a colon and an expression; an expression
/// followed by the identifier "as" and a name; or a bare expression whose
/// property name is obtained from GetPropertyName.
/// </summary>
/// <param name="expr">Receives the parsed expression.</param>
/// <param name="propName">Receives the property name.</param>
private void ParseObjectPropertyDefinition(out Expression expr, out string propName)
{
    var startPosition = _token.Position;

    if (_token.ID == TokenId.Identifier)
    {
        propName = GetIdentifier();

        // Snapshot the tokenizer state so the peek below can be undone.
        var savedToken = _token;
        var savedTextPos = _textPos;
        var savedCh = _ch;

        NextToken();
        if (_token.ID == TokenId.Colon)
        {
            // "name: expression" form.
            NextToken();
            expr = ParseExpression();
            return;
        }

        // No colon after the identifier: rewind and parse it as an expression.
        _token = savedToken;
        _textPos = savedTextPos;
        _ch = savedCh;
    }

    expr = ParseExpression();

    if (!TokenIdentifierIs("as"))
    {
        // No explicit name: take it from the expression, or fail with a parse error.
        var inferredName = GetPropertyName(expr, startPosition);
        propName = inferredName
            .GetOrThrow(() => ParseError(startPosition, Res.MissingAsClause));
        return;
    }

    // "expression as name" form.
    NextToken();
    propName = GetIdentifier();
    NextToken();
}
/// <summary>
/// Makes the two operands agree on a type. When their types differ, the
/// right operand is promoted to the left operand's type if possible;
/// otherwise the left operand is promoted to the right operand's type.
/// </summary>
/// <param name="left">Left operand; replaced when it is the one promoted.</param>
/// <param name="right">Right operand; replaced when it is the one promoted.</param>
/// <param name="op">Operator token, used for the error text and position.</param>
/// <remarks>Throws the incompatible-operands error when neither promotion succeeds.</remarks>
private void PromoteLeftOrRight(ref Expression left, ref Expression right, Token op)
{
    // Nothing to do when the operand types already agree.
    if (left.Type == right.Type)
    {
        return;
    }

    // First choice: bring the right operand to the left operand's type.
    Expression promotedRight = PromoteExpression(right, left.Type, true);
    if (promotedRight != null)
    {
        right = promotedRight;
        return;
    }

    // Second choice: bring the left operand to the right operand's type.
    Expression promotedLeft = PromoteExpression(left, right.Type, true);
    if (promotedLeft != null)
    {
        left = promotedLeft;
        return;
    }

    throw IncompatibleOperandsError(op.Text, left, right, op.Position);
}
/// <summary>
/// Restores the tokenizer to the supplied state, advances one token, and
/// then parses an identifier followed by a colon and an expression.
/// </summary>
/// <param name="token">Token to restore as the current token.</param>
/// <param name="pos">Text position to restore.</param>
/// <param name="ch">Current character to restore.</param>
/// <param name="exception">Exception thrown when no colon follows the identifier.</param>
/// <param name="propName">Receives the identifier read after the restore.</param>
/// <param name="expr">Receives the expression parsed after the colon.</param>
private void ParseNameColonExpression(Token token, int pos, char ch, Exception exception, out string propName, out Expression expr)
{
    // Rewind to the caller-provided tokenizer state.
    _token = token;
    _textPos = pos;
    _ch = ch;

    NextToken();
    propName = GetIdentifier();
    NextToken();

    // Without a colon this form does not apply; surface the caller's exception.
    if (_token.ID != TokenId.Colon)
    {
        throw exception;
    }

    NextToken();
    expr = ParseExpression();
}