/// <summary>
/// Builds the scanner's token tables: every token type is registered with a
/// compiled regular expression, and WHITESPACE is marked as a skipped token.
/// </summary>
public Scanner()
{
    Patterns = new Dictionary<TokenType, Regex>();
    Tokens = new List<TokenType>();
    LookAheadToken = null;

    // Token types listed here are treated as skippable.
    SkipList = new List<TokenType>();
    SkipList.Add(TokenType.WHITESPACE);

    // Each token type is added to Patterns and then appended to Tokens,
    // in the order shown.
    Patterns.Add(TokenType.NUMBER, new Regex(@"[0-9]+", RegexOptions.Compiled));
    Tokens.Add(TokenType.NUMBER);

    Patterns.Add(TokenType.PLUSMINUS, new Regex(@"(\+|-)", RegexOptions.Compiled));
    Tokens.Add(TokenType.PLUSMINUS);

    Patterns.Add(TokenType.MULTDIV, new Regex(@"\*|/", RegexOptions.Compiled));
    Tokens.Add(TokenType.MULTDIV);

    Patterns.Add(TokenType.BROPEN, new Regex(@"\(", RegexOptions.Compiled));
    Tokens.Add(TokenType.BROPEN);

    Patterns.Add(TokenType.BRCLOSE, new Regex(@"\)", RegexOptions.Compiled));
    Tokens.Add(TokenType.BRCLOSE);

    Patterns.Add(TokenType.EOF, new Regex(@"^$", RegexOptions.Compiled));
    Tokens.Add(TokenType.EOF);

    Patterns.Add(TokenType.WHITESPACE, new Regex(@"\s+", RegexOptions.Compiled));
    Tokens.Add(TokenType.WHITESPACE);
}
/// <summary>
/// Widens the stored [startpos, endpos] range so that it also covers the given token.
/// </summary>
/// <param name="token">Token whose StartPos/EndPos are merged into the range.</param>
public void UpdateRange(Token token)
{
    int tokenStart = token.StartPos;
    if (tokenStart < startpos)
    {
        // Token begins before the current range: pull the lower bound down.
        startpos = tokenStart;
    }

    int tokenEnd = token.EndPos;
    if (tokenEnd > endpos)
    {
        // Token ends after the current range: push the upper bound up.
        endpos = tokenEnd;
    }
}
private List<TokenType> SkipList; // token types that are skipped while scanning

/// <summary>
/// Builds the scanner's token tables: every token type is registered with a
/// compiled regular expression, and SPACE is marked as a skipped token.
/// </summary>
public Scanner()
{
    Patterns = new Dictionary<TokenType, Regex>();
    Tokens = new List<TokenType>();
    LookAheadToken = null;
    Skipped = new List<Token>();

    SkipList = new List<TokenType>();
    SkipList.Add(TokenType.SPACE);

    // Token type -> pattern source, in registration order.
    KeyValuePair<TokenType, string>[] definitions = new KeyValuePair<TokenType, string>[]
    {
        new KeyValuePair<TokenType, string>(TokenType.IdKeyword1, @"(?i)id:"),
        new KeyValuePair<TokenType, string>(TokenType.IdKeyword2, @"#"),
        new KeyValuePair<TokenType, string>(TokenType.TimeKeyword, @"time:"),
        new KeyValuePair<TokenType, string>(TokenType.StatusKeyword1, @"status:"),
        new KeyValuePair<TokenType, string>(TokenType.StatusKeyword2, @"state:"),
        new KeyValuePair<TokenType, string>(TokenType.CommentKeyword1, @"comment:"),
        new KeyValuePair<TokenType, string>(TokenType.CommentKeyword2, @"comments:"),
        new KeyValuePair<TokenType, string>(TokenType.CommentKeyword3, @"comm:"),
        new KeyValuePair<TokenType, string>(TokenType.CommentKeyword4, @"cmt:"),
        new KeyValuePair<TokenType, string>(TokenType.EOF, @"^$"),
        new KeyValuePair<TokenType, string>(TokenType.SPACE, @"\s+"),
        new KeyValuePair<TokenType, string>(TokenType.Delimiter, @":"),
        new KeyValuePair<TokenType, string>(TokenType.PUNCTUATIONMARK, @"[,\.\?!;]+"),
        new KeyValuePair<TokenType, string>(TokenType.COMMA, @","),
        new KeyValuePair<TokenType, string>(TokenType.NUMBER, @"[0-9]+"),
        new KeyValuePair<TokenType, string>(TokenType.DECIMAL, @"\d+([\.,]\d+)?"),
        new KeyValuePair<TokenType, string>(TokenType.ANY_TEXT, @"(?i)(.(?<!#)(?<!(id|time|status|state|comment|comm|comments|cmt):))+\s"),
        new KeyValuePair<TokenType, string>(TokenType.ANY_SYMBOL, @"(?i)((?!#)(?!(id|time|status|state|comment|comm|comments|cmt):).)")
    };

    // Compile each pattern, then record it in Patterns and append its type to Tokens.
    foreach (KeyValuePair<TokenType, string> definition in definitions)
    {
        Regex compiled = new Regex(definition.Value, RegexOptions.Compiled);
        Patterns.Add(definition.Key, compiled);
        Tokens.Add(definition.Key);
    }
}
/// <summary>
/// Returns the next non-skipped token without advancing the scanner.
/// The longest match at the current position wins; on equal length the
/// token type with the lower enum value wins.
/// </summary>
/// <param name="expectedtokens">
/// Token types to try. When empty, every type in Tokens is tried; otherwise
/// the given types plus the SkipList types are tried.
/// </param>
/// <returns>The look-ahead token, which is also stored in LookAheadToken.</returns>
public Token LookAhead(params TokenType[] expectedtokens)
{
    // A previously determined look-ahead token is reused to avoid re-matching.
    if (LookAheadToken != null)
    {
        if (LookAheadToken.Type != TokenType._UNDETERMINED_
            && LookAheadToken.Type != TokenType._NONE_)
        {
            return LookAheadToken;
        }
    }

    // Decide which token types take part in this scan.
    List<TokenType> candidates;
    if (expectedtokens.Length != 0)
    {
        candidates = new List<TokenType>(expectedtokens);
        candidates.AddRange(SkipList);
    }
    else
    {
        candidates = Tokens;
    }

    int cursor = StartPos;
    Token current;
    bool isSkippable;

    do
    {
        int bestLength = -1;
        TokenType bestType = (TokenType)int.MaxValue;
        string remaining = Input.Substring(cursor);
        current = new Token(cursor, EndPos);

        foreach (TokenType candidate in candidates)
        {
            Match match = Patterns[candidate].Match(remaining);

            // Only matches anchored at the current position count.
            if (!match.Success || match.Index != 0)
            {
                continue;
            }

            bool isLonger = match.Length > bestLength;
            bool isTieWithLowerType = candidate < bestType && match.Length == bestLength;
            if (isLonger || isTieWithLowerType)
            {
                bestLength = match.Length;
                bestType = candidate;
            }
        }

        if (bestType >= 0 && bestLength >= 0)
        {
            current.EndPos = cursor + bestLength;
            current.Text = Input.Substring(current.StartPos, bestLength);
            current.Type = bestType;
        }
        else if (current.StartPos < current.EndPos - 1)
        {
            // Nothing matched: expose the single offending character as the text.
            current.Text = Input.Substring(current.StartPos, 1);
        }

        isSkippable = SkipList.Contains(current.Type);
        if (isSkippable)
        {
            // Step over the skipped token and remember it for the next real token.
            cursor = current.EndPos;
            Skipped.Add(current);
        }
        else
        {
            // Hand the accumulated skipped tokens to this token and start a fresh list.
            current.Skipped = Skipped;
            Skipped = new List<Token>();
        }
    }
    while (isSkippable);

    LookAheadToken = current;
    return current;
}
/// <summary>
/// Performs a look-ahead for the next token and then advances the scan
/// position past it.
/// </summary>
/// <param name="expectedtokens">Token types forwarded to LookAhead.</param>
/// <returns>The token that was consumed.</returns>
public Token Scan(params TokenType[] expectedtokens)
{
    Token consumed = LookAhead(expectedtokens);

    // Clear the cached look-ahead so the next call scans fresh input.
    LookAheadToken = null;

    // Both positions move to the end of the consumed token.
    int resumeAt = consumed.EndPos;
    StartPos = resumeAt;
    EndPos = resumeAt;

    return consumed;
}
/// <summary>
/// Creates a token of the given type spanning the scanner's current
/// StartPos/EndPos.
/// </summary>
/// <param name="type">Type assigned to the new token.</param>
/// <returns>The newly created token.</returns>
public Token GetToken(TokenType type)
{
    return new Token(this.StartPos, this.EndPos) { Type = type };
}
/// <summary>
/// Resets the scanner so it starts scanning the given input from the beginning.
/// </summary>
/// <param name="input">Text to be scanned.</param>
public void Init(string input)
{
    this.Input = input;

    // Scan window back to the start of the input.
    StartPos = 0;
    EndPos = 0;

    // Position counters back to zero.
    CurrentLine = 0;
    CurrentColumn = 0;
    CurrentPosition = 0;

    // Drop any cached look-ahead from a previous input.
    LookAheadToken = null;
}
/// <summary>
/// Builds the scanner's token tables: every token type is registered with a
/// compiled regular expression, and WHITESPACE, COMMENTLINE and COMMENTBLOCK
/// are marked as skipped tokens.
/// </summary>
public Scanner()
{
    Patterns = new Dictionary<TokenType, Regex>();
    Tokens = new List<TokenType>();
    LookAheadToken = null;
    Skipped = new List<Token>();

    SkipList = new List<TokenType>();
    SkipList.Add(TokenType.WHITESPACE);
    SkipList.Add(TokenType.COMMENTLINE);
    SkipList.Add(TokenType.COMMENTBLOCK);

    // Token type -> pattern source, in registration order.
    KeyValuePair<TokenType, string>[] definitions = new KeyValuePair<TokenType, string>[]
    {
        new KeyValuePair<TokenType, string>(TokenType.BRACKETOPEN, @"\("),
        new KeyValuePair<TokenType, string>(TokenType.BRACKETCLOSE, @"\)"),
        new KeyValuePair<TokenType, string>(TokenType.CODEBLOCK, @"\{[^\}]*\}([^};][^}]*\}+)*;"),
        new KeyValuePair<TokenType, string>(TokenType.COMMA, @","),
        new KeyValuePair<TokenType, string>(TokenType.SQUAREOPEN, @"\["),
        new KeyValuePair<TokenType, string>(TokenType.SQUARECLOSE, @"\]"),
        new KeyValuePair<TokenType, string>(TokenType.ASSIGN, @"="),
        new KeyValuePair<TokenType, string>(TokenType.PIPE, @"\|"),
        new KeyValuePair<TokenType, string>(TokenType.SEMICOLON, @";"),
        new KeyValuePair<TokenType, string>(TokenType.UNARYOPER, @"(\*|\+|\?)"),
        new KeyValuePair<TokenType, string>(TokenType.IDENTIFIER, @"[a-zA-Z_][a-zA-Z0-9_]*"),
        new KeyValuePair<TokenType, string>(TokenType.INTEGER, @"[0-9]+"),
        new KeyValuePair<TokenType, string>(TokenType.DOUBLE, @"[0-9]*\.[0-9]+"),
        new KeyValuePair<TokenType, string>(TokenType.HEX, @"(0x[0-9a-fA-F]{6})"),
        new KeyValuePair<TokenType, string>(TokenType.ARROW, @"->"),
        new KeyValuePair<TokenType, string>(TokenType.DIRECTIVEOPEN, @"<%\s*@"),
        new KeyValuePair<TokenType, string>(TokenType.DIRECTIVECLOSE, @"%>"),
        new KeyValuePair<TokenType, string>(TokenType.EOF, @"^$"),
        new KeyValuePair<TokenType, string>(TokenType.STRING, @"@?\""(\""\""|[^\""])*\"""),
        new KeyValuePair<TokenType, string>(TokenType.WHITESPACE, @"\s+"),
        new KeyValuePair<TokenType, string>(TokenType.COMMENTLINE, @"//[^\n]*\n?"),
        new KeyValuePair<TokenType, string>(TokenType.COMMENTBLOCK, @"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/")
    };

    // Compile each pattern, then record it in Patterns and append its type to Tokens.
    foreach (KeyValuePair<TokenType, string> definition in definitions)
    {
        Regex compiled = new Regex(definition.Value, RegexOptions.Compiled);
        Patterns.Add(definition.Key, compiled);
        Tokens.Add(definition.Key);
    }
}
/// <summary>
/// Performs a look-ahead for the next token, then advances the scan position
/// past it and updates the current line and file from the token.
/// </summary>
/// <param name="expectedtokens">Token types forwarded to LookAhead.</param>
/// <returns>The token that was consumed.</returns>
public Token Scan(params TokenType[] expectedtokens)
{
    Token consumed = LookAhead(expectedtokens);

    // Clear the cached look-ahead so the next call scans fresh input.
    LookAheadToken = null;

    // Both positions move to the end of the consumed token.
    StartPos = consumed.EndPos;
    EndPos = consumed.EndPos;

    // Line advances by the number of '\n' characters inside the token text.
    int textLength = consumed.Text.Length;
    int lengthWithoutNewlines = consumed.Text.Replace("\n", "").Length;
    CurrentLine = consumed.Line + (textLength - lengthWithoutNewlines);
    CurrentFile = consumed.File;

    return consumed;
}
/// <summary>
/// Returns the next non-skipped token without advancing the scanner, stamping
/// each token with file, line and column information. The longest match at
/// the current position wins; on equal length the token type with the lower
/// enum value wins.
/// </summary>
/// <param name="expectedtokens">
/// Token types to try. When empty, every type in Tokens is tried; otherwise
/// the given types plus the SkipList types are tried.
/// </param>
/// <returns>The look-ahead token, which is also stored in LookAheadToken.</returns>
public Token LookAhead(params TokenType[] expectedtokens)
{
    // A previously determined look-ahead token is reused to avoid re-matching.
    if (LookAheadToken != null)
    {
        if (LookAheadToken.Type != TokenType._UNDETERMINED_
            && LookAheadToken.Type != TokenType._NONE_)
        {
            return LookAheadToken;
        }
    }

    // Decide which token types take part in this scan.
    List<TokenType> candidates;
    if (expectedtokens.Length != 0)
    {
        candidates = new List<TokenType>(expectedtokens);
        candidates.AddRange(SkipList);
    }
    else
    {
        candidates = Tokens;
    }

    // Local copies of the scanner state; the scanner's own fields are not
    // advanced by a look-ahead.
    int cursor = StartPos;
    int cursorEnd = EndPos;
    int lineNumber = CurrentLine;
    string fileName = CurrentFile;

    Token current;
    bool isSkippable;

    do
    {
        int bestLength = -1;
        TokenType bestType = (TokenType)int.MaxValue;
        string remaining = Input.Substring(cursor);
        current = new Token(cursor, cursorEnd);

        foreach (TokenType candidate in candidates)
        {
            Match match = Patterns[candidate].Match(remaining);

            // Only matches anchored at the current position count.
            if (!match.Success || match.Index != 0)
            {
                continue;
            }

            bool isLonger = match.Length > bestLength;
            bool isTieWithLowerType = candidate < bestType && match.Length == bestLength;
            if (isLonger || isTieWithLowerType)
            {
                bestLength = match.Length;
                bestType = candidate;
            }
        }

        if (bestType >= 0 && bestLength >= 0)
        {
            current.EndPos = cursor + bestLength;
            current.Text = Input.Substring(current.StartPos, bestLength);
            current.Type = bestType;
        }
        else if (current.StartPos < current.EndPos - 1)
        {
            // Nothing matched: expose the single offending character as the text.
            current.Text = Input.Substring(current.StartPos, 1);
        }

        // Record location information for error reporting.
        current.File = fileName;
        current.Line = lineNumber;
        if (current.StartPos < Input.Length)
        {
            // Column is the distance from the closest preceding '\n'.
            current.Column = current.StartPos - Input.LastIndexOf('\n', current.StartPos);
        }

        isSkippable = SkipList.Contains(current.Type);
        if (isSkippable)
        {
            // Step over the skipped token, counting the newlines it contains.
            cursor = current.EndPos;
            cursorEnd = current.EndPos;
            int newlineCount = current.Text.Length - current.Text.Replace("\n", "").Length;
            lineNumber = current.Line + newlineCount;
            fileName = current.File;
            Skipped.Add(current);
        }
        else
        {
            // Hand the accumulated skipped tokens to this token and start a fresh list.
            current.Skipped = Skipped;
            Skipped = new List<Token>();
        }

        // A FileAndLine token may override the file name and line number
        // used for the tokens that follow it.
        if (current.Type == FileAndLine)
        {
            Match directive = Patterns[current.Type].Match(current.Text);

            Group fileGroup = directive.Groups["File"];
            if (fileGroup.Success)
            {
                fileName = fileGroup.Value;
            }

            Group lineGroup = directive.Groups["Line"];
            if (lineGroup.Success)
            {
                lineNumber = int.Parse(lineGroup.Value);
            }
        }
    }
    while (isSkippable);

    LookAheadToken = current;
    return current;
}
/// <summary>
/// Resets the scanner so it starts scanning the given input from the beginning.
/// </summary>
/// <param name="input">Text to be scanned.</param>
/// <param name="fileName">File name stored in CurrentFile; defaults to an empty string.</param>
public void Init(string input, string fileName = "")
{
    this.Input = input;

    // Scan window back to the start of the input.
    StartPos = 0;
    EndPos = 0;

    // Location tracking: line and column start at 1, position at 0.
    CurrentFile = fileName;
    CurrentLine = 1;
    CurrentColumn = 1;
    CurrentPosition = 0;

    // Drop any cached look-ahead from a previous input.
    LookAheadToken = null;
}
/// <summary>
/// Builds the scanner's token tables: every token type is registered with a
/// compiled regular expression, and WHITESPACE is marked as a skipped token.
/// </summary>
public Scanner()
{
    Patterns = new Dictionary<TokenType, Regex>();
    Tokens = new List<TokenType>();
    LookAheadToken = null;
    Skipped = new List<Token>();

    SkipList = new List<TokenType>();
    SkipList.Add(TokenType.WHITESPACE);

    // Each token type is added to Patterns and then appended to Tokens,
    // in the order shown.

    // Brackets and end of input.
    Patterns.Add(TokenType.BROPEN, new Regex(@"\(", RegexOptions.Compiled));
    Tokens.Add(TokenType.BROPEN);

    Patterns.Add(TokenType.BRCLOSE, new Regex(@"\)", RegexOptions.Compiled));
    Tokens.Add(TokenType.BRCLOSE);

    Patterns.Add(TokenType.EOF, new Regex(@"^$", RegexOptions.Compiled));
    Tokens.Add(TokenType.EOF);

    // Comparison and logical operators.
    Patterns.Add(TokenType.EQ, new Regex("==", RegexOptions.Compiled));
    Tokens.Add(TokenType.EQ);

    Patterns.Add(TokenType.NEQ, new Regex("!=", RegexOptions.Compiled));
    Tokens.Add(TokenType.NEQ);

    Patterns.Add(TokenType.CONTAINS, new Regex("contains", RegexOptions.Compiled));
    Tokens.Add(TokenType.CONTAINS);

    Patterns.Add(TokenType.AND, new Regex("&&", RegexOptions.Compiled));
    Tokens.Add(TokenType.AND);

    Patterns.Add(TokenType.OR, new Regex("\\|\\|", RegexOptions.Compiled));
    Tokens.Add(TokenType.OR);

    // Names, double-quoted values and the separator.
    Patterns.Add(TokenType.NAME, new Regex(@"\w+", RegexOptions.Compiled));
    Tokens.Add(TokenType.NAME);

    Patterns.Add(TokenType.VALUE, new Regex(@"""[^""]*""", RegexOptions.Compiled));
    Tokens.Add(TokenType.VALUE);

    Patterns.Add(TokenType.SEP, new Regex(";", RegexOptions.Compiled));
    Tokens.Add(TokenType.SEP);

    // Literal keywords.
    Patterns.Add(TokenType.WAS, new Regex("Was", RegexOptions.Compiled));
    Tokens.Add(TokenType.WAS);

    Patterns.Add(TokenType.OBSOLETE, new Regex("Obsolete", RegexOptions.Compiled));
    Tokens.Add(TokenType.OBSOLETE);

    Patterns.Add(TokenType.DELETE, new Regex("Delete", RegexOptions.Compiled));
    Tokens.Add(TokenType.DELETE);

    Patterns.Add(TokenType.WHITESPACE, new Regex(@"\s+", RegexOptions.Compiled));
    Tokens.Add(TokenType.WHITESPACE);
}
/// <summary>
/// Returns the next non-skipped token without advancing the scanner.
/// The longest match at the current position wins; on equal length the
/// token type that appears first in <paramref name="scantokens"/> wins.
/// </summary>
/// <param name="scantokens">
/// Token types to try. When empty, every type in Tokens is tried.
/// </param>
/// <returns>The look-ahead token, which is also stored in LookAheadToken.</returns>
public Token LookAhead(params TokenType[] scantokens)
{
    int len;
    int index;
    int i;
    int startpos = StartPos;
    Token tok = null;

    // A previously determined look-ahead token is reused to avoid re-matching.
    if (LookAheadToken != null
        && LookAheadToken.Type != TokenType._UNDETERMINED_
        && LookAheadToken.Type != TokenType._NONE_)
    {
        return LookAheadToken;
    }

    // If no scantokens are specified, scan for all of them (backward compatible).
    if (scantokens.Length == 0)
    {
        scantokens = Tokens.ToArray();
    }

    do
    {
        // Reset the best match on every pass so that a match from an earlier
        // (skipped) token can never leak into this one.
        len = -1;
        index = -1;

        string input = Input.Substring(startpos);
        tok = new Token(startpos, EndPos);

        for (i = 0; i < scantokens.Length; i++)
        {
            Regex r = Patterns[scantokens[i]];
            Match m = r.Match(input);

            // Only matches anchored at the current position count; a strictly
            // longer match replaces the current best.
            if (m.Success && m.Index == 0 && m.Length > len)
            {
                len = m.Length;
                index = i;
            }
        }

        if (index >= 0 && len >= 0)
        {
            tok.EndPos = startpos + len;
            tok.Text = Input.Substring(tok.StartPos, len);
            tok.Type = scantokens[index];
        }
        else if (tok.StartPos < Input.Length)
        {
            // Nothing matched: expose the single offending character as the text.
            // The guard must test StartPos (the index actually read), not EndPos:
            // after skipped tokens have advanced startpos to the end of the input
            // while EndPos still holds the older scan position, reading one
            // character at StartPos would throw ArgumentOutOfRangeException.
            tok.Text = Input.Substring(tok.StartPos, 1);
        }

        if (SkipList.Contains(tok.Type))
        {
            // Step over the skipped token and remember it.
            startpos = tok.EndPos;
            Skipped.Add(tok);
        }
    }
    while (SkipList.Contains(tok.Type));

    LookAheadToken = tok;
    return tok;
}