/// <summary>
/// Splits <paramref name="source"/> into tokens by trying each entry of
/// <c>TokenDefinitions</c>, in order, at the current scan position.
/// </summary>
/// <remarks>
/// Tabs and spaces between tokens are skipped. The first definition whose pattern
/// matches exactly at the current position (with a non-empty match) produces the token.
/// A character that no definition matches is recorded as an unrecognized-symbol error
/// and scanning resumes at the following character. An EOF token is always appended.
/// </remarks>
/// <param name="source">The script text to tokenize.</param>
/// <returns>
/// The recognized tokens (always terminated by an EOF token) and the errors
/// collected for unrecognized characters.
/// </returns>
public (List<Token>, List<ScriptError>) Tokenize(string source) {
    int index = 0;
    var tokens = new List<Token>();
    var errors = new List<ScriptError>();

    while (index < source.Length) {
        // Skip insignificant whitespace (tab or space) one character at a time.
        // A direct comparison avoids building a Regex per call and avoids scanning
        // ahead through the rest of the input just to test the current character.
        char current = source[index];
        if (current == '\t' || current == ' ') {
            index++;
            continue;
        }

        Token token = null;
        foreach (var definition in TokenDefinitions) {
            // Match(source, index) only starts searching at index; it may still find
            // a match further along, so the match position has to be checked.
            var match = definition.Regex.Match(source, index);

            // Zero-length matches are rejected: accepting one would emit a token
            // without advancing index and the outer loop would never terminate.
            if (!match.Success || match.Index != index || match.Length == 0) {
                continue;
            }

            var value = source.Substring(index, match.Length);
            if (definition.Type == TokenType.StringLiteral
                || definition.Type == TokenType.DateTimeLiteral
                || definition.Type == TokenType.Field) {
                // Drop the first and last character (the surrounding delimiters).
                // NOTE(review): assumes these patterns always include both delimiters;
                // a one-character match would make Substring throw - confirm.
                value = value.Substring(1, value.Length - 2);
            }

            token = new Token {
                Type = definition.Type,
                Contents = value,
                Index = index,
            };
            index += match.Length;
            break;
        }

        if (token != null) {
            tokens.Add(token);
            continue;
        }

        // Nothing matched here: report the single offending character and move on
        // so the remainder of the script is still tokenized.
        var symbol = source[index].ToString();
        var errorToken = new Token {
            Type = TokenType.Unknown,
            Contents = symbol,
            Index = index,
        };
        errors.Add(ScriptError.UnrecognizedSymbol(errorToken, symbol));
        index++;
    }

    // Terminate the stream with an explicit end-of-input marker positioned just
    // past the last character.
    tokens.Add(new Token {
        Type = TokenType.EOF,
        Contents = "End of script",
        Index = index,
    });

    return (tokens, errors);
}