Example #1
0
        /// <summary>
        /// Splits <paramref name="source"/> into tokens by trying each entry of
        /// <c>TokenDefinitions</c>, in order, at the current position; the first
        /// definition whose regex matches exactly at that position wins.
        /// </summary>
        /// <param name="source">The script text to tokenize.</param>
        /// <returns>
        /// A tuple of the recognized tokens (always ending with an EOF token placed
        /// at the final scan index) and one error per character that no definition
        /// matched.
        /// </returns>
        public (List <Token>, List <ScriptError>) Tokenize(string source)
        {
            int index  = 0;
            var tokens = new List <Token>();
            var errors = new List <ScriptError>();

            while (index < source.Length)
            {
                // Tabs and spaces only separate tokens and are never emitted.
                // A direct character test replaces the former "[\t ]" regex,
                // which was rebuilt on every call and searched ahead through
                // the rest of the input just to be checked against index.
                char current = source[index];
                if (current == '\t' || current == ' ')
                {
                    index++;
                    continue;
                }

                Token token = null;
                foreach (var definition in TokenDefinitions)
                {
                    var match = definition.Regex.Match(source, index);

                    // Match() searches forward from index, so a hit is only
                    // usable when it starts exactly here. Zero-length hits are
                    // rejected as well: accepting one would emit a token
                    // without advancing index and the loop would never end.
                    if (!match.Success || match.Index != index || match.Length == 0)
                    {
                        continue;
                    }

                    var value = source.Substring(index, match.Length);
                    if (definition.Type == TokenType.StringLiteral ||
                        definition.Type == TokenType.DateTimeLiteral ||
                        definition.Type == TokenType.Field)
                    {
                        // Drop the first and last character of the match
                        // (presumably the surrounding delimiters - this relies
                        // on these definitions matching at least two characters).
                        value = value.Substring(1, value.Length - 2);
                    }

                    token = new Token
                    {
                        Type     = definition.Type,
                        Contents = value,
                        Index    = index,
                    };
                    index += match.Length;
                    break;
                }

                if (token != null)
                {
                    tokens.Add(token);
                    continue;
                }

                // No definition matched here: report the offending character
                // and step over it so scanning resumes at the next one.
                var symbol     = source[index].ToString();
                var errorToken = new Token
                {
                    Type     = TokenType.Unknown,
                    Contents = symbol,
                    Index    = index,
                };
                errors.Add(ScriptError.UnrecognizedSymbol(errorToken, symbol));
                index++;
            }

            // Always terminate the stream with an explicit end-of-input marker.
            var eof = new Token
            {
                Type     = TokenType.EOF,
                Contents = "End of script",
                Index    = index,
            };

            tokens.Add(eof);
            return(tokens, errors);
        }