示例#1
0
        /// <summary>
        /// Reads in a run of digits as an integer number token (floating
        /// point numbers are not supported) and appends it to the token list.
        /// </summary>
        /// <param name="text">The character stream to be tokenized.</param>
        /// <param name="i">The current index offset. It is advanced by
        /// ExtractElementToToken as the digits are consumed.</param>
        /// <exception cref="TokenizerException">If the digits are followed by
        /// a character that is neither a digit nor one of
        /// VALID_SYMBOLS_AFTER_NUMBER, or if the digits cannot be parsed as
        /// an int (for example because the value is too large).</exception>
        private void ConsumeNumber(string text, ref int i)
        {
            Assert(text != null);
            Assert(i < text.Length);

            ValidCharDelegate   validDel   = c => char.IsDigit(c);
            IllegalCharDelegate illegalDel = c => !(char.IsDigit(c) || VALID_SYMBOLS_AFTER_NUMBER.Contains(c));

            // Only characters accepted by validDel (digits) end up in the token, so the
            // token can never contain a '.'. A decimal point directly after the digits is
            // either rejected by illegalDel inside ExtractElementToToken or left in the
            // stream for the caller, depending on VALID_SYMBOLS_AFTER_NUMBER. A separate
            // "contains '.'" check here would be unreachable.
            string tokenStr = ExtractElementToToken(text, ref i, validDel, illegalDel);

            // TryParse is used purely as a validity check (e.g. to catch overflow);
            // the parsed value itself is not needed because the token keeps the text.
            int parsedValue;
            if (!int.TryParse(tokenStr, out parsedValue))
            {
                throw new TokenizerException(lineNumber, charOffset, $"Malformed number: {tokenStr}");
            }

            // The token is stamped with the offset where it starts; only afterwards is
            // the running offset moved past it.
            tokens.Add(new Token(TokenType.Number, tokenStr, lineNumber, charOffset));
            charOffset += tokenStr.Length;
        }
示例#2
0
        /// <summary>
        /// Reads a run of letters from the character stream and records it as
        /// a word token. The stream index is advanced while the letters are
        /// consumed.
        /// </summary>
        /// <param name="text">All the characters the tokenizer is tokenizing.
        /// </param>
        /// <param name="i">The offset at which the word starts.</param>
        /// <exception cref="TokenizerException">If a digit, '$' or '@' is
        /// encountered while reading the word (like ab$d or he0p or some@).
        /// </exception>
        private void ConsumeWord(string text, ref int i)
        {
            Assert(text != null);
            Assert(i < text.Length);

            // Letters make up the word; digits and the identifier prefixes are
            // explicitly forbidden inside or directly after it.
            ValidCharDelegate   isWordChar      = ch => char.IsLetter(ch);
            IllegalCharDelegate isForbiddenChar = ch => ch == '$' || ch == '@' || char.IsDigit(ch);

            string word = ExtractElementToToken(text, ref i, isWordChar, isForbiddenChar);

            // Stamp the token with the offset where the word begins, then move the
            // running offset past it.
            Token wordToken = new Token(TokenType.Word, word, lineNumber, charOffset);
            tokens.Add(wordToken);
            charOffset += word.Length;
        }
示例#3
0
        /// <summary>
        /// Gets an identifier and then assigns the token type based on the
        /// symbol prefix ('$' yields a DollarIdentifier, '@' an AtIdentifier).
        /// </summary>
        /// <param name="text">The character stream to be tokenized.</param>
        /// <param name="i">The current index offset; expected to point at the
        /// prefix symbol on entry.</param>
        /// <param name="symbolPrefix">The symbol that prefixes this
        /// identifier, used in determining what kind of token it is.</param>
        /// <exception cref="TokenizerException">If the prefix symbol is the
        /// last character of the input, or if the identifier is malformed
        /// (does not match IDENTIFIER_REGEX).</exception>
        private void ExtractIdentifier(string text, ref int i, char symbolPrefix)
        {
            Assert(text != null);
            Assert(i < text.Length);
            Assert(symbolPrefix == '$' || symbolPrefix == '@');

            i++; // Jump past the symbol, we don't need it anymore.

            // Skipping the symbol may have moved us past the end of the input. Without
            // this guard the extraction below would index text out of range instead of
            // reporting a tokenizer error.
            if (i >= text.Length)
            {
                throw new TokenizerException(lineNumber, charOffset, $"Found '{symbolPrefix}' at EOF, expected an identifier.");
            }

            ValidCharDelegate validDel = c => char.IsLetter(c) || c == '_' || c == '.';
            string            tokenStr = ExtractElementToToken(text, ref i, validDel);

            // The character filter above is only a coarse scan; the regex decides
            // whether the collected text is a well-formed identifier.
            if (!IDENTIFIER_REGEX.Match(tokenStr).Success)
            {
                throw new TokenizerException(lineNumber, charOffset, $"Malformed identifier: {tokenStr}");
            }

            TokenType type = symbolPrefix == '$' ? TokenType.DollarIdentifier : TokenType.AtIdentifier;

            tokens.Add(new Token(type, tokenStr, lineNumber, charOffset));
            charOffset += 1 + tokenStr.Length; // 1 char symbol + token length characters read.
        }
示例#4
0
        /// <summary>
        /// Extracts the token based on the provided arguments. Increments the
        /// loop counter but not the charOffset/lineNumber.
        /// </summary>
        /// <param name="text">The character stream to tokenize (in string
        /// format).</param>
        /// <param name="i">The character offset (will be modified). On return
        /// it indexes the last character that was consumed, or one position
        /// before the starting offset when nothing was consumed.</param>
        /// <param name="ValidCharDel">A delegate to determine what is a valid
        /// character and thus part of the token. True means the character is
        /// accepted, false means it is not.</param>
        /// <param name="IllegalCharDel">A delegate, which may be null, whereby
        /// not being null will cause an exception to be thrown if it returns
        /// true. It is consulted before ValidCharDel for every character.
        /// </param>
        /// <returns>The extracted token. This is the empty string when the
        /// very first character is not valid; callers decide whether an empty
        /// token is an error.</returns>
        /// <exception cref="TokenizerException">If IllegalCharDel is not null
        /// and it detects an illegal character.</exception>
        private string ExtractElementToToken(string text, ref int i, ValidCharDelegate ValidCharDel, IllegalCharDelegate IllegalCharDel)
        {
            Assert(text != null);
            Assert(i < text.Length);
            Assert(ValidCharDel != null);

            bool          isValidChar;
            int           tempCharOffset = charOffset; // Local copy so error positions can advance without touching charOffset.
            StringBuilder stringBuilder  = new StringBuilder();

            do
            {
                char c = text[i];

                if (IllegalCharDel != null && IllegalCharDel(c))
                {
                    throw new TokenizerException(lineNumber, tempCharOffset, $"Unexpected character: {c}");
                }

                isValidChar = ValidCharDel(c);
                if (isValidChar)
                {
                    stringBuilder.Append(c);
                    i++; // Only advance if it's a valid character.
                }

                tempCharOffset++;
            } while (isValidChar && i < text.Length);

            // The calling loop increments i itself once we return, so rewind by one here.
            // After that increment the caller looks at the character that terminated the
            // scan above (or at the end of the input) and handles it as needed.
            i--;

            // No non-empty assertion here on purpose: callers in this class handle an
            // empty result themselves (empty quoted string, malformed identifier) and
            // report it as a TokenizerException.
            return stringBuilder.ToString();
        }
示例#5
0
        /// <summary>
        /// Consumes a quoted string. Between the quotation marks a tab and
        /// any character with a code of 32 or above are accepted, except for
        /// code 127 and the quotation mark itself.
        /// </summary>
        /// <param name="text">The character stream to be tokenized.</param>
        /// <param name="i">The current index offset; expected to point at the
        /// opening quotation mark on entry. On success it indexes the closing
        /// quotation mark.</param>
        /// <exception cref="TokenizerException">If the opening quote is the
        /// last character, the string is empty, or the closing quotation mark
        /// is missing (including reaching the end of the input first).
        /// </exception>
        private void ConsumeQuotedString(string text, ref int i)
        {
            Assert(text != null);
            Assert(i < text.Length);

            // Step over the opening quotation mark. If nothing follows it, the quote
            // can never be closed, so report that right away.
            i++;
            if (i >= text.Length)
            {
                throw new TokenizerException(lineNumber, charOffset, "Found starting quote at EOF.");
            }

            ValidCharDelegate isStringChar = ch => ch != '"' && ch != 127 && (ch == '\t' || ch >= 32);
            string            content      = ExtractElementToToken(text, ref i, isStringChar);

            // The extraction stops short of the closing quotation mark, so move onto the
            // position where it has to be and validate what is actually there.
            i++;

            if (content.Length <= 0)
            {
                throw new TokenizerException(lineNumber, charOffset, "Cannot have an empty quoted string.");
            }

            if (i >= text.Length)
            {
                throw new TokenizerException(lineNumber, charOffset, "Quotation mark not found (EOF).");
            }

            if (text[i] != '"')
            {
                throw new TokenizerException(lineNumber, charOffset, $"Could not find ending quotation mark, got '{text[i]}' instead.");
            }

            Token quotedToken = new Token(TokenType.QuotedString, content, lineNumber, charOffset);
            tokens.Add(quotedToken);
            charOffset += content.Length + 2; // +2 for two quotation marks.
        }
示例#6
0
 /// <summary>
 /// Convenience overload that extracts a token without any illegal
 /// character check. Increments the loop counter but not the
 /// charOffset/lineNumber. Equivalent to calling the four-argument
 /// ExtractElementToToken with a null illegal-character delegate.
 /// </summary>
 /// <param name="text">The character stream to tokenize (in string
 /// format).</param>
 /// <param name="i">The character offset (will be modified).</param>
 /// <param name="ValidCharDel">A delegate to determine what is a valid
 /// character and thus part of the token. True means the character is
 /// accepted, false means it is not.</param>
 /// <returns>The extracted token.</returns>
 private string ExtractElementToToken(string text, ref int i, ValidCharDelegate ValidCharDel)
     => ExtractElementToToken(text, ref i, ValidCharDel, null);