/// <summary>
/// Reads in a number (only integers, not floats) starting at
/// <paramref name="i"/> and appends it to the token list as a
/// <see cref="TokenType.Number"/> token.
/// </summary>
/// <param name="text">The character stream to be tokenized.</param>
/// <param name="i">The current index offset. It is updated by
/// ExtractElementToToken while the digits are consumed.</param>
/// <exception cref="TokenizerException">If a character that is neither a
/// digit nor one of VALID_SYMBOLS_AFTER_NUMBER is encountered, or if the
/// consumed digits cannot be parsed as an int (for example because the
/// value does not fit in an int).</exception>
private void ConsumeNumber(string text, ref int i)
{
    Assert(text != null);
    Assert(i < text.Length);

    ValidCharDelegate validDel = c => char.IsDigit(c);
    IllegalCharDelegate illegalDel = c => !(char.IsDigit(c) || VALID_SYMBOLS_AFTER_NUMBER.Contains(c));

    string tokenStr = ExtractElementToToken(text, ref i, validDel, illegalDel);

    // Floating point numbers are not supported. No explicit '.' check is
    // needed here: tokenStr is built only from characters accepted by
    // validDel (digits), so it can never contain a decimal point. A '.' in
    // the input either trips illegalDel inside ExtractElementToToken or, if
    // it is listed in VALID_SYMBOLS_AFTER_NUMBER, simply ends the number.

    // TryParse needs an 'out' variable; only the success flag is used.
    int parsedValue;
    if (!int.TryParse(tokenStr, out parsedValue))
    {
        throw new TokenizerException(lineNumber, charOffset, $"Malformed number: {tokenStr}");
    }

    // The token records the offset at which the number started, so the
    // running offset is advanced only after the token has been added.
    tokens.Add(new Token(TokenType.Number, tokenStr, lineNumber, charOffset));
    charOffset += tokenStr.Length;
}
/// <summary>
/// Reads a run of letters beginning at <paramref name="i"/> and records it
/// as a <see cref="TokenType.Word"/> token. The stream index is moved along
/// by ExtractElementToToken, and the running character offset is advanced
/// by the length of the word.
/// </summary>
/// <param name="text">All the characters the tokenizer is tokenizing.
/// </param>
/// <param name="i">The offset at which the word starts.</param>
/// <exception cref="TokenizerException">If a digit, '$' or '@' is found
/// while reading the word (like ab$d or he0p or some@).</exception>
private void ConsumeWord(string text, ref int i)
{
    Assert(text != null);
    Assert(i < text.Length);

    // Letters extend the word; digits, '$' and '@' abort tokenization.
    ValidCharDelegate acceptsLetter = char.IsLetter;
    IllegalCharDelegate rejectsChar = ch =>
    {
        if (char.IsDigit(ch))
        {
            return true;
        }

        return ch == '$' || ch == '@';
    };

    string word = ExtractElementToToken(text, ref i, acceptsLetter, rejectsChar);

    // The token is stamped with the offset of the word's first character,
    // so the offset is bumped only after the token has been stored.
    var wordToken = new Token(TokenType.Word, word, lineNumber, charOffset);
    tokens.Add(wordToken);
    charOffset += word.Length;
}
/// <summary>
/// Collects consecutive characters accepted by
/// <paramref name="ValidCharDel"/> into a string, starting at index
/// <paramref name="i"/>. The index is advanced as characters are consumed;
/// charOffset and lineNumber are only read, never modified.
/// </summary>
/// <param name="text">The character stream to tokenize (in string
/// format).</param>
/// <param name="i">The character offset (will be modified). On return it
/// refers to the last character that was consumed.</param>
/// <param name="ValidCharDel">Decides whether a character belongs to the
/// token. True means the character is accepted, false ends the token.
/// </param>
/// <param name="IllegalCharDel">Optional (may be null). When supplied,
/// every inspected character is passed to it first, and a true result
/// causes an exception to be thrown.</param>
/// <returns>The extracted token.</returns>
/// <exception cref="TokenizerException">If IllegalCharDel is not null
/// and it detects an illegal character.</exception>
private string ExtractElementToToken(string text, ref int i, ValidCharDelegate ValidCharDel, IllegalCharDelegate IllegalCharDel)
{
    Assert(text != null);
    Assert(i < text.Length);
    Assert(ValidCharDel != null);

    // Offset of the character currently being inspected, used only for
    // error reporting.
    int errorOffset = charOffset;
    var collected = new StringBuilder();

    while (true)
    {
        char current = text[i];

        // The illegal check runs before the validity check for every
        // inspected character, including the one that ends the token.
        bool isIllegal = IllegalCharDel != null && IllegalCharDel(current);
        if (isIllegal)
        {
            throw new TokenizerException(lineNumber, errorOffset, $"Unexpected character: {current}");
        }

        if (!ValidCharDel(current))
        {
            break; // Terminating character: leave i pointing at it.
        }

        collected.Append(current);
        i++; // Only advance past characters that were accepted.
        errorOffset++;

        if (i >= text.Length)
        {
            break; // Ran off the end of the input.
        }
    }

    // Step back one position. Per the original note, the for-loop in
    // GenerateToken increments i itself, so after that increment it
    // lands on the character that stopped the scan.
    i--;

    Assert(collected.Length > 0);
    return collected.ToString();
}