/// <summary>
/// Tells whether the character at the current stream position can begin
/// a numeric literal.
/// </summary>
/// <remarks>
/// A leading '+' or '-' is ambiguous: in "12 +1" it may be a binary operator
/// or the sign of a second number. The sign is treated as part of a number
/// only when a digit or a dot follows immediately (no whitespace) and either
/// there is no preceding token or the preceding token is an opening brace,
/// an opening square bracket, a comma, a semicolon or an operator.
/// After any other token the sign is considered an operator, as in x[1]+2.
/// For this to work during partial tokenization, the tokenizer has to be
/// restarted early enough in the stream so the preceding token is available.
/// </remarks>
/// <returns>True if a number may start at the current position.</returns>
internal bool IsPossibleNumber() {
    char current = _cs.CurrentChar;

    if (current == '-' || current == '+') {
        // The sign must be immediately followed by a digit or a dot.
        bool numberFollows = CharacterStream.IsDecimal(_cs.NextChar) || _cs.NextChar == '.';
        if (!numberFollows) {
            return false;
        }

        // With no tokens before it, a signed value can only be a number.
        if (_tokens.Count == 0) {
            return true;
        }

        // Otherwise the token right before the sign decides.
        var precedingType = _tokens[_tokens.Count - 1].TokenType;
        return precedingType == RTokenType.OpenBrace
            || precedingType == RTokenType.OpenSquareBracket
            || precedingType == RTokenType.Comma
            || precedingType == RTokenType.Semicolon
            || precedingType == RTokenType.Operator;
    }

    // Hex number like 0xA1BC. Only the 0x prefix is recognized here.
    if (current == '0' && _cs.NextChar == 'x') {
        return true;
    }

    // Plain digit.
    if (_cs.IsDecimal()) {
        return true;
    }

    // Fraction without an integer part, such as .5
    return current == '.' && CharacterStream.IsDecimal(_cs.NextChar);
}
/// <summary>
/// Consumes the exponent part of a number: the 'e' or 'E' marker, an optional
/// sign and the decimal digits that follow.
/// </summary>
/// <param name="cs">Character stream positioned at the 'e' or 'E' character.</param>
/// <param name="start">Stream position at which the whole number started.</param>
/// <returns>
/// Number of characters from <paramref name="start"/> to the position after
/// the exponent digits, or 0 if the exponent is malformed. The stream position
/// is not restored when 0 is returned.
/// </returns>
internal static int HandleExponent(CharacterStream cs, int start) {
    Debug.Assert(cs.CurrentChar == 'E' || cs.CurrentChar == 'e');

    cs.MoveToNextChar();
    if (cs.IsWhiteSpace() || cs.IsEndOfStream()) {
        // 0.1E or 1e
        return 0;
    }

    // Optional exponent sign.
    if (cs.CurrentChar == '-' || cs.CurrentChar == '+') {
        cs.MoveToNextChar();
    }

    // Collect decimals.
    int digitsStart = cs.Position;
    while (cs.IsDecimal()) {
        cs.MoveToNextChar();
    }

    // An exponent needs at least one digit whether or not a sign is present:
    // neither 1.0E- nor 1e followed by punctuation, as in "1e)", is a number.
    if (digitsStart == cs.Position) {
        return 0;
    }

    // Technically if letter or braces follows this is not
    // a number but we'll leave it alone for now.

    // TODO: This code is not language specific and yet it currently
    // handles complex 'i' as well as R-specific 'L' suffix.
    // Ideally this needs to be extended in a way so language-specific
    // tokenizer can specify options or control number format.
    if (char.IsLetter(cs.CurrentChar) && cs.CurrentChar != 'i' && cs.CurrentChar != 'L') {
        return 0;
    }

    return cs.Position - start;
}
/// <summary>
/// Tells whether a character may appear in an identifier: a letter or
/// a decimal digit (as determined by CharacterStream), a dot or an underscore.
/// </summary>
/// <param name="ch">Character to test.</param>
/// <returns>True if the character is an identifier character.</returns>
public static bool IsIdentifierCharacter(char ch) {
    if (CharacterStream.IsLetter(ch)) {
        return true;
    }

    if (CharacterStream.IsDecimal(ch)) {
        return true;
    }

    return ch == '.' || ch == '_';
}
/// <summary>
/// Consumes a number at the current stream position: an optional sign
/// followed by either a hex number or a decimal number with optional
/// fractional part, exponent and trailing 'L' suffix.
/// </summary>
/// <param name="cs">Character stream positioned at the first character of the number.</param>
/// <returns>
/// Length of the number in characters including the sign and the 'L' suffix,
/// or 0 if the text is not a number. For hex input the result is whatever
/// HandleHex returns. The stream position is not restored when 0 is returned.
/// </returns>
public static int HandleNumber(CharacterStream cs) {
    int start = cs.Position;

    // Optional leading sign.
    if (cs.CurrentChar == '-' || cs.CurrentChar == '+') {
        cs.MoveToNextChar();
    }

    // Hex number with 0x prefix: skip the prefix and let HandleHex do the rest.
    if (cs.CurrentChar == '0' && cs.NextChar == 'x') {
        cs.Advance(2);
        return HandleHex(cs, start);
    }

    // Hex number written as 'x' immediately followed by a hex digit.
    if (cs.CurrentChar == 'x' && CharacterStream.IsHex(cs.NextChar)) {
        cs.MoveToNextChar();
        return HandleHex(cs, start);
    }

    int integerPartLength = 0;
    int fractionPartLength = 0;
    bool isDouble = false;

    // Collect decimals (there may be none, as in .1e+20).
    while (cs.IsDecimal()) {
        cs.MoveToNextChar();
        integerPartLength++;
    }

    if (cs.CurrentChar == '.') {
        isDouble = true; // float/double
        cs.MoveToNextChar();

        // A dot was seen: collect the fractional part, if any.
        while (cs.IsDecimal()) {
            cs.MoveToNextChar();
            fractionPartLength++;
        }
    }

    if (integerPartLength + fractionPartLength == 0) {
        return 0; // +e or +.e is not a number and neither is lonely + or -
    }

    int numberLength;
    if (cs.CurrentChar == 'e' || cs.CurrentChar == 'E') {
        isDouble = true;
        numberLength = HandleExponent(cs, start);
    } else {
        numberLength = cs.Position - start;
    }

    // Verify double format over the text consumed so far.
    if (isDouble && !IsValidDouble(cs, start, cs.Position)) {
        numberLength = 0;
    }

    // Skip over trailing 'L', if any, and count it as part of the number.
    if (numberLength > 0 && cs.CurrentChar == 'L') {
        cs.MoveToNextChar();
        numberLength++;
    }

    return numberLength;
}