Example #1
0
        /// <summary>
        /// Measures the operator that begins at the current stream position.
        /// A user-defined infix operator of the form <c>%name%</c> is tried
        /// first; anything else is delegated to the fixed-length operator lookup.
        /// </summary>
        /// <param name="cs">Character stream positioned at the operator candidate.</param>
        /// <returns>
        /// Length of the <c>%...%</c> operator including both delimiters,
        /// 1 when a line break is reached before the closing '%', or otherwise
        /// the value returned by <c>Get3CharOrShorterOperatorLength</c>.
        /// </returns>
        /// <remarks>
        /// This method only measures: the stream position is put back to where
        /// it was on entry before every return, including the fall-through path.
        /// </remarks>
        private static int GetNCharOperatorLength(CharacterStream cs) {
            // R allows user-defined infix operators. These have the form of
            // a string of characters delimited by the '%' character. The string
            // can contain any printable character except '%'.
            if (cs.CurrentChar == '%' && !char.IsWhiteSpace(cs.NextChar)) {
                // In case of broken or partially typed operators
                // make sure we terminate at whitespace or end of the line
                // so in 'x <- y % z' '% z' is not an operator.
                int start = cs.Position;

                cs.MoveToNextChar();

                while (!cs.IsEndOfStream() && !cs.IsWhiteSpace()) {
                    if (cs.CurrentChar == '%') {
                        // Closing delimiter found: include it in the length.
                        cs.MoveToNextChar();

                        int length = cs.Position - start;
                        cs.Position = start;

                        return length;
                    }

                    if (cs.IsAtNewLine()) {
                        // x <- y %abcd
                        cs.Position = start;
                        return 1;
                    }

                    cs.MoveToNextChar();
                }

                // The scan stopped at whitespace or at the end of the stream
                // without seeing the closing '%'. Rewind so that the fallback
                // lookup below examines the original candidate instead of
                // whatever follows the partially typed operator.
                cs.Position = start;
            }

            return Get3CharOrShorterOperatorLength(cs);
        }
Example #2
0
        /// <summary>
        /// Scans a quoted string that starts at the current stream position,
        /// honoring backslash escapes, and reports its range through
        /// <paramref name="addToken"/>.
        /// </summary>
        /// <param name="openQuote">Quote character that terminates the string when seen unescaped.</param>
        /// <param name="cs">Character stream positioned at the opening quote.</param>
        /// <param name="addToken">
        /// Callback receiving the start position and the length of the scanned
        /// range. It is invoked only when the length is greater than zero.
        /// </param>
        public static void HandleString(char openQuote, CharacterStream cs, Action<int, int> addToken) {
            int tokenStart = cs.Position;

            // Step over the opening quote.
            cs.MoveToNextChar();

            bool scanning = !cs.IsEndOfStream();
            while (scanning) {
                char current = cs.CurrentChar;

                if (current == openQuote) {
                    // Closing quote belongs to the token.
                    cs.MoveToNextChar();
                    scanning = false;
                } else {
                    if (current == '\\') {
                        // Skip the backslash; the advance below then skips
                        // the escaped character so it cannot close the string.
                        cs.MoveToNextChar();
                    }

                    // Stop when the stream cannot advance any further.
                    scanning = cs.MoveToNextChar();
                }
            }

            int tokenLength = cs.Position - tokenStart;
            if (tokenLength > 0) {
                addToken(tokenStart, tokenLength);
            }
        }
Example #3
0
        /// <summary>
        /// Returns the length of the operator found at the current stream
        /// position, or zero when the character sequence is not an operator.
        /// </summary>
        /// <param name="cs">Character stream positioned at the operator candidate.</param>
        /// <returns>Operator length in characters; zero if there is no operator.</returns>
        public static int OperatorLength(CharacterStream cs) {
            // Operator syntax reference:
            // http://stat.ethz.ch/R-manual/R-patched/library/base/html/Syntax.html

            // Candidates are probed longest first; the N-character check
            // falls back to progressively shorter operators on its own.
            int operatorLength = GetNCharOperatorLength(cs);
            return operatorLength;
        }
Example #4
0
        /// <summary>
        /// Scans a comment that runs from the current stream position up to
        /// (but not including) the next line break or the end of the stream.
        /// </summary>
        /// <param name="cs">Character stream positioned at the start of the comment.</param>
        /// <param name="addToken">
        /// Callback receiving the start position and the length of the comment.
        /// It is not invoked when nothing was consumed.
        /// </param>
        public static void HandleEolComment(CharacterStream cs, Action<int, int> addToken) {
            int commentStart = cs.Position;

            while (true) {
                if (cs.IsEndOfStream() || cs.IsAtNewLine()) {
                    break;
                }

                cs.MoveToNextChar();
            }

            int commentLength = cs.Position - commentStart;
            if (commentLength <= 0) {
                return;
            }

            addToken(commentStart, commentLength);
        }
Example #5
0
        /// <summary>
        /// Checks whether the three characters at the current stream position
        /// form a known operator and, if not, falls back to the check for
        /// shorter operators.
        /// </summary>
        /// <param name="cs">Character stream positioned at the operator candidate.</param>
        /// <returns>
        /// 3 when the candidate is found in <c>_threeChars</c>; otherwise the
        /// value returned by <c>Get2CharOrShorterOperatorLength</c>.
        /// </returns>
        private static int Get3CharOrShorterOperatorLength(CharacterStream cs) {
            const int CandidateLength = 3;

            if (cs.DistanceFromEnd >= CandidateLength) {
                string candidate = cs.GetSubstringAt(cs.Position, CandidateLength);

                // The binary search runs only for a full-length candidate.
                bool isKnownOperator =
                    candidate.Length == CandidateLength &&
                    Array.BinarySearch<string>(_threeChars, candidate) >= 0;

                if (isKnownOperator) {
                    return CandidateLength;
                }
            }

            return Get2CharOrShorterOperatorLength(cs);
        }
Example #6
0
        /// <summary>
        /// Checks whether the two characters at the current stream position
        /// form a known operator and, if not, falls back to the
        /// single-character check.
        /// </summary>
        /// <param name="cs">Character stream positioned at the operator candidate.</param>
        /// <returns>
        /// 2 when the candidate is found in <c>_twoChars</c>; otherwise the
        /// value returned by <c>GetSingleCharOperatorLength</c> for the
        /// current character.
        /// </returns>
        internal static int Get2CharOrShorterOperatorLength(CharacterStream cs) {
            const int CandidateLength = 2;

            if (cs.DistanceFromEnd >= CandidateLength) {
                string candidate = cs.GetSubstringAt(cs.Position, CandidateLength);

                // The binary search runs only for a full-length candidate.
                bool isKnownOperator =
                    candidate.Length == CandidateLength &&
                    Array.BinarySearch<string>(_twoChars, candidate) >= 0;

                if (isKnownOperator) {
                    return CandidateLength;
                }
            }

            return GetSingleCharOperatorLength(cs.CurrentChar);
        }
Example #7
0
        /// <summary>
        /// Advances the stream past an identifier that starts at the current
        /// position. Nothing is consumed when the current position does not
        /// satisfy <paramref name="isIdentifierLeadCharacter"/>.
        /// </summary>
        /// <param name="cs">Character stream positioned at the identifier candidate.</param>
        /// <param name="isIdentifierLeadCharacter">Predicate deciding whether the stream is at a valid first identifier character.</param>
        /// <param name="isIdentifierCharacter">Predicate deciding whether the stream is at a valid identifier character.</param>
        public static void SkipIdentifier(CharacterStream cs, Func<CharacterStream, bool> isIdentifierLeadCharacter, Func<CharacterStream, bool> isIdentifierCharacter) {
            // The lead-character predicate is consulted before the
            // end-of-stream check, exactly in this order.
            if (!isIdentifierLeadCharacter(cs) || cs.IsEndOfStream()) {
                return;
            }

            // Consume characters until whitespace, a non-identifier character,
            // or a failed advance stops the scan.
            bool advanced = true;
            while (advanced && !cs.IsWhiteSpace() && isIdentifierCharacter(cs)) {
                advanced = cs.MoveToNextChar();
            }
        }
Example #8
0
        /// <summary>
        /// Produces a copy of the string in which every run of whitespace is
        /// replaced by a single space and the result is trimmed at both ends.
        /// </summary>
        /// <param name="s">String to normalize; may be null or empty.</param>
        /// <returns>
        /// The normalized string. A null or empty input is returned unchanged.
        /// </returns>
        /// <remarks>
        /// What counts as whitespace is decided by <c>CharacterStream</c>
        /// (its <c>SkipWhitespace</c> and <c>IsWhiteSpace</c> members).
        /// </remarks>
        public static string NormalizeWhitespace(this string s) {
            if (string.IsNullOrEmpty(s)) {
                return s;
            }

            var stream = new CharacterStream(new TextStream(s));
            var normalized = new StringBuilder();

            while (!stream.IsEndOfStream()) {
                // Collapse whatever whitespace run is here into one space.
                int runStart = stream.Position;
                stream.SkipWhitespace();
                if (stream.Position > runStart) {
                    normalized.Append(' ');
                }

                // Copy the following non-whitespace characters verbatim.
                for (; !stream.IsEndOfStream() && !stream.IsWhiteSpace(); stream.MoveToNextChar()) {
                    normalized.Append(stream.CurrentChar);
                }
            }

            return normalized.ToString().Trim();
        }
        /// <summary>
        /// Consumes the hexadecimal digits at the current stream position and
        /// an optional trailing 'L' suffix.
        /// </summary>
        /// <param name="cs">Character stream positioned just after the hex prefix.</param>
        /// <param name="start">Position where the whole number started.</param>
        /// <returns>Number of characters between <paramref name="start"/> and the final stream position.</returns>
        internal static int HandleHex(CharacterStream cs, int start) {
            while (true) {
                if (!CharacterStream.IsHex(cs.CurrentChar)) {
                    break;
                }

                cs.MoveToNextChar();
            }

            // TODO: handle C99 floating point hex syntax like 0x1.1p-2
            bool hasLongSuffix = cs.CurrentChar == 'L';
            if (hasLongSuffix) {
                cs.MoveToNextChar();
            }

            int consumed = cs.Position - start;
            return consumed;
        }
Example #10
0
 /// <summary>
 /// Checks whether the stream text in the range [start, end) parses as a
 /// double using the invariant culture.
 /// </summary>
 /// <param name="cs">Character stream that supplies the text.</param>
 /// <param name="start">Position of the first character of the candidate.</param>
 /// <param name="end">Position just past the last character of the candidate.</param>
 /// <returns>True when the text parses as a double; otherwise false.</returns>
 private static bool IsValidDouble(CharacterStream cs, int start, int end) {
     const NumberStyles Styles = NumberStyles.Number | NumberStyles.AllowExponent;

     string candidate = cs.GetSubstringAt(start, end - start);

     // Only the success flag matters; the parsed value is discarded.
     double ignored;
     bool parsed = Double.TryParse(candidate, Styles, CultureInfo.InvariantCulture, out ignored);
     return parsed;
 }
Example #11
0
 /// <summary>
 /// Advances the stream until it is at a non-whitespace character or at
 /// the end of the stream.
 /// </summary>
 /// <param name="cs">Character stream to advance.</param>
 internal static void SkipWhitespace(CharacterStream cs) {
     while (true) {
         if (cs.IsEndOfStream() || !cs.IsWhiteSpace()) {
             return;
         }

         cs.MoveToNextChar();
     }
 }
Example #12
0
        /// <summary>
        /// Tries to scan the imaginary part of a complex number: optional
        /// whitespace, a sign, a number and a terminating 'i'.
        /// </summary>
        /// <param name="cs">Character stream positioned right after the real part.</param>
        /// <returns>
        /// Number of characters consumed from the entry position, including
        /// any skipped whitespace, when an imaginary part is found; otherwise
        /// zero. The stream position is not restored when zero is returned.
        /// </returns>
        public static int HandleImaginaryPart(CharacterStream cs) {
            int origin = cs.Position;

            // Check if this is actually complex number
            NumberTokenizer.SkipWhitespace(cs);

            // A sign must separate the real part from the imaginary part.
            if (cs.CurrentChar != '+' && cs.CurrentChar != '-') {
                return 0;
            }

            cs.MoveToNextChar();

            // A second consecutive sign is stepped over as well.
            if (cs.CurrentChar == '+' || cs.CurrentChar == '-') {
                cs.MoveToNextChar();
            }

            if (NumberTokenizer.HandleNumber(cs) <= 0) {
                return 0;
            }

            if (cs.CurrentChar != 'i') {
                return 0;
            }

            // Include the 'i' suffix in the reported length.
            cs.MoveToNextChar();
            return cs.Position - origin;
        }
Example #13
0
        // public static object CharacterSteam { get; private set; }

        /// <summary>
        /// Scans a numeric literal that starts at the current stream position:
        /// an optional sign followed by either a hexadecimal number or a
        /// decimal number with optional fraction, exponent and 'L' suffix.
        /// </summary>
        /// <param name="cs">Character stream positioned at the number candidate.</param>
        /// <returns>
        /// Number of characters that make up the number, counted from the
        /// entry position (sign included), or zero when the text is not a
        /// number. The stream position is not restored when zero is returned.
        /// </returns>
        public static int HandleNumber(CharacterStream cs) {
            int start = cs.Position;

            if (cs.CurrentChar == '-' || cs.CurrentChar == '+') {
                cs.MoveToNextChar();
            }

            // Hexadecimal prefix: both "0x" and "0X" are accepted.
            if (cs.CurrentChar == '0' && (cs.NextChar == 'x' || cs.NextChar == 'X')) {
                cs.Advance(2);
                return HandleHex(cs, start);
            }

            // 'x' immediately followed by a hex digit is scanned as hex too.
            if (cs.CurrentChar == 'x' && CharacterStream.IsHex(cs.NextChar)) {
                cs.MoveToNextChar();
                return HandleHex(cs, start);
            }

            int integerPartLength = 0;
            int fractionPartLength = 0;
            bool isDouble = false;

            // Collect decimals (there may be none, like in .1e+20).
            while (cs.IsDecimal()) {
                cs.MoveToNextChar();
                integerPartLength++;
            }

            if (cs.CurrentChar == '.') {
                isDouble = true;

                // float/double
                cs.MoveToNextChar();

                // If we've seen a dot we need to collect the fractional part, if any.
                while (cs.IsDecimal()) {
                    cs.MoveToNextChar();
                    fractionPartLength++;
                }
            }

            if (integerPartLength + fractionPartLength == 0) {
                return 0; // +e or +.e is not a number and neither is lonely + or -
            }

            int numberLength;
            if (cs.CurrentChar == 'e' || cs.CurrentChar == 'E') {
                isDouble = true;
                numberLength = HandleExponent(cs, start);
            } else {
                numberLength = cs.Position - start;
            }

            // Verify double format: the scanned text must actually parse.
            if (isDouble && !IsValidDouble(cs, start, cs.Position)) {
                numberLength = 0;
            }

            if (numberLength > 0) {
                // Skip over trailing 'L' if any.
                if (cs.CurrentChar == 'L') {
                    cs.MoveToNextChar();
                    numberLength++;
                }
            }

            return numberLength;
        }
Example #14
0
        /// <summary>
        /// Scans the exponent part of a number. The stream must be positioned
        /// at the 'e' or 'E' that introduces the exponent.
        /// </summary>
        /// <param name="cs">Character stream positioned at the exponent marker.</param>
        /// <param name="start">Position where the whole number started.</param>
        /// <returns>
        /// Number of characters between <paramref name="start"/> and the end
        /// of the exponent, or zero when the exponent is malformed: nothing
        /// follows the marker, no digits follow the optional sign, or a letter
        /// other than 'i' or 'L' follows the digits. The stream position is
        /// not restored when zero is returned.
        /// </returns>
        internal static int HandleExponent(CharacterStream cs, int start) {
            Debug.Assert(cs.CurrentChar == 'E' || cs.CurrentChar == 'e');

            cs.MoveToNextChar();
            if (cs.IsWhiteSpace() || cs.IsEndOfStream()) {
                // 0.1E or 1e
                return 0;
            }

            if (cs.CurrentChar == '-' || cs.CurrentChar == '+') {
                cs.MoveToNextChar();
            }

            int digitsStart = cs.Position;

            // collect decimals
            while (cs.IsDecimal()) {
                cs.MoveToNextChar();
            }

            // An exponent needs at least one digit whether or not a sign
            // was present: neither 1.0E- nor 1e) is a number.
            if (digitsStart == cs.Position) {
                return 0;
            }

            // Technically if letter or braces follows this is not
            // a number but we'll leave it alone for now.

            // TODO: This code is not language specific and yet it currently
            // handles complex 'i' as well as R-specific 'L' suffix.
            // Ideally this needs to be extended in a way so language-specific
            // tokenizer can specify options or control number format.
            if (char.IsLetter(cs.CurrentChar) && cs.CurrentChar != 'i' && cs.CurrentChar != 'L') {
                return 0;
            }

            return cs.Position - start;
        }