Example #1
0
        /// <summary>
        /// Counts how many times <paramref name="quote"/> occurs between the
        /// current stream position and the first of: a line break, position
        /// <paramref name="tagEnd"/>, or the end of the stream. The stream
        /// position is restored before returning.
        /// </summary>
        /// <param name="tagEnd">Exclusive upper bound of the scan.</param>
        /// <param name="quote">Quote character to count.</param>
        /// <returns>The number of matching quote characters seen.</returns>
        private int CountRemainingQuotesOnLine(int tagEnd, char quote)
        {
            int savedPosition = _cs.Position;
            int found = 0;

            for (; !_cs.IsEndOfStream() && _cs.Position < tagEnd; _cs.MoveToNextChar())
            {
                char ch = _cs.CurrentChar;

                if (ch == quote)
                {
                    found++;
                    continue;
                }

                if (CharacterStream.IsNewLine(ch))
                {
                    // Only the remainder of the current line is of interest.
                    break;
                }
            }

            // Rewind so the caller sees the stream at its original position.
            _cs.Position = savedPosition;

            return found;
        }
Example #2
0
        /// <summary>
        /// Advances the stream over a string whose opening and closing
        /// separators are both <paramref name="separatorLength"/> characters long.
        /// A backslash causes the character that follows it to be skipped
        /// without being tested as a terminator.
        /// </summary>
        /// <param name="separatorLength">Length of the opening/closing separator.</param>
        /// <param name="cs">Stream positioned at the opening separator.</param>
        /// <param name="terminatorCheck">Returns true when the stream is at the closing separator.</param>
        private static void HandleString(int separatorLength, CharacterStream cs, Func<CharacterStream, bool> terminatorCheck)
        {
            // Step over the opening separator.
            cs.Advance(separatorLength);

            if (cs.IsEndOfStream())
            {
                return;
            }

            for (;;)
            {
                if (terminatorCheck(cs))
                {
                    // Consume the closing separator and stop.
                    cs.Advance(separatorLength);
                    return;
                }

                if (cs.CurrentChar == '\\')
                {
                    // Escape: step onto the escaped character so the move below skips it.
                    cs.MoveToNextChar();
                }

                if (!cs.MoveToNextChar())
                {
                    // Stream ended before the terminator was found.
                    return;
                }
            }
        }
Example #3
0
        /// <summary>
        /// Scans a quoted string that may contain backslash escapes and
        /// reports its range through <paramref name="addToken"/>.
        /// </summary>
        /// <param name="openQuote">Quote character that terminates the string.</param>
        /// <param name="cs">Stream positioned at the opening quote.</param>
        /// <param name="addToken">Callback receiving the start position and length of the scanned range.</param>
        public static void HandleString(char openQuote, CharacterStream cs, Action<int, int> addToken)
        {
            int tokenStart = cs.Position;

            // Step over the opening quote.
            cs.MoveToNextChar();

            bool finished = cs.IsEndOfStream();
            while (!finished)
            {
                if (cs.CurrentChar == openQuote)
                {
                    // Include the closing quote in the token.
                    cs.MoveToNextChar();
                    finished = true;
                }
                else
                {
                    if (cs.CurrentChar == '\\')
                    {
                        // Escape: step onto the escaped character so it is skipped below.
                        cs.MoveToNextChar();
                    }

                    // Stop when the stream cannot advance any further.
                    finished = !cs.MoveToNextChar();
                }
            }

            int tokenLength = cs.Position - tokenStart;
            if (tokenLength > 0)
            {
                addToken(tokenStart, tokenLength);
            }
        }
Example #4
0
        /// <summary>
        /// Rebuilds the string so that every stretch consumed by the stream's
        /// <c>SkipWhitespace</c> is replaced by a single space, then trims the result.
        /// </summary>
        /// <param name="s">Text to normalize; may be null or empty.</param>
        /// <returns>
        /// The normalized text, or <paramref name="s"/> itself when it is null or empty.
        /// </returns>
        public static string NormalizeWhitespace(this string s)
        {
            if (string.IsNullOrEmpty(s))
            {
                return s;
            }

            var stream = new CharacterStream(new TextStream(s));
            var result = new StringBuilder();

            while (!stream.IsEndOfStream())
            {
                var before = stream.Position;
                stream.SkipWhitespace();

                // Anything skipped collapses to one space.
                if (stream.Position > before)
                {
                    result.Append(' ');
                }

                // Copy the non-whitespace characters that follow verbatim.
                for (; !stream.IsEndOfStream() && !stream.IsWhiteSpace(); stream.MoveToNextChar())
                {
                    result.Append(stream.CurrentChar);
                }
            }

            return result.ToString().Trim();
        }
Example #5
0
 /// <summary>
 /// Advances the stream while it is not at the end and the current
 /// character is whitespace.
 /// </summary>
 /// <param name="cs">Stream to advance.</param>
 internal static void SkipWhitespace(CharacterStream cs)
 {
     for (; !cs.IsEndOfStream() && cs.IsWhiteSpace(); cs.MoveToNextChar())
     {
         // All the work happens in the loop header.
     }
 }
Example #6
0
        /// <summary>
        /// Tries to consume the imaginary part of a complex number: optional
        /// whitespace, one or two sign characters, a number, and a trailing 'i'.
        /// </summary>
        /// <param name="cs">Stream positioned right after the real part.</param>
        /// <returns>
        /// Number of characters consumed from the initial position when an
        /// imaginary part was recognized; otherwise 0. On failure the stream
        /// position is NOT restored by this method.
        /// </returns>
        public static int HandleImaginaryPart(CharacterStream cs)
        {
            int origin = cs.Position;

            // Whitespace may separate the real part from the sign.
            NumberTokenizer.SkipWhitespace(cs);

            if (cs.CurrentChar != '+' && cs.CurrentChar != '-')
            {
                return 0;
            }

            cs.MoveToNextChar();

            // A second sign directly after the first one is also accepted.
            if (cs.CurrentChar == '+' || cs.CurrentChar == '-')
            {
                cs.MoveToNextChar();
            }

            if (NumberTokenizer.HandleNumber(cs) <= 0)
            {
                return 0;
            }

            if (cs.CurrentChar != 'i')
            {
                return 0;
            }

            // Include the 'i' suffix in the reported length.
            cs.MoveToNextChar();
            return cs.Position - origin;
        }
        /// <summary>
        /// Walks a four-character stream and checks position, current character,
        /// peeking, text comparison, and end-of-stream behavior.
        /// </summary>
        public void TextIterator_Simple()
        {
            var stream = new CharacterStream(new StringTextProvider("abcd"));

            // Initial state: positioned on the first character.
            Assert.AreEqual(4, stream.TextProvider.Length);
            Assert.AreEqual(0, stream.Position);
            Assert.AreEqual('a', stream.CurrentChar);
            Assert.AreEqual(new DecodedChar('a', 1), TextHelpers.DecodeCurrentChar(stream));

            // Raw comparison against the provider.
            Assert.IsTrue(stream.TextProvider.CompareTo(stream.Position, "ab", ignoreCase: false));
            Assert.IsFalse(stream.TextProvider.CompareTo(stream.Position, "abcde", ignoreCase: false));

            // Decoded comparison reports how many characters matched.
            Assert.IsTrue(TextHelpers.CompareCurrentDecodedString(stream, "ab", ignoreCase: false, matchLength: out int matchedChars));
            Assert.AreEqual(2, matchedChars);
            Assert.IsFalse(TextHelpers.CompareCurrentDecodedString(stream, "abcde", ignoreCase: false, matchLength: out _));

            // Move one character forward and peek around the new position.
            Assert.IsFalse(stream.IsAtEnd);
            Assert.IsTrue(stream.Advance(1));
            Assert.AreEqual(1, stream.Position);
            Assert.AreEqual('b', stream.CurrentChar);
            Assert.AreEqual('a', stream.Peek(-1));
            Assert.AreEqual('c', stream.Peek(1));
            Assert.AreEqual('d', stream.Peek(2));
            Assert.AreEqual(0, stream.Peek(3));
            Assert.AreEqual(0, stream.Peek(4));

            // Advancing to the end succeeds; advancing past it does not.
            Assert.IsTrue(stream.Advance(3));
            Assert.IsTrue(stream.IsAtEnd);

            Assert.IsFalse(stream.Advance(1));
        }
Example #8
0
        /// <summary>
        /// Runs the whitespace checkers over the given text: first the
        /// file-level checkers, then the per-character checkers at every
        /// whitespace position.
        /// </summary>
        /// <param name="tp">Text to validate.</param>
        /// <returns>
        /// The collected validation errors; an empty collection when the linter is disabled.
        /// </returns>
        public IReadOnlyCollection<IValidationError> ValidateWhitespace(ITextProvider tp)
        {
            if (!_linterEnabled)
            {
                return Enumerable.Empty<IValidationError>().ToList();
            }

            var warnings = _whitespaceFileCheckers
                .SelectMany(fileChecker => fileChecker(tp, _settings.LintOptions, _projectedBuffer))
                .ToList();

            var cs = new CharacterStream(tp);
            while (!cs.IsEndOfStream())
            {
                if (cs.IsWhiteSpace())
                {
                    // Unrolled since most checkers return null.
                    foreach (var charChecker in _whitespaceCharCheckers)
                    {
                        var finding = charChecker(cs, _settings.LintOptions);
                        if (finding != null)
                        {
                            warnings.Add(finding);
                        }
                    }
                }

                cs.MoveToNextChar();
            }

            return warnings.ToList();
        }
Example #9
0
        /// <summary>
        /// Advances the stream past an identifier. Nothing is consumed unless
        /// the stream starts at an identifier lead character.
        /// </summary>
        /// <param name="cs">Stream to advance.</param>
        /// <param name="isIdentifierLeadCharacter">Tests whether the stream is at a valid first identifier character.</param>
        /// <param name="isIdentifierCharacter">Tests whether the stream is at a valid identifier character.</param>
        public static void SkipIdentifier(CharacterStream cs, Func<CharacterStream, bool> isIdentifierLeadCharacter, Func<CharacterStream, bool> isIdentifierCharacter)
        {
            if (!isIdentifierLeadCharacter(cs) || cs.IsEndOfStream())
            {
                return;
            }

            // Stop at whitespace, at the first non-identifier character,
            // or when the stream cannot advance any further.
            while (!cs.IsWhiteSpace() && isIdentifierCharacter(cs) && cs.MoveToNextChar())
            {
            }
        }
Example #10
0
        /// <summary>
        /// Decodes a text containing a plain character, a unicode escape and
        /// a simple character escape, checking each step and the fully decoded text.
        /// </summary>
        public void TextHelpers_Decode1()
        {
            string encoded = @"u\52 \l(foo.jpg)";
            var stream = new CharacterStream(new StringTextProvider(encoded));

            // 'u' is a plain, unescaped character.
            Assert.IsFalse(TextHelpers.AtEscape(stream));
            Assert.IsFalse(TextHelpers.AtUnicodeEscape(stream));
            Assert.AreEqual(new DecodedChar('u', 1), TextHelpers.DecodeCurrentChar(stream));
            Assert.IsTrue(stream.Advance(1));

            // "\52 " is a unicode escape for 'R' and spans four characters.
            Assert.IsTrue(TextHelpers.AtEscape(stream));
            Assert.IsTrue(TextHelpers.AtUnicodeEscape(stream));
            Assert.AreEqual('R', TextHelpers.DecodeCurrentChar(stream).Char);
            Assert.AreEqual(4, TextHelpers.DecodeCurrentChar(stream).EncodedLength);
            Assert.IsTrue(stream.Advance(4));

            // "\l" is a plain character escape for 'l' and spans two characters.
            Assert.IsTrue(TextHelpers.AtEscape(stream));
            Assert.IsFalse(TextHelpers.AtUnicodeEscape(stream));
            Assert.AreEqual('l', TextHelpers.DecodeCurrentChar(stream).Char);
            Assert.AreEqual(2, TextHelpers.DecodeCurrentChar(stream).EncodedLength);
            Assert.IsTrue(stream.Advance(2));

            // '(' is unescaped again.
            Assert.IsFalse(TextHelpers.AtEscape(stream));
            Assert.IsFalse(TextHelpers.AtUnicodeEscape(stream));
            Assert.AreEqual(new DecodedChar('(', 1), TextHelpers.DecodeCurrentChar(stream));

            // The whole text decodes in one call as well.
            Assert.AreEqual(@"uRl(foo.jpg)", TextHelpers.DecodeText(stream.TextProvider, 0, encoded.Length, forStringToken: false));
        }
Example #11
0
        /// <summary>
        /// Runs the whitespace checkers over the given text: first the
        /// file-level checkers, then the per-character checkers at every
        /// whitespace position.
        /// </summary>
        /// <param name="tp">Text to validate.</param>
        /// <returns>
        /// The collected validation errors; an empty collection when linting is disabled.
        /// </returns>
        public IReadOnlyCollection<IValidationError> ValidateWhitespace(ITextProvider tp)
        {
            if (!_settings.LintOptions.Enabled)
            {
                return Enumerable.Empty<IValidationError>().ToList();
            }

            var warnings = _whitespaceFileCheckers
                .SelectMany(fileChecker => fileChecker(tp, _settings.LintOptions))
                .ToList();

            for (var cs = new CharacterStream(tp); !cs.IsEndOfStream(); cs.MoveToNextChar())
            {
                if (!cs.IsWhiteSpace())
                {
                    continue;
                }

                // Unrolled since most checkers return null.
                foreach (var charChecker in _whitespaceCharCheckers)
                {
                    var finding = charChecker(cs, _settings.LintOptions);
                    if (finding != null)
                    {
                        warnings.Add(finding);
                    }
                }
            }

            return warnings.ToList();
        }
Example #12
0
        /// <summary>
        /// Reads a token, peeks at the next one, and then reads on, checking
        /// that the peeked token is returned again by the following read.
        /// </summary>
        public void ReadAndPeakAndRead()
        {
            byte[] stringBytes = Encoding.UTF8.GetBytes(
                "123-.27'Test'");
            Token tok = Token.Empty;

            using (MemoryStream ms = new MemoryStream(stringBytes))
            using (StreamReader sr = new StreamReader(ms))
            {
                CharacterStream cs = new CharacterStream(sr);
                TokenStream     ts = new TokenStream(cs);

                Assert.IsTrue(
                    ts.Read(out tok), "Failed to read token");
                AssertToken(tok, Tokens.Number, "123");

                // Peek must not consume: the next Read returns the same token.
                // The message names the failing operation (it previously said "read").
                Assert.IsTrue(
                    ts.Peek(out tok), "Failed to peek token");
                AssertToken(tok, Tokens.Number, "-.27");

                Assert.IsTrue(
                    ts.Read(out tok), "Failed to read token");
                AssertToken(tok, Tokens.Number, "-.27");

                Assert.IsTrue(
                    ts.Read(out tok), "Failed to read token");
                AssertToken(tok, Tokens.String, "Test");
            }
        }
Example #13
0
        /// <summary>
        /// Determines whether the text at the current stream position can be
        /// the start of a number, using the previously collected token to
        /// decide whether a leading sign belongs to the number.
        /// </summary>
        /// <returns>True if a number may start at the current position.</returns>
        internal bool IsPossibleNumber()
        {
            // It is hard to tell in 12 +1 whether it is a sum of numbers or
            // a sequence. If an operator or punctuation (comma, semicolon)
            // precedes the sign then the sign is part of the number.
            // Note that if the preceding token is one of the function ()
            // or indexing braces [] then the sign is an operator like in x[1]+2.
            // In other cases plus or minus is also the start of an operator.
            // It is important that in partial tokenization the classifier removes
            // enough tokens so the tokenizer can start its work early enough
            // in the stream to be able to figure out numbers properly.

            var current = _cs.CurrentChar;

            if (current == '-' || current == '+')
            {
                // The next character must be a decimal digit or a dot, otherwise
                // it is not a number. No whitespace is allowed.
                if (!CharacterStream.IsDecimal(_cs.NextChar) && _cs.NextChar != '.')
                {
                    return false;
                }

                // At the start of the file this can only be a number.
                if (_tokens.Count == 0)
                {
                    return true;
                }

                var precedingType = _tokens[_tokens.Count - 1].TokenType;

                return precedingType == RTokenType.OpenBrace
                    || precedingType == RTokenType.OpenSquareBracket
                    || precedingType == RTokenType.Comma
                    || precedingType == RTokenType.Semicolon
                    || precedingType == RTokenType.Operator;
            }

            // R only supports 0xABCD. x0A is not legal.
            // Hex number like 0xA1BC.
            if (current == '0' && _cs.NextChar == 'x')
            {
                return true;
            }

            if (_cs.IsDecimal())
            {
                return true;
            }

            // Fraction without a leading zero, like .5
            return current == '.' && CharacterStream.IsDecimal(_cs.NextChar);
        }
Example #14
0
        /// <summary>
        /// Checks whether the text in the range [start, end) parses as a
        /// double using the invariant culture, allowing an exponent.
        /// </summary>
        /// <param name="cs">Stream providing the text.</param>
        /// <param name="start">Start position of the candidate text.</param>
        /// <param name="end">Position just past the candidate text.</param>
        /// <returns>True if the candidate text parses as a double.</returns>
        private static bool IsValidDouble(CharacterStream cs, int start, int end)
        {
            const NumberStyles styles = NumberStyles.Number | NumberStyles.AllowExponent;

            string candidate = cs.GetSubstringAt(start, end - start);
            double parsed;

            return double.TryParse(candidate, styles, CultureInfo.InvariantCulture, out parsed);
        }
Example #15
0
        /// <summary>
        /// Prepares the tokenizer for a new run: creates a character stream
        /// over the text, positions it at <paramref name="start"/>, and
        /// resets the token collection.
        /// </summary>
        /// <param name="textProvider">Text to tokenize.</param>
        /// <param name="start">Position at which tokenization begins.</param>
        /// <param name="length">Length of the range to tokenize; only validated here (debug builds).</param>
        internal virtual void InitializeTokenizer(ITextProvider textProvider, int start, int length)
        {
            // The requested range must lie inside the text.
            Debug.Assert(0 <= start && 0 <= length && textProvider.Length >= start + length);

            _cs = new CharacterStream(textProvider);
            _cs.Position = start;

            // Start with an empty token collection.
            _tokens = new TextRangeCollection<T>();
        }
Example #16
0
        /// <summary>
        /// Returns the length of the operator at the current stream position,
        /// or zero if the character sequence is not an operator.
        /// </summary>
        /// <param name="cs">Stream positioned at the operator candidate.</param>
        /// <returns>Operator length as computed by <c>GetNCharOperatorLength</c>.</returns>
        public static int OperatorLength(CharacterStream cs)
        {
            // Operator syntax reference:
            // http://stat.ethz.ch/R-manual/R-patched/library/base/html/Syntax.html

            // Longest candidates are tried first.
            int operatorLength = GetNCharOperatorLength(cs);

            return operatorLength;
        }
Example #17
0
 /// <summary>
 /// no_tab_linter: checks that only spaces are used, never tabs.
 /// </summary>
 /// <param name="cs">Stream positioned at the character to check.</param>
 /// <param name="options">Lint options; the check runs only when <c>NoTabs</c> is set.</param>
 /// <returns>A warning covering the tab character, or null if there is nothing to report.</returns>
 private static IValidationError TabCheck(CharacterStream cs, LintOptions options)
 {
     if (!options.NoTabs || cs.CurrentChar != '\t' || cs.Position >= cs.Length)
     {
         return null;
     }

     var tabRange = new TextRange(cs.Position, 1);
     return new ValidationWarning(tabRange, Resources.Lint_Tabs, ErrorLocation.Token);
 }
        /// <summary>
        /// Exercises <c>ICharacterStream</c> in two scenarios: push-back and
        /// end-of-input handling over "hello world", and classification of
        /// a single "\n" character.
        /// </summary>
        /// <returns>True if every check passed; otherwise false.</returns>
        public async Task<bool> RunAsync()
        {
            bool result = true;

            // Scenario 1: push-back and repeated reads at the end of input.
            // 'using' guarantees the stream is disposed even if an await throws.
            using (MemoryStream memory_stream = new MemoryStream())
            {
                byte[] buffer = Encoding.UTF8.GetBytes("hello world");
                IInputStream input_stream = new InputStream();

                // NOTE(review): Write leaves the MemoryStream position at the end;
                // presumably InputStream.Initialize rewinds it - confirm.
                memory_stream.Write(buffer, 0, buffer.Length);

                input_stream.Initialize(memory_stream);
                ICharacterStream cStream = new CharacterStream();
                cStream.Initialize(input_stream);

                ICharacter c1 = await cStream.Get();

                result = result && (null != c1); // ICharacterStream never returns null

                ICharacter ch = null;
                // verify that Get uses a pushed character
                cStream.Push(c1);
                ch = await cStream.Get();

                result = result && (c1.Info == ch.Info);

                // Drain the stream until the NULL character is produced.
                while (ch.Kind != CharKind.NULL)
                {
                    ch = await cStream.Get();
                }
                ch = await cStream.Get(); // verify that Get() continues to get the NULL

                result = result && (ch.Kind == CharKind.NULL);
            }

            // Scenario 2: a single "\n" is classified, then NULL follows.
            using (MemoryStream memory_stream = new MemoryStream())
            {
                byte[] buffer = Encoding.UTF8.GetBytes("\n");
                IInputStream input_stream = new InputStream();
                memory_stream.Write(buffer, 0, buffer.Length);

                input_stream.Initialize(memory_stream);
                ICharacterStream cStream = new CharacterStream();
                cStream.Initialize(input_stream);
                ICharacter ch = await cStream.Get();

                result = result && (ch.Kind == CharKind.CARRAGERETURN);
                ch     = await cStream.Get();

                result = result && (ch.Kind == CharKind.NULL);
            }

            return result;
        }
Example #19
0
 /// <summary>
 /// trailing_whitespace_linter: checks there are no trailing whitespace characters.
 /// </summary>
 /// <param name="cs">Stream positioned at the character to check.</param>
 /// <param name="options">Lint options; the check runs only when <c>TrailingWhitespace</c> is set.</param>
 /// <returns>A warning covering the whitespace character, or null if there is nothing to report.</returns>
 private static IValidationError TrailingWhitespaceCheck(CharacterStream cs, LintOptions options)
 {
     if (!options.TrailingWhitespace)
     {
         return null;
     }

     // Only non-line-break whitespace can be "trailing".
     if (!cs.IsWhiteSpace() || cs.CurrentChar.IsLineBreak())
     {
         return null;
     }

     // It is trailing when a line break follows or it is the last character.
     if (!cs.NextChar.IsLineBreak() && cs.Position != cs.Length - 1)
     {
         return null;
     }

     var whitespaceRange = new TextRange(cs.Position, 1);
     return new ValidationWarning(whitespaceRange, Resources.Lint_TrailingWhitespace, ErrorLocation.Token);
 }
Example #20
0
        /// <summary>
        /// Checks whether the first non-whitespace character at or after
        /// <paramref name="position"/> is an opening parenthesis.
        /// </summary>
        /// <param name="cs">Stream providing indexed access to the text.</param>
        /// <param name="position">Index at which the search starts.</param>
        /// <returns>
        /// True if '(' is the first non-whitespace character found; false if it
        /// is another character or only whitespace remains.
        /// </returns>
        private static bool IsOpenBraceFollow(CharacterStream cs, int position)
        {
            var index = position;

            // Skip over whitespace.
            while (index < cs.Length && char.IsWhiteSpace(cs[index]))
            {
                index++;
            }

            return index < cs.Length && cs[index] == '(';
        }
Example #21
0
        /// <summary>
        /// Smoke test: runs <c>Thing.DoThing</c> and constructs a
        /// <c>CharacterStream</c> over a reader for "foo bar". It makes no
        /// assertions; it only verifies that nothing throws.
        /// </summary>
        public void TestNext()
        {
            var t = new Thing();

            t.DoThing();

            var str   = "foo bar";
            var bytes = Encoding.UTF8.GetBytes(str);

            // Dispose the stream and reader deterministically instead of leaking them.
            using (var stream = new MemoryStream(bytes))
            using (var streamReader = new StreamReader(stream))
            {
                var charStream = new CharacterStream(streamReader);
            }
        }
Example #22
0
        /// <summary>
        /// Peeks five characters starting four characters ahead of the
        /// current position and expects them to spell "quick".
        /// </summary>
        public void PeekCharactersWithOffset()
        {
            const int offset = 4;
            string expected = "quick";

            var stream = new CharacterStream(_reader);

            char[] peeked = stream.Peek(offset, expected.Length);

            bool matches = peeked != null
                && peeked.Length == expected.Length
                && string.CompareOrdinal(new string(peeked), expected) == 0;

            Assert.IsTrue(matches);
        }
Example #23
0
        /// <summary>
        /// Reads nine characters from the start of the stream and expects
        /// them to spell "The quick".
        /// </summary>
        public void ReadCharacters()
        {
            string expected = "The quick";

            var stream = new CharacterStream(_reader);

            char[] consumed = stream.Read(expected.Length);

            bool matches = consumed != null
                && consumed.Length == expected.Length
                && string.CompareOrdinal(new string(consumed), expected) == 0;

            Assert.IsTrue(matches);
        }
        /// <summary>
        /// Resets the tokenizer state for a new run over <paramref name="textProvider"/>.
        /// </summary>
        /// <param name="textProvider">Text to tokenize.</param>
        /// <param name="start">Position at which the character stream starts.</param>
        /// <param name="estimatedLength">Expected amount of text, used only to presize the token list.</param>
        /// <param name="keepWhiteSpace">Value stored in <c>KeepWhiteSpace</c>.</param>
        public void InitStream(ITextProvider textProvider, int start, int estimatedLength, bool keepWhiteSpace)
        {
            CS = new CharacterStream(textProvider) { Position = start };

            // Guess how many tokens will be allocated
            // (5 was the average token length of the 090 test files).
            const int charsPerToken = 5;
            Tokens = new TokenList(estimatedLength / charsPerToken);

            KeepWhiteSpace = keepWhiteSpace;
            _streamToken = 0;
        }
Example #25
0
        /// <summary>
        /// Consumes hexadecimal digits followed by an optional 'L' suffix.
        /// </summary>
        /// <param name="cs">Stream positioned at the first hex digit.</param>
        /// <param name="start">Position where the whole number token started.</param>
        /// <returns>Distance from <paramref name="start"/> to the position after the number.</returns>
        internal static int HandleHex(CharacterStream cs, int start)
        {
            for (; CharacterStream.IsHex(cs.CurrentChar); cs.MoveToNextChar())
            {
                // Hex digits are consumed by the loop header.
            }

            // TODO: handle C99 floating point hex syntax like 0x1.1p-2
            bool hasLongSuffix = cs.CurrentChar == 'L';
            if (hasLongSuffix)
            {
                cs.MoveToNextChar();
            }

            return cs.Position - start;
        }
Example #26
0
        /// <summary>
        /// Handles a generic comment that runs to the end of the line and
        /// reports its range through <paramref name="addToken"/>.
        /// </summary>
        /// <param name="cs">Stream positioned at the start of the comment.</param>
        /// <param name="addToken">Callback receiving the comment start and length; not called for an empty range.</param>
        public static void HandleEolComment(CharacterStream cs, Action<int, int> addToken)
        {
            int commentStart = cs.Position;

            // Consume everything up to the line break or the end of the stream.
            while (!(cs.IsEndOfStream() || cs.IsAtNewLine()))
            {
                cs.MoveToNextChar();
            }

            int commentLength = cs.Position - commentStart;
            if (commentLength <= 0)
            {
                return;
            }

            addToken(commentStart, commentLength);
        }
Example #27
0
        /// <summary>
        /// Interleaves peeking and reading on one stream: an offset peek,
        /// two consecutive reads, and a final peek, each expected to return
        /// the matching portion of the text.
        /// </summary>
        public void PeekWithOffsetAndReadCharacters()
        {
            var stream = new CharacterStream(_reader);

            // Peek ahead at an offset of four characters.
            string expectedOffsetPeek = "quick";
            char[] offsetPeek = stream.Peek(4, expectedOffsetPeek.Length);

            Assert.IsTrue(
                offsetPeek != null &&
                offsetPeek.Length == expectedOffsetPeek.Length &&
                string.CompareOrdinal(new string(offsetPeek), expectedOffsetPeek) == 0);

            // The read still starts at the beginning of the text.
            string expectedFirstRead = "The quick";
            char[] firstRead = stream.Read(expectedFirstRead.Length);

            Assert.IsTrue(
                firstRead != null &&
                firstRead.Length == expectedFirstRead.Length &&
                string.CompareOrdinal(new string(firstRead), expectedFirstRead) == 0);

            // A second read continues where the first one stopped.
            string expectedSecondRead = " fox jumped";
            char[] secondRead = stream.Read(expectedSecondRead.Length);

            Assert.IsTrue(
                secondRead != null &&
                secondRead.Length == expectedSecondRead.Length &&
                string.CompareOrdinal(new string(secondRead), expectedSecondRead) == 0);

            // A plain peek returns the text that follows the second read.
            string expectedFinalPeek = " over the lazy";
            char[] finalPeek = stream.Peek(expectedFinalPeek.Length);

            Assert.IsTrue(
                finalPeek != null &&
                finalPeek.Length == expectedFinalPeek.Length &&
                string.CompareOrdinal(new string(finalPeek), expectedFinalPeek) == 0);
        }
Example #28
0
        /// <summary>
        /// Consumes the exponent part of a number. The stream must be
        /// positioned at the 'E' or 'e' marker.
        /// </summary>
        /// <param name="cs">Stream positioned at the exponent marker.</param>
        /// <param name="start">Position where the whole number token started.</param>
        /// <returns>
        /// Distance from <paramref name="start"/> to the position after the
        /// exponent, or 0 when the exponent is rejected: the marker is followed
        /// by whitespace or the end of the stream, a sign has no digits after
        /// it, or a letter other than 'i' or 'L' follows.
        /// </returns>
        internal static int HandleExponent(CharacterStream cs, int start)
        {
            Debug.Assert(cs.CurrentChar == 'E' || cs.CurrentChar == 'e');

            // Step over the exponent marker.
            cs.MoveToNextChar();

            if (cs.IsWhiteSpace() || cs.IsEndOfStream())
            {
                // 0.1E or 1e
                return 0;
            }

            bool signed = cs.CurrentChar == '-' || cs.CurrentChar == '+';
            if (signed)
            {
                cs.MoveToNextChar();
            }

            // Collect decimals.
            int firstDigit = cs.Position;
            for (; cs.IsDecimal(); cs.MoveToNextChar())
            {
            }

            bool noDigits = firstDigit == cs.Position;
            if (signed && noDigits)
            {
                // NaN like 1.0E-
                return 0;
            }

            // Technically if a letter or braces follow this is not
            // a number but we'll leave it alone for now.

            // TODO: This code is not language specific and yet it currently
            // handles complex 'i' as well as the R-specific 'L' suffix.
            // Ideally this needs to be extended in a way so a language-specific
            // tokenizer can specify options or control the number format.
            var following = cs.CurrentChar;
            if (char.IsLetter(following) && following != 'i' && following != 'L')
            {
                return 0;
            }

            return cs.Position - start;
        }
Example #29
0
        /// <summary>
        /// Returns 3 when the next three characters are found in
        /// <c>_threeChars</c>; otherwise defers to the two-character lookup.
        /// </summary>
        /// <param name="cs">Stream positioned at the operator candidate.</param>
        /// <returns>3, or the result of <c>Get2CharOrShorterOperatorLength</c>.</returns>
        private static int Get3CharOrShorterOperatorLength(CharacterStream cs)
        {
            const int width = 3;

            if (cs.DistanceFromEnd < width)
            {
                return Get2CharOrShorterOperatorLength(cs);
            }

            string candidate = cs.GetSubstringAt(cs.Position, width);

            // Binary search: presumably _threeChars is kept sorted - confirm at its declaration.
            bool isOperator = candidate.Length == width
                && Array.BinarySearch<string>(_threeChars, candidate) >= 0;

            return isOperator ? width : Get2CharOrShorterOperatorLength(cs);
        }
Example #30
0
        /// <summary>
        /// Returns 2 when the next two characters are found in
        /// <c>_twoChars</c>; otherwise defers to the single-character lookup.
        /// </summary>
        /// <param name="cs">Stream positioned at the operator candidate.</param>
        /// <returns>2, or the result of <c>GetSingleCharOperatorLength</c> for the current character.</returns>
        internal static int Get2CharOrShorterOperatorLength(CharacterStream cs)
        {
            const int width = 2;

            if (cs.DistanceFromEnd < width)
            {
                return GetSingleCharOperatorLength(cs.CurrentChar);
            }

            string candidate = cs.GetSubstringAt(cs.Position, width);

            // Binary search: presumably _twoChars is kept sorted - confirm at its declaration.
            bool isOperator = candidate.Length == width
                && Array.BinarySearch<string>(_twoChars, candidate) >= 0;

            return isOperator ? width : GetSingleCharOperatorLength(cs.CurrentChar);
        }