/// <summary>
/// Counts how many times <paramref name="quote"/> occurs between the current
/// stream position and <paramref name="tagEnd"/>, stopping at the first line break.
/// The stream position is restored before returning.
/// </summary>
/// <param name="tagEnd">Exclusive upper bound of the scanned range.</param>
/// <param name="quote">Quote character to count.</param>
/// <returns>Number of matching quote characters found.</returns>
private int CountRemainingQuotesOnLine(int tagEnd, char quote) {
    int savedPosition = _cs.Position;
    int found = 0;

    for (; !_cs.IsEndOfStream() && _cs.Position < tagEnd; _cs.MoveToNextChar()) {
        char ch = _cs.CurrentChar;
        if (ch == quote) {
            found++;
            continue;
        }

        if (CharacterStream.IsNewLine(ch)) {
            break;
        }
    }

    // This is a look-ahead only: put the stream back where it started.
    _cs.Position = savedPosition;
    return found;
}
/// <summary>
/// Advances the stream over a string whose opening and closing separators are
/// <paramref name="separatorLength"/> characters long.
/// </summary>
/// <param name="separatorLength">Length of the opening/closing separator.</param>
/// <param name="cs">Stream positioned at the opening separator.</param>
/// <param name="terminatorCheck">Returns true when the stream is at the closing separator.</param>
private static void HandleString(int separatorLength, CharacterStream cs, Func<CharacterStream, bool> terminatorCheck) {
    // Step over the opening separator.
    cs.Advance(separatorLength);
    if (cs.IsEndOfStream()) {
        return;
    }

    for (;;) {
        if (terminatorCheck(cs)) {
            // Consume the closing separator and stop.
            cs.Advance(separatorLength);
            return;
        }

        // A backslash escapes the following character, so that character
        // is stepped over without being tested as a terminator.
        if (cs.CurrentChar == '\\') {
            cs.MoveToNextChar();
        }

        if (!cs.MoveToNextChar()) {
            return;
        }
    }
}
/// <summary>
/// Handles a quoted string sequence with backslash escapes and reports
/// the consumed range through <paramref name="addToken"/>.
/// </summary>
/// <param name="openQuote">Quote character that terminates the string.</param>
/// <param name="cs">Stream positioned at the opening quote.</param>
/// <param name="addToken">Callback receiving the start position and length of the string.</param>
public static void HandleString(char openQuote, CharacterStream cs, Action<int, int> addToken) {
    int tokenStart = cs.Position;

    // Step over the opening quote.
    cs.MoveToNextChar();

    bool scanning = !cs.IsEndOfStream();
    while (scanning) {
        if (cs.CurrentChar == openQuote) {
            // Include the closing quote in the token.
            cs.MoveToNextChar();
            break;
        }

        // The character after a backslash is skipped without being compared to the quote.
        if (cs.CurrentChar == '\\') {
            cs.MoveToNextChar();
        }

        scanning = cs.MoveToNextChar();
    }

    int tokenLength = cs.Position - tokenStart;
    if (tokenLength <= 0) {
        return;
    }

    addToken(tokenStart, tokenLength);
}
/// <summary>
/// Replaces every run of whitespace (as recognized by the character stream)
/// with a single space and trims the result.
/// </summary>
/// <param name="s">Text to normalize.</param>
/// <returns>
/// The normalized text; <paramref name="s"/> itself when it is null or empty.
/// </returns>
public static string NormalizeWhitespace(this string s) {
    if (string.IsNullOrEmpty(s)) {
        return s;
    }

    var stream = new CharacterStream(new TextStream(s));
    var normalized = new StringBuilder();

    while (!stream.IsEndOfStream()) {
        // Collapse whatever whitespace was skipped into one space.
        var beforeSkip = stream.Position;
        stream.SkipWhitespace();
        if (stream.Position > beforeSkip) {
            normalized.Append(' ');
        }

        // Copy the following non-whitespace run verbatim.
        for (; !stream.IsEndOfStream() && !stream.IsWhiteSpace(); stream.MoveToNextChar()) {
            normalized.Append(stream.CurrentChar);
        }
    }

    return normalized.ToString().Trim();
}
/// <summary>
/// Advances the stream until it is at a non-whitespace character
/// or at the end of the stream.
/// </summary>
/// <param name="cs">Stream to advance.</param>
internal static void SkipWhitespace(CharacterStream cs) {
    while (true) {
        if (cs.IsEndOfStream() || !cs.IsWhiteSpace()) {
            return;
        }

        cs.MoveToNextChar();
    }
}
/// <summary>
/// Tries to consume the imaginary part of a complex number: optional whitespace,
/// a sign (optionally followed by a second sign), a number and a trailing 'i'.
/// </summary>
/// <param name="cs">Stream positioned right after the real part.</param>
/// <returns>
/// Number of characters consumed from the starting position, or 0 when no
/// imaginary part was recognized. On the 0 paths the stream is left wherever
/// scanning stopped; this method does not rewind it.
/// </returns>
public static int HandleImaginaryPart(CharacterStream cs) {
    int origin = cs.Position;

    // Check if this is actually a complex number.
    NumberTokenizer.SkipWhitespace(cs);
    if (cs.CurrentChar != '+' && cs.CurrentChar != '-') {
        return 0;
    }

    cs.MoveToNextChar();

    // A second sign directly after the first one is also accepted.
    if (cs.CurrentChar == '+' || cs.CurrentChar == '-') {
        cs.MoveToNextChar();
    }

    int numberLength = NumberTokenizer.HandleNumber(cs);
    if (numberLength <= 0 || cs.CurrentChar != 'i') {
        return 0;
    }

    // Include the 'i' suffix in the reported length.
    cs.MoveToNextChar();
    return cs.Position - origin;
}
public void TextIterator_Simple() {
    // Exercises basic navigation (position, peek, advance) over the text "abcd".
    CharacterStream stream = new CharacterStream(new StringTextProvider("abcd"));

    // Initial state.
    Assert.AreEqual(4, stream.TextProvider.Length);
    Assert.AreEqual(0, stream.Position);
    Assert.AreEqual('a', stream.CurrentChar);
    Assert.AreEqual(new DecodedChar('a', 1), TextHelpers.DecodeCurrentChar(stream));

    // String comparison at the current position.
    Assert.IsTrue(stream.TextProvider.CompareTo(stream.Position, "ab", ignoreCase: false));
    Assert.IsFalse(stream.TextProvider.CompareTo(stream.Position, "abcde", ignoreCase: false));
    Assert.IsTrue(TextHelpers.CompareCurrentDecodedString(stream, "ab", ignoreCase: false, matchLength: out int matchLength));
    Assert.AreEqual(2, matchLength);
    Assert.IsFalse(TextHelpers.CompareCurrentDecodedString(stream, "abcde", ignoreCase: false, matchLength: out _));

    // Advance by one and peek around the new position.
    Assert.IsFalse(stream.IsAtEnd);
    Assert.IsTrue(stream.Advance(1));
    Assert.AreEqual(1, stream.Position);
    Assert.AreEqual('b', stream.CurrentChar);
    Assert.AreEqual('a', stream.Peek(-1));
    Assert.AreEqual('c', stream.Peek(1));
    Assert.AreEqual('d', stream.Peek(2));

    // Peeking past the end yields 0.
    Assert.AreEqual(0, stream.Peek(3));
    Assert.AreEqual(0, stream.Peek(4));

    // Advance to the end; further advancing fails.
    Assert.IsTrue(stream.Advance(3));
    Assert.IsTrue(stream.IsAtEnd);
    Assert.IsFalse(stream.Advance(1));
}
/// <summary>
/// Checks file whitespace (typically lint-type or style-type checkers).
/// </summary>
/// <param name="tp">Text to validate.</param>
/// <returns>A collection of validation errors; empty when the linter is disabled.</returns>
public IReadOnlyCollection<IValidationError> ValidateWhitespace(ITextProvider tp) {
    if (!_linterEnabled) {
        return Enumerable.Empty<IValidationError>().ToList();
    }

    // File-level checkers run once over the whole text.
    var warnings = _whitespaceFileCheckers
        .SelectMany(fileCheck => fileCheck(tp, _settings.LintOptions, _projectedBuffer))
        .ToList();

    // Character-level checkers run at every whitespace position.
    var cs = new CharacterStream(tp);
    while (!cs.IsEndOfStream()) {
        if (cs.IsWhiteSpace()) {
            // Unrolled since most return nulls.
            foreach (var charCheck in _whitespaceCharCheckers) {
                var issue = charCheck(cs, _settings.LintOptions);
                if (issue != null) {
                    warnings.Add(issue);
                }
            }
        }

        cs.MoveToNextChar();
    }

    return warnings.ToList();
}
/// <summary>
/// Advances the stream past an identifier that starts at the current position.
/// Does nothing when the current position does not start an identifier.
/// </summary>
/// <param name="cs">Stream to advance.</param>
/// <param name="isIdentifierLeadCharacter">Tells whether the stream is at a valid first identifier character.</param>
/// <param name="isIdentifierCharacter">Tells whether the stream is at a valid identifier character.</param>
public static void SkipIdentifier(CharacterStream cs, Func<CharacterStream, bool> isIdentifierLeadCharacter, Func<CharacterStream, bool> isIdentifierCharacter) {
    bool canStart = isIdentifierLeadCharacter(cs) && !cs.IsEndOfStream();
    if (!canStart) {
        return;
    }

    // Stop at whitespace, at the first non-identifier character,
    // or when the stream cannot advance any further.
    while (!cs.IsWhiteSpace() && isIdentifierCharacter(cs)) {
        if (!cs.MoveToNextChar()) {
            return;
        }
    }
}
public void TextHelpers_Decode1() {
    // Try parsing a simple unicode char and escaped char
    string encoded = @"u\52 \l(foo.jpg)";
    CharacterStream stream = new CharacterStream(new StringTextProvider(encoded));

    // 'u' is a plain character.
    Assert.IsFalse(TextHelpers.AtEscape(stream));
    Assert.IsFalse(TextHelpers.AtUnicodeEscape(stream));
    Assert.AreEqual(new DecodedChar('u', 1), TextHelpers.DecodeCurrentChar(stream));

    // "\52 " is a unicode escape that decodes to 'R' and spans 4 encoded characters.
    Assert.IsTrue(stream.Advance(1));
    Assert.IsTrue(TextHelpers.AtEscape(stream));
    Assert.IsTrue(TextHelpers.AtUnicodeEscape(stream));
    Assert.AreEqual('R', TextHelpers.DecodeCurrentChar(stream).Char);
    Assert.AreEqual(4, TextHelpers.DecodeCurrentChar(stream).EncodedLength);

    // "\l" is a simple (non-unicode) escape that decodes to 'l' and spans 2 encoded characters.
    Assert.IsTrue(stream.Advance(4));
    Assert.IsTrue(TextHelpers.AtEscape(stream));
    Assert.IsFalse(TextHelpers.AtUnicodeEscape(stream));
    Assert.AreEqual('l', TextHelpers.DecodeCurrentChar(stream).Char);
    Assert.AreEqual(2, TextHelpers.DecodeCurrentChar(stream).EncodedLength);

    // '(' is a plain character again.
    Assert.IsTrue(stream.Advance(2));
    Assert.IsFalse(TextHelpers.AtEscape(stream));
    Assert.IsFalse(TextHelpers.AtUnicodeEscape(stream));
    Assert.AreEqual(new DecodedChar('(', 1), TextHelpers.DecodeCurrentChar(stream));

    // Decoding the whole text resolves both escapes.
    Assert.AreEqual(@"uRl(foo.jpg)", TextHelpers.DecodeText(stream.TextProvider, 0, encoded.Length, forStringToken: false));
}
/// <summary>
/// Checks file whitespace (typically lint-type or style-type checkers).
/// </summary>
/// <param name="tp">Text to validate.</param>
/// <returns>A collection of validation errors; empty when linting is disabled.</returns>
public IReadOnlyCollection<IValidationError> ValidateWhitespace(ITextProvider tp) {
    if (!_settings.LintOptions.Enabled) {
        return Enumerable.Empty<IValidationError>().ToList();
    }

    // File-level checkers run once over the whole text.
    var found = _whitespaceFileCheckers
        .SelectMany(fileCheck => fileCheck(tp, _settings.LintOptions))
        .ToList();

    // Character-level checkers run at every whitespace position.
    var cs = new CharacterStream(tp);
    for (; !cs.IsEndOfStream(); cs.MoveToNextChar()) {
        if (!cs.IsWhiteSpace()) {
            continue;
        }

        // Unrolled since most return nulls.
        foreach (var charCheck in _whitespaceCharCheckers) {
            var issue = charCheck(cs, _settings.LintOptions);
            if (issue != null) {
                found.Add(issue);
            }
        }
    }

    return found.ToList();
}
public void ReadAndPeakAndRead() {
    // Verifies that Peek returns the next token without consuming it:
    // the Read that follows a Peek must yield the same token.
    byte[] stringBytes = Encoding.UTF8.GetBytes("123-.27'Test'");
    Token tok = Token.Empty;

    using (MemoryStream ms = new MemoryStream(stringBytes))
    using (StreamReader sr = new StreamReader(ms)) {
        CharacterStream cs = new CharacterStream(sr);
        TokenStream ts = new TokenStream(cs);

        Assert.IsTrue(ts.Read(out tok), "Failed to read token");
        AssertToken(tok, Tokens.Number, "123");

        // The failure message names the operation that actually failed (Peek, not Read).
        Assert.IsTrue(ts.Peek(out tok), "Failed to peek token");
        AssertToken(tok, Tokens.Number, "-.27");

        Assert.IsTrue(ts.Read(out tok), "Failed to read token");
        AssertToken(tok, Tokens.Number, "-.27");

        Assert.IsTrue(ts.Read(out tok), "Failed to read token");
        AssertToken(tok, Tokens.String, "Test");
    }
}
/// <summary>
/// Determines whether the text at the current stream position may be the start of a number.
/// </summary>
/// <returns>True if a number may start at the current position.</returns>
internal bool IsPossibleNumber() {
    char current = _cs.CurrentChar;

    // It is hard to tell in "12 +1" whether this is a sum of numbers or a sequence.
    // The sign is treated as part of the number only when it directly precedes a
    // digit or a dot (no whitespace allowed) AND it either starts the file or follows
    // an opening brace, an opening square bracket, a comma, a semicolon or an operator.
    // In every other case the sign is the start of an operator, as in x[1]+2.
    // It is important that in partial tokenization the classifier removes enough
    // tokens so the tokenizer can start early enough in the stream to figure out
    // numbers properly.
    if (current == '-' || current == '+') {
        bool numberFollows = CharacterStream.IsDecimal(_cs.NextChar) || _cs.NextChar == '.';
        if (!numberFollows) {
            return false;
        }

        // At the start of the file this can only be a number.
        if (_tokens.Count == 0) {
            return true;
        }

        var precedingType = _tokens[_tokens.Count - 1].TokenType;
        return precedingType == RTokenType.OpenBrace
            || precedingType == RTokenType.OpenSquareBracket
            || precedingType == RTokenType.Comma
            || precedingType == RTokenType.Semicolon
            || precedingType == RTokenType.Operator;
    }

    // R only supports 0xABCD. x0A is not legal.
    // Hex number like 0xA1BC.
    if (current == '0' && _cs.NextChar == 'x') {
        return true;
    }

    if (_cs.IsDecimal()) {
        return true;
    }

    // A leading dot counts only when a digit follows, as in .5
    return current == '.' && CharacterStream.IsDecimal(_cs.NextChar);
}
/// <summary>
/// Tells whether the stream text in the range [start, end) parses as a double
/// under the invariant culture, with exponent notation allowed.
/// </summary>
/// <param name="cs">Stream that supplies the text.</param>
/// <param name="start">Start position of the candidate text.</param>
/// <param name="end">Position immediately after the candidate text.</param>
/// <returns>True if the text parses successfully.</returns>
private static bool IsValidDouble(CharacterStream cs, int start, int end) {
    const NumberStyles styles = NumberStyles.Number | NumberStyles.AllowExponent;

    string candidate = cs.GetSubstringAt(start, end - start);

    // Only the success flag matters; the parsed value is discarded.
    double parsed;
    return double.TryParse(candidate, styles, CultureInfo.InvariantCulture, out parsed);
}
/// <summary>
/// Prepares the tokenizer state: creates a character stream over
/// <paramref name="textProvider"/> positioned at <paramref name="start"/>
/// and an empty token collection.
/// </summary>
/// <param name="textProvider">Text to tokenize.</param>
/// <param name="start">Position at which tokenization begins.</param>
/// <param name="length">Length of the range to tokenize; only validated here (debug builds).</param>
internal virtual void InitializeTokenizer(ITextProvider textProvider, int start, int length) {
    Debug.Assert(start >= 0 && length >= 0 && start + length <= textProvider.Length);

    var stream = new CharacterStream(textProvider);
    _cs = stream;
    stream.Position = start;

    _tokens = new TextRangeCollection<T>();
}
/// <summary>
/// Given a candidate, returns the length of the operator at the current
/// stream position, or zero if the character sequence is not an operator.
/// </summary>
/// <param name="cs">Stream positioned at the operator candidate.</param>
/// <returns>Operator length in characters, or zero.</returns>
public static int OperatorLength(CharacterStream cs) {
    // R syntax reference:
    // http://stat.ethz.ch/R-manual/R-patched/library/base/html/Syntax.html
    //
    // Longest first: the lookup is delegated to the N-character helper.
    int length = GetNCharOperatorLength(cs);
    return length;
}
/// <summary>
/// no_tab_linter: checks that only spaces are used, never tabs.
/// </summary>
/// <param name="cs">Stream positioned at the character to check.</param>
/// <param name="options">Lint options; the check is active only when NoTabs is set.</param>
/// <returns>A warning covering the tab character, or null when there is nothing to report.</returns>
private static IValidationError TabCheck(CharacterStream cs, LintOptions options) {
    bool tabFound = options.NoTabs && cs.CurrentChar == '\t' && cs.Position < cs.Length;
    if (!tabFound) {
        return null;
    }

    var tabRange = new TextRange(cs.Position, 1);
    return new ValidationWarning(tabRange, Resources.Lint_Tabs, ErrorLocation.Token);
}
public async Task<bool> RunAsync() {
    // Runs two scenarios against CharacterStream and returns true only if every check passed.
    bool result = true;

    // Scenario 1: Push/Get round trip, then reading to and past the end of "hello world".
    {
        byte[] buffer = Encoding.UTF8.GetBytes("hello world");

        // 'using' guarantees disposal even if one of the awaited calls throws.
        using (MemoryStream memory_stream = new MemoryStream()) {
            IInputStream input_stream = new InputStream();
            memory_stream.Write(buffer, 0, buffer.Length);
            input_stream.Initialize(memory_stream);

            ICharacterStream cStream = new CharacterStream();
            cStream.Initialize(input_stream);

            ICharacter c1 = await cStream.Get();
            result = result && (null != c1); // ICharacterStream never returns null

            ICharacter ch = null;

            // verify that Get uses a pushed character
            cStream.Push(c1);
            ch = await cStream.Get();
            result = result && (c1.Info == ch.Info);

            // drain the stream until the NULL character is reported
            while (ch.Kind != CharKind.NULL) {
                ch = await cStream.Get();
            }

            // verify that Get() continues to get the NULL
            ch = await cStream.Get();
            result = result && (ch.Kind == CharKind.NULL);
        }
    }

    // Scenario 2: a single "\n" followed by the end of the stream.
    {
        byte[] buffer = Encoding.UTF8.GetBytes("\n");

        using (MemoryStream memory_stream = new MemoryStream()) {
            IInputStream input_stream = new InputStream();
            memory_stream.Write(buffer, 0, buffer.Length);
            input_stream.Initialize(memory_stream);

            ICharacterStream cStream = new CharacterStream();
            cStream.Initialize(input_stream);

            // NOTE(review): the line feed is expected to be classified as
            // CharKind.CARRAGERETURN - confirm this is the intended newline kind.
            ICharacter ch = await cStream.Get();
            result = result && (ch.Kind == CharKind.CARRAGERETURN);

            ch = await cStream.Get();
            result = result && (ch.Kind == CharKind.NULL);
        }
    }

    return result;
}
/// <summary>
/// trailing_whitespace_linter: checks that there are no trailing whitespace characters.
/// </summary>
/// <param name="cs">Stream positioned at the character to check.</param>
/// <param name="options">Lint options; the check is active only when TrailingWhitespace is set.</param>
/// <returns>A warning covering the whitespace character, or null when there is nothing to report.</returns>
private static IValidationError TrailingWhitespaceCheck(CharacterStream cs, LintOptions options) {
    if (!options.TrailingWhitespace) {
        return null;
    }

    // Only whitespace that is not itself a line break can be "trailing".
    if (!cs.IsWhiteSpace() || cs.CurrentChar.IsLineBreak()) {
        return null;
    }

    // Trailing means: directly before a line break, or the last character of the text.
    bool endsLine = cs.NextChar.IsLineBreak() || cs.Position == cs.Length - 1;
    if (!endsLine) {
        return null;
    }

    var range = new TextRange(cs.Position, 1);
    return new ValidationWarning(range, Resources.Lint_TrailingWhitespace, ErrorLocation.Token);
}
/// <summary>
/// Tells whether the first non-whitespace character at or after
/// <paramref name="position"/> is an opening parenthesis.
/// </summary>
/// <param name="cs">Stream that supplies the text.</param>
/// <param name="position">Position at which the search starts.</param>
/// <returns>
/// True if '(' is the first non-whitespace character found; false otherwise,
/// including when only whitespace remains.
/// </returns>
private static bool IsOpenBraceFollow(CharacterStream cs, int position) {
    int index = position;

    // Skip the whitespace run, if any.
    while (index < cs.Length && char.IsWhiteSpace(cs[index])) {
        index++;
    }

    return index < cs.Length && cs[index] == '(';
}
public void TestNext() {
    // NOTE(review): this test has no assertions; it only verifies that the
    // objects below can be constructed without throwing.
    var t = new Thing();
    t.DoThing();

    var str = "foo bar";
    var bytes = Encoding.UTF8.GetBytes(str);

    // Dispose the stream and the reader deterministically instead of leaving them to the GC.
    using (var stream = new MemoryStream(bytes))
    using (var streamReader = new StreamReader(stream)) {
        var charStream = new CharacterStream(streamReader);
    }
}
public void PeekCharactersWithOffset() {
    // Peeking at offset 4 must return the expected word from the shared reader.
    const string expected = "quick";
    var stream = new CharacterStream(_reader);

    char[] peeked = stream.Peek(4, expected.Length);

    bool matches = peeked != null
        && peeked.Length == expected.Length
        && string.CompareOrdinal(new string(peeked), expected) == 0;
    Assert.IsTrue(matches);
}
public void ReadCharacters() {
    // Reading from the start must return the expected leading text from the shared reader.
    const string expected = "The quick";
    var stream = new CharacterStream(_reader);

    char[] consumed = stream.Read(expected.Length);

    bool matches = consumed != null
        && consumed.Length == expected.Length
        && string.CompareOrdinal(new string(consumed), expected) == 0;
    Assert.IsTrue(matches);
}
/// <summary>
/// Resets the tokenizer state for a new run: creates the character stream at
/// <paramref name="start"/>, pre-sizes the token list and resets the stream token index.
/// </summary>
/// <param name="textProvider">Text to tokenize.</param>
/// <param name="start">Position at which the stream starts.</param>
/// <param name="estimatedLength">Estimated text length, used only to size the token list.</param>
/// <param name="keepWhiteSpace">Value stored in KeepWhiteSpace.</param>
public void InitStream(ITextProvider textProvider, int start, int estimatedLength, bool keepWhiteSpace) {
    var stream = new CharacterStream(textProvider);
    stream.Position = start;
    CS = stream;

    // Guess how many tokens will be allocated
    // (5 was the average token length of the 090 test files).
    const int averageTokenLength = 5;
    Tokens = new TokenList(estimatedLength / averageTokenLength);

    KeepWhiteSpace = keepWhiteSpace;
    _streamToken = 0;
}
/// <summary>
/// Consumes the hex digits at the current position and an optional 'L' suffix.
/// </summary>
/// <param name="cs">Stream positioned at the first character to examine.</param>
/// <param name="start">Position from which the returned length is measured.</param>
/// <returns>Distance from <paramref name="start"/> to the stream position after scanning.</returns>
internal static int HandleHex(CharacterStream cs, int start) {
    while (true) {
        if (!CharacterStream.IsHex(cs.CurrentChar)) {
            break;
        }

        cs.MoveToNextChar();
    }

    // TODO: handle C99 floating point hex syntax like 0x1.1p-2
    bool hasLongSuffix = cs.CurrentChar == 'L';
    if (hasLongSuffix) {
        cs.MoveToNextChar();
    }

    return cs.Position - start;
}
/// <summary>
/// Handles a generic comment. The comment runs to the end of the line.
/// </summary>
/// <param name="cs">Stream positioned at the start of the comment.</param>
/// <param name="addToken">Callback receiving the start position and length of the comment.</param>
public static void HandleEolComment(CharacterStream cs, Action<int, int> addToken) {
    int commentStart = cs.Position;

    // Advance up to (not including) the line break or the end of the stream.
    while (!(cs.IsEndOfStream() || cs.IsAtNewLine())) {
        cs.MoveToNextChar();
    }

    int commentLength = cs.Position - commentStart;
    if (commentLength <= 0) {
        return;
    }

    addToken(commentStart, commentLength);
}
public void PeekWithOffsetAndReadCharacters() {
    // Interleaves Peek and Read on one stream; the first Read still returns the
    // text that precedes and includes the peeked range, so Peek did not consume it.
    var stream = new CharacterStream(_reader);

    // 1. Peek with an offset.
    string expected = "quick";
    char[] peeked = stream.Peek(4, expected.Length);
    bool peekMatches = peeked != null
        && peeked.Length == expected.Length
        && string.CompareOrdinal(new string(peeked), expected) == 0;
    Assert.IsTrue(peekMatches);

    // 2. Read from the start.
    expected = "The quick";
    char[] consumed = stream.Read(expected.Length);
    bool readMatches = consumed != null
        && consumed.Length == expected.Length
        && string.CompareOrdinal(new string(consumed), expected) == 0;
    Assert.IsTrue(readMatches);

    // 3. A second read continues where the first one stopped.
    consumed = null;
    expected = " fox jumped";
    consumed = stream.Read(expected.Length);
    readMatches = consumed != null
        && consumed.Length == expected.Length
        && string.CompareOrdinal(new string(consumed), expected) == 0;
    Assert.IsTrue(readMatches);

    // 4. Peek without an offset looks at what follows the consumed text.
    peeked = null;
    expected = " over the lazy";
    peeked = stream.Peek(expected.Length);
    peekMatches = peeked != null
        && peeked.Length == expected.Length
        && string.CompareOrdinal(new string(peeked), expected) == 0;
    Assert.IsTrue(peekMatches);
}
/// <summary>
/// Consumes the exponent part of a number. The stream must be at 'E' or 'e'.
/// </summary>
/// <param name="cs">Stream positioned at the exponent marker.</param>
/// <param name="start">Position from which the returned length is measured.</param>
/// <returns>
/// Distance from <paramref name="start"/> to the stream position after the exponent,
/// or 0 when the exponent is not valid.
/// </returns>
internal static int HandleExponent(CharacterStream cs, int start) {
    Debug.Assert(cs.CurrentChar == 'E' || cs.CurrentChar == 'e');

    cs.MoveToNextChar();
    if (cs.IsWhiteSpace() || cs.IsEndOfStream()) {
        // Nothing follows the marker: 0.1E or 1e
        return 0;
    }

    bool signPresent = cs.CurrentChar == '-' || cs.CurrentChar == '+';
    if (signPresent) {
        cs.MoveToNextChar();
    }

    // Collect decimals.
    int firstDigit = cs.Position;
    while (cs.IsDecimal()) {
        cs.MoveToNextChar();
    }

    bool noDigits = cs.Position == firstDigit;
    if (signPresent && noDigits) {
        // Not a number, like 1.0E-
        return 0;
    }

    // Technically if a letter or braces follow, this is not
    // a number, but we leave it alone for now.

    // TODO: This code is not language specific and yet it currently
    // handles complex 'i' as well as the R-specific 'L' suffix.
    // Ideally this needs to be extended so a language-specific
    // tokenizer can specify options or control the number format.
    char follower = cs.CurrentChar;
    bool allowedSuffix = follower == 'i' || follower == 'L';
    if (char.IsLetter(follower) && !allowedSuffix) {
        return 0;
    }

    return cs.Position - start;
}
/// <summary>
/// Returns 3 when the next three characters form a known three-character operator;
/// otherwise falls back to the two-character (and shorter) lookup.
/// </summary>
/// <param name="cs">Stream positioned at the operator candidate.</param>
/// <returns>Length of the operator found by this lookup or by the fallback.</returns>
private static int Get3CharOrShorterOperatorLength(CharacterStream cs) {
    const int width = 3;

    if (cs.DistanceFromEnd >= width) {
        string candidate = cs.GetSubstringAt(cs.Position, width);

        // NOTE(review): binary search requires _threeChars to be sorted with the
        // default string comparer - confirm where the table is defined.
        bool known = candidate.Length == width
            && Array.BinarySearch<string>(_threeChars, candidate) >= 0;
        if (known) {
            return width;
        }
    }

    return Get2CharOrShorterOperatorLength(cs);
}
/// <summary>
/// Returns 2 when the next two characters form a known two-character operator;
/// otherwise falls back to the single-character lookup on the current character.
/// </summary>
/// <param name="cs">Stream positioned at the operator candidate.</param>
/// <returns>Length of the operator found by this lookup or by the fallback.</returns>
internal static int Get2CharOrShorterOperatorLength(CharacterStream cs) {
    const int width = 2;

    if (cs.DistanceFromEnd >= width) {
        string candidate = cs.GetSubstringAt(cs.Position, width);

        // NOTE(review): binary search requires _twoChars to be sorted with the
        // default string comparer - confirm where the table is defined.
        bool known = candidate.Length == width
            && Array.BinarySearch<string>(_twoChars, candidate) >= 0;
        if (known) {
            return width;
        }
    }

    return GetSingleCharOperatorLength(cs.CurrentChar);
}