/// <summary>
/// Collapses every run of whitespace in the string into a single space
/// and trims the result.
/// </summary>
/// <param name="s">String to normalize. May be null or empty.</param>
/// <returns>
/// The normalized string. Null or empty input is returned unchanged.
/// </returns>
public static string NormalizeWhitespace(this string s) {
    if (string.IsNullOrEmpty(s)) {
        return s;
    }

    var stream = new CharacterStream(new TextStream(s));
    var result = new StringBuilder();

    while (!stream.IsEndOfStream()) {
        // A whitespace run of any length contributes exactly one space.
        var runStart = stream.Position;
        stream.SkipWhitespace();
        if (stream.Position > runStart) {
            result.Append(' ');
        }

        // Copy the following non-whitespace characters verbatim.
        for (; !stream.IsEndOfStream() && !stream.IsWhiteSpace(); stream.MoveToNextChar()) {
            result.Append(stream.CurrentChar);
        }
    }

    // Leading/trailing whitespace runs produced a space at the edges; drop them.
    return result.ToString().Trim();
}
/// <summary>
/// Advances the stream past an identifier. Nothing is consumed unless
/// the current position satisfies <paramref name="isIdentifierLeadCharacter"/>.
/// </summary>
/// <param name="cs">Character stream positioned at the candidate identifier.</param>
/// <param name="isIdentifierLeadCharacter">Predicate deciding if the stream is at a valid first identifier character.</param>
/// <param name="isIdentifierCharacter">Predicate deciding if the stream is at a valid identifier character.</param>
public static void SkipIdentifier(CharacterStream cs, Func<CharacterStream, bool> isIdentifierLeadCharacter, Func<CharacterStream, bool> isIdentifierCharacter) {
    if (!isIdentifierLeadCharacter(cs) || cs.IsEndOfStream()) {
        return;
    }

    // Keep consuming while the stream can still advance and the current
    // character is a non-whitespace identifier character.
    var advanced = true;
    while (advanced && !cs.IsWhiteSpace() && isIdentifierCharacter(cs)) {
        advanced = cs.MoveToNextChar();
    }
}
/// <summary>
/// Advances the stream to the first non-whitespace character
/// or to the end of the stream, whichever comes first.
/// </summary>
/// <param name="cs">Character stream to advance.</param>
internal static void SkipWhitespace(CharacterStream cs) {
    while (true) {
        if (cs.IsEndOfStream() || !cs.IsWhiteSpace()) {
            return;
        }
        cs.MoveToNextChar();
    }
}
/// <summary>
/// Skips over a string whose opening and closing separators both have
/// <paramref name="separatorLength"/> characters. A backslash causes
/// the character that follows it to be skipped without being examined.
/// </summary>
/// <param name="separatorLength">Number of characters in the opening/closing separator.</param>
/// <param name="cs">Character stream positioned at the opening separator.</param>
/// <param name="terminatorCheck">Predicate that returns true when the stream is at the closing separator.</param>
private static void HandleString(int separatorLength, CharacterStream cs, Func<CharacterStream, bool> terminatorCheck) {
    // Step over the opening separator.
    cs.Advance(separatorLength);
    if (cs.IsEndOfStream()) {
        return;
    }

    do {
        if (terminatorCheck(cs)) {
            // Consume the closing separator and stop.
            cs.Advance(separatorLength);
            return;
        }

        if (cs.CurrentChar == '\\') {
            // Escape: skip the backslash so the escaped character
            // is consumed by the loop condition below.
            cs.MoveToNextChar();
        }
    } while (cs.MoveToNextChar());
}
/// <summary>
/// Handles a quoted string with backslash escapes and reports the
/// consumed range as a token.
/// </summary>
/// <param name="openQuote">Quote character; the string ends at its next unescaped occurrence.</param>
/// <param name="cs">Character stream positioned at the opening quote.</param>
/// <param name="addToken">Callback receiving the start position and length of the consumed range.</param>
public static void HandleString(char openQuote, CharacterStream cs, Action<int, int> addToken) {
    var tokenStart = cs.Position;

    // Step over the first character (the opening quote) without inspecting it.
    cs.MoveToNextChar();

    if (!cs.IsEndOfStream()) {
        do {
            if (cs.CurrentChar == openQuote) {
                // Include the closing quote in the token.
                cs.MoveToNextChar();
                break;
            }

            if (cs.CurrentChar == '\\') {
                // Escape: skip the backslash so the escaped character
                // is consumed by the loop condition below.
                cs.MoveToNextChar();
            }
        } while (cs.MoveToNextChar());
    }

    // The range is reported even when no closing quote was found.
    var tokenLength = cs.Position - tokenStart;
    if (tokenLength > 0) {
        addToken(tokenStart, tokenLength);
    }
}
/// <summary>
/// Checks file whitespace (typically lint-type or style-type checkers).
/// File-level checkers run first, then the per-character checkers are
/// invoked at every whitespace position in the text.
/// </summary>
/// <param name="tp">Text provider supplying the content to check.</param>
/// <returns>A collection of validation errors; empty when the linter is disabled.</returns>
public IReadOnlyCollection<IValidationError> ValidateWhitespace(ITextProvider tp) {
    if (!_linterEnabled) {
        return new List<IValidationError>();
    }

    var warnings = _whitespaceFileCheckers
        .SelectMany(checker => checker(tp, _settings.LintOptions, _projectedBuffer))
        .ToList();

    for (var cs = new CharacterStream(tp); !cs.IsEndOfStream(); cs.MoveToNextChar()) {
        if (!cs.IsWhiteSpace()) {
            continue;
        }

        // Unrolled since most return nulls.
        foreach (var checker in _whitespaceCharCheckers) {
            var result = checker(cs, _settings.LintOptions);
            if (result != null) {
                warnings.Add(result);
            }
        }
    }

    return warnings.ToList();
}
/// <summary>
/// Checks file whitespace (typically lint-type or style-type checkers).
/// File-level checkers run first, then the per-character checkers are
/// invoked at every whitespace position in the text.
/// </summary>
/// <param name="tp">Text provider supplying the content to check.</param>
/// <returns>A collection of validation errors; empty when linting is disabled in the settings.</returns>
public IReadOnlyCollection<IValidationError> ValidateWhitespace(ITextProvider tp) {
    if (!_settings.LintOptions.Enabled) {
        return new List<IValidationError>();
    }

    var found = _whitespaceFileCheckers
        .SelectMany(check => check(tp, _settings.LintOptions))
        .ToList();

    var stream = new CharacterStream(tp);
    for (; !stream.IsEndOfStream(); stream.MoveToNextChar()) {
        if (!stream.IsWhiteSpace()) {
            continue;
        }

        // Unrolled since most return nulls.
        foreach (var check in _whitespaceCharCheckers) {
            var issue = check(stream, _settings.LintOptions);
            if (issue == null) {
                continue;
            }
            found.Add(issue);
        }
    }

    return found.ToList();
}
/// <summary>
/// Tokenizes a range of text.
/// </summary>
/// <param name="textProvider">Text provider supplying the content.</param>
/// <param name="start">Start position of the range.</param>
/// <param name="length">Length of the range.</param>
/// <param name="excludePartialTokens">
/// When true, trailing tokens whose end lies beyond
/// <paramref name="start"/> + <paramref name="length"/> are removed.
/// </param>
/// <returns>A read-only collection wrapping the produced tokens.</returns>
public virtual IReadOnlyTextRangeCollection<T> Tokenize(ITextProvider textProvider, int start, int length, bool excludePartialTokens) {
    var rangeEnd = start + length;
    InitializeTokenizer(textProvider, start, length);

    // Keep on adding tokens until the stream is exhausted
    // or the position reaches the end of the requested range.
    if (!_cs.IsEndOfStream()) {
        do {
            AddNextToken();
        } while (_cs.Position < rangeEnd && !_cs.IsEndOfStream());
    }

    if (excludePartialTokens) {
        // Walk back from the tail to find how many leading tokens to keep:
        // everything after the last token that ends within the range is dropped.
        var keep = _tokens.Count;
        while (keep > 0 && _tokens[keep - 1].End > rangeEnd) {
            keep--;
        }

        if (keep < _tokens.Count) {
            _tokens.RemoveRange(keep, _tokens.Count - keep);
        }
    }

    return new ReadOnlyTextRangeCollection<T>(_tokens);
}
/// <summary>
/// Handles a generic end-of-line comment: consumes characters up to
/// (not including) the line break or the end of the stream.
/// </summary>
/// <param name="cs">Character stream positioned at the start of the comment.</param>
/// <param name="addToken">Callback receiving the start position and length of the comment; not called for an empty range.</param>
public static void HandleEolComment(CharacterStream cs, Action<int, int> addToken) {
    var commentStart = cs.Position;

    while (!(cs.IsEndOfStream() || cs.IsAtNewLine())) {
        cs.MoveToNextChar();
    }

    var commentLength = cs.Position - commentStart;
    if (commentLength <= 0) {
        return;
    }
    addToken(commentStart, commentLength);
}
/// <summary>
/// Processes the exponent part of a number. The stream must be
/// positioned at the 'E' or 'e' exponent marker.
/// </summary>
/// <param name="cs">Character stream positioned at the exponent marker.</param>
/// <param name="start">Position where the number began.</param>
/// <returns>
/// Length of the number measured from <paramref name="start"/>,
/// or 0 if the text is not recognized as a number.
/// </returns>
internal static int HandleExponent(CharacterStream cs, int start) {
    Debug.Assert(cs.CurrentChar == 'E' || cs.CurrentChar == 'e');

    // Step over the exponent marker.
    cs.MoveToNextChar();
    if (cs.IsWhiteSpace() || cs.IsEndOfStream()) {
        // Nothing follows the marker, as in 0.1E or 1e
        return 0;
    }

    var signed = cs.CurrentChar == '-' || cs.CurrentChar == '+';
    if (signed) {
        cs.MoveToNextChar();
    }

    // Collect the exponent digits.
    var firstDigit = cs.Position;
    while (cs.IsDecimal()) {
        cs.MoveToNextChar();
    }

    var noDigits = firstDigit == cs.Position;
    if (signed && noDigits) {
        // NaN like 1.0E-
        return 0;
    }
    // NOTE(review): an unsigned exponent with no digits that is followed by
    // a non-letter (e.g. "1e)") is not rejected here - confirm this is intended.

    // Technically if a letter or brace follows this is not
    // a number, but it is left alone for now.
    // TODO: This code is not language specific and yet it currently
    // handles complex 'i' as well as the R-specific 'L' suffix.
    // Ideally this needs to be extended so a language-specific
    // tokenizer can specify options or control the number format.
    var next = cs.CurrentChar;
    var allowedSuffix = next == 'i' || next == 'L';
    if (char.IsLetter(next) && !allowedSuffix) {
        return 0;
    }

    return cs.Position - start;
}
/// <summary>
/// Scans the text and produces a one-character token for every
/// round, curly, or square brace found. All other characters are ignored.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <returns>A read-only collection of brace tokens in order of appearance.</returns>
public IReadOnlyTextRangeCollection<BraceToken> Tokenize(string text) {
    var tokens = new TextRangeCollection<BraceToken>();

    for (var cs = new CharacterStream(text); !cs.IsEndOfStream(); cs.MoveToNextChar()) {
        BraceTokenType? braceType;
        switch (cs.CurrentChar) {
            case '(':
                braceType = OpenBrace;
                break;
            case ')':
                braceType = CloseBrace;
                break;
            case '{':
                braceType = OpenCurly;
                break;
            case '}':
                braceType = CloseCurly;
                break;
            case '[':
                braceType = OpenBracket;
                break;
            case ']':
                braceType = CloseBracket;
                break;
            default:
                braceType = null;
                break;
        }

        if (braceType.HasValue) {
            tokens.Add(new BraceToken(cs.Position, 1, braceType.Value));
        }
    }

    return new ReadOnlyTextRangeCollection<BraceToken>(tokens);
}
/// <summary>
/// Determines the length of the operator at the current stream position,
/// recognizing user-defined infix operators of the form %name%.
/// The stream position is the same on return as it was on entry.
/// </summary>
/// <param name="cs">Character stream positioned at the candidate operator.</param>
/// <returns>
/// Length of the %...% operator when a closing '%' is found before any
/// whitespace; 1 if a line break is hit first; otherwise the result of
/// <c>Get3CharOrShorterOperatorLength</c> evaluated at the original position.
/// </returns>
private static int GetNCharOperatorLength(CharacterStream cs) {
    // R allows user-defined infix operators. These have the form of
    // a string of characters delimited by the '%' character. The string
    // can contain any printable character except '%'.
    if (cs.CurrentChar != '%' || char.IsWhiteSpace(cs.NextChar)) {
        return Get3CharOrShorterOperatorLength(cs);
    }

    // In case of broken or partially typed operators
    // make sure we terminate at whitespace or end of the line
    // so in 'x <- y % z' '% z' is not an operator.
    int start = cs.Position;
    cs.MoveToNextChar();

    while (!cs.IsEndOfStream() && !cs.IsWhiteSpace()) {
        if (cs.CurrentChar == '%') {
            // Closing delimiter found: include it in the length.
            cs.MoveToNextChar();

            int length = cs.Position - start;
            cs.Position = start;
            return length;
        }

        if (cs.IsAtNewLine()) {
            // x <- y %abcd
            cs.Position = start;
            return 1;
        }

        cs.MoveToNextChar();
    }

    // No closing '%' before whitespace or end of stream. Rewind before
    // falling back; previously the stream was left at the end of the scan,
    // so the fallback inspected the wrong character and the caller saw
    // a moved position.
    cs.Position = start;
    return Get3CharOrShorterOperatorLength(cs);
}