/// <summary>
/// Skips a unicode escape such as "\012345" at the current stream position,
/// together with one trailing whitespace character or line break.
/// </summary>
/// <param name="cs">Stream to advance.</param>
/// <returns>True when AtUnicodeEscape reported an escape and it was skipped; otherwise false.</returns>
public static bool SkipUnicodeEscape(CharacterStream cs)
{
    if (!AtUnicodeEscape(cs))
    {
        return false;
    }

    // Step over the backslash and the first character that follows it
    cs.Advance(2);

    // That first character counts as one of the six; consume at most five more hex digits
    int remainingDigits = 5;
    while (remainingDigits > 0 && TextHelper.IsHexDigit(cs.CurrentChar))
    {
        cs.Advance(1);
        remainingDigits--;
    }

    // A single space or line break right after the escape belongs to it
    if (TextHelper.IsNewLine(cs.CurrentChar))
    {
        SkipNewLine(cs);
    }
    else if (TextHelper.IsWhiteSpace(cs.CurrentChar))
    {
        cs.Advance(1);
    }

    return true;
}
/// <summary>
/// This is similar to string.Substring, but it deals with encoded unicode chars and escaped chars.
/// Escaped line breaks are only valid within strings, so set "forStringToken" to true for strings.
/// </summary>
/// <param name="textProvider">Provider of the text to read from.</param>
/// <param name="start">Position of the first character of the range to decode.</param>
/// <param name="length">Length of the range, measured in encoded (source) characters.</param>
/// <param name="forStringToken">When true, an escaped line break is dropped from the result.</param>
/// <returns>
/// The decoded text, or the unmodified text of the range when
/// RangeCouldContainEncodedChars reports that nothing in it can be encoded.
/// </returns>
public static string DecodeText(ITextProvider textProvider, int start, int length, bool forStringToken)
{
    if (!RangeCouldContainEncodedChars(textProvider, start, length))
    {
        // Nothing can possibly be encoded, so return the plain string
        return textProvider.GetText(start, length);
    }

    // Need to carefully investigate every character and decode it
    System.Text.StringBuilder sb = new System.Text.StringBuilder(length);
    CharacterStream cs = new CharacterStream(textProvider);
    int end = start + length;

    cs.Position = start;
    while (cs.Position < end && !cs.IsAtEnd)
    {
        if (forStringToken && AtEscapedNewLine(cs))
        {
            // Ignore this line break within a string: skip the backslash, then the line break
            cs.Advance(1);
            SkipNewLine(cs);
            continue;
        }

        DecodedChar decodedChar = TextHelpers.DecodeCurrentChar(cs);

        if (decodedChar.RequiresUtf32)
        {
            // http://www.w3.org/TR/CSS21/syndata.html#characters
            //
            // If the number is outside the range allowed by Unicode (e.g., "\110000" is above the maximum 10FFFF
            // allowed in current Unicode), the UA may replace the escape with the "replacement character" (U+FFFD).
            int utf32 = decodedChar.CharUtf32;

            bool outOfRange = (utf32 < 0) || (utf32 > 0x10FFFF);

            // char.ConvertFromUtf32 also rejects surrogate code points with an
            // ArgumentOutOfRangeException, so treat them like any other invalid value.
            bool isSurrogate = (utf32 >= 0xD800) && (utf32 <= 0xDFFF);

            if (outOfRange || isSurrogate)
            {
                utf32 = 0xFFFD;
            }

            sb.Append(char.ConvertFromUtf32(utf32));
        }
        else
        {
            sb.Append(decodedChar.Char);
        }

        // Move past every source character that made up the decoded one
        cs.Advance(decodedChar.EncodedLength);
    }

    return sb.ToString();
}
/// <summary>
/// Advances the stream until it is at its end or the current character is a
/// line break (per TextHelper.IsNewLine). The line break itself is not consumed.
/// </summary>
/// <param name="cs">Stream to advance.</param>
public static void SkipToEOL(CharacterStream cs)
{
    for (; !cs.IsAtEnd && !TextHelper.IsNewLine(cs.CurrentChar); cs.Advance(1))
    {
        // All of the work happens in the loop header
    }
}
/// <summary>
/// Moves the stream backwards for as long as the character just before the
/// current position is whitespace (per TextHelper.IsWhiteSpace).
/// </summary>
/// <param name="cs">Stream to move.</param>
/// <returns>True when the stream position changed; otherwise false.</returns>
public static bool SkipWhitespaceReverse(CharacterStream cs)
{
    int origin = cs.Position;

    while (true)
    {
        if (!TextHelper.IsWhiteSpace(cs.Peek(-1)))
        {
            break;
        }

        cs.Advance(-1);
    }

    return cs.Position != origin;
}
/// <summary>
/// Advances the stream for as long as the current character is whitespace
/// (per TextHelper.IsWhiteSpace).
/// </summary>
/// <param name="cs">Stream to advance.</param>
/// <returns>True when at least one character was skipped; otherwise false.</returns>
public static bool SkipWhitespace(CharacterStream cs)
{
    int origin = cs.Position;

    for (; TextHelper.IsWhiteSpace(cs.CurrentChar); cs.Advance(1))
    {
        // All of the work happens in the loop header
    }

    return cs.Position != origin;
}
/// <summary>
/// Skips one line break at the current stream position: "\r", "\r\n", "\n" or "\f".
/// </summary>
/// <param name="cs">Stream to advance.</param>
/// <returns>True when a line break was skipped; false when the current character is not one of those.</returns>
public static bool SkipNewLine(CharacterStream cs)
{
    var first = cs.CurrentChar;

    if (first == '\r')
    {
        cs.Advance(1);

        // A "\r\n" pair is always treated as a single line break
        if (cs.CurrentChar == '\n')
        {
            cs.Advance(1);
        }

        return true;
    }

    if (first == '\n' || first == '\f')
    {
        cs.Advance(1);
        return true;
    }

    return false;
}
/// <summary>
/// Skips an escape at the current stream position:
///   "\abc " - Unicode escape (the trailing space is included)
///   "\X"    - Any other character escape (except line breaks)
/// </summary>
/// <param name="cs">Stream to advance.</param>
/// <returns>True when an escape was skipped; otherwise false.</returns>
public static bool SkipEscape(CharacterStream cs)
{
    // Unicode escapes have their own skipping rules
    if (AtUnicodeEscape(cs))
    {
        return SkipUnicodeEscape(cs);
    }

    if (!AtEscape(cs))
    {
        return false;
    }

    // Plain escape: the backslash plus the single escaped character
    cs.Advance(2);
    return true;
}