/// <summary>
/// Checks that <c>AppendCodePoint</c> rejects a value one below
/// <c>Character.MIN_CODE_POINT</c> by throwing <see cref="ArgumentException"/>.
/// </summary>
public virtual void TestAppendCodePointTooLow()
{
    StringBuilder builder = new StringBuilder("foo bar");
    int belowMinimum = Character.MIN_CODE_POINT - 1;

    Assert.Throws<ArgumentException>(() => builder.AppendCodePoint(belowMinimum));
}
/// <summary>
/// Checks that appending a code point above U+FFFF yields the expected
/// supplementary character at the end of the builder.
/// </summary>
public virtual void TestAppendCodePointUTF16Surrogates()
{
    // U+2B001 (𫀁, decimal 176129); in UTF-16 this is the pair '\uD86C', '\uDC01'.
    int supplementary = 0x2B001;
    StringBuilder builder = new StringBuilder("foo bar");

    builder.AppendCodePoint(supplementary);

    string actual = builder.ToString();
    Assert.AreEqual("foo bar𫀁", actual);
}
/// <summary>
/// Checks that appending a non-ASCII code point below U+10000 adds that
/// single character to the end of the builder.
/// </summary>
public virtual void TestAppendCodePointUnicode()
{
    // U+0E0A (ช, decimal 3594).
    int thaiLetter = 0x0E0A;
    StringBuilder builder = new StringBuilder("foo bar");

    builder.AppendCodePoint(thaiLetter);

    string actual = builder.ToString();
    Assert.AreEqual("foo barช", actual);
}
/// <summary>
/// Checks that appending the ASCII code point for 'a' adds that letter to
/// the end of the builder.
/// </summary>
public virtual void TestAppendCodePointBmp()
{
    // 'a' is code point 97.
    int letterA = 'a';
    StringBuilder builder = new StringBuilder("foo bar");

    builder.AppendCodePoint(letterA);

    string actual = builder.ToString();
    Assert.AreEqual("foo bara", actual);
}
/// <summary>
/// Interprets the characters in the range [<paramref name="i"/>, <paramref name="end"/>)
/// as hexadecimal digits, accumulates them into a code point and appends that
/// code point to <paramref name="sb"/>.
/// </summary>
/// <param name="sb">Builder that receives the decoded code point.</param>
/// <param name="i">Pointer to the first digit. At least one character is always consumed.</param>
/// <param name="end">Pointer one past the last digit.</param>
/// <exception cref="ArgumentException">
/// The accumulated value exceeds 0x10FFFF, or <c>IsCharacterReferenceValid</c> rejects it.
/// </exception>
/// <remarks>
/// The characters themselves are not validated here; presumably the caller has
/// already established that they are hexadecimal digits (TODO confirm).
/// </remarks>
private unsafe static void AppendHexadecimalCharacterReference(StringBuilder sb, char* i, char* end)
{
    int cp = 0;
    do
    {
        char digit = *i;
        int base16v;
        if (digit <= '9')
        {
            base16v = digit - '0';
        }
        else if (digit <= 'F')
        {
            // Uppercase must be tested before lowercase: 'A'..'F' sort below 'a'..'f',
            // so a "<= 'f'" test here would also capture (and mis-decode) uppercase digits.
            base16v = digit - 'A' + 10;
        }
        else
        {
            base16v = digit - 'a' + 10;
        }

        cp = cp * 16 + base16v;

        // Checking on every iteration keeps cp small enough that the next
        // multiplication cannot overflow an int.
        if (0x10FFFF < cp)
        {
            throw new ArgumentException();
        }
    }
    while (++i < end);

    if (!IsCharacterReferenceValid(cp))
    {
        throw new ArgumentException();
    }

    sb.AppendCodePoint(cp);
}
/// <summary>
/// Accumulates the characters in the range [<paramref name="i"/>, <paramref name="end"/>)
/// as decimal digits into a code point and appends that code point to
/// <paramref name="sb"/>.
/// </summary>
/// <param name="sb">Builder that receives the decoded code point.</param>
/// <param name="i">Pointer to the first digit. At least one character is always consumed.</param>
/// <param name="end">Pointer one past the last digit.</param>
/// <exception cref="ArgumentException">
/// The accumulated value exceeds 0x10FFFF, or <c>IsCharacterReferenceValid</c> rejects it.
/// </exception>
private unsafe static void AppendDecimalCharacterReference(StringBuilder sb, char* i, char* end)
{
    int codePoint = 0;

    // The first character is consumed unconditionally; the bound is tested only afterwards.
    while (true)
    {
        int digitValue = *i - '0';
        codePoint = codePoint * 10 + digitValue;

        if (codePoint > 0x10FFFF)
        {
            throw new ArgumentException();
        }

        ++i;
        if (i >= end)
        {
            break;
        }
    }

    bool acceptable = IsCharacterReferenceValid(codePoint);
    if (!acceptable)
    {
        throw new ArgumentException();
    }

    sb.AppendCodePoint(codePoint);
}
/// <summary>
/// Returns <paramref name="str"/> with each escape sequence found by
/// <c>UcharMatched</c> replaced by the text it denotes.
/// </summary>
/// <param name="str">The escaped text; may be <c>null</c>.</param>
/// <returns>
/// The unescaped text, or <c>null</c> when <paramref name="str"/> is <c>null</c>.
/// </returns>
public static string Unescape(string str)
{
    if (str == null)
    {
        return null;
    }

    string result = str;
    Matcher matcher = UcharMatched.Matcher(str);
    while (matcher.Find())
    {
        string replacement = matcher.Group(0);
        string simpleEscape = matcher.Group(1);

        if (simpleEscape != null)
        {
            // Single-character escape: map the character after the backslash.
            switch (simpleEscape[0])
            {
                case 'b':
                    replacement = "\b";
                    break;
                case 'n':
                    replacement = "\n";
                    break;
                case 't':
                    replacement = "\t";
                    break;
                case 'f':
                    replacement = "\f";
                    break;
                case 'r':
                    replacement = "\r";
                    break;
                case '\'':
                    replacement = "'";
                    break;
                case '\"':
                    replacement = "\"";
                    break;
                case '\\':
                    replacement = "\\";
                    break;
                default:
                    // Unknown escape character: leave this match as it is.
                    continue;
            }
        }
        else
        {
            // Numeric escape: the hexadecimal digits are in group 2 or, failing that, group 3.
            string digits = matcher.Group(2) ?? matcher.Group(3);
            int codePoint = System.Convert.ToInt32(digits, 16);

            if (codePoint > 0xFFFF)
            {
                // Above the BMP: split the value into a UTF-16 surrogate pair.
                int offset = codePoint - 0x10000;
                int highSurrogate = 0xD800 + (offset >> 10);
                int lowSurrogate = 0xDC00 + (offset & 0x3FF);

                StringBuilder pair = new StringBuilder();
                pair.AppendCodePoint(highSurrogate);
                pair.AppendCodePoint(lowSurrogate);
                replacement = pair.ToString();
            }
            else
            {
                replacement = char.ToString((char)codePoint);
            }
        }

        string quotedMatch = Pattern.Quote(matcher.Group(0));

        // NOTE(review): this value is never read. The call is retained because
        // ToHexString is defined outside this block and its effects are not visible here.
        string unusedHex = JsonLD.JavaCompat.ToHexString(replacement[0]);

        result = result.Replace(quotedMatch, replacement);
    }

    return result;
}
/// <summary>Decodes the C-style escape sequences in a string, so that e.g.
/// "\\\n\\\r" turns into "\n\r".</summary>
/// <param name="encountered">Receives the combined flags reported by
/// <c>UnescapeChar</c> for every character that was decoded.</param>
/// <param name="removeUnnecessaryBackslashes">When true, the backslash of an
/// escape sequence that was flagged as unrecognized is dropped from the
/// output, e.g. "\z" => "z".</param>
/// <returns>A new <see cref="StringBuilder"/> holding the decoded text.</returns>
/// <remarks>See <see cref="UnescapeChar(ref UString, ref EscapeC)"/> for details.</remarks>
public static StringBuilder UnescapeCStyle(UString s, out EscapeC encountered, bool removeUnnecessaryBackslashes = false)
{
    encountered = 0;
    StringBuilder decoded = new StringBuilder(s.Length);

    while (s.Length > 0)
    {
        EscapeC flagsForThisChar = 0;
        int codePoint = UnescapeChar(ref s, ref flagsForThisChar);
        encountered |= flagsForThisChar;

        bool dropBackslash = removeUnnecessaryBackslashes
            && (flagsForThisChar & EscapeC.Unrecognized) != 0;
        if (dropBackslash)
        {
            // The character being skipped is expected to be the backslash itself.
            Debug.Assert(codePoint == '\\');
        }
        else
        {
            decoded.AppendCodePoint(codePoint);
        }
    }

    return decoded;
}
/* map bits to unicode codepoints */
/// <summary>
/// Builds a string by converting <paramref name="i"/> to its binary-digit
/// string and appending, for each digit, the code point stored at that
/// digit's index in <paramref name="codePointTable"/>.
/// </summary>
/// <param name="codePointTable">Code points indexed by digit value; presumably
/// two entries, for '0' and '1' (TODO confirm against callers).</param>
/// <param name="i">The integer whose binary digits select the code points.</param>
/// <returns>The concatenation of the selected code points.</returns>
private static String MapInt(int[] codePointTable, int i)
{
    StringBuilder sb = new StringBuilder();
    String binary = Number.ToBinaryString(i);
    foreach (char digit in binary)
    {
        // Subtracting '0' turns the digit character into a table index.
        sb.AppendCodePoint(codePointTable[digit - '0']);
    }

    // StringBuilder exposes ToString(); the Java-style toString() does not exist on it.
    return sb.ToString();
}
/// <summary>Writes a character <c>c</c> to a StringBuilder, either as a normal
/// character or as a C-style escape sequence.</summary>
/// <param name="c">The character to write, given as an integer code point.</param>
/// <param name="out">Builder that receives the character or its escape sequence.</param>
/// <param name="flags">Specifies which characters should be escaped.</param>
/// <param name="quoteType">Specifies a character that should always be
/// escaped (typically one of <c>' " `</c>)</param>
/// <returns>true if an escape sequence was emitted, false if not.</returns>
/// <remarks><see cref="EscapeC.HasLongEscape"/> can be used to force a 6-digit
/// unicode escape; this may be needed if the next character after this one
/// is a digit.
/// <para>Newline, carriage return, NUL and backslash are escaped regardless of
/// <paramref name="flags"/>.</para></remarks>
public static bool EscapeCStyle(int c, StringBuilder @out, EscapeC flags = EscapeC.Default, char quoteType = '\0')
{
    // The loop body runs once; "break" means "no escape rule applied, fall through
    // to the plain-character path below", while reaching the end of the body means
    // an escape sequence was written.
    for (;;)
    {
        if (c >= 128)
        {
            if ((flags & EscapeC.NonAscii) != 0)
            {
                EscapeU(c, @out, flags);
            }
            else if (c >= 0xDC00)
            {
                bool isNonCharacter =
                    c >= 0xFDD0 && c <= 0xFDEF ||   // 0xFDD0...0xFDEF
                    (c & 0xFFFE) == 0xFFFE;         // 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, etc.

                // 0xDC00...0xDFFF. This test is not gated by UnicodeNonCharacters.
                // NOTE(review): only bits 10-15 are examined, so values above 0xFFFF
                // with the same bit pattern (e.g. 0x1DC00) match as well.
                bool hasLowSurrogateBits = (c & 0xFC00) == 0xDC00;

                if (((flags & EscapeC.UnicodeNonCharacters) != 0 && isNonCharacter) || hasLowSurrogateBits)
                {
                    EscapeU(c, @out, flags);
                }
                else if ((flags & EscapeC.UnicodePrivateUse) != 0 && (
                    c >= 0xE000 && c <= 0xF8FF ||
                    c >= 0xF0000 && c <= 0xFFFFD ||
                    c >= 0x100000 && c <= 0x10FFFD))
                {
                    EscapeU(c, @out, flags);
                }
                else
                {
                    break;
                }
            }
            else
            {
                break;
            }
        }
        else if (c < 32)
        {
            if (c == '\n')
            {
                @out.Append(@"\n");
            }
            else if (c == '\r')
            {
                @out.Append(@"\r");
            }
            else if (c == '\0')
            {
                @out.Append(@"\0");
            }
            else
            {
                if ((flags & EscapeC.ABFV) != 0)
                {
                    if (c == '\a') // 7 (alert)
                    {
                        @out.Append(@"\a");
                        return true;
                    }
                    if (c == '\b') // 8 (backspace)
                    {
                        @out.Append(@"\b");
                        return true;
                    }
                    if (c == '\f') // 12 (form feed)
                    {
                        @out.Append(@"\f");
                        return true;
                    }
                    if (c == '\v') // 11 (vertical tab)
                    {
                        @out.Append(@"\v");
                        return true;
                    }
                }

                if ((flags & EscapeC.Control) != 0)
                {
                    if (c == '\t')
                    {
                        @out.Append(@"\t");
                    }
                    else
                    {
                        EscapeU(c, @out, flags);
                    }
                }
                else
                {
                    // No flag asks for this control character to be escaped, so write
                    // the character itself. The cast is essential: c is an int, and
                    // StringBuilder.Append(int) would write its decimal digits
                    // (e.g. "9" for a tab). Nothing was escaped, so report false.
                    @out.Append((char)c);
                    return false;
                }
            }
        }
        else if (c == '\"' && (flags & EscapeC.DoubleQuotes) != 0)
        {
            @out.Append("\\\"");
        }
        else if (c == '\'' && (flags & EscapeC.SingleQuotes) != 0)
        {
            @out.Append("\\'");
        }
        else if (c == '\\')
        {
            @out.Append(@"\\");
        }
        else
        {
            break;
        }

        return true;
    }

    // No escape rule applied: escape the caller's quote character with a
    // backslash, otherwise write the code point unchanged.
    if (c == quoteType)
    {
        @out.Append('\\');
        @out.Append((char)c);
        return true;
    }

    @out.AppendCodePoint(c);
    return false;
}