public virtual void TestAppendCodePointTooLow()
        {
            // One less than Character.MIN_CODE_POINT: the append is expected to be refused.
            int belowMinimum = Character.MIN_CODE_POINT - 1;
            var builder = new StringBuilder("foo bar");

            Assert.Throws<ArgumentException>(() => builder.AppendCodePoint(belowMinimum));
        }
        public virtual void TestAppendCodePointUTF16Surrogates()
        {
            // 0x2B001 == 176129 (𫀁); in UTF-16 this is the pair '\uD86C', '\uDC01'.
            int supplementary = 0x2B001;
            var builder = new StringBuilder("foo bar");

            builder.AppendCodePoint(supplementary);
            string actual = builder.ToString();

            Assert.AreEqual("foo bar𫀁", actual);
        }
        public virtual void TestAppendCodePointUnicode()
        {
            // 0x0E0A == 3594 (ช), a non-ASCII code point that fits in a single UTF-16 unit.
            int thaiLetter = 0x0E0A;
            var builder = new StringBuilder("foo bar");

            builder.AppendCodePoint(thaiLetter);
            string actual = builder.ToString();

            Assert.AreEqual("foo barช", actual);
        }
        public virtual void TestAppendCodePointBmp()
        {
            // 'a' has the numeric value 97.
            int asciiLetter = 'a';
            var builder = new StringBuilder("foo bar");

            builder.AppendCodePoint(asciiLetter);
            string actual = builder.ToString();

            Assert.AreEqual("foo bara", actual);
        }
Example #5
0
 /// <summary>
 /// Parses the hexadecimal digits in the range [<paramref name="i"/>, <paramref name="end"/>)
 /// as a code point and appends that code point to <paramref name="sb"/>.
 /// </summary>
 /// <param name="sb">Builder that receives the decoded code point.</param>
 /// <param name="i">Pointer to the first hexadecimal digit. At least one character is
 /// always read, even when <paramref name="i"/> is not below <paramref name="end"/>.</param>
 /// <param name="end">Pointer one past the last digit.</param>
 /// <exception cref="ArgumentException">The value exceeds 0x10FFFF, or
 /// <c>IsCharacterReferenceValid</c> rejects it.</exception>
 /// <remarks>
 /// The characters are not checked for being hexadecimal digits here; presumably the
 /// caller has already validated them (TODO confirm against the call site).
 /// </remarks>
 private unsafe static void AppendHexadecimalCharacterReference(StringBuilder sb, char* i, char* end)
 {
     int cp = 0;
     do
     {
         char digit = *i;
         int base16v;
         if (digit <= '9')
         {
             base16v = digit - '0';
         }
         else if (digit <= 'F')
         {
             // 'A'..'F' sort below 'a'..'f', so the upper-case range has to be tested
             // first; testing "<= 'f'" first would send upper-case digits through the
             // lower-case formula and produce negative values.
             base16v = digit - 'A' + 10;
         }
         else
         {
             base16v = digit - 'a' + 10;
         }

         cp = cp * 16 + base16v;

         // Checking on every digit keeps cp small enough that the multiplication
         // above cannot overflow an int.
         if (0x10FFFF < cp)
         {
             throw new ArgumentException();
         }
     } while (++i < end);

     if (!IsCharacterReferenceValid(cp))
     {
         throw new ArgumentException();
     }

     sb.AppendCodePoint(cp);
 }
Example #6
0
 /// <summary>
 /// Parses the decimal digits in the range [<paramref name="i"/>, <paramref name="end"/>)
 /// as a code point and appends that code point to <paramref name="sb"/>.
 /// </summary>
 /// <param name="sb">Builder that receives the decoded code point.</param>
 /// <param name="i">Pointer to the first digit; at least one character is always read.</param>
 /// <param name="end">Pointer one past the last digit.</param>
 /// <exception cref="ArgumentException">The value exceeds 0x10FFFF, or
 /// <c>IsCharacterReferenceValid</c> rejects it.</exception>
 private unsafe static void AppendDecimalCharacterReference(StringBuilder sb, char* i, char* end)
 {
     int codePoint = 0;
     char* cursor = i;
     while (true)
     {
         int digit = *cursor - '0';
         codePoint = codePoint * 10 + digit;
         if (codePoint > 0x10FFFF)
         {
             throw new ArgumentException();
         }

         // Advance first, then test: the first character is consumed unconditionally.
         cursor++;
         if (cursor >= end)
         {
             break;
         }
     }

     if (IsCharacterReferenceValid(codePoint))
     {
         sb.AppendCodePoint(codePoint);
         return;
     }

     throw new ArgumentException();
 }
Example #7
0
        /// <summary>
        /// Walks every match of <c>UcharMatched</c> in <paramref name="str"/> and replaces
        /// the matched escape text with the character(s) it stands for.
        /// </summary>
        /// <param name="str">Text to process; may be null.</param>
        /// <returns>The processed text, or null when <paramref name="str"/> is null.</returns>
        public static string Unescape(string str)
        {
            if (str == null)
            {
                return str;
            }

            string result = str;
            Matcher matcher = UcharMatched.Matcher(str);
            while (matcher.Find())
            {
                string replacement;
                string simpleEscape = matcher.Group(1);
                if (simpleEscape != null)
                {
                    // Group 1 holds a single-letter escape such as n, t or a quote.
                    switch (simpleEscape[0])
                    {
                        case 'b': replacement = "\b"; break;
                        case 'n': replacement = "\n"; break;
                        case 't': replacement = "\t"; break;
                        case 'f': replacement = "\f"; break;
                        case 'r': replacement = "\r"; break;
                        case '\'': replacement = "'"; break;
                        case '\"': replacement = "\""; break;
                        case '\\': replacement = "\\"; break;
                        default:
                            // Unknown escape letter: leave this match untouched.
                            continue;
                    }
                }
                else
                {
                    // Groups 2 and 3 hold the hex digits of a numeric escape.
                    string hexDigits = matcher.Group(2) ?? matcher.Group(3);
                    int value = System.Convert.ToInt32(hexDigits, 16);
                    if (value > 0xFFFF)
                    {
                        // Above the BMP: split the value into a UTF-16 surrogate pair.
                        int offset = value - 0x10000;
                        int highSurrogate = 0xD800 + (offset >> 10);
                        int lowSurrogate = 0xDC00 + (offset & 0x3FF);
                        StringBuilder pair = new StringBuilder();
                        pair.AppendCodePoint(highSurrogate);
                        pair.AppendCodePoint(lowSurrogate);
                        replacement = pair.ToString();
                    }
                    else
                    {
                        replacement = ((char)value).ToString();
                    }
                }

                // NOTE(review): string.Replace performs a literal replacement, whereas
                // Pattern.Quote suggests a regex-quoted pattern - confirm the quoted text
                // can actually occur in the input.
                string quoted = Pattern.Quote(matcher.Group(0));
                // The hex string is never read; the call is kept so behavior is unchanged.
                string unusedHex = JsonLD.JavaCompat.ToHexString(replacement[0]);
                result = result.Replace(quoted, replacement);
            }

            return result;
        }
Example #8
0
		/// <summary>Decodes C-style escape sequences in a string, so that for example 
		/// "\\\n\\\r" turns into "\n\r".</summary>
		/// <param name="s">The text to decode.</param>
		/// <param name="encountered">Receives the combined flags reported for every 
		/// character that was decoded.</param>
		/// <param name="removeUnnecessaryBackslashes">When true, the backslash that 
		/// introduces an unrecognized escape sequence is dropped, e.g. "\z" => "z".</param>
		/// <returns>A new StringBuilder holding the decoded text.</returns>
		/// <remarks>See <see cref="UnescapeChar(ref UString, ref EscapeC)"/> for details.</remarks>
		public static StringBuilder UnescapeCStyle(UString s, out EscapeC encountered, bool removeUnnecessaryBackslashes = false)
		{
			var decoded = new StringBuilder(s.Length);
			encountered = 0;
			for (; s.Length > 0; ) {
				EscapeC flagsHere = 0;
				int ch = UnescapeChar(ref s, ref flagsHere);
				encountered |= flagsHere;

				bool unrecognized = (flagsHere & EscapeC.Unrecognized) != 0;
				if (unrecognized && removeUnnecessaryBackslashes) {
					// Only the backslash was consumed; skip it instead of copying it.
					Debug.Assert(ch == '\\');
				} else {
					decoded.AppendCodePoint(ch);
				}
			}
			return decoded;
		}
Example #9
0
 /* Map the bits of i to unicode code points: each binary digit of i (as produced by
  * Number.ToBinaryString) selects codePointTable[0] or codePointTable[1], and the
  * selected code points are concatenated in digit order. */
 private static String MapInt(int[] codePointTable, int i)
 {
     StringBuilder sb = new StringBuilder();
     String binary = Number.ToBinaryString(i);
     for (int j = 0; j < binary.Length; j++)
     {
         // '0' -> index 0, '1' -> index 1
         sb.AppendCodePoint(codePointTable[binary[j] - '0']);
     }
     // StringBuilder exposes ToString(); the Java-style toString() does not exist in C#.
     return sb.ToString();
 }
Example #10
0
		/// <summary>Writes a character <c>c</c> to a StringBuilder, either as a normal 
		/// character or as a C-style escape sequence.</summary>
		/// <param name="c">The character (code point) to write.</param>
		/// <param name="out">The builder that receives the output.</param>
		/// <param name="flags">Specifies which characters should be escaped.</param>
		/// <param name="quoteType">Specifies a character that should always be 
		/// escaped (typically one of <c>' " `</c>)</param>
		/// <returns>true if an escape sequence was emitted, false if not.</returns>
		/// <remarks><see cref="EscapeC.HasLongEscape"/> can be used to force a 6-digit 
		/// unicode escape; this may be needed if the next character after this one 
		/// is a digit.</remarks>
		public static bool EscapeCStyle(int c, StringBuilder @out, EscapeC flags = EscapeC.Default, char quoteType = '\0')
		{
			// This loop never repeats; it only serves as a breakable block. `break`
			// means "no escape was written" and jumps to the shared tail below,
			// while reaching the end of the block means an escape was written.
			for(;;) {
				if (c >= 128) {
					if ((flags & EscapeC.NonAscii) != 0) {
						EscapeU(c, @out, flags);
					} else if (c >= 0xDC00) {
						// Parentheses spell out the operator precedence: the surrogate
						// test on the last line applies regardless of the flags.
						if (((flags & EscapeC.UnicodeNonCharacters) != 0 && (
							(c >= 0xFDD0 && c <= 0xFDEF) || // 0xFDD0...0xFDEF 
							(c & 0xFFFE) == 0xFFFE)) || // 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, etc.
							(c & 0xFC00) == 0xDC00) { // 0xDC00...0xDFFF 
							EscapeU(c, @out, flags);
						} else if ((flags & EscapeC.UnicodePrivateUse) != 0 && (
							c >= 0xE000 && c <= 0xF8FF ||
							c >= 0xF0000 && c <= 0xFFFFD ||
							c >= 0x100000 && c <= 0x10FFFD)) {
							EscapeU(c, @out, flags);
						} else
							break;
					} else
						break;
				} else if (c < 32) {
					// Newline, carriage return and NUL are escaped whatever the flags say.
					if (c == '\n')
						@out.Append(@"\n");
					else if (c == '\r')
						@out.Append(@"\r");
					else if (c == '\0')
						@out.Append(@"\0");
					else {
						if ((flags & EscapeC.ABFV) != 0) {
							if (c == '\a') { // 7 (alert)
								@out.Append(@"\a");
								return true;
							}
							if (c == '\b') { // 8 (backspace)
								@out.Append(@"\b");
								return true;
							}
							if (c == '\f') { // 12 (form feed)
								@out.Append(@"\f");
								return true;
							}
							if (c == '\v') { // 11 (vertical tab)
								@out.Append(@"\v");
								return true;
							}
						}
						if ((flags & EscapeC.Control) != 0) {
							if (c == '\t')
								@out.Append(@"\t");
							else
								EscapeU(c, @out, flags);
						} else {
							// Control characters are not being escaped: write the character
							// itself via the shared tail. Calling Append(c) here would bind
							// to StringBuilder.Append(int) and write the decimal digits of
							// the code (e.g. "9" for a tab), and the method would wrongly
							// report that an escape sequence had been emitted.
							break;
						}
					}
				} else if (c == '\"' && (flags & EscapeC.DoubleQuotes) != 0) {
					@out.Append("\\\"");
				} else if (c == '\'' && (flags & EscapeC.SingleQuotes) != 0) {
					@out.Append("\\'");
				} else if (c == '\\')
					@out.Append(@"\\");
				else
					break;
				return true;
			}

			// Shared tail: no escape was chosen above.
			if (c == quoteType) {
				@out.Append('\\');
				@out.Append((char)c);
				return true;
			} else 
				@out.AppendCodePoint(c);
			return false;
		}