/// <summary>
/// Encodes the UTF-16 code units <c>s[offset]</c> through <c>s[offset + length - 1]</c>
/// as UTF-8. The bytes are written to <c>result.result</c> starting at index 0 and the
/// number of bytes written is stored in <c>result.length</c>; the method itself returns
/// nothing.
/// </summary>
/// <remarks>
/// When <c>result.result</c> is too small it is replaced by a larger array (the bytes
/// already written are copied over), so callers must re-read <c>result.result</c> after
/// the call. A valid high/low surrogate pair is written as one 4-byte sequence. A
/// surrogate that is not part of a valid pair inside the requested range is written as
/// EF BF BD (U+FFFD). <paramref name="offset"/> and <paramref name="length"/> are not
/// validated here.
/// </remarks>
/// <param name="s">String supplying the UTF-16 code units.</param>
/// <param name="offset">Index in <paramref name="s"/> of the first code unit to encode.</param>
/// <param name="length">Number of code units to encode.</param>
/// <param name="result">Receives the output buffer (<c>result</c>) and byte count (<c>length</c>).</param>
public static void UTF16toUTF8(System.String s, int offset, int length, UTF8Result result)
{
    int end = offset + length;
    byte[] out_Renamed = result.result;
    int upto = 0;

    for (int i = offset; i < end; i++)
    {
        int code = (int) s[i];

        // One iteration writes at most 4 bytes, so make sure 4 are free up front.
        if (upto + 4 > out_Renamed.Length)
        {
            // Doubling alone is insufficient for a 0- or 1-byte buffer, so never
            // allocate less than what this iteration may need.
            byte[] newOut = new byte[System.Math.Max(2 * out_Renamed.Length, upto + 4)];
            System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
            result.result = out_Renamed = newOut;
        }

        if (code < 0x80)
        {
            // 1 byte: 0xxxxxxx
            out_Renamed[upto++] = (byte) code;
        }
        else if (code < 0x800)
        {
            // 2 bytes: 110xxxxx 10xxxxxx
            out_Renamed[upto++] = (byte) (0xC0 | (code >> 6));
            out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
        }
        else if (code < 0xD800 || code > 0xDFFF)
        {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            out_Renamed[upto++] = (byte) (0xE0 | (code >> 12));
            out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
            out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
        }
        else
        {
            // Surrogate range. A pair is only formed when this is a high surrogate
            // and the next code unit is still inside [offset, end).
            if (code < 0xDC00 && (i < end - 1))
            {
                int utf32 = (int) s[i + 1];
                if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
                {
                    // 0xD7C0 == 0xD800 - 0x40; shifting the difference left by 10
                    // therefore also adds the 0x10000 supplementary-plane base.
                    utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
                    i++; // the low surrogate has been consumed as well

                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18));
                    out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F));
                    out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F));
                    out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F));
                    continue;
                }
            }

            // Unpaired high surrogate or out-of-order low surrogate:
            // substitute U+FFFD (EF BF BD).
            out_Renamed[upto++] = (byte) 0xEF;
            out_Renamed[upto++] = (byte) 0xBF;
            out_Renamed[upto++] = (byte) 0xBD;
        }
    }

    result.length = upto;
}
/// <summary>
/// Encodes UTF-16 code units from <paramref name="source"/> as UTF-8, starting at
/// <paramref name="offset"/> and stopping at the first code unit equal to 0xffff
/// (which is not encoded). The bytes are written to <c>result.result</c> starting at
/// index 0 and the number of bytes written is stored in <c>result.length</c>; the
/// method itself returns nothing.
/// </summary>
/// <remarks>
/// The 0xffff terminator is the only exit from the loop: no length is passed in and no
/// bounds check is made, so <paramref name="source"/> must contain 0xffff at or after
/// <paramref name="offset"/>. When <c>result.result</c> is too small it is replaced by
/// a larger array (the bytes already written are copied over), so callers must re-read
/// <c>result.result</c> after the call. A valid high/low surrogate pair is written as
/// one 4-byte sequence; any other surrogate is written as EF BF BD (U+FFFD).
/// </remarks>
/// <param name="source">Buffer supplying the UTF-16 code units, terminated by 0xffff.</param>
/// <param name="offset">Index in <paramref name="source"/> of the first code unit to encode.</param>
/// <param name="result">Receives the output buffer (<c>result</c>) and byte count (<c>length</c>).</param>
public static void UTF16toUTF8(char[] source, int offset, UTF8Result result)
{
    int upto = 0;
    int i = offset;
    byte[] out_Renamed = result.result;

    while (true)
    {
        // i is advanced immediately, so below source[i] is the *following* code unit.
        int code = (int)source[i++];

        // One iteration writes at most 4 bytes, so make sure 4 are free up front.
        if (upto + 4 > out_Renamed.Length)
        {
            // Doubling alone is insufficient for a 0- or 1-byte buffer, so never
            // allocate less than what this iteration may need.
            byte[] newOut = new byte[System.Math.Max(2 * out_Renamed.Length, upto + 4)];
            System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
            result.result = out_Renamed = newOut;
        }

        if (code < 0x80)
        {
            // 1 byte: 0xxxxxxx
            out_Renamed[upto++] = (byte)code;
        }
        else if (code < 0x800)
        {
            // 2 bytes: 110xxxxx 10xxxxxx
            out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
            out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
        }
        else if (code < 0xD800 || code > 0xDFFF)
        {
            // 0xffff lies in this range; it is the end-of-input marker.
            if (code == 0xffff)
            {
                break;
            }

            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
            out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
            out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
        }
        else
        {
            // Surrogate range. A pair is only formed when this is a high surrogate
            // and the input has not ended right after it.
            if (code < 0xDC00 && source[i] != 0xffff)
            {
                int utf32 = (int)source[i];
                if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
                {
                    // 0xD7C0 == 0xD800 - 0x40; shifting the difference left by 10
                    // therefore also adds the 0x10000 supplementary-plane base.
                    utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
                    i++; // the low surrogate has been consumed as well

                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
                    out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
                    continue;
                }
            }

            // Unpaired high surrogate or out-of-order low surrogate:
            // substitute U+FFFD (EF BF BD).
            out_Renamed[upto++] = (byte)0xEF;
            out_Renamed[upto++] = (byte)0xBF;
            out_Renamed[upto++] = (byte)0xBD;
        }
    }

    result.length = upto;
}
/** Encode the UTF-16 code units of this string, starting at offset
 *  and covering length code units, as UTF-8.  The bytes are written
 *  to result.result from index 0 and the number of bytes written is
 *  stored in result.length; the method itself returns nothing.
 *
 *  If result.result is too small it is replaced by a larger array
 *  (bytes already written are copied over), so callers must re-read
 *  result.result afterwards.  A valid high/low surrogate pair becomes
 *  one 4-byte sequence; a surrogate without a valid partner inside the
 *  requested range becomes EF BF BD (U+FFFD).  offset and length are
 *  not validated here.
 */
public static void UTF16toUTF8(/* in */ string s, /* in */ int offset, /* in */ int length, UTF8Result result)
{
    int end = offset + length;

    byte[] out_Renamed = result.result;

    int upto = 0;
    for (int i = offset; i < end; i++)
    {
        int code = (int)s[i];

        // Each iteration emits at most 4 bytes; reserve them before writing.
        if (upto + 4 > out_Renamed.Length)
        {
            // Plain doubling leaves a 0- or 1-byte buffer too small, so the new
            // size is never allowed to drop below upto + 4.
            int newSize = System.Math.Max(2 * out_Renamed.Length, upto + 4);
            byte[] newOut = new byte[newSize];
            System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
            result.result = out_Renamed = newOut;
        }

        if (code < 0x80)
        {
            // single byte
            out_Renamed[upto++] = (byte)code;
        }
        else if (code < 0x800)
        {
            // two bytes
            out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
            out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
        }
        else if (code < 0xD800 || code > 0xDFFF)
        {
            // three bytes
            out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
            out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
            out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
        }
        else
        {
            // surrogate pair
            // confirm valid high surrogate that is not the last unit in range
            if (code < 0xDC00 && (i < end - 1))
            {
                int utf32 = (int)s[i + 1];
                // confirm valid low surrogate and write pair
                if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
                {
                    // 0xD7C0 is 0xD800 - 0x40, so the shift by 10 also adds
                    // the 0x10000 offset of the supplementary planes.
                    utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
                    i++; // skip the low surrogate just consumed
                    out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
                    out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
                    continue;
                }
            }
            // replace unpaired surrogate or out-of-order low surrogate
            // with substitution character U+FFFD
            out_Renamed[upto++] = (byte)0xEF;
            out_Renamed[upto++] = (byte)0xBF;
            out_Renamed[upto++] = (byte)0xBD;
        }
    }

    result.length = upto;
}
/** Encode UTF-16 code units from a char[] source as UTF-8, starting
 *  at offset and stopping when the code unit 0xffff is seen (the
 *  0xffff itself is not encoded).  The bytes are written to
 *  result.result from index 0 and the number of bytes written is
 *  stored in result.length; the method itself returns nothing.
 *
 *  The 0xffff terminator is the only way out of the loop: no length
 *  is passed and no bounds check is made, so source must contain
 *  0xffff at or after offset.  If result.result is too small it is
 *  replaced by a larger array (bytes already written are copied
 *  over), so callers must re-read result.result afterwards.  A valid
 *  high/low surrogate pair becomes one 4-byte sequence; any other
 *  surrogate becomes EF BF BD (U+FFFD).
 */
public static void UTF16toUTF8(/* in */ char[] source, /* in */ int offset, UTF8Result result)
{
    int upto = 0;
    int i = offset;
    byte[] out_Renamed = result.result;

    while (true)
    {
        // i already points past the unit being encoded, so source[i]
        // further down is the unit that follows it.
        int code = (int)source[i++];

        // Each iteration emits at most 4 bytes; reserve them before writing.
        if (upto + 4 > out_Renamed.Length)
        {
            // Plain doubling leaves a 0- or 1-byte buffer too small, so the new
            // size is never allowed to drop below upto + 4.
            int newSize = System.Math.Max(2 * out_Renamed.Length, upto + 4);
            byte[] newOut = new byte[newSize];
            System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
            result.result = out_Renamed = newOut;
        }

        if (code < 0x80)
        {
            // single byte
            out_Renamed[upto++] = (byte)code;
        }
        else if (code < 0x800)
        {
            // two bytes
            out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
            out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
        }
        else if (code < 0xD800 || code > 0xDFFF)
        {
            if (code == 0xffff)
            {
                // END marker reached; it is not written to the output
                break;
            }
            // three bytes
            out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
            out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
            out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
        }
        else
        {
            // surrogate pair
            // confirm valid high surrogate that is not directly followed by END
            if (code < 0xDC00 && source[i] != 0xffff)
            {
                int utf32 = (int)source[i];
                // confirm valid low surrogate and write pair
                if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
                {
                    // 0xD7C0 is 0xD800 - 0x40, so the shift by 10 also adds
                    // the 0x10000 offset of the supplementary planes.
                    utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
                    i++; // skip the low surrogate just consumed
                    out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
                    out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
                    continue;
                }
            }
            // replace unpaired surrogate or out-of-order low surrogate
            // with substitution character U+FFFD
            out_Renamed[upto++] = (byte)0xEF;
            out_Renamed[upto++] = (byte)0xBF;
            out_Renamed[upto++] = (byte)0xBD;
        }
    }

    result.length = upto;
}