示例#1
0
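		/// <summary>Encodes <c>length</c> UTF-16 characters of <c>s</c>, starting at
		/// <c>offset</c>, as UTF-8. The bytes are written to <c>result.result</c>
		/// (replaced by a larger array when more room is needed) and the number of
		/// bytes written is stored in <c>result.length</c>; nothing is returned.
		/// </summary>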
		public static void  UTF16toUTF8(System.String s, int offset, int length, UTF8Result result)
		{
			// Converts the UTF-16 code units s[offset] .. s[offset + length - 1] to
			// UTF-8. Output bytes go to result.result (swapped for a larger array
			// when it fills up); the byte count is stored in result.length.
			int end = offset + length;

			byte[] out_Renamed = result.result;

			int upto = 0;
			for (int i = offset; i < end; i++)
			{
				int code = (int) s[i];

				// One iteration writes at most 4 bytes (a surrogate pair), so make
				// sure that much room exists before encoding anything.
				if (upto + 4 > out_Renamed.Length)
				{
					// Doubling alone cannot grow an empty or very short buffer
					// (2 * 0 == 0), so the 4-byte minimum is enforced as well.
					byte[] newOut = new byte[System.Math.Max(2 * out_Renamed.Length, upto + 4)];
					Array.Copy(out_Renamed, 0, newOut, 0, upto);
					result.result = out_Renamed = newOut;
				}

				if (code < 0x80)
				{
					// 1 byte: 0xxxxxxx
					out_Renamed[upto++] = (byte) code;
				}
				else if (code < 0x800)
				{
					// 2 bytes: 110xxxxx 10xxxxxx
					out_Renamed[upto++] = (byte) (0xC0 | (code >> 6));
					out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
				}
				else if (code < 0xD800 || code > 0xDFFF)
				{
					// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
					out_Renamed[upto++] = (byte) (0xE0 | (code >> 12));
					out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F));
					out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F));
				}
				else
				{
					// code is a surrogate (0xD800..0xDFFF). Only a high surrogate
					// followed, inside the requested range, by a low surrogate
					// forms a valid pair.
					if (code < 0xDC00 && (i < end - 1))
					{
						int low = (int) s[i + 1];
						if (low >= 0xDC00 && low <= 0xDFFF)
						{
							// 0xD7C0 == 0xD800 - 0x40: subtracting it removes the
							// high-surrogate base and adds the 0x10000 offset of
							// the supplementary planes in a single step.
							int codePoint = ((code - 0xD7C0) << 10) + (low & 0x3FF);
							i++; // the low surrogate has been consumed too

							// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
							out_Renamed[upto++] = (byte) (0xF0 | (codePoint >> 18));
							out_Renamed[upto++] = (byte) (0x80 | ((codePoint >> 12) & 0x3F));
							out_Renamed[upto++] = (byte) (0x80 | ((codePoint >> 6) & 0x3F));
							out_Renamed[upto++] = (byte) (0x80 | (codePoint & 0x3F));
							continue;
						}
					}

					// Unpaired high surrogate or out-of-order low surrogate:
					// emit U+FFFD (REPLACEMENT CHARACTER), i.e. EF BF BD.
					out_Renamed[upto++] = (byte) (0xEF);
					out_Renamed[upto++] = (byte) (0xBF);
					out_Renamed[upto++] = (byte) (0xBD);
				}
			}

			result.length = upto;
		}
示例#2
0
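        /// <summary>Encodes UTF-16 characters from <c>source</c> as UTF-8, starting
        /// at <c>offset</c> and stopping when the character 0xffff is seen (the
        /// terminator itself is not encoded). The bytes are written to
        /// <c>result.result</c> (replaced by a larger array when more room is
        /// needed) and the number of bytes written is stored in
        /// <c>result.length</c>; nothing is returned.
        /// </summary>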
        public static void  UTF16toUTF8(char[] source, int offset, UTF8Result result)
        {
            // Converts UTF-16 code units from source, beginning at offset, to
            // UTF-8 until the sentinel 0xffff is read. There is no length
            // argument: the loop only ends at the sentinel, so source must
            // contain one at or after offset. Output bytes go to result.result
            // (swapped for a larger array when it fills up); the byte count is
            // stored in result.length.
            int upto = 0;
            int i    = offset;

            byte[] out_Renamed = result.result;

            while (true)
            {
                int code = (int)source[i++];

                // One iteration writes at most 4 bytes (a surrogate pair), so
                // make sure that much room exists before encoding anything.
                if (upto + 4 > out_Renamed.Length)
                {
                    // Doubling alone cannot grow an empty or very short buffer
                    // (2 * 0 == 0), so the 4-byte minimum is enforced as well.
                    byte[] newOut = new byte[System.Math.Max(2 * out_Renamed.Length, upto + 4)];
                    Array.Copy(out_Renamed, 0, newOut, 0, upto);
                    result.result = out_Renamed = newOut;
                }

                if (code < 0x80)
                {
                    // 1 byte: 0xxxxxxx
                    out_Renamed[upto++] = (byte)code;
                }
                else if (code < 0x800)
                {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
                }
                else if (code < 0xD800 || code > 0xDFFF)
                {
                    if (code == 0xffff)
                    {
                        // Sentinel reached: stop without encoding it.
                        break;
                    }

                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
                    out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
                }
                else
                {
                    // code is a surrogate (0xD800..0xDFFF). Only a high
                    // surrogate followed by a low surrogate forms a valid pair.
                    // i already points at the next character here.
                    if (code < 0xDC00 && source[i] != 0xffff)
                    {
                        int low = (int)source[i];
                        if (low >= 0xDC00 && low <= 0xDFFF)
                        {
                            // 0xD7C0 == 0xD800 - 0x40: subtracting it removes
                            // the high-surrogate base and adds the 0x10000
                            // offset of the supplementary planes in one step.
                            int codePoint = ((code - 0xD7C0) << 10) + (low & 0x3FF);
                            i++; // the low surrogate has been consumed too

                            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                            out_Renamed[upto++] = (byte)(0xF0 | (codePoint >> 18));
                            out_Renamed[upto++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
                            out_Renamed[upto++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                            out_Renamed[upto++] = (byte)(0x80 | (codePoint & 0x3F));
                            continue;
                        }
                    }

                    // Unpaired high surrogate or out-of-order low surrogate:
                    // emit U+FFFD (REPLACEMENT CHARACTER), i.e. EF BF BD.
                    out_Renamed[upto++] = (byte)(0xEF);
                    out_Renamed[upto++] = (byte)(0xBF);
                    out_Renamed[upto++] = (byte)(0xBD);
                }
            }

            result.length = upto;
        }
示例#3
0
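        /** Encodes length UTF-16 characters of s, starting at offset, as
         *  UTF-8. The bytes are written to result.result (replaced by a
         *  larger array when more room is needed) and the number of bytes
         *  written is stored in result.length; nothing is returned. */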
        public static void UTF16toUTF8(/* in */ string s, /* in */ int offset, /* in */ int length, UTF8Result result)
        {
            // Converts the UTF-16 code units s[offset] .. s[offset + length - 1]
            // to UTF-8. Output bytes go to result.result (swapped for a larger
            // array when it fills up); the byte count is stored in result.length.
            int end = offset + length;

            byte[] out_Renamed = result.result;

            int upto = 0;

            for (int i = offset; i < end; i++)
            {
                int code = (int)s[i];

                // One iteration writes at most 4 bytes (a surrogate pair), so
                // make sure that much room exists before encoding anything.
                if (upto + 4 > out_Renamed.Length)
                {
                    // Doubling alone cannot grow an empty or very short buffer
                    // (2 * 0 == 0), so the 4-byte minimum is enforced as well.
                    int newLength = System.Math.Max(2 * out_Renamed.Length, upto + 4);
                    byte[] newOut = new byte[newLength];
                    System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
                    result.result = out_Renamed = newOut;
                }

                if (code < 0x80)
                {
                    // 1 byte: 0xxxxxxx
                    out_Renamed[upto++] = (byte)code;
                }
                else if (code < 0x800)
                {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
                }
                else if (code < 0xD800 || code > 0xDFFF)
                {
                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
                    out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
                }
                else
                {
                    // code is a surrogate (0xD800..0xDFFF). Only a high
                    // surrogate followed, inside the requested range, by a low
                    // surrogate forms a valid pair.
                    if (code < 0xDC00 && (i < end - 1))
                    {
                        int low = (int)s[i + 1];
                        if (low >= 0xDC00 && low <= 0xDFFF)
                        {
                            // 0xD7C0 == 0xD800 - 0x40: subtracting it removes
                            // the high-surrogate base and adds the 0x10000
                            // offset of the supplementary planes in one step.
                            int codePoint = ((code - 0xD7C0) << 10) + (low & 0x3FF);
                            i++; // the low surrogate has been consumed too

                            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                            out_Renamed[upto++] = (byte)(0xF0 | (codePoint >> 18));
                            out_Renamed[upto++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
                            out_Renamed[upto++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                            out_Renamed[upto++] = (byte)(0x80 | (codePoint & 0x3F));
                            continue;
                        }
                    }

                    // Unpaired high surrogate or out-of-order low surrogate:
                    // emit U+FFFD (REPLACEMENT CHARACTER), i.e. EF BF BD.
                    out_Renamed[upto++] = (byte)0xEF;
                    out_Renamed[upto++] = (byte)0xBF;
                    out_Renamed[upto++] = (byte)0xBD;
                }
            }

            result.length = upto;
        }
示例#4
0
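        /** Encodes UTF-16 characters from source as UTF-8, starting at
         *  offset and stopping when the character 0xffff is seen (the
         *  terminator itself is not encoded). The bytes are written to
         *  result.result (replaced by a larger array when more room is
         *  needed) and the number of bytes written is stored in
         *  result.length; nothing is returned. */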
        public static void UTF16toUTF8(/* in */ char[] source, /* in */ int offset, UTF8Result result)
        {
            // Converts UTF-16 code units from source, beginning at offset, to
            // UTF-8 until the sentinel 0xffff is read. There is no length
            // argument: the loop only ends at the sentinel, so source must
            // contain one at or after offset. Output bytes go to result.result
            // (swapped for a larger array when it fills up); the byte count is
            // stored in result.length.
            int upto = 0;
            int i = offset;
            byte[] out_Renamed = result.result;

            while (true)
            {
                int code = (int)source[i++];

                // One iteration writes at most 4 bytes (a surrogate pair), so
                // make sure that much room exists before encoding anything.
                if (upto + 4 > out_Renamed.Length)
                {
                    // Doubling alone cannot grow an empty or very short buffer
                    // (2 * 0 == 0), so the 4-byte minimum is enforced as well.
                    int newLength = System.Math.Max(2 * out_Renamed.Length, upto + 4);
                    byte[] newOut = new byte[newLength];
                    System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
                    result.result = out_Renamed = newOut;
                }

                if (code < 0x80)
                {
                    // 1 byte: 0xxxxxxx
                    out_Renamed[upto++] = (byte)code;
                }
                else if (code < 0x800)
                {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
                }
                else if (code < 0xD800 || code > 0xDFFF)
                {
                    if (code == 0xffff)
                    {
                        // Sentinel reached: stop without encoding it.
                        break;
                    }

                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
                    out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
                    out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
                }
                else
                {
                    // code is a surrogate (0xD800..0xDFFF). Only a high
                    // surrogate followed by a low surrogate forms a valid pair.
                    // i already points at the next character here.
                    if (code < 0xDC00 && source[i] != 0xffff)
                    {
                        int low = (int)source[i];
                        if (low >= 0xDC00 && low <= 0xDFFF)
                        {
                            // 0xD7C0 == 0xD800 - 0x40: subtracting it removes
                            // the high-surrogate base and adds the 0x10000
                            // offset of the supplementary planes in one step.
                            int codePoint = ((code - 0xD7C0) << 10) + (low & 0x3FF);
                            i++; // the low surrogate has been consumed too

                            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                            out_Renamed[upto++] = (byte)(0xF0 | (codePoint >> 18));
                            out_Renamed[upto++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
                            out_Renamed[upto++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                            out_Renamed[upto++] = (byte)(0x80 | (codePoint & 0x3F));
                            continue;
                        }
                    }

                    // Unpaired high surrogate or out-of-order low surrogate:
                    // emit U+FFFD (REPLACEMENT CHARACTER), i.e. EF BF BD.
                    out_Renamed[upto++] = (byte)0xEF;
                    out_Renamed[upto++] = (byte)0xBF;
                    out_Renamed[upto++] = (byte)0xBD;
                }
            }

            result.length = upto;
        }