コード例 #1
0
ファイル: Utf8TextEncoding.cs プロジェクト: timba/corefxlab
        /// <summary>
        /// Encodes every code point in <paramref name="codePoints"/> into
        /// <paramref name="buffer"/>, using one to four bytes per code point in the
        /// UTF-8 lead/continuation byte layout (assuming the <c>bNNNN_NNNNU</c>
        /// constants hold the bit patterns their names spell out).
        /// </summary>
        /// <param name="codePoints">Code points to encode, processed in order.</param>
        /// <param name="buffer">Destination for the encoded bytes, filled from index 0.</param>
        /// <param name="bytesWritten">
        /// Total number of bytes stored on success. Reset to 0 on failure, even though
        /// bytes for earlier code points may already have been stored in the buffer.
        /// </param>
        /// <returns>
        /// <c>true</c> when all code points were encoded; <c>false</c> when a code point
        /// is not supported, does not fit in the remaining space, or has an encoded
        /// length other than 1, 2, 3 or 4.
        /// </returns>
        public override bool TryEncodeFromUnicode(ReadOnlySpan <UnicodeCodePoint> codePoints, Span <byte> buffer, out int bytesWritten)
        {
            int capacity = buffer.Length;
            int count    = codePoints.Length;

            bytesWritten = 0;

            for (int index = 0; index < count; index++)
            {
                UnicodeCodePoint codePoint = codePoints[index];

                // The encoded length is queried before the code point is validated.
                int encodedLength = GetNumberOfEncodedBytes(codePoint);

                if (!UnicodeCodePoint.IsSupportedCodePoint(codePoint))
                {
                    bytesWritten = 0;
                    return false;
                }

                if (bytesWritten + encodedLength > capacity)
                {
                    // Not enough room left for this code point.
                    bytesWritten = 0;
                    return false;
                }

                var scalar = codePoint.Value;
                int first  = bytesWritten;

                if (encodedLength == 1)
                {
                    buffer[first] = (byte)(scalar & b0111_1111U);
                }
                else if (encodedLength == 2)
                {
                    buffer[first]     = (byte)(b1100_0000U | ((scalar >> 6) & b0001_1111U));
                    buffer[first + 1] = (byte)(b1000_0000U | (scalar & b0011_1111U));
                }
                else if (encodedLength == 3)
                {
                    buffer[first]     = (byte)(b1110_0000U | ((scalar >> 12) & b0000_1111U));
                    buffer[first + 1] = (byte)(b1000_0000U | ((scalar >> 6) & b0011_1111U));
                    buffer[first + 2] = (byte)(b1000_0000U | (scalar & b0011_1111U));
                }
                else if (encodedLength == 4)
                {
                    buffer[first]     = (byte)(b1111_0000U | ((scalar >> 18) & b0000_0111U));
                    buffer[first + 1] = (byte)(b1000_0000U | ((scalar >> 12) & b0011_1111U));
                    buffer[first + 2] = (byte)(b1000_0000U | ((scalar >> 6) & b0011_1111U));
                    buffer[first + 3] = (byte)(b1000_0000U | (scalar & b0011_1111U));
                }
                else
                {
                    // Unexpected length reported by GetNumberOfEncodedBytes.
                    bytesWritten = 0;
                    return false;
                }

                bytesWritten = first + encodedLength;
            }

            return true;
        }
コード例 #2
0
        /// <summary>
        /// Encodes a single code point at the start of <paramref name="buffer"/>, using
        /// one to four bytes in the UTF-8 lead/continuation byte layout (assuming the
        /// <c>bNNNN_NNNNU</c> constants hold the bit patterns their names spell out).
        /// </summary>
        /// <param name="codePoint">Code point to encode.</param>
        /// <param name="buffer">Destination; bytes are stored starting at index 0.</param>
        /// <param name="encodedBytes">
        /// Number of bytes stored on success; 0 on every failure path.
        /// </param>
        /// <returns>
        /// <c>true</c> when the code point was encoded; <c>false</c> when it is not
        /// supported, the buffer is too short, or the length reported by
        /// <c>GetNumberOfEncodedBytes</c> is not 1, 2, 3 or 4.
        /// </returns>
        public static bool TryEncodeCodePoint(UnicodeCodePoint codePoint, Span <byte> buffer, out int encodedBytes)
        {
            if (!UnicodeCodePoint.IsSupportedCodePoint(codePoint))
            {
                encodedBytes = 0;
                return false;
            }

            encodedBytes = GetNumberOfEncodedBytes(codePoint);
            if (encodedBytes > buffer.Length)
            {
                encodedBytes = 0;
                return false;
            }

            // Each byte is stored individually. Packing the bytes into a ushort/uint and
            // storing that value makes the order in memory depend on the host's
            // endianness, whereas the encoded byte sequence has one fixed order.
            switch (encodedBytes)
            {
                case 1:
                    buffer[0] = (byte)(b0111_1111U & codePoint.Value);
                    return true;

                case 2:
                    buffer[0] = (byte)(((codePoint.Value >> 6) & b0001_1111U) | b1100_0000U);
                    buffer[1] = (byte)(((codePoint.Value >> 0) & b0011_1111U) | b1000_0000U);
                    return true;

                case 3:
                    buffer[0] = (byte)(((codePoint.Value >> 12) & b0000_1111U) | b1110_0000U);
                    buffer[1] = (byte)(((codePoint.Value >> 6) & b0011_1111U) | b1000_0000U);
                    buffer[2] = (byte)(((codePoint.Value >> 0) & b0011_1111U) | b1000_0000U);
                    return true;

                case 4:
                    buffer[0] = (byte)(((codePoint.Value >> 18) & b0000_0111U) | b1111_0000U);
                    buffer[1] = (byte)(((codePoint.Value >> 12) & b0011_1111U) | b1000_0000U);
                    buffer[2] = (byte)(((codePoint.Value >> 6) & b0011_1111U) | b1000_0000U);
                    buffer[3] = (byte)(((codePoint.Value >> 0) & b0011_1111U) | b1000_0000U);
                    return true;

                default:
                    // Nothing was stored, so do not report a length to the caller.
                    encodedBytes = 0;
                    return false;
            }
        }
コード例 #3
0
        /// <summary>
        /// Encodes a single code point as UTF-16 at the start of
        /// <paramref name="buffer"/>, storing each 16-bit code unit low byte first.
        /// </summary>
        /// <param name="codePoint">Code point to encode.</param>
        /// <param name="buffer">Destination; bytes are stored starting at index 0.</param>
        /// <param name="encodedBytes">
        /// Number of bytes stored on success: 2 for values below 0x10000, otherwise 4
        /// (a surrogate pair). 0 on failure.
        /// </param>
        /// <returns>
        /// <c>true</c> when the code point was encoded; <c>false</c> when it is not
        /// supported or the buffer is too short.
        /// </returns>
        public static bool TryEncodeCodePoint(UnicodeCodePoint codePoint, Span <byte> buffer, out int encodedBytes)
        {
            if (!UnicodeCodePoint.IsSupportedCodePoint(codePoint))
            {
                encodedBytes = default(int);
                return false;
            }

            uint value = (uint)codePoint;

            // TODO: Can we add this in UnicodeCodePoint class?
            // Should be represented as Surrogate?
            encodedBytes = (value >= 0x10000) ? 4 : 2;

            if (buffer.Length < encodedBytes)
            {
                // buffer too small
                encodedBytes = default(int);
                return false;
            }

            unchecked
            {
                if (encodedBytes == 2)
                {
                    buffer[0] = (byte)value;
                    buffer[1] = (byte)(value >> 8);
                }
                else
                {
                    // A surrogate pair carries (value - 0x10000): the top 10 bits go in the
                    // high surrogate and the bottom 10 bits in the low surrogate. Without
                    // the subtraction the high surrogate is wrong for every supplementary
                    // code point (U+10000 would produce 0xD840 instead of 0xD800).
                    uint offset        = value - 0x10000u;
                    uint highSurrogate = (offset >> 10) + UnicodeConstants.Utf16HighSurrogateFirstCodePoint;
                    uint lowSurrogate  = (offset & MaskLow10Bits) + UnicodeConstants.Utf16LowSurrogateFirstCodePoint;

                    buffer[0] = (byte)highSurrogate;
                    buffer[1] = (byte)(highSurrogate >> 8);

                    buffer[2] = (byte)lowSurrogate;
                    buffer[3] = (byte)(lowSurrogate >> 8);
                }
            }

            return true;
        }
コード例 #4
0
        /// <summary>
        /// Encodes a single code point as UTF-8 at the start of <paramref name="buffer"/>.
        /// </summary>
        /// <param name="codePoint">Code point to encode.</param>
        /// <param name="buffer">Destination; bytes are stored starting at index 0.</param>
        /// <param name="encodedBytes">
        /// Number of bytes stored on success (1 to 4); 0 on every failure path.
        /// </param>
        /// <returns>
        /// <c>true</c> when the code point was encoded; <c>false</c> when it is not
        /// supported, the buffer is too short, or the length reported by
        /// <c>GetNumberOfEncodedBytes</c> is not 1, 2, 3 or 4.
        /// </returns>
        public static bool TryEncodeCodePoint(UnicodeCodePoint codePoint, Span <byte> buffer, out int encodedBytes)
        {
            if (!UnicodeCodePoint.IsSupportedCodePoint(codePoint))
            {
                encodedBytes = 0;
                return false;
            }

            encodedBytes = GetNumberOfEncodedBytes(codePoint);
            if (encodedBytes > buffer.Length)
            {
                encodedBytes = 0;
                return false;
            }

            switch (encodedBytes)
            {
                case 1:
                    // 0xxxxxxx
                    buffer[0] = (byte)(0b0111_1111U & codePoint.Value);
                    return true;

                case 2:
                    // 110xxxxx 10xxxxxx
                    buffer[0] = (byte)(((codePoint.Value >> 6) & 0b0001_1111U) | 0b1100_0000U);
                    buffer[1] = (byte)(((codePoint.Value >> 0) & 0b0011_1111U) | 0b1000_0000U);
                    return true;

                case 3:
                    // 1110xxxx 10xxxxxx 10xxxxxx
                    buffer[0] = (byte)(((codePoint.Value >> 12) & 0b0000_1111U) | 0b1110_0000U);
                    buffer[1] = (byte)(((codePoint.Value >> 6) & 0b0011_1111U) | 0b1000_0000U);
                    buffer[2] = (byte)(((codePoint.Value >> 0) & 0b0011_1111U) | 0b1000_0000U);
                    return true;

                case 4:
                    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    buffer[0] = (byte)(((codePoint.Value >> 18) & 0b0000_0111U) | 0b1111_0000U);
                    buffer[1] = (byte)(((codePoint.Value >> 12) & 0b0011_1111U) | 0b1000_0000U);
                    buffer[2] = (byte)(((codePoint.Value >> 6) & 0b0011_1111U) | 0b1000_0000U);
                    buffer[3] = (byte)(((codePoint.Value >> 0) & 0b0011_1111U) | 0b1000_0000U);
                    return true;

                default:
                    // Nothing was stored, so report zero bytes like the other failure paths.
                    encodedBytes = 0;
                    return false;
            }
        }
コード例 #5
0
        /// <summary>
        /// Encodes a single code point as UTF-16 code units stored at
        /// <paramref name="buffer"/>.
        /// </summary>
        /// <param name="codePoint">Code point to encode.</param>
        /// <param name="buffer">
        /// Destination pointer. No capacity check is performed; the caller must provide
        /// room for at least two chars.
        /// </param>
        /// <param name="encodedChars">
        /// Number of chars stored on success: 1 for values below 0x10000, otherwise 2
        /// (a surrogate pair). 0 when the code point is not supported.
        /// </param>
        /// <returns>
        /// <c>true</c> when the code point was encoded; <c>false</c> when it is not
        /// supported.
        /// </returns>
        public static unsafe bool TryEncodeCodePoint(UnicodeCodePoint codePoint, char *buffer, out int encodedChars)
        {
            if (!UnicodeCodePoint.IsSupportedCodePoint(codePoint))
            {
                encodedChars = default(int);
                return false;
            }

            uint value = (uint)codePoint;

            // TODO: Can we add this in UnicodeCodePoint class?
            // Should be represented as Surrogate?
            encodedChars = (value >= 0x10000) ? 2 : 1;

            // No "buffer too small" check: per the original author's note, callers
            // preallocate 2 chars for every UTF-8 byte, so the at most 2 chars stored
            // here always fit.

            unchecked
            {
                if (encodedChars == 1)
                {
                    buffer[0] = (char)value;
                }
                else
                {
                    uint highSurrogate = ((value - 0x10000u) >> 10) + UnicodeConstants.Utf16HighSurrogateFirstCodePoint;
                    uint lowSurrogate  = (value & MaskLow10Bits) + UnicodeConstants.Utf16LowSurrogateFirstCodePoint;

                    // Store the two code units separately. Packing them into one uint
                    // would put the low surrogate first on a big-endian host and would
                    // perform a 32-bit store to an address that is only guaranteed to be
                    // 2-byte aligned.
                    buffer[0] = (char)highSurrogate;
                    buffer[1] = (char)lowSurrogate;
                }
            }

            return true;
        }
コード例 #6
0
        /// <summary>
        /// Encodes a single code point as UTF-16 at the start of
        /// <paramref name="buffer"/>, storing each 16-bit code unit low byte first.
        /// </summary>
        /// <param name="codePoint">Code point to encode.</param>
        /// <param name="buffer">Destination; bytes are stored starting at index 0.</param>
        /// <param name="encodedBytes">
        /// Number of bytes stored on success: 2 when <c>UnicodeCodePoint.IsBmp</c>
        /// reports true, otherwise 4 (a surrogate pair). 0 on failure.
        /// </param>
        /// <returns>
        /// <c>true</c> when the code point was encoded; <c>false</c> when it is not
        /// supported or the buffer is too short.
        /// </returns>
        public static bool TryEncodeCodePoint(UnicodeCodePoint codePoint, Span <byte> buffer, out int encodedBytes)
        {
            if (!UnicodeCodePoint.IsSupportedCodePoint(codePoint))
            {
                encodedBytes = default(int);
                return false;
            }

            encodedBytes = UnicodeCodePoint.IsBmp(codePoint) ? 2 : 4;

            if (buffer.Length < encodedBytes)
            {
                // buffer too small
                encodedBytes = default(int);
                return false;
            }

            // Bytes are stored individually, low byte of each code unit first. Storing a
            // packed ushort/uint produces this layout only on little-endian hosts; on a
            // big-endian host the packed surrogate pair would come out low surrogate first.
            unchecked
            {
                uint codePointValue = (uint)codePoint;

                if (encodedBytes == 2)
                {
                    buffer[0] = (byte)codePointValue;
                    buffer[1] = (byte)(codePointValue >> 8);
                }
                else
                {
                    uint highSurrogate = ((codePointValue - 0x010000u) >> 10) + UnicodeConstants.Utf16HighSurrogateFirstCodePoint;
                    uint lowSurrogate  = (codePointValue & MaskLow10Bits) + UnicodeConstants.Utf16LowSurrogateFirstCodePoint;

                    buffer[0] = (byte)highSurrogate;
                    buffer[1] = (byte)(highSurrogate >> 8);

                    buffer[2] = (byte)lowSurrogate;
                    buffer[3] = (byte)(lowSurrogate >> 8);
                }
            }

            return true;
        }