C# (CSharp) System.Text EncodingHelper примеры использования

Язык программирования: C# (CSharp)
Пространство имен/Пакет: System.Text
Класс/Тип: EncodingHelper
Примеров на hotexamples.com: 1
C# (CSharp) System.Text EncodingHelper - 1 пример найден. Это лучшие примеры C# (CSharp) кода для System.Text.EncodingHelper, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.
Основные методы
Показать Скрыть
GetUtf8EncodedBytes(1)
InRange(1)
PtrDiff(1)
Документация по классу EncodingHelper
Пример #1
Показать файл
            /// <summary>
            /// Computes how many bytes the UTF-16 form of the specified UTF-8 sequence occupies, without producing any output.
            ///
            /// The scan starts at the first byte of the input and continues until the end of the input or
            /// until the first sequence that is malformed or cut off by the end of the input.
            /// </summary>
            /// <param name="source">A span containing a sequence of UTF-8 bytes.</param>
            /// <param name="bytesNeeded">On exit, twice the number of UTF-16 code units counted for the sequences of
            /// <paramref name="source"/> that were accepted before the method returned. It is set on every return path,
            /// including the error paths.</param>
            /// <returns>
            /// A <see cref="TransformationStatus"/> value: <c>Done</c> when the scan stops exactly at the end of the input,
            /// <c>NeedMoreSourceData</c> when the input ends in the middle of a multi-byte sequence, and <c>InvalidData</c>
            /// when a lead byte, trailing byte, overlong form, surrogate or out-of-range value is rejected.
            /// <c>DestinationTooSmall</c> is returned if the scan loop ends before the end of the input
            /// (NOTE(review): no path in this method appears to do that - confirm).
            /// </returns>
            public unsafe static TransformationStatus ComputeEncodedBytesFromUtf8(ReadOnlySpan <byte> source, out int bytesNeeded)
            {
                fixed(byte *pUtf8 = &source.DangerousGetPinnableReference())
                {
                    // pSrc is the read cursor; pSrcEnd points one past the last input byte.
                    byte *pSrc    = pUtf8;
                    byte *pSrcEnd = pSrc + source.Length;

                    // Counts UTF-16 code units while scanning; doubled into a byte count just before each return.
                    bytesNeeded = 0;

                    // Holds the byte (or the 16/32-bit word) most recently read from the input.
                    int ch = 0;

                    while (pSrc < pSrcEnd)
                    {
                        int availableBytes = EncodingHelper.PtrDiff(pSrcEnd, pSrc);

                        // don't fall into the fast decoding loop if we don't have enough bytes
                        if (availableBytes <= 13)
                        {
                            // Short tail: walk it byte by byte. ASCII bytes are counted directly; the first
                            // non-ASCII byte is handed to the bounds-checked decoder at LongCodeSlow.
                            byte *pLocalEnd = pSrc + availableBytes;
                            while (pSrc < pLocalEnd)
                            {
                                ch = *pSrc;
                                pSrc++;

                                if (ch > 0x7F)
                                {
                                    goto LongCodeSlow;
                                }

                                bytesNeeded++;
                            }

                            // we are done
                            break;
                        }

                        // The fast loop only runs while pSrc is below pStop, which is 7 bytes short of the end of
                        // the input. That slack is what lets the code below read 8 bytes at once and read the
                        // trailing bytes of a multi-byte sequence without per-byte bounds checks.
                        // pStop is lowered further for every multi-byte sequence that is decoded.
                        byte *pStop = pSrc + availableBytes - 7;

                        // Fast loop
                        while (pSrc < pStop)
                        {
                            ch = *pSrc;
                            pSrc++;

                            if (ch > 0x7F)
                            {
                                goto LongCode;
                            }

                            bytesNeeded++;

                            // 2-byte align: if the cursor address is odd, consume one more byte so it becomes even
                            if ((unchecked ((int)pSrc) & 0x1) != 0)
                            {
                                ch = *pSrc;
                                pSrc++;

                                if (ch > 0x7F)
                                {
                                    goto LongCode;
                                }

                                bytesNeeded++;
                            }

                            // 4-byte align: if bit 1 of the cursor address is set, test two bytes with one 16-bit read
                            if ((unchecked ((int)pSrc) & 0x2) != 0)
                            {
                                ch = *(ushort *)pSrc;
                                // 0x8080 selects the high bit of both bytes; any hit means a non-ASCII byte
                                if ((ch & 0x8080) != 0)
                                {
                                    goto LongCodeWithMask16;
                                }
                                pSrc        += 2;
                                bytesNeeded += 2;
                            }

                            // Run 8 characters at a time: two 32-bit reads are OR-ed together and the high bit
                            // of each of the 8 bytes is tested in a single comparison.
                            while (pSrc < pStop)
                            {
                                ch = *(int *)pSrc;
                                int chb = *(int *)(pSrc + 4);
                                if (((ch | chb) & unchecked ((int)0x80808080)) != 0)
                                {
                                    goto LongCodeWithMask32;
                                }
                                pSrc        += 8;
                                bytesNeeded += 8;
                            }

                            // pSrc reached pStop: leave the fast loop; the outer loop re-evaluates what is left
                            break;

                            // A multi-byte read found a non-ASCII byte somewhere in the word. Reduce ch to the
                            // byte at pSrc (the first byte in memory order) and handle that single byte:
                            // on big-endian it is the most significant byte of the word (shift by 24 for the
                            // 32-bit read, by 8 for the 16-bit read), on little-endian it is the low 8 bits.
#if BIGENDIAN
LongCodeWithMask32:
                            // be careful about the sign extension
                            ch = (int)(((uint)ch) >> 16);
LongCodeWithMask16:
                            ch = (int)(((uint)ch) >> 8);
#else // BIGENDIAN
LongCodeWithMask32:
LongCodeWithMask16:
                            ch &= 0xFF;
#endif // BIGENDIAN
                            pSrc++;
                            if (ch <= 0x7F)
                            {
                                // The first byte was ASCII (the offending byte comes later in the word):
                                // count it and go back to the top of the fast loop.
                                bytesNeeded++;
                                continue;
                            }

LongCode:
                            // ch is a non-ASCII lead byte and pSrc already points past it.
                            // Read the first trailing byte without a bounds check (covered by the pStop slack).
                            int chc = *pSrc;
                            pSrc++;

                            // Bit 6 of the lead byte must be set (a lead of the form 10xxxxxx is rejected),
                            // and the trailing byte should be 10vvvvvv
                            if ((ch & 0x40) == 0 || (chc & unchecked ((sbyte)0xC0)) != 0x80)
                            {
                                goto InvalidData;
                            }

                            chc &= 0x3F;

                            if ((ch & 0x20) != 0)
                            {
                                // Handle 3 or 4 byte encoding.

                                // Fold the first 2 bytes together
                                chc |= (ch & 0x0F) << 6;

                                if ((ch & 0x10) != 0)
                                {
                                    // 4 byte - surrogate pair
                                    ch = *pSrc;

                                    // chc >> 4 combines the low 4 bits of the lead byte with the top 2 payload bits
                                    // of the first trailing byte, i.e. the bits of the code point above bit 15.
                                    // Requiring 0x01..0x10 rejects overlong forms (below U+10000), values above
                                    // U+10FFFF and lead bytes with bit 3 set
                                    // (assumes InRange is an inclusive range check - it is defined elsewhere).
                                    // The trailing byte should be 10vvvvvv
                                    if (!EncodingHelper.InRange(chc >> 4, 0x01, 0x10) || (ch & unchecked ((sbyte)0xC0)) != 0x80)
                                    {
                                        goto InvalidData;
                                    }

                                    // Merge 3rd byte then read the last byte
                                    chc = (chc << 6) | (ch & 0x3F);
                                    ch  = *(pSrc + 1);

                                    // The last trailing byte still holds the form 10vvvvvv
                                    if ((ch & unchecked ((sbyte)0xC0)) != 0x80)
                                    {
                                        goto InvalidData;
                                    }

                                    pSrc += 2;
                                    // NOTE(review): the values stored into ch from here to the end of this
                                    // iteration (here and in the 3-byte and 2-byte branches) are never read before
                                    // ch is overwritten by the next input read - looks like decoder leftovers.
                                    ch    = (chc << 6) | (ch & 0x3F);

                                    // A 4-byte sequence takes two UTF-16 code units: the first is counted here,
                                    // the second by the shared increment after the 2/3/4-byte branches.
                                    bytesNeeded++;

                                    ch = (ch & 0x3FF) + unchecked ((short)(EncodingHelper.LowSurrogateStart));
                                }
                                else
                                {
                                    // 3 byte encoding
                                    ch = *pSrc;

                                    // chc holds the code point shifted right by 6 at this point.
                                    // Bits 5..9 all zero  -> code point below 0x800, non-shortest form
                                    // Top bits match 0xD800 >> 6 -> code point in 0xD800..0xDFFF, no surrogates
                                    // Trailing byte must be in the form 10vvvvvv
                                    if ((chc & (0x1F << 5)) == 0 ||
                                        (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
                                        (ch & unchecked ((sbyte)0xC0)) != 0x80)
                                    {
                                        goto InvalidData;
                                    }

                                    pSrc++;
                                    ch = (chc << 6) | (ch & 0x3F);
                                }

                                // A 3 or 4 byte sequence lowers pStop twice: once here and once below.
                                // pStop is only used in the fast-loop conditions, so lowering it just makes
                                // the fast loop end sooner; the outer loop then recomputes it.
                                // NOTE(review): the original wording talked about a destination ("target")
                                // buffer, which this method does not have.
                                pStop--;
                            }
                            else
                            {
                                // 2 byte encoding
                                ch &= 0x1F;

                                // Check for non-shortest form: payload 0 or 1 means the value fits in 7 bits
                                if (ch <= 1)
                                {
                                    goto InvalidData;
                                }

                                ch = (ch << 6) | chc;
                            }

                            // One code unit for a 2 or 3 byte sequence; the second one for a 4 byte sequence.
                            bytesNeeded++;

                            // Lower pStop once for every multi-byte sequence (see the note on the decrement
                            // in the 3/4-byte branch above).
                            pStop--;
                        }

                        // Fast loop finished: go back to the outer loop, which recomputes availableBytes.
                        continue;

LongCodeSlow:
                        // Bounds-checked twin of the LongCode path, reached only from the short-tail loop:
                        // ch is a non-ASCII lead byte and pSrc already points past it.
                        if (pSrc >= pSrcEnd)
                        {
                            // The input ends right after the lead byte.
                            // NOTE(review): this method reports no consumed-byte count and the NeedMoreData
                            // exit does not read pSrc, so the increment below has no visible effect here.
                            pSrc++;
                            goto NeedMoreData;
                        }

                        int chd = *pSrc;
                        pSrc++;

                        // Bit 6 of the lead byte must be set (a lead of the form 10xxxxxx is rejected),
                        // and the trailing byte should be 10vvvvvv
                        if ((ch & 0x40) == 0 || (chd & unchecked ((sbyte)0xC0)) != 0x80)
                        {
                            goto InvalidData;
                        }

                        chd &= 0x3F;

                        if ((ch & 0x20) != 0)
                        {
                            // Handle 3 or 4 byte encoding.

                            // Fold the first 2 bytes together
                            chd |= (ch & 0x0F) << 6;

                            if ((ch & 0x10) != 0)
                            {
                                // 4 byte - surrogate pair
                                // We need 2 more bytes
                                if (pSrc >= pSrcEnd - 1)
                                {
                                    goto NeedMoreData;
                                }

                                ch = *pSrc;

                                // Same range test as in the fast path: chd >> 4 must be within 0x01..0x10
                                // (rejects overlong forms, values above U+10FFFF and lead bytes with bit 3 set;
                                // assumes InRange is an inclusive range check - it is defined elsewhere),
                                // and the trailing byte should be 10vvvvvv
                                if (!EncodingHelper.InRange(chd >> 4, 0x01, 0x10) || (ch & unchecked ((sbyte)0xC0)) != 0x80)
                                {
                                    goto InvalidData;
                                }

                                // Merge 3rd byte then read the last byte
                                chd = (chd << 6) | (ch & 0x3F);
                                ch  = *(pSrc + 1);

                                // The last trailing byte still holds the form 10vvvvvv
                                // (reading pSrc + 1 is safe: the check above guaranteed two more input bytes)
                                if ((ch & unchecked ((sbyte)0xC0)) != 0x80)
                                {
                                    goto InvalidData;
                                }

                                pSrc += 2;
                                // NOTE(review): as in the fast path, the values stored into ch from here on are
                                // never read before ch is overwritten by the next input read.
                                ch    = (chd << 6) | (ch & 0x3F);

                                // First of the two UTF-16 code units; the second is counted at the end of the loop body.
                                bytesNeeded++;

                                ch = (ch & 0x3FF) + unchecked ((short)(EncodingHelper.LowSurrogateStart));
                            }
                            else
                            {
                                // 3 byte encoding
                                if (pSrc >= pSrcEnd)
                                {
                                    goto NeedMoreData;
                                }

                                ch = *pSrc;

                                // chd holds the code point shifted right by 6 at this point.
                                // Bits 5..9 all zero  -> code point below 0x800, non-shortest form
                                // Top bits match 0xD800 >> 6 -> code point in 0xD800..0xDFFF, no surrogates
                                // Trailing byte must be in the form 10vvvvvv
                                if ((chd & (0x1F << 5)) == 0 ||
                                    (chd & (0xF800 >> 6)) == (0xD800 >> 6) ||
                                    (ch & unchecked ((sbyte)0xC0)) != 0x80)
                                {
                                    goto InvalidData;
                                }

                                pSrc++;
                                ch = (chd << 6) | (ch & 0x3F);
                            }
                        }
                        else
                        {
                            // 2 byte encoding
                            ch &= 0x1F;

                            // Check for non-shortest form: payload 0 or 1 means the value fits in 7 bits
                            if (ch <= 1)
                            {
                                goto InvalidData;
                            }

                            ch = (ch << 6) | chd;
                        }

                        // One code unit for a 2 or 3 byte sequence; the second one for a 4 byte sequence.
                        bytesNeeded++;
                    }

                    // NOTE(review): the shift is not overflow-checked; a count above int.MaxValue / 2 would wrap.
                    bytesNeeded <<= 1;  // Count we have is chars, double for bytes.
                    // NOTE(review): every path that reaches this point appears to leave pSrc equal to pSrcEnd,
                    // which would make the DestinationTooSmall branch unreachable - confirm.
                    return(EncodingHelper.PtrDiff(pSrcEnd, pSrc) == 0 ? TransformationStatus.Done : TransformationStatus.DestinationTooSmall);

NeedMoreData:
                    // The input ended in the middle of a multi-byte sequence; report what was counted before it.
                    bytesNeeded <<= 1;  // Count we have is chars, double for bytes.
                    return(TransformationStatus.NeedMoreSourceData);

InvalidData:
                    // A malformed sequence was found; report what was counted before the check that failed.
                    bytesNeeded <<= 1;  // Count we have is chars, double for bytes.
                    return(TransformationStatus.InvalidData);
                }
            }