Esempio n. 1
0
        /// <summary>
        /// Decodes <paramref name="count"/> bytes of UTF-8 data, starting at
        /// <paramref name="index"/>, into a string.
        /// </summary>
        /// <remarks>
        /// Malformed input (stray continuation bytes, truncated sequences, 5- and 6-byte
        /// forms, the bytes 0xFE/0xFF and encoded surrogates) either raises an exception
        /// when <c>throwOnInvalid</c> is set, or is replaced by <c>fallbackCharacter</c>.
        /// <c>_hasError</c> is reset on entry and set whenever a replacement is made.
        /// NOTE(review): overlong encodings and values above U+10FFFF are not rejected
        /// here; confirm whether <c>Encoding.FromCharCode</c> is expected to cope with them.
        /// </remarks>
        /// <param name="bytes">Buffer holding the encoded data.</param>
        /// <param name="index">Offset of the first byte to decode.</param>
        /// <param name="count">Number of bytes to decode.</param>
        /// <param name="chars">Not used by this implementation.</param>
        /// <param name="charIndex">Not used by this implementation.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="System.Exception">
        /// Invalid data was found and <c>throwOnInvalid</c> is set.
        /// </exception>
        protected override string Decode(byte[] bytes, int index, int count, char[] chars, int charIndex)
        {
            this._hasError = false;
            var position    = index;
            var result      = new System.Text.StringBuilder();
            var surrogate1  = '\u0000';
            var addFallback = false;
            var endpoint    = position + count;

            for (; position < endpoint; position++)
            {
                // Remember where this sequence begins so it can be decoded again if it
                // turns out to follow a dangling high surrogate.
                var sequenceStart = position;
                var accumulator   = 0;
                var extraBytes    = 0;
                var hasError      = false;
                var firstByte     = bytes[position];

                if (firstByte <= 0x7F)
                {
                    // Single-byte (ASCII) form.
                    accumulator = firstByte;
                }
                else if ((firstByte & 0x40) == 0)
                {
                    // 10xxxxxx: a continuation byte where a lead byte was expected.
                    hasError = true;
                }
                else if ((firstByte & 0xE0) == 0xC0)
                {
                    accumulator = firstByte & 31;
                    extraBytes  = 1;
                }
                else if ((firstByte & 0xF0) == 0xE0)
                {
                    accumulator = firstByte & 15;
                    extraBytes  = 2;
                }
                else if ((firstByte & 0xF8) == 0xF0)
                {
                    accumulator = firstByte & 7;
                    extraBytes  = 3;
                }
                else if ((firstByte & 0xFC) == 0xF8)
                {
                    // Obsolete 5-byte form: consume its continuation bytes, discard the value.
                    extraBytes = 4;
                    hasError   = true;
                }
                else if ((firstByte & 0xFE) == 0xFC)
                {
                    // Obsolete 6-byte form: consume its continuation bytes, discard the value.
                    extraBytes = 5;
                    hasError   = true;
                }
                else
                {
                    // 0xFE and 0xFF can never occur in UTF-8; previously they were
                    // passed through as U+00FE / U+00FF.
                    hasError = true;
                }

                while (extraBytes > 0)
                {
                    position++;

                    if (position >= endpoint)
                    {
                        // Input ends in the middle of a multi-byte sequence.
                        hasError = true;
                        break;
                    }

                    var extraByte = bytes[position];
                    extraBytes--;

                    if ((extraByte & 0xC0) != 0x80)
                    {
                        // Not a continuation byte: step back so the next iteration
                        // treats it as the start of a new sequence.
                        position--;
                        hasError = true;
                        break;
                    }

                    accumulator = (accumulator << 6) | (extraByte & 0x3F);
                }

                string characters = null;
                addFallback = false;
                if (!hasError)
                {
                    var isLowSurrogate = (accumulator >= 0xDC00) && (accumulator <= 0xDFFF);

                    if (surrogate1 > 0 && !isLowSurrogate)
                    {
                        // The pending high surrogate is not followed by a low surrogate.
                        // Report it, then rewind so the current (possibly valid) code
                        // point is decoded again instead of being dropped. Clearing
                        // surrogate1 first guarantees the retry cannot land here again.
                        hasError   = true;
                        surrogate1 = '\u0000';
                        position   = sequenceStart - 1;
                    }
                    else if ((accumulator >= 0xD800) && (accumulator <= 0xDBFF))
                    {
                        // Encoded high surrogate: hold it until the next code point is known.
                        surrogate1 = (char)accumulator;
                    }
                    else if (isLowSurrogate)
                    {
                        // Encoded low surrogate: always invalid in UTF-8.
                        hasError    = true;
                        addFallback = true;
                        surrogate1  = '\u0000';
                    }
                    else
                    {
                        characters = Encoding.FromCharCode(accumulator);
                        surrogate1 = '\u0000';
                    }
                }

                if (hasError)
                {
                    if (this.throwOnInvalid)
                    {
                        throw new System.Exception("Invalid character in UTF8 text");
                    }

                    result.Append(this.fallbackCharacter);
                    this._hasError = true;
                }
                else if (surrogate1 == 0)
                {
                    result.Append(characters);
                }
            }

            // A surrogate was the last thing seen in the input.
            if (surrogate1 > 0 || addFallback)
            {
                if (this.throwOnInvalid)
                {
                    throw new System.Exception("Invalid character in UTF8 text");
                }

                if (result.Length > 0 && result[result.Length - 1] == this.fallbackCharacter)
                {
                    result.Append(this.fallbackCharacter);
                }
                else
                {
                    // Append the fallback twice. The former "char + char" expression
                    // added the two code units numerically and appended the sum as
                    // decimal digits.
                    result.Append(this.fallbackCharacter, 2);
                }

                this._hasError = true;
            }

            return result.ToString();
        }
Esempio n. 2
0
        /// <summary>
        /// Decodes <paramref name="count"/> bytes of UTF-16 data, starting at
        /// <paramref name="index"/>, into a string.
        /// </summary>
        /// <remarks>
        /// Code units are read high byte first when <c>bigEndian</c> is set and low byte
        /// first otherwise. Unpaired surrogates and a trailing odd byte either raise an
        /// exception (when <c>throwOnInvalid</c> is set) or are replaced by
        /// <c>fallbackCharacter</c>.
        /// </remarks>
        /// <param name="bytes">Buffer holding the encoded data.</param>
        /// <param name="index">Offset of the first byte to decode.</param>
        /// <param name="count">Number of bytes to decode.</param>
        /// <param name="chars">Not used by this implementation.</param>
        /// <param name="charIndex">Not used by this implementation.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="System.Exception">
        /// Invalid data was found and <c>throwOnInvalid</c> is set.
        /// </exception>
        protected override string Decode(byte[] bytes, int index, int count, char[] chars, int charIndex)
        {
            var cursor  = index;
            var limit   = index + count;
            var decoded = "";

            // Handles one undecodable unit: throw in strict mode, otherwise substitute.
            Action reportInvalid = () =>
            {
                if (this.throwOnInvalid)
                {
                    throw new System.Exception("Invalid character in UTF16 text");
                }

                decoded += this.fallbackCharacter;
            };

            // Reads the next 16-bit code unit. Returns null, still advancing the cursor
            // by two, when fewer than two bytes remain.
            Func<char?> nextUnit = () =>
            {
                if (cursor + 2 > limit)
                {
                    cursor += 2;
                    return null;
                }

                var first  = bytes[cursor++];
                var second = bytes[cursor++];

                return this.bigEndian
                    ? (char)((first << 8) | second)
                    : (char)((second << 8) | first);
            };

            while (cursor < limit)
            {
                var lead = nextUnit();

                if (!lead.HasValue)
                {
                    // Odd trailing byte.
                    reportInvalid();
                    continue;
                }

                int leadValue       = lead.Value;
                var isHighSurrogate = leadValue >= 0xD800 && leadValue <= 0xDBFF;
                var isLowSurrogate  = leadValue >= 0xDC00 && leadValue <= 0xDFFF;

                if (!isHighSurrogate && !isLowSurrogate)
                {
                    // Ordinary code unit outside the surrogate range.
                    decoded += Encoding.FromCharCode(lead.Value);
                    continue;
                }

                if (isLowSurrogate)
                {
                    // Low surrogate with no preceding high surrogate.
                    reportInvalid();
                    continue;
                }

                // High surrogate: a low surrogate must follow.
                var inputExhausted = cursor >= limit;
                var trail          = nextUnit();

                if (inputExhausted)
                {
                    reportInvalid();
                }
                else if (!trail.HasValue)
                {
                    // One stray byte after the high surrogate: both are invalid.
                    reportInvalid();
                    reportInvalid();
                }
                else if (trail.Value >= 0xDC00 && trail.Value <= 0xDFFF)
                {
                    var codePoint = (((leadValue & 0x3FF) << 10) | (trail.Value & 0x3FF)) + 0x10000;

                    decoded += Encoding.FromCharCode(codePoint);
                }
                else
                {
                    // Not a low surrogate: report the lone high surrogate and step back
                    // so the unit just read is processed as a lead unit.
                    reportInvalid();
                    cursor -= 2;
                }
            }

            return decoded;
        }