示例#1
0
        // Chooses a character reader for this.stream according to this.mode
        // and the leading bytes, stores it in this.reader, and returns the
        // first decoded character. Returns -1 when the very first ReadByte()
        // call yields a negative value (presumably end of stream).
        private int DetectUnicodeEncoding()
        {
            int detectionMode = this.mode;
            int firstByte     = this.stream.ReadByte();

            if (firstByte < 0)
            {
                return -1;
            }

            // Modes 1 and 3 sniff for UTF-16; modes 2 and 4 additionally
            // sniff for UTF-32. Mode 0 (UTF-8 only) and any other mode value
            // skip sniffing and go straight to the UTF-8 path below.
            bool sniffUtf16 = detectionMode == 1 || detectionMode == 3;
            bool sniffUtf32 = detectionMode == 2 || detectionMode == 4;
            if (sniffUtf16 || sniffUtf32)
            {
                int sniffed = sniffUtf16
                    ? this.DetectUtf8OrUtf16(firstByte)
                    : this.DetectUtf8Or16Or32(firstByte);
                // A result below -1 means the helper made no decision.
                if (sniffed >= -1)
                {
                    return sniffed;
                }
            }

            // UTF-8: hand the already-consumed byte back to the new reader
            // so that decoding starts at the beginning of the stream.
            var fallback = new Utf8Reader(this.stream, this.errorThrow);
            this.reader = fallback;
            fallback.Unget(firstByte);
            int firstChar = fallback.ReadChar();
            bool isBom = firstChar == 0xfeff;
            if (isBom && !this.dontSkipUtf8Bom)
            {
                // Leading byte-order mark: report the character after it.
                return fallback.ReadChar();
            }
            return firstChar;
        }
        // Chooses a character reader for this.stream according to this.mode
        // and the leading bytes, stores it in this.reader, and returns the
        // first decoded character. Returns -1 when the very first ReadByte()
        // call yields a negative value (presumably end of stream).
        private int DetectUnicodeEncoding()
        {
            int mode = this.mode;
            int c1   = this.stream.ReadByte();

            if (c1 < 0)
            {
                return(-1);
            }

            if (mode == 1 || mode == 3)
            {
                // UTF-8 or UTF-16
                int c2 = this.DetectUtf8OrUtf16(c1);
                // A result below -1 means "undecided"; fall through to UTF-8.
                if (c2 >= -1)
                {
                    return(c2);
                }
            }
            else if (mode == 2 || mode == 4)
            {
                // UTF-8, UTF-16, or UTF-32
                int c2 = this.DetectUtf8Or16Or32(c1);
                if (c2 >= -1)
                {
                    return(c2);
                }
            }

            // Mode 0 (UTF-8 only), an unrecognized mode, or an undecided
            // sniff: decode as UTF-8. The byte in c1 has already been taken
            // from the stream, so it MUST be pushed back into the reader
            // before decoding; otherwise the first byte of the input is
            // silently lost (and a leading EF BB BF mark can never be
            // recognized). Mode 0 deliberately shares this path so that it
            // also honors dontSkipUtf8Bom.
            Utf8Reader utf8reader = new Utf8Reader(this.stream, this.errorThrow);
            this.reader = utf8reader;
            utf8reader.Unget(c1);
            c1 = utf8reader.ReadChar();
            if (!this.dontSkipUtf8Bom && c1 == 0xfeff)
            {
                // Skip BOM
                c1 = utf8reader.ReadChar();
            }
            return(c1);
        }
        // Decides between UTF-8 and UTF-16 from the first byte (c1) plus at
        // most one more byte read from this.stream. When a decision is made,
        // this.reader is set and the first character is returned; otherwise
        // -2 is returned and this.reader is left untouched so the caller can
        // apply its UTF-8 default.
        // Throws InvalidOperationException when c1 is 0xFE/0xFF, is not part
        // of a UTF-16 byte-order mark, and this.errorThrow is set.
        private int DetectUtf8OrUtf16(int c1)
        {
            int nextByte;

            if (c1 == 0xfe || c1 == 0xff)
            {
                // Possible UTF-16 byte-order mark: FE FF (big-endian) or
                // FF FE (little-endian).
                nextByte = this.stream.ReadByte();
                bool isBigEndian = c1 == 0xfe;
                int  bomPartner  = isBigEndian ? 0xff : 0xfe;
                if (nextByte == bomPartner)
                {
                    var utf16 = new Utf16Reader(
                        this.stream,
                        isBigEndian,
                        this.errorThrow);
                    this.reader = utf16;
                    return utf16.ReadChar();
                }

                // Not a BOM, so treat as UTF-8, where 0xFE/0xFF is invalid.
                if (this.errorThrow)
                {
                    throw new InvalidOperationException("Invalid Unicode stream");
                }

                var lenient = new Utf8Reader(this.stream, this.errorThrow);
                lenient.Unget(nextByte);
                this.reader = lenient;
                // Report the bad lead byte as U+FFFD.
                return 0xfffd;
            }

            // The ASCII/zero heuristics below apply only in mode 1, and only
            // when the first byte is in the range 0x00..0x7F.
            if (this.mode != 1 || c1 < 0 || c1 > 0x7f)
            {
                return -2;
            }

            nextByte = this.stream.ReadByte();

            if (c1 == 0)
            {
                bool nextIsNonzeroAscii = nextByte >= 0x01 && nextByte <= 0x7f;
                if (nextIsNonzeroAscii)
                {
                    // 00 xx: UTF-16BE, and xx is the first character.
                    this.reader = new Utf16Reader(this.stream, true, this.errorThrow);
                    return nextByte;
                }

                // 00 followed by anything else: UTF-8; the zero itself is
                // the first character.
                var zeroLedUtf8 = new Utf8Reader(this.stream, this.errorThrow);
                zeroLedUtf8.Unget(nextByte);
                this.reader = zeroLedUtf8;
                return c1;
            }

            // c1 is a nonzero ASCII byte here.
            if (nextByte == 0)
            {
                // xx 00: UTF-16LE
                this.reader = new Utf16Reader(
                    this.stream,
                    false,
                    this.errorThrow);
            }
            else
            {
                // xx followed by a nonzero byte: UTF-8
                var asciiLedUtf8 = new Utf8Reader(this.stream, this.errorThrow);
                asciiLedUtf8.Unget(nextByte);
                this.reader = asciiLedUtf8;
            }
            return c1;
        }
        // Decides between UTF-8, UTF-16 and UTF-32 from the first byte (c1)
        // plus up to three more bytes read from this.stream. When a decision
        // is made, this.reader is set and the first character is returned;
        // otherwise -2 is returned and this.reader is left untouched so the
        // caller can apply its UTF-8 default.
        // Throws InvalidOperationException when c1 is 0xFE/0xFF, is not part
        // of a byte-order mark, and this.errorThrow is set.
        private int DetectUtf8Or16Or32(int c1)
        {
            int b2;
            int b3;
            int b4;

            if (c1 == 0xfe || c1 == 0xff)
            {
                // Possible byte-order mark:
                //   FF FE 00 00 -> UTF-32LE
                //   FF FE ..    -> UTF-16LE
                //   FE FF       -> UTF-16BE
                b2 = this.stream.ReadByte();
                bool isBigEndian = c1 == 0xfe;
                int  bomPartner  = isBigEndian ? 0xff : 0xfe;
                if (b2 != bomPartner)
                {
                    // Not a BOM, so treat as UTF-8, where 0xFE/0xFF is invalid.
                    if (this.errorThrow)
                    {
                        throw new InvalidOperationException("Invalid Unicode stream");
                    }

                    var lenient = new Utf8Reader(this.stream, this.errorThrow);
                    lenient.Unget(b2);
                    this.reader = lenient;
                    return 0xfffd;
                }

                b3 = this.stream.ReadByte();
                b4 = this.stream.ReadByte();
                if (isBigEndian || b3 != 0 || b4 != 0)
                {
                    // UTF-16: the two look-ahead bytes belong to the text.
                    var utf16 = new Utf16Reader(
                        this.stream,
                        isBigEndian,
                        this.errorThrow);
                    utf16.Unget(b3, b4);
                    this.reader = utf16;
                    return utf16.ReadChar();
                }

                this.reader = new Utf32Reader(this.stream, false, this.errorThrow);
                return this.reader.ReadChar();
            }

            if (c1 == 0 && this.mode == 4)
            {
                // Mode 4 with a leading zero byte:
                //   00 00 00 xx (xx nonzero ASCII) -> UTF-32BE
                //   00 00 FE FF                    -> UTF-32BE
                //   anything else                  -> UTF-8
                b2 = this.stream.ReadByte();
                b3 = this.stream.ReadByte();
                b4 = this.stream.ReadByte();
                bool tailIsBom   = b3 == 0xfe && b4 == 0xff;
                bool tailIsAscii = b3 == 0 && b4 >= 0x01 && b4 <= 0x7f;
                if (b2 == 0 && (tailIsBom || tailIsAscii))
                {
                    this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
                    if (tailIsAscii)
                    {
                        // The four bytes read so far are the first character.
                        return b4;
                    }
                    // The four bytes were a BOM; return what follows it.
                    return this.reader.ReadChar();
                }

                var zeroLedUtf8 = new Utf8Reader(this.stream, this.errorThrow);
                zeroLedUtf8.UngetThree(b2, b3, b4);
                this.reader = zeroLedUtf8;
                return c1;
            }

            // The remaining heuristics apply only in mode 2, and only when
            // the first byte is in the range 0x00..0x7F.
            if (this.mode != 2 || c1 < 0 || c1 > 0x7f)
            {
                return -2;
            }

            b2 = this.stream.ReadByte();

            if (c1 != 0)
            {
                // First byte is nonzero ASCII.
                if (b2 != 0)
                {
                    // xx followed by a nonzero byte: UTF-8
                    var asciiLedUtf8 = new Utf8Reader(this.stream, this.errorThrow);
                    asciiLedUtf8.Unget(b2);
                    this.reader = asciiLedUtf8;
                    return c1;
                }

                // xx 00: UTF-16LE, or UTF-32LE if two more zeros follow.
                b3 = this.stream.ReadByte();
                b4 = this.stream.ReadByte();
                if (b3 == 0 && b4 == 0)
                {
                    this.reader = new Utf32Reader(
                        this.stream,
                        false,
                        this.errorThrow);
                }
                else
                {
                    var utf16le = new Utf16Reader(
                        this.stream,
                        false,
                        this.errorThrow);
                    utf16le.Unget(b3, b4);
                    this.reader = utf16le;
                }
                return c1;
            }

            // First byte is zero.
            if (b2 >= 0x01 && b2 <= 0x7f)
            {
                // 00 xx: UTF-16BE, and xx is the first character.
                this.reader = new Utf16Reader(this.stream, true, this.errorThrow);
                return b2;
            }

            if (b2 != 0)
            {
                // 00 followed by a non-ASCII byte: UTF-8
                var utf8 = new Utf8Reader(this.stream, this.errorThrow);
                utf8.Unget(b2);
                this.reader = utf8;
                return c1;
            }

            // 00 00: maybe UTF-32BE
            b3 = this.stream.ReadByte();
            b4 = this.stream.ReadByte();
            if (b3 == 0 && b4 >= 0x01 && b4 <= 0x7f)
            {
                // 00 00 00 xx
                this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
                return b4;
            }

            if (b3 == 0xfe && b4 == 0xff)
            {
                // 00 00 FE FF
                this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
                return this.reader.ReadChar();
            }

            // 00 00 followed by anything else: UTF-8
            var doubleZeroUtf8 = new Utf8Reader(this.stream, this.errorThrow);
            doubleZeroUtf8.UngetThree(b2, b3, b4);
            this.reader = doubleZeroUtf8;
            return c1;
        }
示例#5
0
 // Decides between UTF-8 and UTF-16 from the first byte (c1) plus at most
 // one more byte read from this.stream. On a decision, sets this.reader and
 // returns the first character; otherwise returns -2 without touching
 // this.reader, leaving the UTF-8 default to the caller.
 // Throws InvalidOperationException when c1 is 0xFE/0xFF, no UTF-16
 // byte-order mark follows, and this.errorThrow is set.
 private int DetectUtf8OrUtf16(int c1)
 {
     int lookahead;

     if (c1 == 0xfe || c1 == 0xff) {
         // FE FF is the big-endian mark, FF FE the little-endian one.
         lookahead = this.stream.ReadByte();
         bool bigEndianMark = c1 == 0xfe;
         if (lookahead == (bigEndianMark ? 0xff : 0xfe)) {
             var bomReader = new Utf16Reader(
                 this.stream,
                 bigEndianMark,
                 this.errorThrow);
             this.reader = bomReader;
             return bomReader.ReadChar();
         }
         // No mark: the stream is taken as UTF-8, where this byte is invalid.
         if (this.errorThrow) {
             throw new InvalidOperationException("Invalid Unicode stream");
         }
         var replacementReader = new Utf8Reader(this.stream, this.errorThrow);
         replacementReader.Unget(lookahead);
         this.reader = replacementReader;
         return 0xfffd;
     }

     // Outside mode 1, or for a first byte outside 0x00..0x7F, no guess
     // is made here.
     bool firstIsAsciiOrZero = c1 >= 0 && c1 <= 0x7f;
     if (this.mode != 1 || !firstIsAsciiOrZero) {
         return -2;
     }

     lookahead = this.stream.ReadByte();

     if (c1 != 0) {
         // Nonzero ASCII first byte.
         if (lookahead == 0) {
             // xx 00: UTF-16LE
             this.reader = new Utf16Reader(
                 this.stream,
                 false,
                 this.errorThrow);
         } else {
             // xx followed by a nonzero byte: UTF-8
             var plainUtf8 = new Utf8Reader(this.stream, this.errorThrow);
             plainUtf8.Unget(lookahead);
             this.reader = plainUtf8;
         }
         return c1;
     }

     // Zero first byte.
     if (lookahead >= 0x01 && lookahead <= 0x7f) {
         // 00 xx: UTF-16BE, and xx is the first character.
         this.reader = new Utf16Reader(this.stream, true, this.errorThrow);
         return lookahead;
     }

     var zeroFirstUtf8 = new Utf8Reader(this.stream, this.errorThrow);
     zeroFirstUtf8.Unget(lookahead);
     this.reader = zeroFirstUtf8;
     return c1;
 }
示例#6
0
 // Decides between UTF-8, UTF-16 and UTF-32 from the first byte (c1) plus
 // up to three more bytes read from this.stream. On a decision, sets
 // this.reader and returns the first character; otherwise returns -2 without
 // touching this.reader, leaving the UTF-8 default to the caller.
 // Throws InvalidOperationException when c1 is 0xFE/0xFF, no byte-order
 // mark follows, and this.errorThrow is set.
 private int DetectUtf8Or16Or32(int c1)
 {
     int second;
     int third;
     int fourth;

     if (c1 == 0xfe || c1 == 0xff) {
         // Candidate byte-order marks:
         //   FF FE 00 00 -> UTF-32LE
         //   FF FE ..    -> UTF-16LE
         //   FE FF       -> UTF-16BE
         second = this.stream.ReadByte();
         bool bigEndianMark = c1 == 0xfe;
         if (second != (bigEndianMark ? 0xff : 0xfe)) {
             // No mark: the stream is taken as UTF-8, where this byte is
             // invalid.
             if (this.errorThrow) {
                 throw new InvalidOperationException("Invalid Unicode stream");
             }
             var replacementReader = new Utf8Reader(this.stream, this.errorThrow);
             replacementReader.Unget(second);
             this.reader = replacementReader;
             return 0xfffd;
         }

         third = this.stream.ReadByte();
         fourth = this.stream.ReadByte();
         bool isUtf32Le = !bigEndianMark && third == 0 && fourth == 0;
         if (isUtf32Le) {
             this.reader = new Utf32Reader(this.stream, false, this.errorThrow);
             return this.reader.ReadChar();
         }

         // UTF-16: give the two look-ahead bytes back to the reader.
         var bomReader = new Utf16Reader(
             this.stream,
             bigEndianMark,
             this.errorThrow);
         bomReader.Unget(third, fourth);
         this.reader = bomReader;
         return bomReader.ReadChar();
     }

     if (c1 == 0 && this.mode == 4) {
         // Mode 4 with a zero first byte:
         //   00 00 00 xx (xx nonzero ASCII) -> UTF-32BE
         //   00 00 FE FF                    -> UTF-32BE
         //   anything else                  -> UTF-8
         second = this.stream.ReadByte();
         third = this.stream.ReadByte();
         fourth = this.stream.ReadByte();
         bool endsWithMark = third == 0xfe && fourth == 0xff;
         bool endsWithAscii = third == 0 && fourth >= 0x01 && fourth <= 0x7f;
         if (second != 0 || !(endsWithMark || endsWithAscii)) {
             var notUtf32 = new Utf8Reader(this.stream, this.errorThrow);
             notUtf32.UngetThree(second, third, fourth);
             this.reader = notUtf32;
             return c1;
         }
         this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
         // With an ASCII tail the four bytes are the first character;
         // with a mark, the first character is whatever follows it.
         return endsWithAscii ? fourth : this.reader.ReadChar();
     }

     // Outside mode 2, or for a first byte outside 0x00..0x7F, no guess
     // is made here.
     bool firstIsAsciiOrZero = c1 >= 0 && c1 <= 0x7f;
     if (this.mode != 2 || !firstIsAsciiOrZero) {
         return -2;
     }

     second = this.stream.ReadByte();

     if (c1 != 0) {
         // Nonzero ASCII first byte.
         if (second != 0) {
             // xx followed by a nonzero byte: UTF-8
             var plainUtf8 = new Utf8Reader(this.stream, this.errorThrow);
             plainUtf8.Unget(second);
             this.reader = plainUtf8;
             return c1;
         }
         // xx 00: UTF-16LE, or UTF-32LE when two more zeros follow.
         third = this.stream.ReadByte();
         fourth = this.stream.ReadByte();
         if (third == 0 && fourth == 0) {
             this.reader = new Utf32Reader(
                 this.stream,
                 false,
                 this.errorThrow);
             return c1;
         }
         var littleEndian16 = new Utf16Reader(
             this.stream,
             false,
             this.errorThrow);
         littleEndian16.Unget(third, fourth);
         this.reader = littleEndian16;
         return c1;
     }

     // Zero first byte.
     if (second >= 0x01 && second <= 0x7f) {
         // 00 xx: UTF-16BE, and xx is the first character.
         this.reader = new Utf16Reader(this.stream, true, this.errorThrow);
         return second;
     }
     if (second != 0) {
         // 00 followed by a non-ASCII byte: UTF-8
         var zeroFirstUtf8 = new Utf8Reader(this.stream, this.errorThrow);
         zeroFirstUtf8.Unget(second);
         this.reader = zeroFirstUtf8;
         return c1;
     }

     // 00 00: possibly UTF-32BE
     third = this.stream.ReadByte();
     fourth = this.stream.ReadByte();
     if (third == 0 && fourth >= 0x01 && fourth <= 0x7f) {
         // 00 00 00 xx
         this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
         return fourth;
     }
     if (third == 0xfe && fourth == 0xff) {
         // 00 00 FE FF
         this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
         return this.reader.ReadChar();
     }

     // 00 00 followed by anything else: UTF-8
     var twoZeroUtf8 = new Utf8Reader(this.stream, this.errorThrow);
     twoZeroUtf8.UngetThree(second, third, fourth);
     this.reader = twoZeroUtf8;
     return c1;
 }
示例#7
0
 // Chooses a character reader for this.stream according to this.mode and
 // the leading bytes, stores it in this.reader, and returns the first
 // decoded character. Returns -1 when the very first ReadByte() call yields
 // a negative value (presumably end of stream).
 private int DetectUnicodeEncoding()
 {
     int mode = this.mode;
     int c1 = this.stream.ReadByte();
     if (c1 < 0) {
         return -1;
     }

     if (mode == 1 || mode == 3) {
         // UTF-8 or UTF-16
         int c2 = this.DetectUtf8OrUtf16(c1);
         // A result below -1 means "undecided"; fall through to UTF-8.
         if (c2 >= -1) {
             return c2;
         }
     } else if (mode == 2 || mode == 4) {
         // UTF-8, UTF-16, or UTF-32
         int c2 = this.DetectUtf8Or16Or32(c1);
         if (c2 >= -1) {
             return c2;
         }
     }

     // Mode 0 (UTF-8 only), an unrecognized mode, or an undecided sniff:
     // decode as UTF-8. The byte in c1 has already been taken from the
     // stream, so it MUST be pushed back into the reader before decoding;
     // otherwise the first byte of the input is silently lost (and a leading
     // EF BB BF mark can never be recognized). Mode 0 deliberately shares
     // this path so that it also honors dontSkipUtf8Bom.
     Utf8Reader utf8reader = new Utf8Reader(this.stream, this.errorThrow);
     this.reader = utf8reader;
     utf8reader.Unget(c1);
     c1 = utf8reader.ReadChar();
     if (!this.dontSkipUtf8Bom && c1 == 0xfeff) {
         // Skip BOM
         c1 = utf8reader.ReadChar();
     }
     return c1;
 }