// Detects the Unicode encoding of the stream from its first byte(s),
// stores the chosen reader in this.reader, and returns the first
// character read. Returns -1 if the stream yields no first byte.
//
// Dispatch on this.mode:
//   1 or 3 - delegate to DetectUtf8OrUtf16
//   2 or 4 - delegate to DetectUtf8Or16Or32 (UTF-8, UTF-16, or UTF-32)
//   0 or anything else - UTF-8
// A delegate result below -1 means it made no decision, in which case the
// stream is read as UTF-8.
private int DetectUnicodeEncoding() {
  int detectionMode = this.mode;
  int firstByte = this.stream.ReadByte();
  if (firstByte < 0) {
    return -1;
  }
  if (detectionMode == 1 || detectionMode == 3) {
    int detected = this.DetectUtf8OrUtf16(firstByte);
    if (detected >= -1) {
      return detected;
    }
  } else if (detectionMode == 2 || detectionMode == 4) {
    int detected = this.DetectUtf8Or16Or32(firstByte);
    if (detected >= -1) {
      return detected;
    }
  }
  // UTF-8 path (mode 0 and every undecided case): the byte already taken
  // from the stream is handed back to the reader before decoding.
  var fallbackReader = new Utf8Reader(this.stream, this.errorThrow);
  this.reader = fallbackReader;
  fallbackReader.Unget(firstByte);
  int firstChar = fallbackReader.ReadChar();
  bool skipBom = !this.dontSkipUtf8Bom;
  if (skipBom && firstChar == 0xfeff) {
    // Leading byte-order mark: return the character after it instead
    firstChar = fallbackReader.ReadChar();
  }
  return firstChar;
}
// Detects a Unicode encoding from the first byte(s) of the stream, stores
// the chosen reader in this.reader, and returns the first character read.
// Returns -1 if the stream yields no first byte.
//
// this.mode selects the candidates:
//   0      - UTF-8 only
//   1 or 3 - UTF-8 or UTF-16 (see DetectUtf8OrUtf16)
//   2 or 4 - UTF-8, UTF-16, or UTF-32 (see DetectUtf8Or16Or32)
// A detector result below -1 means it made no decision; the stream is then
// read as UTF-8.
private int DetectUnicodeEncoding() {
  int mode = this.mode;
  int c1 = this.stream.ReadByte();
  if (c1 < 0) {
    return -1;
  }
  if (mode == 1 || mode == 3) {
    // UTF-8 or UTF-16
    int c2 = this.DetectUtf8OrUtf16(c1);
    if (c2 >= -1) {
      return c2;
    }
  } else if (mode == 2 || mode == 4) {
    // UTF-8, UTF-16, or UTF-32
    int c2 = this.DetectUtf8Or16Or32(c1);
    if (c2 >= -1) {
      return c2;
    }
  }
  // UTF-8 path, shared by mode 0 (UTF-8 only) and the undecided cases.
  var utf8reader = new Utf8Reader(this.stream, this.errorThrow);
  this.reader = utf8reader;
  // c1 has already been consumed from the stream, so it must be handed back
  // to the reader; otherwise the first byte of the input is silently lost.
  utf8reader.Unget(c1);
  c1 = utf8reader.ReadChar();
  if (!this.dontSkipUtf8Bom && c1 == 0xfeff) {
    // Skip BOM, unless the caller asked to keep it
    c1 = utf8reader.ReadChar();
  }
  return c1;
}
// Chooses between UTF-8 and UTF-16 given c1, the first byte of the stream.
// On a decision, stores the chosen reader in this.reader and returns the
// first character. Returns -2 when no decision is made; in that case no
// further byte is read here and this.reader is left untouched.
//
//   FE FF          --> UTF-16BE (byte-order mark)
//   FF FE          --> UTF-16LE (byte-order mark)
//   FE/FF + other  --> invalid as UTF-8: throws InvalidOperationException
//                      if this.errorThrow is set, otherwise yields 0xfffd
// Only when this.mode is 1 (NZA = nonzero ASCII byte, 0x01-0x7f):
//   NZA 0          --> UTF-16LE
//   NZA other      --> UTF-8
//   0 NZA          --> UTF-16BE
//   0 other        --> UTF-8
private int DetectUtf8OrUtf16(int c1) {
  if (c1 == 0xfe || c1 == 0xff) {
    int second = this.stream.ReadByte();
    bool isBigEndian = c1 == 0xfe;
    int expectedSecond = isBigEndian ? 0xff : 0xfe;
    if (second != expectedSecond) {
      // Not a byte-order mark; assume UTF-8, where 0xff and 0xfe are invalid
      if (this.errorThrow) {
        throw new InvalidOperationException("Invalid Unicode stream");
      }
      var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
      utf8Fallback.Unget(second);
      this.reader = utf8Fallback;
      return 0xfffd;
    }
    var utf16 = new Utf16Reader(this.stream, isBigEndian, this.errorThrow);
    this.reader = utf16;
    return utf16.ReadChar();
  }
  if (this.mode != 1) {
    // Without a byte-order mark, only mode 1 inspects the second byte
    return -2;
  }
  if (c1 == 0) {
    int next = this.stream.ReadByte();
    if (next >= 0x01 && next <= 0x7f) {
      // 0 NZA, so UTF-16BE
      this.reader = new Utf16Reader(this.stream, true, this.errorThrow);
      return next;
    }
    var utf8AfterZero = new Utf8Reader(this.stream, this.errorThrow);
    utf8AfterZero.Unget(next);
    this.reader = utf8AfterZero;
    return c1;
  }
  if (c1 >= 0x01 && c1 <= 0x7f) {
    int next = this.stream.ReadByte();
    if (next == 0) {
      // NZA 0, so UTF-16LE
      this.reader = new Utf16Reader(this.stream, false, this.errorThrow);
    } else {
      // NZA followed by anything else, so UTF-8
      var utf8AfterAscii = new Utf8Reader(this.stream, this.errorThrow);
      utf8AfterAscii.Unget(next);
      this.reader = utf8AfterAscii;
    }
    return c1;
  }
  // Use default of UTF-8
  return -2;
}
// Chooses between UTF-8, UTF-16 and UTF-32 given c1, the first byte of the
// stream. On a decision, stores the chosen reader in this.reader and
// returns the first character. Returns -2 when no decision is made; in
// that case no further byte is read here and this.reader is left untouched.
//
// Byte-order marks (any mode):
//   FF FE 0 0      --> UTF-32LE
//   FF FE ...      --> UTF-16LE
//   FE FF          --> UTF-16BE
//   FE/FF + other  --> invalid as UTF-8: throws InvalidOperationException
//                      if this.errorThrow is set, otherwise yields 0xfffd
// Mode 4, first byte 0 (NZA = nonzero ASCII byte, 0x01-0x7f):
//   0 0 0 NZA      --> UTF-32BE
//   0 0 FE FF      --> UTF-32BE
//   anything else  --> UTF-8
// Mode 2:
//   NZA 0 0 0      --> UTF-32LE
//   NZA 0 ...      --> UTF-16LE
//   NZA other      --> UTF-8
//   0 NZA          --> UTF-16BE
//   0 0 0 NZA      --> UTF-32BE
//   0 0 FE FF      --> UTF-32BE
//   0 ...          --> UTF-8
private int DetectUtf8Or16Or32(int c1) {
  int second, third, fourth;
  if (c1 == 0xff || c1 == 0xfe) {
    // Start of a possible byte-order mark
    second = this.stream.ReadByte();
    bool isBigEndian = c1 == 0xfe;
    int expectedSecond = isBigEndian ? 0xff : 0xfe;
    if (second != expectedSecond) {
      // Assume UTF-8 here, so the 0xff or 0xfe is invalid
      if (this.errorThrow) {
        throw new InvalidOperationException("Invalid Unicode stream");
      }
      var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
      utf8Fallback.Unget(second);
      this.reader = utf8Fallback;
      return 0xfffd;
    }
    third = this.stream.ReadByte();
    fourth = this.stream.ReadByte();
    if (!isBigEndian && third == 0 && fourth == 0) {
      // FF FE 0 0
      this.reader = new Utf32Reader(this.stream, false, this.errorThrow);
      return this.reader.ReadChar();
    }
    // The two bytes after the mark belong to the UTF-16 text
    var utf16 = new Utf16Reader(this.stream, isBigEndian, this.errorThrow);
    utf16.Unget(third, fourth);
    this.reader = utf16;
    return utf16.ReadChar();
  }
  if (c1 == 0 && this.mode == 4) {
    second = this.stream.ReadByte();
    third = this.stream.ReadByte();
    fourth = this.stream.ReadByte();
    bool hasBigEndianBom = third == 0xfe && fourth == 0xff;
    bool hasAsciiChar = third == 0 && fourth >= 0x01 && fourth <= 0x7f;
    if (second == 0 && (hasBigEndianBom || hasAsciiChar)) {
      this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
      if (third == 0) {
        // 0 0 0 NZA: the four bytes already read are the first character
        return fourth;
      }
      // 0 0 FE FF: the first character follows the byte-order mark
      return this.reader.ReadChar();
    }
    var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
    utf8Fallback.UngetThree(second, third, fourth);
    this.reader = utf8Fallback;
    return c1;
  }
  if (this.mode != 2) {
    // Use default of UTF-8
    return -2;
  }
  if (c1 >= 0x01 && c1 <= 0x7f) {
    // Nonzero ASCII character
    second = this.stream.ReadByte();
    if (second != 0) {
      // NZA NZ, so UTF-8
      var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
      utf8Fallback.Unget(second);
      this.reader = utf8Fallback;
      return c1;
    }
    // NZA 0, so UTF-16LE or UTF-32LE
    third = this.stream.ReadByte();
    fourth = this.stream.ReadByte();
    if (third == 0 && fourth == 0) {
      this.reader = new Utf32Reader(this.stream, false, this.errorThrow);
    } else {
      var utf16le = new Utf16Reader(this.stream, false, this.errorThrow);
      utf16le.Unget(third, fourth);
      this.reader = utf16le;
    }
    return c1;
  }
  if (c1 != 0) {
    // Use default of UTF-8
    return -2;
  }
  // Zero
  second = this.stream.ReadByte();
  if (second >= 0x01 && second <= 0x7f) {
    // 0 NZA, so UTF-16BE
    this.reader = new Utf16Reader(this.stream, true, this.errorThrow);
    return second;
  }
  if (second != 0) {
    // 0 NonAscii, so UTF-8
    var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
    utf8Fallback.Unget(second);
    this.reader = utf8Fallback;
    return c1;
  }
  // 0 0, so maybe UTF-32BE
  third = this.stream.ReadByte();
  fourth = this.stream.ReadByte();
  if (third == 0 && fourth >= 0x01 && fourth <= 0x7f) {
    // 0 0 0 NZA
    this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
    return fourth;
  }
  if (third == 0xfe && fourth == 0xff) {
    // 0 0 FE FF
    this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
    return this.reader.ReadChar();
  }
  // 0 0 ...
  var utf8AfterZeros = new Utf8Reader(this.stream, this.errorThrow);
  utf8AfterZeros.UngetThree(second, third, fourth);
  this.reader = utf8AfterZeros;
  return c1;
}
// Chooses between UTF-8 and UTF-16 given c1, the first byte of the stream.
// On a decision, stores the chosen reader in this.reader and returns the
// first character. Returns -2 when no decision is made; in that case no
// further byte is read here and this.reader is left untouched.
//
//   FE FF          --> UTF-16BE (byte-order mark)
//   FF FE          --> UTF-16LE (byte-order mark)
//   FE/FF + other  --> invalid as UTF-8: throws InvalidOperationException
//                      if this.errorThrow is set, otherwise yields 0xfffd
// Only when this.mode is 1 (NZA = nonzero ASCII byte, 0x01-0x7f):
//   NZA 0          --> UTF-16LE
//   NZA other      --> UTF-8
//   0 NZA          --> UTF-16BE
//   0 other        --> UTF-8
private int DetectUtf8OrUtf16(int c1) {
  if (c1 == 0xfe || c1 == 0xff) {
    int second = this.stream.ReadByte();
    bool isBigEndian = c1 == 0xfe;
    int expectedSecond = isBigEndian ? 0xff : 0xfe;
    if (second != expectedSecond) {
      // Not a byte-order mark; assume UTF-8, where 0xff and 0xfe are invalid
      if (this.errorThrow) {
        throw new InvalidOperationException("Invalid Unicode stream");
      }
      var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
      utf8Fallback.Unget(second);
      this.reader = utf8Fallback;
      return 0xfffd;
    }
    var utf16 = new Utf16Reader(this.stream, isBigEndian, this.errorThrow);
    this.reader = utf16;
    return utf16.ReadChar();
  }
  if (this.mode != 1) {
    // Without a byte-order mark, only mode 1 inspects the second byte
    return -2;
  }
  if (c1 == 0) {
    int next = this.stream.ReadByte();
    if (next >= 0x01 && next <= 0x7f) {
      // 0 NZA, so UTF-16BE
      this.reader = new Utf16Reader(this.stream, true, this.errorThrow);
      return next;
    }
    var utf8AfterZero = new Utf8Reader(this.stream, this.errorThrow);
    utf8AfterZero.Unget(next);
    this.reader = utf8AfterZero;
    return c1;
  }
  if (c1 >= 0x01 && c1 <= 0x7f) {
    int next = this.stream.ReadByte();
    if (next == 0) {
      // NZA 0, so UTF-16LE
      this.reader = new Utf16Reader(this.stream, false, this.errorThrow);
    } else {
      // NZA followed by anything else, so UTF-8
      var utf8AfterAscii = new Utf8Reader(this.stream, this.errorThrow);
      utf8AfterAscii.Unget(next);
      this.reader = utf8AfterAscii;
    }
    return c1;
  }
  // Use default of UTF-8
  return -2;
}
// Chooses between UTF-8, UTF-16 and UTF-32 given c1, the first byte of the
// stream. On a decision, stores the chosen reader in this.reader and
// returns the first character. Returns -2 when no decision is made; in
// that case no further byte is read here and this.reader is left untouched.
//
// Byte-order marks (any mode):
//   FF FE 0 0      --> UTF-32LE
//   FF FE ...      --> UTF-16LE
//   FE FF          --> UTF-16BE
//   FE/FF + other  --> invalid as UTF-8: throws InvalidOperationException
//                      if this.errorThrow is set, otherwise yields 0xfffd
// Mode 4, first byte 0 (NZA = nonzero ASCII byte, 0x01-0x7f):
//   0 0 0 NZA      --> UTF-32BE
//   0 0 FE FF      --> UTF-32BE
//   anything else  --> UTF-8
// Mode 2:
//   NZA 0 0 0      --> UTF-32LE
//   NZA 0 ...      --> UTF-16LE
//   NZA other      --> UTF-8
//   0 NZA          --> UTF-16BE
//   0 0 0 NZA      --> UTF-32BE
//   0 0 FE FF      --> UTF-32BE
//   0 ...          --> UTF-8
private int DetectUtf8Or16Or32(int c1) {
  int second, third, fourth;
  if (c1 == 0xff || c1 == 0xfe) {
    // Start of a possible byte-order mark
    second = this.stream.ReadByte();
    bool isBigEndian = c1 == 0xfe;
    int expectedSecond = isBigEndian ? 0xff : 0xfe;
    if (second != expectedSecond) {
      // Assume UTF-8 here, so the 0xff or 0xfe is invalid
      if (this.errorThrow) {
        throw new InvalidOperationException("Invalid Unicode stream");
      }
      var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
      utf8Fallback.Unget(second);
      this.reader = utf8Fallback;
      return 0xfffd;
    }
    third = this.stream.ReadByte();
    fourth = this.stream.ReadByte();
    if (!isBigEndian && third == 0 && fourth == 0) {
      // FF FE 0 0
      this.reader = new Utf32Reader(this.stream, false, this.errorThrow);
      return this.reader.ReadChar();
    }
    // The two bytes after the mark belong to the UTF-16 text
    var utf16 = new Utf16Reader(this.stream, isBigEndian, this.errorThrow);
    utf16.Unget(third, fourth);
    this.reader = utf16;
    return utf16.ReadChar();
  }
  if (c1 == 0 && this.mode == 4) {
    second = this.stream.ReadByte();
    third = this.stream.ReadByte();
    fourth = this.stream.ReadByte();
    bool hasBigEndianBom = third == 0xfe && fourth == 0xff;
    bool hasAsciiChar = third == 0 && fourth >= 0x01 && fourth <= 0x7f;
    if (second == 0 && (hasBigEndianBom || hasAsciiChar)) {
      this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
      if (third == 0) {
        // 0 0 0 NZA: the four bytes already read are the first character
        return fourth;
      }
      // 0 0 FE FF: the first character follows the byte-order mark
      return this.reader.ReadChar();
    }
    var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
    utf8Fallback.UngetThree(second, third, fourth);
    this.reader = utf8Fallback;
    return c1;
  }
  if (this.mode != 2) {
    // Use default of UTF-8
    return -2;
  }
  if (c1 >= 0x01 && c1 <= 0x7f) {
    // Nonzero ASCII character
    second = this.stream.ReadByte();
    if (second != 0) {
      // NZA NZ, so UTF-8
      var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
      utf8Fallback.Unget(second);
      this.reader = utf8Fallback;
      return c1;
    }
    // NZA 0, so UTF-16LE or UTF-32LE
    third = this.stream.ReadByte();
    fourth = this.stream.ReadByte();
    if (third == 0 && fourth == 0) {
      this.reader = new Utf32Reader(this.stream, false, this.errorThrow);
    } else {
      var utf16le = new Utf16Reader(this.stream, false, this.errorThrow);
      utf16le.Unget(third, fourth);
      this.reader = utf16le;
    }
    return c1;
  }
  if (c1 != 0) {
    // Use default of UTF-8
    return -2;
  }
  // Zero
  second = this.stream.ReadByte();
  if (second >= 0x01 && second <= 0x7f) {
    // 0 NZA, so UTF-16BE
    this.reader = new Utf16Reader(this.stream, true, this.errorThrow);
    return second;
  }
  if (second != 0) {
    // 0 NonAscii, so UTF-8
    var utf8Fallback = new Utf8Reader(this.stream, this.errorThrow);
    utf8Fallback.Unget(second);
    this.reader = utf8Fallback;
    return c1;
  }
  // 0 0, so maybe UTF-32BE
  third = this.stream.ReadByte();
  fourth = this.stream.ReadByte();
  if (third == 0 && fourth >= 0x01 && fourth <= 0x7f) {
    // 0 0 0 NZA
    this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
    return fourth;
  }
  if (third == 0xfe && fourth == 0xff) {
    // 0 0 FE FF
    this.reader = new Utf32Reader(this.stream, true, this.errorThrow);
    return this.reader.ReadChar();
  }
  // 0 0 ...
  var utf8AfterZeros = new Utf8Reader(this.stream, this.errorThrow);
  utf8AfterZeros.UngetThree(second, third, fourth);
  this.reader = utf8AfterZeros;
  return c1;
}
// Detects a Unicode encoding from the first byte(s) of the stream, stores
// the chosen reader in this.reader, and returns the first character read.
// Returns -1 if the stream yields no first byte.
//
// this.mode selects the candidates:
//   0      - UTF-8 only
//   1 or 3 - UTF-8 or UTF-16 (see DetectUtf8OrUtf16)
//   2 or 4 - UTF-8, UTF-16, or UTF-32 (see DetectUtf8Or16Or32)
// A detector result below -1 means it made no decision; the stream is then
// read as UTF-8.
private int DetectUnicodeEncoding() {
  int mode = this.mode;
  int c1 = this.stream.ReadByte();
  if (c1 < 0) {
    return -1;
  }
  if (mode == 1 || mode == 3) {
    // UTF-8 or UTF-16
    int c2 = this.DetectUtf8OrUtf16(c1);
    if (c2 >= -1) {
      return c2;
    }
  } else if (mode == 2 || mode == 4) {
    // UTF-8, UTF-16, or UTF-32
    int c2 = this.DetectUtf8Or16Or32(c1);
    if (c2 >= -1) {
      return c2;
    }
  }
  // UTF-8 path, shared by mode 0 (UTF-8 only) and the undecided cases.
  var utf8reader = new Utf8Reader(this.stream, this.errorThrow);
  this.reader = utf8reader;
  // c1 has already been consumed from the stream, so it must be handed back
  // to the reader; otherwise the first byte of the input is silently lost.
  utf8reader.Unget(c1);
  c1 = utf8reader.ReadChar();
  if (!this.dontSkipUtf8Bom && c1 == 0xfeff) {
    // Skip BOM, unless the caller asked to keep it
    c1 = utf8reader.ReadChar();
  }
  return c1;
}