/// <summary>
/// Converts a range of raw bytes into Unicode characters using whichever candidate
/// charset reports the fewest invalid bytes.
/// </summary>
/// <remarks>
/// Candidates are probed in this order: UTF-8, then the charset taken from
/// <paramref name="options"/> (only when it is set and is neither UTF-8 nor
/// iso-8859-1), then iso-8859-1. The earliest candidate with the lowest
/// <c>InvalidByteCount</c> wins, and probing stops as soon as a candidate reports zero.
/// </remarks>
/// <param name="options">The parser options supplying the optional user charset.</param>
/// <param name="input">The buffer holding the bytes to decode.</param>
/// <param name="startIndex">The index of the first byte to decode.</param>
/// <param name="length">The number of bytes to decode.</param>
/// <param name="charCount">Receives the number of characters written into the returned array.</param>
/// <returns>The decoded characters, in an array sized from the winning candidate's measured character count.</returns>
internal static char[] ConvertToUnicode(ParserOptions options, byte[] input, int startIndex, int length, out int charCount)
{
    // Code page identifiers: 65001 is UTF-8 and 28591 is iso-8859-1.
    const int Utf8 = 65001;
    const int Latin1 = 28591;

    var counter = new InvalidByteCountFallback();
    var preferred = options.CharsetEncoding;

    // The user charset is only worth its own probe when it is not already in the list.
    bool tryPreferred = preferred != null && preferred.CodePage != Utf8 && preferred.CodePage != Latin1;
    int[] candidates = tryPreferred
        ? new[] { Utf8, preferred.CodePage, Latin1 }
        : new[] { Utf8, Latin1 };

    int fewestInvalid = int.MaxValue;
    int winnerLength = 0;
    int winner = -1;

    foreach (int codepage in candidates)
    {
        // Measure only: nothing is decoded yet, the shared fallback just tallies invalid bytes.
        Decoder probe = Encoding.GetEncoding(codepage, new EncoderReplacementFallback("?"), counter).GetDecoder();
        int measured = probe.GetCharCount(input, startIndex, length, true);

        if (counter.InvalidByteCount < fewestInvalid)
        {
            fewestInvalid = counter.InvalidByteCount;
            winnerLength = measured;
            winner = codepage;

            if (fewestInvalid == 0)
            {
                break;
            }
        }

        // The same fallback instance is reused, so clear its tally before the next candidate.
        counter.Reset();
    }

    // NOTE(review): GetEncoding(int, string) is defined elsewhere; presumably "?" is the
    // replacement text for undecodable bytes - confirm against that helper.
    Decoder decoder = GetEncoding(winner, "?").GetDecoder();
    var result = new char[winnerLength];

    charCount = decoder.GetChars(input, startIndex, length, result, 0, true);

    return result;
}
/// <summary>
/// Decodes <paramref name="length"/> bytes of <paramref name="input"/>, starting at
/// <paramref name="startIndex"/>, with the candidate code page that yields the
/// lowest <c>InvalidByteCount</c>.
/// </summary>
/// <remarks>
/// The candidates are code page 65001 (UTF-8), the code page of
/// <c>options.CharsetEncoding</c> when that is non-null and differs from the other two,
/// and code page 28591 (iso-8859-1), probed in that order. Ties go to the earlier
/// candidate, and a candidate with no invalid bytes ends the search immediately.
/// </remarks>
/// <param name="options">The parser options whose <c>CharsetEncoding</c> may add a candidate.</param>
/// <param name="input">The source byte buffer.</param>
/// <param name="startIndex">The offset of the first byte to decode.</param>
/// <param name="length">The number of bytes to decode.</param>
/// <param name="charCount">Set to the number of characters produced by the final decode.</param>
/// <returns>An array holding the decoded characters.</returns>
internal static char[] ConvertToUnicode(ParserOptions options, byte[] input, int startIndex, int length, out int charCount)
{
    InvalidByteCountFallback invalidCounter = new InvalidByteCountFallback();
    Encoding userEncoding = options.CharsetEncoding;

    int[] candidateCodePages;
    if (userEncoding != null && userEncoding.CodePage != 65001 && userEncoding.CodePage != 28591)
    {
        candidateCodePages = new int[3] { 65001, userEncoding.CodePage, 28591 };
    }
    else
    {
        candidateCodePages = new int[2] { 65001, 28591 };
    }

    int lowestInvalid = int.MaxValue;
    int selectedCharCount = 0;
    int selectedCodePage = -1;

    for (int index = 0; index < candidateCodePages.Length; index++)
    {
        Encoding candidate = Encoding.GetEncoding(candidateCodePages[index], new EncoderReplacementFallback("?"), invalidCounter);
        Decoder probe = candidate.GetDecoder();
        int probedCharCount = probe.GetCharCount(input, startIndex, length, flush: true);

        if (invalidCounter.InvalidByteCount >= lowestInvalid)
        {
            // Not an improvement: clear the shared tally and move on to the next candidate.
            invalidCounter.Reset();
            continue;
        }

        lowestInvalid = invalidCounter.InvalidByteCount;
        selectedCharCount = probedCharCount;
        selectedCodePage = candidateCodePages[index];

        if (lowestInvalid == 0)
        {
            // A clean decode cannot be beaten, so stop probing.
            break;
        }

        invalidCounter.Reset();
    }

    // NOTE(review): GetEncoding(int, string) lives outside this block; the "?" argument
    // looks like the replacement text for undecodable bytes - confirm.
    Decoder finalDecoder = GetEncoding(selectedCodePage, "?").GetDecoder();
    char[] decoded = new char[selectedCharCount];
    charCount = finalDecoder.GetChars(input, startIndex, length, decoded, 0, flush: true);
    return decoded;
}
/// <summary>
/// Initializes a new instance of the <see cref="InvalidByteCountFallbackBuffer"/> class.
/// </summary>
/// <param name="fallback">The fallback instance to store in the <c>fallback</c> field.</param>
public InvalidByteCountFallbackBuffer(InvalidByteCountFallback fallback) => this.fallback = fallback;
/// <summary>
/// Converts a span of bytes to Unicode, picking the charset that decodes it most cleanly.
/// </summary>
/// <remarks>
/// UTF-8 is probed first, followed by the user-specified charset (when one is set and
/// it is neither UTF-8 nor iso-8859-1) and finally iso-8859-1. The first charset with
/// the smallest <c>InvalidByteCount</c> is used for the actual conversion; a charset
/// that reports no invalid bytes short-circuits the remaining probes.
/// </remarks>
/// <param name="options">The parser options providing the optional user charset.</param>
/// <param name="input">The input buffer.</param>
/// <param name="startIndex">The starting index of the bytes to convert.</param>
/// <param name="length">The number of bytes to convert.</param>
/// <param name="charCount">The number of characters written to the returned buffer.</param>
/// <returns>A buffer containing the converted characters.</returns>
internal static char[] ConvertToUnicode (ParserOptions options, byte[] input, int startIndex, int length, out int charCount)
{
	// Note: 65001 is UTF-8 and 28591 is iso-8859-1
	const int UTF8CodePage = 65001;
	const int Latin1CodePage = 28591;

	var tally = new InvalidByteCountFallback ();
	var userCharset = options.CharsetEncoding;

	bool includeUserCharset = userCharset != null
		&& userCharset.CodePage != UTF8CodePage
		&& userCharset.CodePage != Latin1CodePage;

	int[] codepages = includeUserCharset
		? new [] { UTF8CodePage, userCharset.CodePage, Latin1CodePage }
		: new [] { UTF8CodePage, Latin1CodePage };

	int bestInvalidCount = Int32.MaxValue;
	int bestCodePage = -1;
	int bestLength = 0;

	foreach (int codepage in codepages) {
		// Dry run: count the characters while the shared fallback tallies invalid bytes.
		var candidate = Encoding.GetEncoding (codepage, new EncoderReplacementFallback ("?"), tally);
		int candidateLength = candidate.GetDecoder ().GetCharCount (input, startIndex, length, true);

		if (tally.InvalidByteCount < bestInvalidCount) {
			bestInvalidCount = tally.InvalidByteCount;
			bestCodePage = codepage;
			bestLength = candidateLength;

			if (bestInvalidCount == 0)
				break;
		}

		// The fallback is shared between probes, so zero it for the next charset.
		tally.Reset ();
	}

	// NOTE(review): CharsetUtils.GetEncoding is not visible here; "?" is presumably
	// the replacement text for bytes that cannot be decoded - confirm.
	var decoder = CharsetUtils.GetEncoding (bestCodePage, "?").GetDecoder ();
	var converted = new char[bestLength];

	charCount = decoder.GetChars (input, startIndex, length, converted, 0, true);

	return converted;
}
/// <summary>
/// Creates a new <see cref="InvalidByteCountFallbackBuffer"/>.
/// </summary>
/// <param name="fallback">The fallback that this buffer keeps in its <c>fallback</c> field.</param>
public InvalidByteCountFallbackBuffer (InvalidByteCountFallback fallback)
{
	// The only state set here is the reference back to the supplied fallback.
	this.fallback = fallback;
}