Пример #1
0
        /// <summary>
        /// Decodes a range of bytes into characters using whichever candidate charset
        /// reports the fewest invalid bytes.
        /// </summary>
        /// <remarks>
        /// Candidates are probed in this order: UTF-8 (65001), the charset taken from
        /// <paramref name="options"/> (only when it is set and is neither UTF-8 nor
        /// iso-8859-1), and finally iso-8859-1 (28591). Probing stops at the first
        /// candidate that reports zero invalid bytes; on a tie the earlier candidate wins.
        /// </remarks>
        /// <param name="options">The parser options; its <c>CharsetEncoding</c> supplies the optional user charset.</param>
        /// <param name="input">The buffer holding the bytes to decode.</param>
        /// <param name="startIndex">The index of the first byte to decode.</param>
        /// <param name="length">The number of bytes to decode.</param>
        /// <param name="charCount">Receives the number of characters written into the returned array.</param>
        /// <returns>The decoded characters.</returns>
        internal static char[] ConvertToUnicode(ParserOptions options, byte[] input, int startIndex, int length, out int charCount)
        {
            const int Utf8   = 65001;
            const int Latin1 = 28591; // iso-8859-1

            var counter     = new InvalidByteCountFallback();
            var userCharset = options.CharsetEncoding;

            // Only insert the user's charset when it is not already one of the built-in candidates.
            int[] candidates = userCharset != null && userCharset.CodePage != Utf8 && userCharset.CodePage != Latin1
                ? new [] { Utf8, userCharset.CodePage, Latin1 }
                : new [] { Utf8, Latin1 };

            int fewest      = int.MaxValue;
            int winner      = -1;
            int winnerChars = 0;

            foreach (int codepage in candidates)
            {
                // Measuring pass: nothing is decoded yet, the shared fallback just tallies invalid bytes.
                var probe    = Encoding.GetEncoding(codepage, new EncoderReplacementFallback("?"), counter).GetDecoder();
                int measured = probe.GetCharCount(input, startIndex, length, true);

                if (counter.InvalidByteCount < fewest)
                {
                    fewest      = counter.InvalidByteCount;
                    winnerChars = measured;
                    winner      = codepage;

                    if (fewest == 0)
                    {
                        // A clean decode cannot be improved upon.
                        break;
                    }
                }

                counter.Reset();
            }

            // Decoding pass with the winning code page, into a buffer sized by the measuring pass.
            var decoder = GetEncoding(winner, "?").GetDecoder();
            var output  = new char[winnerChars];

            charCount = decoder.GetChars(input, startIndex, length, output, 0, true);

            return output;
        }
Пример #2
0
        /// <summary>
        /// Converts a span of raw bytes to characters, choosing the code page that
        /// produces the fewest invalid bytes.
        /// </summary>
        /// <remarks>
        /// UTF-8 (65001) is tried first and iso-8859-1 (28591) last. When
        /// <paramref name="options"/> carries a charset that is neither of those two,
        /// it is tried in between. The search ends early as soon as a code page
        /// reports no invalid bytes.
        /// </remarks>
        /// <param name="options">The parser options whose <c>CharsetEncoding</c> may contribute an extra candidate.</param>
        /// <param name="input">The source byte buffer.</param>
        /// <param name="startIndex">The offset of the first byte to convert.</param>
        /// <param name="length">The number of bytes to convert.</param>
        /// <param name="charCount">Set to the number of characters placed in the returned array.</param>
        /// <returns>The converted characters.</returns>
        internal static char[] ConvertToUnicode(ParserOptions options, byte[] input, int startIndex, int length, out int charCount)
        {
            var fallback    = new InvalidByteCountFallback();
            var userCharset = options.CharsetEncoding;

            int[] codepages;
            if (userCharset != null && userCharset.CodePage != 65001 && userCharset.CodePage != 28591)
            {
                codepages = new[] { 65001, userCharset.CodePage, 28591 };
            }
            else
            {
                codepages = new[] { 65001, 28591 };
            }

            int lowestInvalid = int.MaxValue;
            int bestLength    = 0;
            int bestCodepage  = -1;

            for (int index = 0; index < codepages.Length; index++)
            {
                // Count-only pass; the shared fallback records how many bytes failed to decode.
                Decoder probe = Encoding.GetEncoding(codepages[index], new EncoderReplacementFallback("?"), fallback).GetDecoder();
                int decodedLength = probe.GetCharCount(input, startIndex, length, true);

                if (fallback.InvalidByteCount >= lowestInvalid)
                {
                    // Not an improvement: clear the tally and move on to the next candidate.
                    fallback.Reset();
                    continue;
                }

                lowestInvalid = fallback.InvalidByteCount;
                bestLength    = decodedLength;
                bestCodepage  = codepages[index];

                if (lowestInvalid == 0)
                {
                    break;
                }

                fallback.Reset();
            }

            // Real decode with the chosen code page; the buffer size comes from the count-only pass.
            Decoder decoder = GetEncoding(bestCodepage, "?").GetDecoder();
            char[]  result  = new char[bestLength];

            charCount = decoder.GetChars(input, startIndex, length, result, 0, true);

            return result;
        }
Пример #3
0
 /// <summary>
 /// Initializes a new instance of the <see cref="InvalidByteCountFallbackBuffer"/> class.
 /// </summary>
 /// <param name="fallback">The fallback instance to keep a reference to; it is stored in the <c>fallback</c> field.</param>
 public InvalidByteCountFallbackBuffer(InvalidByteCountFallback fallback) => this.fallback = fallback;
Пример #4
0
		/// <summary>
		/// Decodes the given byte range to characters with the candidate code page
		/// that yields the smallest number of invalid bytes.
		/// </summary>
		/// <remarks>
		/// The candidates are, in order: UTF-8 (65001), the charset configured on
		/// <paramref name="options"/> (skipped when it is unset, UTF-8 or iso-8859-1),
		/// and iso-8859-1 (28591). A candidate with no invalid bytes ends the search
		/// immediately; otherwise the first candidate with the lowest count is used.
		/// </remarks>
		/// <param name="options">The parser options providing the optional <c>CharsetEncoding</c>.</param>
		/// <param name="input">The input buffer.</param>
		/// <param name="startIndex">The position of the first byte to decode.</param>
		/// <param name="length">The number of bytes to decode.</param>
		/// <param name="charCount">On return, the number of characters written to the result array.</param>
		/// <returns>The decoded characters.</returns>
		internal static char[] ConvertToUnicode (ParserOptions options, byte[] input, int startIndex, int length, out int charCount)
		{
			var tally = new InvalidByteCountFallback ();
			var userCharset = options.CharsetEncoding;

			// Note: 65001 is UTF-8 and 28591 is iso-8859-1
			bool includeUser = userCharset != null && userCharset.CodePage != 65001 && userCharset.CodePage != 28591;
			var codepages = new int[includeUser ? 3 : 2];
			int slot = 0;

			codepages[slot++] = 65001;
			if (includeUser) {
				codepages[slot++] = userCharset.CodePage;
			}
			codepages[slot] = 28591;

			int fewestInvalid = int.MaxValue;
			int chosenCodepage = -1;
			int chosenLength = 0;

			foreach (int codepage in codepages) {
				// Dry run: only count characters while the shared fallback tallies invalid bytes.
				var probe = Encoding.GetEncoding (codepage, new EncoderReplacementFallback ("?"), tally).GetDecoder ();
				int probedLength = probe.GetCharCount (input, startIndex, length, true);

				if (!(tally.InvalidByteCount < fewestInvalid)) {
					tally.Reset ();
					continue;
				}

				fewestInvalid = tally.InvalidByteCount;
				chosenLength = probedLength;
				chosenCodepage = codepage;

				if (fewestInvalid == 0) {
					break;
				}

				tally.Reset ();
			}

			// Decode for real with the selected code page into a buffer sized by the dry run.
			var decoder = CharsetUtils.GetEncoding (chosenCodepage, "?").GetDecoder ();
			var output = new char[chosenLength];

			charCount = decoder.GetChars (input, startIndex, length, output, 0, true);

			return output;
		}
Пример #5
0
				/// <summary>
				/// Initializes a new instance of the <see cref="InvalidByteCountFallbackBuffer"/> class.
				/// </summary>
				/// <param name="fallback">The fallback instance to keep a reference to; it is stored in the <c>fallback</c> field.</param>
				public InvalidByteCountFallbackBuffer (InvalidByteCountFallback fallback) => this.fallback = fallback;