Example #1
0
        public void DetectUnicodeEncodingTest_ANSI()
        {
            // Locate the fixture relative to the directory of the executing test assembly.
            // The path is combined one segment at a time so that no platform-specific
            // directory separator is hard-coded.
            string assemblyDirectory = new FileInfo(Assembly.GetExecutingAssembly().Location).DirectoryName;
            string file              = Path.Combine(Path.Combine(assemblyDirectory, "Resources"), "ANSI.txt");

            Encoding actual = EncodingUtility.DetectUnicodeEncoding(file);

            // For the ANSI fixture the detector is expected to report no Unicode encoding (null).
            Assert.IsNull(actual);
        }
Example #2
0
        public void DetectUnicodeEncodingTest_UTF16_Big()
        {
            // Locate the fixture relative to the directory of the executing test assembly.
            // The path is combined one segment at a time so that no platform-specific
            // directory separator is hard-coded.
            string assemblyDirectory = new FileInfo(Assembly.GetExecutingAssembly().Location).DirectoryName;
            string file              = Path.Combine(Path.Combine(assemblyDirectory, "Resources"), "UTF-16-Big.txt");

            // UnicodeEncoding(bigEndian: true, byteOrderMark: true) is big-endian UTF-16 with a BOM.
            Encoding expected = new UnicodeEncoding(true, true);
            Encoding actual   = EncodingUtility.DetectUnicodeEncoding(file);

            Assert.AreEqual(expected, actual);
        }
Example #3
0
        public void DetectUnicodeEncodingTest_UTF32_Little()
        {
            // Locate the fixture relative to the directory of the executing test assembly.
            // The path is combined one segment at a time so that no platform-specific
            // directory separator is hard-coded.
            string assemblyDirectory = new FileInfo(Assembly.GetExecutingAssembly().Location).DirectoryName;
            string file              = Path.Combine(Path.Combine(assemblyDirectory, "Resources"), "UTF-32-Little.txt");

            // Encoding.UTF32 is the little-endian UTF-32 encoding.
            Encoding expected = Encoding.UTF32;
            Encoding actual   = EncodingUtility.DetectUnicodeEncoding(file);

            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Gets the response content decoded as a string.
        /// </summary>
        /// <remarks>
        /// The encoding used for decoding is chosen in this order:
        /// the <paramref name="encoding"/> argument; the result of <c>HttpHelper.GetTextEncoding</c>
        /// for the response; the result of <c>EncodingUtility.DetectUnicodeEncoding</c> on the raw
        /// bytes; a charset name found in the content itself (first via <c>g_charsetRegex</c>, then by
        /// scanning <c>&lt;meta ...&gt;</c> tags for well-known names). When none of these yields an
        /// encoding, the content decoded with <c>DefaultContentEncoding</c> is returned.
        /// </remarks>
        /// <param name="response">The HTTP response whose content is read.</param>
        /// <param name="encoding">The encoding to decode with, or null to detect it as described in the remarks.</param>
        /// <param name="buffer">Buffer handed to <c>GetResponseContent</c> (presumably a scratch read buffer - confirm against that method). Must not be null.</param>
        /// <returns>The decoded response content.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="buffer"/> is null.</exception>
        /// <exception cref="System.NotSupportedException">Not supported character set.</exception>
        public static string GetResponseString(this HttpWebResponse response, Encoding encoding, byte[] buffer)
        {
            if (buffer == null)
            {
                throw new ArgumentNullException("buffer");
            }

            if (encoding == null)
            {
                encoding = HttpHelper.GetTextEncoding(response);
            }

            byte[] data = GetResponseContent(response, buffer);
            if (encoding == null)
            {
                encoding = EncodingUtility.DetectUnicodeEncoding(data);
            }

            if (encoding != null)
            {
                return encoding.GetString(data);
            }

            // No encoding known so far: decode with the default encoding so the text can be
            // searched for a charset declaration.
            string content = DefaultContentEncoding.GetString(data);
            encoding = DetectDeclaredEncoding(content);

            // If the encoding still cannot be determined, or it is the default one anyway,
            // the text decoded above is already the result - no need to decode twice.
            if (encoding == null || DefaultContentEncoding.Equals(encoding))
            {
                return content;
            }

            return encoding.GetString(data);
        }

        /// <summary>
        /// Searches already-decoded content for a charset declaration and maps it to an encoding.
        /// Returns null when no usable declaration is found.
        /// </summary>
        private static Encoding DetectDeclaredEncoding(string content)
        {
            // First choice: the charset name captured by group 1 of g_charsetRegex.
            Match match = g_charsetRegex.Match(content);
            if (match.Success)
            {
                string   charset  = match.Groups[1].Value;
                Encoding declared;

                // Charset names are protocol tokens, so compare them culture-invariantly.
                switch (charset.ToLowerInvariant())
                {
                case "utf8":
                case "utf-8":
                    declared = Encoding.UTF8;
                    break;

                case "utf16":
                case "utf-16":
                    declared = Encoding.Unicode;
                    break;

                case "utf32":
                case "utf-32":
                    declared = Encoding.UTF32;
                    break;

                default:
                    declared = GetEncodingOrNull(charset);
                    break;
                }

                if (declared != null)
                {
                    return declared;
                }
            }

            // Fallback: look for a well-known charset name anywhere inside a <meta ...> tag.
            foreach (string tag in content.Between("<meta ", ">", StringComparison.OrdinalIgnoreCase).Select((text) => text.ToLowerInvariant()))
            {
                Encoding declared = null;

                if (tag.Contains("utf8") || tag.Contains("utf-8"))
                {
                    declared = Encoding.UTF8;
                }
                else if (tag.Contains("utf16") || tag.Contains("utf-16"))
                {
                    declared = Encoding.Unicode;
                }
                else if (tag.Contains("utf32") || tag.Contains("utf-32"))
                {
                    declared = Encoding.UTF32;
                }
                else if (tag.Contains("gb2312"))
                {
                    declared = GetEncodingOrNull("gb2312");
                }
                else if (tag.Contains("gbk"))
                {
                    declared = GetEncodingOrNull("gbk");
                }

                if (declared != null)
                {
                    return declared;
                }
            }

            return null;
        }

        /// <summary>
        /// Looks up an encoding by name, returning null instead of throwing when
        /// <see cref="Encoding.GetEncoding(string)"/> rejects the name with an <see cref="ArgumentException"/>.
        /// </summary>
        private static Encoding GetEncodingOrNull(string name)
        {
            try
            {
                return Encoding.GetEncoding(name);
            }
            catch (ArgumentException)
            {
                // Deliberate best effort: an unknown or unavailable charset name simply means
                // "not detected", so the caller can try the next detection strategy.
                return null;
            }
        }