// Checks that EncodingUtility.DetectUnicodeEncoding returns null for the
// "Resources\ANSI.txt" sample located next to the executing assembly.
public void DetectUnicodeEncodingTest_ANSI()
{
    // Arrange: resolve the sample relative to the directory of the executing assembly.
    FileInfo assemblyFile = new FileInfo(Assembly.GetExecutingAssembly().Location);
    string samplePath = Path.Combine(assemblyFile.DirectoryName, "Resources\\ANSI.txt");

    // Act
    Encoding detected = EncodingUtility.DetectUnicodeEncoding(samplePath);

    // Assert: no encoding is expected to be reported for this sample.
    Assert.IsNull(detected);
}
// Checks that EncodingUtility.DetectUnicodeEncoding reports big-endian UTF-16
// for the "Resources\UTF-16-Big.txt" sample located next to the executing assembly.
public void DetectUnicodeEncodingTest_UTF16_Big()
{
    // Arrange: resolve the sample relative to the directory of the executing assembly.
    FileInfo assemblyFile = new FileInfo(Assembly.GetExecutingAssembly().Location);
    string samplePath = Path.Combine(assemblyFile.DirectoryName, "Resources\\UTF-16-Big.txt");

    // UnicodeEncoding(bigEndian: true, byteOrderMark: true).
    Encoding bigEndianUtf16 = new UnicodeEncoding(true, true);

    // Act
    Encoding detected = EncodingUtility.DetectUnicodeEncoding(samplePath);

    // Assert
    Assert.AreEqual(bigEndianUtf16, detected);
}
// Checks that EncodingUtility.DetectUnicodeEncoding reports Encoding.UTF32
// for the "Resources\UTF-32-Little.txt" sample located next to the executing assembly.
public void DetectUnicodeEncodingTest_UTF32_Little()
{
    // Arrange: resolve the sample relative to the directory of the executing assembly.
    FileInfo assemblyFile = new FileInfo(Assembly.GetExecutingAssembly().Location);
    string samplePath = Path.Combine(assemblyFile.DirectoryName, "Resources\\UTF-32-Little.txt");

    Encoding littleEndianUtf32 = Encoding.UTF32;

    // Act
    Encoding detected = EncodingUtility.DetectUnicodeEncoding(samplePath);

    // Assert
    Assert.AreEqual(littleEndianUtf32, detected);
}
/// <summary>
/// Gets the response content as a string.
/// </summary>
/// <remarks>
/// The encoding used to decode the content is chosen in this order:
/// the <paramref name="encoding"/> argument; the result of <c>HttpHelper.GetTextEncoding</c>;
/// the result of <c>EncodingUtility.DetectUnicodeEncoding</c> on the raw bytes; a charset
/// declaration found by <c>g_charsetRegex</c>; a known encoding name inside a
/// <c>&lt;meta ...&gt;</c> tag; and finally <c>DefaultContentEncoding</c>.
/// </remarks>
/// <param name="response">The HTTP response whose content is read.</param>
/// <param name="encoding">The encoding used to decode the content, or <c>null</c> to detect it.</param>
/// <param name="buffer">The buffer handed to <c>GetResponseContent</c> when reading the content.</param>
/// <returns>The decoded response content.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="System.NotSupportedException">Not supported character set.</exception>
public static string GetResponseString(this HttpWebResponse response, Encoding encoding, byte[] buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException("buffer");
    }

    if (encoding == null)
    {
        // NOTE(review): presumably derived from the response headers - confirm in HttpHelper.
        encoding = HttpHelper.GetTextEncoding(response);
    }

    byte[] data = GetResponseContent(response, buffer);

    if (encoding == null)
    {
        // NOTE(review): presumably byte-order-mark based - confirm in EncodingUtility.
        encoding = EncodingUtility.DetectUnicodeEncoding(data);
    }

    if (encoding != null)
    {
        return encoding.GetString(data);
    }

    // Nothing conclusive so far: decode with the default encoding so the text
    // itself can be searched for an encoding declaration.
    string content = DefaultContentEncoding.GetString(data);

    encoding = ResolveDeclaredCharset(content);
    if (encoding == null)
    {
        encoding = ResolveMetaTagEncoding(content);
    }

    /*
     * If the encoding cannot be determined we use the default encoding; the same
     * text is also reused when the detected encoding equals the default one,
     * because the content has already been decoded with it.
     */
    if (encoding == null || DefaultContentEncoding.Equals(encoding))
    {
        return content;
    }

    return encoding.GetString(data);
}

/// <summary>
/// Resolves the encoding named by the first <c>g_charsetRegex</c> match in
/// <paramref name="content"/>; returns <c>null</c> when nothing matches or the name is not valid.
/// </summary>
private static Encoding ResolveDeclaredCharset(string content)
{
    // Regex.Match never returns null, so only Success needs to be checked.
    Match match = g_charsetRegex.Match(content);
    if (!match.Success)
    {
        return null;
    }

    string charset = match.Groups[1].Value;

    // Charset names are protocol tokens, so lowercase them culture-independently.
    switch (charset.ToLowerInvariant())
    {
        case "utf8":
        case "utf-8":
            return Encoding.UTF8;

        case "utf16":
        case "utf-16":
            return Encoding.Unicode;

        case "utf32":
        case "utf-32":
            return Encoding.UTF32;

        default:
            try
            {
                return Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // Deliberate best effort: an invalid charset name leaves the
                // encoding undetermined so the caller can try other sources.
                return null;
            }
    }
}

/// <summary>
/// Returns the encoding for the first <c>&lt;meta ...&gt;</c> tag in <paramref name="content"/>
/// that contains a known encoding name; returns <c>null</c> when no tag does.
/// </summary>
private static Encoding ResolveMetaTagEncoding(string content)
{
    foreach (string tag in content.Between("<meta ", ">", StringComparison.OrdinalIgnoreCase))
    {
        // Encoding names are protocol tokens, so lowercase them culture-independently.
        string lowered = tag.ToLowerInvariant();

        if (lowered.Contains("utf8") || lowered.Contains("utf-8"))
        {
            return Encoding.UTF8;
        }

        if (lowered.Contains("utf16") || lowered.Contains("utf-16"))
        {
            return Encoding.Unicode;
        }

        if (lowered.Contains("utf32") || lowered.Contains("utf-32"))
        {
            return Encoding.UTF32;
        }

        if (lowered.Contains("gb2312"))
        {
            return Encoding.GetEncoding("gb2312");
        }

        if (lowered.Contains("gbk"))
        {
            return Encoding.GetEncoding("gbk");
        }
    }

    return null;
}