/// <summary>
/// Determines the character encoding name to use for the content of a URL connection.
/// </summary>
/// <param name="urlConnection">Connection whose Content-Type header value is parsed.</param>
/// <returns>
/// The encoding reported by the parsed Content-Type header when there is one;
/// otherwise "8859_1" for <c>text/*</c> content types and "utf-8" for everything else
/// (including a missing content type).
/// </returns>
private static string GetCharacterEncoding(URLConnection urlConnection)
{
    ParsedContentType pct = new ParsedContentType(urlConnection.GetContentType());

    // An encoding stated in the header always wins.
    string encoding = pct.GetEncoding();
    if (encoding != null)
    {
        return encoding;
    }

    // MIME type prefixes are protocol tokens, so compare ordinally rather than
    // with the current culture's linguistic rules.
    string contentType = pct.GetContentType();
    bool isText = contentType != null
        && contentType.StartsWith("text/", System.StringComparison.Ordinal);

    // NOTE(review): "8859_1" is presumably the Java-style alias for ISO-8859-1 — confirm
    // that the consumer of this name resolves it.
    return isText ? "8859_1" : "utf-8";
}
/// <summary>
/// Reads the complete content of a local file or a URL and returns it either as raw bytes
/// or as decoded text.
/// </summary>
/// <param name="path">
/// Location to read. It is read through a URL connection when <c>ToUrl(path)</c> yields a
/// URL, and as a local file when it yields null.
/// </param>
/// <param name="convertToString">
/// When false the raw <c>byte[]</c> is returned; when true the bytes are decoded to a string.
/// </param>
/// <param name="defaultEncoding">
/// Encoding name used when neither the Content-Type header nor a byte order mark
/// determines one. May be null, in which case a location-dependent default is chosen.
/// </param>
/// <returns>
/// A <c>byte[]</c> when <paramref name="convertToString"/> is false; otherwise a
/// <c>string</c> with a leading U+FEFF (BOM) character removed if present.
/// </returns>
/// <exception cref="System.IO.IOException"></exception>
public static object ReadFileOrUrl(string path, bool convertToString, string defaultEncoding)
{
    Uri url = ToUrl(path);
    Stream @is = null;
    int capacityHint;
    string encoding = null;
    string contentType = null;
    byte[] data;
    try
    {
        if (url == null)
        {
            // Local file: no Content-Type information is available.
            FilePath file = new FilePath(path);
            capacityHint = (int)file.Length();
            @is = new FileInputStream(file);
        }
        else
        {
            URLConnection uc = url.OpenConnection();
            @is = uc.GetInputStream();
            if (convertToString)
            {
                ParsedContentType pct = new ParsedContentType(uc.GetContentType());
                contentType = pct.GetContentType();
                encoding = pct.GetEncoding();
            }
            capacityHint = uc.GetContentLength();
            // Ignore insane values for Content-Length
            if (capacityHint > (1 << 20))
            {
                capacityHint = -1;
            }
        }

        // Unknown, zero, or rejected size: fall back to a modest initial buffer.
        if (capacityHint <= 0)
        {
            capacityHint = 4096;
        }
        data = Kit.ReadStream(@is, capacityHint);
    }
    finally
    {
        if (@is != null)
        {
            @is.Close();
        }
    }

    if (!convertToString)
    {
        return data;
    }

    if (encoding == null)
    {
        // None explicitly specified in Content-type header. Use RFC-4329
        // 4.2.2 section to autodetect
        encoding = DetectBomEncoding(data);
    }
    if (encoding == null)
    {
        // No autodetect. See if we have explicit value on command line
        encoding = defaultEncoding;
    }
    if (encoding == null)
    {
        // No explicit encoding specification
        if (url == null)
        {
            // Local files default to system encoding
            encoding = Runtime.GetProperty("file.encoding");
        }
        else if (contentType != null
            && contentType.StartsWith("application/", System.StringComparison.Ordinal))
        {
            // application/* types default to UTF-8
            encoding = "UTF-8";
        }
        else
        {
            // Everything else (text/* MIME types, or no content type) defaults to US-ASCII
            encoding = "US-ASCII";
        }
    }

    string strResult = Sharpen.Runtime.GetStringForBytes(data, encoding);
    // Skip BOM
    if (strResult.Length > 0 && strResult[0] == '\uFEFF')
    {
        strResult = Sharpen.Runtime.Substring(strResult, 1);
    }
    return strResult;
}

/// <summary>
/// Returns the encoding name indicated by a byte order mark at the start of
/// <paramref name="data"/>, or null when no known BOM is present.
/// </summary>
private static string DetectBomEncoding(byte[] data)
{
    // C# bytes are unsigned (0..255), so the BOM octets must be written as 0xFF/0xFE/...
    // Comparing against the Java signed values (-1, -2, ...) can never match.
    int length = data.Length;

    // The 4-byte UTF-32 marks are tested first because the UTF-32LE mark
    // (FF FE 00 00) begins with the UTF-16LE mark (FF FE).
    if (length > 3 && data[0] == 0xFF && data[1] == 0xFE && data[2] == 0 && data[3] == 0)
    {
        return "UTF-32LE";
    }
    if (length > 3 && data[0] == 0 && data[1] == 0 && data[2] == 0xFE && data[3] == 0xFF)
    {
        return "UTF-32BE";
    }
    if (length > 2 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
    {
        return "UTF-8";
    }
    if (length > 1 && data[0] == 0xFF && data[1] == 0xFE)
    {
        return "UTF-16LE";
    }
    if (length > 1 && data[0] == 0xFE && data[1] == 0xFF)
    {
        return "UTF-16BE";
    }
    return null;
}