/**
 * Attempts to determine the encoding of the body. If it can't be determined, we use
 * DEFAULT_ENCODING instead.
 *
 * The "Content-Type" header is consulted first: a media type listed in
 * BINARY_CONTENT_TYPES yields DEFAULT_ENCODING, and an explicit "charset=" parameter is
 * returned upper-cased with surrounding whitespace and double quotes removed. Otherwise
 * the body bytes are handed to CharsetDetector and, when a non-empty Content-Type header
 * is present, the detected charset is written back into that header.
 *
 * @param headers Headers to inspect; the "Content-Type" entry may be replaced as a side effect.
 * @param body Raw body bytes; may be null or empty, in which case no detection is attempted.
 * @return The detected encoding or DEFAULT_ENCODING.
 */
private static String getAndUpdateEncoding(NameValueCollection headers, byte[] body)
{
    const String charsetKey = "charset=";

    String contentType = headers["Content-Type"];
    if (String.IsNullOrEmpty(contentType))
    {
        // Treat an empty header exactly like a missing one.
        contentType = null;
    }

    if (contentType != null)
    {
        String[] parts = contentType.Split(';');
        if (BINARY_CONTENT_TYPES.Contains(parts[0]))
        {
            return DEFAULT_ENCODING;
        }

        // Look at every parameter after the media type, not only the first one, so that
        // headers such as "text/html; boundary=x; charset=utf-8" are still honoured.
        for (int i = 1; i < parts.Length; i++)
        {
            int offset = parts[i].IndexOf(charsetKey, StringComparison.OrdinalIgnoreCase);
            if (offset == -1)
            {
                continue;
            }

            // Some servers include quotes around the charset:
            // Content-Type: text/html; charset="UTF-8"
            // Trim('"') strips both the opening and the closing quote and is safe on an
            // empty value, unlike indexing charset[0] or Substring(1, Length).
            String charset = parts[i].Substring(offset + charsetKey.Length).Trim().Trim('"').Trim();
            if (charset.Length > 0)
            {
                // Invariant casing keeps names like "iso-8859-1" stable under any culture.
                return charset.ToUpperInvariant();
            }
            // An empty "charset=" carries no information; keep looking / fall through.
        }
    }

    if (body == null || body.Length == 0)
    {
        return DEFAULT_ENCODING;
    }

    // If the header doesn't specify the charset, try to determine it by examining the content.
    CharsetDetector detector = new CharsetDetector();
    detector.setText(body);
    CharsetMatch match = detector.detect();
    if (match == null)
    {
        // NOTE(review): assumes detect() can return null when nothing matches, as the
        // ICU detector this API mirrors does - confirm against the local implementation.
        return DEFAULT_ENCODING;
    }

    String detected = match.getName().ToUpperInvariant();
    if (contentType != null)
    {
        // Record the charset in the content-type header so that its value can be cached
        // and re-used. This is a BIG performance win.
        // Set() replaces the entry; Add() would append a second, comma-joined value.
        headers.Set("Content-Type", contentType + "; charset=" + detected);
    }
    return detected;
}
/**
 * Attempts to determine the encoding of the body. If it can't be determined, we use
 * DEFAULT_ENCODING instead.
 *
 * The "Content-Type" header is consulted first: a media type listed in
 * BINARY_CONTENT_TYPES yields DEFAULT_ENCODING, and an explicit "charset=" parameter is
 * returned upper-cased with surrounding whitespace and double quotes removed. Otherwise
 * the body bytes are handed to CharsetDetector and, when a non-empty Content-Type header
 * is present, the detected charset is written back into that header.
 *
 * @param headers Headers to inspect; the "Content-Type" entry may be replaced as a side effect.
 * @param body Raw body bytes; may be null or empty, in which case no detection is attempted.
 * @return The detected encoding or DEFAULT_ENCODING.
 */
private static String getAndUpdateEncoding(NameValueCollection headers, byte[] body)
{
    const String charsetKey = "charset=";

    String contentType = headers["Content-Type"];
    if (String.IsNullOrEmpty(contentType))
    {
        // Treat an empty header exactly like a missing one.
        contentType = null;
    }

    if (contentType != null)
    {
        String[] parts = contentType.Split(';');
        if (BINARY_CONTENT_TYPES.Contains(parts[0]))
        {
            return DEFAULT_ENCODING;
        }

        // Look at every parameter after the media type, not only the first one, so that
        // headers such as "text/html; boundary=x; charset=utf-8" are still honoured.
        for (int i = 1; i < parts.Length; i++)
        {
            int offset = parts[i].IndexOf(charsetKey, StringComparison.OrdinalIgnoreCase);
            if (offset == -1)
            {
                continue;
            }

            // Some servers include quotes around the charset:
            // Content-Type: text/html; charset="UTF-8"
            // Trim('"') strips both the opening and the closing quote and is safe on an
            // empty value, unlike indexing charset[0] or Substring(1, Length).
            String charset = parts[i].Substring(offset + charsetKey.Length).Trim().Trim('"').Trim();
            if (charset.Length > 0)
            {
                // Invariant casing keeps names like "iso-8859-1" stable under any culture.
                return charset.ToUpperInvariant();
            }
            // An empty "charset=" carries no information; keep looking / fall through.
        }
    }

    if (body == null || body.Length == 0)
    {
        return DEFAULT_ENCODING;
    }

    // If the header doesn't specify the charset, try to determine it by examining the content.
    CharsetDetector detector = new CharsetDetector();
    detector.setText(body);
    CharsetMatch match = detector.detect();
    if (match == null)
    {
        // NOTE(review): assumes detect() can return null when nothing matches, as the
        // ICU detector this API mirrors does - confirm against the local implementation.
        return DEFAULT_ENCODING;
    }

    String detected = match.getName().ToUpperInvariant();
    if (contentType != null)
    {
        // Record the charset in the content-type header so that its value can be cached
        // and re-used. This is a BIG performance win.
        // Set() replaces the entry; Add() would append a second, comma-joined value.
        headers.Set("Content-Type", contentType + "; charset=" + detected);
    }
    return detected;
}