/**
 * Attempts to determine the encoding of the body. If it can't be determined, we use
 * DEFAULT_ENCODING instead.
 *
 * Resolution order:
 *   1. If the media type of the Content-Type header is listed in BINARY_CONTENT_TYPES,
 *      DEFAULT_ENCODING is returned.
 *   2. If any parameter of the Content-Type header carries a non-empty "charset=" value,
 *      that value is returned upper-cased, with surrounding whitespace and double quotes removed.
 *   3. If the body is null or empty, DEFAULT_ENCODING is returned.
 *   4. Otherwise the charset is detected from the body. When a Content-Type header exists,
 *      it is replaced with a value that has "; charset=<detected>" appended.
 *
 * @param headers The headers; the "Content-Type" entry is read and may be overwritten.
 * @param body    The raw body bytes; may be null or empty.
 * @return The detected encoding or DEFAULT_ENCODING.
 */
private static String getAndUpdateEncoding(NameValueCollection headers, byte[] body)
{
    const String CHARSET_PARAM = "charset=";

    // Treat an empty header value the same as a missing header.
    String contentType = headers["Content-Type"];
    if (contentType != null && contentType.Length == 0)
    {
        contentType = null;
    }

    if (contentType != null)
    {
        String[] parts = contentType.Split(';');
        if (BINARY_CONTENT_TYPES.Contains(parts[0]))
        {
            return DEFAULT_ENCODING;
        }

        // Look at every parameter, not just the first one, so that headers such as
        // "text/html; charset=UTF-8; foo=bar" are honoured as well.
        for (int i = 1; i < parts.Length; i++)
        {
            // Ordinal, case-insensitive match: not affected by the current culture.
            int offset = parts[i].IndexOf(CHARSET_PARAM, StringComparison.OrdinalIgnoreCase);
            if (offset == -1)
            {
                continue;
            }

            // Some servers include quotes around the charset:
            // Content-Type: text/html; charset="UTF-8"
            // Strip both the leading and the trailing quote (and stray whitespace).
            String charset = parts[i].Substring(offset + CHARSET_PARAM.Length)
                                     .Trim()
                                     .Trim('"')
                                     .Trim()
                                     .ToUpperInvariant();

            // An empty value ("charset=") carries no information; keep looking and
            // eventually fall back to content detection.
            if (charset.Length > 0)
            {
                return charset;
            }
        }
    }

    if (body == null || body.Length == 0)
    {
        return DEFAULT_ENCODING;
    }

    // If the header doesn't specify the charset, try to determine it by examining the content.
    CharsetDetector detector = new CharsetDetector();
    detector.setText(body);
    CharsetMatch match = detector.detect();
    if (match == null)
    {
        // NOTE(review): ICU's detector returns null when nothing matches; assumed to hold
        // for this CharsetDetector as well - confirm.
        return DEFAULT_ENCODING;
    }

    // Invariant upper-casing keeps names such as "iso-8859-1" stable under every culture.
    String detected = match.getName().ToUpperInvariant();
    if (contentType != null)
    {
        // Record the charset in the content-type header so that its value can be cached
        // and re-used. This is a BIG performance win.
        // The indexer replaces the existing value; NameValueCollection.Add would append a
        // second value to the same key and later reads would return "old,new".
        headers["Content-Type"] = contentType + "; charset=" + detected;
    }
    return detected;
}
/**
 * Runs detection on an already-configured detector and reports, through Errln, every
 * way in which the result deviates from the expectation.
 *
 * Checks, in order:
 *   1. The detected encoding name equals the expected encoding (stops on failure).
 *   2. The detected language equals the expected language; null means "no language"
 *      on both sides.
 *   3. Unless the encoding starts with "UTF-32": the text returned by GetString() and
 *      the text read from GetReader() both equal testString.
 *
 * @param det        Detector to call Detect() on; its input is expected to be set by the caller.
 * @param testString The original text the decoded output is compared against.
 * @param encoding   Expected encoding name.
 * @param language   Expected language, or null when none is expected.
 * @param id         Identifier prefixed to every error message.
 */
private void CheckMatch(CharsetDetector det, String testString, String encoding, String language, String id)
{
    CharsetMatch m = det.Detect();
    if (m == null)
    {
        // Report the missing match instead of failing with a NullReferenceException.
        Errln(id + ": encoding detection failure - expected " + encoding + ", got no match");
        return;
    }

    String detectedName = m.GetName();
    if (!String.Equals(detectedName, encoding))
    {
        Errln(id + ": encoding detection failure - expected " + encoding + ", got " + detectedName);
        return;
    }

    // Static String.Equals is null-safe: it covers "both null", "one null" and
    // "both set but different" without dereferencing a null language.
    String charsetMatchLanguage = m.GetLanguage();
    if (!String.Equals(language, charsetMatchLanguage))
    {
        Errln(id + ", " + encoding + ": language detection failure - expected " + language + ", got " + charsetMatchLanguage);
    }

    // The round-trip checks below are skipped for UTF-32 encodings.
    // NOTE(review): presumably because decoding them is not supported here - confirm.
    if (encoding.StartsWith("UTF-32", StringComparison.Ordinal))
    {
        return;
    }

    String decoded = m.GetString();
    if (!String.Equals(testString, decoded))
    {
        Errln(id + ", " + encoding + ": getString() didn't return the original string!");
    }

    decoded = StringFromReader(m.GetReader());
    if (!String.Equals(testString, decoded))
    {
        Errln(id + ", " + encoding + ": getReader() didn't yield the original string!");
    }
}