Esempio n. 1
0
        /**
         * Attempts to determine the encoding of the body. If it can't be determined, we use
         * DEFAULT_ENCODING instead.
         *
         * The charset declared in the Content-Type header wins when present. Otherwise the
         * body is run through a CharsetDetector and, if a Content-Type header exists, the
         * detected charset is written back into that header.
         *
         * @param headers Header collection; its "Content-Type" entry is read and may be replaced.
         * @param body    Raw body bytes; may be null or empty.
         * @return The declared or detected encoding (upper-cased), or DEFAULT_ENCODING.
         */
        private static String getAndUpdateEncoding(NameValueCollection headers, byte[] body)
        {
            String contentType = headers["Content-Type"];
            if (String.IsNullOrEmpty(contentType))
            {
                // Treat an empty header exactly like a missing one.
                contentType = null;
            }

            if (contentType != null)
            {
                String[] parts = contentType.Split(';');
                if (BINARY_CONTENT_TYPES.Contains(parts[0]))
                {
                    return DEFAULT_ENCODING;
                }

                // Look at every parameter after the media type, not only the first one, so
                // that "text/html; foo=bar; charset=utf-8" is honoured as well.
                for (int i = 1; i < parts.Length; i++)
                {
                    int offset = parts[i].IndexOf("charset=", StringComparison.Ordinal);
                    if (offset == -1)
                    {
                        continue;
                    }

                    // Some servers include quotes around the charset:
                    //   Content-Type: text/html; charset="UTF-8"
                    // Trim('"') drops both the opening and the closing quote and, unlike
                    // indexing charset[0], is safe when the value is empty.
                    // ToUpperInvariant avoids culture-specific casing (e.g. Turkish dotted I).
                    String charset = parts[i].Substring(offset + 8)
                                             .Trim()
                                             .Trim('"')
                                             .ToUpperInvariant();
                    if (charset.Length > 0)
                    {
                        return charset;
                    }
                }
            }

            if (body == null || body.Length == 0)
            {
                return DEFAULT_ENCODING;
            }

            // If the header doesn't specify the charset, try to determine it by examining the content.
            CharsetDetector detector = new CharsetDetector();
            detector.setText(body);
            CharsetMatch match = detector.detect();

            // NOTE(review): presumably detect() yields null when nothing matches (as in ICU);
            // fall back to the default rather than dereferencing null - confirm against the detector.
            if (match == null)
            {
                return DEFAULT_ENCODING;
            }

            String detected = match.getName().ToUpperInvariant();

            if (contentType != null)
            {
                // Record the charset in the content-type header so that its value can be cached
                // and re-used. This is a BIG performance win.
                // Set() replaces the existing value; Add() would append a second Content-Type
                // value and the header would read back as "type,type; charset=...".
                headers.Set("Content-Type", contentType + "; charset=" + detected);
            }
            return detected;
        }
Esempio n. 2
0
        /// <summary>
        /// Runs detection on <paramref name="det"/> and reports, via <c>Errln</c>, any mismatch
        /// between the best match and the expected encoding, language and decoded text.
        /// </summary>
        /// <param name="det">Detector to call <c>Detect()</c> on.</param>
        /// <param name="testString">Text that the decoded match is expected to equal.</param>
        /// <param name="encoding">Expected encoding name.</param>
        /// <param name="language">Expected language, or null when none is expected.</param>
        /// <param name="id">Identifier prefixed to every error message.</param>
        private void CheckMatch(CharsetDetector det, String testString,
                                String encoding, String language, String id)
        {
            CharsetMatch m = det.Detect();

            // NOTE(review): presumably Detect() returns null when nothing matches (as in ICU);
            // report it as a detection failure instead of crashing - confirm against the detector.
            if (m == null)
            {
                Errln(id + ": encoding detection failure - expected " + encoding
                      + ", got no match");
                return;
            }

            String detectedName = m.GetName();

            if (!String.Equals(detectedName, encoding, StringComparison.Ordinal))
            {
                Errln(id + ": encoding detection failure - expected " + encoding
                      + ", got " + detectedName);
                return;
            }

            String charsetMatchLanguage = m.GetLanguage();

            // The static String.Equals is null-safe on both sides. It covers all three cases
            // (different values, only expected is null, only detected is null) without
            // dereferencing a null detected language.
            if (!String.Equals(language, charsetMatchLanguage, StringComparison.Ordinal))
            {
                Errln(id + ", " + encoding
                      + ": language detection failure - expected " + language
                      + ", got " + charsetMatchLanguage);
            }

            // The round-trip checks below are skipped for UTF-32 encodings.
            if (encoding.StartsWith("UTF-32", StringComparison.Ordinal))
            {
                return;
            }

            String decoded = m.GetString();

            if (!String.Equals(testString, decoded, StringComparison.Ordinal))
            {
                Errln(id + ", " + encoding
                      + ": getString() didn't return the original string!");
            }

            decoded = StringFromReader(m.GetReader());

            if (!String.Equals(testString, decoded, StringComparison.Ordinal))
            {
                Errln(id + ", " + encoding
                      + ": getReader() didn't yield the original string!");
            }
        }