Esempio n. 1
0
        /**
         * Attempts to determine the encoding of the body. If it can't be determined, we use
         * DEFAULT_ENCODING instead.
         *
         * The charset is taken from the first non-empty "charset=" parameter of the
         * Content-Type header when one is present. Otherwise the body bytes are handed to a
         * CharsetDetector and the detected name is written back into the Content-Type header
         * so that later calls can read it from there.
         *
         * @param headers Headers to inspect; the "Content-Type" entry is replaced when a
         *                charset had to be detected from the body.
         * @param body    Raw body bytes; may be null or empty.
         * @return The upper-cased charset name, or DEFAULT_ENCODING.
         */
        private static String getAndUpdateEncoding(NameValueCollection headers, byte[] body)
        {
            String values      = headers["Content-Type"];
            String contentType = String.IsNullOrEmpty(values) ? null : values;

            if (contentType != null)
            {
                String[] parts = contentType.Split(';');
                if (BINARY_CONTENT_TYPES.Contains(parts[0]))
                {
                    return(DEFAULT_ENCODING);
                }

                // Look at every parameter, not only the first one, so that headers such as
                //   Content-Type: text/html; boundary=x; charset=UTF-8
                // are honoured instead of getting a second charset appended below.
                for (int i = 1; i < parts.Length; i++)
                {
                    // Parameter names are case-insensitive; compare ordinally so the result
                    // does not depend on the current culture.
                    int offset = parts[i].IndexOf("charset=", StringComparison.OrdinalIgnoreCase);
                    if (offset == -1)
                    {
                        continue;
                    }

                    // Some servers include quotes around the charset:
                    //   Content-Type: text/html; charset="UTF-8"
                    // Strip surrounding whitespace and both quotes. ToUpperInvariant keeps
                    // names such as "iso-8859-1" stable under cultures like tr-TR.
                    String charset = parts[i].Substring(offset + 8)
                                     .Trim(' ', '\t', '"')
                                     .ToUpperInvariant();
                    if (charset.Length > 0)
                    {
                        return(charset);
                    }
                    // An empty "charset=" carries no information; keep looking.
                }
            }

            if (body == null || body.Length == 0)
            {
                return(DEFAULT_ENCODING);
            }

            // If the header doesn't specify the charset, try to determine it by examining the content.
            CharsetDetector detector = new CharsetDetector();

            detector.setText(body);
            CharsetMatch match = detector.detect();

            // NOTE(review): ICU-style detectors return null when nothing matches; presumably
            // this port does too - confirm. Falling back keeps the documented contract.
            if (match == null)
            {
                return(DEFAULT_ENCODING);
            }

            String detected = match.getName().ToUpperInvariant();

            if (contentType != null)
            {
                // Record the charset in the content-type header so that its value can be cached
                // and re-used. This is a BIG performance win.
                // The indexer replaces the entry; Add() would append a second value to the key.
                headers["Content-Type"] = contentType + "; charset=" + detected;
            }
            return(detected);
        }
Esempio n. 2
0
        /**
         * Attempts to determine the encoding of the body. If it can't be determined, we use
         * DEFAULT_ENCODING instead.
         *
         * A non-empty "charset=" parameter in the Content-Type header wins. When the header
         * has none, the body is examined with a CharsetDetector and the result is stored back
         * into the Content-Type header so subsequent calls find it there.
         *
         * @param headers Headers to inspect; the "Content-Type" entry is replaced when a
         *                charset had to be detected from the body.
         * @param body    Raw body bytes; may be null or empty.
         * @return The upper-cased charset name, or DEFAULT_ENCODING.
         */
        private static String getAndUpdateEncoding(NameValueCollection headers, byte[] body)
        {
            String values = headers["Content-Type"];
            String contentType = String.IsNullOrEmpty(values) ? null : values;
            if (contentType != null)
            {
                String[] parts = contentType.Split(';');
                if (BINARY_CONTENT_TYPES.Contains(parts[0]))
                {
                    return DEFAULT_ENCODING;
                }

                // Scan all parameters rather than requiring exactly one, so a charset that
                // follows (or precedes) another parameter is still recognised.
                for (int i = 1; i < parts.Length; i++)
                {
                    // Ordinal, case-insensitive match: parameter names are case-insensitive
                    // and the lookup must not vary with the current culture.
                    int offset = parts[i].IndexOf("charset=", StringComparison.OrdinalIgnoreCase);
                    if (offset == -1)
                    {
                        continue;
                    }

                    // Some servers include quotes around the charset:
                    //   Content-Type: text/html; charset="UTF-8"
                    // Remove surrounding whitespace and both quotes, then upper-case in a
                    // culture-independent way.
                    String charset = parts[i].Substring(offset + 8)
                                     .Trim(' ', '\t', '"')
                                     .ToUpperInvariant();
                    if (charset.Length > 0)
                    {
                        return charset;
                    }
                    // Empty value ("charset=") tells us nothing; try the next parameter.
                }
            }

            if (body == null || body.Length == 0)
            {
                return DEFAULT_ENCODING;
            }

            // If the header doesn't specify the charset, try to determine it by examining the content.
            CharsetDetector detector = new CharsetDetector();
            detector.setText(body);
            CharsetMatch match = detector.detect();

            // NOTE(review): ICU-style detectors return null when nothing matches; presumably
            // this port does too - confirm. Falling back keeps the documented contract.
            if (match == null)
            {
                return DEFAULT_ENCODING;
            }

            String detected = match.getName().ToUpperInvariant();

            if (contentType != null)
            {
                // Record the charset in the content-type header so that its value can be cached
                // and re-used. This is a BIG performance win.
                // Assign through the indexer so the entry is replaced; Add() would append a
                // second value under the same key.
                headers["Content-Type"] = contentType + "; charset=" + detected;
            }
            return detected;
        }