Esempio n. 1
0
        /// <summary>
        /// Returns the decoded text of the given file.
        /// </summary>
        /// <remarks>
        /// All work is delegated to <c>GetEncodingFileInfo</c>, which picks the encoding and
        /// decodes the bytes; this method only hands back the resulting <c>Contents</c> value.
        /// </remarks>
        /// <param name="file">Path of the file to read.</param>
        /// <returns>
        /// The <c>Contents</c> of the info object produced for the file. When the file is missing
        /// or reading fails, that is the value held by a freshly constructed EncodingFileInfo.
        /// </returns>
        public static String ReadFile(String file)
        {
            return GetEncodingFileInfo(file).Contents;
        }
Esempio n. 2
0
        /// <summary>
        /// Tells whether the given file starts with a byte order mark.
        /// </summary>
        /// <remarks>
        /// The answer is the <c>ContainsBOM</c> flag reported by <c>GetEncodingFileInfo</c>, so only
        /// the byte order marks recognised there are taken into account.
        /// </remarks>
        /// <param name="file">Path of the file to inspect.</param>
        /// <returns>
        /// The <c>ContainsBOM</c> value of the info object produced for the file. When the file is
        /// missing or reading fails, that is the value held by a freshly constructed EncodingFileInfo.
        /// </returns>
        public static Boolean ContainsBOM(String file)
        {
            return GetEncodingFileInfo(file).ContainsBOM;
        }
Esempio n. 3
0
        /// <summary>
        /// Reads the file once and collects its encoding information together with its decoded contents.
        /// </summary>
        /// <remarks>
        /// Detection order: byte order marks for UTF-8, UTF-32 (little endian, then big endian),
        /// UTF-7 and UTF-16 (little endian, then big endian). Without a BOM the data is treated as
        /// UTF-8 when <c>ContainsInvalidUTF8Bytes</c> reports no invalid bytes, and as the system
        /// default encoding otherwise. The decoded contents never include the BOM bytes.
        /// </remarks>
        /// <param name="file">Path of the file to inspect.</param>
        /// <returns>
        /// The populated info object, or a freshly constructed EncodingFileInfo when the file does
        /// not exist or any exception is raised while reading or decoding it.
        /// </returns>
        public static EncodingFileInfo GetEncodingFileInfo(String file)
        {
            EncodingFileInfo info = new EncodingFileInfo();

            try
            {
                if (!File.Exists(file))
                {
                    return(info);
                }

                Byte[]   bytes     = File.ReadAllBytes(file);
                Int32    bomLength = 0;
                Encoding encoding;

                if (bytes.Length > 2 && bytes[0] == 0xef && bytes[1] == 0xbb && bytes[2] == 0xbf)
                {
                    encoding  = Encoding.UTF8;
                    bomLength = 3;
                }
                // UTF-32 LE must be tested before UTF-16 LE: its BOM starts with the same FF FE pair.
                else if (bytes.Length > 3 && bytes[0] == 0xff && bytes[1] == 0xfe && bytes[2] == 0x00 && bytes[3] == 0x00)
                {
                    encoding  = Encoding.UTF32;
                    bomLength = 4;
                }
                else if (bytes.Length > 3 && bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xfe && bytes[3] == 0xff)
                {
                    // Big endian UTF-32 has no static accessor on Encoding.
                    encoding  = new UTF32Encoding(true, true);
                    bomLength = 4;
                }
                else if (bytes.Length > 3 && bytes[0] == 0x2b && bytes[1] == 0x2f && bytes[2] == 0x76 &&
                         (bytes[3] == 0x38 || bytes[3] == 0x39 || bytes[3] == 0x2b || bytes[3] == 0x2f))
                {
                    encoding = Encoding.UTF7;
                    // A '-' (0x2D) directly after the four byte signature is skipped as part of the BOM.
                    bomLength = (bytes.Length > 4 && bytes[4] == 0x2D) ? 5 : 4;
                }
                else if (bytes.Length > 1 && bytes[0] == 0xff && bytes[1] == 0xfe)
                {
                    encoding  = Encoding.Unicode;
                    bomLength = 2;
                }
                else if (bytes.Length > 1 && bytes[0] == 0xfe && bytes[1] == 0xff)
                {
                    encoding  = Encoding.BigEndianUnicode;
                    bomLength = 2;
                }
                else if (!ContainsInvalidUTF8Bytes(bytes))
                {
                    // No BOM, but every byte sequence is acceptable as UTF-8.
                    encoding = Encoding.UTF8;
                }
                else
                {
                    // Fallback codepage: without it the charset and code page would stay unset and
                    // the contents of every non UTF-8 file would be lost. A charset detector could
                    // be plugged in here to refine the guess.
                    encoding = Encoding.Default;
                }

                // BOM related members are only touched when a BOM was actually found.
                if (bomLength > 0)
                {
                    info.BomLength   = bomLength;
                    info.ContainsBOM = true;
                }
                info.Charset  = encoding.WebName;
                info.CodePage = encoding.CodePage;

                // Nothing to decode for an empty file or a file that consists of the BOM only.
                if (bytes.Length > bomLength)
                {
                    info.Contents = encoding.GetString(bytes, bomLength, bytes.Length - bomLength);
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: callers get an untouched info object instead of an exception.
                info = new EncodingFileInfo();
            }
            return(info);
        }
Esempio n. 4
0
        /// <summary>
        /// Returns the code page determined for the given file.
        /// </summary>
        /// <remarks>
        /// The value is the <c>CodePage</c> reported by <c>GetEncodingFileInfo</c>, which derives it
        /// from the bytes of the file.
        /// </remarks>
        /// <param name="file">Path of the file to inspect.</param>
        /// <returns>
        /// The <c>CodePage</c> of the info object produced for the file. When the file is missing
        /// or reading fails, that is the value held by a freshly constructed EncodingFileInfo.
        /// </returns>
        public static Int32 GetFileCodepage(String file)
        {
            return GetEncodingFileInfo(file).CodePage;
        }