IdentifyEncoding C# (CSharp)代码示例

示例#1

0

显示文件

        /// <summary>
        /// Resolves the text encoding of a file.
        /// </summary>
        /// <param name="path">Path of the file to inspect; it is wrapped in a <see cref="FileInfo"/>.</param>
        /// <returns>
        /// The <see cref="Encoding"/> obtained by passing the name reported by
        /// <c>IdentifyEncoding.GetEncodingName</c> to <c>Encoding.GetEncoding</c>.
        /// </returns>
        public static Encoding GetEncoding(String path)
        {
            FileInfo target = new FileInfo(path);
            IdentifyEncoding detector = new IdentifyEncoding();

            // Detection yields an encoding *name*; turn it into an Encoding instance.
            string detectedName = detector.GetEncodingName(target);
            return Encoding.GetEncoding(detectedName);
        }

示例#2

0

显示文件

 /// <summary>
 /// Raised when the window loads. Stores the current size in <c>Tag</c>, loads the converter
 /// maps, default regex list and configuration, then either opens the file named by the first
 /// command-line argument or creates the <c>IdentifyEncoding</c> instance on a background thread.
 /// </summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void MainForm_Load(object sender, EventArgs e)
 {
     Tag = Size;
     ReadConverterMapsList();
     ReadDefaultRegexList();
     ReadConfig();

     // Element 0 is the executable itself, so a user-supplied file is at index 1.
     string[] commandLineArgs = Environment.GetCommandLineArgs();
     if (commandLineArgs.Length > 1)
     {
         if (ReadSourceFile(commandLineArgs[1]))
         {
             ConvertSourceToDestText();
             txtSource.Select(0, 0);
         }
         return;
     }

     // Best effort: build the detector off the UI thread. A failure here must not prevent the
     // form from loading, but it is traced instead of being silently discarded.
     try
     {
         Thread identifyEncodingThread = new Thread(obj =>
         {
             try
             {
                 IdentifyEncoding = new IdentifyEncoding();
             }
             catch (Exception ex)
             {
                 // An exception escaping a thread body is unhandled; the outer try cannot see it.
                 System.Diagnostics.Trace.WriteLine("IdentifyEncoding background initialisation failed: " + ex);
             }
         });
         identifyEncodingThread.IsBackground = true;
         identifyEncodingThread.Start();
     }
     catch (Exception ex)
     {
         System.Diagnostics.Trace.WriteLine("Could not start IdentifyEncoding initialisation thread: " + ex);
     }
 }

示例#3

0

显示文件

        /// <summary>
        /// Re-encodes the file at <paramref name="filePath"/> in place, from the source encoding
        /// (auto-detected or chosen in <c>cmbSourceEncode</c>) to the encoding chosen in
        /// <c>cmbTargetEncode</c>. Optionally keeps a <c>.bak</c> copy of the original file, and
        /// strips a "[1]" marker from the file name after conversion.
        /// </summary>
        /// <param name="filePath">Path of the file to convert.</param>
        private void ConvertFileEncode(string filePath)
        {
            Encoding oriEncode;

            if (chkUnknownEncoding.Checked)  // auto-detect the source encoding
            {
                IdentifyEncoding ie           = new IdentifyEncoding();
                string           encodingName = ie.GetEncodingName(new FileInfo(filePath));
                if (encodingName == "UNKNOWN")
                {
                    // Detection failed: report it and leave the file untouched.
                    txtResult.Text += string.Format("\r\n{0}文件格式不正确或已损坏。 ", filePath);
                    return;
                }
                oriEncode = Encoding.GetEncoding(encodingName);
            }
            else
            {
                oriEncode = GetSelectEncoding(cmbSourceEncode.SelectedIndex);
            }

            string text = File.ReadAllText(filePath, oriEncode);

            if (chkIsBackup.Checked)  // backup
            {
                // Copy the raw bytes rather than re-writing the decoded text: a backup must stay
                // identical to the original even when the source encoding was guessed wrongly.
                File.Copy(filePath, filePath + ".bak", true);
            }
            File.WriteAllText(filePath, text, GetSelectEncoding(cmbTargetEncode.SelectedIndex));

            // string.Contains and string.Replace are both ordinal, so the check and the rename agree.
            const string duplicateMarker = "[1]";
            if (filePath.Contains(duplicateMarker))
            {
                File.Move(filePath, filePath.Replace(duplicateMarker, string.Empty));
            }
        }

示例#4

0

显示文件

    /// <summary>
    /// Detects the encoding of the file at <paramref name="sFromFilePath"/> and rewrites the
    /// file in place using <paramref name="encode"/>. Progress and failures are appended to
    /// <c>txtOutput</c>.
    /// </summary>
    /// <param name="sFromFilePath">Path of the file to convert; null, empty or blank paths are ignored.</param>
    /// <param name="encode">Target encoding used when the file is written back.</param>
    private void ConvertCP(string sFromFilePath, Encoding encode)
    {
        // Null is treated like a blank path instead of raising NullReferenceException.
        if (sFromFilePath == null || sFromFilePath.Trim().Length == 0)
        {
            return;
        }

        // Encoding detection.
        IdentifyEncoding ie           = new IdentifyEncoding();
        string           encodingName = ie.GetEncodingName(new FileInfo(sFromFilePath));

        // Ordinal, case-insensitive match: the sentinel is an identifier, not linguistic text.
        if (string.Equals(encodingName, "other", System.StringComparison.OrdinalIgnoreCase))
        {
            string message = string.Format("\r\n{0}文件格式不正确或已损坏。 ", sFromFilePath);
            txtOutput.AppendText(sFromFilePath + '\t' + message);
            return;
        }

        Encoding oriEncode = Encoding.GetEncoding(encodingName);

        string text = File.ReadAllText(sFromFilePath, oriEncode);
        File.WriteAllText(sFromFilePath, text, encode);
        txtOutput.AppendText(sFromFilePath);
    }

示例#5

0

显示文件

文件： EncodingInfo.cs 项目： jiaping/JPCMS

        /// <summary>
        /// Determines the encoding of the file at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the file to inspect.</param>
        /// <returns>
        /// The <see cref="Encoding"/> looked up from the name that
        /// <c>IdentifyEncoding.GetEncodingName</c> reports for the file.
        /// </returns>
        public static Encoding EncodingInfo(string path)
        {
            FileInfo source = new FileInfo(path);
            IdentifyEncoding detector = new IdentifyEncoding();

            // Map the detected encoding name onto a framework Encoding instance.
            string detectedName = detector.GetEncodingName(source);
            return Encoding.GetEncoding(detectedName);
        }

示例#6

0

显示文件

        /// <summary>
        /// Downloads <paramref name="url"/> as raw bytes and decodes them with the encoding that
        /// <c>IdentifyEncoding</c> detects from the payload.
        /// </summary>
        /// <param name="httpClient">Client used to perform the GET request.</param>
        /// <param name="url">Address of the resource to download.</param>
        /// <returns>The response body decoded to a string.</returns>
        public static async System.Threading.Tasks.Task <string> GetHtml(this HttpClient httpClient, string url)
        {
            var payload = await httpClient.GetByteArrayAsync(url);

            // The detector works on signed bytes, so convert before asking for the encoding name.
            var detector      = new IdentifyEncoding();
            var signedPayload = IdentifyEncoding.ToSByteArray(payload);
            var detectedName  = detector.GetEncodingString(signedPayload);

            return System.Text.Encoding.GetEncoding(detectedName).GetString(payload);
        }

示例#7

0

显示文件

文件： PresentBuffer.cs 项目： iamoatil/S_P_F_P_r-o

        /// <summary>
        /// Reads one <c>OneTimeRead</c>-sized chunk from the cache buffer, starting at
        /// <paramref name="offset"/> rounded down to a multiple of <c>OneTimeRead</c>, and
        /// publishes the decoded result through <c>Text</c>.
        /// </summary>
        /// <param name="offset">Requested start position; it is aligned down before reading.</param>
        public void Read(int offset)
        {
            // Integer division then multiplication snaps the offset to a chunk boundary.
            int chunkStart = offset / OneTimeRead * OneTimeRead;

            byte[] chunk = new byte[OneTimeRead];
            TotalLen      = _cacheBuffer.Read(chunkStart, chunk);
            StartedOffset = chunkStart;

            // Detect the encoding from the chunk, then decode only the first TotalLen bytes.
            var    signedChunk  = IdentifyEncoding.ToSByteArray(chunk);
            string detectedName = identifyEncoding.GetEncodingName(signedChunk);
            Encoding decoder    = Encoding.GetEncoding(detectedName);

            Text = decoder.GetString(chunk, 0, TotalLen);
        }

示例#8

0

显示文件

        /// <summary>
        /// Loads the source file into the source text box.
        /// </summary>
        /// <param name="path">Absolute path of the source file.</param>
        /// <returns>
        /// <c>true</c> when the file was read and displayed; <c>false</c> when the path is empty,
        /// the file does not exist, or reading it threw an exception.
        /// </returns>
        private bool ReadSourceFile(string path)
        {
            if (string.IsNullOrEmpty(path) || !File.Exists(path))
            {
                return false;
            }

            // Opening a file implies file-conversion mode; switch away from text-conversion mode.
            if (cboConvertMethod.SelectedIndex == (int)ConvertMethod.ConvertText)
            {
                cboConvertMethod.SelectedIndex = (int)ConvertMethod.ConvertFile;
            }

            SourceFilePath = path;

            // Create the detector on demand if it has not been created yet.
            if (IdentifyEncoding == null)
            {
                IdentifyEncoding = new IdentifyEncoding();
            }

            string fileText;
            string encodingLabel;

            try
            {
                bool autoDetect = cboReadEncoding.SelectedIndex == 0;
                if (autoDetect)
                {
                    // Index 0 means "detect": ask the detector for the encoding name.
                    encodingLabel = IdentifyEncoding.GetEncodingName(new FileInfo(SourceFilePath));
                    fileText      = File.ReadAllText(SourceFilePath, GetEncodingByName(encodingLabel));
                }
                else
                {
                    // Any other index is an explicit user choice.
                    encodingLabel = cboReadEncoding.SelectedItem.ToString();
                    fileText      = File.ReadAllText(SourceFilePath, GetReadEncoding(cboReadEncoding.SelectedIndex));
                }
            }
            catch (Exception)
            {
                // Any failure while detecting or reading resets the current file.
                SourceFilePath = "";
                return false;
            }

            // Type 0 content has its "\n" sequences expanded to "\r\n" before display.
            if (Tools.CheckNewLineType(fileText) == 0)
            {
                fileText = fileText.Replace("\n", "\r\n");
            }

            txtSource.Text       = fileText;
            lblFilePath.Text     = "文件路径：" + SourceFilePath;
            lblFileEncoding.Text = "文件编码：" + encodingLabel;
            return true;
        }

示例#9

0

显示文件

        /// <summary>
        /// Manual smoke test: asks <c>IdentifyEncoding</c> for the encoding name of a fixed URL,
        /// writes the result to the console, then waits for a key press.
        /// </summary>
        public static void Test()
        {
            string detectedName = null;
            IdentifyEncoding detector = new IdentifyEncoding();

            try
            {
                System.Uri page = new System.Uri("http://china5.nikkeibp.co.jp/china/news/com/200307/pr_com200307170131.html");
                detectedName = detector.GetEncodingName(page);
            }
            catch (System.Exception failure)
            {
                // Report the problem on stderr; the (null) result is still printed below.
                Console.Error.WriteLine("Bad URL " + failure.ToString());
            }

            Console.Write(detectedName);
            Console.Read();
        }

示例#10

0

显示文件

文件： CstEncoding.cs 项目： uvbs/M_Y_P_F_P_R_O

        /// <summary>
        /// Converts the first <paramref name="count"/> bytes of <paramref name="bytes"/> to text.
        /// </summary>
        /// <param name="encodingName">
        /// Null/blank to auto-detect the encoding, or "Hex" for a hex dump. Any other value
        /// yields an empty string.
        /// </param>
        /// <param name="bytes">Buffer holding the data.</param>
        /// <param name="count">Number of bytes, from the start of the buffer, to convert.</param>
        /// <returns>The decoded text, the hex representation, or <see cref="string.Empty"/>.</returns>
        public string GetString(string encodingName, byte[] bytes, int count)
        {
            if (string.IsNullOrWhiteSpace(encodingName))
            {
                try
                {
                    var      signedBytes  = IdentifyEncoding.ToSByteArray(bytes);
                    string   detectedName = _identifyEncoding.GetEncodingName(signedBytes);
                    Encoding decoder      = Encoding.GetEncoding(detectedName);
                    return decoder.GetString(bytes, 0, count);
                }
                catch (Exception)
                {
                    // Detection or decoding failed: fall through to the hex representation.
                    encodingName = "Hex";
                }
            }

            if (encodingName != "Hex")
            {
                return string.Empty;
            }

            return BytesToHexString(bytes, count);
        }

示例#11

0

显示文件

        /// <summary>
        /// Decodes the buffered response body (<c>MemoryStream</c>) to text.
        /// For CSS responses the charset is taken from the content itself via
        /// <c>RegexLibrary.RegCssContentType</c>; for everything else the encoding is chosen from
        /// the statistical detector, <c>IdentifyEncoding</c>, and a header/meta/BOM lookup.
        /// UTF-8 is used when no encoding could be determined.
        /// </summary>
        /// <param name="webResponse">Response whose status and content type drive the decoding.</param>
        /// <returns>
        /// The decoded body, or <see cref="string.Empty"/> when nothing is buffered, the response
        /// is null, or its status is not <see cref="HttpStatusCode.OK"/>.
        /// </returns>
        public string DecodeData(HttpWebResponse webResponse)
        {
            if (MemoryStream == null || MemoryStream.Length == 0)
            {
                return string.Empty;
            }

            if (webResponse == null || webResponse.StatusCode != HttpStatusCode.OK)
            {
                return string.Empty;
            }

            byte[] pageBytes = MemoryStream.ToBytes();
            Encoding encoding = null;

            // Ordinal, case-insensitive media-type check (no culture-dependent ToLower()).
            string contentType = webResponse.ContentType ?? string.Empty;
            if (contentType.IndexOf("text/css", System.StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // CSS: rewind the stream to its start.
                this.MemoryStream.Seek(0, SeekOrigin.Begin);
                // Reading as UTF-8 first still exposes the ASCII charset declaration even if
                // non-ASCII text is garbled; the body is then decoded again with the real encoding.
                string encode = Encoding.UTF8.GetString(this.MemoryStream.GetBuffer(), 0, (int)this.MemoryStream.Length);

                var mat = RegexLibrary.RegCssContentType.Match(encode);
                if (mat.Groups[2].Success)
                {
                    // The name comes from remote content; an unknown name must not throw.
                    encoding = TryResolveEncoding(mat.Groups[2].Value);
                }
            }
            else
            {
                // Statistical detection (the Firefox-derived universal charset detector).
                encoding = this.GetEncodingByUniversalCharDet(pageBytes);

                IdentifyEncoding ide = new IdentifyEncoding();

                // The detector takes signed bytes: reinterpret each byte (values > 127 wrap negative).
                sbyte[] mySByte = new sbyte[pageBytes.Length];
                for (int i = 0; i < pageBytes.Length; i++)
                {
                    mySByte[i] = unchecked((sbyte)pageBytes[i]);
                }

                var tempOTHER = ide.GetEncodingString(mySByte);
                if (tempOTHER == "OTHER")
                {
                    // Fall back to the headers / meta / BOM lookup.
                    Encoding secondEncoding = this.GetStringUsingEncoding(webResponse, pageBytes);

                    // Use it when it disagrees with the statistical guess, and also when there
                    // is no statistical guess at all. A null lookup result is ignored.
                    if (secondEncoding != null
                        && (encoding == null || encoding.EncodingName != secondEncoding.EncodingName))
                    {
                        encoding = secondEncoding;
                    }
                }
                else
                {
                    // Keep the statistical guess if the reported name is not a known encoding.
                    encoding = TryResolveEncoding(tempOTHER) ?? encoding;
                }
            }

            return (encoding ?? Encoding.UTF8).GetString(pageBytes);
        }

        /// <summary>
        /// Looks up an encoding by name, returning null instead of throwing when the name is
        /// empty or not recognised.
        /// </summary>
        private static Encoding TryResolveEncoding(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return null;
            }

            try
            {
                return Encoding.GetEncoding(name);
            }
            catch (System.ArgumentException)
            {
                return null;
            }
        }

示例#12

0

显示文件

文件： OraData.cs 项目： catontheway/myevo

        /// <summary>
        /// Reads a file as text (using the encoding detected by <c>IdentifyEncoding</c>) and
        /// stores it in the <c>file_context</c> column of <c>map_file</c> or <c>fight_file</c>.
        /// The value is bound as a CLOB parameter.
        /// </summary>
        /// <param name="file_type">1 selects <c>map_file</c> (keyed by <c>map_name</c>); any other value selects <c>fight_file</c> (keyed by <c>roll_type</c>).</param>
        /// <param name="DBKey">Key value of the row to update.</param>
        /// <param name="FullFileName">Path of the file whose content is stored.</param>
        /// <returns><c>true</c> when the update was committed; <c>false</c> when the file could not be opened or the database work failed (the error is logged).</returns>
        public static bool SaveFileToDB(int file_type, string DBKey, string FullFileName)
        {
            FileStream mapfs;

            try
            {
                mapfs = new FileStream(FullFileName, FileMode.Open, FileAccess.Read);
            }
            catch (Exception ex)
            {
                Logging.Write("SaveFileToDB error: " + ex.ToString());
                return false;
            }

            // The key is bound as a parameter instead of being formatted into the SQL text, so
            // quotes in DBKey can neither break nor alter the statement.
            string sql = file_type == 1
                ? "update map_file set file_context = :1 where map_name = :2"
                : "update fight_file set file_context = :1 where roll_type = :2";

            // The stream is released on every path, including failures.
            using (mapfs)
            {
                OracleTransaction transaction = null;

                try
                {
                    if (!isConnected)
                    {
                        OraConnect();
                    }
                    // The update must run inside a transaction.
                    transaction = conn.BeginTransaction();

                    using (OracleCommand oCmd = new OracleCommand(sql, conn))
                    {
                        // Parameters are added in placeholder order (content first, key second).
                        OracleParameter contentParam = oCmd.Parameters.Add("1", OracleDbType.Clob, ParameterDirection.Input);
                        // NOTE(review): Char is used so comparison semantics match the former
                        // string literal — confirm against the key column's declared type.
                        OracleParameter keyParam = oCmd.Parameters.Add("2", OracleDbType.Char, ParameterDirection.Input);
                        keyParam.Value = DBKey;

                        IdentifyEncoding sinodetector = new IdentifyEncoding();
                        FileInfo         finfo        = new FileInfo(FullFileName);
                        using (StreamReader sr = new StreamReader(mapfs, sinodetector.GetEncoding(sinodetector.GetEncodingName(finfo))))
                        {
                            contentParam.Value = sr.ReadToEnd();
                        }

                        oCmd.ExecuteNonQuery();
                    }

                    transaction.Commit();
                    return true;
                }
                catch (Exception ex)
                {
                    // transaction is still null if connecting or BeginTransaction failed.
                    if (transaction != null)
                    {
                        try
                        {
                            transaction.Rollback();
                        }
                        catch (Exception rollbackEx)
                        {
                            Logging.Write("SaveFileToDB rollback error: " + rollbackEx.ToString());
                        }
                    }
                    Logging.Write("SaveFileToDB error: " + ex.ToString());
                    return false;
                }
                finally
                {
                    if (transaction != null)
                    {
                        transaction.Dispose();
                    }
                }
            }
        }

示例#13

0

显示文件

文件： Program.cs 项目： randianb/my-csharp-sample

        /// <summary>
        /// Demo entry point: writes a sample text file as UTF-8, round-trips its content through
        /// Unicode and GB2312 files, converts the UTF-8 file to the system default encoding, and
        /// finally prints the encoding that <c>IdentifyEncoding</c> reports for each file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Sample helper class under test.
            ConvertSample sample = new ConvertSample();

            // First write a UTF-8 file.
            sample.TestWrite(
                "test_UTF8.txt",
                Encoding.UTF8,
                @"测试文件 编码转换！
本测试过程，为模拟 UTF-8 编码的文件， 转换为 其他编码格式的文件！

注意：
本代码仅仅为 演示的例子。
算法为一次把整个文件内容读取进内存，然后再做转换。
如果文件超大的话，会出问题。

实际处理中，可以修改为 同时 打开源文件 与 目标文件。
源文件读取一行， 目标文件写入一行。
的方式来处理！
");

            // Read back the content of the UTF-8 file written above.
            string text = sample.TestRead("test_UTF8.txt", Encoding.UTF8);

            // Write that content out under a new encoding, then read it back.
            sample.TestWrite("test_Unicode.txt", Encoding.Unicode, text);
            text = sample.TestRead("test_Unicode.txt", Encoding.Unicode);

            // Same round trip through GB2312.
            sample.TestWrite("test_GB2312.txt", Encoding.GetEncoding("GB2312"), text);
            text = sample.TestRead("test_GB2312.txt", Encoding.GetEncoding("GB2312"));

            // Convert within a single method call (the original notes it works line by line).
            sample.FileEncodingConvert(
                "test_UTF8.txt",
                "test_Default.txt",
                Encoding.UTF8,
                Encoding.Default);

            IdentifyEncoding test = new IdentifyEncoding();

            // Report the detected encoding of every file, in the same order and with the same
            // output text as the former one-call-per-file version.
            string[] inspectedFiles =
            {
                "test_UTF8.txt",
                "test_Unicode.txt",
                "test_GB2312.txt",
                "test_Default.txt",
                "text\\GB2312.txt",
                "text\\Unicode.txt",
                "text\\UTF8.txt",
            };

            foreach (string fileName in inspectedFiles)
            {
                Console.WriteLine("{0} 文件编码为：{1}",
                                  fileName,
                                  test.GetEncodingString(new System.IO.FileInfo(fileName)));
            }

            Console.ReadLine();
        }

C# (CSharp) IdentifyEncoding示例