/// <summary>
/// Gets the text encoding of a file.
/// </summary>
/// <param name="path">Path of the file to inspect.</param>
/// <returns>
/// The <see cref="Encoding"/> resolved from the encoding name that
/// <c>IdentifyEncoding.GetEncodingName</c> reports for the file.
/// </returns>
public static Encoding GetEncoding(String path)
{
    FileInfo target = new FileInfo(path);
    string detectedName = new IdentifyEncoding().GetEncodingName(target);

    // Encoding.GetEncoding turns the reported name into an Encoding instance.
    return Encoding.GetEncoding(detectedName);
}
/// <summary>
/// Occurs when the window is loaded.
/// Stores the current <c>Size</c> in <c>Tag</c>, runs the start-up readers
/// (<c>ReadConverterMapsList</c>, <c>ReadDefaultRegexList</c>, <c>ReadConfig</c>) and then
/// either opens the file named by the first command-line argument or, when no
/// argument was given, creates the shared <c>IdentifyEncoding</c> instance on a
/// background thread.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void MainForm_Load(object sender, EventArgs e)
{
    Tag = Size;
    ReadConverterMapsList();
    ReadDefaultRegexList();
    ReadConfig();

    // Element 0 is the executable itself; element 1 is the first real argument.
    string[] commandLine = Environment.GetCommandLineArgs();
    if (commandLine.Length > 1)
    {
        if (ReadSourceFile(commandLine[1]))
        {
            ConvertSourceToDestText();
            txtSource.Select(0, 0);
        }

        return;
    }

    try
    {
        Thread identifyEncodingThread = new Thread(obj =>
        {
            // The guard must live inside the thread body: an exception that escapes a
            // thread's entry point is unhandled and terminates the process, and a
            // try/catch around Start() cannot observe it.
            try
            {
                IdentifyEncoding = new IdentifyEncoding();
            }
            catch (Exception)
            {
                // Best effort only: ReadSourceFile creates the instance itself
                // when the field is still null.
            }
        });
        identifyEncodingThread.IsBackground = true;
        identifyEncodingThread.Start();
    }
    catch (Exception)
    {
        // Failing to start the warm-up thread is not fatal for the form;
        // the instance is then created lazily on first use.
    }
}
/// <summary>
/// Re-encodes a text file in place.
/// The source encoding is either auto-detected (when <c>chkUnknownEncoding</c> is checked)
/// or taken from <c>cmbSourceEncode</c>; the target encoding comes from <c>cmbTargetEncode</c>.
/// When <c>chkIsBackup</c> is checked, a byte-exact copy of the original is kept as
/// <c>&lt;filePath&gt;.bak</c>. If the path contains "[1]", the file is finally moved to the
/// same path with every "[1]" removed.
/// </summary>
/// <param name="filePath">Path of the file to convert.</param>
private void ConvertFileEncode(string filePath)
{
    Encoding oriEncode;
    if (chkUnknownEncoding.Checked)
    {
        // Encoding detection.
        IdentifyEncoding ie = new IdentifyEncoding();
        string encodingName = ie.GetEncodingName(new FileInfo(filePath));
        if (encodingName == "UNKNOWN")
        {
            txtResult.Text += string.Format("\r\n{0}文件格式不正确或已损坏。 ", filePath);
            return;
        }

        oriEncode = Encoding.GetEncoding(encodingName);
    }
    else
    {
        oriEncode = GetSelectEncoding(cmbSourceEncode.SelectedIndex);
    }

    if (chkIsBackup.Checked)
    {
        // Back up the raw bytes before anything is decoded or rewritten. Writing the
        // decoded text back out would not reproduce the original file (a BOM may be
        // added, and bytes lost to a wrong source encoding cannot be recovered).
        File.Copy(filePath, filePath + ".bak", true);
    }

    string text = File.ReadAllText(filePath, oriEncode);
    File.WriteAllText(filePath, text, GetSelectEncoding(cmbTargetEncode.SelectedIndex));

    // Ordinal search: "[1]" is a literal marker, not linguistic text.
    if (filePath.IndexOf("[1]", StringComparison.Ordinal) >= 0)
    {
        File.Move(filePath, filePath.Replace("[1]", string.Empty));
    }
}
/// <summary>
/// Converts a text file in place to the given encoding.
/// The current encoding is detected with <c>IdentifyEncoding</c>; when the detector reports
/// "other" (any letter case), a message is appended to <c>txtOutput</c> and the file is left
/// untouched. On success the file path is appended to <c>txtOutput</c>.
/// </summary>
/// <param name="sFromFilePath">Path of the file to convert; null, empty or blank paths are ignored.</param>
/// <param name="encode">Encoding the file is rewritten with.</param>
private void ConvertCP(string sFromFilePath, Encoding encode)
{
    // Null is treated like an empty path instead of failing with a NullReferenceException.
    if (sFromFilePath == null || sFromFilePath.Trim().Length == 0)
    {
        return;
    }

    // Encoding detection.
    IdentifyEncoding ie = new IdentifyEncoding();
    string encodingName = ie.GetEncodingName(new FileInfo(sFromFilePath));

    // Ordinal, case-insensitive match: the name is an identifier, so the comparison
    // must not depend on the current culture's casing rules.
    if (string.Equals(encodingName, "other", StringComparison.OrdinalIgnoreCase))
    {
        string message = string.Format("\r\n{0}文件格式不正确或已损坏。 ", sFromFilePath);
        txtOutput.AppendText(sFromFilePath + '\t' + message);
        return;
    }

    Encoding oriEncode = Encoding.GetEncoding(encodingName);
    string text = File.ReadAllText(sFromFilePath, oriEncode);
    File.WriteAllText(sFromFilePath, text, encode);
    txtOutput.AppendText(sFromFilePath);
}
/// <summary>
/// Resolves the encoding of the file at <paramref name="path"/>.
/// </summary>
/// <param name="path">Path of the file to inspect.</param>
/// <returns>
/// The <see cref="Encoding"/> looked up from the name returned by
/// <c>IdentifyEncoding.GetEncodingName</c> for that file.
/// </returns>
public static Encoding EncodingInfo(string path)
{
    // Work out the encoding name first, then map the name to an Encoding.
    FileInfo inspected = new FileInfo(path);
    IdentifyEncoding detector = new IdentifyEncoding();
    string name = detector.GetEncodingName(inspected);
    return Encoding.GetEncoding(name);
}
/// <summary>
/// Downloads <paramref name="url"/> as raw bytes and decodes them using the encoding
/// name that <c>IdentifyEncoding.GetEncodingString</c> reports for those bytes.
/// </summary>
/// <param name="httpClient">Client used to perform the GET request.</param>
/// <param name="url">Address of the resource to download.</param>
/// <returns>The response body decoded to a string.</returns>
public static async System.Threading.Tasks.Task<string> GetHtml(this HttpClient httpClient, string url)
{
    byte[] payload = await httpClient.GetByteArrayAsync(url);

    // The detector works on signed bytes, hence the conversion helper.
    var detector = new IdentifyEncoding();
    var charsetName = detector.GetEncodingString(IdentifyEncoding.ToSByteArray(payload));

    return System.Text.Encoding.GetEncoding(charsetName).GetString(payload);
}
/// <summary>
/// Reads one block of <c>OneTimeRead</c> bytes from the cache buffer, detects its
/// encoding and publishes the decoded text through <c>Text</c>.
/// Also updates <c>TotalLen</c> (value returned by the buffer read) and
/// <c>StartedOffset</c> (the aligned offset actually used).
/// </summary>
/// <param name="offset">
/// Requested start position; it is aligned to a multiple of <c>OneTimeRead</c>
/// by integer division before reading.
/// </param>
public void Read(int offset)
{
    // Integer division then multiplication snaps the offset to a multiple of
    // OneTimeRead, so every read starts on a block boundary.
    int alignedOffset = offset / OneTimeRead * OneTimeRead;

    byte[] block = new byte[OneTimeRead];
    TotalLen = _cacheBuffer.Read(alignedOffset, block);
    StartedOffset = alignedOffset;

    // Detection looks at the whole block; decoding only uses the first TotalLen bytes.
    Encoding detected = Encoding.GetEncoding(
        identifyEncoding.GetEncodingName(IdentifyEncoding.ToSByteArray(block)));
    Text = detected.GetString(block, 0, TotalLen);
}
/// <summary>
/// Reads the source file into <c>txtSource</c> and updates the path/encoding labels.
/// </summary>
/// <param name="path">Absolute path of the source file.</param>
/// <returns>
/// <c>true</c> when the file was read; <c>false</c> when the path is empty, the file does
/// not exist, or reading/decoding threw (in which case <c>SourceFilePath</c> is reset to "").
/// </returns>
private bool ReadSourceFile(string path)
{
    bool readable = path != String.Empty && File.Exists(path);
    if (!readable)
    {
        return false;
    }

    // Opening a file while in "convert text" mode switches the UI to "convert file" mode.
    if (cboConvertMethod.SelectedIndex == (int)ConvertMethod.ConvertText)
    {
        cboConvertMethod.SelectedIndex = (int)ConvertMethod.ConvertFile;
    }

    SourceFilePath = path;

    // Create the detector on demand when it has not been created yet.
    if (IdentifyEncoding == null)
    {
        IdentifyEncoding = new IdentifyEncoding();
    }

    string content = string.Empty;
    string encodingName = string.Empty;
    try
    {
        bool autoDetect = cboReadEncoding.SelectedIndex == 0;
        if (autoDetect)
        {
            // Index 0 of the combo box means: detect the encoding from the file.
            encodingName = IdentifyEncoding.GetEncodingName(new FileInfo(SourceFilePath));
            content = File.ReadAllText(SourceFilePath, GetEncodingByName(encodingName));
        }
        else
        {
            // Any other entry is an explicit encoding picked by the user.
            encodingName = cboReadEncoding.SelectedItem.ToString();
            content = File.ReadAllText(SourceFilePath, GetReadEncoding(cboReadEncoding.SelectedIndex));
        }
    }
    catch (Exception)
    {
        // Any failure while detecting or reading is reported as "not read".
        SourceFilePath = "";
        return false;
    }

    // When CheckNewLineType reports 0, bare "\n" line breaks are expanded to "\r\n".
    if (Tools.CheckNewLineType(content) == 0)
    {
        content = content.Replace("\n", "\r\n");
    }

    txtSource.Text = content;
    lblFilePath.Text = "文件路径:" + SourceFilePath;
    lblFileEncoding.Text = "文件编码:" + encodingName;
    return true;
}
/// <summary>
/// Manual smoke test: asks <c>IdentifyEncoding</c> for the encoding name of a fixed
/// web page, writes the result to standard output and waits for a key press.
/// Any exception is reported on standard error and leaves the result null.
/// </summary>
public static void Test()
{
    string detectedName = null;
    IdentifyEncoding sinodetector = new IdentifyEncoding();

    try
    {
        System.Uri page = new System.Uri("http://china5.nikkeibp.co.jp/china/news/com/200307/pr_com200307170131.html");
        detectedName = sinodetector.GetEncodingName(page);
    }
    catch (System.Exception failure)
    {
        Console.Error.WriteLine("Bad URL " + failure);
    }

    Console.Write(detectedName);
    Console.Read();
}
/// <summary>
/// Converts the first <paramref name="count"/> bytes of <paramref name="bytes"/> to text.
/// </summary>
/// <param name="encodingName">
/// Null/blank: auto-detect the encoding and decode, falling back to a hex dump if that
/// throws. "Hex": produce a hex dump. Any other value yields an empty string.
/// </param>
/// <param name="bytes">Buffer holding the data.</param>
/// <param name="count">Number of bytes, from the start of the buffer, to convert.</param>
/// <returns>The decoded text, the hex dump, or <see cref="string.Empty"/>.</returns>
public string GetString(string encodingName, byte[] bytes, int count)
{
    if (string.IsNullOrWhiteSpace(encodingName))
    {
        try
        {
            string detectedName = _identifyEncoding.GetEncodingName(IdentifyEncoding.ToSByteArray(bytes));
            return Encoding.GetEncoding(detectedName).GetString(bytes, 0, count);
        }
        catch (Exception)
        {
            // Detection or decoding failed: show the raw bytes as hex instead.
            return BytesToHexString(bytes, count);
        }
    }

    return encodingName == "Hex"
        ? BytesToHexString(bytes, count)
        : string.Empty;
}
/// <summary>
/// Decodes the buffered response body (<c>MemoryStream</c>) to a string.
/// For "text/css" responses the charset is taken from the stylesheet itself (via
/// <c>RegexLibrary.RegCssContentType</c>); for everything else the encoding is detected
/// from the bytes, falling back to <c>GetStringUsingEncoding</c> when
/// <c>IdentifyEncoding</c> cannot name a usable encoding. UTF-8 is the last resort.
/// </summary>
/// <param name="webResponse">Response the buffered body belongs to.</param>
/// <returns>
/// The decoded body, or <see cref="string.Empty"/> when nothing is buffered, the response
/// is null, or its status is not 200 OK.
/// </returns>
public string DecodeData(HttpWebResponse webResponse)
{
    if (MemoryStream == null || MemoryStream.Length == 0)
    {
        return string.Empty;
    }

    if (webResponse == null || webResponse.StatusCode != HttpStatusCode.OK)
    {
        return string.Empty;
    }

    byte[] pageBytes = MemoryStream.ToBytes();
    Encoding encoding = null;

    // CSS: the charset, if any, is declared inside the stylesheet text.
    // Ordinal, case-insensitive search avoids culture-dependent lower-casing.
    if (webResponse.ContentType.IndexOf("text/css", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        // Rewind the stream to its start.
        this.MemoryStream.Seek(0, SeekOrigin.Begin);

        // Decoding as UTF-8 first is enough to find the charset declaration even when
        // non-ASCII text comes out garbled; the bytes are decoded again below with the
        // declared encoding.
        string encode = Encoding.UTF8.GetString(this.MemoryStream.GetBuffer(), 0, (int)this.MemoryStream.Length);
        var mat = RegexLibrary.RegCssContentType.Match(encode);
        if (mat.Groups[2].Success)
        {
            // The declared name comes from remote content; an unknown name must not
            // abort decoding, so it simply leaves the UTF-8 default in place.
            encoding = TryGetEncodingByName(mat.Groups[2].Value);
        }
    }
    else
    {
        // Statistical detection (the algorithm used by Firefox).
        encoding = this.GetEncodingByUniversalCharDet(pageBytes);

        // IdentifyEncoding expects signed bytes; reinterpret each byte without range checks.
        IdentifyEncoding ide = new IdentifyEncoding();
        sbyte[] mySByte = new sbyte[pageBytes.Length];
        for (int i = 0; i < pageBytes.Length; i++)
        {
            mySByte[i] = unchecked((sbyte)pageBytes[i]);
        }

        var tempOTHER = ide.GetEncodingString(mySByte);
        Encoding identified = tempOTHER == "OTHER" ? null : TryGetEncodingByName(tempOTHER);
        if (identified != null)
        {
            encoding = identified;
        }
        else
        {
            // Fall back to the headers / meta / BOM lookup.
            Encoding secondEncoding = this.GetStringUsingEncoding(webResponse, pageBytes);

            // Use the lookup result when it disagrees with the statistical guess, and also
            // when there is no statistical guess at all (previously that result was dropped
            // and a null lookup result caused a NullReferenceException).
            if (secondEncoding != null
                && (encoding == null || encoding.EncodingName != secondEncoding.EncodingName))
            {
                encoding = secondEncoding;
            }
        }
    }

    if (encoding == null)
    {
        encoding = Encoding.UTF8;
    }

    return encoding.GetString(pageBytes);
}

/// <summary>
/// Resolves an encoding by name, returning null instead of throwing when the name is
/// empty or not recognised.
/// </summary>
private static Encoding TryGetEncodingByName(string name)
{
    if (string.IsNullOrEmpty(name))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        // Encoding.GetEncoding throws ArgumentException for names it does not know.
        return null;
    }
}
/// <summary>
/// Writes the text of a file into the <c>file_context</c> column of an Oracle table
/// (bound as a CLOB parameter) inside a transaction.
/// </summary>
/// <param name="file_type">1 targets <c>map_file</c> (keyed by <c>map_name</c>); any other value targets <c>fight_file</c> (keyed by <c>roll_type</c>).</param>
/// <param name="DBKey">Key value of the row to update.</param>
/// <param name="FullFileName">Path of the file whose content is stored.</param>
/// <returns><c>true</c> when the update was committed; <c>false</c> when the file could not be opened or the database work failed (the error is logged).</returns>
public static bool SaveFileToDB(int file_type, string DBKey, string FullFileName)
{
    FileStream mapfs;
    try
    {
        mapfs = new FileStream(FullFileName, FileMode.Open, FileAccess.Read);
    }
    catch (Exception ex)
    {
        Logging.Write("SaveFileToDB error: " + ex.ToString());
        return false;
    }

    // The key is embedded in a SQL string literal, so single quotes must be doubled;
    // otherwise a key containing ' breaks the statement or allows SQL injection.
    // NOTE(review): a bind variable would be preferable once the key column types are confirmed.
    string safeKey = DBKey == null ? string.Empty : DBKey.Replace("'", "''");

    string sql;
    if (file_type == 1)
    {
        sql = string.Format("update map_file set file_context = :1 where map_name = '{0}'", safeKey);
    }
    else
    {
        sql = string.Format("update fight_file set file_context = :2 where roll_type = '{0}'", safeKey);
    }

    OracleTransaction transaction = null;
    try
    {
        if (!isConnected)
        {
            OraConnect();
        }

        // The update must run inside a transaction.
        transaction = conn.BeginTransaction();

        using (OracleCommand oCmd = new OracleCommand(sql, conn))
        {
            OracleParameter param = oCmd.Parameters.Add("1", OracleDbType.Clob, ParameterDirection.Input);

            // Detect the file's encoding so the text is decoded correctly before storing it.
            IdentifyEncoding sinodetector = new IdentifyEncoding();
            FileInfo finfo = new FileInfo(FullFileName);
            using (StreamReader sr = new StreamReader(mapfs, sinodetector.GetEncoding(sinodetector.GetEncodingName(finfo))))
            {
                param.Value = sr.ReadToEnd();
            }

            oCmd.ExecuteNonQuery();
        }

        transaction.Commit();
    }
    catch (Exception ex)
    {
        // The transaction is still null when connecting or BeginTransaction failed;
        // rolling back unconditionally would hide the real error behind a
        // NullReferenceException thrown from this handler.
        if (transaction != null)
        {
            try
            {
                transaction.Rollback();
            }
            catch (Exception rollbackEx)
            {
                Logging.Write("SaveFileToDB error: " + rollbackEx.ToString());
            }
        }

        Logging.Write("SaveFileToDB error: " + ex.ToString());
        return false;
    }
    finally
    {
        // Release the file handle on every path, not only after a successful commit.
        mapfs.Dispose();
    }

    return true;
}
/// <summary>
/// Demo entry point: writes a UTF-8 sample file, round-trips its text through Unicode
/// and GB2312 files, converts the UTF-8 file to the system default encoding, then prints
/// the encoding that <c>IdentifyEncoding</c> reports for each sample file and waits for Enter.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Helper class that holds the sample read/write/convert routines.
    ConvertSample sample = new ConvertSample();

    // Step 1: create a UTF-8 file.
    sample.TestWrite(
        "test_UTF8.txt",
        Encoding.UTF8,
        @"测试文件 编码转换! 本测试过程,为模拟 UTF-8 编码的文件, 转换为 其他编码格式的文件! 注意: 本代码仅仅为 演示的例子。 算法为一次把整个文件内容读取进内存,然后再做转换。 如果文件超大的话,会出问题。 实际处理中,可以修改为 同时 打开源文件 与 目标文件。 源文件读取一行, 目标文件写入一行。 的方式来处理! ");

    // Step 2: read the UTF-8 file back.
    string text = sample.TestRead("test_UTF8.txt", Encoding.UTF8);

    // Step 3: write that text out as Unicode (UTF-16) and read it back.
    sample.TestWrite("test_Unicode.txt", Encoding.Unicode, text);
    text = sample.TestRead("test_Unicode.txt", Encoding.Unicode);

    // Step 4: same round trip through GB2312.
    sample.TestWrite("test_GB2312.txt", Encoding.GetEncoding("GB2312"), text);
    text = sample.TestRead("test_GB2312.txt", Encoding.GetEncoding("GB2312"));

    // Step 5: conversion done inside a single method (read a line, write a line).
    sample.FileEncodingConvert("test_UTF8.txt", "test_Default.txt", Encoding.UTF8, Encoding.Default);

    // Step 6: report the detected encoding of every sample file, in this order.
    string[] inspectedFiles =
    {
        "test_UTF8.txt",
        "test_Unicode.txt",
        "test_GB2312.txt",
        "test_Default.txt",
        "text\\GB2312.txt",
        "text\\Unicode.txt",
        "text\\UTF8.txt"
    };

    IdentifyEncoding test = new IdentifyEncoding();
    foreach (string fileName in inspectedFiles)
    {
        Console.WriteLine(
            "{0} 文件编码为:{1}",
            fileName,
            test.GetEncodingString(new System.IO.FileInfo(fileName)));
    }

    Console.ReadLine();
}