/// <summary>
/// Detects the encoding of the file at <paramref name="path"/> from its raw bytes.
/// </summary>
/// <param name="path">Path of the file to inspect.</param>
/// <returns>
/// The first matching <c>MyEncodingType</c>, tested in this order: ASCII, UTF-8
/// (with or without BOM), UTF-16 big-endian, UTF-16 little-endian. Falls back to
/// <c>ANSI_Default</c> when none of the checks match.
/// </returns>
private static MyEncodingType GetEncode(string path)
{
    byte[] content;
    using (FileStream stream = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        // The reader's text encoding is irrelevant: only raw bytes are pulled from the stream.
        BinaryReader binary = new BinaryReader(stream);
        content = binary.ReadBytes((int)stream.Length);
    }

    if (IsASCII(content))
    {
        // Pure-ASCII content is reported as "ASCII / UTF-8 without BOM"; either reading is valid.
        return MyEncodingType.ASCII_UTF8withoutBom;
    }

    if (IsUtf8WithoutBom(content))
    {
        // The BOM check is only consulted once the content has passed IsUtf8WithoutBom.
        return IsUtf8WithBom(content)
            ? MyEncodingType.Utf8withBom
            : MyEncodingType.Utf8withoutBom;
    }

    if (IsUtf16Big(content))
    {
        return MyEncodingType.Utf16Big;
    }

    if (IsUtf16Little(content))
    {
        return MyEncodingType.Utf16Little;
    }

    // Nothing matched: log the system default code page and report the default type.
    Console.WriteLine("当前编码方式:代码页:" + Encoding.Default.CodePage);
    return MyEncodingType.ANSI_Default;
}
/// <summary>
/// Re-encodes the text file at <paramref name="path"/> in place, decoding it into an
/// intermediate <see cref="string"/> first.
/// </summary>
/// <remarks>
/// The source encoding is whatever <c>GetEncode</c> reports for the file. When a BOM was
/// detected it is skipped before decoding, and the preamble of
/// <paramref name="targetEncoding"/> (if it has one) is written in front of the re-encoded text.
/// </remarks>
/// <param name="path">Path of the file to convert; its content is overwritten.</param>
/// <param name="targetEncoding">Encoding the file is rewritten in.</param>
/// <exception cref="ArgumentNullException"><paramref name="targetEncoding"/> is null.</exception>
/// <exception cref="NotSupportedException">
/// The target is <see cref="Encoding.ASCII"/> but the source was not detected as pure ASCII,
/// or the detected source encoding is not one this method knows how to decode.
/// </exception>
private static void SwitchEncoding(string path, Encoding targetEncoding)
{
    // Fail before touching the file: a null target would otherwise only be rejected
    // after the file had already been truncated for writing.
    if (targetEncoding == null)
    {
        throw new ArgumentNullException("targetEncoding");
    }

    MyEncodingType myEncodingType = GetEncode(path);

    // Map the detected type to a decoder and to the number of BOM bytes to skip.
    Encoding sourceEncoding;
    int bomLength = 0;
    switch (myEncodingType)
    {
        case MyEncodingType.ASCII_UTF8withoutBom:
            // Pure-ASCII text decodes identically as UTF-8.
            sourceEncoding = new UTF8Encoding(false);
            break;
        case MyEncodingType.ANSI_Default:
            sourceEncoding = Encoding.Default;
            break;
        case MyEncodingType.Utf8withBom:
            sourceEncoding = new UTF8Encoding(true);
            bomLength = 3;
            break;
        case MyEncodingType.Utf8withoutBom:
            sourceEncoding = new UTF8Encoding(false);
            break;
        case MyEncodingType.Utf16Big:
            sourceEncoding = Encoding.BigEndianUnicode;
            bomLength = 2;
            break;
        case MyEncodingType.Utf16Little:
            sourceEncoding = Encoding.Unicode;
            bomLength = 2;
            break;
        default:
            // Previously this path fell through with a null decoder and crashed later.
            throw new NotSupportedException("Unsupported source encoding: " + myEncodingType);
    }

    // Only a source detected as pure ASCII may be written back as ASCII.
    if (myEncodingType != MyEncodingType.ASCII_UTF8withoutBom && targetEncoding == Encoding.ASCII)
    {
        throw new NotSupportedException("当前编码,无法转为ASCII码");
    }

    byte[] sourceBytes;
    using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    using (BinaryReader rd = new BinaryReader(fs, Encoding.UTF8))
    {
        // Skip the BOM, then read the rest. The requested count exceeds what is left,
        // which is fine: ReadBytes stops at end of stream.
        rd.ReadBytes(bomLength);
        sourceBytes = rd.ReadBytes((int)rd.BaseStream.Length);
    }

    // Encoding.GetString is stateless, so a trailing incomplete byte sequence goes through
    // the decoder fallback instead of being buffered in a Decoder and silently dropped.
    string sourceStr = sourceEncoding.GetString(sourceBytes);

    // Encode BEFORE opening the file for writing: FileMode.Create truncates the file,
    // so an encoding failure after that point would lose the original content.
    byte[] preamble = targetEncoding.GetPreamble();
    byte[] targetBytes = targetEncoding.GetBytes(sourceStr);

    using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
    {
        // Write the BOM (when the target encoding has one) followed by the text.
        fs.Write(preamble, 0, preamble.Length);
        fs.Write(targetBytes, 0, targetBytes.Length);
    }
}
/// <summary>
/// Re-encodes the text file at <paramref name="path"/> in place, converting the bytes
/// directly with <see cref="Encoding.Convert(Encoding, Encoding, byte[])"/> rather than
/// building an intermediate <see cref="string"/> in this method.
/// </summary>
/// <remarks>
/// The source encoding is whatever <c>GetEncode</c> reports for the file. When a BOM was
/// detected it is skipped before conversion, and the preamble of
/// <paramref name="targetEncoding"/> (if it has one) is written in front of the converted bytes.
/// </remarks>
/// <param name="path">Path of the file to convert; its content is overwritten.</param>
/// <param name="targetEncoding">Encoding the file is rewritten in.</param>
/// <exception cref="ArgumentNullException"><paramref name="targetEncoding"/> is null.</exception>
/// <exception cref="NotSupportedException">
/// The target is <see cref="Encoding.ASCII"/> but the source was not detected as pure ASCII,
/// or the detected source encoding is not one this method knows how to decode.
/// </exception>
private static void ConvertEncoding(string path, Encoding targetEncoding)
{
    // Reject a null target up front, before any file access.
    if (targetEncoding == null)
    {
        throw new ArgumentNullException("targetEncoding");
    }

    MyEncodingType myEncodingType = GetEncode(path);

    // Map the detected type to a source encoding and to the number of BOM bytes to skip.
    Encoding srcEncoding;
    int bomLength = 0;
    switch (myEncodingType)
    {
        case MyEncodingType.ASCII_UTF8withoutBom:
            // Pure-ASCII text decodes identically as UTF-8.
            srcEncoding = new UTF8Encoding(false);
            break;
        case MyEncodingType.ANSI_Default:
            srcEncoding = Encoding.Default;
            break;
        case MyEncodingType.Utf8withBom:
            srcEncoding = Encoding.UTF8;
            bomLength = 3;
            break;
        case MyEncodingType.Utf8withoutBom:
            srcEncoding = new UTF8Encoding(false);
            break;
        case MyEncodingType.Utf16Big:
            srcEncoding = Encoding.BigEndianUnicode;
            bomLength = 2;
            break;
        case MyEncodingType.Utf16Little:
            srcEncoding = Encoding.Unicode;
            bomLength = 2;
            break;
        default:
            // Previously a null source encoding reached Encoding.Convert on this path.
            throw new NotSupportedException("Unsupported source encoding: " + myEncodingType);
    }

    // Only a source detected as pure ASCII may be written back as ASCII. This now also
    // covers ANSI_Default, which used to be converted without this check.
    if (myEncodingType != MyEncodingType.ASCII_UTF8withoutBom && targetEncoding == Encoding.ASCII)
    {
        throw new NotSupportedException("当前编码,无法转为ASCII码");
    }

    byte[] tarBytes;
    using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    using (BinaryReader rd = new BinaryReader(fs, Encoding.Default))
    {
        // Skip the BOM, then read the rest. The requested count exceeds what is left,
        // which is fine: ReadBytes stops at end of stream.
        rd.ReadBytes(bomLength);
        byte[] srcBytes = rd.ReadBytes((int)rd.BaseStream.Length);
        tarBytes = Encoding.Convert(srcEncoding, targetEncoding, srcBytes);
    }

    // All conversion work is done before the file is reopened with FileMode.Create
    // (which truncates it), so a conversion failure cannot wipe the original content.
    byte[] preamble = targetEncoding.GetPreamble();

    using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
    {
        // Write the BOM (when the target encoding has one) followed by the converted bytes.
        fs.Write(preamble, 0, preamble.Length);
        fs.Write(tarBytes, 0, tarBytes.Length);
    }
}