Exemple #1
0
        /// <summary>
        /// Detects the text encoding of the file at <paramref name="path"/> by
        /// inspecting its raw bytes.
        /// </summary>
        /// <param name="path">Path of the file to inspect.</param>
        /// <returns>
        /// The detected <see cref="MyEncodingType"/>; falls back to
        /// <see cref="MyEncodingType.ANSI_Default"/> when no other check matches.
        /// </returns>
        private static MyEncodingType GetEncode(string path)
        {
            byte[] content;

            // Load the whole file as raw bytes; no text decoding happens here, so
            // the reader's own character encoding is irrelevant.
            using (FileStream stream = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                BinaryReader binary = new BinaryReader(stream);
                content = binary.ReadBytes((int)stream.Length);
            }

            // Pure ASCII content is reported as one combined kind: it can be
            // treated as either ASCII or UTF-8 without a BOM.
            if (IsASCII(content))
            {
                return MyEncodingType.ASCII_UTF8withoutBom;
            }

            // The BOM check is only consulted once the general UTF-8 check passed.
            if (IsUtf8WithoutBom(content))
            {
                return IsUtf8WithBom(content)
                    ? MyEncodingType.Utf8withBom
                    : MyEncodingType.Utf8withoutBom;
            }

            if (IsUtf16Big(content))
            {
                return MyEncodingType.Utf16Big;
            }

            if (IsUtf16Little(content))
            {
                return MyEncodingType.Utf16Little;
            }

            // Nothing matched: report the code page of the default encoding and
            // fall back to it.
            Console.WriteLine("当前编码方式:代码页:" + Encoding.Default.CodePage);
            return MyEncodingType.ANSI_Default;
        }
Exemple #2
0
        /// <summary>
        /// Re-encodes the text file at <paramref name="path"/> in place, going
        /// through an intermediate <see cref="string"/>: the file is decoded with
        /// its detected source encoding and then written back using
        /// <paramref name="targetEncoding"/> (preceded by that encoding's preamble,
        /// if it has one).
        /// </summary>
        /// <param name="path">Path of the file to convert; it is overwritten.</param>
        /// <param name="targetEncoding">Encoding to write the file with.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="targetEncoding"/> is null.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// The source is not pure ASCII and the target is ASCII, or the detected
        /// encoding type is not handled here.
        /// </exception>
        private static void SwitchEncoding(string path, Encoding targetEncoding)
        {
            // Validate before touching the file: the write phase truncates it, so
            // a late failure would destroy the original content.
            if (targetEncoding == null)
            {
                throw new ArgumentNullException("targetEncoding");
            }

            MyEncodingType myEncodingType = GetEncode(path);

            Encoding sourceEncoding;
            int      bomLength        = 0;      // leading bytes to skip before decoding
            bool     sourceIsAsciiOnly = false;

            switch (myEncodingType)
            {
            case MyEncodingType.ASCII_UTF8withoutBom:
                // Pure ASCII text decodes identically as UTF-8.
                sourceEncoding    = new UTF8Encoding(false);
                sourceIsAsciiOnly = true;
                break;

            case MyEncodingType.ANSI_Default:
                // NOTE(review): Encoding.Default is the system ANSI code page on
                // .NET Framework but UTF-8 on .NET Core / .NET 5+ - confirm the
                // target runtime.
                sourceEncoding = Encoding.Default;
                break;

            case MyEncodingType.Utf8withBom:
                sourceEncoding = new UTF8Encoding(true);
                bomLength      = 3;
                break;

            case MyEncodingType.Utf8withoutBom:
                sourceEncoding = new UTF8Encoding(false);
                break;

            case MyEncodingType.Utf16Big:
                // Assumes the detector only reports UTF-16 when a 2-byte BOM is
                // present - TODO confirm against IsUtf16Big.
                sourceEncoding = Encoding.BigEndianUnicode;
                bomLength      = 2;
                break;

            case MyEncodingType.Utf16Little:
                // Same BOM assumption as the big-endian case - TODO confirm.
                sourceEncoding = Encoding.Unicode;
                bomLength      = 2;
                break;

            default:
                throw new NotSupportedException("Unsupported source encoding type: " + myEncodingType);
            }

            // Compare by code page rather than by reference so that any ASCII
            // encoding instance is recognised, not just the Encoding.ASCII singleton.
            bool targetIsAscii = targetEncoding.CodePage == Encoding.ASCII.CodePage;
            if (targetIsAscii && !sourceIsAsciiOnly)
            {
                throw new NotSupportedException("当前编码,无法转为ASCII码");
            }

            // Decode the source text, skipping the BOM (clamped for short files).
            byte[] sourceBytes = File.ReadAllBytes(path);
            int    start       = Math.Min(bomLength, sourceBytes.Length);
            string sourceStr   = sourceEncoding.GetString(sourceBytes, start, sourceBytes.Length - start);

            // Encode completely BEFORE reopening the file for writing, so that an
            // encoder failure cannot leave a truncated file behind.
            byte[] preamble    = targetEncoding.GetPreamble();
            byte[] targetBytes = targetEncoding.GetBytes(sourceStr);

            using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
            {
                // Write the BOM first when the target encoding defines one.
                fs.Write(preamble, 0, preamble.Length);
                fs.Write(targetBytes, 0, targetBytes.Length);
            }
        }
Exemple #3
0
        /// <summary>
        /// Re-encodes the text file at <paramref name="path"/> in place without an
        /// intermediate <see cref="string"/>: the raw bytes are converted directly
        /// from the detected source encoding to <paramref name="targetEncoding"/>
        /// and written back (preceded by the target encoding's preamble, if any).
        /// </summary>
        /// <param name="path">Path of the file to convert; it is overwritten.</param>
        /// <param name="targetEncoding">Encoding to write the file with.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="targetEncoding"/> is null.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// The source is not pure ASCII and the target is ASCII, or the detected
        /// encoding type is not handled here.
        /// </exception>
        private static void ConvertEncoding(string path, Encoding targetEncoding)
        {
            if (targetEncoding == null)
            {
                throw new ArgumentNullException("targetEncoding");
            }

            MyEncodingType myEncodingType = GetEncode(path);

            Encoding srcEncoding;
            int      bomLength      = 0;      // leading bytes to skip before converting
            bool     srcIsAsciiOnly = false;

            switch (myEncodingType)
            {
            case MyEncodingType.ASCII_UTF8withoutBom:
                // Pure ASCII text decodes identically as UTF-8.
                srcEncoding    = new UTF8Encoding(false);
                srcIsAsciiOnly = true;
                break;

            case MyEncodingType.ANSI_Default:
                // NOTE(review): Encoding.Default is the system ANSI code page on
                // .NET Framework but UTF-8 on .NET Core / .NET 5+ - confirm the
                // target runtime.
                srcEncoding = Encoding.Default;
                break;

            case MyEncodingType.Utf8withBom:
                srcEncoding = Encoding.UTF8;
                bomLength   = 3;
                break;

            case MyEncodingType.Utf8withoutBom:
                srcEncoding = new UTF8Encoding(false);
                break;

            case MyEncodingType.Utf16Big:
                // Assumes the detector only reports UTF-16 when a 2-byte BOM is
                // present - TODO confirm against IsUtf16Big.
                srcEncoding = Encoding.BigEndianUnicode;
                bomLength   = 2;
                break;

            case MyEncodingType.Utf16Little:
                // Same BOM assumption as the big-endian case - TODO confirm.
                srcEncoding = Encoding.Unicode;
                bomLength   = 2;
                break;

            default:
                throw new NotSupportedException("Unsupported source encoding type: " + myEncodingType);
            }

            // Any source that is not pure ASCII (including the ANSI fallback) would
            // be converted lossily to ASCII and then overwrite the original file,
            // so refuse up front. Compare by code page so every ASCII encoding
            // instance is recognised, not just the Encoding.ASCII singleton.
            bool targetIsAscii = targetEncoding.CodePage == Encoding.ASCII.CodePage;
            if (targetIsAscii && !srcIsAsciiOnly)
            {
                throw new NotSupportedException("当前编码,无法转为ASCII码");
            }

            // Convert everything after the BOM (clamped for short files). This is
            // done before the file is reopened for writing, so a conversion
            // failure leaves the original file untouched.
            byte[] srcBytes = File.ReadAllBytes(path);
            int    start    = Math.Min(bomLength, srcBytes.Length);
            byte[] tarBytes = Encoding.Convert(srcEncoding, targetEncoding, srcBytes, start, srcBytes.Length - start);
            byte[] preamble = targetEncoding.GetPreamble();

            using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
            {
                // Write the BOM first when the target encoding defines one.
                fs.Write(preamble, 0, preamble.Length);
                fs.Write(tarBytes, 0, tarBytes.Length);
            }
        }