コード例 #1
0
        /*
         * Converts the files of a source folder into text files.
         *
         * thread_payload is a string[]:
         *   [0] this worker's task number (parsed as an int),
         *   [1] the source folder path,
         *   [2] the output (text) folder path.
         *
         * Starting at the task number, every ConvertThreadCnt-th file of the source
         * folder is handled (presumably so that workers started with different task
         * numbers take disjoint files - confirm against the caller).
         *
         * A file whose conversion throws, yields null, or whose output does not exceed
         * the MinBytes / MinWords thresholds has its name appended to
         * CheckingData.ErrorPaperList. Files without a matching converter and files
         * whose output already exists are skipped silently.
         */
        private void FileConvert(object thread_payload)
        {
            // Unpack the arguments.
            string[]      param        = (string[])thread_payload;
            int           taskNo       = int.Parse(param[0]);
            DirectoryInfo sourceFolder = new DirectoryInfo(param[1]);
            DirectoryInfo textFolder   = new DirectoryInfo(param[2]);

            // Both folders must exist; otherwise report on the UI context and give up.
            if (!sourceFolder.Exists || !textFolder.Exists)
            {
                runningEnv.UIContext.BeginInvoke(new Action(() =>
                {
                    MessageBox.Show(runningEnv.UIContext, "数据损坏,任务失败!", "错误");
                }));
                return;
            }

            // Files waiting to be converted (top level of the source folder only).
            FileInfo[] fileInfo = sourceFolder.GetFiles();

            // Stride between the files handled by this worker.
            int stride = int.Parse(runningEnv.SettingData.ConvertThreadCnt.ToString());

            for (int fileIndex = taskNo; fileIndex < fileInfo.Length; fileIndex += stride)
            {
                FileInfo nextFile = fileInfo[fileIndex];

                // Path.Combine copes with folder names that already end in a separator.
                string path      = Path.Combine(sourceFolder.FullName, nextFile.Name);
                string dist_path = Path.Combine(textFolder.FullName, nextFile.Name + ".txt");

                // Already converted: nothing to do.
                if (File.Exists(dist_path))
                {
                    continue;
                }

                ConverterFactory converterFactory = new ConverterFactory();
                bool             failed           = false;

                try
                {
                    // Extension of the file itself, without the dot, lower-cased culture-independently.
                    string file_type = Path.GetExtension(path).TrimStart('.').ToLowerInvariant();

                    ConvertCore file_converter = converterFactory.GetConverter(file_type, runningEnv);
                    if (file_converter == null)
                    {
                        // No converter for this file type: ignore the file.
                        continue;
                    }

                    string text = file_converter.ConvertToString(path);
                    if (text == null)
                    {
                        // No text produced; record the failure without writing anything.
                        failed = true;
                    }
                    else
                    {
                        if (text.Length > RunningEnv.ProgramParam.MaxWords)
                        {
                            // Drop everything beyond the maximum length.
                            text = text.Substring(0, RunningEnv.ProgramParam.MaxWords);
                        }

                        File.WriteAllText(dist_path, text, Encoding.GetEncoding("GBK"));

                        // Output that does not exceed the configured thresholds is removed and reported.
                        if (new FileInfo(dist_path).Length <= runningEnv.CheckData.MinBytes || text.Length <= runningEnv.CheckData.MinWords)
                        {
                            // File.Delete is a no-op when the file is already gone.
                            File.Delete(dist_path);
                            failed = true;
                        }
                    }
                }
                catch (Exception)
                {
                    // Best effort: any conversion or I/O failure just marks this file as failed.
                    failed = true;
                }

                if (failed)
                {
                    // Serialise appends from concurrently running workers of this method.
                    // NOTE(review): other readers/writers of ErrorPaperList are not visible here
                    // and would have to take the same lock to be fully safe.
                    lock (runningEnv.CheckingData.ErrorPaperList)
                    {
                        runningEnv.CheckingData.ErrorPaperList.AddLast(nextFile.Name);
                    }
                }
            }
        }
コード例 #2
0
        /*
         * Converts the files found under a source folder into text files.
         *
         * thread_payload is a string[]:
         *   [0] this worker's task number (parsed as an int),
         *   [1] the source folder path,
         *   [2] the output (text) folder path.
         *
         * The file list comes from Utils.GetFileInfoRecursion(sourceFolder). Starting at
         * the task number, every ConvertThreadCnt-th entry is handled (presumably so that
         * workers started with different task numbers take disjoint files - confirm
         * against the caller).
         *
         * The output name is the source file name with characters outside the allowed
         * set removed, plus ".txt".
         * NOTE(review): two source files whose names sanitise to the same string share
         * one output path, so the later one is skipped as "already converted".
         *
         * A file whose conversion throws, yields null/empty text, or whose output does
         * not exceed the MinBytes / MinWords thresholds has its name appended to
         * CheckingData.ErrorPaperList. Files without a matching converter and files
         * whose output already exists are skipped silently.
         */
        private void FileConvert(object thread_payload)
        {
            // Unpack the arguments.
            string[]      param        = (string[])thread_payload;
            int           taskNo       = int.Parse(param[0]);
            DirectoryInfo sourceFolder = new DirectoryInfo(param[1]);
            DirectoryInfo textFolder   = new DirectoryInfo(param[2]);

            // Both folders must exist; otherwise report on the UI context and give up.
            if (!sourceFolder.Exists || !textFolder.Exists)
            {
                runningEnv.UIContext.BeginInvoke(new Action(() =>
                {
                    MessageBox.Show(runningEnv.UIContext, "数据损坏,任务失败!", "错误");
                }));
                return;
            }

            // Files waiting to be converted.
            FileInfo[] fileInfo = Utils.GetFileInfoRecursion(sourceFolder).ToArray();

            // Stride between the files handled by this worker.
            int stride = int.Parse(runningEnv.SettingData.ConvertThreadCnt.ToString());

            for (int fileIndex = taskNo; fileIndex < fileInfo.Length; fileIndex += stride)
            {
                FileInfo nextFile = fileInfo[fileIndex];

                string path = nextFile.FullName;

                // Keep only the characters allowed by the pattern, then strip anything the
                // file system rejects in a file name.
                string real_dis_file_name = Regex.Replace(nextFile.Name, @"[^\u4e00-\u9fa5\u0022\《\》\(\)\—\;\,\。\“\”\!\#\\_\-\.\,\:\(\)\'\[\]\【\】\+\·\:\<\>\w]", string.Empty);
                foreach (char invalidChar in Path.GetInvalidFileNameChars())
                {
                    real_dis_file_name = real_dis_file_name.Replace(invalidChar.ToString(), string.Empty);
                }
                string dist_path = Path.Combine(textFolder.FullName, real_dis_file_name + ".txt");

                // Already converted: nothing to do.
                if (File.Exists(dist_path))
                {
                    continue;
                }

                ConverterFactory converterFactory = new ConverterFactory();
                bool             failed           = false;

                try
                {
                    // Extension of the file itself, without the dot, lower-cased culture-independently.
                    string file_type = Path.GetExtension(path).TrimStart('.').ToLowerInvariant();

                    ConvertCore file_converter = converterFactory.GetConverter(file_type, runningEnv);
                    if (file_converter == null)
                    {
                        // No converter for this file type: ignore the file.
                        continue;
                    }

                    string text = file_converter.ConvertToString(path, runningEnv.CheckData.Blocklist);
                    if (string.IsNullOrEmpty(text))
                    {
                        // Nothing to write. Record the failure explicitly instead of relying on
                        // FileInfo.Length throwing for the output file that was never created.
                        failed = true;
                    }
                    else
                    {
                        if (text.Length > RunningEnv.ProgramParam.MaxWords)
                        {
                            // Drop everything beyond the maximum length.
                            text = text.Substring(0, RunningEnv.ProgramParam.MaxWords);
                        }

                        File.WriteAllText(dist_path, text, Encoding.GetEncoding("GBK"));

                        // Output that does not exceed the configured thresholds is removed and reported.
                        if (new FileInfo(dist_path).Length <= runningEnv.CheckData.MinBytes || text.Length <= runningEnv.CheckData.MinWords)
                        {
                            // File.Delete is a no-op when the file is already gone.
                            File.Delete(dist_path);
                            failed = true;
                        }
                    }
                }
                catch (Exception)
                {
                    // Best effort: any conversion or I/O failure just marks this file as failed.
                    failed = true;
                }

                if (failed)
                {
                    // Serialise appends from concurrently running workers of this method.
                    // NOTE(review): other readers/writers of ErrorPaperList are not visible here
                    // and would have to take the same lock to be fully safe.
                    lock (runningEnv.CheckingData.ErrorPaperList)
                    {
                        runningEnv.CheckingData.ErrorPaperList.AddLast(nextFile.Name);
                    }
                }
            }
        }