/// <summary>
/// Worker entry point: converts this worker's share of the files found directly in
/// the source folder to text, writing one GBK-encoded ".txt" file per input file
/// into the text folder.
/// </summary>
/// <param name="thread_payload">
/// A <c>string[]</c> holding, in order: the task number of this worker (parsed as an
/// integer), the source folder path, and the destination text folder path.
/// </param>
/// <remarks>
/// Files are taken at indices taskNo, taskNo + stride, taskNo + 2 * stride, ... where
/// the stride is <c>runningEnv.SettingData.ConvertThreadCnt</c>.
/// A file is skipped without being reported when its output already exists or when
/// no converter is available for its extension. Any other failure (converter returned
/// null, output at or below the configured minimum size, or any exception) adds the
/// file name to <c>runningEnv.CheckingData.ErrorPaperList</c>.
/// </remarks>
private void FileConvert(object thread_payload)
{
    // Unpack the arguments.
    string[] param = (string[])thread_payload;
    int taskNo = int.Parse(param[0]);
    DirectoryInfo sourceFolder = new DirectoryInfo(param[1]);
    DirectoryInfo textFolder = new DirectoryInfo(param[2]);

    // Both the source and the destination folder must already exist.
    if (!sourceFolder.Exists || !textFolder.Exists)
    {
        runningEnv.UIContext.BeginInvoke(new Action(() =>
        {
            MessageBox.Show(runningEnv.UIContext, "数据损坏,任务失败!", "错误");
        }));
        return;
    }

    // Files waiting to be converted.
    FileInfo[] fileInfo = sourceFolder.GetFiles();

    // Process only the files assigned to this worker.
    int addNumo = int.Parse(runningEnv.SettingData.ConvertThreadCnt.ToString());
    for (int FileInfoNo = taskNo; FileInfoNo < fileInfo.Length; FileInfoNo += addNumo)
    {
        FileInfo NextFile = fileInfo[FileInfoNo];
        // Path.Combine instead of a hard-coded "\\" keeps the paths valid on any platform.
        string path = Path.Combine(sourceFolder.FullName, NextFile.Name);
        string dist_path = Path.Combine(textFolder.FullName, NextFile.Name + ".txt");

        // Output already present: treat the file as converted.
        if (File.Exists(dist_path))
        {
            continue;
        }

        ConverterFactory converterFactory = new ConverterFactory();
        bool failed = false;
        try
        {
            // Extension = text after the last '.', lower-cased culture-independently
            // so the result does not depend on the current UI culture.
            string[] pathParts = path.ToLowerInvariant().Split('.');
            string file_type = pathParts[pathParts.Length - 1];

            ConvertCore file_converter = converterFactory.GetConverter(file_type, runningEnv);
            if (file_converter == null)
            {
                // No converter for this type: ignore the file, it is not an error.
                continue;
            }

            string text = file_converter.ConvertToString(path);
            if (text == null)
            {
                // Nothing usable came back; report it explicitly instead of
                // relying on a NullReferenceException further down.
                failed = true;
            }
            else
            {
                // Drop everything beyond the maximum length.
                if (text.Length > RunningEnv.ProgramParam.MaxWords)
                {
                    text = text.Substring(0, RunningEnv.ProgramParam.MaxWords);
                }

                File.WriteAllText(dist_path, text, Encoding.GetEncoding("GBK"));

                // Output that does not exceed the configured thresholds is removed
                // and the file is reported as failed.
                if (new FileInfo(dist_path).Length <= runningEnv.CheckData.MinBytes
                    || text.Length <= runningEnv.CheckData.MinWords)
                {
                    if (File.Exists(dist_path))
                    {
                        File.Delete(dist_path);
                    }
                    failed = true;
                }
            }
        }
        catch (Exception)
        {
            // Best effort: any conversion or I/O error marks this file as failed
            // and the loop moves on to the next one.
            failed = true;
        }

        if (failed)
        {
            // NOTE(review): this method looks like it runs on several worker threads at
            // once; if ErrorPaperList is a plain LinkedList, AddLast is not thread-safe
            // and needs synchronisation shared with its other users - confirm.
            runningEnv.CheckingData.ErrorPaperList.AddLast(NextFile.Name);
        }
    }
}
/// <summary>
/// Worker entry point: converts this worker's share of the files returned by
/// <c>Utils.GetFileInfoRecursion(sourceFolder)</c> to text, writing one GBK-encoded
/// ".txt" file per input file into the text folder.
/// </summary>
/// <param name="thread_payload">
/// A <c>string[]</c> holding, in order: the task number of this worker (parsed as an
/// integer), the source folder path, and the destination text folder path.
/// </param>
/// <remarks>
/// Files are taken at indices taskNo, taskNo + stride, taskNo + 2 * stride, ... where
/// the stride is <c>runningEnv.SettingData.ConvertThreadCnt</c>.
/// The output file name is the source file name with disallowed characters removed,
/// followed by ".txt".
/// A file is skipped without being reported when its output already exists or when
/// no converter is available for its extension. Any other failure (converter returned
/// null or empty text, output at or below the configured minimum size, or any
/// exception) adds the file name to <c>runningEnv.CheckingData.ErrorPaperList</c>.
/// </remarks>
private void FileConvert(object thread_payload)
{
    // Unpack the arguments.
    string[] param = (string[])thread_payload;
    int taskNo = int.Parse(param[0]);
    DirectoryInfo sourceFolder = new DirectoryInfo(param[1]);
    DirectoryInfo textFolder = new DirectoryInfo(param[2]);

    // Both the source and the destination folder must already exist.
    if (!sourceFolder.Exists || !textFolder.Exists)
    {
        runningEnv.UIContext.BeginInvoke(new Action(() =>
        {
            MessageBox.Show(runningEnv.UIContext, "数据损坏,任务失败!", "错误");
        }));
        return;
    }

    // Files waiting to be converted.
    FileInfo[] fileInfo = Utils.GetFileInfoRecursion(sourceFolder).ToArray();

    // Loop-invariant helpers for building the output file name: a pattern matching
    // every character outside the allow-list, and the platform's invalid name chars.
    Regex disallowedNameChars = new Regex(@"[^\u4e00-\u9fa5\u0022\《\》\(\)\—\;\,\。\“\”\!\#\\_\-\.\,\:\(\)\'\[\]\【\】\+\·\:\<\>\w]");
    char[] invalidFileNameChars = Path.GetInvalidFileNameChars();

    // Process only the files assigned to this worker.
    int addNumo = int.Parse(runningEnv.SettingData.ConvertThreadCnt.ToString());
    for (int FileInfoNo = taskNo; FileInfoNo < fileInfo.Length; FileInfoNo += addNumo)
    {
        FileInfo NextFile = fileInfo[FileInfoNo];
        string path = NextFile.FullName;

        // Build the output name: drop characters outside the allow-list, then any
        // character the file system does not accept in a file name.
        string real_dis_file_name = disallowedNameChars.Replace(NextFile.Name, string.Empty);
        foreach (char rInvalidChar in invalidFileNameChars)
        {
            real_dis_file_name = real_dis_file_name.Replace(rInvalidChar.ToString(), string.Empty);
        }
        string dist_path = Path.Combine(textFolder.FullName, real_dis_file_name + ".txt");

        // Output already present: treat the file as converted.
        if (File.Exists(dist_path))
        {
            continue;
        }

        ConverterFactory converterFactory = new ConverterFactory();
        bool failed = false;
        try
        {
            // Extension = text after the last '.', lower-cased culture-independently
            // so the result does not depend on the current UI culture.
            string[] pathParts = path.ToLowerInvariant().Split('.');
            string file_type = pathParts[pathParts.Length - 1];

            ConvertCore file_converter = converterFactory.GetConverter(file_type, runningEnv);
            if (file_converter == null)
            {
                // No converter for this type: ignore the file, it is not an error.
                continue;
            }

            string text = file_converter.ConvertToString(path, runningEnv.CheckData.Blocklist);
            if (string.IsNullOrEmpty(text))
            {
                // Nothing to write. Report the failure explicitly instead of relying
                // on FileInfo.Length throwing for the output file that was never created.
                failed = true;
            }
            else
            {
                // Drop everything beyond the maximum length.
                if (text.Length > RunningEnv.ProgramParam.MaxWords)
                {
                    text = text.Substring(0, RunningEnv.ProgramParam.MaxWords);
                }

                File.WriteAllText(dist_path, text, Encoding.GetEncoding("GBK"));

                // Output that does not exceed the configured thresholds is removed
                // and the file is reported as failed.
                if (new FileInfo(dist_path).Length <= runningEnv.CheckData.MinBytes
                    || text.Length <= runningEnv.CheckData.MinWords)
                {
                    if (File.Exists(dist_path))
                    {
                        File.Delete(dist_path);
                    }
                    failed = true;
                }
            }
        }
        catch (Exception)
        {
            // Best effort: any conversion or I/O error marks this file as failed
            // and the loop moves on to the next one.
            failed = true;
        }

        if (failed)
        {
            // NOTE(review): this method looks like it runs on several worker threads at
            // once; if ErrorPaperList is a plain LinkedList, AddLast is not thread-safe
            // and needs synchronisation shared with its other users - confirm.
            runningEnv.CheckingData.ErrorPaperList.AddLast(NextFile.Name);
        }
    }
}