/// <summary>
/// Background-worker handler that converts one input file to TIFF via
/// <c>PdfUtilities.ConvertPdf2Tiff</c> and moves the result to a caller-chosen path.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">
/// <c>e.Argument</c> is read as an <c>ArrayList</c>: element 0 is the input file path,
/// element 1 is the target file path. <c>e.Result</c> receives the target path once
/// the converted file has been moved there.
/// </param>
private void backgroundWorkerConvertPdf_DoWork(object sender, DoWorkEventArgs e)
{
    ArrayList parameters = (ArrayList)e.Argument;
    string sourcePath = (string)parameters[0];
    string destinationPath = (string)parameters[1];

    string convertedPath = PdfUtilities.ConvertPdf2Tiff(sourcePath);

    // Clear out any existing file at the destination before moving the new one in;
    // File.Move does not overwrite.
    File.Delete(destinationPath);
    File.Move(convertedPath, destinationPath);

    e.Result = destinationPath;
}
/// <summary>
/// Background-worker handler that converts a batch of input files to TIFF via
/// <c>PdfUtilities.ConvertPdf2Tiff</c>, placing each result next to its source file
/// under the same base name with a <c>.tif</c> extension.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">
/// <c>e.Argument</c> is read as a <c>string[]</c> of input file paths.
/// <c>e.Result</c> is overwritten after every file, so on completion it holds the
/// target path of the last file processed (and is left untouched for an empty array).
/// </param>
private void backgroundWorkerConvertPdf_DoWork(object sender, DoWorkEventArgs e)
{
    string[] sourcePaths = (string[])e.Argument;

    for (int index = 0; index < sourcePaths.Length; index++)
    {
        string sourcePath = sourcePaths[index];
        string convertedPath = PdfUtilities.ConvertPdf2Tiff(sourcePath);

        // Target = <source directory>/<source base name>.tif
        string folder = Path.GetDirectoryName(sourcePath);
        string tiffName = Path.GetFileNameWithoutExtension(sourcePath) + ".tif";
        string destinationPath = Path.Combine(folder, tiffName);

        // Clear out any existing file at the destination; File.Move does not overwrite.
        File.Delete(destinationPath);
        File.Move(convertedPath, destinationPath);

        e.Result = destinationPath;
    }
}
/// <summary>
/// Performs OCR for bulk/batch and console operations.
/// </summary>
/// <param name="imageFile">Image file. A path ending in <c>.pdf</c> (any letter case) is converted to TIFF before recognition.</param>
/// <param name="outputFile">Output file without extension</param>
/// <param name="langCode">language code</param>
/// <param name="pageSegMode">page segmentation mode</param>
/// <param name="outputFormat">format of output file. Possible values: <c>text</c>, <c>text+</c> (with post-corrections), <c>hocr</c></param>
/// <param name="deskew">deskew</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="imageFile"/> or <paramref name="outputFormat"/> is <c>null</c>.
/// </exception>
public static void PerformOCR(string imageFile, string outputFile, string langCode, string pageSegMode, string outputFormat, bool deskew)
{
    // Fail fast, before any directory is created, instead of hitting a
    // NullReferenceException part-way through.
    if (imageFile == null)
    {
        throw new System.ArgumentNullException("imageFile");
    }
    if (outputFormat == null)
    {
        throw new System.ArgumentNullException("outputFormat");
    }

    // Make sure the directory that will hold the output exists.
    DirectoryInfo dir = Directory.GetParent(outputFile);
    if (dir != null && !dir.Exists)
    {
        dir.Create();
    }

    bool postprocess = "text+" == outputFormat;

    OCR<Image> ocrEngine = new OCRImages();
    ocrEngine.PageSegMode = pageSegMode;
    ocrEngine.Language = langCode;
    // The engine is given the format name without the "+" marker ("text+" -> "text");
    // the post-correction pass is applied below by this method.
    ocrEngine.OutputFormat = outputFormat.Replace("+", string.Empty);
    ocrEngine.OutputFile = outputFile;
    ocrEngine.Deskew = deskew;

    string convertedTiff = null;
    try
    {
        // convert PDF to TIFF
        // Ordinal, case-insensitive extension check: no lowered copy of the path and
        // no dependence on the current culture's casing rules.
        if (imageFile.EndsWith(".pdf", System.StringComparison.OrdinalIgnoreCase))
        {
            convertedTiff = PdfUtilities.ConvertPdf2Tiff(imageFile);
            imageFile = convertedTiff;
        }

        ocrEngine.ProcessFile(imageFile);
    }
    finally
    {
        // Remove the intermediate TIFF so repeated PDF runs do not accumulate files.
        // NOTE(review): assumes ConvertPdf2Tiff returns a disposable working file — confirm.
        if (convertedTiff != null)
        {
            try
            {
                File.Delete(convertedTiff);
            }
            catch (IOException)
            {
                // Best-effort cleanup; do not mask an exception from the OCR step.
            }
            catch (System.UnauthorizedAccessException)
            {
                // Best-effort cleanup; do not mask an exception from the OCR step.
            }
        }
    }

    // post-corrections for text+ output
    if (postprocess)
    {
        string filename = outputFile + ".txt";
        string result = File.ReadAllText(filename);

        // postprocess to correct common OCR errors
        result = Processor.PostProcess(result, langCode);
        // correct letter cases
        result = TextUtilities.CorrectLetterCases(result);

        // Overwrite the text file in place (append = false) as UTF-8.
        using (StreamWriter sw = new StreamWriter(filename, false, new System.Text.UTF8Encoding()))
        {
            sw.Write(result);
        }
    }
}