/// <summary>
/// Uses parallel processing to perform OCR on the mailing address region of the pdf files.
/// For every file the page count is recorded, the address block text is recognized, and the
/// recognized text is handed to <c>PdfUtility.OverlayOcrText</c> to update the file.
/// </summary>
/// <param name="inputFiles">List of files to be processed.</param>
/// <param name="currentFolder">Active input directory; passed through unchanged to the overlay step.</param>
/// <returns>Dictionary of file paths and associated page counts.</returns>
internal static Dictionary<string, int> Process(IEnumerable<string> inputFiles, KeyValuePair<string, string> currentFolder)
{
    // Concurrent collection because the loop body below runs on multiple threads.
    var filePageCounts = new ConcurrentDictionary<string, int>();

    SetupOcrWorkingDirectory();

    Parallel.ForEach(inputFiles, file =>
    {
        string returnedText;

        using (var document = new PDFDocument(file))
        {
            // TryAdd leaves an existing entry untouched, so a path listed twice keeps its first count.
            filePageCounts.TryAdd(file, document.Pages.Count);

            // A separate engine instance is created for each file processed.
            using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false))
            {
                ocrEngine.Startup(null, null, OcrWorkingDir, OcrAdvantageRuntimeDir);

                try
                {
                    ocrEngine.SpellCheckManager.SpellCheckEngine = OcrSpellCheckEngine.None;
                    returnedText = GetAddressBlockText(ocrEngine, document);
                }
                finally
                {
                    // Shut the started engine down even when recognition throws, so it is
                    // never disposed while still running.
                    ocrEngine.Shutdown();
                }
            }
        }

        // The overlay runs only after the document above has been disposed.
        PdfUtility.OverlayOcrText(returnedText, file, currentFolder);
    });

    // Hand callers a plain dictionary snapshot rather than the concurrent collection.
    return filePageCounts.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
}
/// <summary>
/// IHeart OCR and Merge Utility created by: Michael Quinton
///
/// Entry point. For each configured input folder, the group of input PDF files
/// is run through OCR on the mailing address, the OCR output is overlaid with a
/// first page marking onto the files, and all files are merged in page count
/// order. The resulting file is then moved to the appropriate drop folder.
/// </summary>
private static void Main()
{
    Console.WriteLine("IHeart Media OCR and Merge Utility");

    // Nothing to do when no input files are waiting.
    if (!CheckForInputFiles())
    {
        return;
    }

    SetLeadtoolsLicense();

    foreach (var folder in Constants.InputFolders)
    {
        // Start every folder from an empty working input directory.
        Constants.InputDirectory.DeleteAllContents();
        GetInputFiles(folder);

        if (Constants.InputDirectory.IsEmpty())
        {
            continue;
        }

        LoadInputFileList();

        _filePageCounts = LeadToolsOcr.Process(InputFiles, folder);
        PdfUtility.MergeAscendingPageCount(folder, _filePageCounts);

        // Reset the shared state before the next folder is handled.
        InputFiles.Clear();
        _filePageCounts.Clear();

        // Explicit collection kept from the original implementation.
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }

    // Final cleanup once every folder has been visited.
    Constants.InputDirectory.DeleteAllContents();
    ArchiveInputFiles();
    DeleteOldErrorLogs();
}