/// <summary>
/// Uses parallel processing to perform OCR on the mailing address region of the pdf files.
/// Calls other methods to update the pdf files with that OCR information and builds a
/// dictionary for file paths with associated page counts.
/// </summary>
/// <param name="inputFiles">List of files to be processed.</param>
/// <param name="currentFolder">Active input directory; passed through unchanged to the OCR text overlay step.</param>
/// <returns>Dictionary of file paths and associated page counts.</returns>
/// <exception cref="ArgumentNullException"><paramref name="inputFiles"/> is null.</exception>
/// <exception cref="AggregateException">One or more files failed while being processed in the parallel loop.</exception>
internal static Dictionary<string, int> Process(List<string> inputFiles, KeyValuePair<string, string> currentFolder)
{
    // Fail fast, before any working-directory setup side effects take place.
    if (inputFiles == null)
    {
        throw new ArgumentNullException(nameof(inputFiles));
    }

    // Iterations run concurrently, so page counts are collected in a thread-safe dictionary.
    var filePageCounts = new ConcurrentDictionary<string, int>();

    SetupOcrWorkingDirectory();

    Parallel.ForEach(inputFiles, file =>
    {
        string returnedText = null;

        using (var document = new PDFDocument(file))
        {
            // TryAdd keeps the first recorded count if the same path appears more than once.
            filePageCounts.TryAdd(file, document.Pages.Count);

            // Each iteration creates, starts and shuts down its own engine instance.
            using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false))
            {
                ocrEngine.Startup(null, null, ocrWorkingDir, ocrAdvantageRuntimeDir);
                try
                {
                    ocrEngine.SpellCheckManager.SpellCheckEngine = OcrSpellCheckEngine.None;
                    returnedText = GetAddressBlockText(ocrEngine, document);
                }
                finally
                {
                    // Shut the started engine down even when recognition throws.
                    ocrEngine.Shutdown();
                }
            }
        }

        // The document is disposed before the overlay step runs
        // (presumably so the overlay can rewrite the file -- confirm against PdfUtility).
        PdfUtility.OverlayOcrText(returnedText, file, currentFolder);
    });

    // Hand callers a plain dictionary snapshot rather than the concurrent collection.
    return filePageCounts.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
}