예제 #1
0
        /// <summary>
        /// Runs OCR on the mailing address region of each input PDF in parallel, overlays the
        /// recognized text onto the file, and records each file's page count.
        /// </summary>
        /// <param name="inputFiles">Paths of the PDF files to be processed.</param>
        /// <param name="currentFolder">Active input folder entry; passed through unchanged to the overlay step.</param>
        /// <returns>Dictionary mapping each processed file path to its page count.</returns>
        /// <exception cref="AggregateException">
        /// Thrown by <see cref="Parallel"/> when processing of one or more files fails.
        /// </exception>
        internal static Dictionary<string, int> Process(IEnumerable<string> inputFiles, KeyValuePair<string, string> currentFolder)
        {
            var filePageCounts = new ConcurrentDictionary<string, int>();

            SetupOcrWorkingDirectory();

            Parallel.ForEach(inputFiles, file =>
            {
                string returnedText;
                using (var document = new PDFDocument(file))
                {
                    // TryAdd keeps the first recorded count if the same path is listed more than once.
                    filePageCounts.TryAdd(file, document.Pages.Count);

                    // Each iteration creates its own engine, so no engine instance is shared between workers.
                    using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false))
                    {
                        ocrEngine.Startup(null, null, OcrWorkingDir, OcrAdvantageRuntimeDir);
                        try
                        {
                            ocrEngine.SpellCheckManager.SpellCheckEngine = OcrSpellCheckEngine.None;
                            returnedText = GetAddressBlockText(ocrEngine, document);
                        }
                        finally
                        {
                            // Shut the started engine down even when recognition throws,
                            // so a failed file does not leave an engine running.
                            ocrEngine.Shutdown();
                        }
                    }
                }

                // The overlay runs only after the document above has been disposed.
                // NOTE(review): presumably so the file is no longer held open when it is rewritten - confirm.
                PdfUtility.OverlayOcrText(returnedText, file, currentFolder);
            });

            // Hand callers a plain dictionary snapshot rather than the concurrent collection.
            return filePageCounts.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
        }
예제 #2
0
        /// <summary>
        /// Entry point of the IHeart OCR and Merge Utility (created by: Michael Quinton).
        ///
        /// For every configured input folder the input PDF files are gathered, OCR is
        /// performed on them, and the files are merged in ascending page count order.
        /// When all folders have been handled, the working input directory is emptied,
        /// the input files are archived and old error logs are deleted.
        /// </summary>
        private static void Main()
        {
            Console.WriteLine("IHeart Media OCR and Merge Utility");

            // Nothing to do when the input check fails.
            if (!CheckForInputFiles())
            {
                return;
            }

            SetLeadtoolsLicense();

            foreach (var folder in Constants.InputFolders)
            {
                // Start every folder from an empty working directory.
                Constants.InputDirectory.DeleteAllContents();
                GetInputFiles(folder);

                if (!Constants.InputDirectory.IsEmpty())
                {
                    LoadInputFileList();

                    _filePageCounts = LeadToolsOcr.Process(InputFiles, folder);
                    PdfUtility.MergeAscendingPageCount(folder, _filePageCounts);

                    // Reset the shared state before the next folder is handled.
                    InputFiles.Clear();
                    _filePageCounts.Clear();

                    // NOTE(review): explicit collection is presumably meant to release
                    // resources held by the OCR/PDF objects before the next cleanup - confirm.
                    GC.Collect();
                    GC.WaitForPendingFinalizers();
                }
            }

            Constants.InputDirectory.DeleteAllContents();
            ArchiveInputFiles();
            DeleteOldErrorLogs();
        }