예제 #1
0
        /// <summary>
        /// Creates and starts an OCR engine, loads the supplied document writer options into it and
        /// configures its codecs and threading settings for use by this application's worker threads.
        /// </summary>
        /// <param name="engineType">The type of OCR engine to create.</param>
        /// <param name="documentWriterOptions">Serialized document writer options, loaded through a <see cref="MemoryStream"/>.</param>
        /// <param name="useThunk">Passed through unchanged to <c>OcrEngineManager.CreateEngine</c>.</param>
        /// <returns>The started and configured engine. The caller is responsible for disposing it.</returns>
        /// <remarks>
        /// If any step after creation fails, the engine is disposed before the exception is rethrown so
        /// that a half-initialized engine is not leaked.
        /// </remarks>
        private static IOcrEngine CreateEngine(OcrEngineType engineType, byte[] documentWriterOptions, bool useThunk)
        {
            IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(engineType, useThunk);

            try
            {
                ocrEngine.Startup(null, null, null, null);

                using (MemoryStream ms = new MemoryStream(documentWriterOptions))
                {
                    ocrEngine.DocumentWriterInstance.LoadOptions(ms);
                }

                RasterCodecs codecs = ocrEngine.RasterCodecsInstance;

                // Use the new RasterizeDocumentOptions to default loading document files at 300 DPI
                codecs.Options.RasterizeDocument.Load.XResolution = 300;
                codecs.Options.RasterizeDocument.Load.YResolution = 300;
                codecs.Options.Pdf.Load.EnableInterpolate         = true;
                codecs.Options.Load.AutoFixImageResolution        = true;

                // We fine-tuned our app to only run a certain number of threads. The OCR engine can spawn its own
                // threads to increase the performance of a single recognition process, however, we do not want this
                // in this scenario, these extra threads will decrease the overall performance of our app and we will
                // not be able to keep track of how many threads are actually running

                IOcrSettingManager settingManager = ocrEngine.SettingManager;

                // Disable multi-threaded recognition (only if this engine exposes the setting)
                if (settingManager.IsSettingNameSupported("Recognition.Threading.MaximumThreads"))
                {
                    settingManager.SetIntegerValue("Recognition.Threading.MaximumThreads", 1);
                }

                // Disable multi-threaded auto-zoning (only if this engine exposes the setting)
                if (settingManager.IsSettingNameSupported("Recognition.Zoning.DisableMultiThreading"))
                {
                    settingManager.SetBooleanValue("Recognition.Zoning.DisableMultiThreading", true);
                }

                return ocrEngine;
            }
            catch
            {
                // Nobody else holds a reference yet, so release the engine here and let the
                // original exception propagate to the caller unchanged.
                ocrEngine.Dispose();
                throw;
            }
        }
예제 #2
0
        /// <summary>
        /// Work-item callback that recognizes every page of one source file with the work item's OCR
        /// engine and saves the result as a document in the destination directory.
        /// </summary>
        /// <param name="stateInfo">The <c>WorkItemData</c> describing the conversion; it is cast unconditionally.</param>
        /// <remarks>
        /// Returns silently when the batch has been aborted. On success <c>OnSuccess</c> is called with the
        /// destination file name; on any exception <c>OnError</c> is called with a formatted message and, if the
        /// failure happened after the critical stage on a first try, the source file is added to quarantine.
        /// In every case the outstanding work item count is decremented and the batch-finished event is set
        /// when it reaches zero.
        /// </remarks>
        private void ThreadProc(object stateInfo)
        {
            WorkItemData data                = (WorkItemData)stateInfo;
            bool         passedCriticalStage = false;

            try
            {
                // See if we have canceled
                lock (_abortedLockObject)
                {
                    if (_aborted)
                    {
                        return;
                    }
                }

                string destinationFile = Path.Combine(data.DestinationDirectory, Path.GetFileName(data.SourceFile));

                // The engine is supplied by the work item; this method only uses it and never disposes it.
                IOcrEngine ocrEngine = data.OcrEngine;

                // Convert this image file to a document: the output keeps the source file name and gets
                // the extension of the target format appended.
                string extension = DocumentWriter.GetFormatFileExtension(data.Format);
                destinationFile = string.Concat(destinationFile, ".", extension);

                // NOTE(review): only LTD output is deleted up front, presumably because saving to an
                // existing LTD file appends rather than overwrites - confirm.
                if (data.Format == DocumentFormat.Ltd && File.Exists(destinationFile))
                {
                    File.Delete(destinationFile);
                }

                // Create a document and add the pages
                using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
                {
                    // Get the image number of pages
                    int imagePageCount;

                    RasterCodecs codecs = ocrDocument.RasterCodecsInstance;

                    using (CodecsImageInfo imageInfo = codecs.GetInformation(data.SourceFile, true))
                    {
                        // Fallback used when the engine does not expose the setting below.
                        // NOTE(review): unlike the setting value, this fallback is not multiplied by 1024 -
                        // confirm the intended units.
                        long maximumMemorySize            = 42187;
                        IOcrSettingManager settingManager = ocrEngine.SettingManager;

                        // Get the maximum size of the bitmap from the setting
                        if (settingManager.IsSettingNameSupported("Recognition.MaximumPageConventionalMemorySize"))
                        {
                            int maximumConventionalMemorySize = settingManager.GetIntegerValue("Recognition.MaximumPageConventionalMemorySize");
                            maximumMemorySize = (long)maximumConventionalMemorySize * 1024;
                        }

                        SetRecommendedLoadingOptions(codecs, imageInfo, maximumMemorySize);

                        imagePageCount = imageInfo.TotalPages;
                    }

                    // Set the DocumentWriter options
                    using (MemoryStream ms = new MemoryStream(data.DocumentWriterOptions))
                    {
                        ocrDocument.DocumentWriterInstance.LoadOptions(ms);
                    }

                    // From here on a failure is attributed to the file itself (see the catch block).
                    passedCriticalStage = true;

                    // Recognize and add pages (page numbers are 1-based)
                    for (int pageNumber = 1; pageNumber <= imagePageCount; pageNumber++)
                    {
                        lock (_abortedLockObject)
                        {
                            if (_aborted)
                            {
                                return;
                            }
                        }

                        var  image            = codecs.Load(data.SourceFile, pageNumber);
                        bool imageOwnedByPage = false;

                        try
                        {
                            using (var ocrPage = ocrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose))
                            {
                                // The page was created with AutoDispose, so it now owns the image.
                                imageOwnedByPage = true;

                                ocrPage.Recognize(null);
                                ocrDocument.Pages.Add(ocrPage);
                            }
                        }
                        finally
                        {
                            // If CreatePage threw, no page took ownership of the image; release it here
                            // so a failing page does not leak the loaded bitmap.
                            if (!imageOwnedByPage && image != null)
                            {
                                image.Dispose();
                            }
                        }
                    }

                    // Save
                    ocrDocument.Save(destinationFile, data.Format, null);
                }

                OnSuccess(destinationFile);
            }
            catch (Exception ex)
            {
                string message;

                if (!passedCriticalStage)
                {
                    // Failed before recognition started
                    message = string.Format("Error '{0}' while converting file '{1}'", ex.Message, data.SourceFile);
                }
                else if (data.FirstTry)
                {
                    // First failure during recognition/save: report it and quarantine the file
                    message = string.Format("Error '{0}' while converting file '{1}' (first time, quarantined)", ex.Message, data.SourceFile);
                    AddToQuarantine(data.SourceFile);
                }
                else
                {
                    // Not the first try for this file: report only
                    message = string.Format("Error '{0}' while converting file '{1}' (quarantined error)", ex.Message, data.SourceFile);
                }

                OnError(message);
            }
            finally
            {
                // Runs on success, failure and abort alike: the last work item to finish signals the batch.
                if (Interlocked.Decrement(ref _workItemCount) == 0)
                {
                    _batchFinishedEvent.Set();
                }
            }
        }