/// <summary>
/// Creates and starts an OCR engine, loads the supplied document writer options into it and
/// configures its codecs and threading settings.
/// </summary>
/// <param name="engineType">The type of OCR engine to create.</param>
/// <param name="documentWriterOptions">
/// Serialized document writer options; they are wrapped in a <see cref="MemoryStream"/> and
/// loaded into the engine's document writer.
/// </param>
/// <param name="useThunk">Passed through unchanged to <c>OcrEngineManager.CreateEngine</c>.</param>
/// <returns>The started and configured engine. Ownership passes to the caller.</returns>
/// <remarks>
/// If any step after creation fails, the engine is disposed before the exception is rethrown,
/// so a failed call does not leave a live engine behind.
/// </remarks>
private static IOcrEngine CreateEngine(OcrEngineType engineType, byte[] documentWriterOptions, bool useThunk)
{
    IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(engineType, useThunk);

    try
    {
        ocrEngine.Startup(null, null, null, null);

        using (MemoryStream ms = new MemoryStream(documentWriterOptions))
        {
            ocrEngine.DocumentWriterInstance.LoadOptions(ms);
        }

        RasterCodecs codecs = ocrEngine.RasterCodecsInstance;

        // Use the RasterizeDocumentOptions to default loading document files at 300 DPI
        codecs.Options.RasterizeDocument.Load.XResolution = 300;
        codecs.Options.RasterizeDocument.Load.YResolution = 300;
        codecs.Options.Pdf.Load.EnableInterpolate = true;
        codecs.Options.Load.AutoFixImageResolution = true;

        // We fine-tuned our app to only run a certain number of threads. The OCR engine can spawn
        // its own threads to increase the performance of a single recognition process; however, we
        // do not want that in this scenario. These extra threads would decrease the overall
        // performance of our app and we would not be able to keep track of how many threads are
        // actually running.
        IOcrSettingManager settingManager = ocrEngine.SettingManager;

        // Disable multi-threaded recognition (only when the engine exposes the setting)
        if (settingManager.IsSettingNameSupported("Recognition.Threading.MaximumThreads"))
        {
            settingManager.SetIntegerValue("Recognition.Threading.MaximumThreads", 1);
        }

        // Disable multi-threaded auto-zoning (only when the engine exposes the setting)
        if (settingManager.IsSettingNameSupported("Recognition.Zoning.DisableMultiThreading"))
        {
            settingManager.SetBooleanValue("Recognition.Zoning.DisableMultiThreading", true);
        }

        return ocrEngine;
    }
    catch
    {
        // The engine never reaches the caller on failure, so release it here instead of leaking it.
        ocrEngine.Dispose();
        throw;
    }
}
/// <summary>
/// Work-item callback that OCRs a single source file and saves the recognized document to the
/// work item's destination directory.
/// </summary>
/// <param name="stateInfo">
/// The <c>WorkItemData</c> for this item (source file, destination directory, output format,
/// document writer options, OCR engine and first-try flag).
/// </param>
/// <remarks>
/// Reports the outcome through <c>OnSuccess</c> or <c>OnError</c>; exceptions raised during the
/// conversion are caught and turned into an error message. If the batch has been aborted the
/// method returns without reporting anything. In every case the outstanding work-item counter is
/// decremented, and the batch-finished event is set when it reaches zero. The OCR engine comes
/// from the work item and is not disposed here.
/// </remarks>
private void ThreadProc(object stateInfo)
{
    WorkItemData data = (WorkItemData)stateInfo;

    // Becomes true once the document is created and its writer options are loaded. A failure
    // after this point happens during page loading/recognition/saving and is handled through the
    // quarantine logic in the catch block below.
    bool passedCriticalStage = false;

    try
    {
        // See if we have been canceled
        lock (_abortedLockObject)
        {
            if (_aborted)
            {
                return;
            }
        }

        string destinationFile = Path.Combine(data.DestinationDirectory, Path.GetFileName(data.SourceFile));
        IOcrEngine ocrEngine = data.OcrEngine;

        // Destination is "<source file name>.<format extension>" inside the destination directory
        string extension = DocumentWriter.GetFormatFileExtension(data.Format);
        destinationFile = string.Concat(destinationFile, ".", extension);

        // Remove an existing LTD output file before saving a new one
        if (data.Format == DocumentFormat.Ltd && File.Exists(destinationFile))
        {
            File.Delete(destinationFile);
        }

        // Create a document and add the pages
        using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
        {
            // Get the image number of pages
            int imagePageCount;
            RasterCodecs codecs = ocrDocument.RasterCodecsInstance;

            using (CodecsImageInfo imageInfo = codecs.GetInformation(data.SourceFile, true))
            {
                // Fallback used when the engine does not expose the setting below.
                // NOTE(review): unlike the setting value, this fallback is not multiplied by 1024 - confirm the intended units.
                long maximumMemorySize = 42187;
                IOcrSettingManager settingManager = ocrEngine.SettingManager;

                // Get the maximum size of the bitmap from the setting
                if (settingManager.IsSettingNameSupported("Recognition.MaximumPageConventionalMemorySize"))
                {
                    int maximumConventionalMemorySize = settingManager.GetIntegerValue("Recognition.MaximumPageConventionalMemorySize");
                    maximumMemorySize = (long)maximumConventionalMemorySize * 1024;
                }

                SetRecommendedLoadingOptions(codecs, imageInfo, maximumMemorySize);
                imagePageCount = imageInfo.TotalPages;
            }

            // Set the DocumentWriter options
            using (MemoryStream ms = new MemoryStream(data.DocumentWriterOptions))
            {
                ocrDocument.DocumentWriterInstance.LoadOptions(ms);
            }

            passedCriticalStage = true;

            // Recognize and add the pages one at a time, checking for cancellation before each page
            for (int pageNumber = 1; pageNumber <= imagePageCount; pageNumber++)
            {
                lock (_abortedLockObject)
                {
                    if (_aborted)
                    {
                        return;
                    }
                }

                var image = codecs.Load(data.SourceFile, pageNumber);
                bool pageCreated = false;

                try
                {
                    using (var ocrPage = ocrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose))
                    {
                        // From here on the image is handed to the page (AutoDispose sharing mode)
                        pageCreated = true;

                        ocrPage.Recognize(null);
                        ocrDocument.Pages.Add(ocrPage);
                    }
                }
                finally
                {
                    // If CreatePage failed, no page exists to release the image, so do it here
                    if (!pageCreated && image != null)
                    {
                        image.Dispose();
                    }
                }
            }

            // Save
            ocrDocument.Save(destinationFile, data.Format, null);
        }

        OnSuccess(destinationFile);
    }
    catch (Exception ex)
    {
        string message;

        if (!passedCriticalStage)
        {
            // Failed before recognition started: plain error, no quarantine handling
            message = string.Format("Error '{0}' while converting file '{1}'", ex.Message, data.SourceFile);
        }
        else if (data.FirstTry)
        {
            // First failure for this file after the critical stage: quarantine it
            message = string.Format("Error '{0}' while converting file '{1}' (first time, quarantined)", ex.Message, data.SourceFile);
            AddToQuarantine(data.SourceFile);
        }
        else
        {
            // Not the first try, so the file is not added to the quarantine again
            message = string.Format("Error '{0}' while converting file '{1}' (quarantined error)", ex.Message, data.SourceFile);
        }

        OnError(message);
    }
    finally
    {
        // Always account for this work item, including on abort and on error
        if (Interlocked.Decrement(ref _workItemCount) == 0)
        {
            _batchFinishedEvent.Set();
        }
    }
}