/// <summary>
/// Stores the current OCR engine type in the application settings, then disposes the
/// OCR document, the OCR engine and the raster codecs before running the base close handling.
/// </summary>
/// <param name="e">Close event data, passed through to the base implementation.</param>
protected override void OnFormClosed(FormClosedEventArgs e)
{
    // Persist the engine type that was last in use (only written when an engine exists)
    Properties.Settings userSettings = new Properties.Settings();
    bool hasEngine = _ocrEngine != null;
    if (hasEngine)
    {
        userSettings.OcrEngineType = _ocrEngine.EngineType.ToString();
    }

    userSettings.Save();

    // Release order: document first, then the engine, then the codecs
    if (_ocrDocument != null)
    {
        _ocrDocument.Dispose();
        _ocrDocument = null;
    }

    // Disposing the OCR engine also shuts it down
    if (_ocrEngine != null)
    {
        _ocrEngine.Dispose();
        _ocrEngine = null;
    }

    if (_rasterCodecs != null)
    {
        _rasterCodecs.Dispose();
        _rasterCodecs = null;
    }

    base.OnFormClosed(e);
}
/// <summary>
/// Creates the dialog, remembering the TWAIN session, the output file name and format,
/// and creating a new OCR document from the supplied engine's document manager.
/// </summary>
/// <param name="twainSession">TWAIN session stored for later use by the dialog.</param>
/// <param name="ocrEngine">OCR engine whose document manager creates the working document.</param>
/// <param name="documentFileName">Output document file name stored for later use.</param>
/// <param name="format">Output document format stored for later use.</param>
public ProcessDialog(TwainSession twainSession, IOcrEngine ocrEngine, string documentFileName, DocumentFormat format)
{
    InitializeComponent();

    this._twainSession = twainSession;

    // The working document is created up front from the engine handed in by the caller
    this._document = ocrEngine.DocumentManager.CreateDocument();

    this._documentFileName = documentFileName;
    this._format = format;
}
/// <summary>
/// Runs OCR on a single image: enables the requested language, configures spell checking,
/// loads the image into a new OCR document, auto-zones it and hands over to <c>Worker</c>.
/// </summary>
/// <param name="sFolder">Folder stored in <c>_sFolder</c> for later use.</param>
/// <param name="sImg">Image file path. Only bmp files</param>
/// <param name="sLang">Language code, e.g. IT, EN, FR. Case is ignored; "zh" is mapped to "zh-Hant".</param>
/// <returns>
/// The value returned by <c>Worker</c>, or an empty string when an exception was caught and logged.
/// </returns>
public String Start(String sFolder, String sImg, String sLang)
{
    String sRet = "";
    try
    {
        // Language codes are identifiers, not linguistic text: lower-case them culture-invariantly
        // (a culture-sensitive ToLower turns "IT" into "ıt" under Turkish culture settings).
        sLang = sLang.ToLowerInvariant();
        _sImgFile = sImg;
        if (sLang == "zh")
        {
            sLang = "zh-Hant";
        }
        _sLang = sLang;
        _sFolder = sFolder;

        _ocrEngine.LanguageManager.EnableLanguages(new string[] { sLang });
        string[] enabledLanguages = _ocrEngine.LanguageManager.GetEnabledLanguages();
        Console.WriteLine("Current enabled languages in the engine are:");
        foreach (string enabledLanguage in enabledLanguages)
        {
            // Get the friendly name of this language using the .NET CultureInfo class.
            // This is diagnostic output only, so an unknown culture name must not abort the OCR run.
            string englishName;
            try
            {
                englishName = new CultureInfo(enabledLanguage).EnglishName;
            }
            catch (ArgumentException)
            {
                englishName = enabledLanguage;
            }
            Console.WriteLine(" {0} ({1})", enabledLanguage, englishName);
        }

        // Spell check
        IOcrSpellCheckManager spellCheckManager = _ocrEngine.SpellCheckManager;

        // Get the spell languages supported (languages with a dictionary)
        string[] spellLanguages = spellCheckManager.GetSupportedSpellLanguages();
        foreach (string spellLanguage in spellLanguages)
        {
            Console.WriteLine(spellLanguage);
        }

        if (spellCheckManager.IsSpellLanguageSupported(sLang))
        {
            // Yes, set it
            spellCheckManager.SpellLanguage = sLang;
            spellCheckManager.Enabled = true;
            Console.WriteLine("Current spell language: {0}", spellCheckManager.SpellLanguage);
        }
        else
        {
            spellCheckManager.Enabled = false;
        }

        // NOTE(review): a previously assigned _ocrDocument is replaced here without being disposed;
        // confirm who owns the old instance before adding a Dispose call.
        _ocrDocument = _ocrEngine.DocumentManager.CreateDocument();

        // The loaded image is kept alive until recognition has finished and is always released afterwards
        using (RasterImage image = _codecs.Load(sImg))
        {
            _ocrDocument.Pages.Clear();
            _ocrDocument.Pages.AddPage(image, null);
            _ocrDocument.Pages.AutoZone(null);
            _ocrDocument.Pages.UpdateFillMethod();
            sRet = Worker();
        }
    }
    catch (Exception ex)
    {
        Program.MainForm.AddLog(String.Format("Exception Worker:{0}", ex.Message), "");
    }
    return sRet;
}
/// <summary>
/// Recognizes the current OCR page, saves it to a document file named after the image label text
/// plus the selected format's extension, refreshes the viewer and then either reports the
/// output location or opens the generated file with the associated external application.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void _btnRecognize_Click(object sender, EventArgs e)
{
    try
    {
        using (WaitCursor busyCursor = new WaitCursor())
        {
            DocumentFormat selectedFormat = _documentFormatSelector.SelectedFormat;
            String outputFileName = _lblImageFileName.Text + "." + DocumentWriter.GetFormatFileExtension(selectedFormat);

            _ocrPage.Recognize(null);

            // A temporary document is used only to write the recognized page to disk
            using (IOcrDocument outputDocument = _ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
            {
                outputDocument.Pages.Add(_ocrPage);
                outputDocument.Save(outputFileName, selectedFormat, null);
            }

            // Engine will correct the page deskew before AutoZone or Recognize,
            // so the page image is fetched again from the engine.
            _viewer.Image = _ocrPage.GetRasterImage();
            _viewer.Refresh();
            UpdateMyControls();

            // When "View Final Document" is unchecked only the save location is reported;
            // otherwise the saved file itself is opened, so no message is needed.
            if (!_cbViewFinalDocument.Checked)
            {
                Messager.ShowInformation(this, String.Format("The output document file was saved at ({0})", outputFileName));
                return;
            }

            if (!File.Exists(outputFileName))
            {
                Messager.ShowError(this, "Unable to open generated results file with external viewing application.\nThe system cannot find the file specified");
                return;
            }

            try
            {
                Process.Start(outputFileName);
            }
            catch
            {
                Messager.ShowError(this, "Unable to open generated results file with external viewing application");
            }
        }
    }
    catch (Exception ex)
    {
        Messager.ShowError(this, ex.Message);
    }
}
/// <summary>
/// Initializes the form, unlocks LEADTOOLS support, creates and starts the OCR engine
/// and creates the OCR document the form works with.
/// </summary>
public MainForm()
{
    InitializeComponent();

    Leadtools.Demos.Support.Unlock();

    // Create the engine, start it with all-default (null) arguments, then create the document from it
    this._ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Plus, false);
    this._ocrEngine.Startup(null, null, null, null);

    this._ocrDocument = this._ocrEngine.DocumentManager.CreateDocument();
}
/// <summary>
/// Deskews <paramref name="count"/> pages of <paramref name="imageToClean"/> in place, starting at
/// <paramref name="startIndex"/>. When the clean-up OCR engine is started, the deskew angle reported by
/// an OCR page is used to rotate the image; otherwise a <c>DeskewCommand</c> is run on each page.
/// Any exception is swallowed and "Not recognized" is written to the console.
/// </summary>
/// <param name="imageToClean">Image whose pages are modified.</param>
/// <param name="startIndex">First page number to process (assigned to <c>imageToClean.Page</c>).</param>
/// <param name="count">Number of consecutive pages to process.</param>
private void CleanupImage(RasterImage imageToClean, int startIndex, int count)
{
    try
    {
        int endIndex = startIndex + count;
        bool engineStarted = cleanUpOcrEngine != null && cleanUpOcrEngine.IsStarted;

        if (!engineStarted)
        {
            // No OCR engine available: fall back to the image-processing deskew command
            for (int pageIndex = startIndex; pageIndex < endIndex; pageIndex++)
            {
                imageToClean.Page = pageIndex;

                DeskewCommand deskew = new DeskewCommand();

                // Images taller than 3500 get a different flag set
                deskew.Flags = (imageToClean.Height > 3500)
                    ? DeskewCommandFlags.DocumentAndPictures |
                      DeskewCommandFlags.DoNotPerformPreProcessing |
                      DeskewCommandFlags.UseNormalDetection |
                      DeskewCommandFlags.DoNotFillExposedArea
                    : DeskewCommandFlags.DeskewImage |
                      DeskewCommandFlags.DoNotFillExposedArea;

                deskew.Run(imageToClean);
            }

            return;
        }

        // Deskew using the angle detected by the OCR engine
        using (IOcrDocument scratchDocument = cleanUpOcrEngine.DocumentManager.CreateDocument())
        {
            for (int pageIndex = startIndex; pageIndex < endIndex; pageIndex++)
            {
                imageToClean.Page = pageIndex;
                scratchDocument.Pages.AddPage(imageToClean, null);

                // Rotate by the negated deskew angle, scaled by 10 for RotateCommand
                int correction = -scratchDocument.Pages[0].GetDeskewAngle();
                RasterColor fill = RasterColor.FromKnownColor(RasterKnownColor.White);
                RotateCommand rotate = new RotateCommand(correction * 10, RotateCommandFlags.Bicubic, fill);
                rotate.Run(imageToClean);

                // Keep the scratch document at a single page for the next iteration
                scratchDocument.Pages.Clear();
            }
        }
    }
    catch (Exception)
    {
        Console.WriteLine("Not recognized");
    }
}
/// <summary>
/// Uses an instance of the LEADTools OCR Engine to "read" the text
/// in the pre-defined OCR region representing the mailing address
/// area (a 4 x 1.4 inch rectangle at x = 0, y = 2 on page 1).
/// </summary>
/// <remarks>
/// NOTE(review): the original comment states this method is thread safe; the method itself
/// touches no shared state, but that also depends on the supplied engine and document — confirm.
/// </remarks>
/// <param name="ocrEngine">Started OCR engine used to create the temporary OCR document.</param>
/// <param name="document">PDF document whose first page is rasterized and recognized.</param>
/// <returns>
/// The recognized text of the address zone converted to upper case, or <c>null</c> when the
/// engine returns no text for the zone.
/// </returns>
private static string GetAddressBlockText(IOcrEngine ocrEngine, PDFDocument document)
{
    string returnedText = null;

    using (var codecs = new RasterCodecs())
    // The page image returned by GetPageImage must be disposed as well as the copy made from it
    using (RasterImage pageImage = document.GetPageImage(codecs, 1))
    using (var image = new RasterImage(pageImage))
    using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument())
    using (IOcrPage ocrPage = ocrDocument.Pages.AddPage(image, null))
    {
        // Restrict recognition to the address block region
        var myZone = new OcrZone();
        myZone.Bounds = new LogicalRectangle(0, 2, 4, 1.4, LogicalUnit.Inch);
        ocrPage.Zones.Add(myZone);

        ocrPage.Recognize(null);

        // Guard against a missing result instead of failing with a NullReferenceException
        string zoneText = ocrPage.GetText(0);
        if (zoneText != null)
        {
            returnedText = zoneText.ToUpper();
        }
    }

    return returnedText;
}
/// <summary>
/// Deskews <paramref name="count"/> pages of <paramref name="imageToClean"/> in place, starting at
/// <paramref name="startIndex"/>. When the OCR engine is started, the deskew angle reported by an OCR
/// page is used to rotate the image; otherwise a <c>DeskewCommand</c> is run on each page.
/// </summary>
/// <param name="imageToClean">Image whose pages are modified.</param>
/// <param name="startIndex">First page number to process (assigned to <c>imageToClean.Page</c>).</param>
/// <param name="count">Number of consecutive pages to process.</param>
/// <returns><c>true</c> when all pages were processed; <c>false</c> when any exception occurred.</returns>
private bool CleanupImage(RasterImage imageToClean, int startIndex, int count)
{
    try
    {
        if (this.IsStartedOcrEngine())
        {
            // Deskew using the angle detected by the OCR engine
            using (IOcrDocument workDocument = this.TheOcrEngine.DocumentManager.CreateDocument())
            {
                int page = startIndex;
                while (page < startIndex + count)
                {
                    imageToClean.Page = page;
                    workDocument.Pages.AddPage(imageToClean, null);

                    // Rotate by the negated deskew angle, scaled by 10 for RotateCommand
                    int negatedAngle = -workDocument.Pages[0].GetDeskewAngle();
                    RotateCommand rotation = new RotateCommand(
                        negatedAngle * 10,
                        RotateCommandFlags.Bicubic,
                        RasterColor.FromKnownColor(RasterKnownColor.White));
                    rotation.Run(imageToClean);

                    // Keep the work document at a single page for the next iteration
                    workDocument.Pages.Clear();
                    page++;
                }
            }
        }
        else
        {
            // No started engine: fall back to the image-processing deskew command
            int page = startIndex;
            while (page < startIndex + count)
            {
                imageToClean.Page = page;

                DeskewCommand deskew = new DeskewCommand();
                bool tallImage = imageToClean.Height > 3500;
                if (!tallImage)
                {
                    deskew.Flags = DeskewCommandFlags.DeskewImage |
                                   DeskewCommandFlags.DoNotFillExposedArea;
                }
                else
                {
                    deskew.Flags = DeskewCommandFlags.DocumentAndPictures |
                                   DeskewCommandFlags.DoNotPerformPreProcessing |
                                   DeskewCommandFlags.UseNormalDetection |
                                   DeskewCommandFlags.DoNotFillExposedArea;
                }

                deskew.Run(imageToClean);
                page++;
            }
        }

        return true;
    }
    catch (Exception)
    {
        return false;
    }
}
/// <summary>
/// Makes <paramref name="ocrDocument"/> the current document: disposes the previous document,
/// remembers the source file name, selects the first page, rebuilds the word lists and
/// refreshes the image and UI state.
/// </summary>
/// <param name="ocrDocument">New current document; its first page becomes the current page.</param>
/// <param name="documentFileName">File name stored in <c>_lastDocumentFile</c>.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="ocrDocument"/> is null.</exception>
private void SetDocument(IOcrDocument ocrDocument, string documentFileName)
{
    // Fail before touching any state; previously a null argument disposed the old document
    // and then crashed with a NullReferenceException half-way through.
    if (ocrDocument == null)
    {
        throw new System.ArgumentNullException("ocrDocument");
    }

    // Delete the old document if it exists, unless the caller passed the current instance
    // again (disposing it would leave the form working on a disposed document).
    if (_ocrDocument != null && !object.ReferenceEquals(_ocrDocument, ocrDocument))
    {
        _ocrDocument.Dispose();
    }

    _lastDocumentFile = documentFileName;
    _ocrDocument = ocrDocument;
    _ocrPage = _ocrDocument.Pages[0];

    BuildWordLists();
    _wordTextBox.Text = string.Empty;

    SetImage(_ocrPage.GetRasterImage());
    UpdateUIState();
}
/// <summary>
/// Loads an image file, binarizes it, adds it as a new page to <paramref name="ocrDocument"/>
/// and recognizes that page. Failures are reported on the standard error stream and do not
/// propagate to the caller.
/// </summary>
/// <param name="filename">Path of the image file to load; also echoed to the console.</param>
/// <param name="ocrDocument">OCR document that receives the recognized page.</param>
public static void exe_ocr(string filename, IOcrDocument ocrDocument)
{
    Console.WriteLine(filename);

    try
    {
        using (RasterCodecs rasterCodecs = new RasterCodecs())
        {
            rasterCodecs.ThrowExceptionsOnInvalidImages = false;

            using (RasterImage rasterImage = rasterCodecs.Load(filename))
            {
                AutoBinarizeCommand command = new AutoBinarizeCommand();
                command.Run(rasterImage);

                IOcrPage page = ocrDocument.Pages.AddPage(rasterImage, null);
                if (page != null)
                {
                    page.UpdateNativeFillMethod();
                    page.Recognize(null);
                }
            }
        }
    }
    catch (Exception e)
    {
        // Still best-effort (the caller keeps going), but the failure is no longer silent
        Console.Error.WriteLine("add image failed ({0}): {1}", filename, e.Message);
    }
}
/// <summary>
/// Creates the dialog for <paramref name="ocrDocument"/>: adds one list entry per document page
/// (localized "page" label followed by the 1-based page number), selects the first entry and
/// switches the words list to right-to-left layout with a Times New Roman font for the Arabic engine.
/// </summary>
/// <param name="ocrDocument">Document whose pages are listed.</param>
public RecognizedWordsDialog(IOcrDocument ocrDocument)
{
    InitializeComponent();

    _ocrDocument = ocrDocument;

    int pageNumber = 0;
    while (pageNumber < _ocrDocument.Pages.Count)
    {
        pageNumber++;
        _pagesListBox.Items.Add(DemosGlobalization.GetResxString(GetType(), "Resx_Page") + pageNumber.ToString());
    }

    bool hasPages = _pagesListBox.Items.Count > 0;
    if (hasPages)
    {
        _pagesListBox.SelectedIndex = 0;
    }

    // Adjust the words list box for Arabic OCR so the words display better
    bool isArabicEngine = _ocrDocument.Engine.EngineType == OcrEngineType.OmniPageArabic;
    if (isArabicEngine)
    {
        _wordsListBox.RightToLeft = RightToLeft.Yes;
        _wordsListBox.Font = new System.Drawing.Font("Times New Roman", 10);
    }
}
/// <summary>
/// Asks the user for an output file (.rtf or .txt), recognizes the image currently shown in the
/// viewer and saves the result in the format matching the chosen extension. Does nothing when the
/// viewer has no image or the dialog is cancelled.
/// </summary>
private void SaveText()
{
    if (!_imageViewer.HasImage)
    {
        return;
    }

    // The dialog is disposable; release its native resources when done
    using (SaveFileDialog dlg = new SaveFileDialog())
    {
        dlg.Filter = "Rich Text File (.rtf)|*.rtf|Text (.txt)|*.txt";
        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        // Compare the extension case-insensitively so "x.RTF" is written as RTF, not as plain text.
        // Anything other than .rtf is saved as text.
        string ext = Path.GetExtension(dlg.FileName);
        DocumentFormat outputFormat = string.Equals(ext, ".rtf", System.StringComparison.OrdinalIgnoreCase)
            ? DocumentFormat.Rtf
            : DocumentFormat.Text;

        using (IOcrDocument document = _ocrEngine.DocumentManager.CreateDocument())
        {
            document.Pages.AddPage(_imageViewer.Image, null);
            document.Pages.Recognize(null);

            // Configure the RTF writer: keep all objects and use framed text
            RtfDocumentOptions rtfOptions = _ocrEngine.DocumentWriterInstance.GetOptions(DocumentFormat.Rtf) as RtfDocumentOptions;
            if (rtfOptions != null)
            {
                rtfOptions.DropObjects = DocumentDropObjects.None;
                rtfOptions.TextMode = DocumentTextMode.Framed;
                _ocrEngine.DocumentWriterInstance.SetOptions(DocumentFormat.Rtf, rtfOptions);
            }

            document.Save(dlg.FileName, outputFormat, null);
        }
    }
}
/// <summary>
/// Creates the dialog and remembers the OCR document it operates on.
/// </summary>
/// <param name="ocrDocument">Document stored in <c>_ocrDocument</c>.</param>
public SaveRecognizedXmlDialog(IOcrDocument ocrDocument)
{
    InitializeComponent();

    // Keep a reference only; nothing is read from the document here
    this._ocrDocument = ocrDocument;
}
/// <summary>
/// Builds the save-document dialog: fills the format list (preferred formats first), initializes
/// the per-format option controls from the engine's document writer, selects the initial format
/// (falling back to PDF) and computes the initial output file name.
/// </summary>
/// <param name="ocrDocument">Document to save; its engine supplies the document writer and manager.</param>
/// <param name="initialFormat">Format to preselect when it is present in the list.</param>
/// <param name="initialFileName">Initial file name (only its file-name part is used when an output directory is given).</param>
/// <param name="isCustomFileName">When false and an output directory is given, the selected format's extension is appended.</param>
/// <param name="outputDir">Optional output directory; when null or empty <paramref name="initialFileName"/> is used as is.</param>
/// <param name="viewDocument">Initial state of the "view document" check box.</param>
public SaveDocumentDialog(IOcrDocument ocrDocument, DocumentFormat initialFormat, string initialFileName, bool isCustomFileName, string outputDir, bool viewDocument)
{
    InitializeComponent();

    _ocrDocument = ocrDocument;
    _outputDir = outputDir;

    // Formats listed here are shown first, in this order; all other formats follow
    // in the order Enum.GetValues reports them.
    DocumentFormat[] preferredFormats =
    {
        DocumentFormat.Ltd,
        DocumentFormat.Pdf,
        DocumentFormat.Docx,
        DocumentFormat.Rtf,
        DocumentFormat.Text,
        DocumentFormat.Doc,
        DocumentFormat.Xls,
        DocumentFormat.Html
    };

    List<DocumentFormat> remainingFormats = new List<DocumentFormat>();
    foreach (DocumentFormat value in Enum.GetValues(typeof(DocumentFormat)))
    {
        remainingFormats.Add(value);
    }

    List<DocumentFormat> orderedFormats = new List<DocumentFormat>();
    foreach (DocumentFormat preferred in preferredFormats)
    {
        orderedFormats.Add(preferred);
        remainingFormats.Remove(preferred);
    }

    orderedFormats.AddRange(remainingFormats);

    MyFormat pdfEntry = null;
    DocumentWriter writer = _ocrDocument.Engine.DocumentWriterInstance;
    IOcrDocumentManager documentManager = _ocrDocument.Engine.DocumentManager;
    string[] engineFormatNames = documentManager.GetSupportedEngineFormats();

    foreach (DocumentFormat format in orderedFormats)
    {
        // The "User" (engine native) format is only listed when the engine supports at least one
        bool skipFormat = (format == DocumentFormat.User) && (engineFormatNames.Length == 0);
        if (!skipFormat)
        {
            string displayName = (format == DocumentFormat.User)
                ? "Engine native"
                : DocumentWriter.GetFormatFriendlyName(format);

            MyFormat entry = new MyFormat(format, displayName);
            _formatComboBox.Items.Add(entry);

            if (entry.Format == initialFormat)
            {
                _formatComboBox.SelectedItem = entry;
            }
            else if (entry.Format == DocumentFormat.Pdf)
            {
                // Remembered as the fallback selection
                pdfEntry = entry;
            }
        }

        // Initialize the options page of this format (runs even when the format was not listed)
        switch (format)
        {
            case DocumentFormat.User:
                {
                    foreach (string engineFormatName in engineFormatNames)
                    {
                        string engineFriendlyName = documentManager.GetEngineFormatFriendlyName(engineFormatName);
                        MyEngineFormat engineEntry = new MyEngineFormat(engineFormatName, engineFriendlyName);
                        _userFormatNameComboBox.Items.Add(engineEntry);

                        if (engineEntry.Format == documentManager.EngineFormat)
                        {
                            _userFormatNameComboBox.SelectedItem = engineEntry;
                        }
                    }

                    if (_userFormatNameComboBox.SelectedItem == null && _userFormatNameComboBox.Items.Count > 0)
                    {
                        _userFormatNameComboBox.SelectedIndex = 0;
                    }
                }
                break;

            case DocumentFormat.Pdf:
                {
                    PdfDocumentOptions writerPdfOptions = writer.GetOptions(DocumentFormat.Pdf) as PdfDocumentOptions;

                    // Work on a clone in case the options are changed in the advanced PDF options dialog
                    _pdfOptions = writerPdfOptions.Clone() as PdfDocumentOptions;

                    foreach (PdfDocumentType pdfType in Enum.GetValues(typeof(PdfDocumentType)))
                    {
                        // PDFA does NOT support Arabic characters, so it is not offered for the Arabic OCR engine
                        if (pdfType == PdfDocumentType.PdfA && _ocrDocument.Engine.EngineType == OcrEngineType.OmniPageArabic)
                        {
                            continue;
                        }

                        _pdfDocumentTypeComboBox.Items.Add(pdfType);
                    }

                    _pdfDocumentTypeComboBox.SelectedItem = _pdfOptions.DocumentType;
                    _pdfImageOverTextCheckBox.Checked = _pdfOptions.ImageOverText;
                    _pdfLinearizedCheckBox.Checked = _pdfOptions.Linearized;

                    if (string.IsNullOrEmpty(_pdfOptions.Creator))
                    {
                        _pdfOptions.Creator = "LEADTOOLS PDFWriter";
                    }

                    if (string.IsNullOrEmpty(_pdfOptions.Producer))
                    {
                        _pdfOptions.Producer = "LEAD Technologies, Inc.";
                    }
                }
                break;

            case DocumentFormat.Doc:
                {
                    DocDocumentOptions docOptions = writer.GetOptions(DocumentFormat.Doc) as DocDocumentOptions;
                    _cbFramedDoc.Checked = docOptions.TextMode == DocumentTextMode.Framed;
                }
                break;

            case DocumentFormat.Docx:
                {
                    DocxDocumentOptions docxOptions = writer.GetOptions(DocumentFormat.Docx) as DocxDocumentOptions;
                    _cbFramedDocX.Checked = docxOptions.TextMode == DocumentTextMode.Framed;
                }
                break;

            case DocumentFormat.Rtf:
                {
                    RtfDocumentOptions rtfOptions = writer.GetOptions(DocumentFormat.Rtf) as RtfDocumentOptions;
                    _cbFramedRtf.Checked = rtfOptions.TextMode == DocumentTextMode.Framed;
                }
                break;

            case DocumentFormat.Html:
                {
                    HtmlDocumentOptions htmlOptions = writer.GetOptions(DocumentFormat.Html) as HtmlDocumentOptions;

                    foreach (DocumentFontEmbedMode embedMode in Enum.GetValues(typeof(DocumentFontEmbedMode)))
                    {
                        _htmlEmbedFontModeComboBox.Items.Add(embedMode);
                    }

                    _htmlEmbedFontModeComboBox.SelectedItem = htmlOptions.FontEmbedMode;
                    _htmlUseBackgroundColorCheckBox.Checked = htmlOptions.UseBackgroundColor;
                    _htmlBackgroundColorValueLabel.BackColor = MainForm.ConvertColor(htmlOptions.BackgroundColor);

                    // The background color controls follow the state of the check box
                    _htmlBackgroundColorLabel.Enabled = _htmlUseBackgroundColorCheckBox.Checked;
                    _htmlBackgroundColorValueLabel.Enabled = _htmlUseBackgroundColorCheckBox.Checked;
                    _htmlBackgroundColorButton.Enabled = _htmlUseBackgroundColorCheckBox.Checked;
                }
                break;

            case DocumentFormat.Text:
                {
                    TextDocumentOptions textOptions = writer.GetOptions(DocumentFormat.Text) as TextDocumentOptions;

                    foreach (TextDocumentType textType in Enum.GetValues(typeof(TextDocumentType)))
                    {
                        if (textType == TextDocumentType.Ansi)
                        {
                            // An ANSI setting is switched to Unicode
                            if (textOptions.DocumentType == TextDocumentType.Ansi)
                            {
                                textOptions.DocumentType = TextDocumentType.Unicode;
                            }

                            // ANSI is not offered for the Arabic OCR engine
                            if (_ocrDocument.Engine.EngineType == OcrEngineType.OmniPageArabic)
                            {
                                continue;
                            }
                        }

                        _textDocumentTypeComboBox.Items.Add(textType);
                    }

                    _textDocumentTypeComboBox.SelectedItem = textOptions.DocumentType;
                    _textAddPageNumberCheckBox.Checked = textOptions.AddPageNumber;
                    _textAddPageBreakCheckBox.Checked = textOptions.AddPageBreak;
                    _textFormattedCheckBox.Checked = textOptions.Formatted;
                }
                break;

            case DocumentFormat.AltoXml:
                {
                    AltoXmlDocumentOptions altoXmlOptions = writer.GetOptions(DocumentFormat.AltoXml) as AltoXmlDocumentOptions;

                    _altoXmlFileNameTextBox.Text = altoXmlOptions.FileName;
                    _altoXmlSoftwareCreatorTextBox.Text = altoXmlOptions.SoftwareCreator;
                    _altoXmlSoftwareNameTextBox.Text = altoXmlOptions.SoftwareName;
                    _altoXmlApplicationDescriptionTextBox.Text = altoXmlOptions.ApplicationDescription;
                    _altoXmlFormattedCheckBox.Checked = altoXmlOptions.Formatted;
                    _altoXmlIndentationTextBox.Text = altoXmlOptions.Indentation;
                    _altoXmlSort.Checked = altoXmlOptions.Sort;
                    _altoXmlPlainText.Checked = altoXmlOptions.PlainText;
                    _altoXmlShowGlyphInfo.Checked = altoXmlOptions.ShowGlyphInfo;
                    _altoXmlShowGlyphVariants.Checked = altoXmlOptions.ShowGlyphVariants;

                    foreach (AltoXmlMeasurementUnit unit in Enum.GetValues(typeof(AltoXmlMeasurementUnit)))
                    {
                        _altoXmlMeasurementUnit.Items.Add(unit);
                    }

                    _altoXmlMeasurementUnit.SelectedItem = altoXmlOptions.MeasurementUnit;
                }
                break;

            default:
                // Ltd, Emf, Xls, Pub, Mob, Svg and any other format have no options
                break;
        }
    }

    // Remove all the tab pages
    _optionsTabControl.TabPages.Clear();

    // If no format is selected, default to PDF
    if (_formatComboBox.SelectedIndex == -1)
    {
        if (pdfEntry == null)
        {
            _formatComboBox.SelectedIndex = -1;
        }
        else
        {
            _formatComboBox.SelectedItem = pdfEntry;
        }
    }

    _viewDocumentCheckBox.Checked = viewDocument;
    _initialFileName = initialFileName;
    _isCustomFileName = isCustomFileName;

    if (string.IsNullOrEmpty(_outputDir))
    {
        _fileNameTextBox.Text = initialFileName;
    }
    else
    {
        MyFormat selectedEntry = _formatComboBox.SelectedItem as MyFormat;

        // <output dir without trailing backslashes>\<file name part of the initial name>
        _fileNameTextBox.Text = _outputDir.TrimEnd('\\') + "\\" + Path.GetFileName(initialFileName);

        if (!_isCustomFileName)
        {
            _fileNameTextBox.Text += "." + GetFileExtension(selectedEntry.Format);
        }
    }

    _formatComboBox_SelectedIndexChanged(this, EventArgs.Empty);
    UpdateUIState();
}
/// <summary>
/// Refreshes the document information list: row 0 shows where the document is stored
/// ("Memory", "File", or "None" when there is no document) and row 1 shows its page count
/// (0 when there is no document).
/// </summary>
/// <param name="ocrDocument">Document to describe; may be null.</param>
public void RepopulateDocumentInformationControl(IOcrDocument ocrDocument)
{
    string storageText;
    if (ocrDocument == null)
    {
        storageText = "None";
    }
    else if (ocrDocument.IsInMemory)
    {
        storageText = "Memory";
    }
    else
    {
        storageText = "File";
    }

    _lvOcrDocumentInfo.Items[0].SubItems[1].Text = storageText;

    int pageCount = 0;
    if (ocrDocument != null)
    {
        pageCount = ocrDocument.Pages.Count;
    }

    _lvOcrDocumentInfo.Items[1].SubItems[1].Text = string.Format("{0}", pageCount);
}
/// <summary>
/// Sample entry point: sets the LEADTOOLS license, starts the Professional OCR engine, loads a
/// multi-page TIF into an OCR document, auto-zones and recognizes it, and saves the result as PDF.
/// The document and the engine are released even when a step fails.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string licenseFilePath = @"LEADTOOLS.lic";
    // NOTE(review): the developer key is embedded in source; consider loading it from configuration instead.
    string developerKey = @"mcxvXsdTqZbnbQrDM9FSk5+RAsBJLhAIot2m3qdpoDO8oK7YMWOw1z6YpXqhCnFE";
    RasterSupport.SetLicense(licenseFilePath, developerKey);

    // Assuming you added "using Leadtools.Codecs;", "using Leadtools.Forms.Ocr;" and
    // "using Leadtools.Forms.DocumentWriters;" at the beginning of this class

    // *** Step 1: Select the engine type and create an instance of the IOcrEngine interface.
    // The Professional engine is used, in the same process
    IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Professional, false);
    try
    {
        // *** Step 2: Startup the engine.
        // Default parameters, except for the explicit runtime directory
        ocrEngine.Startup(null, null, null, @"C:\LEADTOOLS 19\Bin\Common\OcrProfessionalRuntime64");

        // *** Step 3: Create an OCR document with one or more pages.
        using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument())
        {
            // Add all the pages of a multi-page TIF image to the document
            ocrDocument.Pages.AddPages(@"C:\Users\Public\Documents\LEADTOOLS Images\OCR1.tif", 1, -1, null);

            // *** Step 4: Establish zones on the page(s), either manually or automatically
            // Automatic zoning
            ocrDocument.Pages.AutoZone(null);

            // *** Step 5: (Optional) Set the active languages to be used by the OCR engine
            // Enable English and German languages
            ocrEngine.LanguageManager.EnableLanguages(new string[] { "en", "de" });

            // *** Step 6: (Optional) Set the spell checking engine
            // Enable the spell checking system
            ocrEngine.SpellCheckManager.SpellCheckEngine = OcrSpellCheckEngine.Native;

            // *** Step 7: (Optional) Set any special recognition module options
            // Force the first zone of the first page to be a Text zone. Auto-zoning may produce
            // no zones (or the file no pages), so only touch the zone when it exists.
            if (ocrDocument.Pages.Count > 0 && ocrDocument.Pages[0].Zones.Count > 0)
            {
                OcrZone ocrZone = ocrDocument.Pages[0].Zones[0];
                ocrZone.ZoneType = OcrZoneType.Text;
                ocrDocument.Pages[0].Zones[0] = ocrZone;
            }

            // *** Step 8: Recognize
            ocrDocument.Pages.Recognize(null);

            // *** Step 9: Save recognition results
            // Save the results to a PDF file
            ocrDocument.Save(@"C:\Users\Public\Documents\LEADTOOLS Images\Document.pdf", DocumentFormat.Pdf, null);
        }
    }
    finally
    {
        // *** Step 10: Shut down the OCR engine when finished (also on failure)
        if (ocrEngine.IsStarted)
        {
            ocrEngine.Shutdown();
        }

        ocrEngine.Dispose();
    }
}
/// <summary>
/// Loads the file named by <c>args["documentFileName"]</c> into a new in-memory OCR document,
/// recognizes the added page and, unless the dialog was cancelled, installs the document via
/// <c>SetDocument</c>. Errors are shown with <c>ShowError</c>; a document that was not handed
/// over is disposed.
/// </summary>
/// <param name="dlg">Progress dialog providing the progress callback and the cancel state.</param>
/// <param name="args">Operation arguments; must contain the "documentFileName" key.</param>
private void DoLoadAndRecognizeDocument(OcrProgressDialog dlg, Dictionary<string, object> args)
{
    OcrProgressCallback progress = dlg.OcrProgressCallback;

    // Without a progress callback the dialog shows description text instead of a progress bar
    bool describeSteps = (progress == null);

    // Stays non-null for as long as this method still owns the document
    IOcrDocument pendingDocument = null;

    try
    {
        string fileName = args["documentFileName"] as string;
        pendingDocument = _ocrEngine.DocumentManager.CreateDocument("", OcrCreateDocumentOptions.InMemory);

        IOcrPage loadedPage = null;

        // Step 1: load
        if (!dlg.IsCanceled)
        {
            if (describeSteps)
            {
                dlg.UpdateDescription("Loading the document (first page only)...");
            }

            loadedPage = pendingDocument.Pages.AddPage(fileName, progress);
        }

        // Step 2: recognize
        if (!dlg.IsCanceled)
        {
            if (describeSteps)
            {
                dlg.UpdateDescription("Recognizing the page(s) of the document...");
            }

            loadedPage.Recognize(progress);
        }

        // Step 3: not cancelled, so hand the document over and give up ownership
        if (!dlg.IsCanceled)
        {
            SetDocument(pendingDocument, fileName);
            pendingDocument = null;
        }
    }
    catch (Exception ex)
    {
        ShowError(ex);
    }
    finally
    {
        if (describeSteps)
        {
            dlg.EndOperation();
        }

        // Clean up a document that was never handed over
        if (pendingDocument != null)
        {
            pendingDocument.Dispose();
        }
    }
}
/// <summary>
/// Work item body: converts one source image file to a document in the requested format by
/// recognizing every page with the work item's OCR engine and saving the result into the
/// destination directory. Reports the outcome through <c>OnSuccess</c> / <c>OnError</c> and
/// signals <c>_batchFinishedEvent</c> when the last outstanding work item completes.
/// </summary>
/// <param name="stateInfo">The <c>WorkItemData</c> describing the conversion.</param>
private void ThreadProc(object stateInfo)
{
    WorkItemData data = (WorkItemData)stateInfo;
    IOcrEngine ocrEngine = null;

    // Set once setup is done; errors after this point quarantine the file on the first try
    bool passedCriticalStage = false;

    try
    {
        // See if we have canceled
        lock (_abortedLockObject)
        {
            if (_aborted)
            {
                return;
            }
        }

        ocrEngine = data.OcrEngine;

        // Convert this image file to a document: <destination dir>\<source file name>.<format extension>
        string extension = DocumentWriter.GetFormatFileExtension(data.Format);
        string destinationFile = Path.Combine(data.DestinationDirectory, Path.GetFileName(data.SourceFile));
        destinationFile = string.Concat(destinationFile, ".", extension);

        // An existing LTD output file is deleted before conversion
        if (data.Format == DocumentFormat.Ltd && File.Exists(destinationFile))
        {
            File.Delete(destinationFile);
        }

        // Create a document and add the pages
        using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
        {
            // Get the image number of pages
            int imagePageCount;
            RasterCodecs codecs = ocrDocument.RasterCodecsInstance;

            using (CodecsImageInfo imageInfo = codecs.GetInformation(data.SourceFile, true))
            {
                // Default used when the engine does not expose the setting below
                long maximumMemorySize = 42187;
                IOcrSettingManager settingManager = ocrEngine.SettingManager;

                // Get the maximum size of the bitmap from the setting
                if (settingManager.IsSettingNameSupported("Recognition.MaximumPageConventionalMemorySize"))
                {
                    int maximumConventionalMemorySize = settingManager.GetIntegerValue("Recognition.MaximumPageConventionalMemorySize");
                    maximumMemorySize = (long)maximumConventionalMemorySize * 1024;
                }

                SetRecommendedLoadingOptions(codecs, imageInfo, maximumMemorySize);
                imagePageCount = imageInfo.TotalPages;
            }

            // Set the DocumentWriter options
            using (MemoryStream ms = new MemoryStream(data.DocumentWriterOptions))
            {
                ocrDocument.DocumentWriterInstance.LoadOptions(ms);
            }

            passedCriticalStage = true;

            // Recognize and add pages
            for (int pageNumber = 1; pageNumber <= imagePageCount; pageNumber++)
            {
                lock (_abortedLockObject)
                {
                    if (_aborted)
                    {
                        return;
                    }
                }

                var image = codecs.Load(data.SourceFile, pageNumber);

                // With AutoDispose the page takes ownership of the image once it has been created.
                // If page creation fails, nobody owns the image, so it is released here.
                bool pageOwnsImage = false;
                try
                {
                    using (var ocrPage = ocrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose))
                    {
                        pageOwnsImage = true;
                        ocrPage.Recognize(null);
                        ocrDocument.Pages.Add(ocrPage);
                    }
                }
                finally
                {
                    if (!pageOwnsImage && image != null)
                    {
                        image.Dispose();
                    }
                }
            }

            // Save
            ocrDocument.Save(destinationFile, data.Format, null);
        }

        OnSuccess(destinationFile);
    }
    catch (Exception ex)
    {
        string message;
        if (passedCriticalStage && data.FirstTry)
        {
            message = string.Format("Error '{0}' while converting file '{1}' (first time, quarantined)", ex.Message, data.SourceFile);
            AddToQuarantine(data.SourceFile);
        }
        else if (passedCriticalStage && !data.FirstTry)
        {
            message = string.Format("Error '{0}' while converting file '{1}' (quarantined error)", ex.Message, data.SourceFile);
        }
        else
        {
            message = string.Format("Error '{0}' while converting file '{1}'", ex.Message, data.SourceFile);
        }

        OnError(message);
    }
    finally
    {
        // Only an engine that is not the one supplied by the work item is disposed here
        if (ocrEngine != null && ocrEngine != data.OcrEngine)
        {
            ocrEngine.Dispose();
        }

        // The last work item to finish signals the end of the batch
        if (Interlocked.Decrement(ref _workItemCount) == 0)
        {
            _batchFinishedEvent.Set();
        }
    }
}
/// <summary>
/// Builds the LD conversion dialog: fills the output format list (preferred formats first,
/// excluding the User and Ltd formats), initializes the per-format option controls from
/// <paramref name="docWriter"/>, selects the initial format (falling back to PDF) and applies
/// the initial LD file name.
/// </summary>
/// <param name="ocrDocument">OCR document stored for later use.</param>
/// <param name="docWriter">Document writer whose current options seed the option controls.</param>
/// <param name="initialFormat">Format to preselect when it is present in the list.</param>
/// <param name="initialLdFileName">Optional LD file name to show initially.</param>
/// <param name="viewDocument">Initial state of the "view document" check box.</param>
public ConvertLdDialog(IOcrDocument ocrDocument, DocumentWriter docWriter, DocumentFormat initialFormat, string initialLdFileName, bool viewDocument)
{
    InitializeComponent();

    _ocrDocument = ocrDocument;
    _docWriter = docWriter;

    // Formats listed here are shown first, in this order; all other formats follow
    // in the order Enum.GetValues reports them.
    DocumentFormat[] preferredFormats =
    {
        DocumentFormat.Pdf,
        DocumentFormat.Docx,
        DocumentFormat.Rtf,
        DocumentFormat.Text,
        DocumentFormat.Doc,
        DocumentFormat.Xls,
        DocumentFormat.Html
    };

    List<DocumentFormat> remainingFormats = new List<DocumentFormat>();
    foreach (DocumentFormat value in Enum.GetValues(typeof(DocumentFormat)))
    {
        remainingFormats.Add(value);
    }

    List<DocumentFormat> orderedFormats = new List<DocumentFormat>();
    foreach (DocumentFormat preferred in preferredFormats)
    {
        orderedFormats.Add(preferred);
        remainingFormats.Remove(preferred);
    }

    orderedFormats.AddRange(remainingFormats);

    MyFormat pdfEntry = null;

    foreach (DocumentFormat format in orderedFormats)
    {
        // The User (engine) and Ltd formats are never offered as conversion targets
        if (format == DocumentFormat.User || format == DocumentFormat.Ltd)
        {
            continue;
        }

        string displayName = DocumentWriter.GetFormatFriendlyName(format);
        string upperExtension = DocumentWriter.GetFormatFileExtension(format).ToUpper();

        MyFormat entry = new MyFormat(format, displayName, upperExtension);
        _formatComboBox.Items.Add(entry);

        if (entry.Format == initialFormat)
        {
            _formatComboBox.SelectedItem = entry;
        }
        else if (entry.Format == DocumentFormat.Pdf)
        {
            // Remembered as the fallback selection
            pdfEntry = entry;
        }

        // Initialize the options page of this format
        switch (format)
        {
            case DocumentFormat.Pdf:
                {
                    PdfDocumentOptions writerPdfOptions = docWriter.GetOptions(DocumentFormat.Pdf) as PdfDocumentOptions;

                    // Work on a clone in case the options are changed in the advanced PDF options dialog
                    _pdfOptions = writerPdfOptions.Clone() as PdfDocumentOptions;

                    foreach (PdfDocumentType pdfType in Enum.GetValues(typeof(PdfDocumentType)))
                    {
                        _pdfDocumentTypeComboBox.Items.Add(pdfType);
                    }

                    _pdfDocumentTypeComboBox.SelectedItem = _pdfOptions.DocumentType;
                    _pdfImageOverTextCheckBox.Checked = _pdfOptions.ImageOverText;
                    _pdfLinearizedCheckBox.Checked = _pdfOptions.Linearized;

                    if (string.IsNullOrEmpty(_pdfOptions.Creator))
                    {
                        _pdfOptions.Creator = "LEADTOOLS PDFWriter";
                    }

                    if (string.IsNullOrEmpty(_pdfOptions.Producer))
                    {
                        _pdfOptions.Producer = "LEAD Technologies, Inc.";
                    }
                }
                break;

            case DocumentFormat.Doc:
                {
                    DocDocumentOptions docOptions = docWriter.GetOptions(DocumentFormat.Doc) as DocDocumentOptions;
                    _cbFramedDoc.Checked = docOptions.TextMode == DocumentTextMode.Framed;
                }
                break;

            case DocumentFormat.Docx:
                {
                    DocxDocumentOptions docxOptions = docWriter.GetOptions(DocumentFormat.Docx) as DocxDocumentOptions;
                    _cbFramedDocX.Checked = docxOptions.TextMode == DocumentTextMode.Framed;
                }
                break;

            case DocumentFormat.Rtf:
                {
                    RtfDocumentOptions rtfOptions = docWriter.GetOptions(DocumentFormat.Rtf) as RtfDocumentOptions;
                    _cbFramedRtf.Checked = rtfOptions.TextMode == DocumentTextMode.Framed;
                }
                break;

            case DocumentFormat.Html:
                {
                    HtmlDocumentOptions htmlOptions = docWriter.GetOptions(DocumentFormat.Html) as HtmlDocumentOptions;

                    foreach (DocumentFontEmbedMode embedMode in Enum.GetValues(typeof(DocumentFontEmbedMode)))
                    {
                        _htmlEmbedFontModeComboBox.Items.Add(embedMode);
                    }

                    _htmlEmbedFontModeComboBox.SelectedItem = htmlOptions.FontEmbedMode;
                    _htmlUseBackgroundColorCheckBox.Checked = htmlOptions.UseBackgroundColor;
                    _htmlBackgroundColorValueLabel.BackColor = MainForm.ConvertColor(htmlOptions.BackgroundColor);

                    // The background color controls follow the state of the check box
                    _htmlBackgroundColorLabel.Enabled = _htmlUseBackgroundColorCheckBox.Checked;
                    _htmlBackgroundColorValueLabel.Enabled = _htmlUseBackgroundColorCheckBox.Checked;
                    _htmlBackgroundColorButton.Enabled = _htmlUseBackgroundColorCheckBox.Checked;
                }
                break;

            case DocumentFormat.Text:
                {
                    TextDocumentOptions textOptions = docWriter.GetOptions(DocumentFormat.Text) as TextDocumentOptions;

                    foreach (TextDocumentType textType in Enum.GetValues(typeof(TextDocumentType)))
                    {
                        _textDocumentTypeComboBox.Items.Add(textType);
                    }

                    _textDocumentTypeComboBox.SelectedItem = textOptions.DocumentType;
                    _textAddPageNumberCheckBox.Checked = textOptions.AddPageNumber;
                    _textAddPageBreakCheckBox.Checked = textOptions.AddPageBreak;
                    _textFormattedCheckBox.Checked = textOptions.Formatted;
                }
                break;

            case DocumentFormat.AltoXml:
                {
                    AltoXmlDocumentOptions altoXmlOptions = docWriter.GetOptions(DocumentFormat.AltoXml) as AltoXmlDocumentOptions;

                    _altoXmlFileNameTextBox.Text = altoXmlOptions.FileName;
                    _altoXmlSoftwareCreatorTextBox.Text = altoXmlOptions.SoftwareCreator;
                    _altoXmlSoftwareNameTextBox.Text = altoXmlOptions.SoftwareName;
                    _altoXmlApplicationDescriptionTextBox.Text = altoXmlOptions.ApplicationDescription;
                    _altoXmlFormattedCheckBox.Checked = altoXmlOptions.Formatted;
                    _altoXmlIndentationTextBox.Text = altoXmlOptions.Indentation;
                    _altoXmlSort.Checked = altoXmlOptions.Sort;
                    _altoXmlPlainText.Checked = altoXmlOptions.PlainText;
                    _altoXmlShowGlyphInfo.Checked = altoXmlOptions.ShowGlyphInfo;
                    _altoXmlShowGlyphVariants.Checked = altoXmlOptions.ShowGlyphVariants;

                    foreach (AltoXmlMeasurementUnit unit in Enum.GetValues(typeof(AltoXmlMeasurementUnit)))
                    {
                        _altoXmlMeasurementUnit.Items.Add(unit);
                    }

                    _altoXmlMeasurementUnit.SelectedItem = altoXmlOptions.MeasurementUnit;
                }
                break;

            default:
                // Emf, Xls, Pub, Mob, Svg and any other format have no options
                break;
        }
    }

    // Remove all the tab pages
    _optionsTabControl.TabPages.Clear();

    // If no format is selected, default to PDF
    if (_formatComboBox.SelectedIndex == -1)
    {
        if (pdfEntry == null)
        {
            _formatComboBox.SelectedIndex = -1;
        }
        else
        {
            _formatComboBox.SelectedItem = pdfEntry;
        }
    }

    _viewDocumentCheckBox.Checked = viewDocument;

    _formatComboBox_SelectedIndexChanged(this, EventArgs.Empty);

    bool hasInitialLdFile = !string.IsNullOrEmpty(initialLdFileName);
    if (hasInitialLdFile)
    {
        _ldFileNameTextBox.Text = initialLdFileName;
        UpdateOutputFileName();
    }

    UpdateUIState();
}