/// <summary>
/// Refreshes the words list box with the recognized words of the page that is
/// currently selected in the pages list box.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void _pagesListBox_SelectedIndexChanged(object sender, EventArgs e)
{
    // Always start from an empty list so words of a previous page never linger.
    _wordsListBox.Items.Clear();

    // A negative index means nothing is selected; indexing the page collection
    // with it would throw, so leave the (already cleared) list empty instead.
    int pageIndex = _pagesListBox.SelectedIndex;
    if (pageIndex < 0)
    {
        return;
    }

    IOcrPage ocrPage = _ocrDocument.Pages[pageIndex];
    if (!ocrPage.IsRecognized)
    {
        return;
    }

    IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();
    if (pageCharacters == null)
    {
        return;
    }

    // Each zone contributes its own word collection; list the words in zone order.
    foreach (IOcrZoneCharacters zoneCharacters in pageCharacters)
    {
        ICollection <OcrWord> words = zoneCharacters.GetWords();
        foreach (OcrWord word in words)
        {
            _wordsListBox.Items.Add(word.Value);
        }
    }
}
/// <summary>
/// Creates the zone properties dialog for the given OCR page, hosts the
/// update-zones control inside the container panel, fills the zones list and
/// selects the requested zone.
/// </summary>
/// <param name="ocrEngine">OCR engine handed on to the update-zones control.</param>
/// <param name="ocrPage">Page whose zones are listed.</param>
/// <param name="viewerControl">Viewer control passed to the update-zones control.</param>
/// <param name="selectedZoneIndex">Zone to select initially; a negative value selects the first zone.</param>
public ZonePropertiesDialog(IOcrEngine ocrEngine, IOcrPage ocrPage, OcrDemo.ViewerControl.ViewerControl viewerControl, int selectedZoneIndex)
{
    InitializeComponent();

    _ocrEngine = ocrEngine;
    _ocrPage = ocrPage;
    _viewerControl = viewerControl;
    _newZoneCount = 0;

    // Host the zone editor inside the dialog.
    _updateZonesControl = new OcrDemo.UpdateZonesControl.UpdateZonesControl(_viewerControl);
    _updateZonesControl.Action += _updateZonesControl_Action;
    _pnlContainer.Controls.Add(_updateZonesControl);

    // Fill the zones list with the selection handler detached, then re-attach it.
    _lbZonesList.SelectedIndexChanged -= _lbZonesList_SelectedIndexChanged;
    int zoneNumber = 0;
    while (zoneNumber < _ocrPage.Zones.Count)
    {
        _lbZonesList.Items.Add(new ZoneItem("Zone", zoneNumber));
        zoneNumber++;
    }
    _lbZonesList.SelectedIndexChanged += _lbZonesList_SelectedIndexChanged;

    _updateZonesControl.Activate(ocrEngine, ocrPage, _lbZonesList, _ocrPage.Zones);

    if (_lbZonesList.Items.Count > 0)
    {
        int initialIndex = selectedZoneIndex;
        if (initialIndex < 0)
        {
            initialIndex = 0;
        }

        _lbZonesList.SelectedIndex = initialIndex;
    }

    _lbZonesList.Select();
    UpdateUIState();
}
/// <summary>
/// Binds this control to an OCR engine, page, zones tree, zone collection and
/// cell list, then repopulates the cell-type and border-style combo boxes and
/// refreshes the UI controls.
/// </summary>
/// <param name="ocrEngine">OCR engine to remember.</param>
/// <param name="ocrPage">OCR page whose table zone manager supplies the supported cell types.</param>
/// <param name="tvZonesList">Tree view listing the zones.</param>
/// <param name="zones">Zone collection to remember.</param>
/// <param name="cells">Cell list to remember.</param>
public void Activate(IOcrEngine ocrEngine, IOcrPage ocrPage, TreeView tvZonesList, IOcrZoneCollection zones, IList <OcrZoneCell> cells)
{
    _ocrEngine = ocrEngine;
    _ocrPage = ocrPage;
    _tvZonesList = tvZonesList;
    _zones = zones;
    _cells = cells;

    // Cell type combo box: empty it, then list every supported cell type.
    _cmbCellType.Items.Clear();
    foreach (OcrZoneType cellType in ocrPage.TableZoneManager.GetSupportedCellTypes())
    {
        _cmbCellType.Items.Add(cellType);
    }

    // Border style combo boxes: all four sides offer the same enum values.
    _cmbLeftBorderStyle.Items.Clear();
    _cmbTopBorderStyle.Items.Clear();
    _cmbRightBorderStyle.Items.Clear();
    _cmbBottomBorderStyle.Items.Clear();
    foreach (OcrCellBorderLineStyle lineStyle in Enum.GetValues(typeof(OcrCellBorderLineStyle)))
    {
        _cmbLeftBorderStyle.Items.Add(lineStyle);
        _cmbTopBorderStyle.Items.Add(lineStyle);
        _cmbRightBorderStyle.Items.Add(lineStyle);
        _cmbBottomBorderStyle.Items.Add(lineStyle);
    }

    UpdateUIControls();
}
/// <summary>
/// Creates the dialog and stores the OCR engine and OCR page it was given.
/// </summary>
/// <param name="ocrEngine">OCR engine kept in <c>_ocrEngine</c>.</param>
/// <param name="ocrPage">OCR page kept in <c>_ocrPage</c>.</param>
public DetectPageLanguagesDialog(IOcrEngine ocrEngine, IOcrPage ocrPage)
{
    InitializeComponent();

    this._ocrPage = ocrPage;
    this._ocrEngine = ocrEngine;
}
/// <summary>
/// Reacts to a new OCR module being chosen: drops the current OCR page, loads
/// the image associated with the module, records the image file name on the
/// module's item data and updates the dependent controls.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void _cmbOcrModules_SelectedIndexChanged(object sender, EventArgs e)
{
    using (WaitCursor busyCursor = new WaitCursor())
    {
        if (_cmbOcrModules.SelectedIndex >= 0)
        {
            // The page built for the previous module is no longer valid.
            if (_ocrPage != null)
            {
                _ocrPage.Dispose();
                _ocrPage = null;
            }

            // Load the default image associated with the selected OCR module.
            LoadOcrModuleAssociatedImage();

            // Remember which image file the user associated with this module.
            int moduleIndex = _cmbOcrModules.SelectedIndex;
            MyItemData moduleData = (MyItemData)_cmbOcrModules.Items[moduleIndex];
            moduleData.ImageFileName = _lblImageFileName.Text;

            // Write the item back with this handler detached so the assignment
            // cannot re-enter it.
            _cmbOcrModules.SelectedIndexChanged -= _cmbOcrModules_SelectedIndexChanged;
            _cmbOcrModules.Items[moduleIndex] = moduleData;
            _cmbOcrModules.SelectedIndexChanged += _cmbOcrModules_SelectedIndexChanged;

            bool isOmrModule = (OcrZoneType)moduleData.ZoneType == OcrZoneType.Omr;
            _omrOptionsButton.Enabled = isOmrModule;

            UpdateMyControls();
        }
    }
}
/// <summary>
/// Points this annotation object at a zone of an OCR page and updates its
/// visibility, label text and colors accordingly.
/// </summary>
/// <param name="ocrPage">Page that owns the zone; may be null.</param>
/// <param name="zoneIndex">Index of the zone inside <paramref name="ocrPage"/>.</param>
/// <param name="isVisible">Whether the object itself is visible.</param>
/// <param name="isNameVisible">Whether the name label is visible.</param>
public void SetZone(IOcrPage ocrPage, int zoneIndex, bool isVisible, bool isNameVisible)
{
    _ocrPage = ocrPage;
    _zoneIndex = zoneIndex;
    IsVisible = isVisible;
    mylabel.IsVisible = isNameVisible;

    // Label and colors can only be derived from an existing zone.
    bool zoneExists = _ocrPage != null && _zoneIndex >= 0 && _zoneIndex < _ocrPage.Zones.Count;
    if (!zoneExists)
    {
        return;
    }

    OcrZone zone = _ocrPage.Zones[_zoneIndex];

    // Unnamed zones get a 1-based default caption.
    mylabel.Text = string.IsNullOrEmpty(zone.Name)
        ? "Zone " + (_zoneIndex + 1).ToString()
        : zone.Name;

    switch (zone.ZoneType)
    {
        case OcrZoneType.None:
        case OcrZoneType.Graphic:
        case OcrZoneType.Barcode:
            // These zone types get a translucent yellow fill and a blue outline.
            RasterColor fillColor = RasterColorConverter.FromColor(Color.FromArgb(32, Color.Yellow));
            this.Fill = AnnSolidColorBrush.Create(fillColor.ToString());
            this.Stroke = AnnStroke.Create(AnnSolidColorBrush.Create("Blue"), new LeadLengthD(1));
            break;

        default:
            // Every other zone type only gets a red outline.
            this.Stroke = AnnStroke.Create(AnnSolidColorBrush.Create("Red"), new LeadLengthD(1));
            break;
    }
}
/// <summary>
/// Sample entry point: recognizes the text of the first page of an image,
/// writes that text into a QR barcode on a blank letter-size 1-bpp image,
/// crops the image to the barcode, saves it as "QR.tif" and, on Windows,
/// opens the result through the shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    String fileToConvert = @"FILE PATH HERE";

    RasterSupport.SetLicense(@"C:\LEADTOOLS 20\Common\License\LEADTOOLS.LIC", System.IO.File.ReadAllText(@"C:\LEADTOOLS 20\Common\License\LEADTOOLS.LIC.KEY"));

    using (RasterCodecs codecs = new RasterCodecs())
    {
        using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD, false))
        {
            ocrEngine.Startup(null, null, null, @"C:\LEADTOOLS 20\Bin\Common\OcrLEADRuntime");

            // The page is created with AutoDispose, so the loaded image is handed over to it.
            using (IOcrPage ocrPage = ocrEngine.CreatePage(ocrEngine.RasterCodecsInstance.Load(fileToConvert, 1), OcrImageSharingMode.AutoDispose))
            {
                ocrPage.AutoZone(null);
                ocrPage.Recognize(null);
                string recognizedCharacters = ocrPage.GetText(-1);

                BarcodeEngine engine = new BarcodeEngine();
                int resolution = 300;

                // Blank 8.5 x 11 inch, 1 bit-per-pixel white canvas for the barcode.
                using (RasterImage image = RasterImage.Create((int)(8.5 * resolution), (int)(11.0 * resolution), 1, resolution, RasterColor.FromKnownColor(RasterKnownColor.White)))
                {
                    BarcodeWriter writer = engine.Writer;

                    // Direct casts instead of 'as': a type mismatch now fails right here with an
                    // InvalidCastException rather than as a NullReferenceException on first use.
                    QRBarcodeData data = (QRBarcodeData)BarcodeData.CreateDefaultBarcodeData(BarcodeSymbology.QR);
                    data.Bounds = new LeadRect(0, 0, image.ImageWidth, image.ImageHeight);

                    QRBarcodeWriteOptions writeOptions = (QRBarcodeWriteOptions)writer.GetDefaultOptions(data.Symbology);
                    writeOptions.XModule = 30;
                    writeOptions.HorizontalAlignment = BarcodeAlignment.Near;
                    writeOptions.VerticalAlignment = BarcodeAlignment.Near;

                    data.Value = recognizedCharacters;

                    // Let the writer compute the bounds the barcode really needs, then report them.
                    writer.CalculateBarcodeDataBounds(new LeadRect(0, 0, image.ImageWidth, image.ImageHeight), image.XResolution, image.YResolution, data, writeOptions);
                    Console.WriteLine("{0} by {1} pixels", data.Bounds.Width, data.Bounds.Height);

                    writer.WriteBarcode(image, data, writeOptions);

                    // Trim the canvas down to the barcode itself before saving.
                    CropCommand cmd = new CropCommand(new LeadRect(0, 0, data.Bounds.Width, data.Bounds.Height));
                    cmd.Run(image);

                    codecs.Save(image, "QR.tif", RasterImageFormat.CcittGroup4, 1);

                    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
                    {
                        // Process is IDisposable; disposing the wrapper releases its handle
                        // and does not terminate the launched viewer.
                        using (var process = new Process())
                        {
                            process.StartInfo = new ProcessStartInfo("QR.tif") { UseShellExecute = true };
                            process.Start();
                        }
                    }

                    Console.WriteLine();
                }
            }
        }
    }
}
/// <summary>
/// Returns the average character confidence of the page's first zone
/// (zone index 0), divided by 100.
/// </summary>
/// <param name="page">Page to inspect.</param>
/// <returns>
/// The average of <c>Confidence</c> over the zone's characters divided by 100,
/// or 0 when the page has no recognition data or no characters for zone 0.
/// </returns>
private static double GetConfidence(IOcrPage page)
{
    // No recognition data at all: report zero confidence instead of failing.
    var recognizedCharacters = page.GetRecognizedCharacters();
    if (recognizedCharacters == null)
    {
        return 0D;
    }

    // NOTE(review): assumes FindZoneCharacters may yield null when zone 0 has no data — confirm.
    var findZoneCharacters = recognizedCharacters.FindZoneCharacters(0);
    if (findZoneCharacters == null)
    {
        return 0D;
    }

    // DefaultIfEmpty keeps Average from throwing on an empty zone.
    return findZoneCharacters
        .DefaultIfEmpty()
        .Average(character => character.Confidence) / 100D;
}
/// <summary>
/// Returns the average character confidence of the page's first zone
/// (zone index 0), divided by 100.
/// </summary>
/// <param name="page">Page to inspect.</param>
/// <returns>
/// The average of <c>Confidence</c> over the zone's characters divided by 100,
/// or 0 when the page has no recognition data or no characters for zone 0.
/// </returns>
private static double GetConfidence(IOcrPage page)
{
    var recognizedCharacters = page.GetRecognizedCharacters();
    if (recognizedCharacters == null)
    {
        // Nothing was recognized on this page.
        return(0D);
    }

    var findZoneCharacters = recognizedCharacters.FindZoneCharacters(0);
    if (findZoneCharacters == null)
    {
        // NOTE(review): assumes a missing zone is reported as null — confirm.
        return(0D);
    }

    // DefaultIfEmpty keeps Average from throwing on an empty zone.
    return(findZoneCharacters
           .DefaultIfEmpty()
           .Average(character => character.Confidence) / 100D);
}
/// <summary>
/// Recognizes the image twice (once with the engine's current settings and
/// once with mobile image preprocessing switched on) and keeps the better
/// page. When the better page has fewer than 20 counted words, a third pass
/// with one additional full-page zone is tried as well.
/// </summary>
/// <param name="image">Source image; every pass works on its own clone.</param>
/// <returns>The page that was judged best; the caller owns it.</returns>
private IOcrPage DoublePass(RasterImage image)
{
    const string mobilePreprocessSetting = "Recognition.Preprocess.MobileImagePreprocess";

    // Remember the caller's setting so this method does not permanently switch
    // mobile preprocessing on (which would make the first pass of every later
    // call identical to its second pass).
    bool originalMobilePreprocess = _ocrEngine.SettingManager.GetBooleanValue(mobilePreprocessSetting);

    try
    {
        // First pass with the settings as they are on entry.
        IOcrPage page = _ocrEngine.CreatePage(image.Clone(), OcrImageSharingMode.AutoDispose);
        page.Recognize(null);

        // Second pass with mobile image preprocessing enabled.
        _ocrEngine.SettingManager.SetBooleanValue(mobilePreprocessSetting, true);
        IOcrPage mobilePage = _ocrEngine.CreatePage(image.Clone(), OcrImageSharingMode.AutoDispose);
        mobilePage.Recognize(null);

        // Compare the confidence of both pages.
        PageResults firstPassResults = GetPageConfidence(page);
        PageResults secondPassResults = GetPageConfidence(mobilePage);
        double confidenceDif = firstPassResults.Confidence - secondPassResults.Confidence;

        IOcrPage highestConfidence;
        PageResults pageResultsHighest;

        // The first pass only wins when it is ahead by more than 2.
        // The losing page owns its own image clone, so release it right away.
        if (confidenceDif > 2)
        {
            highestConfidence = page;
            pageResultsHighest = firstPassResults;
            mobilePage.Dispose();
        }
        else
        {
            highestConfidence = mobilePage;
            pageResultsHighest = secondPassResults;
            page.Dispose();
        }

        if (pageResultsHighest.TotalWords < 20)
        {
            // Few words found: retry on a copy with one zone covering the whole image.
            IOcrPage thirdPass = highestConfidence.Copy();
            thirdPass.Unrecognize();
            OcrZone singleZone = new OcrZone() { Bounds = new LeadRect(0, 0, image.Width, image.Height) };
            thirdPass.Zones.Add(singleZone);
            thirdPass.Recognize(null);

            PageResults thirdResults = GetPageConfidence(thirdPass);
            double confidenceDifThird = thirdResults.Confidence - pageResultsHighest.Confidence;

            // Compare the THIRD pass against the current best. The original tested
            // the first-vs-second difference here, so the third pass result itself
            // never influenced the decision.
            if (confidenceDifThird > 5)
            {
                highestConfidence = thirdPass;
            }

            // NOTE(review): whichever of thirdPass / the previous best is not returned is
            // still left undisposed here; disposing it is only safe if Copy() does not share
            // the underlying image with its source — confirm before adding the Dispose call.
        }

        return(highestConfidence);
    }
    finally
    {
        _ocrEngine.SettingManager.SetBooleanValue(mobilePreprocessSetting, originalMobilePreprocess);
    }
}
/// <summary>
/// Clips zone bounds to the area of the given OCR page.
/// </summary>
/// <param name="ocrPage">Page whose width and height define the clipping area.</param>
/// <param name="bounds">Bounds to clip.</param>
/// <returns>
/// <paramref name="bounds"/> unchanged when it is empty; otherwise its
/// intersection with the rectangle (0, 0, page width, page height).
/// </returns>
private static LeadRect RestrictZoneBoundsToPage(IOcrPage ocrPage, LeadRect bounds)
{
    if (!bounds.IsEmpty)
    {
        LeadRect pageArea = new LeadRect(0, 0, ocrPage.Width, ocrPage.Height);
        return LeadRect.Intersect(pageArea, bounds);
    }

    // An empty rectangle is passed through untouched.
    return bounds;
}
/// <summary>
/// Looks up the zone with the given id on an OCR page.
/// </summary>
/// <param name="ocrPageZones">Page whose zones are searched.</param>
/// <param name="zoneID">Zone id to match against <c>OcrZone.Id</c>.</param>
/// <returns>The first zone with a matching id, or a new default <c>OcrZone</c> when none matches.</returns>
private OcrZone GetPageZone(IOcrPage ocrPageZones, int zoneID)
{
    // Fallback returned when no zone carries the requested id.
    OcrZone match = new OcrZone();

    foreach (OcrZone candidate in ocrPageZones.Zones)
    {
        if (candidate.Id != zoneID)
        {
            continue;
        }

        match = candidate;
        break;
    }

    return match;
}
/// <summary>
/// Creates a zone annotation object that is not yet attached to any page,
/// tags it with the user-object id and sets up its name label
/// (white text on black, unrestricted, visible).
/// </summary>
public ZoneAnnotationObject()
{
    // Not bound to a page/zone until SetZone-style initialization happens elsewhere.
    _ocrPage = null;
    _zoneIndex = 0;
    _cellPen = null;

    SetId(AnnObject.UserObjectId);

    // Configure the label registered under "AnnObjectName".
    mylabel = Labels["AnnObjectName"];
    mylabel.Background = AnnSolidColorBrush.Create("Black");
    mylabel.Foreground = AnnSolidColorBrush.Create("White");
    mylabel.RestrictionMode = AnnLabelRestriction.None;
    mylabel.IsVisible = true;
}
/// <summary>
/// Creates the zone properties dialog: builds a tree with one node per zone
/// (and one child node per zone cell), selects the requested zone node and
/// hosts the update-zones and update-cells controls in the container panel.
/// </summary>
/// <param name="ocrEngine">OCR engine handed on to the update-zones control.</param>
/// <param name="ocrPage">Page whose zones and cells are listed.</param>
/// <param name="viewerControl">Viewer control passed to the update-zones control.</param>
/// <param name="selectedZoneIndex">Zone node to select initially; a negative value selects the first node.</param>
public ZonePropertiesDialog(IOcrEngine ocrEngine, IOcrPage ocrPage, OcrMultiEngineDemo.ViewerControl.ViewerControl viewerControl, int selectedZoneIndex)
{
    InitializeComponent();

    _ocrEngine = ocrEngine;
    _ocrPage = ocrPage;
    _viewerControl = viewerControl;

    // One tree node per zone; the node tag carries the zone index.
    for (int zoneIndex = 0; zoneIndex < _ocrPage.Zones.Count; zoneIndex++)
    {
        string zoneCaption = DemosGlobalization.GetResxString(GetType(), "Resx_Zone") + (zoneIndex + 1).ToString();
        TreeNode zoneNode = _tvZonesList.Nodes.Add(zoneCaption);
        zoneNode.Tag = zoneIndex;

        OcrZoneCell[] zoneCells = _ocrPage.Zones.GetZoneCells(_ocrPage.Zones[zoneIndex]);
        bool hasCells = _ocrPage.TableZoneManager != null && zoneCells != null && zoneCells.Length > 0;
        if (!hasCells)
        {
            continue;
        }

        // One child node per cell; the node tag carries the cell itself.
        int cellNumber = 0;
        foreach (OcrZoneCell cell in zoneCells)
        {
            cellNumber++;
            TreeNode cellNode = zoneNode.Nodes.Add(DemosGlobalization.GetResxString(GetType(), "Resx_Cell") + cellNumber.ToString());
            cellNode.Tag = cell;
        }
    }

    if (_tvZonesList.Nodes.Count > 0)
    {
        int initialNodeIndex = (selectedZoneIndex >= 0) ? selectedZoneIndex : 0;
        _tvZonesList.SelectedNode = _tvZonesList.Nodes[initialNodeIndex];
    }

    // Zone editor.
    _updateZonesControl = new OcrMultiEngineDemo.UpdateZonesControl.UpdateZonesControl(_viewerControl);
    _updateZonesControl.Action += _updateZonesControl_Action;
    _pnlContainer.Controls.Add(_updateZonesControl);

    // Cell editor, hidden initially.
    _updateCellsControl = new OcrMultiEngineDemo.UpdateCellsControl.UpdateCellsControl();
    _pnlContainer.Controls.Add(_updateCellsControl);
    _pnlContainer.Controls["UpdateCellsControl"].Visible = false;

    _updateZonesControl.Activate(ocrEngine, ocrPage, _tvZonesList, _ocrPage.Zones);

    _tvZonesList.Select();
    UpdateUIState();
}
/// <summary>
/// Recognizes the text inside a fixed zone on page 1 of a PDF document and
/// returns it in upper case. The zone is the rectangle (0, 2, 4, 1.4) in inches,
/// which the original comment describes as the mailing address area.
/// NOTE(review): the original comment states this method is thread safe; it keeps
/// no state of its own but uses the supplied engine — confirm the engine may be shared.
/// </summary>
/// <param name="ocrEngine">OCR engine used to create the temporary OCR document.</param>
/// <param name="document">PDF document whose first page is rasterized and recognized.</param>
/// <returns>The recognized text of zone 0, upper-cased.</returns>
private static string GetAddressBlockText(IOcrEngine ocrEngine, PDFDocument document)
{
    using (var codecs = new RasterCodecs())
    using (var image = new RasterImage(document.GetPageImage(codecs, 1)))
    using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument())
    using (IOcrPage ocrPage = ocrDocument.Pages.AddPage(image, null))
    {
        // Restrict recognition to the single fixed zone.
        var addressZone = new OcrZone();
        addressZone.Bounds = new LogicalRectangle(0, 2, 4, 1.4, LogicalUnit.Inch);
        ocrPage.Zones.Add(addressZone);

        ocrPage.Recognize(null);

        string zoneText = ocrPage.GetText(0);
        return zoneText.ToUpper();
    }
}
/// <summary>
/// Releases the OCR page, the OCR engine and the codecs object held by this instance.
/// </summary>
void CleanUp()
{
    if (_ocrPage != null)
    {
        _ocrPage.Dispose();
        _ocrPage = null;
    }

    if (_ocrEngine != null)
    {
        // Shut down only a running engine, but always dispose it: the original
        // skipped Dispose for an engine that was created and never started.
        if (_ocrEngine.IsStarted)
        {
            _ocrEngine.Shutdown();
        }

        _ocrEngine.Dispose();
    }

    if (_codecs != null)
    {
        _codecs.Dispose();
    }
}
/// <summary>
/// Replaces the current OCR document with a new one, makes its first page the
/// current page and refreshes the word lists, the word text box, the displayed
/// image and the UI state.
/// </summary>
/// <param name="ocrDocument">The new document; its page 0 becomes the current page.</param>
/// <param name="documentFileName">File name remembered as the last document file.</param>
private void SetDocument(IOcrDocument ocrDocument, string documentFileName)
{
    // Release the document being replaced, if there is one.
    IOcrDocument previousDocument = _ocrDocument;
    if (previousDocument != null)
    {
        previousDocument.Dispose();
    }

    _lastDocumentFile = documentFileName;
    _ocrDocument = ocrDocument;
    _ocrPage = ocrDocument.Pages[0];

    BuildWordLists();
    _wordTextBox.Text = string.Empty;

    RasterImage pageImage = _ocrPage.GetRasterImage();
    SetImage(pageImage);

    UpdateUIState();
}
/// <summary>
/// Loads an image file, binarizes it, adds it as a page to the given OCR
/// document and recognizes that page. Failures are reported on the error
/// stream and do not propagate, so a batch of files can continue.
/// </summary>
/// <param name="filename">Path of the image file to load.</param>
/// <param name="ocrDocument">Document that receives the new page.</param>
public static void exe_ocr(string filename, IOcrDocument ocrDocument)
{
    Console.WriteLine(filename);

    try
    {
        using (RasterCodecs rasterCodecs = new RasterCodecs())
        {
            rasterCodecs.ThrowExceptionsOnInvalidImages = false;

            using (RasterImage rasterImage = rasterCodecs.Load(filename))
            {
                // With ThrowExceptionsOnInvalidImages switched off an unreadable file may
                // yield no image; say so explicitly instead of failing further down.
                if (rasterImage == null)
                {
                    Console.Error.WriteLine("add image failed: could not load " + filename);
                    return;
                }

                AutoBinarizeCommand command = new AutoBinarizeCommand();
                command.Run(rasterImage);

                IOcrPage page = ocrDocument.Pages.AddPage(rasterImage, null);
                if (page != null)
                {
                    page.UpdateNativeFillMethod();
                    page.Recognize(null);
                }
            }
        }
    }
    catch (Exception e)
    {
        // Best effort: keep going, but never hide the reason for the failure.
        Console.Error.WriteLine("add image failed: " + e);
    }
}
/// <summary>
/// Reads the OMR result of the first zone of an OCR page into
/// <paramref name="imageField"/>'s field result. When the mark reads as filled
/// and a retry is allowed, the zone is shrunk and recognized again, because the
/// box outline is often mistaken for a fill.
/// </summary>
/// <param name="ocrPage">Recognized page holding the OMR zone at index 0.</param>
/// <param name="field">Form field being read; used for logging.</param>
/// <param name="imageField">Receives text, filled state and confidence.</param>
/// <param name="retry">Number of narrower retries still allowed (default 1).</param>
private void GetOmrReading(IOcrPage ocrPage, FormField field, ImageField imageField, int retry = 1)
{
    IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();
    if (pageCharacters == null)
    {
        // Fixed: the original message contained a stray '$' before the placeholder.
        logger.Warn($"could not read OMR for {field} ");
        imageField.FieldResult.Confidence = 0;
        imageField.FieldResult.Text = "";
        // Do not leave a filled state from an earlier pass behind.
        imageField.FieldResult.IsFilled = false;
        return;
    }

    IOcrZoneCharacters zoneCharacters = pageCharacters[0];
    if (zoneCharacters.Count == 0)
    {
        // Nothing was read: reset the whole result, not only the text, so a narrowed
        // retry that finds nothing cannot keep the first pass's filled state.
        imageField.FieldResult.Text = "";
        imageField.FieldResult.IsFilled = false;
        imageField.FieldResult.Confidence = 0;
        return;
    }

    OcrCharacter omrCharacter = zoneCharacters[0];
    imageField.FieldResult.Text = omrCharacter.Code.ToString();
    imageField.FieldResult.IsFilled = omrCharacter.Code == FilledChar;
    imageField.FieldResult.Confidence = omrCharacter.Confidence;

    // often on a fill we get the line from the box, so we retry more narrowly
    if (imageField.FieldResult.IsFilled && retry > 0)
    {
        var orgZone = ocrPage.Zones[0];
        orgZone.Bounds = ChangeBoundsRatio(orgZone.Bounds, 0.66);
        ocrPage.Recognize(null);
        GetOmrReading(ocrPage, field, imageField, 0);
        logger.Info($"FILLED {field.Name}");
    }
}
/// <summary>
/// Called by the main form to set the new raster image and OCR page.
/// Shows the image in the viewer without resetting the viewer options,
/// resizes the annotation container to the image and rebuilds the zone
/// annotation objects.
/// </summary>
/// <param name="image">Image to display; null clears the view border.</param>
/// <param name="ocrPage">OCR page that belongs to the image.</param>
public void SetImageAndPage(RasterImage image, IOcrPage ocrPage)
{
    _ocrPage = ocrPage;

    // Swap the image with auto-reset disabled, then restore the saved options.
    var options = _rasterImageViewer.AutoResetOptions;
    _rasterImageViewer.AutoResetOptions = ImageViewerAutoResetOptions.None;
    _rasterImageViewer.Image = image;
    _rasterImageViewer.AutoResetOptions = options;

    if (image != null)
    {
        // The original dereferenced _annAutomation before its own null check;
        // the whole container update is now guarded by that check.
        if (_annAutomation != null)
        {
            AnnContainerMapper saveMapper = _annAutomation.Container.Mapper.Clone();

            // Temporarily install an identity mapper (same source and target DPI)
            // to convert the image size into container coordinates.
            AnnContainerMapper identityMapper = new AnnContainerMapper(saveMapper.SourceDpiX, saveMapper.SourceDpiY, saveMapper.SourceDpiX, saveMapper.SourceDpiY);
            identityMapper.UpdateTransform(LeadMatrix.Identity);
            _annAutomation.Container.Mapper = identityMapper;

            // Set container size
            _annAutomation.Container.Size = identityMapper.SizeToContainerCoordinates(LeadSizeD.Create(image.ImageWidth, image.ImageHeight));

            _annAutomation.Container.Mapper = saveMapper;
        }

        // Converts the zones to annotation objects
        ZonesUpdated();

        _rasterImageViewer.ViewBorderThickness = 1;
    }
    else
    {
        _rasterImageViewer.ViewBorderThickness = 0;
    }

    UpdateTitle();
    UpdateUIState();
}
private IOcrEngine _ocrEngine = null; // The LEADTOOLS OCR Engine

/// <summary>
/// Recognizes the image with the double-pass method and saves the best page
/// as a PDF into a new temporary file.
/// </summary>
/// <param name="image">Image to recognize; may be null.</param>
/// <returns>
/// The path of the temporary PDF file, or an empty string when
/// <paramref name="image"/> is null.
/// </returns>
private string DoOcr(RasterImage image)
{
    // Check the input first so that neither the wait cursor nor a temp file
    // is left behind when there is nothing to do.
    if (image == null)
    {
        return(string.Empty);
    }

    this.Cursor = Cursors.WaitCursor;
    try
    {
        string temp = System.IO.Path.GetTempFileName(); // temp file that receives the PDF
        try
        {
            // Use Double pass method to get highest confidence page from OCR
            IOcrPage highestConfidencePage = DoublePass(image);

            // Create a document and add the page
            using (var document = _ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
            {
                document.Pages.Add(highestConfidencePage);

                // Save as PDF
                document.Save(temp, DocumentFormat.Pdf, null);
            }
        }
        catch
        {
            // Do not leave an orphaned temp file behind; rethrow with the
            // original stack trace ('throw ex;' would have reset it).
            if (File.Exists(temp))
            {
                File.Delete(temp);
            }

            throw;
        }

        return(temp);
    }
    finally
    {
        // Restore the cursor on success and on failure alike.
        this.Cursor = Cursors.Default;
    }
}
/// <summary>
/// Loads an image file, creates the OCR page for it, adds the default zones
/// and shows the page image in the viewer.
/// </summary>
/// <param name="fileName">Path of the image file to load.</param>
/// <returns>True on success; false when an exception occurred (it is shown to the user).</returns>
bool LoadImage(string fileName)
{
    bool succeeded;

    try
    {
        RasterImage loadedImage = _codecs.Load(fileName);

        // Create the page first.
        _ocrPage = _ocrEngine.CreatePage(loadedImage, OcrImageSharingMode.None);

        // Add zones by default.
        AddZones(false);

        // The engine corrects the page deskew before AutoZone or Recognize,
        // so the image shown is fetched again from the page.
        _viewer.Image = _ocrPage.GetRasterImage();
        _viewer.Refresh();

        succeeded = true;
    }
    catch (Exception ex)
    {
        Messager.ShowError(this, ex);
        succeeded = false;
    }

    return(succeeded);
}
/// <summary>
/// Collects up to 60 zone images for the current unique field value: queries
/// every (page, zone) instance of the value, loads each page image that has a
/// matching ".ozf" zone file, clips the zone out of the page and stores the
/// clip in <c>images</c>, then displays the resulting image page.
/// </summary>
private void LoadUniqueFieldValueInstanceImages()
{
    string zoneFile, sql, imageFile = "", imagePath;
    int page = 0, zoneID = 1;

    if (ocrPageZones != null)
    {
        ocrPageZones.Dispose();
        ocrPageZones = null;
    }

    if (uniqueFieldValueRowNumber >= dataTableUniqueFieldValues.Rows.Count)
    {
        return;
    }

    OcrZone zone;
    LeadRect zoneRect;
    RasterImage pageImage = null, zoneImage;

    textBoxValue.Text = dataTableUniqueFieldValues.Rows[uniqueFieldValueRowNumber][0].ToString();

    // String values are passed as positional parameters instead of being spliced
    // into the SQL text, so quotes in the batch name or in the value can no longer
    // break (or alter) the statement.
    sql = "select Page, ZoneID from Field where Batch = ? and FieldID = " + fieldIndex.ToString() + " and (ValueAdvantage = ?) order by Page, Line";
    oleDbCommand = new OleDbCommand(sql, oleDbConnection);
    oleDbCommand.Parameters.AddWithValue("@batch", BatchName);
    oleDbCommand.Parameters.AddWithValue("@value", textBoxValue.Text);
    oleDbDataAdapter = new OleDbDataAdapter(oleDbCommand);
    dataTableUniqueFieldValueInstances = new DataTable();
    oleDbDataAdapter.Fill(dataTableUniqueFieldValueInstances);

    images = new List <RasterImage>();

    foreach (DataRow instanceRow in dataTableUniqueFieldValueInstances.Rows)
    {
        zoneID = int.Parse(instanceRow["ZoneID"].ToString());
        int instancePage = int.Parse(instanceRow[0].ToString());

        // Rows are ordered by page, so the page image is only reloaded when the page changes.
        if (page != instancePage)
        {
            page = instancePage;

            // A table name cannot be a parameter; the batch name can.
            sql = "select top 1 Srcfile from " + BatchTableName + " where Batch = ? and Page = " + page.ToString();
            oleDbCommand = new OleDbCommand(sql, oleDbConnection);
            oleDbCommand.Parameters.AddWithValue("@batch", BatchName);
            oleDbDataAdapter = new OleDbDataAdapter(oleDbCommand);
            DataTable dataTableSrcfile = new System.Data.DataTable();
            oleDbDataAdapter.Fill(dataTableSrcfile);

            if (dataTableSrcfile.Rows.Count > 0)
            {
                imageFile = dataTableSrcfile.Rows[0][0].ToString();
                imagePath = @"I:\" + BatchName.Split(' ')[0] + @"\" + imageFile;

                // The original went on to split imageFile on '\' into a value that was
                // never read; that dead code could only throw and has been removed.

                // The zone file sits next to the image, with the extension swapped.
                zoneFile = Path.ChangeExtension(imagePath, ".ozf");

                if (File.Exists(zoneFile))
                {
                    try
                    {
                        pageImage = codecs.Load(imagePath);

                        // Enforce a minimum resolution of 150 DPI in both directions.
                        if (pageImage.XResolution < 150)
                        {
                            pageImage.XResolution = 150;
                        }

                        if (pageImage.YResolution < 150)
                        {
                            pageImage.YResolution = 150;
                        }

                        // NOTE(review): the page created for the previous iteration is not disposed
                        // before being replaced here, and when a later page has no zone file the
                        // previous pageImage keeps being used — confirm whether both are intended.
                        ocrPageZones = OcrEngine.CreatePage(pageImage, OcrImageSharingMode.AutoDispose);
                        ocrPageZones.LoadZones(zoneFile);
                    }
                    catch (Exception ex)
                    {
                        // Tell the user once only, then skip the images of this page.
                        if (showedMemoryMessage == false)
                        {
                            Messager.ShowInformation(this, "Could not open " + imagePath + ". " + ex.Message);
                            showedMemoryMessage = true;
                        }

                        pageImage = null;
                    }
                }
            }
        }

        if (pageImage != null)
        {
            // Clip the zone out of the page and tag the clip with where it came from.
            zone = GetPageZone(ocrPageZones, zoneID);
            zoneRect = zone.Bounds.ToRectangle(150, 150);
            zoneImage = pageImage.Clone(zoneRect);
            zoneImage.CustomData.Add("Page", page.ToString());
            zoneImage.CustomData.Add("ZoneID", instanceRow["ZoneID"].ToString());
            zoneImage.CustomData.Add("FieldID", fieldIndex.ToString());
            images.Add(zoneImage);
        }

        // Stop after 60 images.
        if (images.Count > 59)
        {
            break;
        }
    }

    DisplayUniqueFieldValueInstanceImagePage();
}
/// <summary>
/// Binds this control to an OCR engine, page, zones list box and zone
/// collection, fills the zone type, language, view perspective and text
/// direction combo boxes, hooks the focus events of the text boxes and
/// refreshes the UI state.
/// </summary>
/// <param name="ocrEngine">Engine that supplies the supported zone types and languages.</param>
/// <param name="ocrPage">OCR page to remember.</param>
/// <param name="tvZonesList">List box listing the zones.</param>
/// <param name="zones">Zone collection to remember.</param>
public void Activate(IOcrEngine ocrEngine, IOcrPage ocrPage, ListBox tvZonesList, IOcrZoneCollection zones)
{
    _ocrEngine = ocrEngine;
    _ocrPage = ocrPage;
    _lbZonesList = tvZonesList;
    _zones = zones;

    // Initialize the combo boxes
    OcrZoneType[] zoneTypes = ocrEngine.ZoneManager.GetSupportedZoneTypes();
    foreach (OcrZoneType zoneType in zoneTypes)
    {
        _typeComboBox.Items.Add(zoneType);
    }

    // Get the languages supported by this engine and fill the list box
    string[] languages = ocrEngine.LanguageManager.GetSupportedLanguages();
    string[] additionalLanguages = ocrEngine.LanguageManager.GetAdditionalLanguages();

    // Maps each friendly name back to its language code.
    Dictionary <string, string> languagesDictionary = new Dictionary <string, string>();
    string[] friendlyNames = new string[languages.Length + additionalLanguages.Length];
    int i = 0;

    foreach (string language in languages)
    {
        friendlyNames[i] = MyLanguage.GetLanguageFriendlyName(language);
        languagesDictionary.Add(friendlyNames[i], language);
        i++;
    }

    foreach (string language in additionalLanguages)
    {
        friendlyNames[i] = MyLanguage.GetLanguageFriendlyName(language);
        languagesDictionary.Add(friendlyNames[i], language);
        i++;
    }

    // The first entry keeps its position; everything after it is sorted.
    // Array.Sort(array, 1, Length - 1) throws for an empty array, and with a
    // single entry there is nothing to sort, so only sort when there are two or more.
    if (friendlyNames.Length > 1)
    {
        Array.Sort(friendlyNames, 1, friendlyNames.Length - 1);
    }

    // "None" always comes first, followed by the languages.
    MyLanguage ml = new MyLanguage(String.Empty, "None", -1);
    _languageComboBox.Items.Add(ml);

    foreach (string friendlyName in friendlyNames)
    {
        ml = new MyLanguage(languagesDictionary[friendlyName], friendlyName, -1);
        _languageComboBox.Items.Add(ml);
    }

    List <ViewPerspectiveItem> zoneViewPerspectiveValues = new List <ViewPerspectiveItem>();
    zoneViewPerspectiveValues.AddRange(new ViewPerspectiveItem[]
    {
        new ViewPerspectiveItem(RasterViewPerspective.TopLeft, "TopLeft"),
        new ViewPerspectiveItem(RasterViewPerspective.TopLeft90, "TopLeft90"),
        new ViewPerspectiveItem(RasterViewPerspective.TopLeft180, "TopLeft180"),
        new ViewPerspectiveItem(RasterViewPerspective.TopLeft270, "TopLeft270")
    });
    _zoneViewPerspectiveComboBox.Items.AddRange(zoneViewPerspectiveValues.ToArray());

    List <TextDirectionItem> zoneTextDirectionValues = new List <TextDirectionItem>();
    zoneTextDirectionValues.AddRange(new TextDirectionItem[]
    {
        new TextDirectionItem(OcrTextDirection.LeftToRight, "LeftToRight"),
        new TextDirectionItem(OcrTextDirection.TopToBottom, "TopToBottom")
    });
    _zoneTextDirectionComboBox.Items.AddRange(zoneTextDirectionValues.ToArray());

    // These events cannot be hooked into from the designer,
    // so we will do them in here
    _leftTextBox.GotFocus += new EventHandler(_areaTextBox_GotFocus);
    _topTextBox.GotFocus += new EventHandler(_areaTextBox_GotFocus);
    _widthTextBox.GotFocus += new EventHandler(_areaTextBox_GotFocus);
    _heightTextBox.GotFocus += new EventHandler(_areaTextBox_GotFocus);

    _leftTextBox.LostFocus += new EventHandler(_areaTextBox_LostFocus);
    _topTextBox.LostFocus += new EventHandler(_areaTextBox_LostFocus);
    _widthTextBox.LostFocus += new EventHandler(_areaTextBox_LostFocus);
    _heightTextBox.LostFocus += new EventHandler(_areaTextBox_LostFocus);

    _nameTextBox.LostFocus += new EventHandler(_textTextBox_LostFocus);

    UpdateUIState();
}
/// <summary>
/// Replaces the value of one recognized word: deletes the word's characters from
/// the zone, builds new characters for <paramref name="value"/> (bounds are
/// measured by drawing with a font derived from the word's first character),
/// inserts them at the old position and rebuilds the zone's word list.
/// Does nothing when the trimmed value equals the current word value.
/// </summary>
/// <param name="zoneIndex">Index of the zone in <paramref name="ocrPageCharacters"/> and <paramref name="zoneWords"/>.</param>
/// <param name="wordIndex">Index of the word inside the zone's word list.</param>
/// <param name="value">New word text; leading/trailing white space is trimmed. Null or empty only deletes the word.</param>
/// <param name="ocrPage">Page that supplies the DPI and, through its document, the engine type.</param>
/// <param name="zoneWords">Per-zone word lists; the list of the affected zone is rebuilt in place.</param>
/// <param name="ocrPageCharacters">Recognized characters of the page; the affected zone is modified in place.</param>
public static void Update(int zoneIndex, int wordIndex, string value, IOcrPage ocrPage, List <List <OcrWord> > zoneWords, IOcrPageCharacters ocrPageCharacters)
{
    // Find the zone characters we are looking for
    // Find the word we are looking for
    IOcrZoneCharacters zoneCharacters = ocrPageCharacters[zoneIndex];
    OcrWord word = zoneWords[zoneIndex][wordIndex];

    // OcrCharacter.Bounds does not expect the leading and external leading spaces
    // used when drawing normal text
    // First, we need to calculate the size of the original string and then the new
    // value using the same font. This way, we can calculate the offsets used on the
    // left and on top so we can find the new word value

    // We do not support spaces around the word
    if (value != null)
    {
        value = value.Trim();
    }

    // If the value did not change, don't do anything
    if (value == word.Value)
    {
        return;
    }

    // Get the first character to use as a template for creating the font
    OcrCharacter templateCharacter = zoneCharacters[word.FirstCharacterIndex];

    float dpiX = ocrPage.DpiX;
    float dpiY = ocrPage.DpiY;

    // Use a temporary bitmap object to get its Graphics object
    using (Bitmap btmp = new Bitmap(1, 1))
    {
        using (Graphics g = Graphics.FromImage(btmp))
        {
            // Do not use anti-aliasing for better calculations
            g.TextRenderingHint = TextRenderingHint.SingleBitPerPixel;

            // Create the font used to draw this word
            using (Font theFont = GetWordFont(templateCharacter, dpiY))
            {
                // Measure the old string and compare against the word bounds reported from OCR.
                // When the old word is empty these stay at their zero defaults.
                PointF wordPosition = PointF.Empty;
                float baselineOffset = 0;
                SizeF oldWordBounds = SizeF.Empty;

                if (!string.IsNullOrEmpty(word.Value))
                {
                    RectangleF ocrWordBounds = Leadtools.Demos.Converters.ConvertRect(word.Bounds.ToRectangle(dpiX, dpiY));
                    oldWordBounds = g.MeasureString(word.Value, theFont, PointF.Empty, StringFormat.GenericDefault);

                    // Center the (larger) measured rectangle on the OCR rectangle: half of the
                    // size difference is subtracted on the left and on top.
                    wordPosition = new PointF(ocrWordBounds.X - (oldWordBounds.Width - ocrWordBounds.Width) / 2, ocrWordBounds.Y - (oldWordBounds.Height - ocrWordBounds.Height) / 2);

                    // Calculate the baseline offset of this font
                    // (font size in points scaled by cell ascent / em height, then points -> pixels at g.DpiY)
                    float baselineOffsetPoints = theFont.SizeInPoints / theFont.FontFamily.GetEmHeight(theFont.Style) * theFont.FontFamily.GetCellAscent(theFont.Style);
                    baselineOffset = g.DpiY / 72.0F * baselineOffsetPoints;
                }

                // Save the insertion point and the position flags for the last character so we can
                // re-use it (in case, it has an EndOfLine or EndOfZone flags set)
                int insertionIndex = word.FirstCharacterIndex;
                OcrCharacterPosition lastCharacterPosition = zoneCharacters[word.LastCharacterIndex].Position;

                DeleteWordCharacters(word, zoneCharacters);

                // Rebuild the zone words
                zoneWords[zoneIndex].Clear();
                zoneWords[zoneIndex].AddRange(zoneCharacters.GetWords((int)dpiX, (int)dpiY, LogicalUnit.Pixel));

                if (!string.IsNullOrEmpty(value))
                {
                    // Now add the characters of the new word
                    // stringSizeLeft tracks the measured size of the part of 'value' not yet placed.
                    SizeF stringSizeLeft = g.MeasureString(value, theFont, PointF.Empty, StringFormat.GenericDefault);
                    float emSize = theFont.Size * g.DpiY / 72.0F;

                    // The string might have space characters in the middle, we don't want to
                    // add them since most of the OCR engines do not support a space character
                    string[] wordParts = value.Split(new char[] { ' ' });

                    // Index into 'value' (spaces included) of the next character to place.
                    int wordCharacterIndex = 0;
                    List <OcrCharacter> characters = new List <OcrCharacter>();

                    foreach (string wordPart in wordParts)
                    {
                        SizeF currentStringSize;

                        // Fix for bug 12953 on FileMaker.
                        // For the Arabic engine the start position is shifted horizontally by the
                        // difference between the new and the old measured width.
                        // NOTE(review): this runs once per word part, not once per word — confirm intended.
                        if (ocrPage.Document.Engine.EngineType == OcrEngineType.Arabic)
                        {
                            if (stringSizeLeft.Width > oldWordBounds.Width)
                            {
                                wordPosition.X -= Math.Abs(stringSizeLeft.Width - oldWordBounds.Width);
                            }
                            else if (stringSizeLeft.Width < oldWordBounds.Width)
                            {
                                wordPosition.X += Math.Abs(stringSizeLeft.Width - oldWordBounds.Width);
                            }
                        }

                        // Process the characters of this part
                        for (int wordPartCharacterIndex = 0; wordPartCharacterIndex < wordPart.Length; wordPartCharacterIndex++)
                        {
                            // We are going to use a GraphicsPath object to draw character on top
                            // Then use the path GetBounds method to get the exact bounding box we need
                            string characterString = wordPart.Substring(wordPartCharacterIndex, 1);

                            using (GraphicsPath path = new GraphicsPath())
                            {
                                path.AddString(characterString, theFont.FontFamily, (int)theFont.Style, emSize, wordPosition, StringFormat.GenericDefault);
                                RectangleF bounds = path.GetBounds();

                                // Build a character and add it
                                // (starts from the template character, then overrides code, bounds, base and position)
                                OcrCharacter newCharacter = templateCharacter;
                                newCharacter.Code = wordPart[wordPartCharacterIndex];
                                newCharacter.Bounds = new LogicalRectangle(bounds.Left, bounds.Top, bounds.Width, bounds.Height, LogicalUnit.Pixel);
                                newCharacter.Base = LogicalLength.FromPixels(wordPosition.Y + baselineOffset - bounds.Y);

                                // We will assume this character is not the last one so we clear all the flags
                                newCharacter.Position = OcrCharacterPosition.None;

                                characters.Add(newCharacter);
                            }

                            // Subtract the part of the string we draw from the overall string size so we know the position of the next character
                            currentStringSize = g.MeasureString(value.Substring(wordCharacterIndex + 1), theFont, PointF.Empty, StringFormat.GenericDefault);
                            wordPosition.X += stringSizeLeft.Width - currentStringSize.Width;
                            stringSizeLeft = currentStringSize;
                            wordCharacterIndex++;
                        }

                        // Add EndOfWord to the character we just inserted
                        if (wordCharacterIndex > 0)
                        {
                            OcrCharacter character = characters[characters.Count - 1];
                            character.Position |= OcrCharacterPosition.EndOfWord;
                            characters[characters.Count - 1] = character;
                        }

                        // Move a space (if any)
                        // (advances the position by the width of the separating space and skips it in 'value')
                        if (wordCharacterIndex < (value.Length - 1))
                        {
                            currentStringSize = g.MeasureString(value.Substring(wordCharacterIndex + 1), theFont, PointF.Empty, StringFormat.GenericDefault);
                            wordPosition.X += stringSizeLeft.Width - currentStringSize.Width;
                            stringSizeLeft = currentStringSize;
                            wordCharacterIndex++;
                        }

                        // If this is the last character in the over all word, re-add the original position flags
                        // if any (EndOfLine, EndOfZone, etc)
                        if (wordCharacterIndex == value.Length)
                        {
                            OcrCharacter character = characters[characters.Count - 1];
                            character.Position |= lastCharacterPosition;
                            characters[characters.Count - 1] = character;
                        }
                    }

                    // Now add these new characters to the zone
                    // (at the index where the deleted word started, preserving their order)
                    int index = insertionIndex;
                    foreach (OcrCharacter character in characters)
                    {
                        zoneCharacters.Insert(index++, character);
                    }

                    // Rebuild the zone words
                    zoneWords[zoneIndex].Clear();
                    zoneWords[zoneIndex].AddRange(zoneCharacters.GetWords((int)dpiX, (int)dpiY, LogicalUnit.Pixel));
                }
            }
        }
    }
}
/// <summary>
/// Creates a renderer that has no OCR page assigned yet.
/// </summary>
public ZoneAnnotationObjectRenderer()
{
    this._ocrPage = null;
}
/// <summary>
/// Computes a page confidence from the recognized characters of a page.
/// Only words longer than three characters are counted. A word's confidence is
/// the lowest confidence among its characters, a zone's confidence is the
/// average over its counted words, and the page confidence is the average over
/// all zones that contain characters.
/// </summary>
/// <param name="ocrPage">Recognized page to evaluate.</param>
/// <returns>
/// Page confidence together with the number of certain words and the total
/// number of counted words; all zero when the page has no recognition data.
/// </returns>
private PageResults GetPageConfidence(IOcrPage ocrPage)
{
    double pageConfidence = 0;
    int certainWords = 0;
    int totalWords = 0;
    int textZoneCount = 0;

    IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();

    // A page without recognition data yields an all-zero result instead of a crash.
    if (pageCharacters == null)
    {
        return(new PageResults(pageConfidence, certainWords, totalWords));
    }

    for (int i = 0; i < ocrPage.Zones.Count; i++)
    {
        IOcrZoneCharacters zoneCharacters = pageCharacters.FindZoneCharacters(i);

        // Zones without characters do not take part in the page average.
        // NOTE(review): the null check assumes FindZoneCharacters may return null — confirm.
        if (zoneCharacters == null || zoneCharacters.Count == 0)
        {
            continue;
        }

        textZoneCount++;

        double zoneConfidence = 0;
        int totalZoneWords = 0;
        int characterCount = 0;
        double wordConfidence = 0;
        bool newWord = true;

        foreach (var ocrCharacter in zoneCharacters)
        {
            if (newWord)
            {
                // Start a new word; 1000 is a sentinel above any character
                // confidence, so the first character always lowers it.
                characterCount = 0;
                wordConfidence = 1000;
            }

            // Track the weakest character of the word.
            if (ocrCharacter.Confidence < wordConfidence)
            {
                wordConfidence = ocrCharacter.Confidence;
            }

            characterCount++;

            bool endsWord =
                (ocrCharacter.Position & OcrCharacterPosition.EndOfWord) == OcrCharacterPosition.EndOfWord ||
                (ocrCharacter.Position & OcrCharacterPosition.EndOfLine) == OcrCharacterPosition.EndOfLine;

            if (endsWord)
            {
                // Words of three characters or fewer are ignored.
                if (characterCount > 3)
                {
                    if (ocrCharacter.WordIsCertain)
                    {
                        certainWords++;
                    }

                    totalWords++;
                    totalZoneWords++;
                    zoneConfidence += wordConfidence;
                }
            }

            newWord = endsWord;
        }

        // A zone with characters but no counted words contributes 0 to the sum,
        // yet still counts as a text zone in the average below.
        if (totalZoneWords > 0)
        {
            pageConfidence += zoneConfidence / totalZoneWords;
        }
    }

    if (textZoneCount > 0)
    {
        pageConfidence /= textZoneCount;
    }

    return(new PageResults(pageConfidence, certainWords, totalWords));
}
/// <summary>
/// Loads a single-page image, either the default sample for the current
/// engine type or a file chosen through the loader's dialog, then shows it in
/// the viewer and creates a fresh OCR page for it when the engine is started.
/// </summary>
/// <param name="loadDefaultImage">
/// True to load the default sample image from the demo images folder;
/// false to let the loader ask for a file.
/// </param>
private void LoadImage(bool loadDefaultImage)
{
    ImageFileLoader loader = new ImageFileLoader();
    loader.OpenDialogInitialPath = _openInitialPath;

    try
    {
        loader.LoadOnlyOnePage = true;

        bool wasLoaded;
        if (!loadDefaultImage)
        {
            wasLoaded = loader.Load(this, _codecs, true) > 0;
        }
        else
        {
            // The Arabic engine gets its own sample image.
            string sampleFile = (_ocrEngineType == OcrEngineType.Arabic) ? @"\ArabicSample.tif" : @"\ocr1.tif";
            wasLoaded = loader.Load(this, DemosGlobal.ImagesFolder + sampleFile, _codecs, 1, -1);
        }

        if (!wasLoaded)
        {
            return;
        }

        _openInitialPath = Path.GetDirectoryName(loader.FileName);

        // Enforce a minimum resolution of 150 DPI in both directions.
        RasterImage loadedImage = loader.Image;
        if (loadedImage.XResolution < 150)
        {
            loadedImage.XResolution = 150;
        }

        if (loadedImage.YResolution < 150)
        {
            loadedImage.YResolution = 150;
        }

        // Drop the page that belonged to the previous image.
        if (_ocrPage != null)
        {
            _ocrPage.Dispose();
            _ocrPage = null;
        }

        _viewer.Image = loadedImage;

        if (_ocrEngine.IsStarted)
        {
            _ocrPage = _ocrEngine.CreatePage(loadedImage, OcrImageSharingMode.None);
        }

        // Clear the results of the previous image and re-apply the current zoom selection.
        _currentHighlightRect = LeadRect.Empty;
        _recognitionResults.Text = "";
        _tsMainZoomComboBox_SelectedIndexChanged(_tsMainZoomComboBox, new EventArgs());
    }
    catch (Exception ex)
    {
        Messager.ShowFileOpenError(this, loader.FileName, ex);
    }
    finally
    {
        _viewer.Invalidate();
    }
}
/// <summary>
/// Matches each page image against the unused block master forms and, for every
/// matched page, crops, saves and recognizes each field of the master form.
/// </summary>
/// <remarks>
/// Per page: the form attributes are prepared (on a worker thread with a timeout
/// when <c>PageTimeoutInSeconds</c> is below 50, otherwise inline), the best
/// master form with confidence above 85 is selected, and each of its fields is
/// cropped from the cleaned-up image, saved as a JPEG and recognized as text, OMR
/// or left as a graphic. Non-block text fields are also merged into a
/// "fields only" image that is saved and passed to <c>GoogleOcr</c>.
/// Processing stops once every block master form has been used.
/// </remarks>
/// <param name="formResults">Result object that is updated in place with statuses, confidences and field results.</param>
/// <param name="fileInfos">The page images to process; images not yet loaded are loaded from their file.</param>
/// <returns>
/// One <c>FilledForm</c> per page that was started. The list is returned early when
/// more than two pages timed out and no page has been mapped to a form.
/// Returns <c>null</c> when an unexpected exception escapes the per-field handlers.
/// </returns>
public List<FilledForm> ProcessOcr(ResultsForPrettyJson formResults, List<ImageInfo> fileInfos)
{
    try
    {
        var outDir = formResults.OriginalDirectoryName;
        var retForms = new List<FilledForm>();
        var usedMasters = new HashSet<MasterForm>();
        Stopwatch stopWatch = new Stopwatch();
        stopWatch.Start();
        formResults.PagesInPdf = fileInfos.Count;

        foreach (var ofi in fileInfos)
        {
            FilledForm newForm = new FilledForm();
            retForms.Add(newForm);
            newForm.ImageInfoMaster.InitialImage = ofi;
            newForm.Name = Path.GetFileNameWithoutExtension(ofi.ImageFileInfo.Name);
            if (ofi.Image == null)
            {
                ofi.Image = LoadImageFile(ofi.ImageFileInfo.FullName, 1, -1);
            }

            var par = new FormThreadCallParams() { ImageInfo = ofi, StopWatch = stopWatch, Form = newForm };
            if (PageTimeoutInSeconds < 50)
            {
                // Prepare the form on a worker thread so a slow page can be abandoned.
                Thread t = new Thread(this.PrepareNewFormThreader);
                t.Start(par);
                if (!t.Join(TimeSpan.FromSeconds(PageTimeoutInSeconds)))
                {
                    // NOTE(review): Thread.Abort is not supported on .NET Core / .NET 5+ — confirm the target framework.
                    t.Abort();
                    formResults.TimedOutPages.Add(newForm.Name);
                    formResults.BestFormConfidence.Add(-1);
                    if (formResults.TimedOutPages.Count > 2 && formResults.PagesMappedToForm == 0)
                    {
                        // Several timeouts and nothing matched so far: give up on the whole document.
                        formResults.Status = $"Form abandoned for timeout after {formResults.BestFormConfidence.Count} pages";
                        logger.Error(formResults.Status);
                        return retForms;
                    }

                    continue;
                }
            }
            else
            {
                // Timeouts of 50 seconds or more run the preparation inline, without a timeout.
                PrepareNewFormThreader(par);
            }

            Debug.Assert(par.Attributes != null);
            var filledFormAttributes = par.Attributes;

            // Pick the unused master with the highest confidence above the threshold of 85;
            // bestConfidence records the highest confidence seen even when nothing qualifies.
            MasterForm currentMasterBlockForm = null;
            int bestConfidence = -1;
            int currentConfidence = 85;
            foreach (var master in BlockMasterForms)
            {
                if (usedMasters.Contains(master))
                {
                    continue;
                }

                var result = RecognitionEngine.CompareForm(master.Attributes, filledFormAttributes, null, null);
                if (result.Confidence > currentConfidence)
                {
                    currentMasterBlockForm = master;
                    bestConfidence = currentConfidence = result.Confidence;
                }
                else if (result.Confidence > bestConfidence)
                {
                    bestConfidence = result.Confidence;
                }
            }

            formResults.BestFormConfidence.Add(bestConfidence);

            if (currentMasterBlockForm != null)
            {
                formResults.MasterFormPages.Add(currentMasterBlockForm.Properties.Name);
                formResults.PagesMappedToForm++;
                logger.Info($"FilledForm matched {newForm.Name} {newForm.Status} {stopWatch.ElapsedMilliseconds} ");
                newForm.ImageInfoMaster.InitialImage = ofi;

                // Work on copies so the initial image stays untouched.
                var centeredImage = ofi.Image.CloneAll();
                CleanupImage(centeredImage);
                newForm.ImageInfoMaster.CenteredImage = new ImageInfo() { Image = centeredImage };
                var omrImage = centeredImage.CloneAll();
                PrepareOmrImage(omrImage);
                newForm.ImageInfoMaster.OmrImage = new ImageInfo() { Image = omrImage };
                newForm.Status = "Matched";
                newForm.Master = currentMasterBlockForm;

                var alignment = RecognitionEngine.GetFormAlignment(newForm.Master.Attributes, newForm.Attributes, null);
                var fields = currentMasterBlockForm.ProcessingPages[0];
                var scaler = currentMasterBlockForm.Resolution;

                var subDirField = Path.Combine(outDir, "fields");
                var fileNameFieldOnly = Path.Combine(subDirField, newForm.Name + "_fields.jpg");
                var googleResultsFile = Path.Combine(subDirField, newForm.Name + "_google.json");

                // Never set to true in this method, so every non-block text field is merged
                // into the fields-only image.
                var combined = false;

                // White canvas that collects the text-field crops; disposed once it has been saved.
                using (var fieldsOnlyImage = RasterImage.Create(centeredImage.Width, centeredImage.Height, centeredImage.BitsPerPixel, 300, RasterColor.White))
                {
                    foreach (var field in fields)
                    {
                        var isBlock = field.Name.Contains("block");
                        var rect200 = alignment[0].AlignRectangle(field.Bounds);

                        // NOTE(review): scaler is forced to 300 here, so the "* 300 / scaler" terms below are a
                        // no-op and the master's Resolution read above is ignored — confirm this is intended.
                        scaler = 300;
                        int fudge = isBlock ? 30 : 1;
                        var rect300 = new LeadRect(
                            rect200.Left * 300 / scaler - fudge,
                            rect200.Top * 300 / scaler - fudge,
                            rect200.Width * 300 / scaler + fudge,
                            rect200.Height * 300 / scaler + fudge);

                        try
                        {
                            // Text and graphic fields use the cleaned-up image; OMR fields use the OMR-prepared one.
                            var imageInfoToUse = newForm.ImageInfoMaster.CenteredImage;
                            var zoneType = OcrZoneType.Text;
                            if (field.GetType() == typeof(OmrFormField))
                            {
                                imageInfoToUse = newForm.ImageInfoMaster.OmrImage;
                                zoneType = OcrZoneType.Omr;
                            }
                            else if (field.GetType() == typeof(ImageFormField))
                            {
                                zoneType = OcrZoneType.Graphic;
                            }

                            var image = imageInfoToUse.Image.CloneAll();
                            var subDir = Path.Combine(outDir, isBlock ? "blocks" : "fields");
                            var fileName = Path.Combine(subDir, newForm.Name + "_" + field.Name + ".jpg");
                            var imageField = new ImageField
                            {
                                Field = field,
                                FieldResult =
                                {
                                    FieldName = field.Name,
                                    IsBlock = isBlock,
                                    ImageFile = fileName,
                                    Bounds = rect300.ToString(),
                                    FieldType = zoneType.ToString(),
                                    Error = "None"
                                }
                            };
                            imageField.Rectangle = new Rectangle(rect300.X, rect300.Y, rect300.Width, rect300.Height);

                            try
                            {
                                EnsurePathExists(subDir);
                                CropCommand command = new CropCommand { Rectangle = rect300 };
                                command.Run(image);
                                RasterCodecs.Save(image, fileName, RasterImageFormat.Jpeg, bitsPerPixel: 8);

                                if (!isBlock && zoneType == OcrZoneType.Text && !combined)
                                {
                                    try
                                    {
                                        // The clone is only needed while the combine runs; dispose it afterwards.
                                        using (var combineSource = image.Clone())
                                        {
                                            var combiner = new CombineCommand();
                                            combiner.SourceImage = combineSource;
                                            combiner.DestinationRectangle = rect300;
                                            var regionBounds = image.GetRegionBounds(null);
                                            combiner.SourcePoint = new LeadPoint(regionBounds.X, regionBounds.Y);
                                            combiner.Flags = CombineCommandFlags.OperationOr | CombineCommandFlags.Destination0;
                                            combiner.Run(fieldsOnlyImage);
                                        }
                                    }
                                    catch (Exception exCombine)
                                    {
                                        // A failed merge must not prevent OCR of the field itself.
                                        logger.Error(exCombine, $"error combining field {field.Name} {rect300}");
                                    }
                                }

                                var imageInfo = new ImageInfo() { Image = image, ImageFileInfo = new FileInfo(fileName) };
                                imageField.ImageInfo = imageInfo;

                                if (!isBlock && zoneType != OcrZoneType.Graphic)
                                {
                                    // NOTE(review): AutoDispose presumably disposes `image` together with the page while
                                    // imageField.ImageInfo still references it — confirm nothing reads it later.
                                    using (IOcrPage ocrPage = OcrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose))
                                    {
                                        OcrZone ocrZone = new OcrZone
                                        {
                                            ZoneType = zoneType,
                                            Bounds = new LeadRect(fudge, fudge, image.ImageSize.Width - fudge, image.ImageSize.Height - fudge)
                                        };
                                        ocrPage.Zones.Add(ocrZone);
                                        ocrPage.Recognize(null);

                                        if (zoneType == OcrZoneType.Omr)
                                        {
                                            // Extra diagnostics for one specific field name.
                                            if (field.Name.Contains("C2NGVD1929"))
                                            {
                                                logger.Info(ocrZone.Bounds);
                                            }

                                            GetOmrReading(ocrPage, field, imageField);
                                        }
                                        else if (zoneType == OcrZoneType.Text)
                                        {
                                            var resultsPage = GetPageConfidence(ocrPage);
                                            imageField.FieldResult.Confidence = resultsPage.Confidence;
                                            char[] crlf = { '\r', '\n' };
                                            imageField.FieldResult.Text = ocrPage.GetText(0).TrimEnd(crlf);
                                        }
                                    }
                                }

                                logger.Info(
                                    $"field {field.Name} {rect300} [{imageField.FieldResult.Text}] confidence: {imageField.FieldResult.Confidence}");
                            }
                            catch (Exception exField)
                            {
                                // Record the failure on the field but keep the field in the results.
                                logger.Error(exField, $"Error processing {field.Name}");
                                formResults.FieldsWithError++;
                                imageField.FieldResult.Error = exField.Message;
                            }

                            newForm.ImageFields.Add(imageField);
                            formResults.OcrFields.Add(imageField.FieldResult);
                            formResults.Status = "FormMatched";
                        }
                        catch (Exception ex)
                        {
                            logger.Error(ex, $"Error on field {field.Name} {rect300}");
                            newForm.Status = $"Error|Field {field.Name} {rect300}: [{ex.Message}]";
                        }
                    }

                    // The "fields" directory is otherwise only created while processing a non-block field;
                    // make sure it exists so the save below cannot fail and discard every result.
                    EnsurePathExists(subDirField);
                    RasterCodecs.Save(PrepareOmrImage(fieldsOnlyImage), fileNameFieldOnly, RasterImageFormat.Jpeg, bitsPerPixel: 8);
                }

                var googleResults = GoogleOcr(fileNameFieldOnly);
                if (googleResults.Count > 0)
                {
                    var json = JsonConvert.SerializeObject(googleResults, Formatting.Indented);
                    File.WriteAllText(googleResultsFile, json);
                    MergeGoogleOcr(newForm, googleResults);
                }

                usedMasters.Add(currentMasterBlockForm);
            }
            else
            {
                newForm.Status = "Unmatched|No MasterForm match";
            }

            logger.Info($"FilledForm processed {newForm.Name} {newForm.Status} {stopWatch.ElapsedMilliseconds} ");

            if (usedMasters.Count == BlockMasterForms.Count)
            {
                // Every master form has been matched once; remaining pages are skipped.
                logger.Info("found all master forms");
                break;
            }
        }

        stopWatch.Stop();
        return retForms;
    }
    catch (Exception ex)
    {
        logger.Error(ex, "Untrapped error found");
        return null;
    }
}
/// <summary>
/// Loads one page of an image, shows it in the viewer and creates the OCR page
/// for it when the OCR engine is running.
/// </summary>
/// <param name="loadDefaultImage">
/// When <c>true</c>, the sample image matching the current engine type is loaded from
/// the demo images folder; when <c>false</c>, the loader is asked to obtain the image.
/// </param>
private void LoadImage(bool loadDefaultImage)
{
    // Resolutions below this value are raised to it after loading.
    const int MinimumResolution = 150;

    ImageFileLoader imageLoader = new ImageFileLoader();
    imageLoader.OpenDialogInitialPath = _openInitialPath;

    try
    {
        imageLoader.LoadOnlyOnePage = true;

        bool gotImage;
        if (loadDefaultImage)
        {
            // Only the OmniPage Arabic engine uses the Arabic sample.
            string sampleName = @"\ocr1.tif";
            if (_ocrEngineType == OcrEngineType.OmniPageArabic)
            {
                sampleName = @"\ArabicSample.tif";
            }

            gotImage = imageLoader.Load(this, DemosGlobal.ImagesFolder + sampleName, _codecs, 1, -1);
        }
        else
        {
            var pagesLoaded = imageLoader.Load(this, _codecs, true);
            gotImage = pagesLoaded > 0;
        }

        if (gotImage)
        {
            _openInitialPath = Path.GetDirectoryName(imageLoader.FileName);

            RasterImage newImage = imageLoader.Image;

            if (newImage.XResolution < MinimumResolution)
            {
                newImage.XResolution = MinimumResolution;
            }

            if (newImage.YResolution < MinimumResolution)
            {
                newImage.YResolution = MinimumResolution;
            }

            // The previous OCR page is no longer valid for the new image.
            if (_ocrPage != null)
            {
                _ocrPage.Dispose();
                _ocrPage = null;
            }

            _viewer.Image = newImage;

            if (_ocrEngine.IsStarted)
            {
                _ocrPage = _ocrEngine.CreatePage(newImage, OcrImageSharingMode.None);
            }

            // Clear highlight and results left over from the previous image, then re-apply the zoom.
            _currentHighlightRect = LeadRect.Empty;
            _recognitionResults.Text = "";
            _tsMainZoomComboBox_SelectedIndexChanged(_tsMainZoomComboBox, new EventArgs());
        }
    }
    catch (Exception ex)
    {
        Messager.ShowFileOpenError(this, imageLoader.FileName, ex);
    }
    finally
    {
        _viewer.Invalidate();
    }
}
/// <summary>
/// Worker for the progress dialog: creates an in-memory OCR document, adds the page
/// from the file named in <paramref name="args"/>, recognizes it and, unless the
/// dialog was canceled, hands the document over via <c>SetDocument</c>.
/// </summary>
/// <param name="dlg">The progress dialog that supplies the progress callback and the cancel state.</param>
/// <param name="args">Operation arguments; the entry <c>"documentFileName"</c> holds the file to load.</param>
private void DoLoadAndRecognizeDocument(OcrProgressDialog dlg, Dictionary<string, object> args)
{
    OcrProgressCallback progress = dlg.OcrProgressCallback;

    // Without a progress callback there is no progress bar, so the dialog text is updated instead.
    bool describeSteps = (progress == null);

    IOcrDocument pendingDocument = null;

    try
    {
        string fileName = args["documentFileName"] as string;
        pendingDocument = _ocrEngine.DocumentManager.CreateDocument("", OcrCreateDocumentOptions.InMemory);

        IOcrPage page = null;

        // Step 1: load the page.
        if (!dlg.IsCanceled)
        {
            if (describeSteps)
            {
                dlg.UpdateDescription("Loading the document (first page only)...");
            }

            page = pendingDocument.Pages.AddPage(fileName, progress);
        }

        // Step 2: recognize it.
        if (!dlg.IsCanceled)
        {
            if (describeSteps)
            {
                dlg.UpdateDescription("Recognizing the page(s) of the document...");
            }

            page.Recognize(progress);
        }

        // Step 3: not canceled, so pass the document on and clear the local
        // reference to keep the cleanup below from disposing it.
        if (!dlg.IsCanceled)
        {
            SetDocument(pendingDocument, fileName);
            pendingDocument = null;
        }
    }
    catch (Exception ex)
    {
        ShowError(ex);
    }
    finally
    {
        if (describeSteps)
        {
            dlg.EndOperation();
        }

        // Dispose the document when it was not handed over (canceled or failed).
        pendingDocument?.Dispose();
    }
}