/// <summary>
/// Refreshes the words list box with the recognized words of the page that is
/// currently selected in the pages list box.
/// </summary>
/// <param name="sender">The control raising the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void _pagesListBox_SelectedIndexChanged(object sender, EventArgs e)
{
    // Always start from an empty list, even when there is nothing to show
    _wordsListBox.Items.Clear();

    // SelectedIndex is -1 when the selection has been cleared (for example after the
    // items were removed); indexing the page collection with it would throw.
    int pageIndex = _pagesListBox.SelectedIndex;
    if (pageIndex < 0)
    {
        return;
    }

    IOcrPage ocrPage = _ocrDocument.Pages[pageIndex];
    if (!ocrPage.IsRecognized)
    {
        return;
    }

    IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();
    if (pageCharacters == null)
    {
        return;
    }

    // One entry per recognized word, zone by zone
    foreach (IOcrZoneCharacters zoneCharacters in pageCharacters)
    {
        ICollection<OcrWord> words = zoneCharacters.GetWords();
        foreach (OcrWord word in words)
        {
            _wordsListBox.Items.Add(word.Value);
        }
    }
}
/// <summary>
/// Caches the recognized characters of <c>_ocrPage</c> and builds one word list per
/// zone in <c>_ocrZoneWords</c>, then clears the current zone/word selection.
/// </summary>
private void BuildWordLists()
{
    _ocrPageCharacters = _ocrPage.GetRecognizedCharacters();
    _ocrZoneWords = new List<List<OcrWord>>();

    // GetRecognizedCharacters is treated as nullable elsewhere in this code; without
    // this guard the foreach below would throw a NullReferenceException. With no
    // characters we simply end up with an empty set of word lists.
    if (_ocrPageCharacters != null)
    {
        // Build the words: one list per zone, in zone order
        foreach (IOcrZoneCharacters zoneCharacters in _ocrPageCharacters)
        {
            List<OcrWord> words = new List<OcrWord>();
            words.AddRange(zoneCharacters.GetWords());
            _ocrZoneWords.Add(words);
        }
    }

    // Nothing is selected after a rebuild
    _selectedZoneIndex = -1;
    _selectedWordIndex = -1;
}
/// <summary>
/// Copies the OMR reading of the first zone of <paramref name="ocrPage"/> into
/// <c>imageField.FieldResult</c> (Text, IsFilled, Confidence). When the mark reads as
/// filled and <paramref name="retry"/> is greater than zero, the first zone is shrunk,
/// the page is recognized again and the reading is taken once more.
/// </summary>
/// <param name="ocrPage">Page to read; presumably already recognized by the caller.</param>
/// <param name="field">Form field being read; only used for log messages.</param>
/// <param name="imageField">Receives the reading in its <c>FieldResult</c>.</param>
/// <param name="retry">Number of narrowed retries still allowed (the recursive call passes 0).</param>
private void GetOmrReading(IOcrPage ocrPage, FormField field, ImageField imageField, int retry = 1)
{
    IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();

    // No characters at all (null or an empty collection): nothing could be read.
    // The empty check also protects the [0] access below.
    if (pageCharacters == null || pageCharacters.Count == 0)
    {
        // The original message used "${field}", which in C# prints a literal '$'
        logger.Warn($"could not read OMR for {field?.Name} ");
        imageField.FieldResult.Confidence = 0;
        imageField.FieldResult.Text = "";
        return;
    }

    IOcrZoneCharacters zoneCharacters = pageCharacters[0];
    if (zoneCharacters.Count == 0)
    {
        imageField.FieldResult.Text = "";
        return;
    }

    OcrCharacter omrCharacter = zoneCharacters[0];
    imageField.FieldResult.Text = omrCharacter.Code.ToString();
    imageField.FieldResult.IsFilled = omrCharacter.Code == FilledChar;
    imageField.FieldResult.Confidence = omrCharacter.Confidence;

    // Often on a fill we get the line from the box, so we retry more narrowly
    if (imageField.FieldResult.IsFilled && retry > 0)
    {
        var narrowedZone = ocrPage.Zones[0];
        narrowedZone.Bounds = ChangeBoundsRatio(narrowedZone.Bounds, 0.66);

        // NOTE(review): OcrZone is presumably a value type, so the indexer hands back a
        // copy; without storing it back the retry would recognize the unchanged bounds.
        // The assignment is harmless if it turns out to be a reference type.
        ocrPage.Zones[0] = narrowedZone;

        ocrPage.Recognize(null);
        GetOmrReading(ocrPage, field, imageField, 0);

        // Logged for every field whose first reading was filled, whatever the retry found
        logger.Info($"FILLED {field.Name}");
    }
}
/// <summary>
/// Fills the zone property controls from the zone at <paramref name="index"/>, or
/// resets them all when <paramref name="index"/> is -1.
/// </summary>
/// <param name="index">Index into <c>_zones</c>, or -1 for "no zone".</param>
public void ZoneToControls(int index)
{
    if (index == -1)
    {
        // No zone: reset every control to its empty/default state
        _nameTextBox.Text = string.Empty;
        _leftTextBox.Text = string.Empty;
        _topTextBox.Text = string.Empty;
        _widthTextBox.Text = string.Empty;
        _heightTextBox.Text = string.Empty;
        _typeComboBox.SelectedIndex = 0;
        _languageComboBox.SelectedIndex = 0;
        _zoneViewPerspectiveComboBox.SelectedIndex = 0;
        _zoneTextDirectionComboBox.SelectedIndex = 0;
        _omrStatusLabel.Text = string.Empty;
        _digitCheckBox.Checked = false;
        _plusCheckBox.Checked = false;
        return;
    }

    // Fill the controls from the current zone
    OcrZone zone = _zones[index];
    _nameTextBox.Text = zone.Name;

    // Show the zone bounds as they are stored in the zone
    LeadRect bounds = zone.Bounds;
    _leftTextBox.Text = bounds.X.ToString();
    _topTextBox.Text = bounds.Y.ToString();
    _widthTextBox.Text = bounds.Width.ToString();
    _heightTextBox.Text = bounds.Height.ToString();

    // Detach the change handlers while the combo boxes are updated programmatically so
    // the selection changes below are not handled as user edits.
    _typeComboBox.SelectedIndexChanged -= _propertiesComboBox_SelectedIndexChanged;
    _languageComboBox.SelectedIndexChanged -= _propertiesComboBox_SelectedIndexChanged;
    _zoneViewPerspectiveComboBox.SelectedIndexChanged -= _propertiesComboBox_SelectedIndexChanged;
    _zoneTextDirectionComboBox.SelectedIndexChanged -= _propertiesComboBox_SelectedIndexChanged;
    try
    {
        _typeComboBox.SelectedItem = zone.ZoneType;

        // A zone without a language selects the entry whose language is the empty string
        string zoneLanguage = string.IsNullOrEmpty(zone.Language) ? String.Empty : zone.Language;
        foreach (MyLanguage languageItem in _languageComboBox.Items)
        {
            if (languageItem.Language == zoneLanguage)
            {
                _languageComboBox.SelectedItem = languageItem;
                break;
            }
        }

        // Fall back to the first entry when the zone's value is not in the list
        _zoneViewPerspectiveComboBox.SelectedIndex = 0;
        foreach (ViewPerspectiveItem item in _zoneViewPerspectiveComboBox.Items)
        {
            if (item.ViewPerspective == zone.ViewPerspective)
            {
                _zoneViewPerspectiveComboBox.SelectedItem = item;
                break;
            }
        }

        _zoneTextDirectionComboBox.SelectedIndex = 0;
        foreach (TextDirectionItem item in _zoneTextDirectionComboBox.Items)
        {
            if (item.TextDirection == zone.TextDirection)
            {
                _zoneTextDirectionComboBox.SelectedItem = item;
                break;
            }
        }
    }
    finally
    {
        // Re-attach even if something above threw; otherwise the combo boxes would
        // silently stop reporting user changes for the rest of the session.
        _typeComboBox.SelectedIndexChanged += _propertiesComboBox_SelectedIndexChanged;
        _languageComboBox.SelectedIndexChanged += _propertiesComboBox_SelectedIndexChanged;
        _zoneViewPerspectiveComboBox.SelectedIndexChanged += _propertiesComboBox_SelectedIndexChanged;
        _zoneTextDirectionComboBox.SelectedIndexChanged += _propertiesComboBox_SelectedIndexChanged;
    }

    _omrStatusLabel.Text = zone.ZoneType == OcrZoneType.Omr ? GetOmrStatusText(zone) : string.Empty;

    _digitCheckBox.Checked = (zone.CharacterFilters & OcrZoneCharacterFilters.Digit) == OcrZoneCharacterFilters.Digit;
    _plusCheckBox.Checked = (zone.CharacterFilters & OcrZoneCharacterFilters.Plus) == OcrZoneCharacterFilters.Plus;
}

/// <summary>
/// Builds the "Filled/Unfilled (N% certain)" status text for an OMR zone from the
/// recognized characters of <c>_ocrPage</c>.
/// </summary>
/// <param name="zone">The OMR zone; its <c>Id</c> is used as the index into the page characters.</param>
/// <returns>The status text; "Unfilled (0% certain)" whenever no reading is available.</returns>
private string GetOmrStatusText(OcrZone zone)
{
    const string NoReading = "Unfilled (0% certain)";

    if (!_ocrPage.IsRecognized)
    {
        return NoReading;
    }

    IOcrPageCharacters pageCharacters = _ocrPage.GetRecognizedCharacters();

    // The range check also covers an empty collection
    if (pageCharacters == null || zone.Id < 0 || zone.Id >= pageCharacters.Count)
    {
        return NoReading;
    }

    IOcrZoneCharacters zoneCharacters = pageCharacters[zone.Id];
    if (zoneCharacters == null || zoneCharacters.Count == 0)
    {
        return NoReading;
    }

    // The first character of the zone carries the recognized state
    OcrCharacter omrCharacter = zoneCharacters[0];
    char filledChar = _ocrEngine.ZoneManager.OmrOptions.GetStateRecognitionCharacter(OcrOmrZoneState.Filled);
    string state = omrCharacter.Code == filledChar ? "Filled" : "Unfilled";
    return string.Format("{0} ({1}% certain)", state, omrCharacter.Confidence);
}
/// <summary>
/// Replaces the text of one recognized word with <paramref name="value"/>. The word's
/// existing characters are removed from the zone, new characters are created from the
/// word's first character with bounds measured through GDI+, inserted at the same
/// position, and the cached word list of the zone is rebuilt.
/// </summary>
/// <param name="zoneIndex">Index of the zone in both <paramref name="ocrPageCharacters"/> and <paramref name="zoneWords"/>.</param>
/// <param name="wordIndex">Index of the word inside <c>zoneWords[zoneIndex]</c>.</param>
/// <param name="value">New word text. It is trimmed first; when it equals the current word value nothing happens; when it is null or empty the old characters are removed and nothing is inserted.</param>
/// <param name="ocrPage">Page supplying the DPI values and, through <c>Document.Engine</c>, the engine type.</param>
/// <param name="zoneWords">Per-zone word lists; the list at <paramref name="zoneIndex"/> is cleared and refilled.</param>
/// <param name="ocrPageCharacters">Recognized characters of the page; the zone at <paramref name="zoneIndex"/> is modified.</param>
public static void Update(int zoneIndex, int wordIndex, string value, IOcrPage ocrPage, List <List <OcrWord> > zoneWords, IOcrPageCharacters ocrPageCharacters)
{
    // Find the zone characters we are looking for
    // Find the word we are looking for
    IOcrZoneCharacters zoneCharacters = ocrPageCharacters[zoneIndex];
    OcrWord word = zoneWords[zoneIndex][wordIndex];

    // OcrCharacter.Bounds does not expect the leading and external leading spaces
    // used when drawing normal text.
    // First, we need to calculate the size of the original string and then the new
    // value using the same font. This way, we can calculate the offsets used on the
    // left and on top so we can find the new word value.

    // We do not support spaces around the word
    if (value != null)
    {
        value = value.Trim();
    }

    // If the value did not change, don't do anything
    if (value == word.Value)
    {
        return;
    }

    // Get the first character to use as a template for creating the font
    OcrCharacter templateCharacter = zoneCharacters[word.FirstCharacterIndex];
    float dpiX = ocrPage.DpiX;
    float dpiY = ocrPage.DpiY;

    // Use a temporary bitmap object to get its Graphics object (only needed for measuring)
    using (Bitmap btmp = new Bitmap(1, 1))
    {
        using (Graphics g = Graphics.FromImage(btmp))
        {
            // Do not use anti-aliasing for better calculations
            g.TextRenderingHint = TextRenderingHint.SingleBitPerPixel;

            // Create the font used to draw this word
            using (Font theFont = GetWordFont(templateCharacter, dpiY))
            {
                // Measure the old string and compare against the word bounds reported from
                // OCR. All three values stay at their empty defaults when the old word has no text.
                PointF wordPosition = PointF.Empty;
                float baselineOffset = 0;
                SizeF oldWordBounds = SizeF.Empty;
                if (!string.IsNullOrEmpty(word.Value))
                {
                    RectangleF ocrWordBounds = Leadtools.Demos.Converters.ConvertRect(word.Bounds.ToRectangle(dpiX, dpiY));
                    oldWordBounds = g.MeasureString(word.Value, theFont, PointF.Empty, StringFormat.GenericDefault);

                    // Center the measured rectangle on the OCR rectangle: shift left/up by half
                    // of the size difference between the two
                    wordPosition = new PointF(ocrWordBounds.X - (oldWordBounds.Width - ocrWordBounds.Width) / 2, ocrWordBounds.Y - (oldWordBounds.Height - ocrWordBounds.Height) / 2);

                    // Calculate the baseline offset of this font: ascent as a fraction of the em
                    // height, scaled to points and then converted to pixels (72 points per inch)
                    float baselineOffsetPoints = theFont.SizeInPoints / theFont.FontFamily.GetEmHeight(theFont.Style) * theFont.FontFamily.GetCellAscent(theFont.Style);
                    baselineOffset = g.DpiY / 72.0F * baselineOffsetPoints;
                }

                // Save the insertion point and the position flags for the last character so we can
                // re-use it (in case, it has an EndOfLine or EndOfZone flags set)
                int insertionIndex = word.FirstCharacterIndex;
                OcrCharacterPosition lastCharacterPosition = zoneCharacters[word.LastCharacterIndex].Position;

                // Remove the old word's characters (helper defined outside this block)
                DeleteWordCharacters(word, zoneCharacters);

                // Rebuild the zone words
                zoneWords[zoneIndex].Clear();
                zoneWords[zoneIndex].AddRange(zoneCharacters.GetWords((int)dpiX, (int)dpiY, LogicalUnit.Pixel));

                if (!string.IsNullOrEmpty(value))
                {
                    // Now add the characters of the new word.
                    // stringSizeLeft is the measured size of the part of value not yet placed.
                    SizeF stringSizeLeft = g.MeasureString(value, theFont, PointF.Empty, StringFormat.GenericDefault);
                    float emSize = theFont.Size * g.DpiY / 72.0F;

                    // The string might have space characters in the middle, we don't want to
                    // add them since most of the OCR engines do not support a space character
                    string[] wordParts = value.Split(new char[] { ' ' });

                    // Index into value (spaces included) of the next character to place
                    int wordCharacterIndex = 0;
                    List <OcrCharacter> characters = new List <OcrCharacter>();

                    foreach (string wordPart in wordParts)
                    {
                        SizeF currentStringSize;

                        // Fix for bug 12953 on FileMaker.
                        // For the Arabic engine the start position is moved by the width difference
                        // between the remaining text and the old word (left when wider, right when narrower).
                        if (ocrPage.Document.Engine.EngineType == OcrEngineType.Arabic)
                        {
                            if (stringSizeLeft.Width > oldWordBounds.Width)
                            {
                                wordPosition.X -= Math.Abs(stringSizeLeft.Width - oldWordBounds.Width);
                            }
                            else if (stringSizeLeft.Width < oldWordBounds.Width)
                            {
                                wordPosition.X += Math.Abs(stringSizeLeft.Width - oldWordBounds.Width);
                            }
                        }

                        // Process the characters of this part
                        for (int wordPartCharacterIndex = 0; wordPartCharacterIndex < wordPart.Length; wordPartCharacterIndex++)
                        {
                            // We are going to use a GraphicsPath object to draw character on top
                            // Then use the path GetBounds method to get the exact bounding box we need
                            string characterString = wordPart.Substring(wordPartCharacterIndex, 1);
                            using (GraphicsPath path = new GraphicsPath())
                            {
                                path.AddString(characterString, theFont.FontFamily, (int)theFont.Style, emSize, wordPosition, StringFormat.GenericDefault);
                                RectangleF bounds = path.GetBounds();

                                // Build a character and add it. It starts from the template character, so every
                                // property not assigned below is taken from the word's original first character.
                                // NOTE(review): this relies on OcrCharacter being copied on assignment
                                // (presumably a value type, as the read-modify-write-back below suggests) - confirm.
                                OcrCharacter newCharacter = templateCharacter;
                                newCharacter.Code = wordPart[wordPartCharacterIndex];
                                newCharacter.Bounds = new LogicalRectangle(bounds.Left, bounds.Top, bounds.Width, bounds.Height, LogicalUnit.Pixel);
                                newCharacter.Base = LogicalLength.FromPixels(wordPosition.Y + baselineOffset - bounds.Y);

                                // We will assume this character is not the last one so we clear all the flags
                                newCharacter.Position = OcrCharacterPosition.None;
                                characters.Add(newCharacter);
                            }

                            // Subtract the part of the string we draw from the overall string size so we know the position of the next character
                            currentStringSize = g.MeasureString(value.Substring(wordCharacterIndex + 1), theFont, PointF.Empty, StringFormat.GenericDefault);
                            wordPosition.X += stringSizeLeft.Width - currentStringSize.Width;
                            stringSizeLeft = currentStringSize;
                            wordCharacterIndex++;
                        }

                        // Add EndOfWord to the character we just inserted
                        // (read, modify and store back the list element)
                        if (wordCharacterIndex > 0)
                        {
                            OcrCharacter character = characters[characters.Count - 1];
                            character.Position |= OcrCharacterPosition.EndOfWord;
                            characters[characters.Count - 1] = character;
                        }

                        // Move a space (if any): advance past the separator that Split removed,
                        // using the same measuring step as for a regular character
                        if (wordCharacterIndex < (value.Length - 1))
                        {
                            currentStringSize = g.MeasureString(value.Substring(wordCharacterIndex + 1), theFont, PointF.Empty, StringFormat.GenericDefault);
                            wordPosition.X += stringSizeLeft.Width - currentStringSize.Width;
                            stringSizeLeft = currentStringSize;
                            wordCharacterIndex++;
                        }

                        // If this is the last character in the over all word, re-add the original position flags
                        // if any (EndOfLine, EndOfZone, etc)
                        if (wordCharacterIndex == value.Length)
                        {
                            OcrCharacter character = characters[characters.Count - 1];
                            character.Position |= lastCharacterPosition;
                            characters[characters.Count - 1] = character;
                        }
                    }

                    // Now add these new characters to the zone, starting where the old word began
                    int index = insertionIndex;
                    foreach (OcrCharacter character in characters)
                    {
                        zoneCharacters.Insert(index++, character);
                    }

                    // Rebuild the zone words
                    zoneWords[zoneIndex].Clear();
                    zoneWords[zoneIndex].AddRange(zoneCharacters.GetWords((int)dpiX, (int)dpiY, LogicalUnit.Pixel));
                }
            }
        }
    }
}
/// <summary>
/// Computes confidence statistics for a recognized page. Each word's confidence is the
/// lowest confidence of its characters, each zone's confidence is the average over its
/// counted words, and the page confidence is the average over all zones that contain
/// characters. Only words longer than three characters are counted.
/// </summary>
/// <param name="ocrPage">The page whose recognized characters are evaluated.</param>
/// <returns>
/// A <c>PageResults</c> built from the page confidence, the number of counted words
/// flagged as certain, and the total number of counted words. All three are zero when
/// the page has no recognized characters.
/// </returns>
private PageResults GetPageConfidence(IOcrPage ocrPage)
{
    // Words of this many characters or fewer are left out of the statistics
    const int ShortWordMaxLength = 3;
    // Starting value for the running minimum of a word's character confidences
    const double ConfidenceCeiling = 1000;

    IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();

    double pageConfidence = 0;
    int certainWords = 0;
    int totalWords = 0;
    int textZoneCount = 0;

    // Without recognized characters there is nothing to walk; previously this
    // dereferenced a null pageCharacters.
    int zoneCount = pageCharacters == null ? 0 : ocrPage.Zones.Count;

    for (int i = 0; i < zoneCount; i++)
    {
        // A zone may have no character entry at all (null) or an empty one
        IOcrZoneCharacters zoneCharacters = pageCharacters.FindZoneCharacters(i);
        if (zoneCharacters == null || zoneCharacters.Count == 0)
        {
            continue;
        }

        textZoneCount++;

        double zoneConfidenceSum = 0;
        int zoneWordCount = 0;
        int characterCount = 0;
        double wordConfidence = ConfidenceCeiling;
        bool newWord = true;

        foreach (var ocrCharacter in zoneCharacters)
        {
            if (newWord)
            {
                characterCount = 0;
                wordConfidence = ConfidenceCeiling;
            }

            // A word is only as reliable as its weakest character
            if (ocrCharacter.Confidence < wordConfidence)
            {
                wordConfidence = ocrCharacter.Confidence;
            }

            characterCount++;

            bool endsWord =
                (ocrCharacter.Position & OcrCharacterPosition.EndOfWord) == OcrCharacterPosition.EndOfWord ||
                (ocrCharacter.Position & OcrCharacterPosition.EndOfLine) == OcrCharacterPosition.EndOfLine;

            if (endsWord && characterCount > ShortWordMaxLength)
            {
                // WordIsCertain is read from the word's last character
                if (ocrCharacter.WordIsCertain)
                {
                    certainWords++;
                }

                totalWords++;
                zoneWordCount++;
                zoneConfidenceSum += wordConfidence;
            }

            newWord = endsWord;
        }

        // A zone with characters but no counted words contributes 0 to the page average
        if (zoneWordCount > 0)
        {
            pageConfidence += zoneConfidenceSum / zoneWordCount;
        }
    }

    if (textZoneCount > 0)
    {
        pageConfidence /= textZoneCount;
    }

    return new PageResults(pageConfidence, certainWords, totalWords);
}