コード例 #1
0
ファイル: OcrWordUpdater.cs プロジェクト: sakpung/webstudy
        /// <summary>
        /// Removes every character that belongs to <paramref name="word"/> from
        /// <paramref name="zoneCharacters"/>.
        /// </summary>
        /// <remarks>
        /// Before the removal, the position flags of the word's last character are OR-ed into the
        /// character that immediately precedes the word (when there is one), so flags such as
        /// EndOfLine or EndOfZone carried by the deleted word are kept on the previous word.
        /// </remarks>
        /// <param name="word">Word whose FirstCharacterIndex..LastCharacterIndex range is removed.</param>
        /// <param name="zoneCharacters">Character list of the zone that contains the word; modified in place.</param>
        private static void DeleteWordCharacters(OcrWord word, IOcrZoneCharacters zoneCharacters)
        {
            int start = word.FirstCharacterIndex;
            int end   = word.LastCharacterIndex;

            if (start > 0)
            {
                int previousIndex = start - 1;
                OcrCharacterPosition trailingFlags = zoneCharacters[end].Position;

                // Fetch the preceding character, add the flags, and store it back through the indexer
                OcrCharacter previous = zoneCharacters[previousIndex];
                previous.Position |= trailingFlags;
                zoneCharacters[previousIndex] = previous;
            }

            // Each removal shifts the remaining characters down, so the index to remove stays fixed
            for (int remaining = end - start + 1; remaining > 0; remaining--)
            {
                zoneCharacters.RemoveAt(start);
            }
        }
コード例 #2
0
        /// <summary>
        /// Reads the OMR result of the first zone on <paramref name="ocrPage"/> and stores it in
        /// <paramref name="imageField"/>'s FieldResult (Text, IsFilled and Confidence).
        /// </summary>
        /// <remarks>
        /// When the mark reads as filled and <paramref name="retry"/> is greater than zero, the first
        /// zone is shrunk via ChangeBoundsRatio, the page is recognized again and the reading is
        /// repeated once (the nested call passes 0, so there is never more than one retry).
        /// When nothing could be read, the result is reset to an empty, unfilled, zero-confidence state.
        /// </remarks>
        /// <param name="ocrPage">Page whose recognized characters are read; its first zone may be narrowed.</param>
        /// <param name="field">Form field being processed; only used for log messages.</param>
        /// <param name="imageField">Receives the reading in its FieldResult.</param>
        /// <param name="retry">Greater than zero to allow one narrowed re-recognition of a filled mark.</param>
        private void GetOmrReading(IOcrPage ocrPage, FormField field, ImageField imageField, int retry = 1)
        {
            IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();

            // No recognition data at all (null) or no zone entry to index into
            if (pageCharacters == null || pageCharacters.Count == 0)
            {
                logger.Warn($"could not read OMR for {field?.Name}");
                imageField.FieldResult.Text       = "";
                imageField.FieldResult.IsFilled   = false;
                imageField.FieldResult.Confidence = 0;
                return;
            }

            IOcrZoneCharacters zoneCharacters = pageCharacters[0];
            if (zoneCharacters == null || zoneCharacters.Count == 0)
            {
                // Clear every part of the result so values from an earlier pass cannot linger
                imageField.FieldResult.Text       = "";
                imageField.FieldResult.IsFilled   = false;
                imageField.FieldResult.Confidence = 0;
                return;
            }

            OcrCharacter omrCharacter = zoneCharacters[0];
            imageField.FieldResult.Text       = omrCharacter.Code.ToString();
            imageField.FieldResult.IsFilled   = omrCharacter.Code == FilledChar;
            imageField.FieldResult.Confidence = omrCharacter.Confidence;

            // often on a fill we get the line from the box, so we retry more narrowly
            if (imageField.FieldResult.IsFilled && retry > 0)
            {
                var narrowedZone = ocrPage.Zones[0];
                narrowedZone.Bounds = ChangeBoundsRatio(narrowedZone.Bounds, 0.66);

                // NOTE(review): OcrZone appears to be a value type, so the modified copy has to be
                // stored back for the narrower bounds to take effect on the page - confirm.
                ocrPage.Zones[0] = narrowedZone;

                ocrPage.Recognize(null);
                GetOmrReading(ocrPage, field, imageField, 0);

                // The narrowed pass may have changed the verdict; only report marks that are still filled
                if (imageField.FieldResult.IsFilled)
                {
                    logger.Info($"FILLED {field.Name}");
                }
            }
        }
コード例 #3
0
        /// <summary>
        /// Fills the zone property controls from the zone at <paramref name="index"/>, or resets
        /// them to their empty/default state when <paramref name="index"/> is -1.
        /// </summary>
        /// <param name="index">Index of the zone in the zone list, or -1 when there is no current zone.</param>
        public void ZoneToControls(int index)
        {
            if (index == -1)
            {
                // No current zone: blank out everything
                _nameTextBox.Text = string.Empty;

                _leftTextBox.Text   = string.Empty;
                _topTextBox.Text    = string.Empty;
                _widthTextBox.Text  = string.Empty;
                _heightTextBox.Text = string.Empty;

                _typeComboBox.SelectedIndex                = 0;
                _languageComboBox.SelectedIndex            = 0;
                _zoneViewPerspectiveComboBox.SelectedIndex = 0;
                _zoneTextDirectionComboBox.SelectedIndex   = 0;
                _omrStatusLabel.Text = string.Empty;

                _digitCheckBox.Checked = false;
                _plusCheckBox.Checked  = false;
                return;
            }

            // Fill the controls from the current zone
            OcrZone zone = _zones[index];
            _nameTextBox.Text = zone.Name;

            // Show the zone bounds as stored
            LeadRect bounds = zone.Bounds;
            _leftTextBox.Text   = bounds.X.ToString();
            _topTextBox.Text    = bounds.Y.ToString();
            _widthTextBox.Text  = bounds.Width.ToString();
            _heightTextBox.Text = bounds.Height.ToString();

            // Detach the change handler while the combo boxes are set programmatically, so these
            // assignments are not treated as user edits. The finally block guarantees the handler
            // is attached again even if one of the assignments throws.
            System.EventHandler propertiesChanged = new System.EventHandler(this._propertiesComboBox_SelectedIndexChanged);
            this._typeComboBox.SelectedIndexChanged                -= propertiesChanged;
            this._languageComboBox.SelectedIndexChanged            -= propertiesChanged;
            this._zoneViewPerspectiveComboBox.SelectedIndexChanged -= propertiesChanged;
            this._zoneTextDirectionComboBox.SelectedIndexChanged   -= propertiesChanged;

            try
            {
                _typeComboBox.SelectedItem = zone.ZoneType;

                // A zone without a language matches the combo entry whose language is the empty string
                string zoneLanguage = string.IsNullOrEmpty(zone.Language) ? String.Empty : zone.Language;
                foreach (MyLanguage ml in _languageComboBox.Items)
                {
                    if (ml.Language == zoneLanguage)
                    {
                        _languageComboBox.SelectedItem = ml;
                        break;
                    }
                }

                // Default to the first entry, then select the matching one if present
                _zoneViewPerspectiveComboBox.SelectedIndex = 0;
                foreach (ViewPerspectiveItem item in _zoneViewPerspectiveComboBox.Items)
                {
                    if (item.ViewPerspective == zone.ViewPerspective)
                    {
                        _zoneViewPerspectiveComboBox.SelectedItem = item;
                        break;
                    }
                }

                _zoneTextDirectionComboBox.SelectedIndex = 0;
                foreach (TextDirectionItem item in _zoneTextDirectionComboBox.Items)
                {
                    if (item.TextDirection == zone.TextDirection)
                    {
                        _zoneTextDirectionComboBox.SelectedItem = item;
                        break;
                    }
                }
            }
            finally
            {
                this._typeComboBox.SelectedIndexChanged                += propertiesChanged;
                this._languageComboBox.SelectedIndexChanged            += propertiesChanged;
                this._zoneViewPerspectiveComboBox.SelectedIndexChanged += propertiesChanged;
                this._zoneTextDirectionComboBox.SelectedIndexChanged   += propertiesChanged;
            }

            // The OMR status line is only meaningful for OMR zones
            _omrStatusLabel.Text = (zone.ZoneType == OcrZoneType.Omr) ? BuildOmrStatusText(zone) : string.Empty;

            _digitCheckBox.Checked = (zone.CharacterFilters & OcrZoneCharacterFilters.Digit) == OcrZoneCharacterFilters.Digit;
            _plusCheckBox.Checked  = (zone.CharacterFilters & OcrZoneCharacterFilters.Plus) == OcrZoneCharacterFilters.Plus;
        }

        /// <summary>
        /// Builds the "Filled"/"Unfilled" status text, including the confidence percentage, for an OMR zone.
        /// </summary>
        /// <param name="zone">Zone whose Id is used to look up its recognized characters on the page.</param>
        /// <returns>
        /// "Unfilled (0% certain)" when the page is not recognized or has no character for the zone;
        /// otherwise the state and confidence of the zone's first recognized character.
        /// </returns>
        private string BuildOmrStatusText(OcrZone zone)
        {
            const string noReadingText = "Unfilled (0% certain)";

            if (!_ocrPage.IsRecognized)
            {
                return noReadingText;
            }

            IOcrPageCharacters pageCharacters = _ocrPage.GetRecognizedCharacters();
            if (pageCharacters == null || pageCharacters.Count == 0 || zone.Id >= pageCharacters.Count)
            {
                return noReadingText;
            }

            IOcrZoneCharacters zoneCharacters = pageCharacters[zone.Id];
            if (zoneCharacters.Count == 0)
            {
                return noReadingText;
            }

            // The first character of the zone carries the OMR state; compare it against the
            // character the engine is configured to emit for a filled mark
            OcrCharacter omrCharacter = zoneCharacters[0];
            char         filledChar   = _ocrEngine.ZoneManager.OmrOptions.GetStateRecognitionCharacter(OcrOmrZoneState.Filled);
            string       stateText    = (omrCharacter.Code == filledChar) ? "Filled" : "Unfilled";

            return string.Format("{0} ({1}% certain)", stateText, omrCharacter.Confidence);
        }
コード例 #4
0
ファイル: OcrWordUpdater.cs プロジェクト: sakpung/webstudy
        /// <summary>
        /// Replaces the text of one recognized word with <paramref name="value"/>, rebuilding the
        /// word's characters (code, bounds, baseline and position flags) and refreshing the word
        /// list of the zone.
        /// </summary>
        /// <remarks>
        /// The new character bounds are synthesized with GDI+: the old and new strings are measured
        /// with a font derived from the word's first character, and each new character is laid out
        /// through a GraphicsPath to obtain its bounding box. If the trimmed value equals the current
        /// word value nothing happens; if it is null or empty the word is simply deleted.
        /// </remarks>
        /// <param name="zoneIndex">Index of the zone in <paramref name="ocrPageCharacters"/> and <paramref name="zoneWords"/>.</param>
        /// <param name="wordIndex">Index of the word inside <c>zoneWords[zoneIndex]</c>.</param>
        /// <param name="value">New word text; leading and trailing whitespace is trimmed.</param>
        /// <param name="ocrPage">Page the word belongs to; read for its DPI and engine type.</param>
        /// <param name="zoneWords">Per-zone word lists; the list at <paramref name="zoneIndex"/> is cleared and rebuilt.</param>
        /// <param name="ocrPageCharacters">Recognized characters of the page; the zone's character list is modified in place.</param>
        public static void Update(int zoneIndex, int wordIndex, string value, IOcrPage ocrPage, List <List <OcrWord> > zoneWords, IOcrPageCharacters ocrPageCharacters)
        {
            // Find the zone characters and the word we are looking for
            IOcrZoneCharacters zoneCharacters = ocrPageCharacters[zoneIndex];
            OcrWord            word           = zoneWords[zoneIndex][wordIndex];

            // OcrCharacter.Bounds does not expect the leading and external leading spaces
            // used when drawing normal text

            // First, we need to calculate the size of the original string and then the new
            // value using the same font. This way, we can calculate the offsets used on the
            // left and on top so we can find the new word value

            // We do not support spaces around the word
            if (value != null)
            {
                value = value.Trim();
            }

            // If the value did not change, don't do anything
            if (value == word.Value)
            {
                return;
            }

            // Get the first character to use as a template for creating the font
            // (it is also copied as the starting point for every new character further down)
            OcrCharacter templateCharacter = zoneCharacters[word.FirstCharacterIndex];

            float dpiX = ocrPage.DpiX;
            float dpiY = ocrPage.DpiY;

            // Use a temporary bitmap object to get its Graphics object
            using (Bitmap btmp = new Bitmap(1, 1))
            {
                using (Graphics g = Graphics.FromImage(btmp))
                {
                    // Do not use anti-aliasing for better calculations
                    g.TextRenderingHint = TextRenderingHint.SingleBitPerPixel;

                    // Create the font used to draw this word
                    // (GetWordFont is defined elsewhere; presumably it derives the font from the
                    // template character's properties - confirm against its implementation)
                    using (Font theFont = GetWordFont(templateCharacter, dpiY))
                    {
                        // Measure the old string and compare against the word bounds reported from
                        // OCR

                        PointF wordPosition   = PointF.Empty;
                        float  baselineOffset = 0;

                        SizeF oldWordBounds = SizeF.Empty;
                        if (!string.IsNullOrEmpty(word.Value))
                        {
                            RectangleF ocrWordBounds = Leadtools.Demos.Converters.ConvertRect(word.Bounds.ToRectangle(dpiX, dpiY));
                            oldWordBounds = g.MeasureString(word.Value, theFont, PointF.Empty, StringFormat.GenericDefault);

                            // Position the measured text box so that it is centered on the bounds
                            // reported by OCR: half of the size difference is taken off each axis
                            wordPosition = new PointF(ocrWordBounds.X - (oldWordBounds.Width - ocrWordBounds.Width) / 2, ocrWordBounds.Y - (oldWordBounds.Height - ocrWordBounds.Height) / 2);

                            // Calculate the baseline offset of this font: the cell ascent expressed in
                            // points (design units scaled by size / em height), then converted to
                            // pixels with the vertical resolution of the Graphics object
                            float baselineOffsetPoints = theFont.SizeInPoints / theFont.FontFamily.GetEmHeight(theFont.Style) * theFont.FontFamily.GetCellAscent(theFont.Style);
                            baselineOffset = g.DpiY / 72.0F * baselineOffsetPoints;
                        }

                        // Save the insertion point and the position flags for the last character so we can
                        // re-use them (in case it has the EndOfLine or EndOfZone flags set).
                        // This must happen before the deletion below, which removes those characters.
                        int insertionIndex = word.FirstCharacterIndex;
                        OcrCharacterPosition lastCharacterPosition = zoneCharacters[word.LastCharacterIndex].Position;
                        DeleteWordCharacters(word, zoneCharacters);

                        // Rebuild the zone words (this is the final state when the new value is empty)
                        zoneWords[zoneIndex].Clear();
                        zoneWords[zoneIndex].AddRange(zoneCharacters.GetWords((int)dpiX, (int)dpiY, LogicalUnit.Pixel));

                        if (!string.IsNullOrEmpty(value))
                        {
                            // Now add the characters of the new word
                            // stringSizeLeft tracks the measured size of the part of the value that has
                            // not been laid out yet; it shrinks as characters are consumed
                            SizeF stringSizeLeft = g.MeasureString(value, theFont, PointF.Empty, StringFormat.GenericDefault);
                            // Em size handed to GraphicsPath.AddString, computed as Size * DPI / 72
                            // (this treats theFont.Size as points - TODO confirm the unit GetWordFont uses)
                            float emSize         = theFont.Size * g.DpiY / 72.0F;

                            // The string might have space characters in the middle, we don't want to
                            // add them since most of the OCR engines do not support a space character
                            // wordCharacterIndex is the running index into value (spaces included)
                            string[] wordParts          = value.Split(new char[] { ' ' });
                            int      wordCharacterIndex = 0;

                            List <OcrCharacter> characters = new List <OcrCharacter>();

                            foreach (string wordPart in wordParts)
                            {
                                SizeF currentStringSize;
                                // Fix for bug 12953 on FileMaker.
                                // For the Arabic engine the start position is shifted horizontally by the
                                // width difference between the remaining new text and the old word.
                                // NOTE(review): this sits inside the foreach, so it runs once per word part
                                // with the current stringSizeLeft - confirm that this is intended.
                                if (ocrPage.Document.Engine.EngineType == OcrEngineType.Arabic)
                                {
                                    if (stringSizeLeft.Width > oldWordBounds.Width)
                                    {
                                        wordPosition.X -= Math.Abs(stringSizeLeft.Width - oldWordBounds.Width);
                                    }
                                    else if (stringSizeLeft.Width < oldWordBounds.Width)
                                    {
                                        wordPosition.X += Math.Abs(stringSizeLeft.Width - oldWordBounds.Width);
                                    }
                                }

                                // Process the characters of this part
                                for (int wordPartCharacterIndex = 0; wordPartCharacterIndex < wordPart.Length; wordPartCharacterIndex++)
                                {
                                    // We are going to use a GraphicsPath object to draw the character on top,
                                    // then use the path GetBounds method to get the exact bounding box we need

                                    string characterString = wordPart.Substring(wordPartCharacterIndex, 1);

                                    using (GraphicsPath path = new GraphicsPath())
                                    {
                                        path.AddString(characterString, theFont.FontFamily, (int)theFont.Style, emSize, wordPosition, StringFormat.GenericDefault);

                                        RectangleF bounds = path.GetBounds();

                                        // Build a character and add it, starting from a copy of the template
                                        // so every property that is not overwritten below is inherited from it
                                        OcrCharacter newCharacter = templateCharacter;
                                        newCharacter.Code   = wordPart[wordPartCharacterIndex];
                                        newCharacter.Bounds = new LogicalRectangle(bounds.Left, bounds.Top, bounds.Width, bounds.Height, LogicalUnit.Pixel);
                                        // Baseline expressed relative to the top of the character's bounding box
                                        newCharacter.Base   = LogicalLength.FromPixels(wordPosition.Y + baselineOffset - bounds.Y);

                                        // We will assume this character is not the last one so we clear all the flags
                                        newCharacter.Position = OcrCharacterPosition.None;

                                        characters.Add(newCharacter);
                                    }

                                    // Subtract the part of the string we draw from the overall string size so we know the position of the next character:
                                    // the width difference between "remaining text" before and after this character is its advance
                                    currentStringSize = g.MeasureString(value.Substring(wordCharacterIndex + 1), theFont, PointF.Empty, StringFormat.GenericDefault);
                                    wordPosition.X   += stringSizeLeft.Width - currentStringSize.Width;
                                    stringSizeLeft    = currentStringSize;
                                    wordCharacterIndex++;
                                }

                                // Add EndOfWord to the character we just inserted
                                if (wordCharacterIndex > 0)
                                {
                                    OcrCharacter character = characters[characters.Count - 1];
                                    character.Position |= OcrCharacterPosition.EndOfWord;
                                    characters[characters.Count - 1] = character;
                                }

                                // Move past a space (if any): advance the position and the running index
                                // without emitting a character for it
                                if (wordCharacterIndex < (value.Length - 1))
                                {
                                    currentStringSize = g.MeasureString(value.Substring(wordCharacterIndex + 1), theFont, PointF.Empty, StringFormat.GenericDefault);
                                    wordPosition.X   += stringSizeLeft.Width - currentStringSize.Width;
                                    stringSizeLeft    = currentStringSize;
                                    wordCharacterIndex++;
                                }

                                // If this is the last character in the overall word, re-add the original position flags
                                // if any (EndOfLine, EndOfZone, etc)
                                if (wordCharacterIndex == value.Length)
                                {
                                    OcrCharacter character = characters[characters.Count - 1];
                                    character.Position |= lastCharacterPosition;
                                    characters[characters.Count - 1] = character;
                                }
                            }

                            // Now add these new characters to the zone, starting where the old word began
                            int index = insertionIndex;
                            foreach (OcrCharacter character in characters)
                            {
                                zoneCharacters.Insert(index++, character);
                            }

                            // Rebuild the zone words again so they include the inserted characters
                            zoneWords[zoneIndex].Clear();
                            zoneWords[zoneIndex].AddRange(zoneCharacters.GetWords((int)dpiX, (int)dpiY, LogicalUnit.Pixel));
                        }
                    }
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Computes an overall confidence figure and word statistics for a recognized page.
        /// </summary>
        /// <remarks>
        /// A word ends at a character flagged EndOfWord or EndOfLine, and its confidence is the lowest
        /// confidence among its characters. Only words longer than three characters are counted.
        /// A zone's confidence is the mean of its counted words; the page confidence is the mean over
        /// all zones that contain at least one character (zones without counted words contribute 0).
        /// </remarks>
        /// <param name="ocrPage">Page whose zones and recognized characters are evaluated.</param>
        /// <returns>
        /// The page confidence together with the number of certain words and the total number of
        /// counted words; all zero when the page has no recognized characters.
        /// </returns>
        private PageResults GetPageConfidence(IOcrPage ocrPage)
        {
            // Seed for the running minimum of a word; assumed to exceed any real character confidence
            const double initialWordConfidence = 1000;
            // Words with this many characters or fewer are ignored in the statistics
            const int ignoredWordLength = 3;

            double pageConfidence = 0;
            int    certainWords   = 0;
            int    totalWords     = 0;
            int    textZoneCount  = 0;

            IOcrPageCharacters pageCharacters = ocrPage.GetRecognizedCharacters();
            if (pageCharacters == null)
            {
                // Nothing was recognized on this page
                return new PageResults(pageConfidence, certainWords, totalWords);
            }

            for (int zoneIndex = 0; zoneIndex < ocrPage.Zones.Count; zoneIndex++)
            {
                IOcrZoneCharacters zoneCharacters = pageCharacters.FindZoneCharacters(zoneIndex);

                // Skip zones without characters (also guards against no entry being returned for the zone)
                if (zoneCharacters == null || zoneCharacters.Count == 0)
                {
                    continue;
                }

                textZoneCount++;

                double zoneConfidenceSum = 0;
                int    zoneWordCount     = 0;

                // State of the word currently being scanned
                double wordConfidence = initialWordConfidence;
                int    characterCount = 0;

                foreach (var ocrCharacter in zoneCharacters)
                {
                    if (ocrCharacter.Confidence < wordConfidence)
                    {
                        wordConfidence = ocrCharacter.Confidence;
                    }
                    characterCount++;

                    bool endsWord = (ocrCharacter.Position & OcrCharacterPosition.EndOfWord) == OcrCharacterPosition.EndOfWord ||
                                    (ocrCharacter.Position & OcrCharacterPosition.EndOfLine) == OcrCharacterPosition.EndOfLine;
                    if (!endsWord)
                    {
                        continue;
                    }

                    if (characterCount > ignoredWordLength)
                    {
                        // The certainty of the word is read from the character that closes it
                        if (ocrCharacter.WordIsCertain)
                        {
                            certainWords++;
                        }
                        totalWords++;
                        zoneWordCount++;
                        zoneConfidenceSum += wordConfidence;
                    }

                    // Start over for the next word
                    wordConfidence = initialWordConfidence;
                    characterCount = 0;
                }

                if (zoneWordCount > 0)
                {
                    pageConfidence += zoneConfidenceSum / zoneWordCount;
                }
            }

            if (textZoneCount > 0)
            {
                pageConfidence /= textZoneCount;
            }
            else
            {
                pageConfidence = 0;
            }

            return new PageResults(pageConfidence, certainWords, totalWords);
        }