コード例 #1
0
ファイル: OcrWordUpdater.cs プロジェクト: sakpung/webstudy
        /// <summary>
        /// Removes every character of <paramref name="word"/> (from its first to its last character
        /// index, inclusive) from <paramref name="zoneCharacters"/>.
        /// </summary>
        /// <remarks>
        /// Before deleting, the position flags of the word's last character are OR-ed into the
        /// character that precedes the word (when there is one), so flags such as EndOfLine or
        /// EndOfZone carried by the deleted word are kept on the previous word.
        /// </remarks>
        /// <param name="word">The word whose characters are removed.</param>
        /// <param name="zoneCharacters">The zone character collection that is modified in place.</param>
        private static void DeleteWordCharacters(OcrWord word, IOcrZoneCharacters zoneCharacters)
        {
            int start = word.FirstCharacterIndex;
            int end   = word.LastCharacterIndex;

            // Only a word that is not first in the zone has a predecessor to inherit the flags
            if (start > 0)
            {
                OcrCharacterPosition trailingFlags = zoneCharacters[end].Position;

                // Read, modify and write back (presumably OcrCharacter is a value type — confirm)
                OcrCharacter previous = zoneCharacters[start - 1];
                previous.Position |= trailingFlags;
                zoneCharacters[start - 1] = previous;
            }

            // Each removal shifts the remaining characters down, so we always remove at the same index
            for (int remaining = end - start + 1; remaining > 0; remaining--)
            {
                zoneCharacters.RemoveAt(start);
            }
        }
コード例 #2
0
ファイル: SubtitleCaption.cs プロジェクト: Rupan/SupRip
        /*
         * public SubtitleImage GetImage()
         * {
         *      Bitmap b = GetBitmap();
         *      Debugger.Print(Scan());
         *      return new SubtitleImage(GetBitmap());
         * }*/

        /// <summary>
        /// Runs MODI OCR (English) over this caption's bitmap and returns the recognized text.
        /// </summary>
        /// <remarks>
        /// The bitmap is first saved as "suprip_temp.png" under the system temp path because the
        /// MODI document is created from a file. As a side effect, <c>ocrWords</c> is replaced by a
        /// new list holding one <c>OcrWord</c> (confidence + text) per recognized word.
        /// The MODI document is closed even when OCR or word extraction throws.
        /// </remarks>
        /// <returns>The recognized words in layout order, each followed by a single space.</returns>
        public string Scan()
        {
            string bitmapName = Path.GetTempPath() + "suprip_temp.png";

            MODI.Document md = new MODI.Document();
            GetBitmap().Save(bitmapName);

            md.Create(bitmapName);

            System.Text.StringBuilder scanned = new System.Text.StringBuilder();

            try
            {
                md.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, true, true);

                MODI.Image  image  = (MODI.Image)md.Images[0];
                MODI.Layout layout = image.Layout;

                ocrWords = new List <OcrWord>();
                for (int j = 0; j < layout.Words.Count; j++)
                {
                    // Get this word and deal with it.
                    MODI.Word word = (MODI.Word)layout.Words[j];
                    string    text = word.Text;

                    ocrWords.Add(new OcrWord(word.RecognitionConfidence, text));
                    scanned.Append(text).Append(' ');
                }
            }
            finally
            {
                // Always release the document (without saving changes), even if OCR failed
                md.Close(false);
            }

            return(scanned.ToString());
        }
コード例 #3
0
 /// <summary>
 /// Registers <paramref name="word"/> under its text, creating the list for that text
 /// the first time it is seen.
 /// </summary>
 /// <param name="word">The word to add; its <c>getText()</c> value is used as the key.</param>
 public void AddWord(OcrWord word)
 {
     // Read the key once and resolve the bucket with a single lookup
     string key = word.getText();

     List <OcrWord> bucket;
     if (!this.words.TryGetValue(key, out bucket))
     {
         bucket = new List <OcrWord>();
         this.words[key] = bucket;
     }

     bucket.Add(word);
 }
コード例 #4
0
        /// <summary>
        /// Searches the OCR result for <paramref name="searchWord"/> and reports the center of the
        /// match that lies closest to the center of the bitmap.
        /// </summary>
        /// <remarks>
        /// Both the search word and the recognized words are passed through <c>Sanitize</c> before
        /// comparison. Because the engine sometimes splits words, following words are appended to the
        /// current one until the combined text is at least as long as the search word. The reported
        /// position is taken from the bounding rectangle of the last word that was appended.
        /// Diagnostic text is accumulated in <c>debugText</c>.
        /// </remarks>
        /// <param name="searchWord">The word or phrase to look for.</param>
        /// <param name="position">Center of the closest match, or (0, 0) when nothing matched.</param>
        /// <returns><c>true</c> when at least one match was found; otherwise <c>false</c>.</returns>
        public bool FindWord(string searchWord, out Point position)
        {
            Point viewCenter = new Point(bitmap.Width / 2, bitmap.Height / 2);

            position   = new Point(0, 0);
            debugText += ocrResult.Text;

            // Whitespace is stripped on both sides so longer phrases can be searched for
            searchWord = Sanitize(searchWord);
            if (searchWord.Length <= 0)
            {
                return(false);
            }

            bool  found        = false;
            float bestDistance = float.MaxValue;

            OcrResultIterator cursor = new OcrResultIterator(ocrResult);
            while (!cursor.Done())
            {
                OcrWord lastWord  = cursor.NextWord();
                string  candidate = Sanitize(lastWord.Text);

                // Look ahead on a copy of the iterator so the outer walk still visits every word
                OcrResultIterator lookahead = new OcrResultIterator(cursor);
                while (candidate.Length < searchWord.Length && !lookahead.Done())
                {
                    lastWord   = lookahead.NextWord();
                    candidate += Sanitize(lastWord.Text);
                }

                debugText += "\r\nWord: '" + candidate + "'";
                if (!candidate.Contains(searchWord))
                {
                    continue;
                }

                debugText += "\r\n*** Word match!";

                Windows.Foundation.Rect box = lastWord.BoundingRect;
                Point wordCenter            = new Point((int)(box.Left + box.Right) / 2, (int)(box.Top + box.Bottom) / 2);

                int   dx       = wordCenter.X - viewCenter.X;
                int   dy       = wordCenter.Y - viewCenter.Y;
                float distance = (float)Math.Sqrt(dx * dx + dy * dy);

                // "<=" means a later match at the same distance replaces an earlier one
                if (distance <= bestDistance)
                {
                    debugText   += "\r\n*** Closest!";
                    position     = wordCenter;
                    found        = true;
                    bestDistance = distance;
                }
            }

            return(found);
        }
コード例 #5
0
        /// <summary>
        /// Makes the given word the current selection: stores its zone and word indices, shows its
        /// value in the word text box, then repaints the viewer and refreshes the UI state.
        /// </summary>
        /// <param name="wordZoneIndex">Index of the zone that contains the word.</param>
        /// <param name="wordIndex">Index of the word inside its zone.</param>
        /// <param name="word">The word whose value is shown in the text box.</param>
        private void SelectWord(int wordZoneIndex, int wordIndex, OcrWord word)
        {
            // Record the selection before touching any control
            _selectedWordIndex = wordIndex;
            _selectedZoneIndex = wordZoneIndex;

            _wordTextBox.Text = word.Value;

            // Repaint first, then let the UI state catch up with the new selection
            _imageViewer.Invalidate();
            UpdateUIState();
        }
コード例 #6
0
        /// <summary>
        /// Builds an <c>OcrWord</c> from a JSON token: the location comes from <c>ParseBox</c> and the
        /// text from the token's "text" entry.
        /// </summary>
        /// <param name="token">The JSON token describing the word.</param>
        /// <param name="result">The OCR result passed through to <c>ParseBox</c>.</param>
        /// <returns>The newly created word.</returns>
        private OcrWord ParseWord(JToken token, OcrResult result)
        {
            OcrWord parsed = new OcrWord();

            parsed.Location = ParseBox(token, result);
            parsed.Text     = Convert.ToString(token["text"]);

            return(parsed);
        }
コード例 #7
0
        /// <summary>
        /// Walks every page, paragraph, line and word of the OCR result, adds each line's text to the
        /// receipt as a row and registers the numeric words (ids) together with their coordinates.
        /// </summary>
        /// <remarks>
        /// A word counts as an id when it parses as a double, contains no comma and does not start
        /// with "0". A 13-character id that starts with "7" or "129" (but not with "780", "761", "762"
        /// or "729") is reduced to its last 10 characters. An id that is already registered only gets
        /// an additional occurrence when no stored occurrence lies within 50 units of the word's Y.
        /// The whole scan runs while holding <c>mutex</c>, which is released even if an exception is thrown.
        /// </remarks>
        /// <param name="ocrResults">The OCR result to scan.</param>
        /// <param name="receipt">The receipt that receives the rows and words.</param>
        private void DetectWords(OcrResult ocrResults, Receipt receipt)
        {
            _logger.Debug($"Detecting words for receipt: {receipt.GetName()}");
            //page object
            mutex.WaitOne();
            try
            {
                foreach (var page in ocrResults.Pages)
                {
                    //page -> paragraphs
                    foreach (OcrResult.OcrParagraph ocrParagraph in page.Paragraphs)
                    {
                        //paragraph -> lines
                        foreach (var line in ocrParagraph.Lines)
                        {
                            receipt.AddRow(line.Text);

                            //line -> words
                            foreach (var word in line.Words)
                            {
                                double num;
                                string wordContent = word.Text;
                                // NOTE(review): TryParse/ToString use the current culture here — confirm that is intended
                                if (double.TryParse(wordContent, out num) && !wordContent.Contains(",") && !wordContent.StartsWith("0"))//if number
                                {
                                    string id = num.ToString();

                                    // Keep only the last 10 characters of matching 13-character ids
                                    if (id.Length == 13 && (id.StartsWith("7") || id.StartsWith("129")) && !id.StartsWith("780") && !id.StartsWith("761") && !id.StartsWith("762") && !id.StartsWith("729"))
                                    {
                                        id = id.Substring(Math.Max(0, id.Length - 10));
                                    }

                                    //not contains
                                    if (!receipt.GetWordsList().ContainsKey(id))
                                    {
                                        receipt.AddWord(new OcrWord(word.X, word.Y, word.Width, word.Height, id)); //add word to receipt object
                                    }

                                    //contains: add another occurrence only when none is vertically close to this one
                                    if (receipt.GetWordsList().ContainsKey(id))
                                    {
                                        OcrWord result = receipt.GetWordsList()[id].Find(item => Math.Abs(item.getY() - word.Y) < 50);
                                        if (result == null)
                                        {
                                            receipt.GetWordsList()[id].Add(new OcrWord(word.X, word.Y, word.Width, word.Height, id));
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            finally
            {
                // Release on every exit path so a failure cannot leave the mutex held
                mutex.ReleaseMutex();
            }
            _logger.Debug($"Succesful Detecting words for receipt: {receipt.GetName()}");
        }
コード例 #8
0
        /// <summary>
        /// Draws a filled, outlined rectangle around one recognized word on the viewer surface.
        /// </summary>
        /// <param name="g">The graphics object to draw on.</param>
        /// <param name="zoneIndex">Index of the zone that contains the word.</param>
        /// <param name="wordIndex">Index of the word inside its zone.</param>
        /// <param name="b">Brush used to fill the rectangle.</param>
        /// <param name="p">Pen used to outline the rectangle.</param>
        private void HighlightWord(Graphics g, int zoneIndex, int wordIndex, Brush b, Pen p)
        {
            OcrWord target = _ocrZoneWords[zoneIndex][wordIndex];

            // Word bounds, as an integer rectangle in image coordinates
            LeadRect imageRect = LeadRectD.FromLTRB(target.Bounds.Left, target.Bounds.Top, target.Bounds.Right, target.Bounds.Bottom).ToLeadRect();

            // Map from image coordinates to control coordinates so it can be painted on the viewer
            LeadRect controlRect = _imageViewer.ConvertRect(null, ImageViewerCoordinateType.Image, ImageViewerCoordinateType.Control, imageRect);

            // Grow the rectangle slightly so the highlight is easier to see
            controlRect.Inflate(_wordEdge, _wordEdge);

            g.FillRectangle(b, Leadtools.Demos.Converters.ConvertRect(controlRect));
            g.DrawRectangle(p, controlRect.X, controlRect.Y, controlRect.Width - 1, controlRect.Height - 1);
        }
コード例 #9
0
        /// <summary>
        /// Returns the word at the current position and advances the iterator to the next word,
        /// moving on to following lines when the current line has no more words.
        /// </summary>
        /// <returns>The word the iterator pointed at before advancing.</returns>
        public OcrWord NextWord()
        {
            OcrWord current = result.Lines[lineIndex].Words[wordIndex];

            charIndex = 0;
            wordIndex++;

            // Skip over exhausted lines (and any line that has no words at all)
            while (lineIndex < result.Lines.Count)
            {
                if (wordIndex < result.Lines[lineIndex].Words.Count)
                {
                    break;
                }

                wordIndex = 0;
                lineIndex++;
            }

            return(current);
        }
コード例 #10
0
        /// <summary>
        /// Parses a JSON OCR response into an <c>OcrResult</c> with a single region that contains
        /// every recognized word.
        /// </summary>
        /// <remarks>
        /// Words are read from <c>responses[0].textAnnotations</c>. The first annotation is skipped and
        /// the remaining ones become words whose ids start at 2. The region's location is the bounding
        /// box of all word locations. When the response has no <c>textAnnotations</c>, or only the
        /// first one, the result holds an empty word list and the region keeps a default location
        /// instead of throwing.
        /// </remarks>
        /// <param name="jsonResult">The raw JSON response text.</param>
        /// <param name="info">Image information stored on the result and passed to <c>GetLocation</c>.</param>
        /// <returns>The parsed result.</returns>
        public OcrResult DoParse(string jsonResult, ImageInfo info)
        {
            var words  = new List <OcrWord>();
            var json   = JObject.Parse(jsonResult);
            var result = new OcrResult()
            {
                Info = info, TextAngle = 0
            };
            var mainRegion = new OcrRegion()
            {
                Id    = 1,
                Code  = OcrLoaderHelper.GetRegionCode(1),
                Words = words
            };

            // A response without "textAnnotations" means no text was found, so treat it as empty
            var firstResponse = json["responses"].Value <JArray>()[0];
            var annotations   = firstResponse["textAnnotations"] as JArray;
            var wordId        = 1;

            if (annotations != null)
            {
                foreach (var ann in annotations)
                {
                    // The first annotation is not an individual word, so it is skipped
                    if (wordId != 1)
                    {
                        var word = new OcrWord()
                        {
                            Id       = wordId,
                            Location = GetLocation(ann, info),
                            Text     = ann["description"].Value <string>()
                        };
                        words.Add(word);
                    }
                    wordId++;
                }
            }

            mainRegion.Location = new OcrLocation();

            // Min/Max throw on an empty sequence, so only compute the bounding box when words exist
            if (words.Count > 0)
            {
                mainRegion.Location.X      = mainRegion.Words.Min(i => i.Location.X);
                mainRegion.Location.Width  = mainRegion.Words.Max(i => i.Location.XBound) - mainRegion.Location.X;
                mainRegion.Location.Y      = mainRegion.Words.Min(i => i.Location.Y);
                mainRegion.Location.Height = mainRegion.Words.Max(i => i.Location.YBound) - mainRegion.Location.Y;
            }

            result.Regions.Add(mainRegion);
            result.Words = words;
            return(result);
        }
コード例 #11
0
        /// <summary>
        /// Selects the first word whose inflated bounds contain <paramref name="pt"/>; when no word
        /// is hit, clears the selection by selecting indices (-1, -1) with an empty word.
        /// </summary>
        /// <param name="pt">The point to hit-test against the word bounds.</param>
        private void FindWordUnderPoint(LeadPoint pt)
        {
            PointF target = new PointF(pt.X, pt.Y);

            foreach (List <OcrWord> zone in _ocrZoneWords)
            {
                for (int i = 0; i < zone.Count; i++)
                {
                    OcrWord candidate = zone[i];

                    // Use the same enlarged rectangle that is used for highlighting
                    RectangleF hitArea = Leadtools.Demos.Converters.ConvertRect(candidate.Bounds);
                    hitArea.Inflate(_wordEdge, _wordEdge);

                    if (!hitArea.Contains(target))
                    {
                        continue;
                    }

                    // First hit wins
                    SelectWord(_ocrZoneWords.IndexOf(zone), i, candidate);
                    return;
                }
            }

            // Nothing under the point: drop the current selection
            SelectWord(-1, -1, new OcrWord());
        }
コード例 #12
0
ファイル: WordOverlay.cs プロジェクト: togeljoss/microsoft
        /// <summary>
        /// Creates an overlay for the given recognized word and initializes the overlay's
        /// properties from the word's bounding rectangle.
        /// </summary>
        /// <param name="ocrWord">The recognized word this overlay represents.</param>
        public WordOverlay(OcrWord ocrWord)
        {
            this.word = ocrWord;
            UpdateProps(this.word.BoundingRect);
        }
コード例 #13
0
 /// <summary>
 /// Creates an instance that copies the text and the bounding rectangle of the given word.
 /// </summary>
 /// <param name="word">The recognized word to copy from.</param>
 public OCRYokoText(OcrWord word)
 {
     this.Text = word.Text;
     this.Rect = word.BoundingRect;
 }
コード例 #14
0
ファイル: OcrWordUpdater.cs プロジェクト: sakpung/webstudy
        /// <summary>
        /// Replaces the text of one recognized word with <paramref name="value"/>: the word's characters
        /// are deleted from the zone, new characters are measured with a font derived from the word's
        /// first character and inserted at the same index, and the zone's word list is rebuilt.
        /// </summary>
        /// <remarks>
        /// The value is trimmed first. Nothing happens when the trimmed value equals the current word
        /// value. A null or empty value only deletes the word. Spaces inside the value split it into
        /// parts; no character is created for a space, and the last character of each part is flagged
        /// EndOfWord. The position flags of the original last character are re-applied to the last
        /// character of the new value.
        /// </remarks>
        /// <param name="zoneIndex">Index of the zone that contains the word.</param>
        /// <param name="wordIndex">Index of the word inside <c>zoneWords[zoneIndex]</c>.</param>
        /// <param name="value">The new text of the word; may be null or empty to delete the word.</param>
        /// <param name="ocrPage">The page; supplies the DPI and the engine type.</param>
        /// <param name="zoneWords">Per-zone word lists; the list for <paramref name="zoneIndex"/> is cleared and refilled.</param>
        /// <param name="ocrPageCharacters">Per-zone character collections; the one for <paramref name="zoneIndex"/> is modified.</param>
        public static void Update(int zoneIndex, int wordIndex, string value, IOcrPage ocrPage, List <List <OcrWord> > zoneWords, IOcrPageCharacters ocrPageCharacters)
        {
            // Find the zone characters we are looking for
            // Find the word we are looking for
            IOcrZoneCharacters zoneCharacters = ocrPageCharacters[zoneIndex];
            OcrWord            word           = zoneWords[zoneIndex][wordIndex];

            // OcrCharacter.Bounds does not expect the leading and external leading spaces
            // used when drawing normal text

            // First, we need to calculate the size of the original string and then the new
            // value using the same font. This way, we can calculate the offsets used on the
            // left and on top so we can find the new word value

            // We do not support spaces around the word
            if (value != null)
            {
                value = value.Trim();
            }

            // If the value did not change, don't do anything
            if (value == word.Value)
            {
                return;
            }

            // Get the first character to use as a template for creating the font
            // NOTE(review): the indices are used without validation here — confirm callers always pass a valid word
            OcrCharacter templateCharacter = zoneCharacters[word.FirstCharacterIndex];

            float dpiX = ocrPage.DpiX;
            float dpiY = ocrPage.DpiY;

            // Use a temporary bitmap object to get its Graphics object
            using (Bitmap btmp = new Bitmap(1, 1))
            {
                using (Graphics g = Graphics.FromImage(btmp))
                {
                    // Do not use anti-aliasing for better calculations
                    g.TextRenderingHint = TextRenderingHint.SingleBitPerPixel;

                    // Create the font used to draw this word
                    using (Font theFont = GetWordFont(templateCharacter, dpiY))
                    {
                        // Measure the old string and compare against the word bounds reported from
                        // OCR

                        PointF wordPosition   = PointF.Empty;
                        float  baselineOffset = 0;

                        SizeF oldWordBounds = SizeF.Empty;
                        if (!string.IsNullOrEmpty(word.Value))
                        {
                            RectangleF ocrWordBounds = Leadtools.Demos.Converters.ConvertRect(word.Bounds.ToRectangle(dpiX, dpiY));
                            oldWordBounds = g.MeasureString(word.Value, theFont, PointF.Empty, StringFormat.GenericDefault);

                            // Center the measured string rectangle on the OCR-reported rectangle; its
                            // top-left corner becomes the drawing origin for the new characters
                            wordPosition = new PointF(ocrWordBounds.X - (oldWordBounds.Width - ocrWordBounds.Width) / 2, ocrWordBounds.Y - (oldWordBounds.Height - ocrWordBounds.Height) / 2);

                            // Calculate the baseline offset of this font
                            float baselineOffsetPoints = theFont.SizeInPoints / theFont.FontFamily.GetEmHeight(theFont.Style) * theFont.FontFamily.GetCellAscent(theFont.Style);
                            baselineOffset = g.DpiY / 72.0F * baselineOffsetPoints;
                        }

                        // Save the insertion point and the position flags for the last character so we can
                        // re-use it (in case, it has an EndOfLine or EndOfZone flags set)
                        int insertionIndex = word.FirstCharacterIndex;
                        OcrCharacterPosition lastCharacterPosition = zoneCharacters[word.LastCharacterIndex].Position;
                        DeleteWordCharacters(word, zoneCharacters);

                        // Rebuild the zone words
                        // (this is the final state when the new value is null or empty)
                        zoneWords[zoneIndex].Clear();
                        zoneWords[zoneIndex].AddRange(zoneCharacters.GetWords((int)dpiX, (int)dpiY, LogicalUnit.Pixel));

                        if (!string.IsNullOrEmpty(value))
                        {
                            // Now add the characters of the new word
                            // stringSizeLeft holds the measured size of the part of the value not yet placed
                            SizeF stringSizeLeft = g.MeasureString(value, theFont, PointF.Empty, StringFormat.GenericDefault);
                            float emSize         = theFont.Size * g.DpiY / 72.0F;

                            // The string might have space characters in the middle, we don't want to
                            // add them since most of the OCR engines do not support a space character
                            string[] wordParts          = value.Split(new char[] { ' ' });
                            // Index into the whole value (spaces included), not into the current part
                            int      wordCharacterIndex = 0;

                            List <OcrCharacter> characters = new List <OcrCharacter>();

                            foreach (string wordPart in wordParts)
                            {
                                SizeF currentStringSize;
                                // Fix for bug 12953 on FileMaker.
                                // NOTE(review): this adjustment sits inside the per-part loop, so it runs once for
                                // every space-separated part — confirm that is intended
                                if (ocrPage.Document.Engine.EngineType == OcrEngineType.Arabic)
                                {
                                    if (stringSizeLeft.Width > oldWordBounds.Width)
                                    {
                                        wordPosition.X -= Math.Abs(stringSizeLeft.Width - oldWordBounds.Width);
                                    }
                                    else if (stringSizeLeft.Width < oldWordBounds.Width)
                                    {
                                        wordPosition.X += Math.Abs(stringSizeLeft.Width - oldWordBounds.Width);
                                    }
                                }

                                // Process the characters of this part
                                for (int wordPartCharacterIndex = 0; wordPartCharacterIndex < wordPart.Length; wordPartCharacterIndex++)
                                {
                                    // We are going to use a GraphicsPath object to draw character on top
                                    // Then use the path GetBounds method to get the exact bounding box we need

                                    string characterString = wordPart.Substring(wordPartCharacterIndex, 1);

                                    using (GraphicsPath path = new GraphicsPath())
                                    {
                                        path.AddString(characterString, theFont.FontFamily, (int)theFont.Style, emSize, wordPosition, StringFormat.GenericDefault);

                                        RectangleF bounds = path.GetBounds();

                                        // Build a character and add it
                                        // Start from the template so members not set below carry over from it
                                        // (presumably OcrCharacter is a value type, making this a copy — confirm)
                                        OcrCharacter newCharacter = templateCharacter;
                                        newCharacter.Code   = wordPart[wordPartCharacterIndex];
                                        newCharacter.Bounds = new LogicalRectangle(bounds.Left, bounds.Top, bounds.Width, bounds.Height, LogicalUnit.Pixel);
                                        newCharacter.Base   = LogicalLength.FromPixels(wordPosition.Y + baselineOffset - bounds.Y);

                                        // We will assume this character is not the last one so we clear all the flags
                                        newCharacter.Position = OcrCharacterPosition.None;

                                        characters.Add(newCharacter);
                                    }

                                    // Subtract the part of the string we draw from the overall string size so we know the position of the next character
                                    currentStringSize = g.MeasureString(value.Substring(wordCharacterIndex + 1), theFont, PointF.Empty, StringFormat.GenericDefault);
                                    wordPosition.X   += stringSizeLeft.Width - currentStringSize.Width;
                                    stringSizeLeft    = currentStringSize;
                                    wordCharacterIndex++;
                                }

                                // Add EndOfWord to the character we just inserted
                                if (wordCharacterIndex > 0)
                                {
                                    OcrCharacter character = characters[characters.Count - 1];
                                    character.Position |= OcrCharacterPosition.EndOfWord;
                                    characters[characters.Count - 1] = character;
                                }

                                // Move a space (if any)
                                // Advances the drawing position and the index past the separator without adding a character
                                if (wordCharacterIndex < (value.Length - 1))
                                {
                                    currentStringSize = g.MeasureString(value.Substring(wordCharacterIndex + 1), theFont, PointF.Empty, StringFormat.GenericDefault);
                                    wordPosition.X   += stringSizeLeft.Width - currentStringSize.Width;
                                    stringSizeLeft    = currentStringSize;
                                    wordCharacterIndex++;
                                }

                                // If this is the last character in the over all word, re-add the original position flags
                                // if any (EndOfLine, EndOfZone, etc)
                                if (wordCharacterIndex == value.Length)
                                {
                                    OcrCharacter character = characters[characters.Count - 1];
                                    character.Position |= lastCharacterPosition;
                                    characters[characters.Count - 1] = character;
                                }
                            }

                            // Now add these new characters to the zone
                            // They go where the deleted word started, in order
                            int index = insertionIndex;
                            foreach (OcrCharacter character in characters)
                            {
                                zoneCharacters.Insert(index++, character);
                            }

                            // Rebuild the zone words
                            zoneWords[zoneIndex].Clear();
                            zoneWords[zoneIndex].AddRange(zoneCharacters.GetWords((int)dpiX, (int)dpiY, LogicalUnit.Pixel));
                        }
                    }
                }
            }
        }
コード例 #15
0
        /// <summary>
        /// Creates an overlay for the given recognized word and initializes the overlay's
        /// properties from the word's bounding rectangle.
        /// </summary>
        /// <param name="ocrWord">The recognized word this overlay represents.</param>
        public WordOverlay(OcrWord ocrWord)
        {
            this.word = ocrWord;
            UpdateProps(this.word.BoundingRect);
        }
コード例 #16
0
ファイル: SubtitleCaption.cs プロジェクト: peterdk/SupRip
        /*
        public SubtitleImage GetImage()
        {
            Bitmap b = GetBitmap();
            Debugger.Print(Scan());
            return new SubtitleImage(GetBitmap());
        }*/
        /// <summary>
        /// Runs MODI OCR (English) over this caption's bitmap and returns the recognized text.
        /// </summary>
        /// <remarks>
        /// The bitmap is first saved as "suprip_temp.png" under the system temp path because the
        /// MODI document is created from a file. As a side effect, <c>ocrWords</c> is replaced by a
        /// new list holding one <c>OcrWord</c> (confidence + text) per recognized word.
        /// The MODI document is closed even when OCR or word extraction throws.
        /// </remarks>
        /// <returns>The recognized words in layout order, each followed by a single space.</returns>
        public string Scan()
        {
            string bitmapName = Path.GetTempPath() + "suprip_temp.png";
            MODI.Document md = new MODI.Document();
            GetBitmap().Save(bitmapName);

            md.Create(bitmapName);

            System.Text.StringBuilder scanned = new System.Text.StringBuilder();

            try
            {
                md.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, true, true);

                MODI.Image image = (MODI.Image)md.Images[0];
                MODI.Layout layout = image.Layout;

                ocrWords = new List<OcrWord>();
                for (int j = 0; j < layout.Words.Count; j++)
                {
                    // Get this word and deal with it.
                    MODI.Word word = (MODI.Word)layout.Words[j];
                    string text = word.Text;

                    ocrWords.Add(new OcrWord(word.RecognitionConfidence, text));
                    scanned.Append(text).Append(' ');
                }
            }
            finally
            {
                // Always release the document (without saving changes), even if OCR failed
                md.Close(false);
            }

            return scanned.ToString();
        }