Exemple #1
0
        /// <summary>
        /// Runs Tesseract OCR (English data, <c>PageSegMode.AutoOsd</c>) over the image and collects
        /// the bounding box of every recognized symbol whose text consists solely of letters.
        /// </summary>
        /// <param name="currentImage">Image to analyse.</param>
        /// <returns>
        /// One rectangle per letter-only symbol, in the order the result iterator yields them;
        /// an empty list when no such symbol is found.
        /// </returns>
        public List <Rectangle> GetTextRects(Bitmap currentImage)
        {
            // The trained data is looked up in a "tessdata" folder next to the executing assembly.
            string assemblyDir = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
            string tessPath    = System.IO.Path.Combine(assemblyDir ?? string.Empty, "tessdata");

            List <Rectangle> currentRects = new List <Rectangle>();

            // Stacked usings dispose in reverse order (iterator, then page, then engine) and also
            // release the native handles when recognition throws.
            using (TesseractEngine tess = new TesseractEngine(tessPath, "eng"))
            using (Page newPage = tess.Process(currentImage, PageSegMode.AutoOsd))
            using (ResultIterator iterator = newPage.GetIterator())
            {
                // Let the iterator itself decide when the page is exhausted instead of guessing
                // the number of symbols from the length of the recognized text.
                do
                {
                    string symbolText = iterator.GetText(PageIteratorLevel.Symbol);
                    if (string.IsNullOrEmpty(symbolText) || !symbolText.All(char.IsLetter))
                    {
                        // In a do/while, continue jumps to the Next(...) condition below.
                        continue;
                    }

                    Rect foundRect;
                    if (iterator.TryGetBoundingBox(PageIteratorLevel.Symbol, out foundRect))
                    {
                        currentRects.Add(new Rectangle(foundRect.X1, foundRect.Y1, foundRect.X2 - foundRect.X1, foundRect.Y2 - foundRect.Y1));
                    }
                } while (iterator.Next(PageIteratorLevel.Symbol));
            }

            return currentRects;
        }
Exemple #2
0
        /// <summary>
        /// Recognizes the text in <paramref name="img"/> with a Tesseract engine created for the
        /// <c>_lang</c> language, passes the resulting iterator to <c>IterateFullPage</c> together
        /// with <c>_textLines</c> (by reference), and reports the engine's mean text confidence.
        /// </summary>
        /// <param name="img">Image whose raw pixel buffer is handed to the engine.</param>
        /// <returns>The value of <c>engine.MeanTextConf</c> after recognition.</returns>
        private int RunTesseract(Mat img)
        {
            int conf = 0;

            using (TessBaseAPI engine = new TessBaseAPI(@".\tessdata", _lang, OcrEngineMode.TESSERACT_LSTM_COMBINED))
            {
                engine.InitForAnalysePage();
                engine.Init(null, _lang);

                // Reinterpret the signed native address as unsigned; this is the same bit pattern
                // the original BitConverter round-trip produced.
                UIntPtr pixelData = new UIntPtr(unchecked((ulong)img.Data.ToInt64()));
                engine.SetImage(pixelData, img.Size().Width, img.Size().Height, img.Channels(), (int)img.Step1());
                engine.Recognize();

                ResultIterator iterator = engine.GetIterator();
                try
                {
                    IterateFullPage(iterator, ref _textLines);
                }
                finally
                {
                    // Release the iterator even when IterateFullPage throws.
                    if (iterator != null)
                    {
                        iterator.Dispose();
                    }
                }

                conf = engine.MeanTextConf;
            }
            return conf;
        }
Exemple #3
0
        /// <summary>
        /// Runs OCR on <paramref name="pageData"/> and appends one <c>TextArea</c> per valid
        /// recognized word to <paramref name="page"/>, including per-symbol rectangles.
        /// </summary>
        /// <param name="engine">Tesseract engine used to process the image.</param>
        /// <param name="pageData">Image of the page to recognize.</param>
        /// <param name="language">Not used by this method.</param>
        /// <param name="page">Document page whose <c>TextAreas</c> collection receives the results.</param>
        private void GetPageData(TesseractEngine engine, Pix pageData, string language, ATAPY.Document.Data.Core.Page page)
        {
            using (var tessPage = engine.Process(pageData))
            {
                tessPage.Recognize();

                // The iterator belongs to the page, so it is disposed inside the page's scope
                // (before the page itself) rather than after the page is already gone.
                using (var resultIterator = tessPage.GetIterator())
                {
                    resultIterator.Begin();

                    do
                    {
                        var text = resultIterator.GetText(PageIteratorLevel.Word);
                        if (TextIsValid(text) && resultIterator.TryGetBoundingBox(PageIteratorLevel.Word, out var rect))
                        {
                            var wordRect = GetRect(rect);
                            var area     = new TextArea(wordRect, text, page);
                            page.TextAreas.Add(area);

                            // One slot per character of the word text; slots whose symbol has no
                            // bounding box keep the default rectangle.
                            var chars    = new System.Windows.Rect[text.Length];
                            int charIter = 0;
                            do
                            {
                                // The number of symbols reported by the iterator is not guaranteed to
                                // equal text.Length, so never write past the end of the array.
                                if (charIter < chars.Length &&
                                    resultIterator.TryGetBoundingBox(PageIteratorLevel.Symbol, out var symbolRect))
                                {
                                    chars[charIter] = GetRect(symbolRect);
                                }
                                charIter++;
                            } while (resultIterator.Next(PageIteratorLevel.Word, PageIteratorLevel.Symbol));
                            area.SetCharProperties(chars);
                        }
                    } while (resultIterator.Next(PageIteratorLevel.Word));
                }
            }
        }
        /*
         * int LookingTextMarker(RecognazeRule Rule, Page page, out ResultIterator BestLineIter, out int word)
         * {
         *      word = -1;
         *      BestLineIter = null;
         *      int BestDistance = 10000;
         *
         *      ResultIterator LineIter = page.GetIterator();
         *      string[] Words = Rule.TextMarker.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
         *      int NumberOfWords = Words.Length;
         *      LineIter.Begin();
         *      do
         *      {
         *              int CurrentWordNumber = -1;
         *              int CurrentBestDistance = 10000;
         *              string Line = LineIter.GetText(PageIteratorLevel.TextLine);
         *              if(Line == null)
         *                      continue;
         *              string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.None);
         *              if(WordsOfLine.Length < NumberOfWords)
         *                      continue;
         *
         *              for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
         *              {
         *                      int PassDistance = 0;
         *                      for(int i = 0; i < NumberOfWords; i++)
         *                      {
         *                              PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
         *                                                                                            Words[i],
         *                                                                                            StringComparison.CurrentCultureIgnoreCase);
         *                      }
         *                      if(PassDistance < CurrentBestDistance)
         *                      {
         *                              CurrentBestDistance = PassDistance;
         *                              CurrentWordNumber = shift + 1;
         *                      }
         *              }
         *              if(CurrentBestDistance < BestDistance)
         *              {
         *                      AddToLog ("new best");
         *                      AddToLog (LineIter.GetText(PageIteratorLevel.Word));
         *                      word = CurrentWordNumber;
         *                      if(BestLineIter != null)
         *                              BestLineIter.Dispose();
         *                      BestLineIter = LineIter.Clone();
         *                      AddToLog (BestLineIter.GetText(PageIteratorLevel.TextLine));
         *                      BestDistance = CurrentBestDistance;
         *              }
         *      } while( LineIter.Next(PageIteratorLevel.TextLine));
         *      LineIter.Dispose();
         *      return BestDistance;
         * } */

        /// <summary>
        /// Scans the recognized page line by line for the run of consecutive words that best matches
        /// <paramref name="Text"/> (case-insensitive Levenshtein distance) and reports where it starts.
        /// </summary>
        /// <param name="Text">Marker text to look for. Its word count (split on spaces) caps how many words a candidate may span.</param>
        /// <param name="page">Recognized page whose text lines are examined.</param>
        /// <param name="PosX">X1 of the bounding box of the first word of the best match; -1 if no candidate was found.</param>
        /// <param name="PosY">Y1 of the bounding box of the first word of the best match; -1 if no candidate was found.</param>
        /// <param name="AngleRad">Baseline angle, in radians, of the first word of the best match; 0 if no candidate was found.</param>
        /// <param name="AfterMarkerRules">
        /// Rules with <c>NextAfterTextMarker</c> set get <c>AfterTextMarkerValue</c> assigned to the word
        /// <c>ShiftWordsCount</c> positions after the match (when the line is long enough) and
        /// <c>AfterTextMarkerConfidence</c> assigned to that word's confidence (when the iterator can reach it
        /// before the end of the line).
        /// </param>
        /// <returns>Distance of the best match found; 10000 when no line produced a candidate.</returns>
        int GetTextPosition(string Text, Page page, out int PosX, out int PosY, out double AngleRad, RecognazeRule[] AfterMarkerRules)
        {
            int BestDistance = 10000;

            PosX     = -1;
            PosY     = -1;
            AngleRad = 0;
            logger.Debug("Marker zone text:{0}", page.GetText());

            string[] Words         = Text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            int      NumberOfWords = Words.Length;

            // using guarantees the native iterator is released even if matching or logging throws.
            using (ResultIterator LineIter = page.GetIterator())
            {
                LineIter.Begin();
                do
                {
                    int    CurrentWordNumber   = -1;
                    int    CurrentAfterWord    = 0;
                    int    CurrentBestDistance = 10000;
                    string Line = LineIter.GetText(PageIteratorLevel.TextLine);

                    if (string.IsNullOrEmpty(Line))
                    {
                        // continue in a do/while still evaluates Next(TextLine) below.
                        continue;
                    }
                    Line = Line.Trim();
                    string[] WordsOfLine = Line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

                    if (WordsOfLine.Length == 0)
                    {
                        continue;
                    }

                    // Try every window of 1..NumberOfWords consecutive words starting at each position.
                    for (int shift = 0; shift < WordsOfLine.Length; shift++)
                    {
                        for (int i = 1; i <= NumberOfWords && i <= WordsOfLine.Length - shift; i++)
                        {
                            string passString = String.Join(" ", WordsOfLine, shift, i);

                            int PassDistance = FuzzyStringComparer.GetDistanceLevenshtein(passString,
                                                                                          Text,
                                                                                          StringComparison.CurrentCultureIgnoreCase);
                            if (PassDistance < CurrentBestDistance)
                            {
                                CurrentBestDistance = PassDistance;
                                CurrentWordNumber   = shift;        // index of the first matched word
                                CurrentAfterWord    = shift + i;    // index of the first word after the match
                            }
                        }
                    }
                    if (CurrentBestDistance < BestDistance)
                    {
                        logger.Debug("new best");
                        // The iterator has not moved since Line was read, so the trimmed copy is the same text.
                        logger.Debug(Line);

                        // Fill the rules with the words that follow the marker.
                        foreach (RecognazeRule rule in AfterMarkerRules)
                        {
                            if (rule.NextAfterTextMarker && WordsOfLine.Length > CurrentAfterWord + rule.ShiftWordsCount)
                            {
                                rule.AfterTextMarkerValue = WordsOfLine[CurrentAfterWord + rule.ShiftWordsCount];
                            }
                        }

                        BestDistance = CurrentBestDistance;

                        // Move the iterator from the start of the line to the first matched word.
                        // NOTE(review): assumes the iterator's word segmentation agrees with the
                        // space-split words of the line text - confirm.
                        for (int i = 0; i < CurrentWordNumber; i++)
                        {
                            LineIter.Next(PageIteratorLevel.Word);
                        }
                        Rect Box;
                        // NOTE(review): the return value is ignored here and below; on failure the
                        // reported position/angle come from whatever the call leaves in Box.
                        LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box);
                        PosX = Box.X1;
                        PosY = Box.Y1;
                        logger.Debug("Position X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);
                        LineIter.TryGetBaseline(PageIteratorLevel.Word, out Box);
                        logger.Debug("BaseLine X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);
                        AngleRad = Math.Atan2(Box.Y2 - Box.Y1, Box.X2 - Box.X1);   // slope angle of the baseline
                        double AngleGrad = AngleRad * (180 / Math.PI);
                        logger.Debug("Angle rad:{0} grad:{1}", AngleRad, AngleGrad);

                        // Read the recognition confidence of the words that follow the marker.
                        // The iterator sits on the first matched word, i.e. (CurrentAfterWord - CurrentWordNumber)
                        // words before shift 0, hence the negative starting offset.
                        int  iterAlreadyShifted = CurrentWordNumber - CurrentAfterWord;
                        bool stopIteration      = false;
                        foreach (RecognazeRule rule in AfterMarkerRules.Where(x => x.NextAfterTextMarker).OrderBy(x => x.ShiftWordsCount))
                        {
                            while (iterAlreadyShifted < rule.ShiftWordsCount)
                            {
                                // Never step past the last word of the current line.
                                if (LineIter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                                {
                                    stopIteration = true;
                                    break;
                                }
                                LineIter.Next(PageIteratorLevel.Word);
                                iterAlreadyShifted++;
                            }
                            if (stopIteration)
                            {
                                break;
                            }
                            rule.AfterTextMarkerConfidence = LineIter.GetConfidence(PageIteratorLevel.Word);
                            logger.Debug("Cлово {0} со сдвигом {1} имеет точность {2}.", LineIter.GetText(PageIteratorLevel.Word), rule.ShiftWordsCount, rule.AfterTextMarkerConfidence);
                        }
                    }
                } while (LineIter.Next(PageIteratorLevel.TextLine));
            }
            return BestDistance;
        }