예제 #1
0
        /// <summary>
        /// Reads words from the iterator's current position and gathers them into one text block.
        /// </summary>
        /// <remarks>
        /// Every word is followed by a single space. A line break is appended after the last word
        /// of a text line, and one more after the last text line of the paragraph. Reading stops
        /// when the iterator reports no further text line inside the paragraph.
        /// The iterator is not reset here; the caller is expected to have positioned it.
        /// </remarks>
        /// <param name="iter">Result iterator that is advanced word by word while reading.</param>
        /// <returns>A new <see cref="TextBlockItem"/> whose <c>Text</c> contains the gathered text.</returns>
        private static TextBlockItem ExtractParaphraph(ResultIterator iter)
        {
            var buffer = new StringBuilder();
            bool hasMoreLines;

            do
            {
                bool hasMoreWords;

                do
                {
                    buffer.Append(iter.GetText(PageIteratorLevel.Word)).Append(" ");

                    // Terminate the output line once the last word of the text line was written.
                    bool lastWordOfLine = iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word);
                    if (lastWordOfLine)
                    {
                        buffer.AppendLine();
                    }

                    hasMoreWords = iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);
                }
                while (hasMoreWords);

                // An extra line break separates the paragraph from whatever follows it.
                bool lastLineOfParagraph = iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine);
                if (lastLineOfParagraph)
                {
                    buffer.AppendLine();
                }

                hasMoreLines = iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine);
            }
            while (hasMoreLines);

            return new TextBlockItem { Text = buffer.ToString() };
        }
예제 #2
0
        /// <summary>
        /// Runs OCR on an in-memory image and returns the recognized text as a string.
        /// </summary>
        /// <remarks>
        /// The output starts with the line "Text (iterator):". A "&lt;BLOCK&gt;" line is written whenever
        /// the iterator is at the beginning of a block, each word is followed by a space, a line break
        /// is written after the last word of a text line and another after the last line of a paragraph.
        /// Any exception is written to <see cref="Trace"/> and not rethrown; the text accumulated up to
        /// that point is returned.
        /// </remarks>
        /// <param name="imageBytes">Encoded image data passed to <c>Pix.LoadFromMemory</c>.</param>
        /// <returns>The accumulated text (possibly empty or partial if an exception occurred).</returns>
        public static string RecognizeBlocks(byte[] imageBytes)
        {
            var output = new StringBuilder();

            var blockLevel = PageIteratorLevel.Block;
            var paraLevel  = PageIteratorLevel.Para;
            var lineLevel  = PageIteratorLevel.TextLine;
            var wordLevel  = PageIteratorLevel.Word;

            try
            {
                using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                using (Pix image = Pix.LoadFromMemory(imageBytes))
                using (var page = engine.Process(image))
                {
                    // The header is written only after the page has been processed successfully.
                    output.AppendLine("Text (iterator):");

                    using (ResultIterator iter = page.GetIterator())
                    {
                        iter.Begin();

                        do
                        {
                            do
                            {
                                do
                                {
                                    do
                                    {
                                        if (iter.IsAtBeginningOf(blockLevel))
                                        {
                                            output.AppendLine("<BLOCK>");
                                        }

                                        output.Append(iter.GetText(wordLevel)).Append(" ");

                                        // End of a text line: terminate the output line.
                                        if (iter.IsAtFinalOf(lineLevel, wordLevel))
                                        {
                                            output.AppendLine();
                                        }
                                    }
                                    while (iter.Next(lineLevel, wordLevel));

                                    // End of a paragraph: add one more line break.
                                    if (iter.IsAtFinalOf(paraLevel, lineLevel))
                                    {
                                        output.AppendLine();
                                    }
                                }
                                while (iter.Next(paraLevel, lineLevel));
                            }
                            while (iter.Next(blockLevel, paraLevel));
                        }
                        while (iter.Next(blockLevel));
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and fall through to return what was collected.
                Trace.TraceError(ex.ToString());
            }

            return output.ToString();
        }
예제 #3
0
        /// <summary>
        /// Writes the recognized text of a page to the console, walking it block by block,
        /// paragraph by paragraph, line by line and word by word.
        /// </summary>
        /// <remarks>
        /// Output starts with "Text (iterator):". A "&lt;BLOCK&gt;" line is printed whenever the iterator
        /// is at the beginning of a block; every word is followed by a space; a line break is printed
        /// after the last word of a text line and another one after the last line of a paragraph.
        /// </remarks>
        /// <param name="page">Processed page whose result iterator is traversed and disposed here.</param>
        private static void IterateBlocks(Page page)
        {
            var blockLevel = PageIteratorLevel.Block;
            var paraLevel  = PageIteratorLevel.Para;
            var lineLevel  = PageIteratorLevel.TextLine;
            var wordLevel  = PageIteratorLevel.Word;

            Console.WriteLine("Text (iterator):");

            using (ResultIterator cursor = page.GetIterator())
            {
                cursor.Begin();

                do
                {
                    do
                    {
                        do
                        {
                            do
                            {
                                if (cursor.IsAtBeginningOf(blockLevel))
                                {
                                    Console.WriteLine("<BLOCK>");
                                }

                                string word = cursor.GetText(wordLevel);
                                Console.Write(word);
                                Console.Write(" ");

                                // Last word of the text line: finish the console line.
                                if (cursor.IsAtFinalOf(lineLevel, wordLevel))
                                {
                                    Console.WriteLine();
                                }
                            }
                            while (cursor.Next(lineLevel, wordLevel));

                            // Last line of the paragraph: print an extra line break.
                            if (cursor.IsAtFinalOf(paraLevel, lineLevel))
                            {
                                Console.WriteLine();
                            }
                        }
                        while (cursor.Next(paraLevel, lineLevel));
                    }
                    while (cursor.Next(blockLevel, paraLevel));
                }
                while (cursor.Next(blockLevel));
            }
        }
예제 #4
0
        /*
         * int LookingTextMarker(RecognazeRule Rule, Page page, out ResultIterator BestLineIter, out int word)
         * {
         *      word = -1;
         *      BestLineIter = null;
         *      int BestDistance = 10000;
         *
         *      ResultIterator LineIter = page.GetIterator();
         *      string[] Words = Rule.TextMarker.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
         *      int NumberOfWords = Words.Length;
         *      LineIter.Begin();
         *      do
         *      {
         *              int CurrentWordNumber = -1;
         *              int CurrentBestDistance = 10000;
         *              string Line = LineIter.GetText(PageIteratorLevel.TextLine);
         *              if(Line == null)
         *                      continue;
         *              string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.None);
         *              if(WordsOfLine.Length < NumberOfWords)
         *                      continue;
         *
         *              for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
         *              {
         *                      int PassDistance = 0;
         *                      for(int i = 0; i < NumberOfWords; i++)
         *                      {
         *                              PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
         *                                                                                            Words[i],
         *                                                                                            StringComparison.CurrentCultureIgnoreCase);
         *                      }
         *                      if(PassDistance < CurrentBestDistance)
         *                      {
         *                              CurrentBestDistance = PassDistance;
         *                              CurrentWordNumber = shift + 1;
         *                      }
         *              }
         *              if(CurrentBestDistance < BestDistance)
         *              {
         *                      AddToLog ("new best");
         *                      AddToLog (LineIter.GetText(PageIteratorLevel.Word));
         *                      word = CurrentWordNumber;
         *                      if(BestLineIter != null)
         *                              BestLineIter.Dispose();
         *                      BestLineIter = LineIter.Clone();
         *                      AddToLog (BestLineIter.GetText(PageIteratorLevel.TextLine));
         *                      BestDistance = CurrentBestDistance;
         *              }
         *      } while( LineIter.Next(PageIteratorLevel.TextLine));
         *      LineIter.Dispose();
         *      return BestDistance;
         * } */

        /// <summary>
        /// Scans the recognized page line by line for the run of consecutive words that is closest
        /// (by Levenshtein distance, case-insensitive, current culture) to <paramref name="Text"/>,
        /// and reports the position and baseline angle of the first word of the best match.
        /// </summary>
        /// <remarks>
        /// For every text line, each run of 1..N consecutive words (N = number of space-separated
        /// words in <paramref name="Text"/>) is joined with single spaces and compared with the whole
        /// marker text. Whenever a line beats the best distance seen so far, the out parameters and
        /// the after-marker rules are updated from that line.
        /// NOTE(review): values written to a rule for an earlier best line are not cleared when a
        /// later, better line has too few words after the match — confirm this is intended.
        /// </remarks>
        /// <param name="Text">Marker text to look for.</param>
        /// <param name="page">Recognized page whose text lines are scanned.</param>
        /// <param name="PosX">X1 of the bounding box of the first matched word; -1 if no candidate was found.</param>
        /// <param name="PosY">Y1 of the bounding box of the first matched word; -1 if no candidate was found.</param>
        /// <param name="AngleRad">Angle in radians computed from the baseline of the first matched word; 0 if no candidate was found.</param>
        /// <param name="AfterMarkerRules">
        /// Rules to fill. For rules with <c>NextAfterTextMarker</c> set, <c>AfterTextMarkerValue</c> receives
        /// the word located <c>ShiftWordsCount</c> words after the match (when the line has that many words)
        /// and <c>AfterTextMarkerConfidence</c> receives the confidence of that word. <c>null</c> is treated
        /// as an empty array.
        /// </param>
        /// <returns>The distance of the best match, or 10000 if no line produced a candidate.</returns>
        int GetTextPosition(string Text, Page page, out int PosX, out int PosY, out double AngleRad, RecognazeRule[] AfterMarkerRules)
        {
            int BestDistance = 10000;

            PosX     = -1;
            PosY     = -1;
            AngleRad = 0;
            logger.Debug("Marker zone text:{0}", page.GetText());

            // Tolerate a missing rule set instead of failing on the first match.
            RecognazeRule[] Rules = AfterMarkerRules ?? new RecognazeRule[0];

            // 'using' guarantees the iterator is disposed even if an exception is thrown while scanning.
            using (ResultIterator LineIter = page.GetIterator())
            {
                string[] Words         = Text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                int      NumberOfWords = Words.Length;

                LineIter.Begin();
                do
                {
                    int    CurrentWordNumber   = -1;
                    int    CurrentAfterWord    = 0;
                    int    CurrentBestDistance = 10000;
                    string Line = LineIter.GetText(PageIteratorLevel.TextLine);

                    // 'continue' in a do-while jumps to the loop condition, so the iterator still advances.
                    if (string.IsNullOrEmpty(Line))
                    {
                        continue;
                    }
                    Line = Line.Trim();
                    string[] WordsOfLine = Line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

                    if (WordsOfLine.Length == 0)
                    {
                        continue;
                    }

                    // Try every start word (shift) and every run length (i) up to the marker's word count.
                    for (int shift = 0; shift < WordsOfLine.Length; shift++)
                    {
                        for (int i = 1; i <= NumberOfWords && i <= WordsOfLine.Length - shift; i++)
                        {
                            string passString = String.Join(" ", WordsOfLine, shift, i);

                            int PassDistance = FuzzyStringComparer.GetDistanceLevenshtein(passString,
                                                                                          Text,
                                                                                          StringComparison.CurrentCultureIgnoreCase);
                            if (PassDistance < CurrentBestDistance)
                            {
                                CurrentBestDistance = PassDistance;
                                CurrentWordNumber   = shift;       // index of the first matched word
                                CurrentAfterWord    = shift + i;   // index of the first word after the match
                            }
                        }
                    }

                    if (CurrentBestDistance < BestDistance)
                    {
                        logger.Debug("new best");
                        // Line already holds the trimmed text of the current line.
                        logger.Debug(Line);

                        // Fill the rule fields with the words that follow the marker.
                        foreach (RecognazeRule rule in Rules)
                        {
                            if (rule.NextAfterTextMarker && WordsOfLine.Length > CurrentAfterWord + rule.ShiftWordsCount)
                            {
                                rule.AfterTextMarkerValue = WordsOfLine[CurrentAfterWord + rule.ShiftWordsCount];
                            }
                        }

                        BestDistance = CurrentBestDistance;

                        // Move the iterator from the start of the line to the first matched word.
                        for (int i = 0; i < CurrentWordNumber; i++)
                        {
                            LineIter.Next(PageIteratorLevel.Word);
                        }
                        Rect Box;
                        LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box);
                        PosX = Box.X1;
                        PosY = Box.Y1;
                        logger.Debug("Position X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);
                        LineIter.TryGetBaseline(PageIteratorLevel.Word, out Box);
                        logger.Debug("BaseLine X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);
                        AngleRad = Math.Atan2(Box.Y2 - Box.Y1, Box.X2 - Box.X1);                     // tilt angle of the baseline
                        double AngleGrad = AngleRad * (180 / Math.PI);
                        logger.Debug("Angle rad:{0} grad:{1}", AngleRad, AngleGrad);

                        // Get the recognition confidence of the fields that follow the marker.
                        // The iterator sits on the first matched word, i.e. (match length) words before
                        // the word with shift 0, hence the negative starting offset.
                        int  iterAlreadyShifted = CurrentWordNumber - CurrentAfterWord;
                        bool stopIteration      = false;
                        foreach (RecognazeRule rule in Rules.Where(x => x.NextAfterTextMarker).OrderBy(x => x.ShiftWordsCount))
                        {
                            while (iterAlreadyShifted < rule.ShiftWordsCount)
                            {
                                // Never step past the last word of the current line.
                                if (LineIter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                                {
                                    stopIteration = true;
                                    break;
                                }
                                LineIter.Next(PageIteratorLevel.Word);
                                iterAlreadyShifted++;
                            }
                            if (stopIteration)
                            {
                                break;
                            }
                            rule.AfterTextMarkerConfidence = LineIter.GetConfidence(PageIteratorLevel.Word);
                            logger.Debug("Cлово {0} со сдвигом {1} имеет точность {2}.", LineIter.GetText(PageIteratorLevel.Word), rule.ShiftWordsCount, rule.AfterTextMarkerConfidence);
                        }
                    }
                }while(LineIter.Next(PageIteratorLevel.TextLine));
            }

            return(BestDistance);
        }