Exemple #1
0
        /// <summary>
        /// Walks the recognition results exposed by the engine's result iterator and
        /// converts them into a <see cref="PageShape"/>.
        /// </summary>
        /// <param name="pageBounds">The bounds passed to the created <see cref="PageShape"/>.</param>
        /// <param name="horizontalResolution">The horizontal resolution passed to the created <see cref="PageShape"/>.</param>
        /// <param name="verticalResolution">The vertical resolution passed to the created <see cref="PageShape"/>.</param>
        /// <returns>
        /// A new <see cref="PageShape"/> that contains the extracted text block,
        /// or no shapes at all when nothing could be extracted.
        /// </returns>
        private PageShape ExtractResults(Rectangle pageBounds, int horizontalResolution, int verticalResolution)
        {
            List<TextBlockShape> textBlocks = new List<TextBlockShape>();

            using (ResultIterator iterator = NativeMethods.TessBaseAPIGetIterator(this.handle))
            {
                TextBlockShape textBlock = ExtractTextBlock();
                if (textBlock != null)
                {
                    textBlocks.Add(textBlock);
                }

                // Visits every block reachable through the iterator and gathers the lines and
                // paragraphs found in them into a single text block.
                // Returns null when the first block has no bounding box or nothing was extracted.
                TextBlockShape ExtractTextBlock()
                {
                    // NOTE(review): the resulting shape carries the bounds of the first block only,
                    // although it aggregates shapes from all blocks - confirm whether a union is intended.
                    Rectangle blockBounds = iterator.GetBoundingBox(PageIteratorLevel.TextBlock);
                    if (blockBounds.IsEmpty)
                    {
                        return null;
                    }

                    List<Shape> shapes = new List<Shape>();

                    do
                    {
                        PolyBlockType type = iterator.GetBlockType();
                        switch (type)
                        {
                            case PolyBlockType.HorizontalLine:
                            case PolyBlockType.VerticalLine:
                                // Keep the line's box in its own local so that it does not
                                // overwrite the bounds of the text block being built.
                                Rectangle lineBounds = iterator.GetBoundingBox(PageIteratorLevel.TextBlock);
                                if (!lineBounds.IsEmpty)
                                {
                                    // The short side of the box is passed as the second argument
                                    // (presumably the line thickness - verify against LineShape).
                                    LineShape line = type == PolyBlockType.HorizontalLine
                                        ? new LineShape(lineBounds, lineBounds.Height, LineTypes.Horizontal)
                                        : new LineShape(lineBounds, lineBounds.Width, LineTypes.Vertical);

                                    shapes.Add(line);
                                }

                                break;

                            default:
                                // Any other block type is treated as text and split into paragraphs.
                                // The IsAtFinalElement check presumably keeps Next from stepping
                                // out of the current block.
                                do
                                {
                                    ParagraphShape paragraph = ExtractParagraph();
                                    if (paragraph != null)
                                    {
                                        shapes.Add(paragraph);
                                    }
                                }
                                while (!iterator.IsAtFinalElement(PageIteratorLevel.TextBlock, PageIteratorLevel.Paragraph) &&
                                       iterator.Next(PageIteratorLevel.Paragraph));

                                break;
                        }
                    }
                    while (iterator.Next(PageIteratorLevel.TextBlock));

                    return shapes.Count > 0 ? new TextBlockShape(blockBounds, shapes) : null;
                }

                // Builds a paragraph from the text lines of the paragraph the iterator points to.
                // Returns null when the paragraph has no bounding box or contains no text lines.
                ParagraphShape ExtractParagraph()
                {
                    Rectangle bounds = iterator.GetBoundingBox(PageIteratorLevel.Paragraph);
                    if (bounds.IsEmpty)
                    {
                        return null;
                    }

                    List<TextLineShape> lines = new List<TextLineShape>();

                    do
                    {
                        TextLineShape line = ExtractTextLine();
                        if (line != null)
                        {
                            lines.Add(line);
                        }
                    }
                    while (!iterator.IsAtFinalElement(PageIteratorLevel.Paragraph, PageIteratorLevel.TextLine) &&
                           iterator.Next(PageIteratorLevel.TextLine));

                    return lines.Count > 0 ? new ParagraphShape(bounds, lines) : null;
                }

                // Builds a text line from the words of the line the iterator points to.
                // Returns null when the line has no bounding box or contains no words.
                TextLineShape ExtractTextLine()
                {
                    Rectangle bounds = iterator.GetBoundingBox(PageIteratorLevel.TextLine);
                    if (bounds.IsEmpty)
                    {
                        return null;
                    }

                    List<TextShape> words = new List<TextShape>();

                    do
                    {
                        TextShape word = ExtractWord();
                        if (word != null)
                        {
                            words.Add(word);
                        }
                    }
                    while (!iterator.IsAtFinalElement(PageIteratorLevel.TextLine, PageIteratorLevel.Word) &&
                           iterator.Next(PageIteratorLevel.Word));

                    return words.Count > 0 ? new TextLineShape(bounds, words) : null;
                }

                // Builds a text shape from the word the iterator points to.
                // Returns null when the word has no bounding box or its text is null or white space.
                // Character-level (symbol) extraction is not performed; the word text is taken as a whole.
                TextShape ExtractWord()
                {
                    Rectangle bounds = iterator.GetBoundingBox(PageIteratorLevel.Word);
                    if (bounds.IsEmpty)
                    {
                        return null;
                    }

                    string text = iterator.GetUTF8Text(PageIteratorLevel.Word);
                    if (string.IsNullOrWhiteSpace(text))
                    {
                        return null;
                    }

                    // The iterator's confidence is scaled down by 100
                    // (presumably from a percentage to the 0..1 range).
                    float confidence = iterator.GetConfidence(PageIteratorLevel.Word) / 100.0f;

                    return new TextShape(bounds, text, confidence);
                }
            }

            PageShape page = new PageShape(pageBounds, horizontalResolution, verticalResolution);

            page.AddShapes(textBlocks);
            return page;
        }