Example #1
0
        /// <summary>
        /// Creates a new page whose bounds match the dimensions of <paramref name="pageData"/>,
        /// appends it to <paramref name="document"/>, populates it through <c>GetPageData</c>
        /// and finally calls <c>AnalyzeData</c> on it.
        /// </summary>
        /// <param name="document">Document that receives the new page.</param>
        /// <param name="language">Language value forwarded unchanged to <c>GetPageData</c>.</param>
        /// <param name="engine">Tesseract engine forwarded to <c>GetPageData</c>.</param>
        /// <param name="pageData">Source image; its width and height define the page bounds.</param>
        private void FillDocumentPage(ATAPY.Document.Data.Core.Document document, string language, TesseractEngine engine, Pix pageData)
        {
            // The page rectangle starts at the origin and spans the whole image.
            var newPage = new ATAPY.Document.Data.Core.Page
            {
                Bound = new System.Windows.Rect(0, 0, pageData.Width, pageData.Height)
            };

            // The page is registered with the document before it is filled.
            document.Pages.Add(newPage);

            GetPageData(engine, pageData, language, newPage);
            newPage.AnalyzeData();
        }
Example #2
0
        /// <summary>
        /// Processes <paramref name="pageData"/> with <paramref name="engine"/>, walks the result word by word
        /// and, for every word that passes <c>TextIsValid</c> and has a bounding box, adds a <c>TextArea</c>
        /// to <paramref name="page"/> together with the bounding boxes of the word's symbols.
        /// </summary>
        /// <param name="engine">Tesseract engine used to process the image.</param>
        /// <param name="pageData">Image to recognize.</param>
        /// <param name="language">Not used by this method; kept so existing callers keep compiling.</param>
        /// <param name="page">Page whose <c>TextAreas</c> collection receives the recognized words.</param>
        private void GetPageData(TesseractEngine engine, Pix pageData, string language, ATAPY.Document.Data.Core.Page page)
        {
            using (var tessPage = engine.Process(pageData))
            {
                tessPage.Recognize();

                // The iterator is scoped inside the page's using block so that it is
                // disposed before the page it was obtained from, not after it.
                using (var resultIterator = tessPage.GetIterator())
                {
                    resultIterator.Begin();

                    do
                    {
                        var text = resultIterator.GetText(PageIteratorLevel.Word);
                        if (TextIsValid(text) && resultIterator.TryGetBoundingBox(PageIteratorLevel.Word, out var rect))
                        {
                            var rectW = GetRect(rect);
                            var area  = new TextArea(rectW, text, page);
                            page.TextAreas.Add(area);

                            // One slot per character of the word text; slots whose symbol has no
                            // bounding box keep the default Rect value.
                            var chars    = new System.Windows.Rect[text.Length];
                            int charIter = 0;
                            do
                            {
                                // The array is sized from the string length while the loop is driven by
                                // the iterator's symbols; the index check keeps a mismatch between the
                                // two from throwing and aborting the whole page.
                                if (charIter < chars.Length
                                    && resultIterator.TryGetBoundingBox(PageIteratorLevel.Symbol, out var sRect))
                                {
                                    chars[charIter] = GetRect(sRect);
                                }
                                charIter++;
                            } while (resultIterator.Next(PageIteratorLevel.Word, PageIteratorLevel.Symbol));

                            area.SetCharProperties(chars);
                        }
                    } while (resultIterator.Next(PageIteratorLevel.Word));
                }
            }
        }