/// <summary>
/// Creates a new page sized to the supplied image, appends it to <paramref name="document"/>,
/// fills it with recognized text via <c>GetPageData</c> and finally runs the page's own analysis.
/// </summary>
/// <param name="document">Document that receives the newly created page.</param>
/// <param name="language">Language value forwarded unchanged to <c>GetPageData</c>.</param>
/// <param name="engine">Tesseract engine forwarded to <c>GetPageData</c>.</param>
/// <param name="pageData">Source image; its width and height define the page bounds.</param>
private void FillDocumentPage(ATAPY.Document.Data.Core.Document document, string language, TesseractEngine engine, Pix pageData)
{
    // Page bounds span the whole image, anchored at the origin.
    var newPage = new ATAPY.Document.Data.Core.Page
    {
        Bound = new System.Windows.Rect(0, 0, pageData.Width, pageData.Height)
    };

    // The page is attached to the document before it is populated and analyzed.
    document.Pages.Add(newPage);

    GetPageData(engine, pageData, language, newPage);
    newPage.AnalyzeData();
}
/// <summary>
/// Processes <paramref name="pageData"/> with the OCR engine and adds one <c>TextArea</c> to
/// <paramref name="page"/> for every word that passes <c>TextIsValid</c> and has a bounding box.
/// Each text area also receives an array of per-symbol rectangles.
/// </summary>
/// <param name="engine">Tesseract engine used to process the image.</param>
/// <param name="pageData">Image of the page to recognize.</param>
/// <param name="language">Not read by this method; kept so the signature stays compatible with existing callers.</param>
/// <param name="page">Page whose <c>TextAreas</c> collection receives the recognized words.</param>
private void GetPageData(TesseractEngine engine, Pix pageData, string language, ATAPY.Document.Data.Core.Page page)
{
    using (var tessPage = engine.Process(pageData))
    {
        tessPage.Recognize();

        // The iterator is obtained from tessPage, so it is scoped inside the page's using block:
        // this guarantees the iterator is disposed BEFORE the page it iterates over.
        using (var resultIterator = tessPage.GetIterator())
        {
            resultIterator.Begin();
            do
            {
                var text = resultIterator.GetText(PageIteratorLevel.Word);
                if (TextIsValid(text) && resultIterator.TryGetBoundingBox(PageIteratorLevel.Word, out var rect))
                {
                    var area = new TextArea(GetRect(rect), text, page);
                    page.TextAreas.Add(area);

                    // One slot per UTF-16 char of the word. Slots are filled one per symbol, so any
                    // slot without a matching symbol box keeps the default Rect value.
                    var chars = new System.Windows.Rect[text.Length];
                    int charIter = 0;
                    do
                    {
                        // Defensive bound check: never write past the array should the engine
                        // report more symbols than the word text has chars.
                        if (charIter < chars.Length
                            && resultIterator.TryGetBoundingBox(PageIteratorLevel.Symbol, out var sRect))
                        {
                            chars[charIter] = GetRect(sRect);
                        }
                        charIter++;
                    } while (resultIterator.Next(PageIteratorLevel.Word, PageIteratorLevel.Symbol)); // next symbol within the current word

                    area.SetCharProperties(chars);
                }
            } while (resultIterator.Next(PageIteratorLevel.Word)); // advance to the next word
        }
    }
}