/// <summary>
/// Appends a new page to <paramref name="document"/> for one image and populates it.
/// </summary>
/// <param name="document">Document whose <c>Pages</c> collection receives the new page.</param>
/// <param name="language">Language value forwarded unchanged to <c>GetPageData</c>.</param>
/// <param name="engine">Engine instance forwarded unchanged to <c>GetPageData</c>.</param>
/// <param name="pageData">Image for the page; its width and height define the page bounds.</param>
private void FillDocumentPage(ATAPY.Document.Data.Core.Document document, string language, TesseractEngine engine, Pix pageData)
{
    // The page rectangle starts at the origin and spans the full image size.
    var newPage = new ATAPY.Document.Data.Core.Page
    {
        Bound = new System.Windows.Rect(0, 0, pageData.Width, pageData.Height)
    };

    // The page is attached to the document before it is populated.
    document.Pages.Add(newPage);

    GetPageData(engine, pageData, language, newPage);
    newPage.AnalyzeData();
}
/// <summary>
/// Creates a document for the given image file and fills its pages via <c>FillPagesData</c>.
/// </summary>
/// <param name="image">Source image file; a clone of it is stored as the document's source file.</param>
/// <param name="language">Language value passed on to <c>FillPagesData</c>; must not be null or empty.</param>
/// <returns>
/// A new document with its scale, source format (<c>E_SourceFormat.HOCR</c>) and source file set,
/// and with pages added by <c>FillPagesData</c>.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="language"/> is null or empty.</exception>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public Document GetDocument(ATAPY.Common.IO.File image, string language)
{
    // Checked first so callers that pass no language keep getting the same exception as before.
    if (string.IsNullOrEmpty(language))
    {
        throw new ArgumentException("No Languages specified!");
    }

    // Reject a missing image up front rather than failing later with a
    // NullReferenceException when image.Clone() is called.
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    var document = new ATAPY.Document.Data.Core.Document();
    document.Scale = Scale;
    document.SourceFormat = E_SourceFormat.HOCR;
    document.SourceFile = image.Clone();

    FillPagesData(document, image.FullPath, language);
    return document;
}
/// <summary>
/// Loads the image at <paramref name="pathToImage"/> and adds one page to
/// <paramref name="document"/> per loaded image, using a single engine instance for all pages.
/// </summary>
/// <param name="document">Document that receives the pages.</param>
/// <param name="pathToImage">Path of the image file; its extension selects the loading strategy.</param>
/// <param name="language">Language value used to create the engine and forwarded to <c>FillDocumentPage</c>.</param>
/// <exception cref="FormatException">
/// The extension is accepted by neither <c>IsSinglePageImage</c> nor <c>IsMultiPageImage</c>.
/// </exception>
private void FillPagesData(ATAPY.Document.Data.Core.Document document, string pathToImage, string language)
{
    var fileExtension = Path.GetExtension(pathToImage);

    // The engine is created once and disposed when this method exits, on every path.
    using (var engine = new TesseractEngine(ENGINE_DATAPATH, language))
    {
        if (IsSinglePageImage(fileExtension))
        {
            // Single image: load it, build its page, release the image.
            using (Pix singlePage = Pix.LoadFromFile(pathToImage))
            {
                FillDocumentPage(document, language, engine, singlePage);
            }

            return;
        }

        if (!IsMultiPageImage(fileExtension))
        {
            throw new FormatException("Please specify path to the image file");
        }

        // Multi-page file: load every page up front, then process them in order.
        using (PixArray loadedPages = PixArray.LoadMultiPageTiffFromFile(pathToImage))
        {
            foreach (Pix currentPage in loadedPages)
            {
                FillDocumentPage(document, language, engine, currentPage);
            }
        }
    }
}