Esempio n. 1
0
        /// <summary>
        /// Creates a new page for <paramref name="pageData"/>, appends it to
        /// <paramref name="document"/> and fills it via <c>GetPageData</c>.
        /// </summary>
        /// <param name="document">Document whose <c>Pages</c> collection receives the new page.</param>
        /// <param name="language">Language value forwarded unchanged to <c>GetPageData</c>.</param>
        /// <param name="engine">Engine instance forwarded unchanged to <c>GetPageData</c>.</param>
        /// <param name="pageData">Page image; its width and height define the page bounds.</param>
        private void FillDocumentPage(ATAPY.Document.Data.Core.Document document, string language, TesseractEngine engine, Pix pageData)
        {
            // The page rectangle starts at the origin and matches the image dimensions.
            var newPage = new ATAPY.Document.Data.Core.Page
            {
                Bound = new System.Windows.Rect(0, 0, pageData.Width, pageData.Height)
            };

            // The page is attached to the document before it is populated.
            document.Pages.Add(newPage);

            GetPageData(engine, pageData, language, newPage);
            newPage.AnalyzeData();
        }
Esempio n. 2
0
        /// <summary>
        /// Creates a document for the given image file and fills its pages
        /// through <c>FillPagesData</c>.
        /// </summary>
        /// <param name="image">
        /// Source image file. A clone of it is stored as the document's
        /// <c>SourceFile</c>, and its <c>FullPath</c> is used to load the page data.
        /// </param>
        /// <param name="language">Language value passed on to page processing; must not be null or empty.</param>
        /// <returns>
        /// The new document, with <c>Scale</c> taken from this instance and
        /// <c>SourceFormat</c> set to <c>E_SourceFormat.HOCR</c>.
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="language"/> is null or empty.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
        public Document GetDocument(ATAPY.Common.IO.File image, string language)
        {
            // Checked first so callers that pass no language keep getting the same exception as before.
            if (string.IsNullOrEmpty(language))
            {
                throw new ArgumentException("No Languages specified!", nameof(language));
            }

            // Fail with a descriptive exception instead of a NullReferenceException at image.Clone().
            if (image == null)
            {
                throw new ArgumentNullException(nameof(image));
            }

            var document = new ATAPY.Document.Data.Core.Document();

            document.Scale        = Scale;
            document.SourceFormat = E_SourceFormat.HOCR;
            document.SourceFile   = image.Clone();

            FillPagesData(document, image.FullPath, language);

            return document;
        }
Esempio n. 3
0
        /// <summary>
        /// Loads the image at <paramref name="pathToImage"/> and adds one page to
        /// <paramref name="document"/> per image page, using a
        /// <c>TesseractEngine</c> created for <paramref name="language"/>.
        /// </summary>
        /// <param name="document">Document that receives the pages.</param>
        /// <param name="pathToImage">Path of the image file; its extension selects single-page or multi-page loading.</param>
        /// <param name="language">Language value used to create the engine and forwarded to page processing.</param>
        /// <exception cref="FormatException">
        /// The file extension is accepted by neither <c>IsSinglePageImage</c> nor <c>IsMultiPageImage</c>.
        /// </exception>
        private void FillPagesData(ATAPY.Document.Data.Core.Document document, string pathToImage, string language)
        {
            var extension = Path.GetExtension(pathToImage);

            // Classify the file before creating the engine so an unsupported
            // file is rejected without paying for engine initialisation.
            // IsMultiPageImage is only consulted when the single-page check fails,
            // matching the original if / else-if evaluation.
            bool isSinglePage = IsSinglePageImage(extension);
            bool isMultiPage = !isSinglePage && IsMultiPageImage(extension);

            if (!isSinglePage && !isMultiPage)
            {
                throw new FormatException("Please specify path to the image file");
            }

            using (var engine = new TesseractEngine(ENGINE_DATAPATH, language))
            {
                if (isSinglePage)
                {
                    // 'using' disposes the loaded image even if page processing throws.
                    using (var pageData = Pix.LoadFromFile(pathToImage))
                    {
                        FillDocumentPage(document, language, engine, pageData);
                    }
                }
                else
                {
                    // The whole array is released once every page has been processed.
                    using (var pixes = PixArray.LoadMultiPageTiffFromFile(pathToImage))
                    {
                        foreach (Pix pageData in pixes)
                        {
                            FillDocumentPage(document, language, engine, pageData);
                        }
                    }
                }
            }
        }