/// <summary>
/// Runs recognition on an image file and, when requested, renders a PDF whose
/// output name is the input file's full path with its extension removed.
/// </summary>
/// <param name="tessBaseAPI">
/// Engine to run recognition on. This method never calls Init, so the instance is
/// presumably expected to be initialized by the caller (confirm against call sites).
/// </param>
/// <param name="inputFile">Path of the image passed to SetImage and SetInputName.</param>
/// <param name="createPdf">When true, a PDF is produced through <c>PdfRenderer</c> after recognition.</param>
/// <returns>The iterator obtained from <c>GetIterator</c> once recognition has run.</returns>
/// <exception cref="System.IO.DirectoryNotFoundException">
/// <paramref name="createPdf"/> is true and the engine's data path does not exist.
/// </exception>
public static ResultIterator Process(this TessBaseAPI tessBaseAPI, string inputFile, bool createPdf = false)
        {
            tessBaseAPI.SetPageSegMode(PageSegmentationMode.AUTO);
            var pix = tessBaseAPI.SetImage(inputFile);

            try
            {
                tessBaseAPI.Recognize();

                if (createPdf)
                {
                    // The renderer is fed from the engine, so make sure the input name is set.
                    tessBaseAPI.SetInputName(inputFile);

                    // string.Format turns a null data path into an empty string, which the
                    // existence check below then rejects with a readable message.
                    string tessDataPath = string.Format("{0}", tessBaseAPI.GetDatapath());

                    if (!System.IO.Directory.Exists(tessDataPath))
                    {
                        throw new System.IO.DirectoryNotFoundException(string.Format("tessData path {0} does not exist", tessDataPath));
                    }

                    // Strip only the trailing extension. A plain string Replace would also
                    // remove matching text inside directory names and throws when the file
                    // has no extension at all (empty search string).
                    string fullInputPath = new System.IO.FileInfo(inputFile).FullName;
                    string outputName    = System.IO.Path.ChangeExtension(fullInputPath, null);

                    using (var pdfRenderer = new PdfRenderer(outputName, tessDataPath, false))
                    {
                        pdfRenderer.BeginDocument("tesseract.net searchable Pdf generation");
                        pdfRenderer.AddImage(tessBaseAPI);
                        pdfRenderer.EndDocument();
                    }
                }
            }
            finally
            {
                // Release the image even when recognition or PDF export fails.
                pix.Dispose();
            }

            return tessBaseAPI.GetIterator();
        }
Example #2
0
        /// <summary>
        /// Example: initializes the engine with the "eng" language from ./tessdata/,
        /// recognizes ./input.png, and renders a PDF whose output name is the input
        /// file's full path without its extension.
        /// </summary>
        /// <exception cref="InvalidOperationException">The engine's Init call returned false.</exception>
        static void example4()
        {
            string               dataPath  = "./tessdata/";
            string               language  = "eng";
            string               inputFile = "./input.png";
            OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm       = PageSegmentationMode.AUTO_OSD;

            TessBaseAPI tessBaseAPI = new TessBaseAPI();

            try
            {
                // Initialize tesseract-ocr
                if (!tessBaseAPI.Init(dataPath, language, oem))
                {
                    throw new InvalidOperationException("Could not initialize tesseract.");
                }

                // Set the Page Segmentation mode
                tessBaseAPI.SetPageSegMode(psm);

                // Set the input image
                Pix pix = tessBaseAPI.SetImage(inputFile);

                try
                {
                    // Recognize image
                    tessBaseAPI.Recognize();

                    // The renderer is fed from the engine, so make sure the input name is set.
                    tessBaseAPI.SetInputName(inputFile);

                    string tessDataPath = tessBaseAPI.GetDatapath();

                    // Strip only the trailing extension; a plain string Replace would also
                    // remove matching text elsewhere in the path.
                    string fullInputPath = new System.IO.FileInfo(inputFile).FullName;
                    string outputName    = System.IO.Path.ChangeExtension(fullInputPath, null);

                    // call pdf renderer and export pdf
                    using (var pdfRenderer = new PdfRenderer(outputName, tessDataPath, false))
                    {
                        pdfRenderer.BeginDocument("tesseract.net searchable Pdf generation");
                        pdfRenderer.AddImage(tessBaseAPI);
                        pdfRenderer.EndDocument();
                    }
                }
                finally
                {
                    // Release the image even when recognition or export fails.
                    pix.Dispose();
                }
            }
            finally
            {
                // Release the engine on every path, including a failed Init.
                tessBaseAPI.Dispose();
            }
        }