예제 #1
0
        /// <summary>
        /// Minimal setup example: creates a <c>TessBaseAPI</c>, initializes it with
        /// data path <c>.//</c> and language <c>eng</c>, and selects the
        /// <c>AUTO_OSD</c> page segmentation mode.
        /// </summary>
        /// <exception cref="Exception">Thrown when <c>Init</c> reports failure.</exception>
        static void example1()
        {
            string               dataPath = ".//";
            string               language = "eng";
            OcrEngineMode        oem      = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm      = PageSegmentationMode.AUTO_OSD;

            TessBaseAPI tessBaseAPI = new TessBaseAPI();

            try
            {
                // Initialize tesseract-ocr
                if (!tessBaseAPI.Init(dataPath, language, oem))
                {
                    throw new Exception("Could not initialize tesseract.");
                }

                // Set the Page Segmentation mode
                tessBaseAPI.SetPageSegMode(psm);
            }
            finally
            {
                // The engine never leaves this method, so release it here on both
                // the success path and the initialization-failure path.
                tessBaseAPI.Dispose();
            }
        }
예제 #2
0
        /// <summary>
        /// Recognizes <c>./input.png</c> with the <c>eng</c> language data and hands
        /// the result to a <c>PdfRenderer</c> whose output name is the input file's
        /// full path without its extension.
        /// </summary>
        /// <exception cref="Exception">Thrown when <c>Init</c> reports failure.</exception>
        static void example4()
        {
            string               dataPath  = "./tessdata/";
            string               language  = "eng";
            string               inputFile = "./input.png";
            OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm       = PageSegmentationMode.AUTO_OSD;

            TessBaseAPI tessBaseAPI = new TessBaseAPI();
            Pix         pix         = null;

            try
            {
                // Initialize tesseract-ocr
                if (!tessBaseAPI.Init(dataPath, language, oem))
                {
                    throw new Exception("Could not initialize tesseract.");
                }

                // Set the Page Segmentation mode
                tessBaseAPI.SetPageSegMode(psm);

                // Set the input image
                pix = tessBaseAPI.SetImage(inputFile);

                // Recognize image
                tessBaseAPI.Recognize();

                // Ensure input name is set
                tessBaseAPI.SetInputName(inputFile);

                var    fileInfo     = new System.IO.FileInfo(inputFile);
                string tessDataPath = tessBaseAPI.GetDatapath();

                // Strip only the trailing extension. A plain string Replace would also
                // remove the same text elsewhere in the path (e.g. a directory named
                // "scans.png") and throws when the file has no extension at all.
                string outputName = System.IO.Path.ChangeExtension(fileInfo.FullName, null);

                // Call pdf renderer and export pdf
                using (var pdfRenderer = new PdfRenderer(outputName, tessDataPath, false))
                {
                    pdfRenderer.BeginDocument("tesseract.net searchable Pdf generation");
                    pdfRenderer.AddImage(tessBaseAPI);
                    pdfRenderer.EndDocument();
                }
            }
            finally
            {
                // Release the engine first, then the image, even when recognition or
                // rendering throws.
                try
                {
                    tessBaseAPI.Dispose();
                }
                finally
                {
                    if (pix != null)
                    {
                        pix.Dispose();
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Recognizes <c>./input.png</c> with the <c>chi_sim</c> language data and
        /// appends the text of every paragraph-level (<c>RIL_PARA</c>) element of the
        /// result to a <see cref="StringBuilder"/>.
        /// </summary>
        /// <remarks>
        /// The collected text is not returned or printed; it is only held in a local.
        /// </remarks>
        /// <exception cref="Exception">Thrown when <c>Init</c> reports failure.</exception>
        static void example3()
        {
            string               dataPath  = "./tessdata/";
            // Language data to load; swap in "eng" to use that data set instead.
            string               language  = "chi_sim";
            string               inputFile = "./input.png";
            OcrEngineMode        oem       = OcrEngineMode.DEFAULT;
            PageSegmentationMode psm       = PageSegmentationMode.AUTO_OSD;

            TessBaseAPI tessBaseAPI = new TessBaseAPI();
            Pix         pix         = null;

            try
            {
                // Initialize tesseract-ocr
                if (!tessBaseAPI.Init(dataPath, language, oem))
                {
                    throw new Exception("Could not initialize tesseract.");
                }

                // Set the Page Segmentation mode
                tessBaseAPI.SetPageSegMode(psm);

                // Set the input image
                pix = tessBaseAPI.SetImage(inputFile);

                // Recognize image
                tessBaseAPI.Recognize();

                // NOTE(review): the iterator is not released here; confirm whether
                // ResultIterator needs explicit disposal in this wrapper.
                ResultIterator resultIterator = tessBaseAPI.GetIterator();

                // Extract text from result iterator
                StringBuilder     stringBuilder     = new StringBuilder();
                PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_PARA;

                // Guard against a missing iterator so the loop cannot dereference null.
                if (resultIterator != null)
                {
                    do
                    {
                        stringBuilder.Append(resultIterator.GetUTF8Text(pageIteratorLevel));
                    } while (resultIterator.Next(pageIteratorLevel));
                }
            }
            finally
            {
                // Release the engine first, then the image, even when recognition throws.
                try
                {
                    tessBaseAPI.Dispose();
                }
                finally
                {
                    if (pix != null)
                    {
                        pix.Dispose();
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Command-line entry point: parses the arguments, runs <c>ProcessPages</c> on
        /// the input image and writes the resulting text to a file.
        /// </summary>
        /// <remarks>
        /// Argument handling:
        /// <c>-v</c> or <c>--version</c> (as the only argument) prints the version and waits for a key;
        /// <c>-l &lt;lang&gt;</c> sets the language (default <c>eng</c>);
        /// <c>-psm &lt;mode&gt;</c> sets the page segmentation mode (default <c>PSM_AUTO</c>);
        /// the first two remaining arguments are the image and the output base name.
        /// The output file is the base name plus <c>.html</c>, <c>.box</c> or <c>.txt</c>,
        /// chosen from the <c>tessedit_create_hocr</c> and <c>tessedit_create_boxfile</c> variables.
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                printUsage("");
                return;
            }
            if (args.Length == 1 && (args[0] == "-v" || args[0] == "--version"))
            {
                Console.WriteLine(string.Format("tesseract-{0}", TessBaseAPI.Version()));
                Console.ReadKey();
                return;
            }

            string lang   = "eng";
            string image  = "";
            string output = "";

            PageSegMode pagesegmode = PageSegMode.PSM_AUTO;
            int         arg         = 0;
            int         argc        = args.Length;

            // Keep consuming until the output name is known, then only while further
            // dash-prefixed options follow it.
            while (arg < argc && (output == string.Empty || args[arg].StartsWith("-", StringComparison.Ordinal)))
            {
                if (args[arg] == "-l" && arg + 1 < argc)
                {
                    lang = args[arg + 1];
                    ++arg;
                }
                else if (args[arg] == "-psm" && arg + 1 < argc)
                {
                    // Accepts a numeric value or a PageSegMode member name.
                    // Assumes PageSegMode is an enum whose numeric values match the
                    // numbers users pass to -psm -- TODO confirm.
                    PageSegMode requestedMode;

                    if (!Enum.TryParse(args[arg + 1], true, out requestedMode))
                    {
                        printUsage(args[0]);
                        return;
                    }
                    pagesegmode = requestedMode;
                    ++arg;
                }
                else if (image == string.Empty)
                {
                    image = args[arg];
                }
                else if (output == string.Empty)
                {
                    output = args[arg];
                }
                ++arg;
            }

            if (string.IsNullOrEmpty(output))
            {
                // NOTE(review): args[0] is the first command-line argument, not the
                // program name -- confirm what printUsage expects.
                printUsage(args[0]);
                return;
            }

            // The count passed to Init (argc - arg) covers the arguments starting at
            // index arg, so the config argument must be args[arg]. When nothing is
            // left, fall back to the last consumed argument with a count of zero.
            string configArg = arg < argc ? args[arg] : args[arg - 1];

            // NOTE(review): this TessBaseAPI is never released; confirm whether this
            // wrapper exposes Dispose.
            TessBaseAPI api = new TessBaseAPI();

            api.SetOutputName(output);
            api.SetPageSegMode(pagesegmode);

            // NOTE(review): args[0] is passed as Init's first argument, and Init's
            // result is not checked -- confirm both against the wrapper's signature.
            api.Init(args[0],
                     lang,
                     OcrEngineMode.OEM_DEFAULT,
                     configArg,
                     argc - arg,
                     null,
                     null,
                     false);
            Console.WriteLine(string.Format("Tesseract Open Source OCR Engine v{0} with Leptonica\n",
                                            TessBaseAPI.Version()));

            string text_out = string.Empty;

            // A processing failure is reported, but the output file is still written.
            if (!api.ProcessPages(image, null, 0, ref text_out))
            {
                Console.WriteLine("Error during processing.\n");
            }

            bool output_hocr = false;

            api.GetBoolVariable("tessedit_create_hocr", ref output_hocr);
            bool output_box = false;

            api.GetBoolVariable("tessedit_create_boxfile", ref output_box);
            string outfile = output;

            // hOCR takes precedence over box output; plain text is the fallback.
            outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt";

            File.WriteAllText(outfile, text_out);
        }
예제 #5
0
        /// <summary>
        /// Command-line entry point: parses the arguments, runs <c>ProcessPages</c> on
        /// the input image and writes the resulting text to a file.
        /// </summary>
        /// <remarks>
        /// Argument handling:
        /// <c>-v</c> or <c>--version</c> (as the only argument) prints the version and waits for a key;
        /// <c>-l &lt;lang&gt;</c> sets the language (default <c>eng</c>);
        /// <c>-psm &lt;mode&gt;</c> sets the page segmentation mode (default <c>PSM_AUTO</c>);
        /// the first two remaining arguments are the image and the output base name.
        /// The output file is the base name plus <c>.html</c>, <c>.box</c> or <c>.txt</c>,
        /// chosen from the <c>tessedit_create_hocr</c> and <c>tessedit_create_boxfile</c> variables.
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                printUsage("");
                return;
            }
            if (args.Length == 1 && (args[0] == "-v" || args[0] == "--version"))
            {
                Console.WriteLine(string.Format("tesseract-{0}",TessBaseAPI.Version()));
                Console.ReadKey();
                return;
            }

            string lang = "eng";
            string image = "";
            string output = "";

            PageSegMode pagesegmode = PageSegMode.PSM_AUTO;
            int arg = 0;
            int argc = args.Length;

            // Keep consuming until the output name is known, then only while further
            // dash-prefixed options follow it.
            while (arg < argc && (output == string.Empty || args[arg].StartsWith("-", StringComparison.Ordinal)))
            {
                if (args[arg] == "-l" && arg + 1 < argc)
                {
                    lang = args[arg + 1];
                    ++arg;
                }
                else if (args[arg] == "-psm" && arg + 1 < argc)
                {
                    // Accepts a numeric value or a PageSegMode member name.
                    // Assumes PageSegMode is an enum whose numeric values match the
                    // numbers users pass to -psm -- TODO confirm.
                    PageSegMode requestedMode;
                    if (!Enum.TryParse(args[arg + 1], true, out requestedMode))
                    {
                        printUsage(args[0]);
                        return;
                    }
                    pagesegmode = requestedMode;
                    ++arg;
                }
                else if (image == string.Empty)
                {
                    image = args[arg];
                }
                else if (output == string.Empty)
                {
                    output = args[arg];
                }
                ++arg;
            }

            if (string.IsNullOrEmpty(output))
            {
                // NOTE(review): args[0] is the first command-line argument, not the
                // program name -- confirm what printUsage expects.
                printUsage(args[0]);
                return;
            }

            // The count passed to Init (argc - arg) covers the arguments starting at
            // index arg, so the config argument must be args[arg]. When nothing is
            // left, fall back to the last consumed argument with a count of zero.
            string configArg = arg < argc ? args[arg] : args[arg - 1];

            // NOTE(review): this TessBaseAPI is never released; confirm whether this
            // wrapper exposes Dispose.
            TessBaseAPI api = new TessBaseAPI();

            api.SetOutputName(output);
            api.SetPageSegMode(pagesegmode);

            // NOTE(review): args[0] is passed as Init's first argument, and Init's
            // result is not checked -- confirm both against the wrapper's signature.
            api.Init(args[0],
                lang,
                OcrEngineMode.OEM_DEFAULT,
                configArg,
                argc-arg,
                null,
                null,
                false);
            Console.WriteLine(string.Format("Tesseract Open Source OCR Engine v{0} with Leptonica\n",
                TessBaseAPI.Version()));

            string text_out=string.Empty;

            // A processing failure is reported, but the output file is still written.
            if (!api.ProcessPages(image, null, 0, ref text_out))
            {
                Console.WriteLine("Error during processing.\n");
            }

            bool output_hocr = false;
            api.GetBoolVariable("tessedit_create_hocr", ref output_hocr);
            bool output_box = false;
            api.GetBoolVariable("tessedit_create_boxfile", ref output_box);
            string outfile = output;

            // hOCR takes precedence over box output; plain text is the fallback.
            outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt";

            File.WriteAllText(outfile, text_out);
        }