/// <summary>
/// Example 1: creates a <c>TessBaseAPI</c>, initializes it with data path ".//" and
/// language "eng" using the default OCR engine mode, then selects the AUTO_OSD page
/// segmentation mode. The engine is released before the method returns.
/// </summary>
/// <exception cref="Exception">Thrown when <c>Init</c> reports failure.</exception>
static void example1()
{
    string dataPath = ".//";
    string language = "eng";
    OcrEngineMode oem = OcrEngineMode.DEFAULT;
    PageSegmentationMode psm = PageSegmentationMode.AUTO_OSD;

    TessBaseAPI tessBaseAPI = new TessBaseAPI();
    try
    {
        // Initialize tesseract-ocr
        if (!tessBaseAPI.Init(dataPath, language, oem))
        {
            throw new Exception("Could not initialize tesseract.");
        }

        // Set the Page Segmentation mode
        tessBaseAPI.SetPageSegMode(psm);
    }
    finally
    {
        // Release the engine on every exit path, including a failed Init.
        tessBaseAPI.Dispose();
    }
}
/// <summary>
/// Example 4: recognizes "./input.png" with the "eng" language data from "./tessdata/"
/// and writes the result through a <c>PdfRenderer</c>. The renderer's output name is
/// the input file's full path with its extension removed.
/// </summary>
/// <exception cref="Exception">Thrown when <c>Init</c> reports failure.</exception>
static void example4()
{
    string dataPath = "./tessdata/";
    string language = "eng";
    string inputFile = "./input.png";
    OcrEngineMode oem = OcrEngineMode.DEFAULT;
    PageSegmentationMode psm = PageSegmentationMode.AUTO_OSD;

    TessBaseAPI tessBaseAPI = new TessBaseAPI();
    Pix pix = null;
    try
    {
        // Initialize tesseract-ocr
        if (!tessBaseAPI.Init(dataPath, language, oem))
        {
            throw new Exception("Could not initialize tesseract.");
        }

        // Set the Page Segmentation mode
        tessBaseAPI.SetPageSegMode(psm);

        // Set the input image
        pix = tessBaseAPI.SetImage(inputFile);

        // Recognize image
        tessBaseAPI.Recognize();

        // Ensure input name is set
        tessBaseAPI.SetInputName(inputFile);

        var fileInfo = new System.IO.FileInfo(inputFile);
        string tessDataPath = tessBaseAPI.GetDatapath();

        // Strip only the trailing extension. A plain string Replace would also remove
        // the same text elsewhere in the path and throws when the extension is empty.
        string outputName = System.IO.Path.ChangeExtension(fileInfo.FullName, null);

        // Call the pdf renderer and export the pdf
        using (var pdfRenderer = new PdfRenderer(outputName, tessDataPath, false))
        {
            pdfRenderer.BeginDocument("tesseract.net searchable Pdf generation");
            pdfRenderer.AddImage(tessBaseAPI);
            pdfRenderer.EndDocument();
        }
    }
    finally
    {
        // Same release order as before (engine first, then image), but now on every
        // exit path rather than only after a successful run.
        tessBaseAPI.Dispose();
        if (pix != null)
        {
            pix.Dispose();
        }
    }
}
/// <summary>
/// Example 3: recognizes "./input.png" with the "chi_sim" language data from
/// "./tessdata/" and walks the result iterator at paragraph level, appending each
/// paragraph's UTF-8 text to a <c>StringBuilder</c>.
/// </summary>
/// <remarks>
/// The accumulated text is not returned or printed; the method only demonstrates the
/// iteration pattern.
/// </remarks>
/// <exception cref="Exception">Thrown when <c>Init</c> reports failure.</exception>
static void example3()
{
    string dataPath = "./tessdata/";
    //string language = "eng";
    string language = "chi_sim";
    string inputFile = "./input.png";
    OcrEngineMode oem = OcrEngineMode.DEFAULT;
    PageSegmentationMode psm = PageSegmentationMode.AUTO_OSD;

    TessBaseAPI tessBaseAPI = new TessBaseAPI();
    Pix pix = null;
    try
    {
        // Initialize tesseract-ocr
        if (!tessBaseAPI.Init(dataPath, language, oem))
        {
            throw new Exception("Could not initialize tesseract.");
        }

        // Set the Page Segmentation mode
        tessBaseAPI.SetPageSegMode(psm);

        // Set the input image
        pix = tessBaseAPI.SetImage(inputFile);

        // Recognize image
        tessBaseAPI.Recognize();

        // NOTE(review): the iterator is never released here; confirm whether
        // ResultIterator needs explicit disposal in this wrapper.
        ResultIterator resultIterator = tessBaseAPI.GetIterator();

        // Extract text from the result iterator, one paragraph per step
        StringBuilder stringBuilder = new StringBuilder();
        PageIteratorLevel pageIteratorLevel = PageIteratorLevel.RIL_PARA;
        do
        {
            stringBuilder.Append(resultIterator.GetUTF8Text(pageIteratorLevel));
        } while (resultIterator.Next(pageIteratorLevel));
    }
    finally
    {
        // Same release order as before (engine first, then image), but now on every
        // exit path rather than only after a successful run.
        tessBaseAPI.Dispose();
        if (pix != null)
        {
            pix.Dispose();
        }
    }
}
/// <summary>
/// Command-line entry point. Parses the arguments, runs OCR on the image and writes
/// the recognized text to a file named after the output base.
/// </summary>
/// <remarks>
/// The first free argument is the image and the second is the output base name.
/// <c>-l &lt;lang&gt;</c> selects the language (default "eng") and
/// <c>-psm &lt;n&gt;</c> selects the page segmentation mode by numeric value
/// (default PSM_AUTO). A lone <c>-v</c> or <c>--version</c> prints the version.
/// The output file gets the suffix ".html", ".box" or ".txt" depending on the
/// "tessedit_create_hocr" and "tessedit_create_boxfile" variables.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        printUsage("");
        return;
    }

    if (args.Length == 1 && (args[0] == "-v" || args[0] == "--version"))
    {
        Console.WriteLine(string.Format("tesseract-{0}", TessBaseAPI.Version()));
        Console.ReadKey();
        return;
    }

    string lang = "eng";
    string image = "";
    string output = "";
    PageSegMode pagesegmode = PageSegMode.PSM_AUTO;

    int arg = 0;
    int argc = args.Length;

    // Keep consuming arguments until both positional values are known and the next
    // argument is not an option. Anything left over is handed to Init below.
    while (arg < argc && (output == string.Empty || args[arg].StartsWith("-", StringComparison.Ordinal)))
    {
        if (args[arg] == "-l" && arg + 1 < argc)
        {
            lang = args[arg + 1];
            ++arg;
        }
        else if (args[arg] == "-psm" && arg + 1 < argc)
        {
            // Previously the value was skipped and never applied. A value that is
            // not an integer leaves the default mode in place.
            int psmValue;
            if (int.TryParse(args[arg + 1], System.Globalization.NumberStyles.Integer,
                             System.Globalization.CultureInfo.InvariantCulture, out psmValue))
            {
                pagesegmode = (PageSegMode)psmValue;
            }
            ++arg;
        }
        else if (image == string.Empty)
        {
            image = args[arg];
        }
        else if (output == string.Empty)
        {
            output = args[arg];
        }
        ++arg;
    }

    if (string.IsNullOrEmpty(output))
    {
        printUsage(args[0]);
        return;
    }

    TessBaseAPI api = new TessBaseAPI();
    try
    {
        api.SetOutputName(output);
        api.SetPageSegMode(pagesegmode);

        // NOTE(review): args[0] is the first user argument in .NET, not the program
        // name; confirm this is what Init's first parameter should receive.
        api.Init(args[0], lang, OcrEngineMode.OEM_DEFAULT, args[arg - 1], argc - arg, null, null, false);

        Console.WriteLine(string.Format("Tesseract Open Source OCR Engine v{0} with Leptonica\n", TessBaseAPI.Version()));

        string text_out = string.Empty;
        if (!api.ProcessPages(image, null, 0, ref text_out))
        {
            // The failure is reported, but the output file is still written below.
            Console.WriteLine("Error during processing.\n");
        }

        bool output_hocr = false;
        api.GetBoolVariable("tessedit_create_hocr", ref output_hocr);
        bool output_box = false;
        api.GetBoolVariable("tessedit_create_boxfile", ref output_box);

        string suffix;
        if (output_hocr)
        {
            suffix = ".html";
        }
        else if (output_box)
        {
            suffix = ".box";
        }
        else
        {
            suffix = ".txt";
        }

        File.WriteAllText(output + suffix, text_out);
    }
    finally
    {
        // Release the engine on every exit path.
        api.Dispose();
    }
}
/// <summary>
/// Command-line entry point. Parses the arguments, runs OCR on the image and writes
/// the recognized text to a file named after the output base.
/// </summary>
/// <remarks>
/// The first free argument is the image and the second is the output base name.
/// <c>-l &lt;lang&gt;</c> selects the language (default "eng") and
/// <c>-psm &lt;n&gt;</c> selects the page segmentation mode by numeric value
/// (default PSM_AUTO). A lone <c>-v</c> or <c>--version</c> prints the version.
/// The output file gets the suffix ".html", ".box" or ".txt" depending on the
/// "tessedit_create_hocr" and "tessedit_create_boxfile" variables.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        printUsage("");
        return;
    }

    bool versionRequested = args.Length == 1 && (args[0] == "-v" || args[0] == "--version");
    if (versionRequested)
    {
        Console.WriteLine(string.Format("tesseract-{0}", TessBaseAPI.Version()));
        Console.ReadKey();
        return;
    }

    string lang = "eng";
    string image = "";
    string output = "";
    PageSegMode pagesegmode = PageSegMode.PSM_AUTO;

    int arg = 0;
    int argc = args.Length;

    // Keep consuming arguments until both positional values are known and the next
    // argument is not an option. Anything left over is handed to Init below.
    while (arg < argc && (output == string.Empty || args[arg].StartsWith("-", StringComparison.Ordinal)))
    {
        if (args[arg] == "-l" && arg + 1 < argc)
        {
            lang = args[arg + 1];
            ++arg;
        }
        else if (args[arg] == "-psm" && arg + 1 < argc)
        {
            // Previously the value was skipped and never applied. A value that is
            // not an integer leaves the default mode in place.
            int psmValue;
            if (int.TryParse(args[arg + 1], System.Globalization.NumberStyles.Integer,
                             System.Globalization.CultureInfo.InvariantCulture, out psmValue))
            {
                pagesegmode = (PageSegMode)psmValue;
            }
            ++arg;
        }
        else if (image == string.Empty)
        {
            image = args[arg];
        }
        else if (output == string.Empty)
        {
            output = args[arg];
        }
        ++arg;
    }

    if (string.IsNullOrEmpty(output))
    {
        printUsage(args[0]);
        return;
    }

    TessBaseAPI api = new TessBaseAPI();
    try
    {
        api.SetOutputName(output);
        api.SetPageSegMode(pagesegmode);

        // NOTE(review): args[0] is the first user argument in .NET, not the program
        // name; confirm this is what Init's first parameter should receive.
        api.Init(args[0], lang, OcrEngineMode.OEM_DEFAULT, args[arg - 1], argc - arg, null, null, false);

        Console.WriteLine(string.Format("Tesseract Open Source OCR Engine v{0} with Leptonica\n", TessBaseAPI.Version()));

        string text_out = string.Empty;
        if (!api.ProcessPages(image, null, 0, ref text_out))
        {
            // The failure is reported, but the output file is still written below.
            Console.WriteLine("Error during processing.\n");
        }

        bool output_hocr = false;
        api.GetBoolVariable("tessedit_create_hocr", ref output_hocr);
        bool output_box = false;
        api.GetBoolVariable("tessedit_create_boxfile", ref output_box);

        string outfile = output;
        if (output_hocr)
        {
            outfile += ".html";
        }
        else if (output_box)
        {
            outfile += ".box";
        }
        else
        {
            outfile += ".txt";
        }

        File.WriteAllText(outfile, text_out);
    }
    finally
    {
        // Release the engine on every exit path.
        api.Dispose();
    }
}