/// <summary>
/// Console entry point: runs OCR over one image, logs the recognised text and mean
/// confidence, then walks the result iterator and logs the words of every second
/// iteration step (those where the counter <c>i</c> is even).
/// </summary>
/// <param name="args">Optional; when present, <c>args[0]</c> replaces the default image path.</param>
public static void Main(string[] args)
{
    var testImagePath = "./phototest.tif";
    if (args.Length > 0)
    {
        testImagePath = args[0];
    }

    try
    {
        var logger = new FormattedConsoleLogger();
        // Constructed as in the original code; it is not used further in this method.
        var resultPrinter = new ResultPrinter(logger);

        using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile(testImagePath))
        using (logger.Begin("Process image"))
        {
            var i = 1;
            using (var page = engine.Process(img))
            {
                var text = page.GetText();
                logger.Log("Text: {0}", text);
                logger.Log("Mean confidence: {0}", page.GetMeanConfidence());

                using (var iter = page.GetIterator())
                {
                    iter.Begin();
                    do
                    {
                        if (i % 2 == 0)
                        {
                            using (logger.Begin("Line {0}", i))
                            {
                                do
                                {
                                    using (logger.Begin("Word Iteration"))
                                    {
                                        if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
                                        {
                                            logger.Log("New block");
                                        }
                                        if (iter.IsAtBeginningOf(PageIteratorLevel.Para))
                                        {
                                            logger.Log("New paragraph");
                                        }
                                        if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
                                        {
                                            logger.Log("New line");
                                        }
                                        logger.Log("word: " + iter.GetText(PageIteratorLevel.Word));

                                        // The clone is a separate disposable iterator; release it
                                        // per word instead of leaking one per loop pass.
                                        using (ResultIterator testiter = iter.Clone())
                                        {
                                            logger.Log("from clone: " + testiter.GetText(PageIteratorLevel.Word));
                                        }
                                    }
                                } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                            }
                        }
                        i++;
                    } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                }
            }
        }
    }
    catch (Exception e)
    {
        Trace.TraceError(e.ToString());
        Console.WriteLine("Unexpected Error: " + e.Message);
        Console.WriteLine("Details: ");
        Console.WriteLine(e.ToString());
    }

    Console.Write("Press any key to continue . . . ");
    Console.ReadKey(true);
}
/// <summary>
/// Runs Tesseract OCR over an in-memory image, with recognition restricted to the
/// whitelisted mark characters (backslash, slash, pipe and hash).
/// </summary>
/// <param name="fileBytes">Byte array of file data</param>
/// <returns>The text Tesseract recognised in the image.</returns>
public static string getText(byte[] fileBytes)
{
    string rootPath = HostingEnvironment.ApplicationPhysicalPath;
    BitmapToPixConverter converter = new BitmapToPixConverter();

    using (Stream memStream = new MemoryStream(fileBytes))
    using (Bitmap image = (Bitmap)Image.FromStream(memStream))
    using (TesseractEngine ocr = new TesseractEngine(rootPath, "eng", EngineMode.TesseractOnly))
    {
        image.SetResolution(300, 300);
        ocr.SetVariable("tessedit_char_whitelist", "\\/|#");

        // Both the converted Pix and its grayscale copy own native memory. Each gets
        // its own using so neither leaks, even when processing throws.
        using (Pix colour = converter.Convert(image))
        using (Pix gray = colour.ConvertRGBToGray())
        using (Page page = ocr.Process(gray, PageSegMode.Auto))
        {
            return page.GetText();
        }
    }
}
/// <summary>
/// Runs English OCR over the image at <c>TiffPath</c> and writes the recognised text
/// to a ".txt" file next to it.
/// </summary>
/// <param name="path">
/// Not read by this method; the image location comes from <c>TiffPath</c>.
/// NOTE(review): confirm whether <c>path</c> was meant to be used instead.
/// </param>
/// <returns>"Sucess!" when the text file was written, otherwise "Error: " plus the exception message.</returns>
public string OCRTiff(string path)
{
    String Result = String.Empty;
    try
    {
        String PageText;
        using (TesseractEngine TE = new TesseractEngine(TessDataPath, "eng", EngineMode.Default))
        using (Pix tiffImage = Pix.LoadFromFile(TiffPath))
        using (var ocrPage = TE.Process(tiffImage))
        {
            // The page is disposable; the original dropped it without disposing.
            PageText = ocrPage.GetText();
        }

        // ChangeExtension handles extensions of any length (".tif", ".tiff", ...),
        // whereas chopping a fixed 4 characters mangled anything not 3 letters long.
        String TextFilePath = System.IO.Path.ChangeExtension(TiffPath, ".txt");
        File.WriteAllText(TextFilePath, PageText);
        Result = "Sucess!";
    }
    catch (Exception Exception)
    {
        Result = String.Format("Error: {0}", Exception.Message);
    }
    return Result;
}
/// <summary>
/// Private constructor: creates the English Tesseract engine from the configured
/// tessdata path, remembers the configured JSON cards file path and starts with an
/// empty card list.
/// </summary>
private OCREngine()
{
    var tessDataPath = Configuration.Instance.OCR.TesseractDataPath;
    m_ocrEngine = new TesseractEngine(tessDataPath, "eng", EngineMode.TesseractAndCube);

    var cardsFilePath = Configuration.Instance.JSONCardsFilePath;
    m_jsonCardsFilePath = cardsFilePath;

    m_cards = new List<string>();
}
/// <summary>
/// Shows the given file dialog; when the user confirms, runs English OCR over the
/// chosen file, shows the recognised text in a message box and passes it to
/// <c>getGenreOfSong</c>. An <see cref="IOException"/> is reported in a message box.
/// </summary>
/// <param name="openImageDialog">Dialog used to pick the image file.</param>
public void imageOCRTest(FileDialog openImageDialog)
{
    if (openImageDialog.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    String chosenImage = openImageDialog.FileName;
    try
    {
        // Engine (English), the loaded Leptonica image and the processed page are
        // all released when this block is left.
        using (var tEngine = new TesseractEngine("C:\\Users\\yeghiakoronian\\Documents\\visual studio 2013\\Projects\\NLP Genre Recogition\\NLP Genre Recogition\\tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile(chosenImage))
        using (var page = tEngine.Process(img))
        {
            String text = page.GetText();
            MessageBox.Show(text);
            getGenreOfSong(text);
        }
    }
    catch (IOException)
    {
        MessageBox.Show("Woops Cant Open The File", "COMP 6781: NLP", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
    }
}
/// <summary>
/// Lets the user select regions of the image in an OpenCV window and runs English
/// OCR over each selected region. When nothing is selected, the whole image is used.
/// </summary>
/// <param name="inputImagePath">Path of the image to load.</param>
/// <returns>The concatenation of "[Block N]: text" for each region, in selection order.</returns>
public static string ReadText(string inputImagePath)
{
    const string windowTitle = "Select Text Boxes. Enter for confirm, Esc for finish";
    var bitmaps = new List<Bitmap>();

    try
    {
        using (var srcImage = new Mat(inputImagePath))
        {
            var bbox = Cv2.SelectROIs(windowTitle, srcImage);
            Cv2.DestroyWindow(windowTitle);

            if (bbox == null || bbox.Length == 0)
            {
                bitmaps.Add(OpenCvSharp.Extensions.BitmapConverter.ToBitmap(srcImage));
            }
            else
            {
                foreach (var box in bbox)
                {
                    // The sub-matrix is only needed long enough to produce the bitmap.
                    using (var region = srcImage.SubMat(box))
                    {
                        bitmaps.Add(OpenCvSharp.Extensions.BitmapConverter.ToBitmap(region));
                    }
                }
            }
        }

        using (var ocr = new Tesseract.TesseractEngine(@"../../../../OpenCVHavrylov/Data/tessdata", "eng"))
        {
            var res = "";
            int i = 1;
            foreach (var img in bitmaps)
            {
                using (var page = ocr.Process(img))
                {
                    res += $"[Block {i}]: {page.GetText()}";
                    i++;
                }
            }
            return res;
        }
    }
    finally
    {
        // Bitmaps hold GDI+ handles; release them whether or not OCR succeeded.
        foreach (var bmp in bitmaps)
        {
            bmp.Dispose();
        }
    }
}
/// <summary>
/// Runs OCR over the image at <paramref name="pathToImage"/>.
/// </summary>
/// <param name="pathToImage">Path of the image file.</param>
/// <returns>
/// "Image not found" when the file does not exist; the recognised text when it is
/// non-empty; otherwise <c>null</c> (also after an exception, which is traced and
/// whose stack trace is shown in a message box).
/// </returns>
private string BeginRecognize(string pathToImage)
{
    try
    {
        bool imageExists = File.Exists(pathToImage);
        if (!imageExists)
        {
            return "Image not found";
        }

        using (var engine = new TesseractEngine(_pathToTestData, Language, EngineMode.Default))
        using (var img = Pix.LoadFromFile(pathToImage))
        using (var page = engine.Process(img))
        {
            var recognized = page.GetText();
            return String.IsNullOrEmpty(recognized) ? null : recognized;
        }
    }
    catch (Exception e)
    {
        Trace.TraceError(e.ToString());
        MessageBox.Show(e.StackTrace);
        return null;
    }
}
/// <summary>
/// Console entry point: asks for an image path, times engine creation and image
/// processing separately, and prints the recognised text.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
private static void Main(string[] args)
{
    Console.Write("Input image path:");
    string input = Console.ReadLine();

    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    // Engine, image and page wrap native resources, so they are disposed deterministically.
    using (T.TesseractEngine engine = new T.TesseractEngine("tessdata", "tur", T.EngineMode.TesseractAndLstm))
    {
        stopwatch.Stop();
        Console.WriteLine("Engine creation :" + stopwatch.ElapsedMilliseconds.ToString() + " ms");

        stopwatch.Restart();
        using (T.Pix image = T.Pix.LoadFromFile(input))
        using (T.Page page = engine.Process(image))
        {
            string text = page.GetText();
            stopwatch.Stop();

            Console.WriteLine("Process time :" + stopwatch.ElapsedMilliseconds.ToString() + " ms");
            Console.Write("Result: " + text);
        }
    }

    Console.Read();
}
/// <summary>
/// Constructor: builds the common filter sequence, loads the template images from
/// the project resources and tries to create the Tesseract engine. An engine
/// creation failure is traced and reported instead of being rethrown.
/// </summary>
public CardRecognizer()
{
    // Common filter sequence: BT709 grayscale, Bradley local thresholding, difference edge detector.
    var filters = new FiltersSequence();
    filters.Add(Grayscale.CommonAlgorithms.BT709);
    filters.Add(new BradleyLocalThresholding());
    filters.Add(new DifferenceEdgeDetector());
    commonSeq = filters;

    // Face-card templates from resources.
    j = miranda.ui.Properties.Resources.J;
    k = miranda.ui.Properties.Resources.K;
    q = miranda.ui.Properties.Resources.Q;

    // Suit templates from resources.
    clubs = miranda.ui.Properties.Resources.Clubs;
    diamonds = miranda.ui.Properties.Resources.Diamonds;
    spades = miranda.ui.Properties.Resources.Spades;
    hearts = miranda.ui.Properties.Resources.Hearts;

    try
    {
        _engine = new TesseractEngine(@"./tessdata", "rus", EngineMode.Default);
    }
    catch (Exception ex)
    {
        Trace.TraceError(ex.ToString());
        Ex.Report(ex);
    }
}
/// <summary>
/// Creates the Tesseract engine for the requested language and logs its version.
/// "eng" uses the combined Tesseract+Cube mode; every other language uses the
/// default mode.
/// </summary>
/// <param name="path">Not read by this constructor; the tessdata location is hard-coded below.</param>
/// <param name="lng">Language code passed to the engine.</param>
public WrapperTesseract(string path, string lng)
{
    Log.WriteLine("Setting Tesseract traindata and language");

    bool isEnglish = lng == "eng";

    // The two data-path literals differ only by the trailing slash; both are kept as written.
    string dataDirectory = isEnglish ? @"./tessdata3/" : @"./tessdata3";
    EngineMode mode = isEnglish ? EngineMode.TesseractAndCube : EngineMode.Default;

    _engine = new TesseractEngine(dataDirectory, lng, mode);
    Log.WriteLine("Tesseract Version: " + _engine.Version);
}
/// <summary>
/// Character recognition: crops the source bitmap to a fixed rectangle, shows the
/// crop in the preview image control and writes the OCR result to the console.
/// </summary>
private void AnalyzeString()
{
    // Crop first; the original author notes OCR works better on a suitably cropped region.
    System.Drawing.Rectangle rect = new System.Drawing.Rectangle(20, 90, 450, 100);
    SrcImg = SrcImg.Clone(rect, SrcImg.PixelFormat);

    // Show the cropped image in the confirmation view.
    this.Dispatcher.Invoke(() =>
    {
        this._Image_crop.Source = ConvertBitmapToIS(SrcImg);
    });

    // OCR
    string langPath = @"C:\tessdata";
    string lngStr = "eng";
    var img = SrcImg;

    using (var tesseract = new Tesseract.TesseractEngine(langPath, lngStr))
    // Pix and Page wrap native memory; dispose them instead of leaking one per call.
    using (Pix pix = PixConverter.ToPix(img))
    using (Tesseract.Page page = tesseract.Process(pix))
    {
        Console.WriteLine(page.GetText());
        Console.ReadLine(); // err?
    }
}
/// <summary>
/// Recognises a single line of text inside the given region of the bitmap, limited
/// to digits, '-' and '.', writes the result to a fixed file and returns it.
/// </summary>
/// <param name="CropedPrescription">Bitmap to read from.</param>
/// <param name="x">Region x value passed to <c>Rect</c>.</param>
/// <param name="y">Region y value passed to <c>Rect</c>.</param>
/// <param name="width">Region width.</param>
/// <param name="height">Region height.</param>
/// <returns>The recognised text.</returns>
public static string ocr(Bitmap CropedPrescription, int x, int y, int width, int height)
{
    using (var engine = new TesseractEngine(@"C:\Program Files\Tesseract-OCR\tessdata\", "kor", EngineMode.Default))
    {
        // Only digits, '.' and '-' are to be recognised.
        engine.SetVariable("tessedit_char_whitelist", "0123456789-.");

        // Region of interest.
        var regionOfInterest = new Rect(x, y, width, height);

        using (var page = engine.Process(CropedPrescription, regionOfInterest, PageSegMode.SingleLine))
        {
            string recognized = page.GetText();

            // Save the recognised characters to the output file.
            System.IO.File.WriteAllText(@"C:\Program Files\PLOCR\textrecognition.html", recognized);
            return recognized;
        }
    }
}
/// <summary>
/// Runs English OCR over the image held in <c>Buffer</c> and returns the text.
/// </summary>
/// <param name="pageNumber">Not read by this implementation.</param>
/// <returns>The recognised text.</returns>
/// <exception cref="ArgumentException">The configured tessdata directory does not exist.</exception>
public override string ExtractContent(int? pageNumber)
{
    var dataPath = Config.TessDataPath;

    bool dataPathExists = Directory.Exists(dataPath);
    if (!dataPathExists)
    {
        throw new ArgumentException("Path does not exist or access is denied.", nameof(dataPath));
    }

    using (var engine = new TesseractEngine(dataPath, "eng", EngineMode.Default))
    using (var memoryStream = new MemoryStream(Buffer))
    // The Pix is loaded via a bitmap because Pix does not support loading from a stream.
    using (var image = new Bitmap(memoryStream))
    using (var pix = PixConverter.ToPix(image))
    using (var page = engine.Process(pix))
    {
        var content = page.GetText();
        return content;
    }
}
/// <summary>
/// Character recognition process: checks that the image and the language's
/// ".traineddata" file exist, then runs OCR restricted to digits, ASCII letters,
/// '.' and ','.
/// </summary>
/// <param name="imgPath">Path of the image file.</param>
/// <param name="dataDirPath">Directory containing the trained-data files.</param>
/// <param name="lang">Language code; also the trained-data file name without extension.</param>
/// <returns>The recognised text, or a message string when the image or trained data is missing.</returns>
private static string Character_recognition_Process(string imgPath, string dataDirPath, string lang)
{
    if (!System.IO.File.Exists(imgPath))
    {
        return ("画像のパスに画像が見つかりませんでした");
    }

    // Check that the language's trained-data file is present.
    string traindedDataPath = System.IO.Path.Combine(dataDirPath, lang + ".traineddata");
    if (!System.IO.File.Exists(traindedDataPath))
    {
        return (lang + ".traineddataがみつかりませんでした");
    }

    // Character recognition. The bitmap and the page are disposed together with the
    // engine; the original leaked both on every call.
    using (TesseractEngine tesseract = new Tesseract.TesseractEngine(dataDirPath, lang))
    using (var img = new System.Drawing.Bitmap(imgPath))
    {
        // Restrict the recognisable characters.
        tesseract.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,");

        // Perform OCR.
        using (Tesseract.Page page = tesseract.Process(img))
        {
            return (page.GetText());
        }
    }
}
/// <summary>
/// Runs English OCR over <paramref name="imgdata"/> after switching the current
/// directory to the directory of the running executable.
/// </summary>
/// <param name="engpath">Tessdata path handed to the engine.</param>
/// <param name="imgdata">Image to recognise.</param>
/// <returns>The trimmed recognised text, or an empty string when anything fails.</returns>
public static string GetOCRText(string engpath, Image imgdata)
{
    string rst = "";
    try
    {
        // GetDirectoryName yields "" (or null) when the program was started by bare
        // file name. SetCurrentDirectory would then throw and OCR would be silently
        // skipped, so only switch directory when there is one to switch to.
        string exeDirectory = System.IO.Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]);
        if (!string.IsNullOrEmpty(exeDirectory))
        {
            Directory.SetCurrentDirectory(exeDirectory);
        }

        using (var engine = new TesseractEngine(engpath, "eng", EngineMode.Default))
        using (Bitmap image = new Bitmap(imgdata))
        using (var pix = PixConverter.ToPix(image))
        using (var page = engine.Process(pix))
        {
            rst = page.GetText().Trim();
        }
    }
    catch (System.Exception ex)
    {
        // Keep the exception details somewhere; the project log entry below carries none.
        System.Diagnostics.Trace.TraceError(ex.ToString());
        ScratchModel.WriteLogFile("----Error----", "------ GetOCRText1 -------", "");
    }
    return rst;
}
/// <summary>
/// Click handler: runs English OCR over the file named in <c>txtFile</c> and writes
/// the text, the mean confidence and the words of every second iteration step to the
/// debug output. Errors are written to the console.
/// </summary>
private void btnOCR_Click(object sender, EventArgs e)
{
    try
    {
        string testImagePath = this.txtFile.Text.Trim();

        using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile(testImagePath))
        {
            System.Diagnostics.Debug.WriteLine("Process image");
            var i = 1;

            using (var page = engine.Process(img))
            {
                var text = page.GetText();

                // Debug.WriteLine(string, string) is the (message, category) overload, so
                // passing the text as a second string argument never formats it into the
                // message. Concatenate instead.
                System.Diagnostics.Debug.WriteLine("Text: " + text);
                System.Diagnostics.Debug.WriteLine("Mean confidence: {0}", page.GetMeanConfidence());

                using (var iter = page.GetIterator())
                {
                    iter.Begin();
                    do
                    {
                        if (i % 2 == 0)
                        {
                            System.Diagnostics.Debug.WriteLine("Line {0}", i);
                            do
                            {
                                System.Diagnostics.Debug.WriteLine("Word Iteration");
                                if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
                                {
                                    System.Diagnostics.Debug.WriteLine("New block");
                                }
                                if (iter.IsAtBeginningOf(PageIteratorLevel.Para))
                                {
                                    System.Diagnostics.Debug.WriteLine("New paragraph");
                                }
                                if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
                                {
                                    System.Diagnostics.Debug.WriteLine("New line");
                                }
                                System.Diagnostics.Debug.WriteLine("word: " + iter.GetText(PageIteratorLevel.Word));
                            } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                        }
                        i++;
                    } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Unexpected Error: " + ex.Message);
        Console.WriteLine("Details: ");
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Recognises a captcha image using the "yzufont" trained data, limited to digits
/// and upper-case letters, and strips all whitespace from the result.
/// </summary>
/// <param name="image">Captcha image.</param>
/// <returns>The recognised characters with whitespace removed.</returns>
private string parseCaptchaStr(Image image)
{
    // Engine, bitmap copy and page are all disposable; the original leaked all three on every call.
    using (Tesseract.TesseractEngine ocr = new Tesseract.TesseractEngine(@"tessdata\", "yzufont"))
    {
        ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");

        using (Bitmap captcha = new Bitmap(image))
        using (Page tmpPage = ocr.Process(captcha, pageSegMode: ocr.DefaultPageSegMode))
        {
            return (Regex.Replace(tmpPage.GetText(), @"\s+", String.Empty));
        }
    }
}
/// <summary>
/// Creates a page by storing the engine, image, image name, region of interest and
/// page segmentation mode it was given.
/// </summary>
/// <param name="engine">Stored in <c>Engine</c>.</param>
/// <param name="image">Stored in <c>Image</c>.</param>
/// <param name="imageName">Stored in <c>ImageName</c>.</param>
/// <param name="regionOfInterest">Stored in <c>RegionOfInterest</c>.</param>
/// <param name="pageSegmentMode">Stored in <c>PageSegmentMode</c>.</param>
internal Page(TesseractEngine engine, Pix image, string imageName, Rect regionOfInterest, PageSegMode pageSegmentMode)
{
    this.Engine = engine;
    this.Image = image;
    this.ImageName = imageName;
    this.RegionOfInterest = regionOfInterest;
    this.PageSegmentMode = pageSegmentMode;
}
/// <summary>
/// Creates the OCR engine for the configured recognition languages and engine mode.
/// When <c>SearchForDigitsOnly</c> is set, recognition is limited to the digits 0-9.
/// </summary>
/// <returns>The newly created engine.</returns>
private Tesseract.TesseractEngine CreateEngine()
{
    var languages = GetLanguageNamesString(RecognitionLanguage1, RecognitionLanguage2);
    var mode = EngineMode.ToEngineMode();

    Tesseract.TesseractEngine engine = new Tesseract.TesseractEngine("tessdata", languages, mode);
    if (SearchForDigitsOnly)
    {
        engine.SetVariable("tessedit_char_whitelist", "0123456789");
    }

    return engine;
}
/// <summary>
/// Recognises the text in the specified bitmap with a freshly created engine that
/// uses the configured default segmentation mode.
/// </summary>
/// <param name="bitmap">Bitmap.</param>
/// <returns>The recognised text.</returns>
public string Process(Bitmap bitmap)
{
    using (Tesseract.TesseractEngine ocrEngine = CreateEngine())
    {
        ocrEngine.DefaultPageSegMode = (PageSegMode)DefaultSegmentationMode;

        using (Page recognizedPage = ocrEngine.Process(bitmap))
        {
            string recognizedText = recognizedPage.GetText();
            return recognizedText;
        }
    }
}
/// <summary>
/// Creates the Japanese and English OCR engines used by the tests.
/// </summary>
public void TestInitialize()
{
    // Location of the language data, shared by both engines.
    var tessdataDirectory = testPath + @"\tessdata";

    // The language name is the part of "<name>.traineddata" before the extension,
    // e.g. "eng" for English.
    tesseractJpn = new TesseractEngine(tessdataDirectory, "jpn");
    tesseractEng = new TesseractEngine(tessdataDirectory, "eng");
}
/// <summary>
/// Creates the Tesseract engine (letters only) and trains a multiclass SVM on every
/// "*.png" file in the given directory. Each image is read as a 28x28 grid: a pixel
/// whose colour name is "ffffffff" becomes 0, anything else 1. The class label is the
/// integer before the first '.' in the file name.
/// </summary>
/// <param name="TrainedDataInputFile">Directory that contains the training PNG files.</param>
public SVM(string TrainedDataInputFile)
{
    _engine = new TesseractEngine(@"./tessdata3", "eng", EngineMode.TesseractAndCube);
    _engine.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
    _engine.SetVariable("tessedit_char_blacklist", "¢§+~»~`!@#$%^&*()_+-={}[]|\\:\";\'<>?,./");

    string[] TrainedData = Directory.GetFiles(TrainedDataInputFile, "*.png");
    double[][] inputs = new double[TrainedData.Length][];
    int[] Outputs = new int[TrainedData.Length];

    for (int i = 0; i < TrainedData.Length; i++)
    {
        string filename = Path.GetFileNameWithoutExtension(TrainedData[i]);
        string[] split = filename.Split('.');

        // One fresh feature vector per image.
        double[] InputArray = new double[784];

        // Dispose each bitmap as soon as its pixels are read. The original kept every
        // training image (and its file handle) alive until finalization.
        using (Bitmap TrainingImage = new Bitmap(TrainedData[i]))
        {
            for (int j = 0; j < 28; j++)
            {
                for (int k = 0; k < 28; k++)
                {
                    bool isWhite = TrainingImage.GetPixel(j, k).Name.Equals("ffffffff");
                    InputArray[j * 28 + k] = isWhite ? 0 : 1;
                }
            }
        }

        inputs[i] = InputArray;
        Outputs[i] = Convert.ToInt32(split[0]);
    }

    IKernel kernel = new Polynomial(2, 0);
    ksvm = new MulticlassSupportVectorMachine(784, kernel, 2);
    MulticlassSupportVectorLearning ml = new MulticlassSupportVectorLearning(ksvm, inputs, Outputs);

    // Learning parameters; the original author marked these as needing careful tuning later.
    double complexity = 1;
    double epsilon = 0.001;
    double tolerance = 0.2;

    ml.Algorithm = (svm, classInputs, classOutputs, i, j) =>
    {
        var smo = new SequentialMinimalOptimization(svm, classInputs, classOutputs);
        smo.Complexity = complexity; // Cost parameter for SVM
        smo.Epsilon = epsilon;
        smo.Tolerance = tolerance;
        return smo;
    };

    // Train the machines. It should take a while. The returned error is not used.
    ml.Run();
}
/// <summary>
/// Recognises a single line of text in the bitmap, limited to digits and upper-case
/// letters, with the "tessedit_unrej_any_wd" variable switched on.
/// </summary>
/// <param name="b">Bitmap to recognise.</param>
/// <returns>The recognised text.</returns>
private string OCR(Bitmap b)
{
    using (var engine = new TesseractEngine(@"tessdata", "eng", EngineMode.Default))
    {
        engine.SetVariable("tessedit_char_whitelist", "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ");
        engine.SetVariable("tessedit_unrej_any_wd", true);

        using (var page = engine.Process(b, PageSegMode.SingleLine))
        {
            string recognized = page.GetText();
            return recognized;
        }
    }
}
/// <summary>
/// Creates the shared English engine from the "tessdata" directory next to the entry
/// executable. Any failure is traced and written to the console; it is not rethrown.
/// </summary>
/// <returns>An already-completed task; all work is done synchronously.</returns>
public static Task init()
{
    try
    {
        // GetEntryAssembly can return null (for example under some hosts); fall back
        // to the application base directory instead of failing on a null reference.
        var entryAssembly = System.Reflection.Assembly.GetEntryAssembly();
        string exe_dir = entryAssembly != null
            ? (new FileInfo(entryAssembly.Location)).Directory.FullName
            : AppDomain.CurrentDomain.BaseDirectory;

        string tessdata_dir = Path.Combine(exe_dir, "tessdata");
        engine = new TesseractEngine(tessdata_dir, "eng", EngineMode.TesseractAndCube);
    }
    catch (Exception e)
    {
        Trace.TraceError(e.ToString());
        Console.WriteLine("Unexpected Error: " + e.Message);
        Console.WriteLine("Details: ");
        Console.WriteLine(e.ToString());
    }

    // The method never awaited anything, so the async state machine is dropped and a
    // completed task is returned. Callers that await the result behave as before.
    return Task.FromResult(0);
}
/// <summary>
/// Click handler: lets the user pick an image file, runs English OCR over it and
/// shows the recognised text in <c>TextBlock</c>.
/// </summary>
private void LoadButton_OnClick(object sender, RoutedEventArgs e)
{
    var fileDialog = new OpenFileDialog();
    var showDialog = fileDialog.ShowDialog();
    if (showDialog != true)
    {
        // Dialog cancelled or closed without a selection.
        return;
    }

    // Engine, image and page wrap native resources; dispose them per click instead of
    // leaking a full engine every time the button is pressed.
    using (var tessEngine = new Tesseract.TesseractEngine("tessdata", "eng", EngineMode.Default))
    using (var image = Pix.LoadFromFile(fileDialog.FileName))
    using (var page = tessEngine.Process(image, PageSegMode.Auto))
    {
        TextBlock.Text = page.GetText();
    }
}
/// <summary>
/// Converts the image to a monochrome bitmap, runs English OCR over it and traces
/// the mean confidence of the result.
/// </summary>
/// <param name="image">Image to recognise.</param>
/// <returns>The recognised text.</returns>
private static string RecognizeTextFromImage(Image image)
{
    using (var ocrEngine = new TesseractEngine(@"./tessdata", "eng"))
    {
        using (Bitmap bitmap = new Bitmap(image))
        {
            using (Bitmap monochromeBitmap = ImageUtils.Convert24BitToMonochrome(bitmap))
            {
                using (var page = ocrEngine.Process(monochromeBitmap))
                {
                    string result = page.GetText();
                    Trace.TraceInformation("Text recognized with mean confidence: {0:N3}", page.GetMeanConfidence());
                    return result;
                }
            }
        }
    }
}
/// <summary>
/// Runs French OCR over an image file and returns the result as HOCR text.
/// </summary>
/// <param name="p">Directory that contains the "tessdata" folder.</param>
/// <param name="fileName">Path of the image file.</param>
/// <returns>The value of <c>GetHOCRText(0)</c> for the processed page.</returns>
public static string AnalyzeFileHOCR(string p, string fileName)
{
    var tessdataDirectory = System.IO.Path.Combine(p, "tessdata");

    using (var engine = new TesseractEngine(tessdataDirectory, "fra", EngineMode.Default))
    using (var img = Pix.LoadFromFile(fileName))
    using (var page = engine.Process(img))
    {
        var hocr = page.GetHOCRText(0);
        return hocr;
    }
}
/// <summary>
/// Stores the user hash and configuration, logs a message when the configured
/// tessdata directory is missing, and creates the English engine regardless.
/// </summary>
/// <param name="cfg">Configuration; supplies the tessdata path and the logger.</param>
/// <param name="uHash">Value stored in the static <c>uHash</c> member.</param>
public vhConsole(IConfig cfg, String uHash)
{
    vhConsole.uHash = uHash;
    config = cfg;

    bool tessdataPresent = Directory.Exists(cfg.tessdata);
    if (!tessdataPresent)
    {
        // Only logged; construction continues and the engine is still created below.
        cfg.logger.Log("Cannot find tessdata path: {0}", Path.GetFullPath(cfg.tessdata));
    }

    engine = new Tesseract.TesseractEngine(cfg.tessdata, "eng");
}
/// <summary>
/// Console entry point: loads a bitmap, runs English OCR over it and prints the HOCR
/// output.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // NOTE(review): both paths below name the same .pdf file. It is used once as the
    // bitmap source and once as the engine's data path. Confirm the intended image
    // and tessdata locations.
    using (var image = new Bitmap(@"C:\\Users\\E566281\\Documents\\2007_AEB_E-pdf-1.pdf"))
    using (var ocr = new Tesseract.TesseractEngine("C:\\Users\\E566281\\Documents\\2007_AEB_E-pdf-1.pdf", "eng"))
    using (var result = ocr.Process(image, null))
    {
        // GetHOCRText returns one string. Enumerating it with foreach printed the
        // markup one character per line, so write it out in one piece.
        Console.WriteLine(result.GetHOCRText(1, false));
    }
}
/// <summary>
/// Gets all the numbers separately from an image: runs English OCR, splits the text
/// on spaces, cleans each token and collects those that parse as integers.
/// </summary>
/// <param name="imagePath">The image path.</param>
/// <returns>
/// The parsed numbers in the order they were found. The list is empty when the path
/// is blank, the file is missing, nothing was recognised or an error occurred.
/// </returns>
public static List<int> GetAllNumbersFromImage(string imagePath)
{
    var numbersList = new List<int>();

    if (string.IsNullOrWhiteSpace(imagePath) || !System.IO.File.Exists(imagePath))
    {
        return numbersList;
    }

    try
    {
        using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile(imagePath))
        using (var page = engine.Process(img))
        {
            if (!string.IsNullOrWhiteSpace(page.GetText()))
            {
                foreach (var token in page.GetText().Split(' '))
                {
                    if (string.IsNullOrWhiteSpace(token))
                    {
                        continue;
                    }

                    // Remove non-alphanumeric characters (per the original comment on the regex).
                    var cleaned = regex.Replace(token, string.Empty);

                    // Correct common misreads: 'i' becomes '1' and 'o' becomes '0'.
                    cleaned = cleaned.ToLowerInvariant().Replace('i', '1').Replace('o', '0');

                    var numberValue = 0;
                    if (int.TryParse(cleaned, out numberValue))
                    {
                        numbersList.Add(numberValue);
                    }
                    else
                    {
                        Console.WriteLine("Unable to process the file : " + imagePath);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Unexpected Error: " + ex.Message);
    }

    return numbersList;
}
/// <summary>
/// Runs English OCR over an image file. The result is the full page text followed by
/// the words of every second iteration step, appended without separators.
/// </summary>
/// <param name="tiffPath">Path of the image file.</param>
/// <returns>The text collected so far; an error is written to the debug output and not rethrown.</returns>
public static string imageToText(string tiffPath)
{
    string text = "";
    try
    {
        using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile(tiffPath))
        {
            var i = 1;
            using (var page = engine.Process(img))
            {
                text = page.GetText();
                Debug.WriteLine("Mean confidence: {0}", page.GetMeanConfidence());

                // The using block disposes the iterator; the extra explicit Dispose()
                // the original made inside it was redundant and has been removed.
                using (var iter = page.GetIterator())
                {
                    iter.Begin();
                    do
                    {
                        if (i % 2 == 0)
                        {
                            do
                            {
                                text = text + iter.GetText(PageIteratorLevel.Word);
                            } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                        }
                        i++;
                    } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                }
            }
        }
    }
    catch (Exception e)
    {
        Debug.WriteLine("Unexpected Error: " + e.Message);
        Debug.WriteLine("Details: ");
        Debug.WriteLine(e.ToString());
    }
    return text;
}
/// <summary>
/// Click handler: captures a camera image, rotates/flips it, runs English OCR over it,
/// adds the recognised text to <c>listBox1</c>, deletes the captured file and
/// increments the capture counter.
/// </summary>
private void BTN_Save_Click_1(object sender, EventArgs e)
{
    string filename = path + @"\img_ocr" + count.ToString();
    camera.Capture(filename);

    // The using blocks release the bitmap (and therefore the file lock) even when OCR
    // throws, and stop the engine and page from leaking on every click.
    using (Bitmap img = (Bitmap)Bitmap.FromFile(filename + ".jpg"))
    {
        img.RotateFlip(RotateFlipType.Rotate180FlipY);

        using (Tesseract.TesseractEngine tesseract = new Tesseract.TesseractEngine("./dataset", "eng", Tesseract.EngineMode.TesseractOnly))
        using (var page = tesseract.Process(img))
        {
            listBox1.Items.Add(page.GetText());
        }
    }

    File.Delete(filename + ".jpg");
    count++;
}
/// <summary>
/// Console entry point: loops forever. Each pass runs the first video input device
/// for one second, reads the temperature from "test3.jpg" via OCR, prints it and
/// sends it as a sensor event, then waits two seconds.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    while (true)
    {
        videoSource = new VideoCaptureDevice(new FilterInfoCollection(FilterCategory.VideoInputDevice)[0].MonikerString);
        videoSource.NewFrame += CaptureFrame;
        videoSource.Start();
        Thread.Sleep(1000);
        videoSource.Stop();

        // A new engine is created on every pass of an endless loop; dispose it each
        // time so native memory does not grow without bound.
        using (var engine = new TesseractEngine(@"tessdata", "letsgodigital", EngineMode.Default))
        {
            var image = new ElitechStc1000Image("test3.jpg", engine);
            var temp = image.GetTemperature();
            Console.WriteLine(temp);

            try
            {
                var sensorEvent = new SensorEvent { Temperature = temp };
                Console.WriteLine("{0} > Sending temperature: {1}", DateTime.Now, temp);

                var sink = new AzuresStreamAnalytics()
                    .WithConnectionString(ConfigurationManager.AppSettings["EventHubSendConnection"]);
                sink.Send(sensorEvent);
            }
            catch (Exception exception)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine("{0} > Exception: {1}", DateTime.Now, exception.Message);
                Console.ResetColor();
            }
        }

        Thread.Sleep(2000);
    }
}
/// <summary>
/// Runs English OCR over the given image and returns the trimmed text.
/// </summary>
/// <param name="engpath">Tessdata path handed to the engine.</param>
/// <param name="imgdata">Image to recognise.</param>
/// <returns>The recognised text with leading and trailing whitespace removed.</returns>
public string GetOCRText(string engpath, Image imgdata)
{
    using (var engine = new TesseractEngine(engpath, "eng", EngineMode.Default))
    using (Bitmap image = new Bitmap(imgdata))
    using (var pix = PixConverter.ToPix(image))
    using (var page = engine.Process(pix))
    {
        string trimmed = page.GetText().Trim();
        return trimmed;
    }
}
/// <summary>
/// Runs English OCR over the sample image and writes the words of every second
/// iteration step to the console. Errors are written to the console as well.
/// </summary>
public void Test()
{
    var samplePath = "../../phototest.tif";
    try
    {
        using (var engine = new TesseractEngine(@"../../tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile(samplePath))
        {
            var step = 1;
            using (var page = engine.Process(img))
            {
                // The full text is requested as in the original; the value is not used.
                page.GetText();

                using (var iter = page.GetIterator())
                {
                    iter.Begin();
                    do
                    {
                        bool evenStep = step % 2 == 0;
                        if (evenStep)
                        {
                            do
                            {
                                Console.WriteLine("word: " + iter.GetText(PageIteratorLevel.Word));
                            } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                        }
                        step++;
                    } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                }
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Unexpected Error: " + e.Message);
        Console.WriteLine("Details: ");
        Console.WriteLine(e.ToString());
    }
}
/// <summary>
/// Runs OCR over the bitmap with "tessedit_write_images" enabled and returns the
/// recognised text after it has passed through <c>RemoveSpecialCharacters</c>.
/// In DEBUG builds the mean confidence and the text are written to the debug output.
/// </summary>
/// <param name="image">Bitmap to recognise.</param>
/// <returns>The cleaned recognised text.</returns>
public string GetText(Bitmap image)
{
    using (var tesseract = new TesseractEngine(_dataPath, EngineLang, EngineMode.Default))
    {
        tesseract.SetVariable("tessedit_write_images", true);

        using (var pix = PixConverter.ToPix(image))
        using (var page = tesseract.Process(pix))
        {
            var rawText = page.GetText();
            var text = RemoveSpecialCharacters(rawText);
#if DEBUG
            System.Diagnostics.Debug.WriteLine("Confibialidade: " + page.GetMeanConfidence());
            System.Diagnostics.Debug.WriteLine("Placa: " + text);
#endif
            return text;
        }
    }
}
/// <summary>
/// Downloads an image and runs Hungarian OCR over a region of it.
/// </summary>
/// <param name="uri">Address of the image.</param>
/// <param name="regionPercentage">
/// Region whose Left/Top/Width/Height are multiplied by the bitmap's width/height to
/// get the pixel region passed to the engine.
/// </param>
/// <returns>The recognised text, or an empty string when the download fails.</returns>
public string DownloadAndProcessImage(Uri uri, Rect regionPercentage)
{
    // The engine is still created before the download, as in the original, but is now
    // disposed on every exit path, including the early return after a failed download.
    using (var engine = new TesseractEngine(_tessdataPath, "hun"))
    {
        byte[] imageData;
        try
        {
            using (var client = new WebClient())
            {
                imageData = client.DownloadData(uri);
            }
            StatisticsCollector.GetInstance().IncrementOutgoingWebRequestCount();
            _log.Debug("Image downloaded from `" + uri + "`. Size: " + imageData.Length + " Byte(s).");
        }
        catch (Exception e)
        {
            // The original message ended with the literal text "e"; log the actual error.
            _log.Error("Failed to download image from `" + uri + "`. Error: " + e.Message);
            return "";
        }

        // The stream is declared first so it is disposed after the bitmap that reads from it.
        using (var imageStream = new MemoryStream(imageData))
        using (var bitmap = new Bitmap(imageStream))
        {
            _log.Debug("Image size: " + bitmap.Size);

            // Calculate image region
            var region = new Tesseract.Rect(
                (int) (regionPercentage.Left*bitmap.Width),
                (int) (regionPercentage.Top*bitmap.Height),
                (int) (regionPercentage.Width*bitmap.Width),
                (int) (regionPercentage.Height*bitmap.Height));
            _log.Debug("OCR region: " + region);

            using (Page page = engine.Process(bitmap, region, PageSegMode.Auto))
            {
                string text = page.GetText();
                _log.Debug("Processed text length: " + text.Length);
                StatisticsCollector.GetInstance().IncrementOcredPageCount();
                return text;
            }
        }
    }
}
/// <summary>
/// Reads only the given region of the bitmap and returns the HOCR output, which is
/// also written to a fixed file.
/// </summary>
/// <param name="source">Bitmap to read from.</param>
/// <param name="x">Region x value passed to <c>Rect</c>.</param>
/// <param name="y">Region y value passed to <c>Rect</c>.</param>
/// <param name="width">Region width.</param>
/// <param name="height">Region height.</param>
/// <returns>The value of <c>GetHOCRText(3)</c> for the processed region.</returns>
public static string hocr(Bitmap source, int x, int y, int width, int height)
{
    using (var engine = new TesseractEngine(@"C:\Program Files\Tesseract-OCR\tessdata\", "kor", EngineMode.Default))
    {
        // Region of interest.
        var regionOfInterest = new Rect(x, y, width, height);

        string htext;
        using (var page = engine.Process(source, regionOfInterest, PageSegMode.Auto))
        {
            htext = page.GetHOCRText(3);

            // Save the recognised characters to the output file.
            System.IO.File.WriteAllText(@"C:\Program Files\PLOCR\textrecognition.html", htext);
        }

        return htext;
    }
}
/// <summary>
/// Solves a captcha: writes the image to a temporary file, runs OCR over it and
/// returns the recognised text with all line breaks removed.
/// </summary>
/// <param name="image">Byte array of the captcha image.</param>
/// <param name="config">
/// Captcha settings; when <c>NumeroDePalavras</c> is greater than 1, the words of
/// every second iteration step are appended to the full page text.
/// </param>
/// <returns>String with the captcha solution.</returns>
public string Break(byte[] image, CaptchaConfig config)
{
    var tempfilename = Path.GetTempFileName();
    try
    {
        File.WriteAllBytes(tempfilename, image);
        var stbResult = new StringBuilder();

        using (var engine = new TesseractEngine(ConfigManager.GetInstance().CAMINHO_ARQUIVO_IDIOMA, IDIOMA_PORTUGUES, EngineMode.Default))
        using (var img = Pix.LoadFromFile(tempfilename))
        {
            var i = 1;
            using (var page = engine.Process(img, null))
            {
                stbResult.AppendLine(page.GetText());

                if (config.NumeroDePalavras > 1)
                {
                    using (var iter = page.GetIterator())
                    {
                        iter.Begin();
                        do
                        {
                            if (i % 2 == 0)
                            {
                                do
                                {
                                    stbResult.AppendLine(iter.GetText(PageIteratorLevel.Word));
                                } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                            }
                            i++;
                        } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                    }
                }
            }
        }

        return stbResult.ToString().Replace("\r", string.Empty).Replace("\n", string.Empty);
    }
    finally
    {
        // GetTempFileName creates a real file. The original never removed it, so every
        // call left one behind. Cleanup is best-effort and must not mask an error
        // raised by the OCR itself.
        try
        {
            File.Delete(tempfilename);
        }
        catch (IOException)
        {
        }
        catch (UnauthorizedAccessException)
        {
        }
    }
}
/// <summary>
/// Runs OCR over the image at <paramref name="path"/> and refills <c>_words</c> with
/// one <c>Line</c> (bounding box, text and source path) per text line visited by the
/// iterator.
/// </summary>
/// <param name="path">Path of the image file; also stored in every created <c>Line</c>.</param>
private void OcrRecognize(string path)
{
    _tesseractEngine = new TesseractEngine(tessData, Lang, EngineMode.Default);
    try
    {
        _words = new List<Line>();

        // The using blocks release the image and page even when recognition throws;
        // the original only disposed them on the success path.
        using (var img = Pix.LoadFromFile(path))
        using (var page = _tesseractEngine.Process(img))
        {
            // The full text is requested as in the original; the value is not used.
            page.GetText();

            using (var iter = page.GetIterator())
            {
                iter.Begin();
                Rect rect;
                do
                {
                    iter.TryGetBoundingBox(PageIteratorLevel.TextLine, out rect);
                    _words.Add(new Line(rect.X1, rect.Y1, rect.Width, rect.Height, iter.GetText(PageIteratorLevel.TextLine), path));
                } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.TextLine));
            }
        }
    }
    finally
    {
        // The engine is per-call; dispose it on every exit path, as the original did at the end.
        _tesseractEngine.Dispose();
    }
}
/// <summary>
/// Decrypts an image file, adjusts its contrast, saves it to a temporary file under
/// <paramref name="p"/>, runs French OCR over that file and returns the trimmed text.
/// </summary>
/// <param name="key">Key passed to the AES file decryption.</param>
/// <param name="p">Directory that contains "tessdata"; also receives the temporary image.</param>
/// <param name="fileName">Path of the encrypted image; its extension is reused for the temporary file.</param>
/// <returns>The recognised text with leading and trailing whitespace removed.</returns>
public static string AnalyzeFile(string key, string p, string fileName)
{
    var data = System.IO.Path.Combine(p, "tessdata");
    using (var engine = new TesseractEngine(data, "fra", EngineMode.Default))
    {
        string bitmapPath = System.IO.Path.Combine(p, Guid.NewGuid().ToString() + System.IO.Path.GetExtension(fileName));
        try
        {
            // The stream is declared first so it outlives the bitmap that reads from it.
            using (var ms = new System.IO.MemoryStream(Utility.Security.AES.DecryptFile(key, fileName)))
            using (var decoded = new System.Drawing.Bitmap(ms))
            {
                System.Drawing.Bitmap adjusted = Treatment.SetContrast(decoded, 20);
                try
                {
                    adjusted.Save(bitmapPath);
                }
                finally
                {
                    // SetContrast may hand back the same instance; only dispose a distinct result.
                    if (adjusted != null && !object.ReferenceEquals(adjusted, decoded))
                    {
                        adjusted.Dispose();
                    }
                }
            }

            using (var img = Pix.LoadFromFile(bitmapPath))
            using (var page = engine.Process(img))
            {
                return page.GetText().Trim();
            }
        }
        finally
        {
            // The temporary file holds decrypted content; remove it on failure too.
            // The original deleted it only after a successful Process call.
            System.IO.File.Delete(bitmapPath);
        }
    }
}