/// <summary>
/// Recognizes the text of every image loaded from <paramref name="imageFile"/> and
/// writes it to <paramref name="outputFile"/> as UTF-8, overwriting any existing file.
/// </summary>
/// <param name="imageFile">Path of the image file to load.</param>
/// <param name="outputFile">Path of the file that receives the recognized text.</param>
/// <param name="langCode">Language code handed to the OCR engine and to the post-processor.</param>
/// <param name="pageSegMode">Page segmentation mode assigned to the OCR engine.</param>
/// <param name="hocr">Assigned to the engine's <c>Hocr</c> property; when true, no post-corrections are applied.</param>
public static void PerformOCR(string imageFile, string outputFile, string langCode, string pageSegMode, bool hocr)
{
    IList<Image> pages = null;

    try
    {
        pages = ImageIOHelper.GetImageList(new FileInfo(imageFile));

        OCR<Image> engine = new OCRImages();
        engine.PageSegMode = pageSegMode;
        engine.Hocr = hocr;

        string text = engine.RecognizeText(pages, langCode);

        // Post-corrections are only applied when hocr output was not requested.
        if (!hocr)
        {
            // Language-specific post-processing first, then the generic OCR-error
            // pass, then letter-case correction.
            text = TextUtilities.CorrectLetterCases(
                TextUtilities.CorrectOCRErrors(
                    Processor.PostProcess(text, langCode)));
        }

        using (StreamWriter writer = new StreamWriter(outputFile, false, new System.Text.UTF8Encoding()))
        {
            writer.Write(text);
        }
    }
    finally
    {
        // Drop the reference to the loaded images.
        pages = null;
    }
}
/// <summary>
/// Takes the next file path off <c>queue</c>, recognizes its text with the current
/// language, post-processes the result and writes it as UTF-8 to
/// <c>outputFolder</c> under the image's file name with ".txt" appended.
/// </summary>
/// <remarks>
/// Returns without doing anything further when no image list could be loaded.
/// Progress and failures are posted to the status form's text box through
/// <c>BeginInvoke</c>; a failure also prints the stack trace to the console.
/// </remarks>
private void AutoOCR()
{
    FileInfo source = new FileInfo(queue.Dequeue());
    IList<Image> pages = ImageIOHelper.GetImageList(source);

    if (pages == null)
    {
        return;
    }

    try
    {
        // Announce the file being worked on.
        this.statusForm.TextBox.BeginInvoke(
            new UpdateStatusEvent(this.WorkerUpdate),
            new object[] { source.FullName });

        OCR engine = new OCR();
        // NOTE(review): the meaning of the -1 argument is defined by OCR.RecognizeText,
        // which is not visible here — presumably "all pages"; confirm.
        string recognized = engine.RecognizeText(pages, -1, curLangCode);

        // Correct common OCR errors for the current language.
        string corrected = Processor.PostProcess(recognized, curLangCode);

        string targetPath = Path.Combine(outputFolder, source.Name + ".txt");
        using (StreamWriter writer = new StreamWriter(targetPath, false, new System.Text.UTF8Encoding()))
        {
            writer.Write(corrected);
        }
    }
    catch (Exception ex)
    {
        // Switch the UI culture to the selected language before the localized
        // resource string below is looked up.
        Thread.CurrentThread.CurrentUICulture = new CultureInfo(selectedUILanguage);

        string failureNote = " ** " + Properties.Resources.Cannotprocess + source.Name + " **";
        this.statusForm.TextBox.BeginInvoke(
            new UpdateStatusEvent(this.WorkerUpdate),
            new object[] { failureNote });

        Console.WriteLine(ex.StackTrace);
    }
}
/// <summary>
/// Background-worker handler that loads the image list for the file path passed in
/// <c>e.Argument</c> and stores it in the <c>imageList</c> field.
/// </summary>
/// <remarks>
/// A path ending in ".pdf" is first converted to a TIFF; that intermediate file is
/// deleted again whether or not loading succeeds. <c>e.Result</c> is set to the
/// <see cref="FileInfo"/> of the originally selected file.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Carries the selected file path in and the file info out.</param>
private void backgroundWorker3_DoWork(object sender, DoWorkEventArgs e)
{
    string selectedPath = (string)e.Argument;
    FileInfo selectedFile = new FileInfo(selectedPath);
    bool isPdf = selectedPath.ToLower().EndsWith(".pdf");

    if (!isPdf)
    {
        imageList = ImageIOHelper.GetImageList(selectedFile);
    }
    else
    {
        string tiffPath = null;
        try
        {
            tiffPath = Utilities.ConvertPdf2Tiff(selectedPath);
            imageList = ImageIOHelper.GetImageList(new FileInfo(tiffPath));
        }
        finally
        {
            // Remove the intermediate TIFF, if one was produced.
            bool tiffWasCreated = tiffPath != null && File.Exists(tiffPath);
            if (tiffWasCreated)
            {
                File.Delete(tiffPath);
            }
        }
    }

    e.Result = selectedFile;
}
/// <summary>
/// Background-worker handler that loads the image list for the file path passed in
/// <c>e.Argument</c> into the <c>imageList</c> field and hands the file's
/// <see cref="FileInfo"/> back through <c>e.Result</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Carries the selected file path in and the file info out.</param>
private void backgroundWorkerLoad_DoWork(object sender, DoWorkEventArgs e)
{
    FileInfo selectedFile = new FileInfo((string)e.Argument);

    imageList = ImageIOHelper.GetImageList(selectedFile);

    e.Result = selectedFile;
}
/// <summary>
/// Performs OCR for bulk/batch and console operations.
/// </summary>
/// <remarks>
/// The parent directory of <paramref name="outputFile"/> is created when it does not
/// exist. A PDF input is converted to a temporary TIFF before recognition; that
/// temporary file is deleted when the method exits, whether or not OCR succeeded.
/// </remarks>
/// <param name="imageFile">Image file; a path ending in ".pdf" (any letter case) is converted to TIFF first</param>
/// <param name="outputFile">Output file without extension</param>
/// <param name="langCode">language code</param>
/// <param name="pageSegMode">page segmentation mode</param>
/// <param name="outputFormat">format of output file. Possible values: <code>text</code>, <code>text+</code> (with post-corrections), <code>hocr</code></param>
public static void PerformOCR(string imageFile, string outputFile, string langCode, string pageSegMode, string outputFormat)
{
    // Path of the TIFF generated from a PDF input; stays null for ordinary images.
    string workingTiffFile = null;

    try
    {
        DirectoryInfo dir = Directory.GetParent(outputFile);
        if (dir != null && !dir.Exists)
        {
            dir.Create();
        }

        bool postprocess = "text+" == outputFormat;

        OCR<Image> ocrEngine = new OCRImages();
        ocrEngine.PageSegMode = pageSegMode;
        ocrEngine.Language = langCode;

        // The engine only knows the base format; the "+" merely requests post-corrections.
        string engineFormat = outputFormat.Replace("+", string.Empty);
        ocrEngine.OutputFormat = engineFormat;

        // convert PDF to TIFF
        if (imageFile.EndsWith(".pdf", System.StringComparison.OrdinalIgnoreCase))
        {
            workingTiffFile = PdfUtilities.ConvertPdf2Tiff(imageFile);
            imageFile = workingTiffFile;
        }

        IList<Image> imageList = ImageIOHelper.GetImageList(new FileInfo(imageFile));
        string result = ocrEngine.RecognizeText(imageList, imageFile);

        // post-corrections for text+ output
        if (postprocess)
        {
            // postprocess to correct common OCR errors
            result = Processor.PostProcess(result, langCode);
            // correct common errors caused by OCR
            result = TextUtilities.CorrectOCRErrors(result);
            // correct letter cases
            result = TextUtilities.CorrectLetterCases(result);
        }

        // PDF output is not yet supported; every format is written out as UTF-8 text.
        string filename = outputFile + "." + engineFormat.Replace("text", "txt").Replace("hocr", "html");
        using (StreamWriter sw = new StreamWriter(filename, false, new System.Text.UTF8Encoding()))
        {
            sw.Write(result);
        }
    }
    finally
    {
        // Do not leave the intermediate TIFF behind; in batch runs these would pile up.
        if (workingTiffFile != null && File.Exists(workingTiffFile))
        {
            File.Delete(workingTiffFile);
        }
    }
}
/// <summary>
/// Opens image file.
/// </summary>
/// <remarks>
/// A path ending in ".pdf" (any letter case) is converted to a temporary TIFF first;
/// the temporary file is removed even when loading it fails. On a conversion or
/// loading error, or when no image list could be obtained, an error box is shown
/// and the method returns without touching the rest of the UI.
/// </remarks>
/// <param name="selectedImageFile">Path of the image or PDF file to open.</param>
public void openFile(string selectedImageFile)
{
    FileInfo imageFile = new FileInfo(selectedImageFile);

    if (selectedImageFile.EndsWith(".pdf", System.StringComparison.OrdinalIgnoreCase))
    {
        try
        {
            string workingTiffFileName = null;
            try
            {
                workingTiffFileName = Utilities.ConvertPdf2Tiff(selectedImageFile);
                imageList = ImageIOHelper.GetImageList(new FileInfo(workingTiffFileName));
            }
            finally
            {
                // Clean up the intermediate TIFF on success and on failure alike.
                if (workingTiffFileName != null && File.Exists(workingTiffFileName))
                {
                    File.Delete(workingTiffFileName);
                }
            }
        }
        catch (Exception e)
        {
            MessageBox.Show(this, e.Message + "\nPlease install Ghostscript and/or set system path to the library object.", strProgName, MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }
    }
    else
    {
        imageList = ImageIOHelper.GetImageList(imageFile);
    }

    if (imageList == null)
    {
        MessageBox.Show(this, Properties.Resources.Cannotloadimage, strProgName, MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    imageTotal = imageList.Count;
    imageIndex = 0;

    displayImage();

    this.Text = imageFile.Name + " - " + strProgName;
    this.toolStripStatusLabel1.Text = null;
    this.pictureBox1.Deselect();

    this.toolStripBtnFitImage.Enabled = true;
    this.toolStripBtnZoomIn.Enabled = true;
    this.toolStripBtnZoomOut.Enabled = true;
    this.toolStripBtnRotateCCW.Enabled = true;
    this.toolStripBtnRotateCW.Enabled = true;

    // Page navigation is switched off only for a single-image list.
    bool enableNavigation = imageList.Count != 1;
    this.toolStripBtnNext.Enabled = enableNavigation;
    this.toolStripBtnPrev.Enabled = enableNavigation;

    setButton();
}
/// <summary>
/// Console entry point: recognizes the text of an image file and writes the
/// post-corrected result to "&lt;outputfile&gt;.txt" as UTF-8.
/// </summary>
/// <remarks>
/// Expected arguments: <c>imagefile outputfile [-l langcode]</c>. The usage line is
/// printed when no arguments are given, when help is requested ("-?" or "-help"),
/// or when exactly one or three arguments are supplied. Any exception is reported
/// on the console as "Error: ..." rather than propagated.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
void PerformOCR(string[] args)
{
    try
    {
        // Check for missing arguments before touching args[0]; otherwise an empty
        // command line ends up as an index-out-of-range error instead of the usage text.
        if (args == null || args.Length == 0
            || args[0] == "-?" || args[0] == "-help"
            || args.Length == 1 || args.Length == 3)
        {
            Console.WriteLine("Usage: vietocr imagefile outputfile [-l langcode]");
            return;
        }

        FileInfo imageFile = new FileInfo(args[0]);
        FileInfo outputFile = new FileInfo(args[1]);

        if (!imageFile.Exists)
        {
            Console.WriteLine("Input file does not exist.");
            return;
        }

        string curLangCode;
        if (args.Length == 2)
        {
            curLangCode = "eng"; //default language
        }
        else
        {
            // The value following the option switch at args[2].
            curLangCode = args[3];
        }

        IList<Image> imageList = ImageIOHelper.GetImageList(imageFile);
        OCR<Image> ocrEngine = new OCRImages();
        string result = ocrEngine.RecognizeText(imageList, curLangCode);

        // postprocess to correct common OCR errors
        result = Processor.PostProcess(result, curLangCode);
        // correct common errors caused by OCR
        result = TextUtilities.CorrectOCRErrors(result);
        // correct letter cases
        result = TextUtilities.CorrectLetterCases(result);

        using (StreamWriter sw = new StreamWriter(outputFile.FullName + ".txt", false, new System.Text.UTF8Encoding()))
        {
            sw.Write(result);
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Error: " + e.Message);
    }
}
/// <summary>
/// Loads the image list for <paramref name="imageFile"/> into the <c>imageList</c>
/// field and resets the page counters (<c>imageTotal</c>, <c>imageIndex</c>).
/// </summary>
/// <remarks>
/// Any exception raised while loading or counting is written to the console and
/// answered with the localized "cannot load image" message box.
/// </remarks>
/// <param name="imageFile">The image file to load.</param>
void loadImage(FileInfo imageFile)
{
    try
    {
        imageList = ImageIOHelper.GetImageList(imageFile);

        // Start at the first page of the freshly loaded list.
        imageTotal = imageList.Count;
        imageIndex = 0;
    }
    catch (Exception loadError)
    {
        Console.Write(loadError.Message);
        MessageBox.Show(Properties.Resources.Cannotloadimage);
    }
}
/// <summary>
/// Opens an image file, displays its first image and updates the window title,
/// status label and toolbar buttons accordingly.
/// </summary>
/// <remarks>
/// When no image list could be obtained, the localized "cannot load image" message
/// box is shown and nothing else changes.
/// </remarks>
/// <param name="selectedImageFile">Path of the image file to open.</param>
public void openFile(string selectedImageFile)
{
    FileInfo chosenFile = new FileInfo(selectedImageFile);

    imageList = ImageIOHelper.GetImageList(chosenFile);
    if (imageList == null)
    {
        MessageBox.Show(Properties.Resources.Cannotloadimage);
        return;
    }

    imageTotal = imageList.Count;
    imageIndex = 0;
    displayImage();

    this.Text = chosenFile.Name + " - " + strProgName;
    this.toolStripStatusLabel1.Text = null;
    this.pictureBox1.Deselect();

    // View-manipulation buttons become available once an image is shown.
    this.toolStripBtnFitImage.Enabled = true;
    this.toolStripBtnZoomIn.Enabled = true;
    this.toolStripBtnZoomOut.Enabled = true;
    this.toolStripBtnRotateCCW.Enabled = true;
    this.toolStripBtnRotateCW.Enabled = true;

    // Page navigation is switched off only for a single-image list.
    bool enableNavigation = imageList.Count != 1;
    this.toolStripBtnNext.Enabled = enableNavigation;
    this.toolStripBtnPrev.Enabled = enableNavigation;

    setButton();
}
/// <summary>
/// Console entry point: recognizes the text of an image file and writes the
/// post-corrected result to "&lt;outputfile&gt;.txt" as UTF-8.
/// </summary>
/// <remarks>
/// Expected arguments: <c>imagefile outputfile [-l lang] [-psm pagesegmode]</c>.
/// The usage line is printed when no arguments are given, when help is requested
/// ("-?" or "-help"), or when exactly one, three or five arguments are supplied.
/// With six arguments the page segmentation mode must parse as a 16-bit integer.
/// Any exception is reported on the console as "Error: ..." rather than propagated.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
void PerformOCR(string[] args)
{
    try
    {
        // Check for missing arguments before touching args[0]; otherwise an empty
        // command line ends up as an index-out-of-range error instead of the usage text.
        if (args == null || args.Length == 0
            || args[0] == "-?" || args[0] == "-help"
            || args.Length == 1 || args.Length == 3 || args.Length == 5)
        {
            Console.WriteLine("Usage: vietocr imagefile outputfile [-l lang] [-psm pagesegmode]");
            return;
        }

        FileInfo imageFile = new FileInfo(args[0]);
        FileInfo outputFile = new FileInfo(args[1]);

        if (!imageFile.Exists)
        {
            Console.WriteLine("Input file does not exist.");
            return;
        }

        string curLangCode = "eng"; //default language
        string psm = "3"; // or alternatively, "PSM_AUTO"; // 3 - Fully automatic page segmentation, but no OSD (default)

        if (args.Length == 4)
        {
            // A single option: either the language or the page segmentation mode.
            if (args[2].Equals("-l"))
            {
                curLangCode = args[3];
            }
            else if (args[2].Equals("-psm"))
            {
                psm = args[3];
            }
        }
        else if (args.Length == 6)
        {
            // Both options, taken positionally: language value first, then psm value.
            curLangCode = args[3];
            psm = args[5];

            // Validate without using an exception for control flow.
            short parsedPsm;
            if (!Int16.TryParse(psm, out parsedPsm))
            {
                Console.WriteLine("Invalid input value.");
                return;
            }
        }

        IList<Image> imageList = ImageIOHelper.GetImageList(imageFile);
        OCR<Image> ocrEngine = new OCRImages();
        ocrEngine.PSM = psm;
        string result = ocrEngine.RecognizeText(imageList, curLangCode);

        // postprocess to correct common OCR errors
        result = Processor.PostProcess(result, curLangCode);
        // correct common errors caused by OCR
        result = TextUtilities.CorrectOCRErrors(result);
        // correct letter cases
        result = TextUtilities.CorrectLetterCases(result);

        using (StreamWriter sw = new StreamWriter(outputFile.FullName + ".txt", false, new System.Text.UTF8Encoding()))
        {
            sw.Write(result);
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Error: " + e.Message);
    }
}