//[System.Diagnostics.DebuggerNonUserCodeAttribute()]
/// <summary>
/// DoWork handler of the OCR background worker. Runs recognition over the
/// images of the <c>OCRImageEntity</c> passed as the event argument, one image
/// at a time, and reports each image's recognized text through
/// <c>ReportProgress</c>.
/// </summary>
/// <param name="sender">The BackgroundWorker that raised this event.</param>
/// <param name="e">Event data; <c>e.Argument</c> carries the OCRImageEntity, and
/// <c>e.Cancel</c> is set to true when a pending cancellation is observed.</param>
private void backgroundWorkerOcr_DoWork(object sender, DoWorkEventArgs e)
{
    // Direct cast instead of "as": a sender of the wrong type fails right here
    // with a clear InvalidCastException rather than a NullReferenceException later.
    BackgroundWorker worker = (BackgroundWorker)sender;
    OCRImageEntity entity = (OCRImageEntity)e.Argument;

    OCR<Image> ocrEngine = new OCRImages();
    ocrEngine.PageSegMode = selectedPSM;
    ocrEngine.OcrEngineMode = selectedOEM;
    ocrEngine.Language = entity.Language;

    // Results are not assigned to e.Result; instead every image's text is
    // handed to the ProgressChanged handler as soon as it is available.
    IList<Image> images = entity.ClonedImages;

    for (int i = 0; i < images.Count; i++)
    {
        // Check for cancellation between images.
        if (worker.CancellationPending)
        {
            e.Cancel = true;
            break;
        }

        // Wrap the current image in its own one-element list. Going through the
        // IList indexer avoids downcasting ClonedImages to List<Image>, which
        // would throw for any other IList implementation.
        List<Image> singleImage = new List<Image>(1) { images[i] };

        string result = ocrEngine.RecognizeText(singleImage, entity.Inputfilename, entity.Rect, worker, e);
        worker.ReportProgress(i, result); // i is the image index, not really a percentage
    }
}
/// <summary>
/// Performs OCR for bulk/batch and console operations.
/// </summary>
/// <remarks>
/// Creates the parent directory of <paramref name="outputFile"/> when it does not
/// exist. For the <c>text+</c> format, the file <c>outputFile + ".txt"</c> is read
/// back after processing, post-corrected, and rewritten as UTF-8.
/// </remarks>
/// <param name="imageFile">Image file. A name ending in <c>.pdf</c> (any letter case) is converted to TIFF first.</param>
/// <param name="outputFile">Output file without extension</param>
/// <param name="langCode">language code</param>
/// <param name="pageSegMode">page segmentation mode</param>
/// <param name="outputFormat">format of output file. Possible values: <code>text</code>, <code>text+</code> (with post-corrections), <code>hocr</code></param>
/// <param name="deskew">deskew</param>
public static void PerformOCR(string imageFile, string outputFile, string langCode, string pageSegMode, string outputFormat, bool deskew)
{
    DirectoryInfo dir = Directory.GetParent(outputFile);
    if (dir != null && !dir.Exists)
    {
        dir.Create();
    }

    // "text+" means plain text output followed by post-corrections.
    bool postprocess = "text+" == outputFormat;

    OCR<Image> ocrEngine = new OCRImages();
    ocrEngine.PageSegMode = pageSegMode;
    ocrEngine.Language = langCode;
    // The engine is given the format name without the "+" marker.
    ocrEngine.OutputFormat = outputFormat.Replace("+", string.Empty);
    ocrEngine.OutputFile = outputFile;
    ocrEngine.Deskew = deskew;

    // convert PDF to TIFF
    // Ordinal, case-insensitive match: no lower-cased copy of the path is
    // allocated and the result does not depend on the current culture.
    if (imageFile.EndsWith(".pdf", System.StringComparison.OrdinalIgnoreCase))
    {
        imageFile = PdfUtilities.ConvertPdf2Tiff(imageFile);
    }

    ocrEngine.ProcessFile(imageFile);

    // post-corrections for text+ output
    if (postprocess)
    {
        // NOTE(review): presumably the engine wrote its text output to this path - confirm.
        string filename = outputFile + ".txt";
        string result = File.ReadAllText(filename);

        // postprocess to correct common OCR errors
        result = Processor.PostProcess(result, langCode);

        // correct letter cases
        result = TextUtilities.CorrectLetterCases(result);

        // Overwrite the file with the corrected text, same encoding instance as before.
        File.WriteAllText(filename, result, new System.Text.UTF8Encoding());
    }
}
/// <summary>
/// Rebuilds the brush-to-rectangles map shown on the image canvas according to
/// which segmentation-level menu items are checked. The map is cleared (set to
/// null) when the feature is switched off, no images are loaded, or the
/// "actual size" button is enabled.
/// </summary>
protected void setSegmentedRegions()
{
    bool canShowRegions = this.segmentedRegionsToolStripMenuItem.IsChecked
        && imageList != null
        && !this.buttonActualSize.IsEnabled;

    if (!canShowRegions)
    {
        this.imageCanvas.SegmentedRegions = null;
        return;
    }

    OCR<System.Drawing.Image> engine = new OCRImages();

    // Reuse the canvas' current map when there is one so that layers computed
    // earlier are kept; otherwise start from an empty map.
    Dictionary<System.Windows.Media.SolidColorBrush, List<System.Drawing.Rectangle>> regionsByBrush =
        this.imageCanvas.SegmentedRegions
        ?? new Dictionary<System.Windows.Media.SolidColorBrush, List<System.Drawing.Rectangle>>();

    System.Drawing.Bitmap currentImage = (System.Drawing.Bitmap)imageList[imageIndex];

    // One layer per page-iterator level, each drawn with its own brush.
    UpdateSegmentedRegionLayer(regionsByBrush, toolStripMenuItemBlock.IsChecked, System.Windows.Media.Brushes.Gray, PageIteratorLevel.Block, engine, currentImage);
    UpdateSegmentedRegionLayer(regionsByBrush, toolStripMenuItemPara.IsChecked, System.Windows.Media.Brushes.Green, PageIteratorLevel.Para, engine, currentImage);
    UpdateSegmentedRegionLayer(regionsByBrush, toolStripMenuItemTextLine.IsChecked, System.Windows.Media.Brushes.Red, PageIteratorLevel.TextLine, engine, currentImage);
    UpdateSegmentedRegionLayer(regionsByBrush, toolStripMenuItemWord.IsChecked, System.Windows.Media.Brushes.Blue, PageIteratorLevel.Word, engine, currentImage);
    UpdateSegmentedRegionLayer(regionsByBrush, toolStripMenuItemSymbol.IsChecked, System.Windows.Media.Brushes.Magenta, PageIteratorLevel.Symbol, engine, currentImage);

    this.imageCanvas.SegmentedRegions = regionsByBrush;
}

/// <summary>
/// Adds or removes a single region layer in <paramref name="map"/>: when
/// <paramref name="show"/> is true and the brush has no entry yet, the regions
/// for <paramref name="level"/> are computed and stored under the brush; when
/// <paramref name="show"/> is false the brush's entry is removed.
/// </summary>
private static void UpdateSegmentedRegionLayer(
    Dictionary<System.Windows.Media.SolidColorBrush, List<System.Drawing.Rectangle>> map,
    bool show,
    System.Windows.Media.SolidColorBrush brush,
    PageIteratorLevel level,
    OCR<System.Drawing.Image> ocrEngine,
    System.Drawing.Bitmap image)
{
    if (!show)
    {
        map.Remove(brush);
        return;
    }

    // An existing entry is left untouched; regions are only computed once per brush.
    if (map.ContainsKey(brush))
    {
        return;
    }

    List<System.Drawing.Rectangle> layerRegions = ocrEngine.GetSegmentedRegions(image, level);
    map.Add(brush, layerRegions);
}