Exemple #1
0
        //[System.Diagnostics.DebuggerNonUserCodeAttribute()]
        /// <summary>
        /// DoWork handler of the OCR background worker. Recognizes the cloned images of the
        /// <c>OCRImageEntity</c> passed in <c>e.Argument</c> one image at a time and hands each
        /// image's recognized text to the worker through <c>ReportProgress</c>.
        /// </summary>
        /// <param name="sender">The <c>BackgroundWorker</c> that raised this event.</param>
        /// <param name="e">Event data; <c>Argument</c> carries the entity to process and
        /// <c>Cancel</c> is set to <c>true</c> when a pending cancellation is observed.</param>
        private void backgroundWorkerOcr_DoWork(object sender, DoWorkEventArgs e)
        {
            // Direct cast instead of "as": an unexpected sender fails right here with an
            // InvalidCastException instead of a NullReferenceException inside the loop.
            BackgroundWorker worker = (BackgroundWorker)sender;

            OCRImageEntity entity = (OCRImageEntity)e.Argument;
            OCR<Image> ocrEngine = new OCRImages();

            ocrEngine.PageSegMode   = selectedPSM;
            ocrEngine.OcrEngineMode = selectedOEM;
            ocrEngine.Language      = entity.Language;

            // Images are recognized one by one so that cancellation can be honored between
            // images and each partial result can be reported as soon as it is available.
            // Note that e.Result is not assigned by this handler.
            IList<Image> images = entity.ClonedImages;

            for (int i = 0; i < images.Count; i++)
            {
                if (worker.CancellationPending)
                {
                    e.Cancel = true;
                    break;
                }

                // Build a one-element list through the IList<Image> interface rather than
                // downcasting to List<Image>, which would throw for any other list implementation.
                List<Image> currentImage = new List<Image>(1);
                currentImage.Add(images[i]);

                string result = ocrEngine.RecognizeText(currentImage, entity.Inputfilename, entity.Rect, worker, e);

                // The first argument is the zero-based image index, not a real percentage.
                worker.ReportProgress(i, result);
            }
        }
Exemple #2
0
        /// <summary>
        /// Performs OCR for bulk/batch and console operations.
        /// </summary>
        /// <param name="imageFile">Image file; a file whose name ends in <c>.pdf</c> (any letter case) is first converted to TIFF</param>
        /// <param name="outputFile">Output file without extension; its parent directory is created if it does not exist</param>
        /// <param name="langCode">language code</param>
        /// <param name="pageSegMode">page segmentation mode</param>
        /// <param name="outputFormat">format of output file. Possible values: <code>text</code>, <code>text+</code> (with post-corrections), <code>hocr</code></param>
        /// <param name="deskew">deskew</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="imageFile"/> or <paramref name="outputFormat"/> is <c>null</c>.</exception>
        public static void PerformOCR(string imageFile, string outputFile, string langCode, string pageSegMode, string outputFormat, bool deskew)
        {
            // Validate up front so that bad arguments fail before the output directory is created.
            if (imageFile == null)
            {
                throw new System.ArgumentNullException("imageFile");
            }

            if (outputFormat == null)
            {
                throw new System.ArgumentNullException("outputFormat");
            }

            DirectoryInfo dir = Directory.GetParent(outputFile);

            if (dir != null && !dir.Exists)
            {
                dir.Create();
            }

            // "text+" means plain-text output followed by post-corrections.
            bool postprocess = "text+" == outputFormat;

            OCR<Image> ocrEngine = new OCRImages();

            ocrEngine.PageSegMode  = pageSegMode;
            ocrEngine.Language     = langCode;
            // The engine receives the format name without the "+" marker.
            ocrEngine.OutputFormat = outputFormat.Replace("+", string.Empty);
            ocrEngine.OutputFile   = outputFile;
            ocrEngine.Deskew       = deskew;

            // convert PDF to TIFF
            // Ordinal, case-insensitive match: file extensions are not linguistic text, so the
            // check must not depend on the current culture.
            if (imageFile.EndsWith(".pdf", System.StringComparison.OrdinalIgnoreCase))
            {
                // NOTE(review): the TIFF returned here is never deleted by this method - confirm
                // whether it is a temporary file that should be cleaned up after processing.
                imageFile = PdfUtilities.ConvertPdf2Tiff(imageFile);
            }

            ocrEngine.ProcessFile(imageFile);

            // post-corrections for text+ output
            if (postprocess)
            {
                string filename = outputFile + ".txt";
                string result   = File.ReadAllText(filename);
                // postprocess to correct common OCR errors
                result = Processor.PostProcess(result, langCode);
                // correct letter cases
                result = TextUtilities.CorrectLetterCases(result);

                // Overwrite the text file in place; the parameterless UTF8Encoding emits no BOM.
                File.WriteAllText(filename, result, new System.Text.UTF8Encoding());
            }
        }
Exemple #3
0
        /// <summary>
        /// Rebuilds the segmented-region overlay of the image canvas for the current image.
        /// The overlay is cleared when the segmented-regions menu item is unchecked, when no
        /// image list is loaded, or when the actual-size button is enabled. Otherwise each
        /// checked level contributes its regions under a fixed brush: Block = Gray,
        /// Para = Green, TextLine = Red, Word = Blue, Symbol = Magenta.
        /// </summary>
        protected void setSegmentedRegions()
        {
            if (!this.segmentedRegionsToolStripMenuItem.IsChecked || imageList == null || this.buttonActualSize.IsEnabled)
            {
                this.imageCanvas.SegmentedRegions = null;
                return;
            }

            OCR<System.Drawing.Image> ocrEngine = new OCRImages();

            // Reuse the canvas' current map so that levels already present are not recomputed.
            Dictionary<System.Windows.Media.SolidColorBrush, List<System.Drawing.Rectangle>> map =
                this.imageCanvas.SegmentedRegions
                ?? new Dictionary<System.Windows.Media.SolidColorBrush, List<System.Drawing.Rectangle>>();

            System.Drawing.Bitmap image = (System.Drawing.Bitmap)imageList[imageIndex];

            syncSegmentedRegions(map, toolStripMenuItemBlock.IsChecked, System.Windows.Media.Brushes.Gray, ocrEngine, image, PageIteratorLevel.Block);
            syncSegmentedRegions(map, toolStripMenuItemPara.IsChecked, System.Windows.Media.Brushes.Green, ocrEngine, image, PageIteratorLevel.Para);
            syncSegmentedRegions(map, toolStripMenuItemTextLine.IsChecked, System.Windows.Media.Brushes.Red, ocrEngine, image, PageIteratorLevel.TextLine);
            syncSegmentedRegions(map, toolStripMenuItemWord.IsChecked, System.Windows.Media.Brushes.Blue, ocrEngine, image, PageIteratorLevel.Word);
            syncSegmentedRegions(map, toolStripMenuItemSymbol.IsChecked, System.Windows.Media.Brushes.Magenta, ocrEngine, image, PageIteratorLevel.Symbol);

            this.imageCanvas.SegmentedRegions = map;
        }

        /// <summary>
        /// Brings the map entry for one segmentation level in line with its menu state:
        /// removes the entry when the level is disabled, and computes and adds the regions
        /// when the level is enabled but the map has no entry for the brush yet.
        /// </summary>
        /// <param name="map">Brush-to-regions map to update in place.</param>
        /// <param name="enabled">Whether the menu item of this level is checked.</param>
        /// <param name="brush">Brush used as the map key for this level.</param>
        /// <param name="ocrEngine">Engine used to compute the segmented regions.</param>
        /// <param name="image">Image whose regions are computed.</param>
        /// <param name="level">Page iterator level to segment at.</param>
        private static void syncSegmentedRegions(
            Dictionary<System.Windows.Media.SolidColorBrush, List<System.Drawing.Rectangle>> map,
            bool enabled,
            System.Windows.Media.SolidColorBrush brush,
            OCR<System.Drawing.Image> ocrEngine,
            System.Drawing.Bitmap image,
            PageIteratorLevel level)
        {
            if (!enabled)
            {
                map.Remove(brush);
                return;
            }

            // An existing entry is kept as is; segmentation only runs for a missing level.
            if (!map.ContainsKey(brush))
            {
                List<System.Drawing.Rectangle> regions = ocrEngine.GetSegmentedRegions(image, level);
                map.Add(brush, regions);
            }
        }