예제 #1
0
        /// <summary>
        /// Runs Tesseract page-layout analysis on a sample image, prints the resulting
        /// layout to the console, and saves a copy of the image with every detected
        /// block drawn on top of it.
        /// </summary>
        /// <remarks>
        /// Input and output paths are hard-coded; this is a manual smoke test rather
        /// than a reusable routine.
        /// </remarks>
        private static void AnalyseLayout()
        {
            string imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\phototest.tif";
            //imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\eurotext.tif";

            // TesseractProcessor is disposable, so release it deterministically
            // instead of leaving it to the finalizer.
            using (TesseractProcessor processor = new TesseractProcessor())
            {
                processor.InitForAnalysePage();
                processor.SetPageSegMode(ePageSegMode.PSM_AUTO);

                // Direct cast: if the file does not decode to a Bitmap, fail right here
                // with InvalidCastException rather than handing null to the analyser.
                using (Bitmap bmp = (Bitmap)Bitmap.FromFile(imageFile))
                {
                    DocumentLayout doc = processor.AnalyseLayout(bmp);
                    Console.WriteLine(doc.ToString());

                    // Draw onto a fresh bitmap instead of the source: Graphics.FromImage
                    // rejects indexed pixel formats (e.g. one byte per pixel), which the
                    // source image may use.
                    using (Image tmp = new Bitmap(bmp.Width, bmp.Height))
                    {
                        using (Graphics grph = Graphics.FromImage(tmp))
                        {
                            Rectangle rect = new Rectangle(0, 0, tmp.Width, tmp.Height);

                            // Copy the source pixels first, then overlay the blocks.
                            grph.DrawImage(bmp, rect, rect, GraphicsUnit.Pixel);

                            grph.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                            foreach (Block block in doc.Blocks)
                            {
                                DrawBlock(grph, block);
                            }
                        }

                        // Save only after the Graphics object has been disposed.
                        tmp.Save(@"D:\temp\page_layout_test2.bmp");
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Runs Tesseract page-layout analysis on a sample image, prints the resulting
        /// layout to the console, and saves a copy of the image with every detected
        /// block drawn on top of it.
        /// </summary>
        /// <remarks>
        /// Input and output paths are hard-coded; this is a manual smoke test rather
        /// than a reusable routine.
        /// </remarks>
        private static void AnalyseLayout()
        {
            string imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\phototest.tif";
            //imageFile = @"D:\Self-Study\OpenSources\Tesseract\original\eurotext.tif";

            // TesseractProcessor is disposable, so release it deterministically
            // instead of leaving it to the finalizer.
            using (TesseractProcessor processor = new TesseractProcessor())
            {
                processor.InitForAnalysePage();
                processor.SetPageSegMode(ePageSegMode.PSM_AUTO);

                // Direct cast: if the file does not decode to a Bitmap, fail right here
                // with InvalidCastException rather than handing null to the analyser.
                using (Bitmap bmp = (Bitmap)Bitmap.FromFile(imageFile))
                {
                    DocumentLayout doc = processor.AnalyseLayout(bmp);
                    Console.WriteLine(doc.ToString());

                    // Draw onto a fresh bitmap instead of the source: Graphics.FromImage
                    // rejects indexed pixel formats (e.g. one byte per pixel), which the
                    // source image may use.
                    using (Image tmp = new Bitmap(bmp.Width, bmp.Height))
                    {
                        using (Graphics grph = Graphics.FromImage(tmp))
                        {
                            Rectangle rect = new Rectangle(0, 0, tmp.Width, tmp.Height);

                            // Copy the source pixels first, then overlay the blocks.
                            grph.DrawImage(bmp, rect, rect, GraphicsUnit.Pixel);

                            grph.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;

                            foreach (Block block in doc.Blocks)
                            {
                                DrawBlock(grph, block);
                            }
                        }

                        // Save only after the Graphics object has been disposed.
                        tmp.Save(@"D:\temp\page_layout_test2.bmp");
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Runs OCR over each image, in list order, with a single engine instance and
        /// returns the concatenated text.
        /// </summary>
        /// <param name="images">Images to recognize.</param>
        /// <param name="lang">Language identifier handed to the engine's Init call.</param>
        /// <returns>
        /// The text of all images joined together, with every "\n" replaced by
        /// <see cref="Environment.NewLine"/>; or an empty string as soon as any image
        /// yields a null result (text gathered so far is discarded in that case).
        /// </returns>
        public override string RecognizeText(IList<Image> images, string lang)
        {
            // basedir, TESSDATA, oem and PSM are members declared outside this method.
            string tessdata = Path.Combine(basedir, TESSDATA);

            // TesseractProcessor is disposable; the using block releases it on every
            // exit path, including the early return inside the loop.
            using (TesseractProcessor processor = new TesseractProcessor())
            {
                processor.Init(tessdata, lang, oem);
                processor.SetPageSegMode((ePageSegMode)Enum.Parse(typeof(ePageSegMode), PSM));

                StringBuilder strB = new StringBuilder();

                foreach (Image image in images)
                {
                    //if (rect != Rectangle.Empty)
                    //{
                    //    processor.UseROI = true;
                    //    processor.ROI = rect;
                    //}
                    string text = processor.Recognize(image);

                    // A null result aborts the whole batch.
                    if (text == null)
                    {
                        return String.Empty;
                    }

                    strB.Append(text);
                }

                return strB.ToString().Replace("\n", Environment.NewLine);
            }
        }
예제 #4
0
        /// <summary>
        /// Scans the folder typed into the path text box for PNG/JPG/BMP files, runs
        /// single-line OCR on each one, and copies every file into a "new" sub-folder.
        /// If the recognized text contains digits, a dot and two more digits
        /// (e.g. "12.34"), the copy is named after the first such match; otherwise it
        /// keeps its original file name. The output folder is opened when done.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            string d = textBox_Path.Text;

            if (!Directory.Exists(d))
            {
                MessageBox.Show("目录" + d + "不存在");
                return;
            }

            // CreateDirectory does nothing when the folder already exists, so no
            // separate Exists check is needed.
            string d1 = Path.Combine(d, "new");
            Directory.CreateDirectory(d1);

            Regex reg = new Regex(@"\d+\.\d\d", RegexOptions.Compiled);

            foreach (string f in Directory.GetFiles(d))
            {
                // Ordinal, case-insensitive comparison avoids allocating lowered copies
                // and is immune to culture-specific casing rules.
                bool isImage = f.EndsWith(".png", StringComparison.OrdinalIgnoreCase)
                            || f.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase)
                            || f.EndsWith(".bmp", StringComparison.OrdinalIgnoreCase);
                if (!isImage)
                {
                    continue;
                }

                try
                {
                    string result;

                    // Loading the image and initializing the engine live inside the try
                    // so that one unreadable file is reported and skipped over instead
                    // of aborting the whole batch. Both objects are disposed on every
                    // path.
                    using (Bitmap bmp = new Bitmap(f))
                    using (TesseractProcessor process = new TesseractProcessor())
                    {
                        process.Init(System.Environment.CurrentDirectory + "\\", "chi_sim", (int)eOcrEngineMode.OEM_DEFAULT);

                        // Set the segmentation mode after Init.
                        // NOTE(review): in the native Tesseract API, Init can recreate the
                        // engine and drop settings made before it - confirm for this wrapper.
                        process.SetPageSegMode(ePageSegMode.PSM_SINGLE_LINE);

                        result = process.Recognize(bmp);
                    }

                    // NOTE(review): two images that yield the same amount overwrite each
                    // other in the output folder (overwrite flag is true) - confirm intended.
                    Match m = reg.Match(result);
                    string targetName = m.Success
                        ? m.Value + Path.GetExtension(f)
                        : Path.GetFileName(f);

                    File.Copy(f, Path.Combine(d1, targetName), true);
                }
                catch (Exception)
                {
                    // Best effort: tell the user, then keep the file under its own name.
                    MessageBox.Show("识别文件出错:" + f);
                    File.Copy(f, Path.Combine(d1, Path.GetFileName(f)), true);
                }
            }

            // Open the output folder in the shell.
            System.Diagnostics.Process.Start(d1);
        }
예제 #5
0
        /// <summary>
        /// Recognizes the text in an image file using the English language data and
        /// applies this class's OCR-error and letter-case corrections to the result.
        /// </summary>
        /// <param name="imgFile">Path of the image file passed to the engine.</param>
        /// <returns>
        /// The corrected text with every "\n" replaced by
        /// <see cref="Environment.NewLine"/>, or an empty string on failure. On failure
        /// <c>ErrMsg</c> is non-empty; it is reset to empty at the start of each call.
        /// </returns>
        public string UseTesseract(string imgFile)
        {
            this.ErrMsg = string.Empty;
            string defLang = "eng";

            string strResult = string.Empty;
            try
            {
                using (TesseractProcessor processor = new TesseractProcessor())
                {
                    processor.Init(this.TESSDATA, defLang, this.oem);
                    processor.SetPageSegMode((ePageSegMode)Enum.Parse(typeof(ePageSegMode), this.pageSegMode));

#if DEBUG
                    System.Diagnostics.Debug.WriteLine("processor:");
                    System.Diagnostics.Debug.WriteLine(processor.GetTesseractEngineVersion());
#endif

                    strResult = processor.Recognize(imgFile);
                    if (!string.IsNullOrEmpty(strResult))
                    {
                        // correct common errors caused by OCR
                        strResult = this.CorrectOCRErrors(strResult);
                        // correct letter cases
                        strResult = this.CorrectLetterCases(strResult);
                    }
                }

                // Handle a null result explicitly. Previously it surfaced only as a
                // NullReferenceException from the Replace call below, leaving a
                // misleading stack trace in ErrMsg. The observable contract is kept:
                // empty return value plus a non-empty ErrMsg.
                if (strResult == null)
                {
                    this.ErrMsg = "Tesseract returned no result for: " + imgFile;
                    return string.Empty;
                }

                strResult = strResult.Replace("\n", Environment.NewLine);
            }
            catch (Exception ex)
            {
                // Failures are reported through ErrMsg rather than thrown to the caller.
                this.ErrMsg = ex.ToString();
                strResult = string.Empty;
            }

            return strResult;
        }