/*
 *
 * https://stackoverflow.com/questions/38336601/ocr-tesseractengine
 * /
 * public Bitmap ScaleByPercent(Bitmap imgPhoto, int Percent)
 * {
 *     float nPercent = ((float)Percent / 100);
 *
 *     int sourceWidth = imgPhoto.Width;
 *     int sourceHeight = imgPhoto.Height;
 *     var destWidth = (int)(sourceWidth * nPercent);
 *     var destHeight = (int)(sourceHeight * nPercent);
 *
 *     var bmPhoto = new Bitmap(destWidth, destHeight,
 *                              PixelFormat.Format24bppRgb);
 *     bmPhoto.SetResolution(imgPhoto.HorizontalResolution,
 *                           imgPhoto.VerticalResolution);
 *
 *     Graphics grPhoto = Graphics.FromImage(bmPhoto);
 *     grPhoto.InterpolationMode = InterpolationMode.HighQualityBicubic;
 *
 *     grPhoto.DrawImage(imgPhoto,
 *         new System.Drawing.Rectangle(0, 0, destWidth, destHeight),
 *         new System.Drawing.Rectangle(0, 0, sourceWidth, sourceHeight),
 *         GraphicsUnit.Pixel);
 *     bmPhoto.Save(@"D:\Scale.png", System.Drawing.Imaging.ImageFormat.Png);
 *     grPhoto.Dispose();
 *     return bmPhoto;
 * }
 *
 * /*
 *
 * https://stackoverflow.com/questions/38336601/ocr-tesseractengine
 * Strickos9 had shown you a partially great way to solve this issue.
 * But the point is that if you will have to scan text with the same size,
 * but also there would be some letters included, you will get a bad result.
 * Also, even with a whitelist related only to digits,
 * you may experience some problems while scanning
 * (for example 5 scanned as 6), because Tesseract really struggles
 * to scan low-quality characters,
 * so I would highly recommend you to:
 * Enlarge the image by 2-4 times.
 * Do some blur if needed to soften the edges of chars.
 * Process it with 'threshold' or 'adaptive threshold' algorithms
 * (to clear the blurred pixels and that blue color in the background).
 * I've answered a similar question HERE, where a person
 * was also unsatisfied with results
 * while scanning a low quality picture.
 * Combined with what Strickos9 offered to you
 * (if you are going to scan only digits) should provide you a perfect quality of scanning.
* You can do this image processing with software like OpenCV or
 * Matlab (although I've never tried this).
 * If you are struggling with this, post in comments your further questions.
 */

/// <summary>
/// Clones <paramref name="b"/>, hands the clone to a new <c>UnCodebase</c>, calls
/// <c>GrayByPixels</c> and then <c>ReSetBitMap</c> on it, and returns what
/// <c>ReSetBitMap</c> yields.
/// </summary>
/// <param name="b">Source bitmap; only its clone is passed to <c>UnCodebase</c>.</param>
/// <returns>The bitmap returned by <c>UnCodebase.ReSetBitMap</c>.</returns>
Bitmap GrayBitmapSimply(Bitmap b)
{
    var grayscaler = new UnCodebase((Bitmap)b.Clone());

    // The bitmap handed back by GrayByPixels is not used here: only the value
    // produced by the subsequent ReSetBitMap call is returned to the caller.
    grayscaler.GrayByPixels();
    return grayscaler.ReSetBitMap();
}
/// <summary>
/// Legacy OCR path that shells out to <c>tesseract.exe</c>.
/// Passes <paramref name="src"/> through <c>UnCodebase.GrayByPixels</c> and
/// <c>UnCodebase.ClearNoise(128, 2)</c>, saves the bitmap returned by <c>GrayByPixels</c>
/// as <c>gybit.tif</c> in the current directory, runs tesseract on that file with output
/// base name <c>output</c>, and returns the contents of <c>output.txt</c>.
/// </summary>
/// <param name="src">Bitmap to run OCR on.</param>
/// <returns>
/// The full text of <c>output.txt</c> in the current directory, decoded with
/// <see cref="Encoding.Default"/>.
/// </returns>
public String OcrBitmap_old_one(Bitmap src)
{
    UnCodebase ud = new UnCodebase(src);
    Bitmap grayBitmap = ud.GrayByPixels();
    // NOTE(review): ClearNoise runs after grayBitmap was obtained; whether the saved image
    // reflects it depends on UnCodebase sharing the same bitmap instance - confirm.
    ud.ClearNoise(128, 2);

    string workDir = System.Environment.CurrentDirectory;
    string tiffPath = Path.Combine(workDir, "gybit.tif");
    grayBitmap.Save(tiffPath, System.Drawing.Imaging.ImageFormat.Tiff);

    System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo();
    startInfo.FileName = "tesseract.exe";
    // Quote the input path so a working directory containing spaces is still passed to
    // tesseract as a single argument.
    startInfo.Arguments = "\"" + tiffPath + "\" output -l fontyp -psm 7";
    startInfo.CreateNoWindow = true;
    startInfo.UseShellExecute = false;

    // Process owns an OS handle; release it as soon as tesseract has exited.
    using (System.Diagnostics.Process tesseract = new System.Diagnostics.Process())
    {
        tesseract.StartInfo = startInfo;
        tesseract.Start();
        tesseract.WaitForExit();
    }

    // ReadAllBytes reads the complete file and always closes the handle, and GetString
    // yields exactly the decoded characters (no '\0' padding when the encoding uses more
    // than one byte per character).
    byte[] rawOutput = File.ReadAllBytes(Path.Combine(workDir, "output.txt"));
    string recognizedText = Encoding.Default.GetString(rawOutput);

    Console.WriteLine(recognizedText);
    return recognizedText;
}
/// <summary>
/// Manual check of the in-process Tesseract engine.
/// Obtains two bitmaps from <c>getHHMMSSBit</c> and <c>getGrayPriceBit</c>, passes each
/// through <c>UnCodebase.GrayByPixels</c>, saves both results as TIFF files in the current
/// directory, then runs digit-only OCR on the first bitmap and shows the recognised text
/// (or the exception text on failure) in a message box.
/// </summary>
public void testOcrTesseract2()
{
    // TesseractEngine wraps native resources, so it is disposed when the method ends.
    using (TesseractEngine engine = new TesseractEngine(@"../tessdata", "fontyp", EngineMode.TesseractOnly))
    {
        Bitmap bit_time = getHHMMSSBit();
        Bitmap price_bit_time = this.getGrayPriceBit();

        UnCodebase ud = new UnCodebase(bit_time);
        UnCodebase ud2 = new UnCodebase(price_bit_time);
        Bitmap grayTimeBitmap = ud.GrayByPixels();
        Bitmap grayPriceBitmap = ud2.GrayByPixels();

        string workDir = System.Environment.CurrentDirectory;
        // Take the date stamp once so both dump files always carry the same suffix.
        string dateStamp = DateTime.Now.ToLongDateString();

        String grybitFileNm = workDir + "\\HHMMSSBit" + dateStamp + ".tif";
        grayTimeBitmap.Save(grybitFileNm, System.Drawing.Imaging.ImageFormat.Tiff);

        String grybitFileNm2 = workDir + "\\price_bit_time" + dateStamp + ".tif";
        grayPriceBitmap.Save(grybitFileNm2, System.Drawing.Imaging.ImageFormat.Tiff);

        // NOTE(review): OCR is run on bit_time, not on the GrayByPixels result; whether the
        // two share pixel data depends on UnCodebase - confirm this is intended.
        // The converted Pix is a native image and must be disposed as well.
        using (var img = PixConverter.ToPix(bit_time))
        {
            try
            {
                // Restrict recognition to digits.
                engine.SetVariable("tessedit_char_whitelist", "0123456789");
                using (var page = engine.Process(img))
                {
                    var text = page.GetText();
                    MessageBox.Show(text);
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.ToString());
            }
        }
    }
}