/// <summary>
/// Runs digits-only, single-line OCR over <paramref name="img"/> and returns the recognised text
/// with every space removed.
/// </summary>
/// <param name="img">Bitmap to recognise.</param>
/// <returns>
/// The recognised text without spaces, or an empty string when the engine could not be created
/// or recognition failed.
/// </returns>
public static string OCR(Bitmap img)
{
    string sResult;
    try
    {
        // The language data must be located in the ./tessdata folder.
        using (TesseractEngine ocr = new TesseractEngine("./tessdata", "eng"))
        {
            // Restricting the character whitelist to digits makes recognition more accurate.
            ocr.SetVariable("tessedit_char_whitelist", "0123456789");

            // "using" releases the page even when GetText throws; the original leaked it in that case.
            using (Page page = ocr.Process(img, PageSegMode.SingleLine))
            {
                sResult = page.GetText();
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort by design: callers get an empty string, but the failure is no longer invisible.
        System.Diagnostics.Debug.WriteLine(ex);
        sResult = "";
    }

    return (sResult ?? "").Replace(" ", "");
}
/// <summary>
/// Disposes the held engine, if there is one, and clears the field so that a second call does nothing.
/// </summary>
public void Dispose()
{
    if (engine == null)
    {
        return;
    }

    engine.Dispose();
    engine = null;
}
/// <summary>
/// Releases <c>_engine</c> when it is set and resets the field to null afterwards.
/// </summary>
public void Dispose()
{
    bool hasEngine = _engine != null;
    if (!hasEngine)
    {
        return;
    }

    _engine.Dispose();
    _engine = null;
}
/// <summary>
/// Pre-processes <c>img1</c> (grayscale, threshold at 150, deskew), shows it in <c>pictureBox1</c> and,
/// when the image measures 3x2 or 2x3 (pixel size divided by resolution, rounded down), reads a fixed
/// region with the French Tesseract model and shows the text in <c>label1</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    const string tessDataPath = @"C:\Users\moham\source\repos\pfe3\tessdata\";

    // Size is measured before any filter replaces img1.
    double width = Math.Floor(img1.Width / img1.HorizontalResolution);
    double height = Math.Floor(img1.Height / img1.VerticalResolution);
    label1.Text = img1.HorizontalResolution + " " + img1.VerticalResolution;

    img1 = Grayscale.CommonAlgorithms.BT709.Apply(img1);
    Threshold filter = new Threshold(150);
    filter.ApplyInPlace(img1);

    // Straighten the scan by rotating against the detected skew angle.
    DocumentSkewChecker skewChecker = new DocumentSkewChecker();
    double angle = skewChecker.GetSkewAngle(img1);
    RotateBilinear rotationFilter = new RotateBilinear(-angle) { FillColor = Color.White };
    img1 = rotationFilter.Apply(img1);
    pictureBox1.Image = img1;

    bool isLandscapeCard = width == 3 && height == 2;
    bool isPortraitCard = width == 2 && height == 3;
    if (!isLandscapeCard && !isPortraitCard)
    {
        // Other formats (including 8x11 / 11x8) are not processed yet.
        // The engine used to be created before this check and leaked on this path.
        return;
    }

    if (isPortraitCard)
    {
        rotationFilter = new RotateBilinear(-90) { FillColor = Color.White };
        img1 = rotationFilter.Apply(img1);
        Console.WriteLine("if1");
    }

    Rect ocrRegion = new Rect(700, 450, 150, 100);

    // One engine serves both passes; each page is disposed before the next Process call.
    using (TesseractEngine engine = new TesseractEngine(tessDataPath, "fra", EngineMode.Default))
    {
        string ocrText;
        using (Page page = engine.Process(img1, ocrRegion))
        {
            ocrText = page.GetText();
        }

        // NOTE(review): as written this replaces a space with a space (a no-op); confirm the intended literals.
        ocrText = ocrText.Replace(" ", " ");
        Console.WriteLine(ocrText + " " + ocrText.Length);

        if (ocrText.Length > 9)
        {
            // Too much text for this region: turn the image by 180 degrees and read again.
            rotationFilter = new RotateBilinear(180) { FillColor = Color.White };
            img1 = rotationFilter.Apply(img1);
            using (Page page = engine.Process(img1, ocrRegion))
            {
                ocrText = page.GetText();
            }

            ocrText = ocrText.Replace(" ", "");
        }

        label1.Text = "carte national " + ocrText;
        pictureBox1.Image = img1;
    }
}
/// <summary>
/// Determines the series number by running OCR on progressively enlarged copies of the page
/// (intended for the case where nothing was found at the original scale).
/// </summary>
/// <param name="sourceImage">Input image; it is never modified or disposed here.</param>
/// <param name="stepScale">Scale increment per pass; pass <c>k</c> uses factor <c>1 + k * stepScale</c>.</param>
/// <param name="amountOfSteps">Number of rescaling passes (k runs from 1 to this value).</param>
/// <returns>The result of <c>highestMatch</c> over the candidates collected from all passes.</returns>
public string getSeriaNumberViaRescaling(Image sourceImage, float stepScale, int amountOfSteps)
{
    const float scaleValue = 1.0f; // original scale
    PageSegMode pSegMode = PageSegMode.SparseText;
    List<string> seriaList = new List<string>();

    // "using" guarantees both engines are released even when a pass throws.
    using (TesseractEngine ocrRus = new TesseractEngine("./tessdata", "rus", EngineMode.Default))
    using (TesseractEngine ocrEng = new TesseractEngine("./tessdata", "eng", EngineMode.Default))
    {
        TResizeTool resizeTool = new TResizeTool();

        for (int k = 1; k <= amountOfSteps; k++)
        {
            // Every pass scales the unscaled source image, never a previous result.
            int newWidth = Convert.ToInt32((sourceImage.Width) * ((k * stepScale) + scaleValue));
            int newHeight = Convert.ToInt32((sourceImage.Height) * ((k * stepScale) + scaleValue));
            Bitmap imgBitmap = resizeTool.resizeImage(sourceImage, newWidth, newHeight);
            try
            {
                string rescaledPageTextRus;
                string rescaledPageTextEng;

                // A page must be disposed before the same engine can process the next image.
                using (var pageRus = ocrRus.Process(imgBitmap, pSegMode))
                {
                    rescaledPageTextRus = pageRus.GetText();
                }

                using (var pageEng = ocrEng.Process(imgBitmap, pSegMode))
                {
                    rescaledPageTextEng = pageEng.GetText();
                }

                seriaList.Add(getSeriaNumber(rescaledPageTextRus, rescaledPageTextEng));
            }
            finally
            {
                // Release the scaled copy; the guard protects the caller's image in case
                // the resize helper ever hands back its input unchanged.
                if (imgBitmap != null && !ReferenceEquals(imgBitmap, sourceImage))
                {
                    imgBitmap.Dispose();
                }
            }
        }
    }

    return highestMatch(seriaList);
}
/// <summary>
/// Disposes <c>_tesseractEngine</c> when it is set.
/// </summary>
/// <param name="disposing">
/// Currently not consulted: there are no additional managed resources to remove,
/// and the engine is released for either value.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (_tesseractEngine == null)
    {
        return;
    }

    _tesseractEngine.Dispose();
}
/// <summary>
/// Releases the engine once; later calls return immediately.
/// </summary>
/// <param name="disposing">
/// When true, <c>_engine</c> is disposed. The instance is marked as disposed for either value.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (disposed)
    {
        return;
    }

    if (disposing)
    {
        _engine.Dispose();
    }

    disposed = true;
}
/// <summary>
/// Releases the form's long-lived helpers (observers, screen-saver preventer, speech synthesizer,
/// OCR engine) when the form really closes.
/// </summary>
/// <param name="e">Closing event data; <c>e.Cancel</c> is honoured after the base call.</param>
protected override void OnFormClosing(FormClosingEventArgs e)
{
    // The base call raises FormClosing, whose handlers may veto the close.
    base.OnFormClosing(e);

    // If the close was cancelled the form stays open and still needs these objects,
    // so nothing may be disposed yet.
    if (e.Cancel)
    {
        return;
    }

    foreach (Observer observer in Observers)
    {
        observer.Dispose();
    }

    ScreenSaverPreventer.Dispose();

    // Stop queued speech before the synthesizer is released.
    SpeechSynthesizer.SpeakAsyncCancelAll();
    SpeechSynthesizer.Dispose();
    TesseractEngine.Dispose();
}
/// <summary>
/// Disposes the OCR engine at most once.
/// </summary>
/// <param name="disposing">
/// True for an explicit dispose; when false (finalizer path) the engine is left untouched.
/// </param>
protected virtual void Dispose(bool disposing)
{
    // Only the first call does any work.
    if (m_disposed)
    {
        return;
    }

    // Skip the engine on the finalizer path or when it was never created.
    bool releaseEngine = disposing && m_ocrEngine != null;
    if (releaseEngine)
    {
        m_ocrEngine.Dispose();
        m_ocrEngine = null;
    }

    m_disposed = true;
}
/// <summary>
/// Releases the OCR engine and the stream writer once; repeated calls are ignored.
/// </summary>
/// <param name="disposing">
/// When true, both fields are disposed (if set) and cleared. The instance is flagged as
/// disposed for either value.
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (is_disposed)
    {
        return;
    }

    if (disposing)
    {
        if (ocr != null)
        {
            ocr.Dispose();
        }

        ocr = null;

        if (streamWriter != null)
        {
            streamWriter.Dispose();
        }

        streamWriter = null;
    }

    is_disposed = true;
}
/// <summary>
/// Recognises the text in <c>inImg</c> with the simplified-Chinese model, stores it in
/// <c>recoText</c> and prints it to the console. Any failure is printed to the console instead
/// of being thrown.
/// </summary>
public void Recognize()
{
    try
    {
        string path = @"C:\Users\cf200\source\repos\Scanner\Scanner\bin\Debug\";
        Bitmap bit = inImg.Bitmap;
        // bit = PreprocesImage(bit); // optional image pre-processing; worth trying if accuracy is low

        // "using" releases engine and page even when Process or GetText throws;
        // the original skipped both Dispose calls in that case.
        using (TesseractEngine _ocr = new TesseractEngine(path, "chi_sim", EngineMode.Default))
        using (Page page = _ocr.Process(bit))
        {
            recoText = page.GetText(); // recognised content
        }

        Console.WriteLine(recoText);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Reads the image at <paramref name="pathImage"/> with the Portuguese model and returns the
/// text that follows the marker <c>notice!)</c>.
/// </summary>
/// <param name="pathImage">Path of the image file to read.</param>
/// <returns>
/// The text after the marker, or an error message starting with "Erro ao tentar ler" when
/// anything throws.
/// </returns>
public string LerArquivoImagem(string pathImage)
{
    try
    {
        string text;

        // All three native-backed objects are released here, in reverse order of creation.
        // The original never disposed the page and skipped the others when an exception occurred.
        using (var engine = new TesseractEngine("./tessdata", "por", EngineMode.Default))
        using (var pix = Pix.LoadFromFile(pathImage))
        using (var page = engine.Process(pix, PageSegMode.Auto))
        {
            text = page.GetText();
        }

        // Position just past the 8-character marker.
        // NOTE(review): when the marker is absent IndexOf returns -1, so this silently yields
        // the text from index 8 onwards (or the error message for shorter text) — confirm intent.
        var indexTrial = text.IndexOf("notice!)") + 8;
        string subs = text.Substring(indexTrial + 1);
        return subs;
    }
    catch (Exception e)
    {
        Debug.WriteLine(e.Message);
        return $"Erro ao tentar ler resultado do documento enviado, erro: {e.Message}";
    }
}
/// <summary>
/// Recognises the text contained in an image file.
/// </summary>
/// <param name="imageUrl">Path of the image file.</param>
/// <param name="tessdataUrl">Path of the language data folder.</param>
/// <param name="language">Name of the language pack.</param>
/// <param name="variableData">
/// Character whitelist passed to <c>tessedit_char_whitelist</c>; digits by default.
/// Null or empty leaves the whitelist unset.
/// </param>
/// <param name="pageSegMode">Page segmentation mode.</param>
/// <returns>The recognised text.</returns>
public static string Identity(string imageUrl, string tessdataUrl, string language = "eng", string variableData = "0123456789", PageSegMode pageSegMode = PageSegMode.Auto)
{
    // Exceptions propagate unchanged to the caller; "using" performs the cleanup that the
    // former finally block did, and also covers the converted Pix, which used to leak.
    using (Bitmap bitmap = new Bitmap(imageUrl))
    using (TesseractEngine tesseractEngine = new TesseractEngine(tessdataUrl, language, EngineMode.Default))
    {
        if (!string.IsNullOrEmpty(variableData))
        {
            tesseractEngine.SetVariable("tessedit_char_whitelist", variableData);
        }

        using (Pix pix = PixConverter.ToPix(bitmap))
        using (Page page = tesseractEngine.Process(pix, pageSegMode))
        {
            return page.GetText();
        }
    }
}
/// <summary>
/// Disposes the engine under the instance lock and clears the field; does nothing when the
/// engine is already gone.
/// </summary>
public void Dispose()
{
    lock (this)
    {
        if (engine == null)
        {
            return;
        }

        // Disposing the cached page (and clearing cachedPageBitmap / cachedPage) is disabled:
        // the earlier attempt noted that it failed for an unknown reason with "Attempted to read
        // or write protected memory. This is often an indication that other memory is corrupt."
        engine.Dispose();
        engine = null;
    }
}
/// <summary>
/// Recognises the English text inside each given rectangle of the image stored at <c>pngImage</c>.
/// </summary>
/// <param name="coOrdinates">Regions of the image to read, in pixels.</param>
/// <returns>The trimmed text of every region, in the order the regions were supplied.</returns>
public IEnumerable<string> IdentifyData(IEnumerable<Rectangle> coOrdinates)
{
    List<string> data = new List<string>();

    // No catch block: the former "throw ex;" only reset the stack trace. Exceptions now reach
    // the caller with their original trace, and "using" still releases everything.
    using (var img = new Bitmap(pngImage))
    {
        img.SetResolution(300, 300);

        foreach (Rectangle rec in coOrdinates)
        {
            // NOTE(review): a fresh engine per rectangle is kept as in the original; sharing
            // one engine would be much cheaper but could change recognition results.
            using (var ocr = new TesseractEngine(ConfigurationManager.AppSettings.Get("TessarecDirectory"), "eng", EngineMode.Default))
            using (var page = ocr.Process(img, new Rect(rec.X, rec.Y, rec.Width, rec.Height)))
            {
                data.Add(page.GetText().Trim());
            }
        }
    }

    return data;
}
/// <summary>
/// Runs OCR over the current trimmed screenshot: reads the station name from the header,
/// matches it against the known stations, then walks every table row and column. Text columns
/// are read with Tesseract; numeric columns are written to disk and recognised by the external
/// Brainerous executable. The results are sent to the calling form, after which the next
/// buffered screenshot (if any) is processed.
/// </summary>
/// <param name="textRowLocations">
/// One entry per table row: <c>Item1</c> is the top and <c>Item2</c> the bottom pixel row.
/// </param>
public void PerformOcr(List<Tuple<int, int>> textRowLocations)
{
    // Assigned inside the try block, consumed after the engine has been released.
    string[,] commodityColumnText;
    Bitmap[,] originalBitmaps;
    float[,] originalBitmapConfidences;
    string[] rowIds;

    var engine = new TesseractEngine(Program.GetDataPath(@"tessdata"), Program.DBCon.getIniValue<String>(IBE.IBESettingsView.DB_GROUPNAME, "TraineddataFile"), EngineMode.Default);
    try
    {
        engine.DefaultPageSegMode = PageSegMode.SingleLine;

        string Stationname_OCR;
        string StationameAnalysisBase;

        // Delete the old Brainerous images - otherwise Brainerous would process older,
        // no longer relevant images too.
        string brainerousImageDir = Program.GetDataPath(@"Brainerous\images");
        if (Directory.Exists(brainerousImageDir))
        {
            foreach (string file in Directory.GetFiles(brainerousImageDir, "*.*"))
            {
                File.Delete(file);
            }
        }
        else
        {
            // Fixed: this used to create "\Brainerousimages", so the later Save calls into
            // "Brainerous\images" had no directory to write to.
            Directory.CreateDirectory(brainerousImageDir);
        }

        float level;
        var text = AnalyseFrameUsingTesseract(_bTrimmedHeader, engine, out level);
        Stationname_OCR = StripPunctuationFromScannedText(text);

        string[] StationsInSystem = Program.Data.getStations(SystemAtTimeOfScreenshot);

        // Prefer the station reported by the current condition; fall back to the OCR result.
        if (Program.actualCondition.Station.Equals("", StringComparison.InvariantCultureIgnoreCase))
        {
            StationameAnalysisBase = Stationname_OCR;
        }
        else
        {
            StationameAnalysisBase = Program.actualCondition.Station;
        }

        string headerResult_temp = StationsInSystem.FirstOrDefault(x => x.Equals(StationameAnalysisBase, StringComparison.InvariantCultureIgnoreCase));
        if (headerResult_temp == null)
        {
            // Station not found in the database: take the closest known name by Levenshtein distance.
            var matchesInStationReferenceList = StationsInSystem.OrderBy(x => _levenshtein.LD2(Stationname_OCR, x)).ToList();
            if (matchesInStationReferenceList.Count > 0)
            {
                var ld = _levenshtein.LD2(Stationname_OCR, matchesInStationReferenceList[0].ToUpper());

                // The limit depends on the length of the word - this factor works really well.
                double LevenshteinLimit = Math.Round((matchesInStationReferenceList[0].Length * 1.0), 0);
                if (ld <= LevenshteinLimit)
                {
                    Stationname_OCR = matchesInStationReferenceList[0];
                }
            }
        }
        else
        {
            Stationname_OCR = headerResult_temp;
        }

        // Show the station on the GUI.
        _callingForm.cOcrCaptureAndCorrect.DisplayResults(Stationname_OCR);

        commodityColumnText = new string[textRowLocations.Count(), 8];
        originalBitmaps = new Bitmap[textRowLocations.Count(), 8];
        originalBitmapConfidences = new float[textRowLocations.Count(), 8];
        rowIds = new string[textRowLocations.Count()];

        var rowCtr = 0;
        var bitmapCtr = 0;
        foreach (var row in textRowLocations)
        {
            // Pad the row by 3 pixels above and below, clamped to the bitmap.
            int startRow = row.Item1 - 3;
            int heightRow = row.Item2 - row.Item1 + 6;
            if (startRow < 0)
            {
                startRow = 0;
            }

            if (heightRow + startRow > _bTrimmed_4_OCR.Height)
            {
                heightRow = _bTrimmed_4_OCR.Height - startRow;
            }

            // We'll use this later to identify the right correction image.
            rowIds[rowCtr] = Guid.NewGuid().ToString();
            using (Bitmap b = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(0, startRow, _bTrimmed_4_OCR.Width, heightRow)))
            {
                b.Save(Program.GetDataPath(@"OCR Correction Images\" + rowIds[rowCtr] + ".png"));
            }

            // Columns: 0 commodity, 1 sell, 2 buy, 3 freight, 4 demand, 5 demand level,
            // 6 supply, 7 supply level.
            for (int columnCounter = 0; columnCounter < 8; columnCounter++)
            {
                // Column n lies between calibration points n+2 and n+3, measured from point 2
                // (same values as the former eight-way switch).
                int left = _calibrationPoints[columnCounter + 2].X - _calibrationPoints[2].X;
                int width = _calibrationPoints[columnCounter + 3].X - _calibrationPoints[columnCounter + 2].X;

                var fudgeFactor = 0; // _bOriginal.Height * 6 / 1440;
                left = left + fudgeFactor;
                width = width - fudgeFactor;

                int DarkPixels = 0;

                if (Program.DBCon.getIniValue<Boolean>(IBESettingsView.DB_GROUPNAME, "CheckNextScreenshotForOne", false.ToString(), false, true))
                {
                    if (PixelTest == null)
                    {
                        PixelTest = new EBPixeltest();
                    }

                    if (columnCounter == 3)
                    {
                        // Ownership of this crop passes to PixelTest, so it is not disposed here.
                        var brainerousOut = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left, startRow, width, heightRow));

                        // Check how many dark pixels are on the bitmap.
                        DarkPixels = CountDarkPixels(brainerousOut, Program.DBCon.getIniValue<Int32>(IBE.IBESettingsView.DB_GROUPNAME, "EBPixelThreshold"));
                        PixelTest.addPicture(brainerousOut, DarkPixels);
                    }
                }
                else
                {
                    // Crop a little more from the left border: the table line there was
                    // sometimes recognised as "1" or "7".
                    left += 10;
                    width -= 10;

                    if (columnCounter != 0 && columnCounter != 5 && columnCounter != 7)
                    {
                        // Numeric column: write it out for Brainerous to process later.
                        using (var brainerousOut = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left, startRow, width, heightRow)))
                        {
                            int minimumDarkPixels = Program.DBCon.getIniValue<Int32>(IBE.IBESettingsView.DB_GROUPNAME, "EBPixelAmount");
                            if (minimumDarkPixels > 0)
                            {
                                // Only bitmaps with something on them are processed (at least one
                                // digit assumed; a "1" has about 25 pixels at the default 1920x1200).
                                DarkPixels = CountDarkPixels(brainerousOut, Program.DBCon.getIniValue<Int32>(IBE.IBESettingsView.DB_GROUPNAME, "EBPixelThreshold"));
                            }

                            if (DarkPixels >= minimumDarkPixels)
                            {
                                brainerousOut.Save(Program.GetDataPath(@"Brainerous\images\" + bitmapCtr + ".png"));
                            }
                        }

                        // The counter advances even when nothing was saved; the parser below
                        // fills in the gaps.
                        bitmapCtr++;
                    }
                    else
                    {
                        // Text column: use Tesseract on several slightly shifted crops and keep
                        // the most confident result.
                        var c = new Bitmap[7];
                        c[0] = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left, startRow, width, heightRow));
                        c[1] = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left + 1, startRow, width, heightRow));
                        c[2] = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left - 1, startRow, width, heightRow));
                        c[3] = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left, startRow - 1, width, heightRow));
                        c[4] = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left + 1, startRow - 1, width, heightRow));
                        c[5] = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left - 1, startRow - 1, width, heightRow));
                        c[6] = RNGraphics.Crop(_bTrimmed_4_OCR, new Rectangle(left, startRow + 2, width, heightRow - 2));

                        var t = new string[c.Length];
                        var cf = new float[c.Length];
                        for (int i = 0; i < c.Length; i++)
                        {
                            // The analyser gets its own clone, so the candidates stay untouched.
                            t[i] = AnalyseFrameUsingTesseract((Bitmap)(c[i].Clone()), engine, out cf[i]);
                        }

                        // First candidate with the highest confidence wins.
                        int result = 0;
                        float confidence = cf[0];
                        for (int i = 1; i < c.Length; i++)
                        {
                            if (confidence < cf[i])
                            {
                                result = i;
                                confidence = cf[i];
                            }
                        }

                        // Keep the winning crop itself and release the other six; previously a
                        // clone was stored and all seven crops leaked.
                        for (int i = 0; i < c.Length; i++)
                        {
                            if (i != result)
                            {
                                c[i].Dispose();
                            }
                        }

                        originalBitmaps[rowCtr, columnCounter] = c[result];

                        // NOTE(review): columns 1-3 and 6 are routed to the numeric branch above,
                        // so the cases for them below look unreachable — kept unchanged.
                        switch (columnCounter)
                        {
                            // Bodges for number columns.
                            case 1:
                            case 2:
                            case 3:
                                t[result] = t[result].Replace(" ", "").Replace("O", "0").Replace("I", "1").Replace("'", "");
                                t[result] = System.Text.RegularExpressions.Regex.Replace(t[result], @"[a-zA-Z\s]+", string.Empty); // remove any alphas that remain
                                break;

                            case 5:
                            case 7:
                                t[result] = t[result].Replace(" ", "").Replace("-", "");
                                if (t[result] == "HIGH" || t[result] == "MED" || t[result] == "LOW")
                                {
                                    cf[result] = 1;
                                }

                                break;
                        }

                        if ((columnCounter == 5 && t[result].Contains("ENTER")) || (columnCounter == 6 && (t[result].Contains("NGAR") || t[result].Contains("SURFACE"))))
                        {
                            t[result] = "";
                            cf[result] = 1;
                        }

                        commodityColumnText[rowCtr, columnCounter] += t[result];
                        originalBitmapConfidences[rowCtr, columnCounter] = cf[result];
                    }
                }
            }

            rowCtr++;
        }

        if (Program.DBCon.getIniValue<Boolean>(IBESettingsView.DB_GROUPNAME, "CheckNextScreenshotForOne", false.ToString(), false, true))
        {
            PixelTest.StartModal(_callingForm);
        }
        else if (textRowLocations.Count > 0)
        {
            // Call out to Brainerous to process the numeric bitmaps we saved earlier.
            string outputFromBrainerous;
            using (var pr = new Process())
            {
                pr.StartInfo.UseShellExecute = false;
                pr.StartInfo.CreateNoWindow = true;
                pr.StartInfo.RedirectStandardOutput = true;
                pr.StartInfo.FileName = Program.GetDataPath(@"Brainerous\nn_training.exe");
                pr.StartInfo.WorkingDirectory = Program.GetDataPath("Brainerous");
                pr.Start();
                outputFromBrainerous = pr.StandardOutput.ReadToEnd();

                // Cut every "Failed to pad successfully" message out of the output.
                while (outputFromBrainerous.Contains("Failed to pad successfully"))
                {
                    var o2 = outputFromBrainerous.IndexOf("Failed to ");
                    var o3 = outputFromBrainerous.Substring(0, o2);
                    var o4 = outputFromBrainerous.Substring(o2).IndexOf(Program.GetDataPath("images"), StringComparison.InvariantCultureIgnoreCase);

                    // There has been output with "Failed to pad successfully" followed only by
                    // trash and no images path; then o4 is -1 and everything after the message
                    // must be dropped.
                    if (o4 > 0)
                    {
                        var o5 = outputFromBrainerous.Substring(o2 + o4);
                        outputFromBrainerous = o3 + "\r\n" + o5;
                    }
                    else
                    {
                        outputFromBrainerous = o3;
                    }
                }

                pr.WaitForExit();
            }

            List<string> splitOutput = ((string[])outputFromBrainerous.Replace("\r", "").Split('\n')).ToList();

            // Two lines (file name, value) are expected per image and five images per row.
            // Wherever the expected file name is missing, insert a placeholder pair with an
            // empty value.
            for (var i = 0; i < (textRowLocations.Count * 10); i += 2)
            {
                string Filename = (i / 2).ToString() + ".png";
                if ((splitOutput.Count <= i) || (splitOutput[i].Length < 14) || (splitOutput[i].Substring(9) != Filename))
                {
                    splitOutput.Insert(i, Program.GetDataPath(@"images\" + Filename));
                    splitOutput.Insert(i + 1, "");
                }
            }

            // Load the result from Brainerous into the OCR output: the numeric columns
            // 1, 2, 3, 4 and 6 take the value lines at offsets 1, 3, 5, 7 and 9 of each row.
            int[] numericColumns = { 1, 2, 3, 4, 6 };
            for (var i = 0; i < textRowLocations.Count; i++)
            {
                for (int n = 0; n < numericColumns.Length; n++)
                {
                    int column = numericColumns[n];
                    commodityColumnText[i, column] = splitOutput[i * 10 + 2 * n + 1];
                    originalBitmaps[i, column] = null;
                    originalBitmapConfidences[i, column] = 1;
                }
            }
        }
    }
    finally
    {
        // Released here, before results are delivered and the next screenshot is processed,
        // and now also when any step above throws.
        engine.Dispose();
    }

    _bOriginal.Dispose();
    _bOriginalClone.Dispose();

    if (Program.DBCon.getIniValue<Boolean>(IBESettingsView.DB_GROUPNAME, "CheckNextScreenshotForOne", false.ToString(), false, true))
    {
        Program.DBCon.setIniValue(IBESettingsView.DB_GROUPNAME, "CheckNextScreenshotForOne", false.ToString());
        Form1.InstanceObject.cOcrCaptureAndCorrect.clearOcrOutput();
    }
    else
    {
        // Send the results for this screenshot back to the form.
        _callingForm.cOcrCaptureAndCorrect.DisplayCommodityResults(commodityColumnText, originalBitmaps, originalBitmapConfidences, rowIds, CurrentScreenshot);
    }

    // ...and if any buffered screenshots are waiting to be processed, process the next one.
    if (ScreenshotBuffer.Count > 0)
    {
        var screenshot = ScreenshotBuffer[0];
        ScreenshotBuffer.Remove(screenshot);
        ProcessNewScreenshot(screenshot);
    }

    Working = false;
    Debug.WriteLine("set to " + Working);
}

/// <summary>
/// Counts the pixels of <paramref name="bitmap"/> whose brightness is below
/// <paramref name="brightnessThreshold"/>. The threshold is passed in so that it is looked up
/// once per bitmap instead of once per pixel.
/// </summary>
private static int CountDarkPixels(Bitmap bitmap, int brightnessThreshold)
{
    int darkPixels = 0;
    for (int y = 0; y < bitmap.Height; y++)
    {
        for (int x = 0; x < bitmap.Width; x++)
        {
            if (bitmap.GetPixel(x, y).GetBrightness() < brightnessThreshold)
            {
                darkPixels++;
            }
        }
    }

    return darkPixels;
}
/// <summary>
/// Disposes the engine and then drops the reference held in <c>MeanConfidences</c>.
/// </summary>
protected override void Cleanup()
{
    engine.Dispose();

    MeanConfidences = null;
}
/// <summary>
/// Collects all page data by running OCR on the page at increasing scales
/// (intended for the case where nothing is found at the original scale).
/// Only fields of <paramref name="infoPage"/> that are still empty strings are filled, each
/// with the first non-empty value found.
/// </summary>
/// <param name="sourceImage">Input image; it is never modified or disposed here.</param>
/// <param name="infoPage">Structure to fill.</param>
/// <param name="replaceWords">Replacement dictionary handed on to <c>getTypeSeria</c>.</param>
/// <param name="stepScale">Scale increment per pass; pass <c>k</c> uses factor <c>1 + k * stepScale</c>.</param>
/// <param name="amountOfSteps">Number of rescaling passes after the unscaled pass (k runs from 0 to this value).</param>
public void getData(Image sourceImage, ref Tesseract_OCR_Window.pdfPageInfo infoPage, ref Dictionary<string, string> replaceWords, float stepScale, int amountOfSteps)
{
    const float scaleValue = 1.0f; // original scale
    PageSegMode pSegMode = PageSegMode.SparseText;

    // "using" guarantees both engines are released even when a pass throws.
    using (TesseractEngine ocrRus = new TesseractEngine("./tessdata", "rus", EngineMode.Default))
    using (TesseractEngine ocrEng = new TesseractEngine("./tessdata", "eng", EngineMode.Default))
    {
        TResizeTool resizeTool = new TResizeTool();

        for (int k = 0; k <= amountOfSteps; k++)
        {
            // Once no field is an empty string any more, further passes cannot change
            // infoPage, so the remaining (expensive) OCR runs are skipped.
            if (infoPage.seriaNumber_ != "" && infoPage.country_ != "" && infoPage.orgSert_ != ""
                && infoPage.shortNumber_ != "" && infoPage.typeSeria_ != "")
            {
                break;
            }

            // Every pass scales the unscaled source image, never a previous result.
            int newWidth = Convert.ToInt32((sourceImage.Width) * ((k * stepScale) + scaleValue));
            int newHeight = Convert.ToInt32((sourceImage.Height) * ((k * stepScale) + scaleValue));
            Bitmap imgBitmap = resizeTool.resizeImage(sourceImage, newWidth, newHeight);
            try
            {
                string rescaledPageTextRus;
                string rescaledPageTextEng;

                // A page must be disposed before the same engine can process the next image.
                using (var pageRus = ocrRus.Process(imgBitmap, pSegMode))
                {
                    rescaledPageTextRus = pageRus.GetText();
                }

                using (var pageEng = ocrEng.Process(imgBitmap, pSegMode))
                {
                    rescaledPageTextEng = pageEng.GetText();
                }

                // Fill in whatever is still missing in infoPage.
                if (infoPage.seriaNumber_ == "")
                {
                    string seriaNumber = getSeriaNumber(rescaledPageTextRus, rescaledPageTextEng); // series
                    if (seriaNumber != "")
                    {
                        infoPage.seriaNumber_ = seriaNumber;
                    }
                }

                if (infoPage.country_ == "")
                {
                    string country = getCountry(rescaledPageTextEng); // country
                    if (country != "")
                    {
                        infoPage.country_ = country;
                    }
                }

                if (infoPage.orgSert_ == "")
                {
                    string orgSert = getOrgSert(rescaledPageTextRus); // organisation
                    if (orgSert != "")
                    {
                        infoPage.orgSert_ = orgSert;
                    }
                }

                if (infoPage.shortNumber_ == "")
                {
                    string shortNumber = getShortNumber(rescaledPageTextRus); // short number
                    if (shortNumber != "")
                    {
                        infoPage.shortNumber_ = shortNumber;
                    }
                }

                if (infoPage.typeSeria_ == "")
                {
                    string typeSeria = getTypeSeria(rescaledPageTextRus, rescaledPageTextEng, ref replaceWords); // series type
                    if (typeSeria != "")
                    {
                        infoPage.typeSeria_ = typeSeria;
                    }
                }
            }
            finally
            {
                // Release the scaled copy; the guard protects the caller's image in case
                // the resize helper ever hands back its input unchanged.
                if (imgBitmap != null && !ReferenceEquals(imgBitmap, sourceImage))
                {
                    imgBitmap.Dispose();
                }
            }
        }
    }
}
/// <summary>
/// Runs OCR on the page at every scale from the original up to <paramref name="amountOfSteps"/>
/// enlargements, gathers every non-empty candidate per field, and stores the
/// <c>highestMatch</c> of each candidate list in <paramref name="infoPage"/>.
/// </summary>
/// <param name="sourceImage">Input image; it is never modified or disposed here.</param>
/// <param name="infoPage">Structure to fill; all five fields are overwritten.</param>
/// <param name="replaceWords">Replacement dictionary handed on to <c>getTypeSeria</c>.</param>
/// <param name="stepScale">Scale increment per pass; pass <c>k</c> uses factor <c>1 + k * stepScale</c>.</param>
/// <param name="amountOfSteps">Number of rescaling passes after the unscaled pass (k runs from 0 to this value).</param>
public void getDataViaRescaling(Image sourceImage, ref Tesseract_OCR_Window.pdfPageInfo infoPage, ref Dictionary<string, string> replaceWords, float stepScale, int amountOfSteps)
{
    const float scaleValue = 1.0f; // original scale
    PageSegMode pSegMode = PageSegMode.SparseText;

    List<string> seriaNumberList = new List<string>();
    List<string> countryList = new List<string>();
    List<string> orgSertList = new List<string>();
    List<string> shortNumberList = new List<string>();
    List<string> typeSeriaList = new List<string>();

    // "using" guarantees both engines are released even when a pass throws.
    using (TesseractEngine ocrRus = new TesseractEngine("./tessdata", "rus", EngineMode.Default))
    using (TesseractEngine ocrEng = new TesseractEngine("./tessdata", "eng", EngineMode.Default))
    {
        TResizeTool resizeTool = new TResizeTool();

        for (int k = 0; k <= amountOfSteps; k++)
        {
            // Every pass scales the unscaled source image, never a previous result.
            int newWidth = Convert.ToInt32((sourceImage.Width) * ((k * stepScale) + scaleValue));
            int newHeight = Convert.ToInt32((sourceImage.Height) * ((k * stepScale) + scaleValue));
            Bitmap imgBitmap = resizeTool.resizeImage(sourceImage, newWidth, newHeight);
            try
            {
                string rescaledPageTextRus;
                string rescaledPageTextEng;

                // A page must be disposed before the same engine can process the next image.
                using (var pageRus = ocrRus.Process(imgBitmap, pSegMode))
                {
                    rescaledPageTextRus = pageRus.GetText();
                }

                using (var pageEng = ocrEng.Process(imgBitmap, pSegMode))
                {
                    rescaledPageTextEng = pageEng.GetText();
                }

                // Gather statistics for the later search for the best match.
                string seriaNumber = getSeriaNumber(rescaledPageTextRus, rescaledPageTextEng);
                string country = getCountry(rescaledPageTextEng);
                string orgSert = getOrgSert(rescaledPageTextRus);
                string shortNumber = getShortNumber(rescaledPageTextRus);
                string typeSeria = getTypeSeria(rescaledPageTextRus, rescaledPageTextEng, ref replaceWords);

                if (seriaNumber != "")
                {
                    seriaNumberList.Add(seriaNumber); // series
                }

                if (country != "")
                {
                    countryList.Add(country); // country
                }

                if (orgSert != "")
                {
                    orgSertList.Add(orgSert); // certification body
                }

                if (shortNumber != "")
                {
                    shortNumberList.Add(shortNumber); // short number
                }

                if (typeSeria != "")
                {
                    typeSeriaList.Add(typeSeria); // series type
                }
            }
            finally
            {
                // Release the scaled copy; the guard protects the caller's image in case
                // the resize helper ever hands back its input unchanged.
                if (imgBitmap != null && !ReferenceEquals(imgBitmap, sourceImage))
                {
                    imgBitmap.Dispose();
                }
            }
        }
    }

    // Assemble the data for infoPage.
    infoPage.seriaNumber_ = highestMatch(seriaNumberList);
    infoPage.country_ = highestMatch(countryList);
    infoPage.orgSert_ = highestMatch(orgSertList);
    infoPage.shortNumber_ = highestMatch(shortNumberList);
    infoPage.typeSeria_ = highestMatch(typeSeriaList);
}
/// <summary>
/// Processes a bill image end to end: loads it, runs the edge / Sobel / rectangle / cut
/// pipeline while reporting progress to the forms, then reads address, eid, date, price and
/// kWh from the cut-out targets with Tesseract and logs the combined result.
/// </summary>
/// <param name="obj">
/// An <c>object[]</c> whose element 0 is the <c>MainForm</c> and element 1 the image file path.
/// </param>
public static void RecognizeBill(object obj)
{
    const string tessDataPath = @"C:\Users\Allen Chou\Documents\Visual Studio 2013\Projects\Finder\packages\Tesseract.3.0.2.0\tessdata";

    object[] objs = (object[])obj;
    MainForm mform = (MainForm)objs[0];
    ori_img = (Bitmap)Bitmap.FromFile((string)objs[1]);
    RecognizeForm rform = new RecognizeForm();
    ShowRecognizeForm(mform, rform);

    mform.UpdateText("Clear Edges");
    EdgeFilter();
    rform.UpdateImage(show_img);
    mform.UpdateLog("Clear Edges");

    mform.UpdateText("Execute Sobel Filter");
    SobelFilter();
    rform.UpdateImage(show_img);
    mform.UpdateLog("Execute Sobel Filter");

    mform.UpdateText("Find target Rectangles");
    FindRectangle(10, 20);
    rform.UpdateImage(show_img);
    mform.UpdateLog("Find target Rectangles");

    mform.UpdateText("Cut image");
    CutImage();
    mform.UpdateLog("Cut image");

    mform.UpdateText("Recognize address");
    string address = ReadTrimmedTextWithTesseract(tessDataPath, "chi_tra+eng", targets[1]).Replace(" ", String.Empty);
    mform.UpdateLog("Recognize address");

    mform.UpdateText("Recognize eid, date, price");
    string[] idpdata = ReadTrimmedTextWithTesseract(tessDataPath, "eng", targets[2]).Split(' ');

    // The eid token looks like 12-34-5678-90-1; date and price are the two tokens after it.
    Regex eidPattern = new Regex("\\d{2}-\\d{2}-\\d{4}-\\d{2}-\\d{1}");
    int tar = 0;
    string eid = "";
    for (int i = 0; i < idpdata.Length; i++)
    {
        Match match = eidPattern.Match(idpdata[i]);
        if (match.Success)
        {
            tar = i;
            eid = match.Value;
            break;
        }
    }

    // Missing tokens used to raise IndexOutOfRangeException on this thread; they now read as empty.
    string date = tar + 1 < idpdata.Length ? idpdata[tar + 1] : "";
    string price = tar + 2 < idpdata.Length ? idpdata[tar + 2].Replace("*", String.Empty) : "";
    mform.UpdateLog("Recognize eid, date, price\n");

    mform.UpdateText("Recognize kWh");
    string[] kwhdata = ReadTrimmedTextWithTesseract(tessDataPath, "eng", targets[0]).Split(' ');

    // The kWh value is the first token containing '*' followed by digits.
    Regex kwhPattern = new Regex("\\*\\d{1,}");
    string kwh = "";
    for (int i = 0; i < kwhdata.Length; i++)
    {
        Match match = kwhPattern.Match(kwhdata[i]);
        if (match.Success)
        {
            kwh = match.Value.Replace("*", String.Empty);
            break;
        }
    }

    mform.UpdateLog("Recognize kWh");
    mform.UpdateLog("Result [ " + eid + " , " + date + " , " + price + " , " + kwh + " , " + address + " ]");
    mform.UpdateText("Finished");
    mform.UpdateLog("Finished");
}

/// <summary>
/// Recognises <paramref name="source"/> with a fresh engine for <paramref name="language"/> and
/// returns the trimmed text. Engine, Pix and page are released even when recognition throws.
/// </summary>
private static string ReadTrimmedTextWithTesseract(string tessDataPath, string language, Bitmap source)
{
    using (TesseractEngine engine = new TesseractEngine(tessDataPath, language, EngineMode.Default))
    using (Pix pix = PixConverter.ToPix(source))
    using (Page page = engine.Process(pix))
    {
        return page.GetText().Trim();
    }
}
/// <summary>
/// Forwards disposal to the wrapped <c>_engine</c>.
/// </summary>
public void Dispose()
{
    _engine.Dispose();
}
/// <summary>
/// Disposes the <c>_ocr</c> instance owned by this object.
/// </summary>
protected override void DisposeObject()
{
    _ocr.Dispose();
}
/// <summary>
/// Batch job meant to run off the UI thread: for every entry of <c>files</c> it OCRs the
/// configured crop region (digits only), derives an order code and an accuracy score, reports
/// progress to the UI, records the result in <c>dt</c> and copies the file to
/// <c>path + "/new/"</c> under its code. Processing stops at the first file whose score is
/// neither 100 nor 0, or whose copy fails.
/// </summary>
public void pic_orc()
{
    string outputDir = path + "/new/";
    if (!Directory.Exists(outputDir))
    {
        Directory.CreateDirectory(outputDir);
    }

    int n = files.Count();
    System.Diagnostics.Stopwatch sw1 = new System.Diagnostics.Stopwatch();
    sw1.Start();

    for (int i = 0; i < n; i++)
    {
        // Per-iteration copies: the delegates queued below run later on other threads and must
        // not observe the shared loop variable after it has moved on.
        int index = i;
        string filePath = files[i];

        System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
        string ans = filePath.Replace(path, "");
        sw.Start();

        // Recognise the text in the picture.
        string ocr_text;
        using (var img = new Bitmap(filePath))
        {
            // Show the crop region. InvokeRequired is true here because this runs on a worker thread.
            if (this.pictureBox2.InvokeRequired)
            {
                // The preview is cut now, while img is still alive; the queued delegate used to
                // cut from img after it might already have been disposed.
                var preview = Cut(img, cX, cY, cW, cH);
                BeginInvoke(new Action(() =>
                {
                    if (this.pictureBox2.Image != null)
                    {
                        this.pictureBox2.Image.Dispose();
                    }

                    this.pictureBox2.Height = (int)this.pictureBox2.Width * cH / cW;
                    this.pictureBox2.Image = preview;
                }));
            }

            if (this.pictureBox1.InvokeRequired)
            {
                int displayNumber = currentnum + 1;
                BeginInvoke(new Action(() =>
                {
                    if (this.pictureBox1.Image != null)
                    {
                        this.pictureBox1.Image.Dispose();
                    }

                    this.pictureBox1.Load(filePath);
                    this.label6.Text = displayNumber.ToString();
                }));
            }

            using (var ocr = new TesseractEngine("./tessdata", "chi_sim", EngineMode.TesseractOnly))
            {
                // Recognition variable: only digits can be recognised at the moment.
                ocr.SetVariable("tessedit_char_whitelist", "0123456789");

                // The intermediate bitmaps and the page are released even when recognition throws.
                using (var cropped = Cut(img, cX, cY, cW, cH))
                using (var prepared = BlodBitmap(cropped))
                using (var page = ocr.Process(prepared))
                {
                    ocr_text = page.GetText().Replace(" ", "").Replace("\n", "");
                }
            }
        }

        if (this.textBox1.InvokeRequired)
        {
            // ocr_text is no longer reset to null afterwards, so the queued delegate sees the text.
            BeginInvoke(new Action(() => { this.textBox1.Text = ocr_text; }));
        }

        int l = ocr_text.Length;
        int acc = 0;
        int start = ocr_text.IndexOf(codestart.ToString());
        if (l >= 10 && start >= 0)
        {
            // Clamp the length: Substring(start, 10) threw when fewer than 10 characters follow
            // the keyword, which made the 90% case below unreachable.
            code = ocr_text.Substring(start, Math.Min(10, l - start));
            code1 = code; // remember the last real code
            if (code.Length == 10)
            {
                acc = 100; // long enough, keyword found
            }
            else
            {
                acc = 90; // keyword found, but too short after cutting
            }

            ans += " >" + acc.ToString() + "% 编号:" + code;
            num = 0;
            num++;
        }
        else
        {
            if (l >= 10)
            {
                acc = 50; // long enough, but no keyword
            }
            else if (start < 0)
            {
                acc = 0; // too short and no keyword
            }
            else
            {
                acc = 70; // too short, keyword found
            }

            // No usable code: derive one from the last real code plus a running number.
            ans += " >" + acc.ToString() + "%,编号:" + code1 + "_" + num.ToString();
            code = code1 + "_" + num.ToString();
            num++;
        }

        sw.Stop();
        TimeSpan ts2 = sw.Elapsed;
        string dettime = (ts2.TotalMilliseconds / 1000.0).ToString("0.00") + "秒";
        ans += " 耗时:" + dettime + "\r\n";

        if (this.textBox2.InvokeRequired)
        {
            BeginInvoke(new Action(() =>
            {
                this.textBox2.Text += ans;
                this.textBox2.SelectionStart = this.textBox2.TextLength;
                this.textBox2.ScrollToCaret();
            }));
        }

        if (this.label4.InvokeRequired)
        {
            BeginInvoke(new Action(() => { this.label4.Text = (index + 1).ToString() + "/" + n.ToString(); }));
        }

        if (this.progressBar1.InvokeRequired)
        {
            BeginInvoke(new Action(() => { this.progressBar1.Value += this.progressBar1.Step; }));
        }

        // Snapshot of the shared "code" value for the delegates below.
        string rowCode = code;

        // Work on the row that was just added instead of dt.Rows[i], which pointed at the
        // wrong row whenever the table already contained rows.
        var row = dt.Rows.Add(filePath, rowCode, dettime, acc);

        Thread thread1 = new Thread(() => updatelistview(index, rowCode, dettime, acc.ToString()));
        thread1.Start();

        if (this.label1.InvokeRequired)
        {
            BeginInvoke(new Action(() => { this.label1.Text = rowCode; }));
        }

        try
        {
            if (row["acc"].ToString() == "100" || row["acc"].ToString() == "0")
            {
                System.IO.File.Copy(row["url"].ToString(), path + "/new/" + row["code"].ToString() + ".jpg", true);
                File.AppendAllText(path + "/new/log.txt", "\r\n" + "复制" + row["url"].ToString().Replace(path, "") + " 到 " + row["code"].ToString() + ".jpg <" + row["acc"].ToString() + ">");
            }
            else
            {
                File.AppendAllText(path + "/new/log.txt", "\r\n" + "复制" + row["url"].ToString().Replace(path, "") + " 到 " + row["code"].ToString() + ".jpg失败 <" + row["acc"].ToString() + ">!!!");
                needstop = true;
            }
        }
        catch (Exception exp)
        {
            File.AppendAllText(path + "/new/error.txt", "\r\n" + "复制" + row["url"].ToString().Replace(path, "") + " 到 " + row["code"].ToString() + ".jpg失败," + exp.ToString());
            needstop = true;
        }

        // Decide whether to stop.
        if (needstop)
        {
            MessageBox.Show("识别出错,将结束剩下的操作!");
            needstop = false;
            break;
        }

        currentnum++;
    }

    // Re-enable the buttons.
    if (this.button5.InvokeRequired)
    {
        BeginInvoke(new Action(() => { this.button5.Enabled = true; }));
    }

    if (this.button7.InvokeRequired)
    {
        BeginInvoke(new Action(() => { this.button7.Enabled = true; }));
    }

    if (this.button8.InvokeRequired)
    {
        BeginInvoke(new Action(() => { this.button8.Enabled = true; }));
    }

    if (this.button6.InvokeRequired)
    {
        BeginInvoke(new Action(() => { this.button6.Enabled = true; }));
    }

    if (this.button9.InvokeRequired)
    {
        BeginInvoke(new Action(() => { this.button9.Enabled = true; }));
    }

    sw1.Stop();
    TimeSpan ts3 = sw1.Elapsed;
    string totalLine = "共耗时:" + (ts3.TotalMilliseconds / 1000.0).ToString("0.00") + "秒";

    if (this.textBox2.InvokeRequired)
    {
        BeginInvoke(new Action(() =>
        {
            this.textBox2.Text += totalLine;
            this.textBox2.SelectionStart = this.textBox2.TextLength;
            this.textBox2.ScrollToCaret();
        }));
    }

    File.AppendAllText(path + "/new/log.txt", "\r\n" + totalLine);
    MessageBox.Show("复制完毕");
    needstop = false;
}
/// <summary>
/// Runs OCR on a cropped region of the image at <paramref name="filename"/>,
/// shows the recognized text in <c>textBox1</c> and a classification of that text in <c>label1</c>.
/// </summary>
/// <param name="filename">Path of the image file to load and recognize.</param>
/// <remarks>
/// The classification is computed from the local OCR string rather than by reading
/// <c>textBox1.Text</c> back: when called from a non-UI thread the text box update is only
/// queued with <c>BeginInvoke</c>, so reading the control immediately would see stale text
/// (and would touch the control from the wrong thread).
/// </remarks>
public void orconce(string filename)
{
    string ocrtext;

    // using blocks release the bitmap, engine and page even if recognition throws.
    using (var img = new Bitmap(filename))
    using (var ocr = new TesseractEngine("./tessdata", "chi_sim", EngineMode.Default))
    {
        // Restrict recognition to digits only.
        ocr.SetVariable("tessedit_char_whitelist", "0123456789");

        using (var page = ocr.Process(BlodBitmap(Cut(img, cX, cY, cW, cH))))
        {
            ocrtext = page.GetText().Replace(" ", "").Replace("\n", "");
        }
    }

    // Assign directly on the owning thread, otherwise queue the assignment via BeginInvoke.
    Action<string> showOcrText = text =>
    {
        if (this.textBox1.InvokeRequired)
        {
            BeginInvoke(new Action(() => { this.textBox1.Text = text; }));
        }
        else
        {
            this.textBox1.Text = text;
        }
    };
    Action<string> showStatus = text =>
    {
        if (this.label1.InvokeRequired)
        {
            BeginInvoke(new Action(() => { this.label1.Text = text; }));
        }
        else
        {
            this.label1.Text = text;
        }
    };

    showOcrText(ocrtext);

    int start = ocrtext.IndexOf(codestart.ToString(), StringComparison.Ordinal);
    string status;
    if (ocrtext.Length >= 10)
    {
        if (start < 0)
        {
            // Long enough, but the keyword was not found.
            status = "无,50%,长度够,但没有找到关键字";
        }
        else if (start + 10 <= ocrtext.Length)
        {
            // Keyword found with a full 10 characters available from its position.
            status = "订单编号:" + ocrtext.Substring(start, 10);
        }
        else
        {
            // Keyword found too close to the end: Substring(start, 10) would throw,
            // so report it like the "too short, but keyword present" case.
            status = "无 70%,长度不够,但有关键字!!!!";
        }
    }
    else if (start < 0)
    {
        // Too short and no keyword.
        status = "无 0%,长度不够,也没有找到关键字";
    }
    else
    {
        // Too short, but the keyword is present.
        status = "无 70%,长度不够,但有关键字!!!!";
    }

    showStatus(status);
}
/// <summary>
/// Disposes the <c>tesseractEngine</c> field if it is set.
/// </summary>
/// <remarks>
/// The null-conditional call keeps Dispose from throwing a NullReferenceException
/// when the field is null.
/// </remarks>
public void Dispose()
{
    tesseractEngine?.Dispose();
}
/// <summary>
/// Disposes the <c>_tessEngine</c> field when it is not null; otherwise does nothing.
/// </summary>
public void Dispose()
{
    // Read the field once, matching the single evaluation of the null-conditional form.
    var current = _tessEngine;
    if (current != null)
    {
        current.Dispose();
    }
}
/// <summary>
/// Console entry point: loads a screenshot, converts it to black and white, runs OCR on the
/// question area and on the stacked answers in two parallel tasks, then passes the summed
/// question numbers and the recognized answers to the clicker.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Queue<int> answers = new Queue<int>();
    Queue<int> question = new Queue<int>();
    PicConverter converter = new PicConverter();
    Clicker clicker = new Clicker();
    Stopwatch sw = new Stopwatch();
    int correctAnswer = 0;

    // The engines are owned here so they are released on every path, including
    // failures that happen before or inside the OCR tasks.
    using (var ocr1 = new TesseractEngine(@"./tessdata", "eng", EngineMode.TesseractAndCube))
    using (var ocr2 = new TesseractEngine(@"./tessdata", "eng", EngineMode.TesseractAndCube))
    {
        Console.WriteLine("Введите что-нибудь для старта");
        Console.ReadLine();
        sw.Start();
        try
        {
            // Load the picture and convert it to black and white.
            using (Bitmap image = new Bitmap(@"C:\Users\gvozd\source\repos\Ocr1\Ocr1\NewFolder1\image.png"))
            using (Bitmap imageBW = converter.BitmapToBlackWhite2(image, 0.9))
            {
                imageBW.Save(@"D:\image.png");

                // Cut out the question and build the stacked answers image.
                using (Bitmap questionImg = converter.CutImgFromImg(20, 8, 120, 38, imageBW, "question.png"))
                using (Bitmap stackAns = converter.StackImg(imageBW))
                {
                    var task1 = Task.Factory.StartNew(() =>
                    {
                        Console.WriteLine("таск 1 начался");
                        using (var text1 = ocr1.Process(questionImg))
                        {
                            Console.WriteLine(text1.GetText());
                            PageToNumbers(text1, ref question);
                        }
                        // The first two recognized numbers are added together.
                        for (int i = 0; i < 2; i++)
                        {
                            correctAnswer += question.Dequeue();
                        }
                        Console.WriteLine("таск 1 закончился");
                    });
                    var task2 = Task.Factory.StartNew(() =>
                    {
                        Console.WriteLine("таск 2 начался");
                        using (var text2 = ocr2.Process(stackAns))
                        {
                            Console.WriteLine(text2.GetText());
                            PageToNumbers(text2, ref answers);
                        }
                        Console.WriteLine("таск 2 закончился");
                    });

                    // WaitAll returns (or throws) only after both tasks have finished,
                    // so the bitmaps and engines are no longer in use past this point.
                    Task.WaitAll(task1, task2);
                    sw.Stop();
                    Console.WriteLine((sw.ElapsedMilliseconds / 1000.0).ToString() + " секунд");
                }
            }

            clicker.DoCorrectClick(answers, correctAnswer);
        }
        catch (AggregateException aggregate)
        {
            // Task failures arrive wrapped; print the underlying messages instead of
            // the generic aggregate message.
            foreach (Exception inner in aggregate.Flatten().InnerExceptions)
            {
                Console.WriteLine(inner.Message);
            }
        }
        catch (Exception exception)
        {
            Console.WriteLine(exception.Message);
        }
    }

    Console.ReadKey();
}
/// <summary>
/// Calls <c>Dispose</c> on the <c>_tesseract</c> field.
/// </summary>
public void Uninit() => _tesseract.Dispose();
/// <summary>
/// Disposes the <c>_engine</c> field when it is not null, then sets the field to null.
/// </summary>
public void Dispose()
{
    // Read the field once, matching the single evaluation of the null-conditional form.
    var current = _engine;
    if (current != null)
    {
        current.Dispose();
    }

    _engine = null;
}
/// <summary>
/// Exports one thumbnail per slide keyframe into the "thumbs" folder under the configured
/// output directory, runs OCR on each thumbnail and returns the resulting slides.
/// </summary>
/// <param name="config">Conversion settings; supplies the output directory, metadata path and slide video path.</param>
/// <param name="slidesFileName">Not used by this method.</param>
/// <returns>
/// The slides built so far. If an exception occurs it is logged and the slides created
/// before the failure are returned.
/// </returns>
private Slide[] BuildThumbnailsWin(ConversionConfiguration config, string slidesFileName)
{
    var thumbOutDir = Path.Combine(config.OutputDirectory, "thumbs");
    Directory.CreateDirectory(thumbOutDir);

    List<Slide> result = new List<Slide>();

    using (var ocrEngine = new TesseractEngine(Path.Combine("resources", "tessdata"), "eng", EngineMode.Default))
    {
        try
        {
            dynamic projectJson = JsonConvert.DeserializeObject(File.ReadAllText(config.MetadataPath));

            // The first keyframe is always the start of the video, followed by the
            // slide timestamps listed in the metadata.
            var keyframes = new List<TimeSpan>();
            keyframes.Add(TimeSpan.Zero);
            foreach (string timestamp in projectJson["slides"])
            {
                keyframes.Add(TimeSpan.Parse(timestamp));
            }

            // Iterate by position: looking the keyframe up with IndexOf returns the first
            // match, which picks the wrong successor when two keyframes share a timestamp.
            for (int index = 0; index < keyframes.Count; index++)
            {
                TimeSpan keyframe = keyframes[index];

                TimeSpan? nextKeyframe;
                if (index != keyframes.Count - 1)
                {
                    nextKeyframe = keyframes[index + 1];
                }
                else
                {
                    // The last slide runs until the end of the video.
                    nextKeyframe = FFmpegHelper.GetMediaLength(config.SlideVideoPath);
                }

                // The thumbnail is taken 2 seconds before the next keyframe and named
                // after the slide's position in the list.
                string thumbName = FFmpegHelper.ExportThumbnail(
                    (float)nextKeyframe.GetValueOrDefault().TotalSeconds - 2.0f,
                    config.SlideVideoPath,
                    thumbOutDir,
                    index.ToString());

                var slide = new Slide
                {
                    StartPosition = (float)keyframe.TotalSeconds + 0.2f,
                    Thumbnail = "thumbs/" + thumbName,
                    Ocr = PerformOcr(Path.Combine(thumbOutDir, thumbName), ocrEngine)
                };

                // A slide at the very start keeps position 0 instead of the 0.2 s offset.
                if (keyframe.Equals(TimeSpan.Zero))
                {
                    slide.StartPosition = 0.0f;
                }

                result.Add(slide);
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex.Message);
        }
    }

    return result.ToArray();
}