/// <summary>
/// Initializes the window and then runs a fixed demo pipeline on <c>./carp1.jpg</c>:
/// the image is down-scaled, passed through <c>BinarizationMat</c> and
/// <c>FindContoursMat</c> twice, thresholded and inverted, and finally handed to
/// Tesseract. The recognized text is shown in a message box.
/// </summary>
/// <remarks>
/// Intermediate results are displayed in OpenCV windows named "src", "bin",
/// "fCardMat" and "dst".
/// NOTE(review): the intermediate <c>Mat</c> instances are still not disposed here;
/// only the Tesseract resources are released deterministically.
/// </remarks>
public Window1()
{
    InitializeComponent();

    Mat src = new Mat(@"./carp1.jpg");
    Cv2.ImShow("src", src);
    // Work on a half-size copy; the "src" window is refreshed with it below.
    src = src.Resize(new Size(src.Width / 2, src.Height / 2));

    var binary = BinarizationMat(src);
    Cv2.ImShow("src", src);
    Cv2.ImShow("bin", binary);

    // First pass: locate a region in the binarized image, scale it back up by 2
    // and trim 5% from the left/right and 10% from the top/bottom.
    var fScreenMat = FindContoursMat(binary, src);
    fScreenMat = fScreenMat.Resize(new Size(fScreenMat.Width * 2, fScreenMat.Height * 2));
    fScreenMat = new Mat(
        fScreenMat,
        new Rect(
            (int)(fScreenMat.Width * 0.05),
            (int)(fScreenMat.Height * 0.1),
            fScreenMat.Width - (int)(fScreenMat.Width * 0.1),
            fScreenMat.Height - (int)(fScreenMat.Height * 0.2)));

    // Second pass: run the contour search again on the inverted binarization.
    var fScreenBinaryMat = BinarizationMat(fScreenMat);
    Cv2.BitwiseNot(fScreenBinaryMat, fScreenBinaryMat, new Mat());
    var fCardMat = FindContoursMat(fScreenBinaryMat, fScreenMat);
    Cv2.ImShow("fCardMat", fCardMat);

    // Final clean-up before OCR: binarize, Otsu threshold, invert, shrink by 2.5.
    var dstImg = BinarizationMat(fCardMat);
    dstImg = dstImg.Threshold(50, 255, ThresholdTypes.Otsu | ThresholdTypes.Binary);
    Cv2.BitwiseNot(dstImg, dstImg, new Mat());
    Cv2.ImShow("dst", dstImg);
    dstImg = dstImg.Resize(new Size(dstImg.Width / 2.5, dstImg.Height / 2.5));

    // The engine, the Pix and the page wrap native handles, so release them as
    // soon as the text has been shown.
    using (var engine = new TesseractEngine("./tessdata", "din+eng+chi_sim", EngineMode.Default))
    using (var pix = Pix.LoadTiffFromMemory(dstImg.ToBytes(".tiff")))
    using (var resProcess = engine.Process(pix))
    {
        MessageBox.Show(resProcess.GetText());
    }
}
/// <summary>
/// Reads a TIFF image from <paramref name="fileStream"/>, runs Spanish ("spa") OCR
/// on it and stores the recognized text in <c>_textContents</c>.
/// </summary>
/// <param name="fileStream">Stream positioned at the start of the TIFF data.</param>
/// <remarks>
/// Any exception is caught and its message is stored in <c>_textContents</c>
/// instead of the OCR result; nothing is thrown to the caller.
/// </remarks>
public ImageDocument(Stream fileStream)
{
    try
    {
        using (var engine = new TesseractEngine(@"tessdata", "spa", EngineMode.Default))
        {
            // A single Stream.Read call may return fewer bytes than requested and
            // Stream.Length needs a seekable stream, so drain the stream with
            // CopyTo instead of reading once into a pre-sized buffer.
            byte[] buffer;
            using (var memory = new MemoryStream())
            {
                fileStream.CopyTo(memory);
                buffer = memory.ToArray();
            }

            using (var img = Pix.LoadTiffFromMemory(buffer))
            using (var page = engine.Process(img))
            {
                _textContents = page.GetText();
            }
        }
    }
    catch (Exception e)
    {
        // Deliberate best-effort: surface the failure as the document text.
        _textContents = e.Message;
    }
}
/// <summary>
/// Runs English OCR over an in-memory TIFF image.
/// </summary>
/// <param name="imageBytes">TIFF-encoded image bytes.</param>
/// <returns>
/// The recognized text, or a string starting with "Unexpected Error: " that
/// contains the exception message and its full description when anything fails.
/// </returns>
private string Convert(byte[] imageBytes)
{
    try
    {
        using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var tiff = Pix.LoadTiffFromMemory(imageBytes))
        using (var ocrPage = ocr.Process(tiff))
        {
            string recognized = ocrPage.GetText();

            // The iterator is opened and positioned but its contents are not read.
            using (var cursor = ocrPage.GetIterator())
            {
                cursor.Begin();
            }

            return recognized;
        }
    }
    catch (Exception failure)
    {
        string summary = "Unexpected Error: " + failure.Message;
        return string.Format("{0}, {1},", summary, failure.ToString());
    }
}
/// <summary>
/// Recognizes a single character in <paramref name="input"/>.
/// </summary>
/// <param name="input">Image expected to contain one character.</param>
/// <returns>
/// <c>Result.Ok</c> with the first character of the OCR output, or
/// <c>Result.Fail</c> carrying a message when OCR produced no text or an
/// exception was thrown.
/// </returns>
public Result<char> Process(Mat input)
{
    try
    {
        // VectorOfByte wraps a native buffer; dispose it once the bytes are copied out.
        using (var buff = new VectorOfByte())
        {
            CvInvoke.Imencode(".tiff", input, buff);

            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                engine.DefaultPageSegMode = PageSegMode.SingleChar;
                // Restrict recognition to digits and this fixed set of capital letters.
                engine.SetVariable("tessedit_char_whitelist", "0123456789ABEKMHOPCTYXDI");

                using (var img = Pix.LoadTiffFromMemory(buff.ToArray()))
                using (var page = engine.Process(img))
                {
                    var recognized = page.GetText();
                    if (string.IsNullOrEmpty(recognized))
                    {
                        // Previously this surfaced as an index-out-of-range message.
                        return Result.Fail<char>("No character was recognized.");
                    }

                    var text = recognized[0];
                    _debugLogger.Log(debugLogBuilder => debugLogBuilder.AddMessage("Letter").AddImage(input).AddMessage($"has been recognized as: {text}"));
                    return Result.Ok(text);
                }
            }
        }
    }
    catch (Exception e)
    {
        return Result.Fail<char>(e.Message);
    }
}
/// <summary>
/// Runs English OCR on <paramref name="image"/> and normalizes the result to a
/// lower-case alphanumeric token.
/// </summary>
/// <param name="image">Bitmap to recognize; it is encoded as TIFF before OCR.</param>
/// <returns>
/// The recognized text with every character outside <c>[a-zA-Z0-9]</c> removed,
/// lower-cased, and with 'i' replaced by '1' and 'o' by '0'; an empty string
/// when OCR yields null or only white space.
/// </returns>
/// <remarks>
/// An earlier, commented-out variant used <c>OcrApi</c> with a digits-only
/// whitelist instead of post-processing the text.
/// </remarks>
public static string ReadText(Bitmap image)
{
    var nonAlphanumeric = new Regex("[^a-zA-Z0-9]");

    using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    using (var tiff = Pix.LoadTiffFromMemory(image.ToBytes(ImageFormat.Tiff)))
    using (var ocrPage = ocr.Process(tiff))
    {
        string raw = ocrPage.GetText();
        if (string.IsNullOrWhiteSpace(raw))
        {
            return string.Empty;
        }

        // Strip everything that is not a letter or a digit.
        string cleaned = nonAlphanumeric.Replace(raw, string.Empty);

        // Compensate for letters that are commonly misread digits.
        string lowered = cleaned.ToLowerInvariant();
        return lowered.Replace('i', '1').Replace('o', '0');
    }
}
/// <summary>
/// Runs OCR on <paramref name="image"/> and returns the recognized words laid
/// out by block, paragraph and text line.
/// </summary>
/// <param name="image">Image to recognize; converted with <c>ConvertJpegToTiff</c> first.</param>
/// <returns>
/// Words separated by a single space, with "&lt;BLOCK&gt;" on its own line at the
/// start of each block, a line break after the last word of each text line and an
/// additional line break after the last line of each paragraph.
/// </returns>
/// <remarks>
/// The mean confidence and the plain <c>GetText</c> output are written to the console.
/// </remarks>
public string OcrImage(Image image)
{
    var layout = new StringBuilder();

    using (var tiff = Pix.LoadTiffFromMemory(ConvertJpegToTiff(image)))
    using (var ocrPage = this.engine.Process(tiff))
    {
        string plainText = ocrPage.GetText();
        Console.WriteLine("Mean confidence: {0}", ocrPage.GetMeanConfidence());
        Console.WriteLine("Text (GetText): \r\n{0}", plainText);
        Console.WriteLine("Text (iterator):");

        using (var cursor = ocrPage.GetIterator())
        {
            cursor.Begin();

            do // one pass per block
            {
                do // one pass per paragraph within the block
                {
                    do // one pass per text line within the paragraph
                    {
                        do // one pass per word within the text line
                        {
                            bool startsBlock = cursor.IsAtBeginningOf(PageIteratorLevel.Block);
                            if (startsBlock)
                            {
                                layout.AppendLine("<BLOCK>");
                            }

                            layout.Append(cursor.GetText(PageIteratorLevel.Word));
                            layout.Append(" ");

                            bool endsLine = cursor.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word);
                            if (endsLine)
                            {
                                layout.AppendLine();
                            }
                        }
                        while (cursor.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

                        bool endsParagraph = cursor.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine);
                        if (endsParagraph)
                        {
                            layout.AppendLine();
                        }
                    }
                    while (cursor.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                }
                while (cursor.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
            }
            while (cursor.Next(PageIteratorLevel.Block));
        }
    }

    return layout.ToString();
}
/// <summary>
/// Runs OCR on <paramref name="bmp"/> with the shared <c>_tesseract</c> engine and
/// returns the text with spaces and line feeds removed.
/// </summary>
/// <param name="bmp">Image to recognize; converted with <c>GetByteArrayFromImage</c>.</param>
/// <returns>The recognized text without " " and "\n" characters.</returns>
public string GetTextFromBitmap(Image bmp)
{
    // Both the Pix and the Page hold native resources. The page in particular
    // must be disposed before the shared engine is asked to process another image.
    using (var pix = Pix.LoadTiffFromMemory(GetByteArrayFromImage(bmp)))
    using (var page = _tesseract.Process(pix))
    {
        // NOTE(review): the first two Replace calls look identical; the second may
        // originally have targeted a different white-space character - confirm.
        var text = page.GetText().Replace(" ", "").Replace(" ", "").Replace("\n", "");
        return text;
    }
}
/// <summary>
/// Converts a <see cref="Bitmap"/> into a Tesseract <c>Pix</c> by encoding it as
/// TIFF in memory.
/// </summary>
/// <param name="bitmap">Bitmap to convert.</param>
/// <returns>A new <c>Pix</c>; the caller is responsible for disposing it.</returns>
private static Pix ToPix(Bitmap bitmap)
{
    byte[] tiffBytes;
    using (var buffer = new MemoryStream())
    {
        bitmap.Save(buffer, System.Drawing.Imaging.ImageFormat.Tiff);
        tiffBytes = buffer.ToArray();
    }

    return Pix.LoadTiffFromMemory(tiffBytes);
}
/// <summary>
/// Runs English OCR over the TIFF image held in <paramref name="imageStream"/>.
/// </summary>
/// <param name="imageStream">Memory stream whose whole content is a TIFF image.</param>
/// <returns>The recognized text.</returns>
public string ProcessImage(MemoryStream imageStream)
{
    using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        byte[] tiffBytes = imageStream.ToArray();
        using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
        {
            using (var ocrPage = ocr.Process(tiff))
            {
                string recognized = ocrPage.GetText();
                return recognized;
            }
        }
    }
}
/// <summary>
/// Converts a <see cref="Bitmap"/> into a Tesseract <c>Pix</c> by encoding it as
/// TIFF in memory.
/// </summary>
/// <param name="bmp">Bitmap to convert.</param>
/// <returns>A new <c>Pix</c>; the caller is responsible for disposing it.</returns>
Pix ConvertFromBitmap(Bitmap bmp)
{
    // The stream is only a scratch buffer for the TIFF bytes, so release it
    // before returning.
    using (MemoryStream byteStream = new MemoryStream())
    {
        bmp.Save(byteStream, System.Drawing.Imaging.ImageFormat.Tiff);
        return Pix.LoadTiffFromMemory(byteStream.ToArray());
    }
}
/// <summary>
/// Extracts the text contained in an image.
/// </summary>
/// <param name="ImgData">Encoded image file bytes.</param>
/// <param name="TessData_Path">Path of the Tesseract language data directory.</param>
/// <param name="TessData_Language">Language(s) to recognize.</param>
/// <returns>The text recognized in the image.</returns>
public static string GetImgText(byte[] ImgData, string TessData_Path = @"./tessdata", string TessData_Language = "eng")
{
    // Decode through OpenCV, then re-encode as TIFF via System.Drawing so that
    // Pix.LoadTiffFromMemory can read it. Every intermediate object wraps native
    // or GDI+ resources and is released as soon as the TIFF bytes exist.
    byte[] tiffBytes;
    using (Mat MatImg = Todo_OpenCvSharp4.ImgByteArrayToMat(ImgData))
    using (var inms = new MemoryStream(MatImg.ToBytes()))
    using (var decoded = System.Drawing.Bitmap.FromStream(inms))
    using (var outms = new MemoryStream())
    {
        decoded.Save(outms, System.Drawing.Imaging.ImageFormat.Tiff);
        tiffBytes = outms.ToArray();
    }

    using (var pix = Pix.LoadTiffFromMemory(tiffBytes))
    using (var engine = new TesseractEngine(TessData_Path, TessData_Language, EngineMode.Default))
    using (Tesseract.Page page = engine.Process(pix))
    {
        return page.GetText();
    }
}
/// <summary>
/// Recognizes the text in a TIFF image on a thread-pool thread.
/// </summary>
/// <param name="tiff">TIFF-encoded image bytes.</param>
/// <param name="tessdataLocation">Value forwarded unchanged to <c>ReadTextFromPix</c>.</param>
/// <returns>A task producing whatever <c>ReadTextFromPix</c> returns for the image.</returns>
public static async Task<string> GetTextFromTiff(byte[] tiff, string tessdataLocation = null)
{
    var recognized = await Task.Run(() =>
    {
        using (var picture = Pix.LoadTiffFromMemory(tiff))
        {
            return ReadTextFromPix(picture, tessdataLocation);
        }
    });

    return recognized;
}
/// <summary>
/// Runs OCR on <paramref name="image"/> with the shared <c>_tessEngine</c> and
/// strips all white space from the result.
/// </summary>
/// <param name="image">Image to recognize; encoded as TIFF before OCR.</param>
/// <returns>The recognized text with every run of white space removed.</returns>
public string GetStringFromImage(Mat image)
{
    byte[] encoded = image.ImEncode(".tiff");

    using (var tiff = Pix.LoadTiffFromMemory(encoded))
    using (var ocrPage = _tessEngine.Process(tiff))
    {
        string raw = ocrPage.GetText();
        return Regex.Replace(raw, @"\s+", "");
    }
}
/// <summary>
/// Encodes <paramref name="image"/> as TIFF and passes it to <c>ProcessProc</c>.
/// </summary>
/// <param name="image">Image to recognize.</param>
/// <param name="language">Language identifier forwarded to <c>ProcessProc</c>.</param>
/// <returns>The result produced by <c>ProcessProc</c>.</returns>
public static OcrResult Process(Image image, string language = "eng")
{
    byte[] tiffBytes;
    using (var buffer = new MemoryStream())
    {
        image.Save(buffer, System.Drawing.Imaging.ImageFormat.Tiff);
        tiffBytes = buffer.ToArray();
    }

    using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
    {
        return ProcessProc(tiff, language);
    }
}
/// <summary>
/// Runs digits-only OCR over a grid of images.
/// </summary>
/// <param name="imageRows">Rows of images to recognize.</param>
/// <returns>
/// For each input row, the trimmed OCR text of each image, in the same order.
/// The result is fully materialized before the method returns.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="imageRows"/> is null.</exception>
public IEnumerable<IEnumerable<string>> OcrImages(IEnumerable<IEnumerable<Mat>> imageRows)
{
    if (imageRows == null)
    {
        throw new System.ArgumentNullException(nameof(imageRows));
    }

    var results = new List<IEnumerable<string>>();

    // The work is done eagerly so that the engine can be disposed here; a lazy
    // query would need the engine to outlive this method and would repeat the
    // OCR on every enumeration.
    using (var engine = new TesseractEngine(HostingEnvironment.MapPath(@"~/tessdata"), "eng"))
    {
        engine.SetVariable("tessedit_char_whitelist", "0123456789");

        foreach (var row in imageRows)
        {
            var rowTexts = new List<string>();
            foreach (var image in row)
            {
                using (var pix = Pix.LoadTiffFromMemory(image.ToBytes(".tiff")))
                using (var page = engine.Process(pix))
                {
                    rowTexts.Add(page.GetText().Trim());
                }
            }

            results.Add(rowTexts);
        }
    }

    return results;
}
/// <summary>
/// Runs English OCR over every page of a PDF and writes the hOCR output of each
/// page to a file.
/// </summary>
/// <param name="filelocationlocation">Path of the PDF file to read.</param>
/// <param name="datafilelocation">
/// Composite format string for the output path; <c>{0}</c> receives the 1-based
/// page number and <c>{1}</c> the extension "html". It is also passed to
/// <c>PdfToTiff</c>.
/// </param>
public static void ConvertorFromPdFtoData(string filelocationlocation, string datafilelocation)
{
    string tessdataPath = SolutionPath + "\\Tessdata\\";

    // The file handle, the engine and each page's Pix are released
    // deterministically, including when a page fails.
    using (Stream str = File.OpenRead(filelocationlocation))
    using (var engine = new TesseractEngine(tessdataPath, "eng", EngineMode.Default))
    {
        // NOTE(review): the page count is re-evaluated on every iteration, as
        // before; hoist it if GetPdfPageCount is known to be side-effect free.
        for (int i = 1; i <= GetPdfPageCount(str); i++)
        {
            using (var pix = Pix.LoadTiffFromMemory(PdfToTiff(str, i, datafilelocation)))
            using (var process = engine.Process(pix))
            {
                File.WriteAllText(string.Format(datafilelocation, i, "html"), process.GetHOCRText(1));
            }
        }
    }
}
/// <summary>
/// Demo: loads "20200917101.jpg", whitens every pixel that is not dark, runs
/// English OCR on the result and saves the processed image as a time-stamped PNG
/// in the current directory.
/// </summary>
internal static void Demonstration()
{
    var imagePath = "20200917101.jpg";

    using (Bitmap image = new Bitmap(imagePath))
    using (MemoryStream ms = new MemoryStream())
    {
        // Simple brute-force clean-up: any pixel whose R + G + B is at least 256
        // is forced to white, leaving only dark pixels untouched.
        int w = image.Width;
        int h = image.Height;
        Color white = Color.White;
        for (int y = 0; y < h; ++y)
        {
            for (int x = 0; x < w; ++x)
            {
                Color c = image.GetPixel(x, y);
                if (c.R + c.G + c.B >= 256)
                {
                    image.SetPixel(x, y, white);
                }
            }
        }

        // Pix.LoadTiffFromMemory needs TIFF data, so encode as TIFF (not JPEG),
        // and use ToArray so only the bytes actually written are passed on;
        // GetBuffer would also expose the stream's unused capacity.
        image.Save(ms, System.Drawing.Imaging.ImageFormat.Tiff);

        using (var engine = new TesseractEngine(@"D:\github\learning\CSharps\ImageProcessingProjects\bin\Debug\tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadTiffFromMemory(ms.ToArray()))
        using (var page = engine.Process(img))
        {
            // The text is not used further; inspect it in a debugger.
            var text = page.GetText();
        }

        image.Save($"{DateTime.Now.ToString("yyyyMMddHHmmss")}.png");
    }
}
/// <summary>
/// Runs English OCR over a TIFF screenshot, treating it as a single block of text.
/// </summary>
/// <param name="image">TIFF-encoded image bytes.</param>
/// <returns>The recognized text.</returns>
/// <remarks>
/// The engine is created with the configuration file <c>tessdata\configs\temtem</c>.
/// </remarks>
public string GetScreenText(byte[] image)
{
    using (var ocr = new TesseractEngine(@"tessdata", "eng", EngineMode.Default, @"tessdata\configs\temtem"))
    using (var screenshot = Pix.LoadTiffFromMemory(image))
    using (var block = ocr.Process(screenshot, PageSegMode.SingleBlock))
    {
        return block.GetText();
    }
}
/// <summary>
/// Runs English OCR over the TIFF image in <paramref name="imageStream"/> with
/// all five <c>segment_penalty_*</c> engine variables set to "0".
/// </summary>
/// <param name="imageStream">Memory stream whose whole content is a TIFF image.</param>
/// <returns>The recognized text.</returns>
public string ProcessImage(MemoryStream imageStream)
{
    // Applied in this order, each with the value "0".
    string[] zeroedPenalties =
    {
        "segment_penalty_garbage",
        "segment_penalty_dict_nonword",
        "segment_penalty_dict_frequent_word",
        "segment_penalty_dict_case_ok",
        "segment_penalty_dict_case_bad",
    };

    using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        foreach (string variableName in zeroedPenalties)
        {
            ocr.SetVariable(variableName, "0");
        }

        byte[] tiffBytes = imageStream.ToArray();
        using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
        {
            using (var ocrPage = ocr.Process(tiff))
            {
                return ocrPage.GetText();
            }
        }
    }
}
/// <summary>
/// Runs English OCR over a TIFF image, stores the full text in <c>Message</c> and
/// appends one <c>OCRRow</c> per text line, with per-word position, size and
/// confidence, to <c>listOCRRow</c>.
/// </summary>
/// <param name="fileName">TIFF-encoded image bytes (despite the parameter name).</param>
private void OCR(byte[] fileName)
{
    using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    using (var img = Pix.LoadTiffFromMemory(fileName))
    using (var page = engine.Process(img))
    {
        Message = page.GetText();
        OCRRow row = new OCRRow();

        using (var iter = page.GetIterator())
        {
            iter.Begin();
            do
            {
                if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
                {
                    // Flush the row collected so far and start a new one.
                    // NOTE(review): if the very first word starts a text line, the
                    // initial empty row is stored too - confirm callers expect that.
                    listOCRRow.Add(row);
                    row = new OCRRow();
                    row.Row = iter.GetText(PageIteratorLevel.TextLine);
                    row.Confidence = iter.GetConfidence(PageIteratorLevel.TextLine);
                }

                WordInfo wordInfo = new WordInfo();
                wordInfo.Word = iter.GetText(PageIteratorLevel.Word);
                if (!string.IsNullOrWhiteSpace(wordInfo.Word))
                {
                    // The word image is needed only for its dimensions; dispose it
                    // right away instead of leaking one Pix per word.
                    using (Pix pix = iter.GetImage(PageIteratorLevel.Word, 0, out wordInfo.XPos, out wordInfo.YPos))
                    {
                        wordInfo.Width = pix.Width;
                        wordInfo.Height = pix.Height;
                    }

                    wordInfo.Confidence = iter.GetConfidence(PageIteratorLevel.Word);
                    row.ListWord.Add(wordInfo);
                }
            }
            while (iter.Next(PageIteratorLevel.Word));

            // Store the last row, which no later line start would flush.
            listOCRRow.Add(row);
        }
    }
}
/// <summary>
/// Encodes <paramref name="bitmap"/> as TIFF and extracts its text with an engine
/// obtained from <c>CreateEngine</c>.
/// </summary>
/// <param name="bitmap">Bitmap to recognize.</param>
/// <returns>The list produced by <c>GetText</c> for the image.</returns>
public static List<string> ExtractText(Bitmap bitmap)
{
    byte[] tiffBytes;
    using (var buffer = new MemoryStream())
    {
        bitmap.Save(buffer, System.Drawing.Imaging.ImageFormat.Tiff);
        tiffBytes = buffer.ToArray();
    }

    using (var ocr = CreateEngine())
    using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
    {
        return GetText(ocr, tiff);
    }
}
/// <summary>
/// Lazily runs English OCR over a TIFF image and yields the items that
/// <c>ExtractPage</c> produces for each block of the page.
/// </summary>
/// <param name="data">TIFF-encoded image bytes.</param>
/// <returns>
/// A deferred sequence; the engine, image, page and iterator are created when
/// enumeration starts and disposed when it ends.
/// </returns>
public IEnumerable<TextBlockItem> Parse(byte[] data)
{
    string ocrLanguage = "eng";
    logger.LogDebug("Constructing {0} {1}", location, ocrLanguage);

    using (var ocr = new TesseractEngine(location, ocrLanguage, EngineMode.Default))
    {
        using (var tiff = Pix.LoadTiffFromMemory(data))
        {
            using (var ocrPage = ocr.Process(tiff))
            {
                using (var cursor = ocrPage.GetIterator())
                {
                    cursor.Begin();

                    bool hasNextBlock;
                    do
                    {
                        foreach (var item in ExtractPage(cursor))
                        {
                            yield return item;
                        }

                        hasNextBlock = cursor.Next(PageIteratorLevel.Block);
                    }
                    while (hasNextBlock);
                }
            }
        }
    }
}
/// <summary>
/// Decodes an encoded image, re-encodes it as TIFF and runs English OCR on it.
/// </summary>
/// <param name="img">Encoded image bytes readable by <c>Image.FromStream</c>.</param>
/// <returns>A task producing the recognized text.</returns>
/// <remarks>
/// The method contains no <c>await</c>, so the work runs synchronously on the
/// calling thread; <c>async</c> is kept so that failures are still reported
/// through the returned task.
/// </remarks>
public async Task<string> RecognizeFromFile(byte[] img)
{
    // Every object below owns native or GDI+ resources; the using statements
    // release them on failure as well as on success.
    using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    using (var jpgStream = new MemoryStream(img))
    using (var image = Image.FromStream(jpgStream))
    using (var tiffStream = new MemoryStream())
    {
        image.Save(tiffStream, System.Drawing.Imaging.ImageFormat.Tiff);

        using (var pix = Pix.LoadTiffFromMemory(tiffStream.ToArray()))
        using (var page = engine.Process(pix))
        {
            return page.GetText();
        }
    }
}
/// <summary>
/// Runs OCR over an encoded image using the language and precision options
/// translated by <c>_optionService</c>.
/// </summary>
/// <param name="imageBytes">Encoded image bytes.</param>
/// <param name="rl">Recognition language; translated to the engine's language string.</param>
/// <param name="rp">
/// Recognition precision; when the fourth item of its translation is the TIFF
/// image format the bytes are loaded as TIFF, otherwise with the generic loader.
/// </param>
/// <returns>The recognized text, or an empty string if an exception occurred.</returns>
string ImageToText(byte[] imageBytes, RecognitionLanguage rl, RecognitionPrecision rp)
{
    try
    {
        using (var ocr = new TesseractEngine(@"./Files", _optionService.TranslateLanguage(rl), EngineMode.Default))
        {
            bool loadAsTiff = _optionService.TranslatePrecision(rp).Item4 == System.Drawing.Imaging.ImageFormat.Tiff;

            using (var picture = loadAsTiff ? Pix.LoadTiffFromMemory(imageBytes) : Pix.LoadFromMemory(imageBytes))
            using (var ocrPage = ocr.Process(picture))
            {
                return ocrPage.GetText().ToString();
            }
        }
    }
    catch (Exception failure)
    {
        // Failures are only traced to the debug output.
        System.Diagnostics.Debug.WriteLine("Unexpected Error: " + failure.Message);
        System.Diagnostics.Debug.WriteLine("Details: ");
        System.Diagnostics.Debug.WriteLine(failure.ToString());
    }

    return "";
}
/// <summary>
/// Runs single-line OCR over each region, in the order given by
/// <c>OrderRegions</c>, and collects the text of sufficiently confident regions.
/// </summary>
/// <param name="regions">Regions whose <c>Tiff</c> bytes are recognized.</param>
/// <param name="minConfidence">Minimum mean confidence a region needs to be kept.</param>
/// <returns>The content accumulated by <c>EvaluateText</c>.</returns>
private string Ocr(IList<Region> regions, float minConfidence)
{
    var collected = new StringBuilder();

    foreach (var candidate in OrderRegions(regions))
    {
        using (var tiff = Pix.LoadTiffFromMemory(candidate.Tiff))
        using (var ocrPage = OcrEngine.Instance.Process(tiff, PageSegMode.SingleLine))
        {
            var meanConfidence = ocrPage.GetMeanConfidence();

            // Written as a negated ">=" so the keep/skip decision matches the
            // plain ">=" test for every value.
            if (!(meanConfidence >= minConfidence))
            {
                continue;
            }

            EvaluateText(ocrPage.GetText(), collected);
        }
    }

    return collected.ToString();
}
/// <summary>
/// Lazily recognizes text in every bitmap variant that the configured
/// <c>BitmapGenerators</c> derive from <paramref name="bitmap"/>.
/// </summary>
/// <param name="bitmap">Source image; passed through <c>ScaleIfEnabled</c> first.</param>
/// <param name="config">Zone configuration applied to the engine before each OCR run
/// and used to filter the raw text.</param>
/// <returns>
/// One result per generated bitmap, holding the filtered text and the page's mean
/// confidence multiplied by 0.9.
/// </returns>
public override IEnumerable<RecognitionResult> Recognize(IImage bitmap, ZoneConfiguration config)
{
    bitmap = ScaleIfEnabled(bitmap);

    foreach (var generator in BitmapGenerators)
    {
        foreach (var variant in generator.Generate(bitmap))
        {
            var tiffBytes = ConvertToTiffByteArray(variant);
            SetVariablesAccordingToConfig(engine, config);

            using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
            using (var ocrPage = engine.Process(tiff, PageSegMode.SingleBlock))
            {
                var bestMatch = config.TextualDataFilter.GetBestMatchFromRaw(ocrPage.GetText());
                var scaledConfidence = ocrPage.GetMeanConfidence() * 0.9;
                yield return new RecognitionResult(bestMatch, scaledConfidence);
            }
        }
    }
}
/// <summary>
/// Recognizes the text of the subtitle bitmap at <paramref name="index"/>.
/// </summary>
/// <param name="engine">Engine used for recognition; not disposed here.</param>
/// <param name="index">Index passed to <c>GetSubtitleBitmap</c>.</param>
/// <returns>The trimmed OCR text, or null if the engine returned null.</returns>
private string GetText(TesseractEngine engine, int index)
{
    byte[] tiffBytes;
    using (var tiffStream = new MemoryStream())
    {
        using (var subtitle = GetSubtitleBitmap(index))
        {
            subtitle.Save(tiffStream, System.Drawing.Imaging.ImageFormat.Tiff);
            tiffBytes = ToByteArray(tiffStream);
        }
    }

    using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
    using (var ocrPage = engine.Process(tiff))
    {
        string raw = ocrPage.GetText();
        return raw?.Trim();
    }
}
/// <summary>
/// Runs English OCR on <paramref name="bitmap"/> using the language data in
/// <c>.\tessdata</c>.
/// </summary>
/// <param name="bitmap">Bitmap to recognize; encoded as TIFF before OCR.</param>
/// <returns>The recognized text, or null if any exception occurred.</returns>
static string doOCR(Bitmap bitmap)
{
    try
    {
        using (var engine = new TesseractEngine(@".\tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadTiffFromMemory(bitmapToByteArray(bitmap, System.Drawing.Imaging.ImageFormat.Tiff)))
        using (var page = engine.Process(img))
        {
            return page.GetText();
        }
    }
    catch (Exception ex)
    {
        // Still best-effort (callers get null), but leave a trace instead of
        // discarding the failure completely.
        System.Diagnostics.Debug.WriteLine("doOCR failed: " + ex);
    }

    return null;
}
/// <summary>
/// Runs OCR over the TIFF image in <paramref name="memoryStream"/> with a shared,
/// lazily created English engine.
/// </summary>
/// <param name="memoryStream">Stream holding the TIFF image; may be null or empty.</param>
/// <returns>
/// The recognized text with every line feed replaced by a space, or an empty
/// string when the stream is null or has no content.
/// </returns>
private static string RunTesseractOcr(MemoryStream memoryStream)
{
    bool hasContent = memoryStream != null && memoryStream.Length != 0;
    if (!hasContent)
    {
        return "";
    }

    // Create and configure the shared engine on first use.
    if (ocrEngine == null)
    {
        string languageCode = @"eng";
        ocrEngine = new TesseractEngine(@".\tessdata", languageCode);
        ocrEngine.SetVariable("load_system_dawg", false);
        ocrEngine.SetVariable("load_freq_dawg", false);
    }

    byte[] tiffBytes = memoryStream.ToArray();
    using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
    using (var ocrPage = ocrEngine.Process(tiff))
    {
        string raw = ocrPage.GetText();
        return raw.Replace('\n', ' ');
    }
}
/*private void BulkTest()
 * {
 *     var folder = @"C:\Users\Mike\Desktop\text recognition";
 *     foreach (var file in Directory.EnumerateFiles(folder))
 *     {
 *         var output = TryOCR(file);
 *         Cv2.ImWrite("out\\" + Path.GetFileName(file) + ".jpg", output.DebugImage);
 *     }
 * }*/

/// <summary>
/// Runs single-block OCR over a TIFF image and tries to interpret the result as a
/// postal address whose first line names a person.
/// </summary>
/// <param name="buffer">TIFF-encoded image bytes.</param>
/// <param name="output">
/// On success, the first three non-empty lines joined with "\n"; otherwise an
/// empty string.
/// </param>
/// <returns>
/// True when <c>HeuristicAddressCheck</c> accepts the lines and the classifier
/// labels part of the first line as "PERSON"; false otherwise.
/// </returns>
private bool TryOcrAddress(byte[] buffer, out string output)
{
    output = string.Empty;

    // The Pix wraps a native image, so dispose it together with the page.
    using (var pix = Pix.LoadTiffFromMemory(buffer))
    using (var page = _ocr.Process(pix, PageSegMode.SingleBlock))
    {
        var pageText = page.GetText();
        var lines = pageText.Split(new string[2] { "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries);

        // The length check protects the lines[0] access below in case the
        // heuristic accepts an empty array.
        if (!HeuristicAddressCheck(lines) || lines.Length == 0)
        {
            return false;
        }

        // Assume a three-line address.
        var flat = String.Join("\n", lines.Take(3));

        bool hasPerson = false;
        foreach (Triple result in _classifier.classifyToCharacterOffsets(lines[0]).toArray())
        {
            if (result.first().ToString() == "PERSON")
            {
                hasPerson = true;
                break;
            }
        }

        if (!hasPerson)
        {
            return false;
        }

        output = flat;
        return true;
    }
}