Exemple #1
0
        /// <summary>
        /// Loads the sample photo, narrows it down in two passes (the project helpers
        /// <c>BinarizationMat</c> and <c>FindContoursMat</c> are applied first to the whole
        /// picture and then to the cropped intermediate result), runs OCR on the final
        /// region and shows the recognized text in a message box.
        /// </summary>
        /// <remarks>
        /// The intermediate <c>Mat</c> instances are still left to the finalizer because
        /// several of them are handed to <c>Cv2.ImShow</c>; only the Tesseract objects and
        /// the throwaway mask matrices are disposed deterministically here.
        /// </remarks>
        public Window1()
        {
            InitializeComponent();

            Mat src = new Mat(@"./carp1.jpg");

            Cv2.ImShow("src", src);

            // Work on a half-size copy; the "src" window is refreshed with it below.
            src = src.Resize(new Size(src.Width / 2, src.Height / 2));
            var binary = BinarizationMat(src);

            Cv2.ImShow("src", src);
            Cv2.ImShow("bin", binary);
            var fScreenMat = FindContoursMat(binary, src);

            // Scale the first-pass result back up, then trim 5% from the left/right
            // and 10% from the top/bottom edges.
            fScreenMat = fScreenMat.Resize(new Size(fScreenMat.Width * 2, fScreenMat.Height * 2));
            fScreenMat = new Mat(fScreenMat,
                                 new Rect((int)(fScreenMat.Width * 0.05), (int)(fScreenMat.Height * 0.1),
                                          fScreenMat.Width - (int)(fScreenMat.Width * 0.1), fScreenMat.Height - (int)(fScreenMat.Height * 0.2)));
            var fScreenBinaryMat = BinarizationMat(fScreenMat);

            // The empty mask is only a placeholder argument; dispose it right away.
            using (var emptyMask = new Mat())
            {
                Cv2.BitwiseNot(fScreenBinaryMat, fScreenBinaryMat, emptyMask);
            }
            var fCardMat = FindContoursMat(fScreenBinaryMat, fScreenMat);

            Cv2.ImShow("fCardMat", fCardMat);
            var dstImg = BinarizationMat(fCardMat);

            dstImg = dstImg.Threshold(50, 255, ThresholdTypes.Otsu | ThresholdTypes.Binary);
            using (var emptyMask = new Mat())
            {
                Cv2.BitwiseNot(dstImg, dstImg, emptyMask);
            }
            Cv2.ImShow("dst", dstImg);
            dstImg = dstImg.Resize(new Size(dstImg.Width / 2.5, dstImg.Height / 2.5));

            // Engine, Pix and Page all wrap native handles, so release them deterministically.
            string recognizedText;
            using (var engine = new TesseractEngine("./tessdata", "din+eng+chi_sim", EngineMode.Default))
            using (var pix = Pix.LoadTiffFromMemory(dstImg.ToBytes(".tiff")))
            using (var resProcess = engine.Process(pix))
            {
                recognizedText = resProcess.GetText();
            }

            MessageBox.Show(recognizedText);
        }
Exemple #2
0
 /// <summary>
 /// Runs OCR (Spanish language data) over a TIFF image supplied as a stream and stores
 /// the recognized text in <c>_textContents</c>. If anything fails, the exception
 /// message is stored there instead.
 /// </summary>
 /// <param name="fileStream">
 /// Stream holding TIFF data. It is read from its current position to the end and does
 /// not need to be seekable.
 /// </param>
 public ImageDocument(Stream fileStream)
 {
     try
     {
         using (var engine = new TesseractEngine(@"tessdata", "spa", EngineMode.Default))
         {
             // Stream.Read may return fewer bytes than requested and Stream.Length is
             // unsupported on non-seekable streams, so drain the stream through CopyTo.
             byte[] buffer;
             using (var copy = new MemoryStream())
             {
                 fileStream.CopyTo(copy);
                 buffer = copy.ToArray();
             }

             using (var img = Pix.LoadTiffFromMemory(buffer))
             using (var page = engine.Process(img))
             {
                 _textContents = page.GetText();
             }
         }
     }
     catch (Exception e)
     {
         // Best effort: surface the failure reason as the document text.
         _textContents = e.Message;
     }
 }
 /// <summary>
 /// Recognizes English text in a TIFF image held in memory.
 /// </summary>
 /// <param name="imageBytes">TIFF-encoded image bytes.</param>
 /// <returns>
 /// The recognized text, or a string starting with "Unexpected Error: " that carries
 /// the exception message and full exception dump when processing fails.
 /// </returns>
 private string Convert(byte[] imageBytes)
 {
     try
     {
         using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
         using (var img = Pix.LoadTiffFromMemory(imageBytes))
         using (var page = engine.Process(img))
         {
             // The result iterator that used to be created here was never read,
             // so only the plain text is requested.
             return(page.GetText());
         }
     }
     catch (Exception ex)
     {
         return(string.Format("{0}, {1},", "Unexpected Error: " + ex.Message, ex.ToString()));
     }
 }
        /// <summary>
        /// Recognizes a single character in the given image using the whitelist
        /// "0123456789ABEKMHOPCTYXDI" and single-character page segmentation.
        /// </summary>
        /// <param name="input">Image expected to contain one character.</param>
        /// <returns>
        /// A successful result holding the first recognized character, or a failed result
        /// carrying an error message when nothing is recognized or an exception occurs.
        /// </returns>
        public Result <char> Process(Mat input)
        {
            try
            {
                // VectorOfByte owns native memory; dispose it once the bytes are copied out.
                using (var buff = new VectorOfByte())
                {
                    CvInvoke.Imencode(".tiff", input, buff);
                    using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                    {
                        engine.DefaultPageSegMode = PageSegMode.SingleChar;
                        engine.SetVariable("tessedit_char_whitelist", "0123456789ABEKMHOPCTYXDI");
                        using (var img = Pix.LoadTiffFromMemory(buff.ToArray()))
                        using (var page = engine.Process(img))
                        {
                            var recognized = page.GetText();

                            // Report an empty OCR result explicitly instead of relying on
                            // an index-out-of-range exception being caught below.
                            if (string.IsNullOrEmpty(recognized))
                            {
                                return(Result.Fail <char>("No character was recognized."));
                            }

                            var text = recognized[0];

                            _debugLogger.Log(debugLogBuilder => debugLogBuilder.AddMessage("Letter").AddImage(input).AddMessage($"has been recognized as: {text}"));

                            return(Result.Ok(text));
                        }
                    }
                }
            }
            catch (Exception e)
            {
                return(Result.Fail <char>(e.Message));
            }
        }
        /// <summary>
        /// Runs English OCR on the bitmap and normalizes the result: every character
        /// outside a-z, A-Z and 0-9 is removed, the rest is lower-cased, and the letters
        /// 'i' and 'o' are replaced by the digits '1' and '0'.
        /// </summary>
        /// <param name="image">Bitmap to recognize.</param>
        /// <returns>The normalized text, or an empty string when OCR yields only whitespace.</returns>
        public static string ReadText(Bitmap image)
        {
            string rawText;

            using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            using (var tiff = Pix.LoadTiffFromMemory(image.ToBytes(ImageFormat.Tiff)))
            using (var result = ocr.Process(tiff))
            {
                rawText = result.GetText();
            }

            if (string.IsNullOrWhiteSpace(rawText))
            {
                return(string.Empty);
            }

            // Strip everything that is not alphanumeric.
            var alphanumeric = Regex.Replace(rawText, "[^a-zA-Z0-9]", string.Empty);

            // Compensate for letters commonly misread in place of digits.
            return(alphanumeric.ToLowerInvariant().Replace('i', '1').Replace('o', '0'));
        }
        /// <summary>
        /// Runs OCR on the image with the instance's engine and returns the recognized
        /// words re-assembled from the result iterator, with a "&lt;BLOCK&gt;" marker line
        /// at the start of each block.
        /// </summary>
        /// <param name="image">
        /// Image to recognize; it is passed through <c>ConvertJpegToTiff</c> before being
        /// loaded as a TIFF.
        /// </param>
        /// <returns>The text accumulated from the iterator walk.</returns>
        /// <remarks>
        /// The mean confidence and the plain <c>GetText</c> output are written to the
        /// console only; they are not part of the return value.
        /// </remarks>
        public string OcrImage(Image image)
        {
            StringBuilder sb = new StringBuilder();

            // Bare scope block: it has no effect beyond grouping the statements.
            {
                using (var img = Pix.LoadTiffFromMemory(ConvertJpegToTiff(image)))
                {
                    using (var page = this.engine.Process(img))
                    {
                        var text = page.GetText();
                        Console.WriteLine("Mean confidence: {0}", page.GetMeanConfidence());

                        Console.WriteLine("Text (GetText): \r\n{0}", text);
                        // Only this header goes to the console; the iterator output itself
                        // is collected in sb.
                        Console.WriteLine("Text (iterator):");
                        using (var iter = page.GetIterator())
                        {
                            iter.Begin();

                            // Four nested loops, outermost to innermost:
                            // blocks -> paragraphs -> text lines -> words.
                            do
                            {
                                do
                                {
                                    do
                                    {
                                        do
                                        {
                                            // Emit a marker line when a new block starts.
                                            if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
                                            {
                                                sb.AppendLine("<BLOCK>");
                                            }

                                            // Every word is followed by a single space.
                                            sb.Append(iter.GetText(PageIteratorLevel.Word));
                                            sb.Append(" ");

                                            // Line break after the last word of a text line.
                                            if (iter.IsAtFinalOf(PageIteratorLevel.TextLine,
                                                                 PageIteratorLevel.Word))
                                            {
                                                sb.AppendLine();
                                            }
                                        } while (iter.Next(PageIteratorLevel.TextLine,
                                                           PageIteratorLevel.Word));

                                        // Extra line break after the last line of a paragraph.
                                        if (iter.IsAtFinalOf(PageIteratorLevel.Para,
                                                             PageIteratorLevel.TextLine))
                                        {
                                            sb.AppendLine();
                                        }
                                    } while (iter.Next(PageIteratorLevel.Para,
                                                       PageIteratorLevel.TextLine));
                                } while (iter.Next(PageIteratorLevel.Block,
                                                   PageIteratorLevel.Para));
                            } while (iter.Next(PageIteratorLevel.Block));
                        }
                    }
                }
            }

            return(sb.ToString());
        }
Exemple #7
0
        /// <summary>
        /// Recognizes the text in an image with the shared <c>_tesseract</c> engine and
        /// returns it with spaces and line feeds removed.
        /// </summary>
        /// <param name="bmp">Image to recognize; converted to bytes by <c>GetByteArrayFromImage</c>.</param>
        /// <returns>The recognized text without spaces or '\n' characters.</returns>
        public string GetTextFromBitmap(Image bmp)
        {
            // Pix and Page wrap native handles. Because the engine is shared across
            // calls, the page in particular must be released before the next Process call.
            using (var pix = Pix.LoadTiffFromMemory(GetByteArrayFromImage(bmp)))
            using (var page = _tesseract.Process(pix))
            {
                var text = page.GetText().Replace(" ", "").Replace(" ", "").Replace("\n", "");

                return(text);
            }
        }
Exemple #8
0
 /// <summary>
 /// Converts a bitmap into a <c>Pix</c> by encoding it as TIFF in memory.
 /// </summary>
 /// <param name="bitmap">Bitmap to convert.</param>
 /// <returns>A new <c>Pix</c> decoded from the TIFF bytes.</returns>
 private static Pix ToPix(Bitmap bitmap)
 {
     byte[] tiffBytes;

     using (var buffer = new MemoryStream())
     {
         bitmap.Save(buffer, System.Drawing.Imaging.ImageFormat.Tiff);
         tiffBytes = buffer.ToArray();
     }

     return(Pix.LoadTiffFromMemory(tiffBytes));
 }
Exemple #9
0
 /// <summary>
 /// Recognizes English text in a TIFF image held in a memory stream.
 /// </summary>
 /// <param name="imageStream">Stream whose entire contents are the TIFF bytes.</param>
 /// <returns>The recognized text.</returns>
 public string ProcessImage(MemoryStream imageStream)
 {
     using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
     {
         var tiffBytes = imageStream.ToArray();

         using (var image = Pix.LoadTiffFromMemory(tiffBytes))
         {
             using (var result = engine.Process(image))
             {
                 var recognized = result.GetText();
                 return(recognized);
             }
         }
     }
 }
        /// <summary>
        /// Converts a bitmap into a <c>Pix</c> by encoding it as TIFF in memory.
        /// </summary>
        /// <param name="bmp">Bitmap to convert.</param>
        /// <returns>A new <c>Pix</c>; the caller is responsible for disposing it.</returns>
        Pix ConvertFromBitmap(Bitmap bmp)
        {
            // Dispose the temporary stream instead of leaving it to the garbage collector.
            using (MemoryStream byteStream = new MemoryStream())
            {
                bmp.Save(byteStream, System.Drawing.Imaging.ImageFormat.Tiff);
                var pix = Pix.LoadTiffFromMemory(byteStream.ToArray());

                return(pix);
            }
        }
            /// <summary>
            /// Extracts the text contained in an image.
            /// </summary>
            /// <param name="ImgData">Bytes of the image file.</param>
            /// <param name="TessData_Path">Path to the OCR language data directory.</param>
            /// <param name="TessData_Language">Language to recognize.</param>
            /// <returns>The text recognized in the image.</returns>
            public static string GetImgText(byte[] ImgData, string TessData_Path = @"./tessdata", string TessData_Language = "eng")
            {
                // Every object below wraps native or GDI+ resources, so each one is
                // disposed deterministically. The input stream must stay open for as
                // long as the decoded image is alive, which the nesting order guarantees.
                using (Mat MatImg = Todo_OpenCvSharp4.ImgByteArrayToMat(ImgData))
                using (var inms = new MemoryStream(MatImg.ToBytes()))
                using (var decoded = System.Drawing.Bitmap.FromStream(inms))
                using (var outms = new MemoryStream())
                {
                    // Re-encode as TIFF, the format Pix.LoadTiffFromMemory expects.
                    decoded.Save(outms, System.Drawing.Imaging.ImageFormat.Tiff);

                    using (var pix = Pix.LoadTiffFromMemory(outms.ToArray()))
                    using (var engine = new TesseractEngine(TessData_Path, TessData_Language, EngineMode.Default))
                    using (Tesseract.Page page = engine.Process(pix))
                    {
                        return(page.GetText());
                    }
                }
            }
Exemple #12
0
 /// <summary>
 /// Decodes the TIFF bytes and passes the resulting image to <c>ReadTextFromPix</c>,
 /// doing the work through <c>Task.Run</c>.
 /// </summary>
 /// <param name="tiff">TIFF-encoded image bytes.</param>
 /// <param name="tessdataLocation">Forwarded unchanged to <c>ReadTextFromPix</c>.</param>
 /// <returns>A task producing the value returned by <c>ReadTextFromPix</c>.</returns>
 public static async Task <string> GetTextFromTiff(byte[] tiff, string tessdataLocation = null)
 {
     Task <string> recognition = Task.Run(() =>
     {
         using (var pix = Pix.LoadTiffFromMemory(tiff))
         {
             return ReadTextFromPix(pix, tessdataLocation);
         }
     });

     return(await recognition);
 }
Exemple #13
0
 /// <summary>
 /// Recognizes the text in the image with <c>_tessEngine</c> and returns it with all
 /// whitespace removed.
 /// </summary>
 /// <param name="image">Image to recognize; encoded as TIFF before OCR.</param>
 /// <returns>The recognized text without any whitespace characters.</returns>
 public string GetStringFromImage(Mat image)
 {
     var encoded = image.ImEncode(".tiff");

     using (var tiff = Pix.LoadTiffFromMemory(encoded))
     using (var ocrPage = _tessEngine.Process(tiff))
     {
         var rawText = ocrPage.GetText();
         return(Regex.Replace(rawText, @"\s+", ""));
     }
 }
Exemple #14
0
 /// <summary>
 /// Encodes the image as TIFF in memory and forwards it to <c>ProcessProc</c>.
 /// </summary>
 /// <param name="image">Image to recognize.</param>
 /// <param name="language">Language passed through to <c>ProcessProc</c>; defaults to "eng".</param>
 /// <returns>The value returned by <c>ProcessProc</c>.</returns>
 public static OcrResult Process(Image image, string language = "eng")
 {
     byte[] tiffBytes;

     using (var buffer = new MemoryStream())
     {
         image.Save(buffer, System.Drawing.Imaging.ImageFormat.Tiff);
         tiffBytes = buffer.ToArray();
     }

     using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
     {
         return(ProcessProc(tiff, language));
     }
 }
Exemple #15
0
        /// <summary>
        /// Runs digits-only OCR (whitelist "0123456789") over every image in every row.
        /// </summary>
        /// <param name="imageRows">Rows of images to recognize.</param>
        /// <returns>
        /// For each input row, the trimmed text recognized in each of its images, in the
        /// same order. The result is fully materialized before the method returns.
        /// </returns>
        /// <remarks>
        /// Evaluating eagerly allows the engine to be disposed here and avoids repeating
        /// the OCR work every time the caller enumerates the result.
        /// </remarks>
        public IEnumerable <IEnumerable <string> > OcrImages(IEnumerable <IEnumerable <Mat> > imageRows)
        {
            var results = new List <IEnumerable <string> >();

            using (var engine = new TesseractEngine(HostingEnvironment.MapPath(@"~/tessdata"), "eng"))
            {
                engine.SetVariable("tessedit_char_whitelist", "0123456789");

                foreach (var row in imageRows)
                {
                    var rowTexts = new List <string>();

                    foreach (var image in row)
                    {
                        // Both the Pix and the Page hold native handles.
                        using (var pix = Pix.LoadTiffFromMemory(image.ToBytes(".tiff")))
                        using (var page = engine.Process(pix))
                        {
                            rowTexts.Add(page.GetText().Trim());
                        }
                    }

                    results.Add(rowTexts);
                }
            }

            return(results);
        }
Exemple #16
0
            /// <summary>
            /// Runs OCR over each page of a PDF and writes the hOCR output of every page
            /// to its own file.
            /// </summary>
            /// <param name="filelocationlocation">Path of the PDF file to read.</param>
            /// <param name="datafilelocation">
            /// Format string for the output path; placeholder {0} receives the 1-based
            /// page number and {1} the extension "html". It is also passed to
            /// <c>PdfToTiff</c>.
            /// </param>
            public static void ConvertorFromPdFtoData(string filelocationlocation, string datafilelocation)
            {
                // The file handle and the engine are released even if a page fails.
                using (Stream str = File.OpenRead(filelocationlocation))
                {
                    string tessdataPath = SolutionPath + "\\Tessdata\\";

                    using (var engine = new TesseractEngine(tessdataPath, "eng", EngineMode.Default))
                    {
                        for (int i = 1; i <= GetPdfPageCount(str); i++)
                        {
                            using (var pix = Pix.LoadTiffFromMemory(PdfToTiff(str, i, datafilelocation)))
                            using (var process = engine.Process(pix))
                            {
                                File.WriteAllText(string.Format(datafilelocation, i, "html"), process.GetHOCRText(1));
                            }
                        }
                    }
                }
            }
Exemple #17
0
        /// <summary>
        /// Demo: whitens every pixel of the sample image whose R+G+B sum is at least 256,
        /// runs English OCR on the result and saves the processed image as a
        /// timestamp-named PNG in the current directory.
        /// </summary>
        internal static void Demonstration()
        {
            var imagePath = "20200917101.jpg";

            using (Bitmap image = new Bitmap(imagePath))
            {
                #region Brute-force thresholding
                int   w = image.Width;
                int   h = image.Height;
                Color c;
                Color white = Color.White;
                int   r, g, b;
                for (int y = 0; y < h; ++y)
                {
                    for (int x = 0; x < w; ++x)
                    {
                        c = image.GetPixel(x, y);
                        r = c.R;
                        g = c.G;
                        b = c.B;
                        if (r + g + b >= 256) // pixels whose channel sum reaches 256 become white
                        {
                            image.SetPixel(x, y, white);
                        }
                    }
                }
                #endregion

                // Encode as TIFF because the bytes are decoded with LoadTiffFromMemory
                // below. ToArray returns exactly the bytes written, whereas GetBuffer
                // also exposes unused capacity at the end of the internal buffer.
                byte[] tiffBytes;
                using (MemoryStream ms = new MemoryStream())
                {
                    image.Save(ms, System.Drawing.Imaging.ImageFormat.Tiff);
                    tiffBytes = ms.ToArray();
                }

                using (var engine = new TesseractEngine(@"D:\github\learning\CSharps\ImageProcessingProjects\bin\Debug\tessdata", "eng", EngineMode.Default))
                using (var img = Pix.LoadTiffFromMemory(tiffBytes))
                using (var page = engine.Process(img))
                {
                    // The recognized text is not used further; inspect it in a debugger.
                    var text = page.GetText();
                }

                image.Save($"{DateTime.Now.ToString("yyyyMMddHHmmss")}.png");
            }
        }
Exemple #18
0
        /// <summary>
        /// Recognizes English text in a TIFF screenshot, treating it as a single block
        /// and using the "temtem" engine configuration file.
        /// </summary>
        /// <param name="image">TIFF-encoded image bytes.</param>
        /// <returns>The recognized text.</returns>
        public string GetScreenText(byte[] image)
        {
            using (var ocr = new TesseractEngine(@"tessdata", "eng", EngineMode.Default, @"tessdata\configs\temtem"))
            using (var pix = Pix.LoadTiffFromMemory(image))
            using (var block = ocr.Process(pix, PageSegMode.SingleBlock))
            {
                return(block.GetText());
            }
        }
Exemple #19
0
        /// <summary>
        /// Recognizes English text in a TIFF image after setting five
        /// <c>segment_penalty_*</c> engine variables to "0".
        /// </summary>
        /// <param name="imageStream">Stream whose entire contents are the TIFF bytes.</param>
        /// <returns>The recognized text.</returns>
        public string ProcessImage(MemoryStream imageStream)
        {
            // Engine variables that are all set to "0", applied in this order.
            string[] zeroedPenalties =
            {
                "segment_penalty_garbage",
                "segment_penalty_dict_nonword",
                "segment_penalty_dict_frequent_word",
                "segment_penalty_dict_case_ok",
                "segment_penalty_dict_case_bad",
            };

            using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                foreach (var variableName in zeroedPenalties)
                {
                    ocr.SetVariable(variableName, "0");
                }

                using (var tiff = Pix.LoadTiffFromMemory(imageStream.ToArray()))
                {
                    using (var result = ocr.Process(tiff))
                    {
                        return(result.GetText());
                    }
                }
            }
        }
        /// <summary>
        /// Runs English OCR on a TIFF image, stores the full text in <c>Message</c> and
        /// appends one <c>OCRRow</c> per text line (with per-word position, size and
        /// confidence) to <c>listOCRRow</c>.
        /// </summary>
        /// <param name="fileName">TIFF-encoded image bytes (despite the parameter name).</param>
        private void OCR(byte[] fileName)
        {
            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            using (var img = Pix.LoadTiffFromMemory(fileName))
            using (var page = engine.Process(img))
            {
                Message = page.GetText();
                OCRRow row = new OCRRow();
                using (var iter = page.GetIterator())
                {
                    iter.Begin();
                    do
                    {
                        if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
                        {
                            // Flush the row built so far and start a new one for this line.
                            // NOTE(review): on the very first line this adds the initial,
                            // still-empty row. Kept as-is because callers may rely on the
                            // resulting list layout; confirm whether that is intended.
                            listOCRRow.Add(row);
                            row            = new OCRRow();
                            row.Row        = iter.GetText(PageIteratorLevel.TextLine);
                            row.Confidence = iter.GetConfidence(PageIteratorLevel.TextLine);
                        }

                        WordInfo wordInfo = new WordInfo();
                        wordInfo.Word = iter.GetText(PageIteratorLevel.Word);

                        if (!string.IsNullOrWhiteSpace(wordInfo.Word))
                        {
                            // The word image is only needed for its dimensions; dispose it
                            // immediately so one native Pix is not leaked per word.
                            using (Pix pix = iter.GetImage(PageIteratorLevel.Word, 0, out wordInfo.XPos, out wordInfo.YPos))
                            {
                                wordInfo.Width  = pix.Width;
                                wordInfo.Height = pix.Height;
                            }

                            wordInfo.Confidence = iter.GetConfidence(PageIteratorLevel.Word);
                            row.ListWord.Add(wordInfo);
                        }
                    }while (iter.Next(PageIteratorLevel.Word));

                    // Flush the last line.
                    listOCRRow.Add(row);
                }
            }
        }
        /// <summary>
        /// Encodes the bitmap as TIFF in memory and hands it, together with an engine
        /// from <c>CreateEngine</c>, to the <c>GetText</c> helper.
        /// </summary>
        /// <param name="bitmap">Bitmap to recognize.</param>
        /// <returns>The list produced by <c>GetText</c>.</returns>
        public static List <string> ExtractText(Bitmap bitmap)
        {
            byte[] tiffBytes;

            using (var buffer = new MemoryStream())
            {
                bitmap.Save(buffer, System.Drawing.Imaging.ImageFormat.Tiff);
                tiffBytes = buffer.ToArray();
            }

            using (var ocr = CreateEngine())
            using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
            {
                return(GetText(ocr, tiff));
            }
        }
Exemple #22
0
        /// <summary>
        /// Lazily recognizes English text in a TIFF image and yields the items that
        /// <c>ExtractPage</c> produces for each block of the page.
        /// </summary>
        /// <param name="data">TIFF-encoded image bytes.</param>
        /// <returns>
        /// A deferred sequence; the engine and page are created when enumeration starts
        /// and released when it finishes or is abandoned.
        /// </returns>
        public IEnumerable <TextBlockItem> Parse(byte[] data)
        {
            const string language = "eng";

            logger.LogDebug("Constructing {0} {1}", location, language);

            using (var engine = new TesseractEngine(location, language, EngineMode.Default))
            {
                using (var pix = Pix.LoadTiffFromMemory(data))
                {
                    using (var page = engine.Process(pix))
                    {
                        using (var iter = page.GetIterator())
                        {
                            iter.Begin();

                            bool hasMoreBlocks;
                            do
                            {
                                foreach (var blockItem in ExtractPage(iter))
                                {
                                    yield return(blockItem);
                                }

                                hasMoreBlocks = iter.Next(PageIteratorLevel.Block);
                            }while (hasMoreBlocks);
                        }
                    }
                }
            }
        }
Exemple #23
0
        /// <summary>
        /// Decodes an encoded image, re-encodes it as TIFF and recognizes English text in it.
        /// </summary>
        /// <param name="img">Encoded image bytes in any format <c>Image.FromStream</c> can read.</param>
        /// <returns>A task holding the recognized text.</returns>
        /// <remarks>
        /// The method contains no awaits, so all work runs synchronously on the caller's
        /// thread; this is unchanged from the previous behavior.
        /// </remarks>
        public async Task <string> RecognizeFromFile(byte[] img)
        {
            // Everything here wraps native or GDI+ resources; release them even when OCR
            // throws. The source stream must outlive the decoded image, which the nesting
            // order guarantees.
            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            using (var jpgStream = new MemoryStream(img))
            using (var image = Image.FromStream(jpgStream))
            using (var tiffStream = new MemoryStream())
            {
                image.Save(tiffStream, System.Drawing.Imaging.ImageFormat.Tiff);

                using (var pix = Pix.LoadTiffFromMemory(tiffStream.ToArray()))
                using (var page = engine.Process(pix))
                {
                    return(page.GetText());
                }
            }
        }
        /// <summary>
        /// Recognizes text in an encoded image using the language and precision options
        /// translated by <c>_optionService</c>.
        /// </summary>
        /// <param name="imageBytes">Encoded image bytes.</param>
        /// <param name="rl">Recognition language option.</param>
        /// <param name="rp">
        /// Recognition precision option; when its translated format is TIFF the TIFF
        /// loader is used, otherwise the generic loader.
        /// </param>
        /// <returns>The recognized text, or an empty string when an exception occurs.</returns>
        string ImageToText(byte[] imageBytes, RecognitionLanguage rl, RecognitionPrecision rp)
        {
            try
            {
                using (var ocr = new TesseractEngine(@"./Files", _optionService.TranslateLanguage(rl), EngineMode.Default))
                {
                    var isTiff = _optionService.TranslatePrecision(rp).Item4 == System.Drawing.Imaging.ImageFormat.Tiff;

                    Pix decoded;
                    if (isTiff)
                    {
                        decoded = Pix.LoadTiffFromMemory(imageBytes);
                    }
                    else
                    {
                        decoded = Pix.LoadFromMemory(imageBytes);
                    }

                    using (decoded)
                    using (var recognized = ocr.Process(decoded))
                    {
                        return(recognized.GetText().ToString());
                    }
                }
            }
            catch (Exception failure)
            {
                // Failures are only reported to the debug output.
                System.Diagnostics.Debug.WriteLine("Unexpected Error: " + failure.Message);
                System.Diagnostics.Debug.WriteLine("Details: ");
                System.Diagnostics.Debug.WriteLine(failure.ToString());
            }

            return("");
        }
        /// <summary>
        /// Recognizes each region as a single text line, in the order given by
        /// <c>OrderRegions</c>, and passes the text of sufficiently confident regions to
        /// <c>EvaluateText</c>.
        /// </summary>
        /// <param name="regions">Regions whose <c>Tiff</c> bytes are recognized.</param>
        /// <param name="minConfidence">Minimum mean confidence a region needs to be evaluated.</param>
        /// <returns>The content of the string builder filled by <c>EvaluateText</c>.</returns>
        private string Ocr(IList <Region> regions, float minConfidence)
        {
            var collected = new StringBuilder();

            foreach (var region in OrderRegions(regions))
            {
                using (var tiff = Pix.LoadTiffFromMemory(region.Tiff))
                using (var line = OcrEngine.Instance.Process(tiff, PageSegMode.SingleLine))
                {
                    var meanConfidence = line.GetMeanConfidence();

                    if (meanConfidence >= minConfidence)
                    {
                        EvaluateText(line.GetText(), collected);
                    }
                }
            }

            return(collected.ToString());
        }
        /// <summary>
        /// Lazily recognizes text in every bitmap variant produced by
        /// <c>BitmapGenerators</c> for the (possibly scaled) input.
        /// </summary>
        /// <param name="bitmap">Source image; replaced by the result of <c>ScaleIfEnabled</c>.</param>
        /// <param name="config">
        /// Zone configuration, applied to the engine before each variant and used to
        /// filter the raw text.
        /// </param>
        /// <returns>
        /// One result per variant, holding the filtered text and the page's mean
        /// confidence multiplied by 0.9.
        /// </returns>
        public override IEnumerable <RecognitionResult> Recognize(IImage bitmap, ZoneConfiguration config)
        {
            bitmap = ScaleIfEnabled(bitmap);

            var variants = BitmapGenerators.SelectMany(generator => generator.Generate(bitmap));

            foreach (var variant in variants)
            {
                var tiffBytes = ConvertToTiffByteArray(variant);

                SetVariablesAccordingToConfig(engine, config);

                using (var tiff = Pix.LoadTiffFromMemory(tiffBytes))
                using (var block = engine.Process(tiff, PageSegMode.SingleBlock))
                {
                    var bestMatch = config.TextualDataFilter.GetBestMatchFromRaw(block.GetText());
                    var scaledConfidence = block.GetMeanConfidence() * 0.9;

                    yield return(new RecognitionResult(bestMatch, scaledConfidence));
                }
            }
        }
Exemple #27
0
    /// <summary>
    /// Recognizes the text of the subtitle bitmap at the given index.
    /// </summary>
    /// <param name="engine">Engine used for recognition.</param>
    /// <param name="index">Index passed to <c>GetSubtitleBitmap</c>.</param>
    /// <returns>The recognized text with surrounding whitespace trimmed, or null if OCR returned null.</returns>
    private string GetText(TesseractEngine engine, int index)
    {
        byte[] encoded;

        using (var buffer = new MemoryStream())
        {
            using (var subtitle = GetSubtitleBitmap(index))
            {
                subtitle.Save(buffer, System.Drawing.Imaging.ImageFormat.Tiff);
                encoded = ToByteArray(buffer);
            }
        }

        using (var tiff = Pix.LoadTiffFromMemory(encoded))
        {
            using (var ocrPage = engine.Process(tiff))
            {
                var text = ocrPage.GetText();
                return(text?.Trim());
            }
        }
    }
Exemple #28
0
        /// <summary>
        /// Recognizes English text in a bitmap using the "tessdata" directory located
        /// next to the executing assembly.
        /// </summary>
        /// <param name="bitmap">Bitmap to recognize.</param>
        /// <returns>The recognized text, or null when recognition fails.</returns>
        static string doOCR(Bitmap bitmap)
        {
            try
            {
                // Resolve tessdata relative to the assembly rather than the current
                // working directory. This path was already computed here but never used.
                string tessDataPath = Path.Combine(Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location), "tessdata");

                using (var engine = new TesseractEngine(tessDataPath, "eng", EngineMode.Default))
                using (var img = Pix.LoadTiffFromMemory(bitmapToByteArray(bitmap, System.Drawing.Imaging.ImageFormat.Tiff)))
                using (var page = engine.Process(img))
                {
                    return(page.GetText());
                }
            }
            catch (Exception ex)
            {
                // Still best effort (null is returned), but leave a trace of the failure.
                System.Diagnostics.Trace.TraceError("doOCR failed: {0}", ex);
            }

            return(null);
        }
        /// <summary>
        /// Recognizes English text in a TIFF image using the shared <c>ocrEngine</c>,
        /// creating the engine on first use.
        /// </summary>
        /// <param name="memoryStream">Stream whose entire contents are the TIFF bytes.</param>
        /// <returns>
        /// The recognized text with every '\n' replaced by a space, or an empty string
        /// when the stream is null or empty.
        /// </returns>
        private static string RunTesseractOcr(MemoryStream memoryStream)
        {
            var hasData = memoryStream != null && memoryStream.Length != 0;
            if (!hasData)
            {
                return("");
            }

            if (ocrEngine == null)
            {
                const string englishLanguage = @"eng";

                ocrEngine = new TesseractEngine(@".\tessdata", englishLanguage);
                ocrEngine.SetVariable("load_system_dawg", false);
                ocrEngine.SetVariable("load_freq_dawg", false);
            }

            using (var tiff = Pix.LoadTiffFromMemory(memoryStream.ToArray()))
            using (var ocrPage = ocrEngine.Process(tiff))
            {
                var recognized = ocrPage.GetText();
                return(recognized.Replace('\n', ' '));
            }
        }
        /*private void BulkTest()
         * {
         *  var folder = @"C:\Users\Mike\Desktop\text recognition";
         *  foreach (var file in Directory.EnumerateFiles(folder))
         *  {
         *      var output = TryOCR(file);
         *      Cv2.ImWrite("out\\" + Path.GetFileName(file) + ".jpg", output.DebugImage);
         *  }
         * }*/

        /// <summary>
        /// Recognizes a TIFF image as a single text block and reports whether it looks
        /// like a postal address whose first line names a person.
        /// </summary>
        /// <param name="buffer">TIFF-encoded image bytes.</param>
        /// <param name="output">
        /// On success, the first three non-empty lines joined with '\n'; otherwise an
        /// empty string.
        /// </param>
        /// <returns>
        /// True when the lines pass <c>HeuristicAddressCheck</c> and the classifier tags
        /// part of the first line as "PERSON"; false otherwise.
        /// </returns>
        private bool TryOcrAddress(byte[] buffer, out string output)
        {
            output = string.Empty;

            // The Pix wraps a native handle and must be disposed along with the page.
            using (var pix = Pix.LoadTiffFromMemory(buffer))
            using (var page = _ocr.Process(pix, PageSegMode.SingleBlock))
            {
                var pageText = page.GetText();
                var lines    = pageText.Split(new string[2] {
                    "\n", "\r"
                }, StringSplitOptions.RemoveEmptyEntries);

                // The length check protects the lines[0] access below when OCR produced
                // no text but the heuristic did not reject the empty input.
                if (!HeuristicAddressCheck(lines) || lines.Length == 0)
                {
                    return(false);
                }

                //assume 3 line address
                var flat = String.Join("\n", lines.Take(3));

                bool hasPerson = false;
                foreach (Triple result in _classifier.classifyToCharacterOffsets(lines[0]).toArray())
                {
                    hasPerson |= result.first().ToString() == "PERSON";
                }

                if (hasPerson)
                {
                    output = flat;
                    return(true);
                }
                return(false);
            }
        }