/// <summary>
/// Runs OCR on a bitmap after converting it to grayscale and deskewing it.
/// </summary>
/// <param name="image">Bitmap to recognize.</param>
/// <returns>The text of the processed page.</returns>
/// <remarks>
/// The processed page is stored in <c>RecognizedPage</c>. If the engine has been
/// disposed, it is recreated with the "lit" language data first.
/// Exceptions raised during recognition propagate to the caller with their original stack trace.
/// </remarks>
public async Task<string> GetText(Bitmap image)
        {
            if (_tesseractEngine.IsDisposed)
            {
                _tesseractEngine = new TesseractEngine(TessDataPath, "lit", EngineMode.Default);
            }

            using var img = Pix.LoadFromMemory(ImageToByte(image));

            // The grayscale copy is only an intermediate step; it is disposed when the method exits.
            using var gray = img.ConvertRGBToGray();

            // Deliberately not disposed here: the page stored in RecognizedPage was created from
            // this image and outlives the method.
            var imageToRecognize = gray.Deskew();

            // Run the recognition on the thread pool so the caller is not blocked.
            RecognizedPage = await Task.Run(() => _tesseractEngine.Process(imageToRecognize));

            return RecognizedPage.GetText();
        }
Пример #2
0
        /// <summary>
        /// Recognizes the text contained in an encoded image.
        /// </summary>
        /// <param name="imageData">Encoded image bytes passed to <c>Pix.LoadFromMemory</c>.</param>
        /// <returns>The text of the processed page.</returns>
        public string GetTextFromImage(byte[] imageData)
        {
            using (Pix source = Pix.LoadFromMemory(imageData))
            using (Page recognized = _engine.Process(source))
            {
                return recognized.GetText();
            }
        }
Пример #3
0
        /// <summary>
        /// Runs digit-only OCR on the uploaded image.
        /// </summary>
        /// <param name="request">Form payload carrying the image and the language passed to the engine.</param>
        /// <returns>The recognized text, or a fixed message when the result is empty or whitespace.</returns>
        public String DoOCR([FromForm] OcrModel request)
        {
            var image = request.Image;

            using (var imageStream = new MemoryStream())
            {
                if (image.Length > 0)
                {
                    image.CopyTo(imageStream);
                }

                string tessPath = Path.Combine(trainedDataFolderName, "");
                string result;

                // TODO: Create one instance of engine and inject into app
                using (var engine = new TesseractEngine(tessPath, request.DestinationLanguage, EngineMode.Default))
                {
                    // whitelist numbers only
                    engine.SetVariable("tessedit_char_whitelist", "0123456789");

                    // ToArray returns exactly the bytes written; GetBuffer would also expose
                    // the unused tail of the stream's internal buffer.
                    using (var img = Pix.LoadFromMemory(imageStream.ToArray()))
                    using (var page = engine.Process(img))
                    {
                        result = page.GetText();
                    }

                    Console.WriteLine(result);
                }

                return String.IsNullOrWhiteSpace(result) ? "Ocr is finished. Return empty" : result;
            }
        }
Пример #4
0
        /// <summary>
        /// Recognizes the Base64-encoded image carried by <paramref name="imagem"/> and stores the result on it.
        /// </summary>
        /// <param name="imagem">Supplies the Base64 image and the language code; receives the text and the mean confidence.</param>
        /// <returns>The same <paramref name="imagem"/> instance, populated.</returns>
        /// <exception cref="Exception">Recognition failed; the original error is attached as the inner exception.</exception>
        public TesseractImage ReadImage(TesseractImage imagem)
        {
            string path = GetDirectoryName();

            byte[] bytesImg = StringHelper.ConvertBase64ToByteArray(imagem.Base64);

            try
            {
                using (var engine = new TesseractEngine(path, imagem.SiglaLinguagem, EngineMode.Default))
                using (var img = Pix.LoadFromMemory(bytesImg))
                using (var dataImg = engine.Process(img))
                {
                    imagem.Texto          = dataImg.GetText();
                    imagem.MeanConfidence = dataImg.GetMeanConfidence();
                }

                return imagem;
            }
            catch (Exception ex)
            {
                // Keep the original exception attached so its type and stack trace are not lost.
                throw new Exception(ex.Message, ex);
            }
        }
Пример #5
0
 /// <summary>
 /// Recognizes text in an encoded image using the "tur" language data in LSTM-only mode.
 /// </summary>
 /// <param name="dosya">Encoded image bytes.</param>
 /// <returns>The text of the processed page.</returns>
 private static string GetOcr(byte[] dosya)
 {
     using (var engine = new TesseractEngine("./tessdata", "tur", EngineMode.LstmOnly))
     using (var source = Pix.LoadFromMemory(dosya))
     using (var recognized = engine.Process(source))
     {
         return recognized.GetText();
     }
 }
Пример #6
0
        /// <summary>
        /// Pre-processes an image, runs OCR on it and reports timing and confidence.
        /// </summary>
        /// <param name="ImageToUse">Image to recognize.</param>
        /// <param name="Zoomlevel">Value forwarded to the image pre-processing step.</param>
        /// <param name="TimeTaken">Seconds spent loading, pre-processing and recognizing the image.</param>
        /// <param name="Confidence">Mean confidence reported for the page.</param>
        /// <returns>The recognized text.</returns>
        public string OCRimage(Image ImageToUse, int Zoomlevel, out double TimeTaken, out float Confidence)
        {
            // Stopwatch is monotonic, so the measurement is not affected by system clock adjustments.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            //load in image
            LoadImage(ImageToUse);
            //post process the image
            ImageToOCR = AccordImageProcessing.AccordImageProc.ImageProcessing(ImageToOCR, Zoomlevel);
            //Convert to Tesseract format
            byte[] ImgByte = ToByteArray(ImageToOCR, System.Drawing.Imaging.ImageFormat.Bmp);

            // using guarantees the image and page are released even when recognition throws.
            using (Pix img = Pix.LoadFromMemory(ImgByte))
            using (Page page = TesseractOCRCore.Process(img))
            {
                string text = page.GetText();

                Confidence = page.GetMeanConfidence();
                TimeTaken  = timer.Elapsed.TotalSeconds;

                return text;
            }
        }
Пример #7
0
        /// <summary>
        /// Recognizes English text in an encoded image and reports it together with the mean confidence.
        /// </summary>
        /// <param name="imageBytes">Encoded image bytes.</param>
        /// <returns>
        /// A two-line summary (mean confidence, trimmed text), or <c>null</c> when an
        /// exception occurred; the exception is written to the trace output.
        /// </returns>
        public static string RecognizeText(byte[] imageBytes)
        {
            string summary = null;

            try
            {
                using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                using (Pix source = Pix.LoadFromMemory(imageBytes))
                using (var recognized = engine.Process(source))
                {
                    string text = recognized.GetText();
                    Trace.WriteLine($"Text (GetText): \r\n{text}");
                    summary = $"Mean confidence is {recognized.GetMeanConfidence()}\r\nText={text.Trim()}";
                }
            }
            catch (Exception failure)
            {
                Trace.TraceError(failure.ToString());
            }

            return summary;
        }
Пример #8
0
        /// <summary>
        /// Recognizes the text in an encoded image using the shared <c>ocr</c> engine.
        /// </summary>
        /// <param name="data">Encoded image bytes.</param>
        /// <returns>The recognized text with surrounding whitespace removed.</returns>
        public string FileTessOCR(byte[] data)
        {
            // Dispose the image and page deterministically: the engine is shared across calls,
            // and the Tesseract wrapper processes only one page at a time per engine.
            using (var pic = Pix.LoadFromMemory(data))
            using (var res = this.ocr.Process(pic))
            {
                return res.GetText().Trim();
            }
        }
Пример #9
0
        /// <summary>
        /// Recognizes the text in an image with a freshly created "lit" engine after converting it to grayscale.
        /// </summary>
        /// <param name="image">Image to recognize.</param>
        /// <returns>The text of the processed page.</returns>
        private string ProcessImage(Image image)
        {
            using var ocrEngineBest = new TesseractEngine(TessDataPath, "lit", EngineMode.Default);
            using var img           = Pix.LoadFromMemory(ImageToByte(image));
            // ConvertRGBToGray returns a separate Pix, so it needs its own disposal.
            using var imgGray       = img.ConvertRGBToGray();
            using var page          = ocrEngineBest.Process(imgGray);

            return page.GetText();
        }
Пример #10
0
 /// <summary>
 /// Converts a bitmap to a <c>Pix</c> by encoding it as PNG in memory.
 /// </summary>
 /// <param name="bitmap">Bitmap to convert.</param>
 /// <returns>The image loaded from the PNG bytes.</returns>
 private static Pix BitmapToPix(Bitmap bitmap)
 {
     using (var buffer = new MemoryStream())
     {
         // Encode to PNG in memory so Pix can decode it
         bitmap.Save(buffer, System.Drawing.Imaging.ImageFormat.Png);
         buffer.Position = 0;

         var encoded = buffer.ToArray();
         return Pix.LoadFromMemory(encoded);
     }
 }
Пример #11
0
        /// <summary>
        /// Recognizes English text in an encoded image and renders it word by word,
        /// marking block starts and breaking lines at line and paragraph ends.
        /// </summary>
        /// <param name="imageBytes">Encoded image bytes.</param>
        /// <returns>
        /// The rendered text. When an exception occurs it is written to the trace output
        /// and whatever was rendered up to that point is returned.
        /// </returns>
        public static string RecognizeBlocks(byte[] imageBytes)
        {
            var output = new StringBuilder();

            try
            {
                using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                using (Pix source = Pix.LoadFromMemory(imageBytes))
                using (var recognized = engine.Process(source))
                {
                    output.AppendLine("Text (iterator):");

                    using (ResultIterator cursor = recognized.GetIterator())
                    {
                        cursor.Begin();

                        do // blocks
                        {
                            do // paragraphs within the block
                            {
                                do // text lines within the paragraph
                                {
                                    do // words within the text line
                                    {
                                        if (cursor.IsAtBeginningOf(PageIteratorLevel.Block))
                                        {
                                            output.AppendLine("<BLOCK>");
                                        }

                                        output.Append(cursor.GetText(PageIteratorLevel.Word));
                                        output.Append(" ");

                                        // End the output line after the last word of a text line
                                        if (cursor.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                                        {
                                            output.AppendLine();
                                        }
                                    }
                                    while (cursor.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

                                    // Extra blank line after the last text line of a paragraph
                                    if (cursor.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine))
                                    {
                                        output.AppendLine();
                                    }
                                }
                                while (cursor.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                            }
                            while (cursor.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
                        }
                        while (cursor.Next(PageIteratorLevel.Block));
                    }
                }
            }
            catch (Exception failure)
            {
                Trace.TraceError(failure.ToString());
            }

            return output.ToString();
        }
Пример #12
0
        /// <summary>
        /// Takes an image file in and returns the text recognized in it.
        /// </summary>
        /// <param name="file">Stream containing the image to recognize.</param>
        /// <returns>All recognized words joined together without a separator.</returns>
        public async Task<string> ExecuteAsync(Stream file)
        {
            byte[] pngBytes;

            // The GDI+ objects and the stream are released as soon as the PNG bytes are available.
            using (var tg_img = Image.FromStream(file))
            using (var source = new Bitmap(tg_img))
            using (var bitmap = await ConvertToGray(source))
            using (var stream = new MemoryStream())
            {
                bitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Png);
                pngBytes = stream.ToArray();
            }

            var texts = new List<string>();

            using (var engine = new TesseractEngine(@"/app/tessdata", "chi_sim", EngineMode.Default))
            using (var img = Pix.LoadFromMemory(pngBytes))
            using (var page = engine.Process(img))
            using (var iter = page.GetIterator())
            {
                iter.Begin();

                do // blocks
                {
                    do // paragraphs within the block
                    {
                        do // text lines within the paragraph
                        {
                            do // words within the text line
                            {
                                texts.Add(iter.GetText(PageIteratorLevel.Word));
                            }
                            while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                        }
                        while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                    }
                    while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
                }
                while (iter.Next(PageIteratorLevel.Block));
            }

            return string.Join("", texts);
        }
Пример #13
0
 /// <summary>
 /// Recognizes the text in an encoded image using the shared engine.
 /// </summary>
 /// <param name="bytes">Encoded image bytes.</param>
 /// <returns>The recognized text; an empty string when the page yields no text.</returns>
 public string GetText(byte[] bytes)
 {
     using (var img = Pix.LoadFromMemory(bytes))
     using (var page = _tesseractEngine.Process(img))
     {
         // Return the recognition result; fall back to "" so callers never receive null.
         return page.GetText() ?? "";
     }
 }
Пример #14
0
 /// <summary>
 /// Recognizes text in an encoded image, scaled 3x in both directions, using the "tur"
 /// language data in Tesseract-only mode.
 /// </summary>
 /// <param name="dosya">Encoded image bytes.</param>
 /// <returns>The text of the processed page.</returns>
 private static string GetOcr(byte[] dosya)
 {
     using (var engine = new TesseractEngine("./tessdata", "tur", EngineMode.TesseractOnly))
     using (var pixImage = Pix.LoadFromMemory(dosya))
     // Scale returns a separate Pix, so it is disposed independently of the source image.
     using (var scaled = pixImage.Scale(3, 3))
     using (var page = engine.Process(scaled))
     {
         return page.GetText();
     }
 }
Пример #15
0
    /// <summary>
    /// Converts an image to a <c>Pix</c> by encoding it as BMP in memory.
    /// </summary>
    /// <param name="bitmap">Image to convert.</param>
    /// <returns>The image loaded from the BMP bytes.</returns>
    private static Pix GetPix(Image<Rgba32> bitmap)
    {
        byte[] bmpBytes;

        using (var buffer = new MemoryStream())
        {
            bitmap.SaveAsBmp(buffer);
            bmpBytes = buffer.ToArray();
        }

        return Pix.LoadFromMemory(bmpBytes);
    }
Пример #16
0
        /// <summary>
        /// read a bitmap image for a number
        /// </summary>
        /// <param name="image">image to read</param>
        /// <returns>number read from the bitmap</returns>
        /// <exception cref="NotSupportedException">No number could be read from image</exception>
        private int ReadImageText(Bitmap image)
        {
            ImageConverter converter = new ImageConverter();

            byte[] newimage = (byte[])converter.ConvertTo(image, typeof(byte[]));

            using var pix  = Pix.LoadFromMemory(newimage);
            using var page = engine.Process(pix);
            string convert = page.GetText();

            // TryParse rejects null, empty, whitespace-only and non-numeric text alike, so every
            // "no number" outcome surfaces as the documented NotSupportedException.
            if (!int.TryParse(convert, out int number))
            {
                throw new NotSupportedException();
            }

            return number;
        }
        /// <summary>
        /// Runs OCR on the pre-processed subtitle region and collects the bounding box of every
        /// element found at the requested iterator level.
        /// </summary>
        /// <param name="mImage">Source image handed to the pre-processing step.</param>
        /// <param name="subtitleRegion">Region handed to the pre-processing step; its X/Y are added to each box.</param>
        /// <param name="pageIteratorLevel">Level at which the page is iterated.</param>
        /// <param name="cropRegionPath">Path forwarded to the pre-processing step.</param>
        /// <returns>
        /// The collected boxes, divided by the pre-processing scale and offset by the region origin.
        /// If an exception occurs, the boxes gathered so far are returned.
        /// </returns>
        public List<Rectangle> GetSubtitleBoundingBoxes(IMagickImage mImage, Rectangle subtitleRegion,
                                                        PageIteratorLevel pageIteratorLevel, string cropRegionPath = "")
        {
            var processedBytes = PreprocessImage(mImage, subtitleRegion, _preprocessScale, cropRegionPath);
            var found          = new List<Rectangle>();

            try
            {
                _tessEngine.SetVariable("user_defined_dpi", 300);
                _tessEngine.DefaultPageSegMode = PageSegMode.Auto;

                using (var source = Pix.LoadFromMemory(processedBytes))
                using (var recognized = _tessEngine.Process(source))
                using (var cursor = recognized.GetIterator())
                {
                    cursor.Begin();

                    do
                    {
                        // Elements without a bounding box are skipped
                        if (cursor.TryGetBoundingBox(pageIteratorLevel, out var raw))
                        {
                            // Undo the pre-processing scale and shift back into full-image coordinates
                            int left   = (int)(raw.X1 / _preprocessScale) + subtitleRegion.X;
                            int top    = (int)(raw.Y1 / _preprocessScale) + subtitleRegion.Y;
                            int width  = (int)(raw.Width / _preprocessScale);
                            int height = (int)(raw.Height / _preprocessScale);

                            var scaledBox = new Rectangle(left, top, width, height);
                            found.Add(scaledBox);

                            Console.WriteLine($"Text:{cursor.GetText(pageIteratorLevel).Trim()} - Bounding Box:{scaledBox}");
                        }
                    }
                    while (cursor.Next(pageIteratorLevel));
                }

                return found;
            }
            catch (Exception error)
            {
                Console.WriteLine("Tesseract Error: " + error.Message);
            }

            // Only reached after a failure: report how many boxes were gathered before it
            Console.WriteLine(found.Count);

            return found;
        }
Пример #18
0
        /// <summary>
        /// Crops <c>img</c> to <c>playArea</c>, recognizes it with the "eng+chi_tra" language data
        /// and shows the text in <c>textBox1</c> with line endings normalized to CRLF.
        /// </summary>
        void doOCR()
        {
            using (var engine = new TesseractEngine("tessdata", "eng+chi_tra"))
            {
                byte[] pngBytes;

                using (var ms = new MemoryStream())
                {
                    cropAtRect(img, playArea).Save(ms, System.Drawing.Imaging.ImageFormat.Png);
                    pngBytes = ms.ToArray();
                }

                // Engine, image and page hold native resources, so all three are disposed here.
                using (var tessImg = Pix.LoadFromMemory(pngBytes))
                using (var page = engine.Process(tessImg))
                {
                    textBox1.Text = Regex.Replace(page.GetText(), @"\r\n|\r|\n", "\r\n");
                }
            }
        }
Пример #19
0
        /// <summary>
        /// Recognizes the text of an unreadable PDF page.
        /// </summary>
        /// <param name="page">The unreadable PDF page.</param>
        /// <returns>The text recognized on the page.</returns>
        private static string RecognizePageText(PdfPage page)
        {
            var options = PdfDrawOptions.Create();

            options.BackgroundColor      = new PdfRgbColor(255, 255, 255);
            options.HorizontalResolution = 200;
            options.VerticalResolution   = 200;

            using var memoryStream = new MemoryStream();
            page.Save(memoryStream, options);

            using var engine = new TesseractEngine(@"tessdata\fast", "rus+eng", EngineMode.LstmOnly);

            // ToArray returns exactly the bytes written; GetBuffer would also expose the unused
            // tail of the stream's internal buffer.
            using var img            = Pix.LoadFromMemory(memoryStream.ToArray());
            using var recognizedPage = engine.Process(img);

            return recognizedPage.GetText();
        }
        /// <summary>
        /// Main loop for running the scanner
        /// </summary>
        /// <remarks>
        /// Polls <c>runScanner</c> every 500 ms. While it is set, a fixed screen region is captured
        /// and recognized; when the text contains "accept" with a mean confidence above 0.80, the
        /// cursor is moved, three left clicks are sent and the process exits.
        /// </remarks>
        public static async void MainScanner()
        {
            ImageConverter converter = new ImageConverter();

            while (!runScanner)
            {
                await Task.Delay(500);

                if (!runScanner)
                {
                    continue;
                }

                // Reuse one engine for the whole scanning run instead of building a new one every pass.
                using (TesseractEngine engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
                {
                    while (runScanner)
                    {
                        // Capture the accept button; the bitmap is disposed on every pass so
                        // captures do not pile up.
                        using (Bitmap region = CaptureMyScreen(220, 80, 850, 570))
                        using (Pix img = Pix.LoadFromMemory((byte[])converter.ConvertTo(region, typeof(byte[]))))
                        using (Page page = engine.Process(img))
                        {
                            string text       = page.GetText();
                            float  confidence = page.GetMeanConfidence();

                            bool mentionsAccept = text.IndexOf("accept", StringComparison.OrdinalIgnoreCase) >= 0;

                            if (mentionsAccept && confidence > 0.80) //Pretty sure its right
                            {
                                System.Windows.Forms.Cursor.Position = new System.Drawing.Point((int)tempxstartpos + 10, (int)tempystartpos + 10);

                                // Pause after moving the cursor without blocking the thread
                                await Task.Delay(100);

                                uint X = (uint)System.Windows.Forms.Cursor.Position.X;
                                uint Y = (uint)System.Windows.Forms.Cursor.Position.Y;
                                mouse_event(MOUSEEVENTF_LEFTDOWN | MOUSEEVENTF_LEFTUP, X, Y, 0, 0); //Left click
                                mouse_event(MOUSEEVENTF_LEFTDOWN | MOUSEEVENTF_LEFTUP, X, Y, 0, 0); //Left click twice for good measure
                                mouse_event(MOUSEEVENTF_LEFTDOWN | MOUSEEVENTF_LEFTUP, X, Y, 0, 0); //Left click 3 times to be 900% sure!

                                Environment.Exit(0);
                            }
                        }

                        await Task.Delay(500);
                    }
                }
            }
        }
Пример #21
0
        /// <summary>
        /// Recognizes the text in an encoded image and merges line breaks into spaces where the
        /// following line does not start with an upper-case letter or a digit.
        /// </summary>
        /// <param name="imageData">Encoded image bytes.</param>
        /// <param name="ocrLang">Language name; mapped to an OCR language code before the engine is created.</param>
        /// <returns>The cleaned-up, trimmed text.</returns>
        /// <exception cref="Exception">
        /// "Try again" when no text was recognized; "Can't find data of ..." when the engine could
        /// not be initialised (the original error is attached as the inner exception).
        /// </exception>
        public static async Task<string> GetParseImageResult(byte[] imageData, string ocrLang)
        {
            try
            {
                // Run the recognition on the thread pool so awaiting callers are not blocked by it.
                return await Task.Run(() =>
                {
                    var langCode = LanguageDictionary.GetLanguageOCRCode(ocrLang);

                    using (var engine = new TesseractEngine(App.TESSDATA_DICTPATH, langCode, EngineMode.Default))
                    using (var img = Pix.LoadFromMemory(imageData))
                    using (var page = engine.Process(img))
                    {
                        var input = page.GetText();

                        // A newline followed by a character other than A-Z, 0-9 or '^' becomes a
                        // space followed by that character.
                        string result = Regex.Replace(input, @"\n([^A-Z^0-9])", " $1", RegexOptions.Multiline).Trim();

                        if (result.Length < 1)
                        {
                            throw new Exception("Try again");
                        }

                        return result;
                    }
                });
            }
            // The filter lets every other exception propagate untouched, stack trace included.
            catch (Exception e) when (e.Message.Contains("Failed to initialise tesseract engine"))
            {
                throw new Exception("Can't find data of " + ocrLang, e);
            }
        }
Пример #22
0
        // public string ScanAndOCR()
        // {
        //    var images = WIAScanner.Scan();
        //    string result = string.Empty;

        // using (var ms = new MemoryStream())
        //    {
        //        images[0].Save(ms, images[0].RawFormat);
        //        result = this.api.RecognizeImage(ms);
        //    }

        // return result;
        // }
        /// <summary>
        /// Scans images, deskews each one and recognizes its text with the shared <c>ocr</c> engine.
        /// </summary>
        /// <returns>The trimmed text of every scanned image, in scan order.</returns>
        /// <remarks>Each deskewed image is also written to "ocrImg.png", overwriting the previous one.</remarks>
        public IEnumerable<string> ScanTessOCR()
        {
            var images = WIAScanner.Scan();
            var result = new List<string>();

            foreach (var image in images)
            {
                byte[] encoded;

                // A fresh stream per image: a stream shared across iterations would keep the
                // earlier scans in front of the current one.
                using (var ms = new MemoryStream())
                {
                    image.Save(ms, image.RawFormat);
                    encoded = ms.ToArray();
                }

                using (var raw = Pix.LoadFromMemory(encoded))
                using (var pic = raw.Deskew())
                {
                    pic.Save("ocrImg.png", ImageFormat.Png);

                    // The page is disposed before the next image is handed to the shared engine.
                    using (var res = this.ocr.Process(pic))
                    {
                        result.Add(res.GetText().Trim());
                    }
                }
            }

            return result;
        }
Пример #23
0
        /// <summary>
        /// Pre-processes an image, runs OCR on it and returns the result as hOCR text
        /// (used to get word locations).
        /// </summary>
        /// <param name="ImageToUse">Image to recognize.</param>
        /// <param name="Zoomlevel">Value forwarded to the image pre-processing step.</param>
        /// <returns>The hOCR text for page number 1.</returns>
        public string OCRimagewithLocation(Image ImageToUse, int Zoomlevel)
        {
            //load in image
            LoadImage(ImageToUse);
            //post process the image
            ImageToOCR = AccordImageProcessing.AccordImageProc.ImageProcessing(ImageToOCR, Zoomlevel);
            //Convert to Tesseract format
            byte[] ImgByte = ToByteArray(ImageToOCR, System.Drawing.Imaging.ImageFormat.Bmp);

            // using guarantees the image and page are released even when recognition throws.
            using (Pix img = Pix.LoadFromMemory(ImgByte))
            using (Page page = TesseractOCRCore.Process(img))
            {
                return page.GetHOCRText(1);
            }
        }
Пример #24
0
        /// <summary>
        /// Recognizes the text in an encoded image using the language and precision options supplied.
        /// </summary>
        /// <param name="imageBytes">Encoded image bytes.</param>
        /// <param name="rl">Recognition language, translated by the option service.</param>
        /// <param name="rp">Recognition precision; decides whether the bytes are loaded as TIFF.</param>
        /// <returns>The recognized text, or an empty string when an exception occurred (details go to the debug output).</returns>
        string ImageToText(byte[] imageBytes, RecognitionLanguage rl, RecognitionPrecision rp)
        {
            try
            {
                using (var engine = new TesseractEngine(@"./Files", _optionService.TranslateLanguage(rl), EngineMode.Default))
                {
                    bool loadAsTiff = _optionService.TranslatePrecision(rp).Item4 == System.Drawing.Imaging.ImageFormat.Tiff;

                    using (var source = loadAsTiff ? Pix.LoadTiffFromMemory(imageBytes) : Pix.LoadFromMemory(imageBytes))
                    using (var recognized = engine.Process(source))
                    {
                        return recognized.GetText().ToString();
                    }
                }
            }
            catch (Exception failure)
            {
                System.Diagnostics.Debug.WriteLine("Unexpected Error: " + failure.Message);
                System.Diagnostics.Debug.WriteLine("Details: ");
                System.Diagnostics.Debug.WriteLine(failure.ToString());

                return "";
            }
        }
Пример #25
0
        /// <summary>
        /// Runs OCR on the image held in <paramref name="ms"/> and, when enough text is found,
        /// records it as a pixel-text failure and in the Tesseract report.
        /// </summary>
        /// <param name="ms">Stream holding the encoded image.</param>
        /// <param name="pixelFormat">Pixel format forwarded to the report.</param>
        /// <param name="processedPixelFormat">Processed pixel format forwarded to the report.</param>
        /// <param name="fi">File the image came from.</param>
        /// <param name="dicomFile">DICOM file forwarded to the failure factory.</param>
        /// <param name="sopID">Identifier forwarded to the report.</param>
        /// <param name="studyID">Identifier forwarded to the report.</param>
        /// <param name="seriesID">Identifier forwarded to the report.</param>
        /// <param name="modality">Modality forwarded to the report.</param>
        /// <param name="imageType">Image type values forwarded to the report.</param>
        /// <param name="rotationIfAny">Rotation value; when non-zero it is appended to the problem field name.</param>
        private void Process(MemoryStream ms, PixelFormat pixelFormat, PixelFormat processedPixelFormat, IFileInfo fi, DicomFile dicomFile, string sopID, string studyID, string seriesID, string modality, string[] imageType, int rotationIfAny = 0)
        {
            float  meanConfidence;
            string text;

            var bytes = ms.ToArray();

            // The image is disposed together with the page instead of being left to the finalizer.
            using (var pix = Pix.LoadFromMemory(bytes))
            using (var page = _tesseractEngine.Process(pix))
            {
                text           = page.GetText();
                text           = Regex.Replace(text, @"\t|\n|\r", " "); // XXX abrooks surely more useful to have a space?
                text           = text.Trim();
                meanConfidence = page.GetMeanConfidence();
            }

            //if we find some text
            if (!string.IsNullOrWhiteSpace(text))
            {
                string problemField = rotationIfAny != 0 ? "PixelData" + rotationIfAny : "PixelData";

                if (text.Length < _ignoreTextLessThan)
                {
                    _logger.Debug($"Ignoring pixel data discovery in {fi.Name} of length {text.Length} because it is below the threshold {_ignoreTextLessThan}");
                }
                else
                {
                    var f = factory.Create(fi, dicomFile, text, problemField, new[] { new FailurePart(text, FailureClassification.PixelText) });

                    AddToReports(f);

                    _tesseractReport.FoundPixelData(fi, sopID, pixelFormat, processedPixelFormat, studyID, seriesID, modality, imageType, meanConfidence, text.Length, text, rotationIfAny);
                }
            }
        }
Пример #26
0
        /// <summary>
        /// Captures a screen rectangle and recognizes the English text in it.
        /// </summary>
        /// <param name="x">X value passed to the capture call.</param>
        /// <param name="y">Y value passed to the capture call.</param>
        /// <param name="width">Width passed to the capture call.</param>
        /// <param name="height">Height passed to the capture call.</param>
        /// <returns>The recognized text, or an empty string when recognition failed (the message is written to the console).</returns>
        public static string ReadFromScreenRect(int x, int y, int width, int height)
        {
            string text = "";

            // The captured bitmap is disposed once recognition is finished.
            using (Bitmap bitmap = Image.Capture(x, y, width, height))
            {
                try
                {
                    using (var engine = new TesseractEngine("./tessdata", "eng"))
                    using (var image = Pix.LoadFromMemory(ImageToByte(bitmap)))
                    using (var page = engine.Process(image))
                    {
                        text = page.GetText();
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }
            }

            return text;
        }
Пример #27
0
        /// <summary>
        /// Converts a bitmap to an inverted, thresholded black-and-white image and loads it as a <c>Pix</c>.
        /// </summary>
        /// <param name="inBitmap">Source bitmap; it is cloned to 24bpp RGB and not modified itself.</param>
        /// <returns>The thresholded image loaded from PNG bytes.</returns>
        /// <remarks>
        /// The 24bpp clone and the thresholded result are also written to the "ProcessedImages"
        /// folder as "{guid}_Original.png" and "{guid}_Threshold.png".
        /// </remarks>
        private static unsafe Pix LoadBitmapToPix(Bitmap inBitmap)
        {
            Pix    pix    = null;
            string uniqId = Guid.NewGuid().ToString("D");

            // Clone the bitmap from whatever format it's in now to a 24bit bitmap
            using (var bitmap = inBitmap.Clone(new Rectangle(0, 0, inBitmap.Width, inBitmap.Height), PixelFormat.Format24bppRgb))
            {
                using (MemoryStream stream = new MemoryStream())
                {
                    bitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Png);
                    stream.Position = 0;

                    // Write out the result for later processing
                    using (FileStream file = File.OpenWrite($"ProcessedImages/{uniqId}_Original.png"))
                    {
                        stream.CopyTo(file);
                    }
                }

                BitmapData bmpData = bitmap.LockBits(new Rectangle(0, 0, bitmap.Width, bitmap.Height), ImageLockMode.ReadWrite, bitmap.PixelFormat);

                try
                {
                    // 24bpp: blue, green and red bytes per pixel, in that order
                    const int bytesPerPixel = 3;
                    byte *    scan0         = (byte *)bmpData.Scan0.ToPointer();

                    // Histogram with one bucket for every possible gray level (0..255)
                    int[] scales = new int[256];

                    for (int row = 0; row < bmpData.Height; row++)
                    {
                        for (int col = 0; col < bmpData.Width; col++)
                        {
                            byte *data  = scan0 + row * bmpData.Stride + col * bytesPerPixel;
                            byte  blue  = data[0];
                            byte  green = data[1];
                            byte  red   = data[2];

                            // Calculate grayscale and invert
                            byte avg      = (byte)(0.2989 * red + 0.5870 * green + 0.1140 * blue);
                            byte inverted = (byte)(255 - avg);

                            // Generate a curve for finding the best spot to threshold
                            scales[avg]++;

                            data[0] = inverted;
                            data[1] = inverted;
                            data[2] = inverted;
                        }
                    }

                    // Find the most frequent (non-inverted) gray level
                    int  max      = 0;
                    byte maxColor = 0;
                    for (int level = 0; level < scales.Length; level++)
                    {
                        if (scales[level] > max)
                        {
                            max      = scales[level];
                            maxColor = (byte)level;
                        }
                    }

                    // Threshold: the inverted dominant level minus 130.
                    // NOTE(review): an earlier comment said "100", and for maxColor > 125 the int
                    // result is negative and wraps on the byte cast (in an unchecked context) — confirm intent.
                    byte threshold = (byte)(255 - maxColor - 130);

                    // Apply the threshold to all the bits
                    for (int row = 0; row < bmpData.Height; row++)
                    {
                        for (int col = 0; col < bmpData.Width; col++)
                        {
                            byte *data = scan0 + row * bmpData.Stride + col * bytesPerPixel;

                            // All three channels hold the same inverted gray value; the red one is tested
                            byte pixel = (byte)(data[2] > threshold ? 255 : 0);

                            data[0] = pixel;
                            data[1] = pixel;
                            data[2] = pixel;
                        }
                    }
                }
                finally
                {
                    // Always release the lock, even if the pixel processing throws
                    bitmap.UnlockBits(bmpData);
                }

                // Put the bitmap into a memory stream for Pix to load
                using (MemoryStream stream = new MemoryStream())
                {
                    bitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Png);
                    stream.Position = 0;

                    // Write out the result for later processing
                    using (FileStream file = File.OpenWrite($"ProcessedImages/{uniqId}_Threshold.png"))
                    {
                        stream.CopyTo(file);
                    }

                    pix = Pix.LoadFromMemory(stream.ToArray());
                }
            }

            return pix;
        }
Пример #28
0
 /// <summary>
 /// Converts a bitmap to a <c>Pix</c> via its byte-array representation.
 /// </summary>
 /// <param name="img">Bitmap to convert.</param>
 /// <returns>The image loaded from the bitmap's bytes.</returns>
 public static Pix ConvertBitmapToPix(Bitmap img)
 {
     var encoded = ImageToByteArray(img);

     return Pix.LoadFromMemory(encoded);
 }
Пример #29
0
        /// <summary>
        /// Recognizes the text in an image file and shows it, as plain text plus a JSON list of
        /// words with their bounding boxes, in the output controls.
        /// </summary>
        /// <param name="imageFile">Image file to analyse.</param>
        /// <remarks>
        /// The language follows <c>LanguageSelection.SelectedIndex</c> (1 = "chi_sim", 2 = "kor",
        /// anything else = "eng"). A <see cref="System.Reflection.TargetInvocationException"/>
        /// switches to the Azure Computer Vision analysis; any other exception is shown to the user.
        /// </remarks>
        private async Task AnalyseUsingTesseract(StorageFile imageFile)
        {
            try
            {
                byte[] result;
                using (Stream stream = await imageFile.OpenStreamForReadAsync())
                using (var memoryStream = new MemoryStream())
                {
                    // Copy asynchronously so the caller is not blocked on file I/O
                    await stream.CopyToAsync(memoryStream);
                    result = memoryStream.ToArray();
                }

                string language;
                switch (LanguageSelection.SelectedIndex)
                {
                    case 1:
                        language = "chi_sim";
                        break;

                    case 2:
                        language = "kor";
                        break;

                    default:
                        language = "eng";
                        break;
                }

                using (var engine = new TesseractEngine(@"./TrainedData", language, EngineMode.Default))
                using (var img = Pix.LoadFromMemory(result))
                using (var page = engine.Process(img))
                {
                    string text = "=======================\r\nPLAIN TEXT\r\n=======================\r\n";

                    text += page.GetText();

                    OutputTextTitle.Text = string.Format("Output Text (Mean Confidence: {0})", page.GetMeanConfidence());

                    var recognisedTexts = new List<RecognisedText>();

                    var pgLevel = PageIteratorLevel.Word;

                    using (var iter = page.GetIterator())
                    {
                        do
                        {
                            // Words without a bounding box are left out of the JSON list
                            if (iter.TryGetBoundingBox(pgLevel, out Rect boundary))
                            {
                                var reading = iter.GetText(pgLevel);

                                recognisedTexts.Add(new RecognisedText
                                {
                                    Text     = reading,
                                    Boundary = boundary
                                });
                            }
                        }
                        while (iter.Next(pgLevel));
                    }

                    text += "=======================\r\nJSON REPRESENTATION\r\n=======================\r\n";

                    text += JsonSerializer.Serialize(recognisedTexts);

                    OutputText.Document.SetText(TextSetOptions.FormatRtf, text);
                }
            }
            catch (System.Reflection.TargetInvocationException)
            {
                await AnalyseUsingAzureComputerVisionAsync(imageFile);
            }
            catch (Exception ex)
            {
                await ShowExceptionMessageAsync(ex);
            }
        }