/// <summary>
        /// Runs Tesseract OCR over an image supplied as raw file bytes and returns the recognised text.
        /// Recognition is restricted to the characters <c>\</c>, <c>/</c>, <c>|</c> and <c>#</c>.
        /// </summary>
        /// <param name="fileBytes">Byte array of image file data.</param>
        /// <returns>The text Tesseract recognised on the image.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="fileBytes"/> is null.</exception>
        public static string getText(byte[] fileBytes)
        {
            if (fileBytes == null)
            {
                throw new System.ArgumentNullException("fileBytes");
            }

            // The application's physical root is handed to the engine as its data path.
            string rootPath = HostingEnvironment.ApplicationPhysicalPath;
            BitmapToPixConverter converter = new BitmapToPixConverter();

            using (Stream memStream = new MemoryStream(fileBytes))
            using (Bitmap image = (Bitmap)Image.FromStream(memStream))
            using (TesseractEngine ocr = new TesseractEngine(rootPath, "eng", EngineMode.TesseractOnly))
            {
                image.SetResolution(300, 300);
                ocr.SetVariable("tessedit_char_whitelist", "\\/|#");

                // Both the colour Pix and its grayscale copy wrap native memory; each gets its
                // own using so neither leaks, even when OCR throws.
                using (Pix colorPix = converter.Convert(image))
                using (Pix grayPix = colorPix.ConvertRGBToGray())
                using (Page page = ocr.Process(grayPix, PageSegMode.Auto))
                {
                    return page.GetText();
                }
            }
        }
        /// <summary>
        /// Walks the current cut image from top to bottom, crops each run of non-blank rows
        /// into its own bitmap, displays it, runs Tesseract on it in single-line mode and
        /// shows the text and confidence of each recognised block in a message box.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void OCRButton_Click(object sender, RoutedEventArgs e)
        {
            // Extra pixels kept above and below each detected row when cropping.
            const int RowPadding = 5;
            // Upper bound on the number of message boxes shown for a single row.
            const int MaxBlocksPerRow = 11;

            // NOTE(review): this bitmap is not disposed, as before; whether ToBitmap()
            // returns a fresh copy that this method owns is not visible here — confirm.
            Bitmap commodityData = cutImage.ToBitmap();
            var converter = new BitmapToPixConverter();

            // Created on first use and reused for every row instead of being rebuilt per iteration.
            TesseractEngine tesseract = null;
            int blankRow = 0;

            try
            {
                while (true)
                {
                    int nonBlankRow = findNonBlankRow(commodityData, blankRow + 1);
                    if (nonBlankRow == -1)
                    {
                        break;
                    }

                    blankRow = findBlankRow(commodityData, nonBlankRow + 1);
                    if (blankRow == -1)
                    {
                        break;
                    }

                    int heightOfRow = blankRow - nonBlankRow + 2 * RowPadding;

                    using (Bitmap rowBitmap = new Bitmap(commodityData.Width, heightOfRow))
                    {
                        using (Graphics g = Graphics.FromImage(rowBitmap))
                        {
                            // Shift the source up so the row starts RowPadding pixels below the top edge.
                            g.DrawImage(commodityData, 0, -nonBlankRow + RowPadding);
                        }

                        DisplayImage.Source = Utils.BitmapToBitmapSource(rowBitmap);

                        if (tesseract == null)
                        {
                            tesseract = new TesseractEngine("tessdata", "big", EngineMode.Default);
                        }

                        // The page is disposed before the next Process call on the shared engine.
                        using (var pix = converter.Convert(rowBitmap))
                        using (Tesseract.Page page = tesseract.Process(pix, PageSegMode.SingleLine))
                        using (var iter = page.GetIterator())
                        {
                            int shown = 0;
                            do
                            {
                                MessageBox.Show("Text: " + iter.GetText(PageIteratorLevel.Block) + " Confidence: " + iter.GetConfidence(PageIteratorLevel.Block));
                                shown++;
                                if (shown >= MaxBlocksPerRow)
                                {
                                    break;
                                }
                            } while (iter.Next(PageIteratorLevel.Block));
                        }
                    }

                    // NOTE(review): this blocks the calling thread for a second between rows.
                    Thread.Sleep(1000);
                }
            }
            finally
            {
                if (tesseract != null)
                {
                    tesseract.Dispose();
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Runs the supplied Tesseract engine over a bitmap and returns the recognised text
        /// with double newlines, periods and commas removed.
        /// </summary>
        /// <param name="c1">Bitmap to recognise.</param>
        /// <param name="engine">Engine used to process the bitmap; it is not disposed here.</param>
        /// <param name="cf1">Receives the mean confidence reported for the processed page.</param>
        /// <returns>The page text after the replacements described above.</returns>
        private static string AnalyseFrameUsingTesseract(Bitmap c1, TesseractEngine engine, out float cf1)
        {
            var converter = new BitmapToPixConverter();
            string recognised;

            // The converted Pix wraps native memory, so it is released together with the page.
            using (var pix = converter.Convert(c1))
            using (var page = engine.Process(pix))
            {
                recognised = page.GetText().Replace("\n\n", "").Replace(".", "").Replace(",", "");
                cf1 = page.GetMeanConfidence();
            }

            return recognised;
        }