/// <summary>
/// Runs Tesseract OCR over an in-memory image and returns the recognised text.
/// Recognition is restricted to the whitelist characters <c>\ / | #</c>.
/// </summary>
/// <param name="fileBytes">Byte array of image file data.</param>
/// <returns>The text Tesseract recognised in the image.</returns>
public static string getText(byte[] fileBytes)
{
    // The application's physical root is passed to Tesseract as its data path.
    string rootPath = HostingEnvironment.ApplicationPhysicalPath;
    BitmapToPixConverter converter = new BitmapToPixConverter();

    // The stream must stay open for as long as the Bitmap created from it is in use.
    using (Stream memStream = new MemoryStream(fileBytes))
    using (Bitmap image = (Bitmap)Image.FromStream(memStream))
    using (TesseractEngine ocr = new TesseractEngine(rootPath, "eng", EngineMode.TesseractOnly))
    {
        image.SetResolution(300, 300);
        ocr.SetVariable("tessedit_char_whitelist", "\\/|#");

        // ConvertRGBToGray returns a NEW Pix, so both the colour and the gray
        // images need disposing; the nested usings release them (page first)
        // even when OCR throws.
        using (Pix colour = converter.Convert(image))
        using (Pix gray = colour.ConvertRGBToGray())
        using (Page page = ocr.Process(gray, PageSegMode.Auto))
        {
            return page.GetText();
        }
    }
}
/// <summary>
/// Click handler: walks down the cut image, crops each band between a
/// non-blank row and the following blank row, displays the crop, OCRs it as a
/// single line and shows the recognised text and confidence in message boxes.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void OCRButton_Click(object sender, RoutedEventArgs e)
{
    // Extra pixels kept above and below each detected row when cropping.
    const int rowPadding = 5;

    // NOTE(review): commodityData is not disposed here because this block cannot
    // see whether ToBitmap() transfers ownership to the caller — confirm, and wrap
    // it in a using block if it does.
    Bitmap commodityData = cutImage.ToBitmap();
    var conv = new BitmapToPixConverter();

    // One engine serves every row; building it per row reloads the trained data each time.
    using (TesseractEngine tesseract = new TesseractEngine("tessdata", "big", EngineMode.Default))
    {
        int blankRow = 0;
        while (true)
        {
            // Both helpers signal "nothing further found" with -1.
            int nonBlankRow = findNonBlankRow(commodityData, blankRow + 1);
            if (nonBlankRow == -1)
            {
                break;
            }

            blankRow = findBlankRow(commodityData, nonBlankRow + 1);
            if (blankRow == -1)
            {
                break;
            }

            int heightOfRow = blankRow - nonBlankRow + (2 * rowPadding);
            using (Bitmap newBitmap = new Bitmap(commodityData.Width, heightOfRow))
            {
                using (Graphics g = Graphics.FromImage(newBitmap))
                {
                    // Shift the source up so the detected row starts rowPadding pixels from the top.
                    g.DrawImage(commodityData, 0, -nonBlankRow + rowPadding);
                }

                BitmapSource bmpSource = Utils.BitmapToBitmapSource(newBitmap);
                DisplayImage.Source = bmpSource;

                // Pix, page and iterator all need disposing; the usings release them
                // (iterator, then page, then pix) even if a call throws.
                using (var pix = conv.Convert(newBitmap))
                using (Tesseract.Page page = tesseract.Process(pix, PageSegMode.SingleLine))
                using (var iter = page.GetIterator())
                {
                    int counter = 0;
                    do
                    {
                        MessageBox.Show("Text: " + iter.GetText(PageIteratorLevel.Block) + " Confidence: " + iter.GetConfidence(PageIteratorLevel.Block));
                        counter++;

                        // Cap the number of message boxes shown for a single row.
                        if (counter > 10)
                        {
                            break;
                        }
                    } while (iter.Next(PageIteratorLevel.Block));
                }
            }

            // NOTE(review): this blocks the UI thread for one second between rows;
            // kept as in the original — consider an async delay if the pause is still wanted.
            Thread.Sleep(1000);
        }
    }
}
/// <summary>
/// OCRs a single frame with the supplied engine and returns the recognised
/// text with double newlines, full stops and commas removed.
/// </summary>
/// <param name="c1">Frame to recognise.</param>
/// <param name="engine">Tesseract engine used for recognition; not disposed here.</param>
/// <param name="cf1">Receives the page's mean confidence as reported by Tesseract.</param>
/// <returns>The cleaned-up recognised text.</returns>
private static string AnalyseFrameUsingTesseract(Bitmap c1, TesseractEngine engine, out float cf1)
{
    var conv = new BitmapToPixConverter();

    // The converted Pix must be disposed: it was previously leaked on every call.
    // The nested usings release the page before the pix it was built from.
    using (var pix = conv.Convert(c1))
    using (var page = engine.Process(pix))
    {
        cf1 = page.GetMeanConfidence();
        return page.GetText().Replace("\n\n", "").Replace(".", "").Replace(",", "");
    }
}