/// <summary>
/// Runs Tesseract OCR (English, <see cref="PageSegMode.AutoOsd"/>) over the given image and
/// collects the bounding rectangle of every recognised symbol that consists solely of letters.
/// </summary>
/// <param name="currentImage">Bitmap to recognise.</param>
/// <returns>
/// One <see cref="Rectangle"/> per letter-only symbol for which a bounding box was available,
/// in iteration order. Empty when nothing qualifies.
/// </returns>
public List<Rectangle> GetTextRects(Bitmap currentImage)
{
    // The tessdata folder is looked up next to the executing assembly.
    string assemblyDir = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? string.Empty;
    string tessPath = System.IO.Path.Combine(assemblyDir, "tessdata");

    List<Rectangle> currentRects = new List<Rectangle>();

    // Nested usings release iterator, page and engine in that order, even when recognition throws.
    using (TesseractEngine tess = new TesseractEngine(tessPath, "eng"))
    using (Page newPage = tess.Process(currentImage, PageSegMode.AutoOsd))
    using (ResultIterator iterator = newPage.GetIterator())
    {
        iterator.Begin();
        do
        {
            string symbolText = iterator.GetText(PageIteratorLevel.Symbol);
            bool onlyLetters = !string.IsNullOrEmpty(symbolText) && symbolText.All(c => char.IsLetter(c));
            if (onlyLetters)
            {
                Rect foundRect;
                if (iterator.TryGetBoundingBox(PageIteratorLevel.Symbol, out foundRect))
                {
                    currentRects.Add(new Rectangle(
                        foundRect.X1,
                        foundRect.Y1,
                        foundRect.X2 - foundRect.X1,
                        foundRect.Y2 - foundRect.Y1));
                }
            }
            // Let the iterator itself signal the end instead of guessing the symbol count
            // from the length of the page text (which also counts spaces and line breaks).
        } while (iterator.Next(PageIteratorLevel.Symbol));
    }

    return currentRects;
}
/// <summary>
/// Recognises the text in <paramref name="img"/> with a freshly created Tesseract engine,
/// passes the result iterator to <c>IterateFullPage</c> together with <c>_textLines</c>,
/// and reports the engine's mean text confidence.
/// </summary>
/// <param name="img">Image whose raw pixel buffer is handed directly to the engine.</param>
/// <returns>The value of <c>engine.MeanTextConf</c> after recognition.</returns>
private int RunTesseract(Mat img)
{
    using (TessBaseAPI engine = new TessBaseAPI(@".\tessdata", _lang, OcrEngineMode.TESSERACT_LSTM_COMBINED))
    {
        engine.InitForAnalysePage();
        engine.Init(null, _lang);

        // Reinterpret the signed pointer value as unsigned; bit-for-bit the same as a
        // BitConverter.GetBytes/ToUInt64 round trip, without the temporary byte array.
        UIntPtr pixelData = new UIntPtr(unchecked((ulong)img.Data.ToInt64()));

        // NOTE(review): Step1() is passed as the bytes-per-line argument; presumably the
        // image has 8-bit channels so that both units coincide - confirm against callers.
        engine.SetImage(pixelData, img.Size().Width, img.Size().Height, img.Channels(), (int)img.Step1());
        engine.Recognize();

        ResultIterator iterator = engine.GetIterator();
        try
        {
            IterateFullPage(iterator, ref _textLines);
        }
        finally
        {
            // Release the iterator even when page iteration throws.
            iterator.Dispose();
        }

        return engine.MeanTextConf;
    }
}
/// <summary>
/// Recognises <paramref name="pageData"/> and adds one <c>TextArea</c> to
/// <paramref name="page"/> for every word that passes <c>TextIsValid</c> and has a bounding box.
/// Each area also receives the per-symbol rectangles of its word.
/// </summary>
/// <param name="engine">Tesseract engine used for recognition.</param>
/// <param name="pageData">Image of the page to recognise.</param>
/// <param name="language">Not used by this method.</param>
/// <param name="page">Document page whose <c>TextAreas</c> collection is filled.</param>
private void GetPageData(TesseractEngine engine, Pix pageData, string language, ATAPY.Document.Data.Core.Page page)
{
    using (var tessPage = engine.Process(pageData))
    {
        tessPage.Recognize();

        // The iterator is scoped inside the page's using block so that it is disposed
        // before the page it was obtained from, also on exceptions.
        using (var resultIterator = tessPage.GetIterator())
        {
            resultIterator.Begin();
            do
            {
                var text = resultIterator.GetText(PageIteratorLevel.Word);
                if (TextIsValid(text) && resultIterator.TryGetBoundingBox(PageIteratorLevel.Word, out var rect))
                {
                    var area = new TextArea(GetRect(rect), text, page);
                    page.TextAreas.Add(area);

                    // One slot per character of the word text; slots whose symbol has no
                    // bounding box keep the default rectangle.
                    var chars = new System.Windows.Rect[text.Length];
                    int charIter = 0;
                    do
                    {
                        // Guard against the engine reporting more symbols than the word
                        // text has characters, which would otherwise overrun the array.
                        if (charIter < chars.Length
                            && resultIterator.TryGetBoundingBox(PageIteratorLevel.Symbol, out var sRect))
                        {
                            chars[charIter] = GetRect(sRect);
                        }
                        charIter++;
                    } while (resultIterator.Next(PageIteratorLevel.Word, PageIteratorLevel.Symbol));

                    area.SetCharProperties(chars);
                }
            } while (resultIterator.Next(PageIteratorLevel.Word));
        }
    }
}
/*
 * Legacy implementation, kept for reference only (not compiled).
 *
 * int LookingTextMarker(RecognazeRule Rule, Page page, out ResultIterator BestLineIter, out int word)
 * {
 *     word = -1;
 *     BestLineIter = null;
 *     int BestDistance = 10000;
 *
 *     ResultIterator LineIter = page.GetIterator();
 *     string[] Words = Rule.TextMarker.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
 *     int NumberOfWords = Words.Length;
 *     LineIter.Begin();
 *     do
 *     {
 *         int CurrentWordNumber = -1;
 *         int CurrentBestDistance = 10000;
 *         string Line = LineIter.GetText(PageIteratorLevel.TextLine);
 *         if(Line == null)
 *             continue;
 *         string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.None);
 *         if(WordsOfLine.Length < NumberOfWords)
 *             continue;
 *
 *         for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
 *         {
 *             int PassDistance = 0;
 *             for(int i = 0; i < NumberOfWords; i++)
 *             {
 *                 PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
 *                                                                            Words[i],
 *                                                                            StringComparison.CurrentCultureIgnoreCase);
 *             }
 *             if(PassDistance < CurrentBestDistance)
 *             {
 *                 CurrentBestDistance = PassDistance;
 *                 CurrentWordNumber = shift + 1;
 *             }
 *         }
 *         if(CurrentBestDistance < BestDistance)
 *         {
 *             AddToLog ("new best");
 *             AddToLog (LineIter.GetText(PageIteratorLevel.Word));
 *             word = CurrentWordNumber;
 *             if(BestLineIter != null)
 *                 BestLineIter.Dispose();
 *             BestLineIter = LineIter.Clone();
 *             AddToLog (BestLineIter.GetText(PageIteratorLevel.TextLine));
 *             BestDistance = CurrentBestDistance;
 *         }
 *     } while( LineIter.Next(PageIteratorLevel.TextLine));
 *     LineIter.Dispose();
 *     return BestDistance;
 * }
 */

/// <summary>
/// Searches the recognised page for the run of consecutive words that is closest
/// (by case-insensitive Levenshtein distance) to the marker text, and reports the
/// position and baseline angle of the first word of that run.
/// </summary>
/// <param name="Text">Marker text to look for; may consist of several space-separated words.</param>
/// <param name="page">Recognised Tesseract page to search.</param>
/// <param name="PosX">X1 of the bounding box of the first matched word, or -1 when no line matched.</param>
/// <param name="PosY">Y1 of the bounding box of the first matched word, or -1 when no line matched.</param>
/// <param name="AngleRad">Angle of the matched word's baseline in radians, or 0 when no line matched.</param>
/// <param name="AfterMarkerRules">
/// Rules to update. For rules with <c>NextAfterTextMarker</c> set, <c>AfterTextMarkerValue</c>
/// and <c>AfterTextMarkerConfidence</c> receive the word located <c>ShiftWordsCount</c> words
/// after the end of the match, when the line is long enough. <c>null</c> is treated as no rules.
/// </param>
/// <returns>The smallest distance found, or 10000 when no line produced a candidate.</returns>
int GetTextPosition(string Text, Page page, out int PosX, out int PosY, out double AngleRad, RecognazeRule[] AfterMarkerRules)
{
    int bestDistance = 10000;
    PosX = -1;
    PosY = -1;
    AngleRad = 0;
    logger.Debug("Marker zone text:{0}", page.GetText());

    RecognazeRule[] rules = AfterMarkerRules ?? new RecognazeRule[0];
    int markerWordCount = Text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).Length;

    // using guarantees the native iterator is released even if a step below throws.
    using (ResultIterator lineIter = page.GetIterator())
    {
        lineIter.Begin();
        do
        {
            string line = lineIter.GetText(PageIteratorLevel.TextLine);
            if (string.IsNullOrEmpty(line))
            {
                continue; // in a do/while this still evaluates the loop condition, i.e. advances to the next line
            }

            line = line.Trim();
            string[] wordsOfLine = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (wordsOfLine.Length == 0)
            {
                continue;
            }

            // Try every window of 1..markerWordCount consecutive words on this line.
            int matchStart = -1;   // index of the first word of the best window
            int matchEnd = 0;      // index of the first word after the best window
            int lineBestDistance = 10000;
            for (int shift = 0; shift < wordsOfLine.Length; shift++)
            {
                for (int count = 1; count <= markerWordCount && count <= wordsOfLine.Length - shift; count++)
                {
                    string candidate = String.Join(" ", wordsOfLine, shift, count);
                    int distance = FuzzyStringComparer.GetDistanceLevenshtein(candidate, Text, StringComparison.CurrentCultureIgnoreCase);
                    if (distance < lineBestDistance)
                    {
                        lineBestDistance = distance;
                        matchStart = shift;
                        matchEnd = shift + count;
                    }
                }
            }

            if (lineBestDistance >= bestDistance)
            {
                continue;
            }

            logger.Debug("new best");
            logger.Debug(line);

            // Fill the rule fields with the words that follow the marker.
            foreach (RecognazeRule rule in rules)
            {
                if (rule.NextAfterTextMarker && wordsOfLine.Length > matchEnd + rule.ShiftWordsCount)
                {
                    rule.AfterTextMarkerValue = wordsOfLine[matchEnd + rule.ShiftWordsCount];
                }
            }
            bestDistance = lineBestDistance;

            // Advance the iterator from the start of the line to the first matched word.
            for (int i = 0; i < matchStart; i++)
            {
                lineIter.Next(PageIteratorLevel.Word);
            }

            Rect box;
            lineIter.TryGetBoundingBox(PageIteratorLevel.Word, out box);
            PosX = box.X1;
            PosY = box.Y1;
            logger.Debug("Position X1:{0} Y1:{1} X2:{2} Y2:{3}", box.X1, box.Y1, box.X2, box.Y2);

            lineIter.TryGetBaseline(PageIteratorLevel.Word, out box);
            logger.Debug("BaseLine X1:{0} Y1:{1} X2:{2} Y2:{3}", box.X1, box.Y1, box.X2, box.Y2);

            AngleRad = Math.Atan2(box.Y2 - box.Y1, box.X2 - box.X1); // slope angle of the baseline
            double angleGrad = AngleRad * (180 / Math.PI);
            logger.Debug("Angle rad:{0} grad:{1}", AngleRad, angleGrad);

            // Read the recognition confidence of the words that follow the marker.
            // The iterator sits on the first matched word, so the counter starts at minus
            // the match length: reaching 0 means "first word after the marker".
            int iterAlreadyShifted = matchStart - matchEnd;
            bool stopIteration = false;
            foreach (RecognazeRule rule in rules.Where(x => x.NextAfterTextMarker).OrderBy(x => x.ShiftWordsCount))
            {
                while (iterAlreadyShifted < rule.ShiftWordsCount)
                {
                    // Never step past the end of the current line.
                    if (lineIter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                    {
                        stopIteration = true;
                        break;
                    }
                    lineIter.Next(PageIteratorLevel.Word);
                    iterAlreadyShifted++;
                }
                if (stopIteration)
                {
                    break;
                }

                rule.AfterTextMarkerConfidence = lineIter.GetConfidence(PageIteratorLevel.Word);
                logger.Debug("Cлово {0} со сдвигом {1} имеет точность {2}.", lineIter.GetText(PageIteratorLevel.Word), rule.ShiftWordsCount, rule.AfterTextMarkerConfidence);
            }
        } while (lineIter.Next(PageIteratorLevel.TextLine));
    }

    return bestDistance;
}