/// <summary>
/// Looks through every recognised word on the page for one that contains
/// <paramref name="searchWord"/> (compared after upper-casing both sides) and
/// reports the centre of the match that lies closest to the centre of the bitmap.
/// The full page text and a per-word trace are appended to <c>debugText</c>.
/// </summary>
/// <param name="searchWord">Text to look for. A null or empty value never matches.</param>
/// <param name="position">
/// Centre of the bounding box of the closest matching word, or (0, 0) when no match
/// with a bounding box was found.
/// </param>
/// <returns><c>true</c> when at least one matching word with a bounding box was found.</returns>
public bool FindWord(string searchWord, out Point position)
{
    bool found = false;
    float minDistance = float.MaxValue;
    Point center = new Point(bitmap.Width / 2, bitmap.Height / 2);
    position = new Point(0, 0);

    debugText += page.GetText();

    // Upper-case the needle once instead of once per word; null is treated like "" (no match).
    string needle = string.IsNullOrEmpty(searchWord) ? null : searchWord.ToUpper();

    // The iterator is disposable; release it even if something below throws.
    using (var iterator = page.GetIterator())
    {
        iterator.Begin();
        do
        {
            string word = iterator.GetText(PageIteratorLevel.Word);
            if (word == null)
            {
                continue; // in a do/while this jumps to the Next() check below
            }

            debugText += "\r\nWord: '" + word + "'";

            if (needle == null || !word.ToUpper().Contains(needle))
            {
                continue;
            }

            debugText += "\r\n*** Word match!";

            Rect rect;
            if (!iterator.TryGetBoundingBox(PageIteratorLevel.Word, out rect))
            {
                debugText += "\r\n*** Error: Failed to get bounding box!";
                continue;
            }

            Point wordCenter = new Point((rect.X1 + rect.X2) / 2, (rect.Y1 + rect.Y2) / 2);
            int dx = wordCenter.X - center.X;
            int dy = wordCenter.Y - center.Y;

            // Square in double so very large offsets cannot overflow int arithmetic.
            float distance = (float)Math.Sqrt((double)dx * dx + (double)dy * dy);

            // "<=" means that among equally distant matches the later one wins.
            if (distance <= minDistance)
            {
                debugText += "\r\n*** Closest!";
                position = wordCenter;
                found = true;
                minDistance = distance;
            }
        } while (iterator.Next(PageIteratorLevel.Word));
    }

    return found;
}
/// <summary>
/// Builds one <see cref="CharBox"/> for every symbol on <paramref name="page"/> that has a
/// bounding box. Each box carries the symbol text and its rectangle, with X, Y, width and
/// height multiplied by <c>Settings.Constants.Pdf2ImageResolutionRatio</c>.
/// </summary>
/// <remarks>
/// Only recognised symbols are returned; no separator boxes (spaces or line breaks) are
/// inserted. The iterator is used as returned by <c>GetIterator()</c>, without calling
/// <c>Begin()</c>.
/// NOTE(review): the ratio presumably converts image pixels to PDF units - confirm.
/// </remarks>
/// <param name="page">Recognised page to walk symbol by symbol.</param>
/// <returns>The boxes in iteration order; empty when no symbol has a bounding box.</returns>
List <CharBox> getCharBoxs(Tesseract.Page page)
{
    var boxes = new List<CharBox>();

    using (var symbols = page.GetIterator())
    {
        do
        {
            Rect bounds;
            if (!symbols.TryGetBoundingBox(PageIteratorLevel.Symbol, out bounds))
            {
                // No geometry for this symbol: skip it and move on to the next one.
                continue;
            }

            var symbolText = symbols.GetText(PageIteratorLevel.Symbol);
            var scale = Settings.Constants.Pdf2ImageResolutionRatio;
            var scaledBounds = new RectangleF(
                bounds.X1 * scale,
                bounds.Y1 * scale,
                bounds.Width * scale,
                bounds.Height * scale);

            var box = new CharBox();
            box.Char = symbolText;
            box.R = scaledBounds;
            boxes.Add(box);
        } while (symbols.Next(PageIteratorLevel.Symbol));
    }

    return boxes;
}
/// <summary>
/// Searches the text lines of <paramref name="page"/> for the run of consecutive words that is
/// closest to <paramref name="Text"/> (Levenshtein distance, current culture, ignoring case)
/// and reports where that run starts.
/// </summary>
/// <param name="Text">Marker text to look for; may consist of several space-separated words.</param>
/// <param name="page">Recognised page to search.</param>
/// <param name="PosX">Left edge (X1) of the first word of the best match, or -1 when unavailable.</param>
/// <param name="PosY">Top edge (Y1) of the first word of the best match, or -1 when unavailable.</param>
/// <param name="AngleRad">Slope of that word's baseline in radians; 0 when no baseline is available.</param>
/// <param name="AfterMarkerRules">
/// Rules to fill with the word that follows the marker. For every rule with
/// <c>NextAfterTextMarker</c> set, <c>AfterTextMarkerValue</c> receives the word located
/// <c>ShiftWordsCount</c> words after the end of the match, if such a word exists on the line.
/// May be null.
/// </param>
/// <returns>The smallest distance found, or 10000 when nothing was compared.</returns>
int GetTextPosition(string Text, Page page, out int PosX, out int PosY, out double AngleRad, RecognazeRule[] AfterMarkerRules)
{
    int BestDistance = 10000;
    PosX = -1;
    PosY = -1;
    AngleRad = 0;
    logger.Debug("Marker zone text:{0}", page.GetText());

    char[] separators = new char[] {' '};
    int NumberOfWords = Text.Split(separators, StringSplitOptions.RemoveEmptyEntries).Length;

    // Dispose the iterator even when the loop body throws.
    using (ResultIterator LineIter = page.GetIterator())
    {
        LineIter.Begin();
        do
        {
            int CurrentWordNumber = -1;
            int CurrentAfterWord = 0;
            int CurrentBestDistance = 10000;

            string Line = LineIter.GetText(PageIteratorLevel.TextLine);
            if (string.IsNullOrEmpty(Line))
                continue; // jumps to the Next(TextLine) check

            Line = Line.Trim();
            string[] WordsOfLine = Line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            if (WordsOfLine.Length == 0)
                continue;

            // Try every start word and every run length up to the marker's word count.
            for (int shift = 0; shift < WordsOfLine.Length; shift++)
            {
                for (int i = 1; i <= NumberOfWords && i <= WordsOfLine.Length - shift; i++)
                {
                    string passString = String.Join(" ", WordsOfLine, shift, i);
                    int PassDistance = FuzzyStringComparer.GetDistanceLevenshtein(passString, Text, StringComparison.CurrentCultureIgnoreCase);
                    if (PassDistance < CurrentBestDistance)
                    {
                        CurrentBestDistance = PassDistance;
                        CurrentWordNumber = shift;
                        CurrentAfterWord = shift + i;
                    }
                }
            }

            if (CurrentBestDistance >= BestDistance)
                continue;

            logger.Debug("new best");
            logger.Debug(Line);

            // Fill the rule fields with the data that follows the marker.
            if (AfterMarkerRules != null)
            {
                foreach (RecognazeRule rule in AfterMarkerRules)
                {
                    if (!rule.NextAfterTextMarker)
                        continue;

                    // Guard both ends: a negative shift must not index before the line start.
                    int valueIndex = CurrentAfterWord + rule.ShiftWordsCount;
                    if (valueIndex >= 0 && valueIndex < WordsOfLine.Length)
                    {
                        rule.AfterTextMarkerValue = WordsOfLine[valueIndex];
                    }
                }
            }

            BestDistance = CurrentBestDistance;

            // Step the iterator from the line start to the first word of the match.
            for (int i = 0; i < CurrentWordNumber; i++)
            {
                LineIter.Next(PageIteratorLevel.Word);
            }

            Rect Box;
            if (LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box))
            {
                PosX = Box.X1;
                PosY = Box.Y1;
            }
            else
            {
                // No geometry for the word: report "no position" rather than a fake (0, 0).
                PosX = -1;
                PosY = -1;
            }
            logger.Debug("Position X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);

            Rect BaseLine;
            bool hasBaseLine = LineIter.TryGetBaseline(PageIteratorLevel.Word, out BaseLine);
            logger.Debug("BaseLine X1:{0} Y1:{1} X2:{2} Y2:{3}", BaseLine.X1, BaseLine.Y1, BaseLine.X2, BaseLine.Y2);

            // Slope angle of the baseline.
            AngleRad = hasBaseLine ? Math.Atan2(BaseLine.Y2 - BaseLine.Y1, BaseLine.X2 - BaseLine.X1) : 0;
            double AngleGrad = AngleRad * (180/Math.PI);
            logger.Debug("Angle rad:{0} grad:{1}", AngleRad, AngleGrad);
        } while (LineIter.Next(PageIteratorLevel.TextLine));
    }

    return BestDistance;
}
/// <summary>
/// Searches the text lines of <paramref name="page"/> for the run of consecutive words that best
/// matches the words of <paramref name="Text"/> and reports the top-left corner of the first
/// matched word. The score of a run is the sum of the per-word Levenshtein distances
/// (current culture, ignoring case).
/// </summary>
/// <param name="Text">Marker text to look for; split into words on spaces.</param>
/// <param name="page">Recognised page to search.</param>
/// <param name="PosX">Left edge (X1) of the first matched word, or -1 when unavailable.</param>
/// <param name="PosY">Top edge (Y1) of the first matched word, or -1 when unavailable.</param>
/// <returns>
/// The smallest summed distance found, or 10000 when nothing was compared
/// (including when <paramref name="Text"/> contains no words).
/// </returns>
int GetTextPosition(string Text, Page page, out int PosX, out int PosY)
{
    int BestDistance = 10000;
    PosX = -1;
    PosY = -1;

    char[] separators = new char[] {' '};
    string[] Words = Text.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    int NumberOfWords = Words.Length;

    // A marker without words would otherwise score a "perfect" distance of 0 on the first line.
    if (NumberOfWords == 0)
        return BestDistance;

    // Dispose the iterator even when the loop body throws.
    using (ResultIterator LineIter = page.GetIterator())
    {
        LineIter.Begin();
        do
        {
            int CurrentWordNumber = -1;
            int CurrentBestDistance = 10000;

            string Line = LineIter.GetText(PageIteratorLevel.TextLine);
            if (Line == null)
                continue; // jumps to the Next(TextLine) check

            // Drop empty entries so repeated spaces do not create phantom words that would
            // shift the word index used to advance the iterator below.
            string[] WordsOfLine = Line.Trim().Split(separators, StringSplitOptions.RemoveEmptyEntries);
            if (WordsOfLine.Length < NumberOfWords)
                continue;

            // Slide a window of NumberOfWords words over the line and keep the best start.
            for (int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
            {
                int PassDistance = 0;
                for (int i = 0; i < NumberOfWords; i++)
                {
                    PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i], Words[i], StringComparison.CurrentCultureIgnoreCase);
                }
                if (PassDistance < CurrentBestDistance)
                {
                    CurrentBestDistance = PassDistance;
                    CurrentWordNumber = shift;
                }
            }

            if (CurrentBestDistance >= BestDistance)
                continue;

            AddToLog ("new best");
            AddToLog (LineIter.GetText(PageIteratorLevel.TextLine).Trim());
            BestDistance = CurrentBestDistance;

            // Step the iterator from the line start to the first word of the match.
            for (int i = 0; i < CurrentWordNumber; i++)
            {
                LineIter.Next(PageIteratorLevel.Word);
            }

            Rect Box;
            if (LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box))
            {
                PosX = Box.X1;
                PosY = Box.Y1;
            }
            else
            {
                // No geometry for the word: report "no position" rather than a fake (0, 0).
                PosX = -1;
                PosY = -1;
            }
        } while (LineIter.Next(PageIteratorLevel.TextLine));
    }

    return BestDistance;
}