/// <summary>
/// Scans the recognized page word by word for words that contain
/// <paramref name="searchWord"/> (case-insensitive) and reports the center of the
/// matching word that lies closest to the center of the bitmap.
/// The full page text and a per-word trace are appended to <c>debugText</c>.
/// </summary>
/// <param name="searchWord">Text to look for. A null or empty value never matches.</param>
/// <param name="position">
/// Center of the bounding box of the closest matching word, or (0, 0) when no match
/// with a bounding box was found.
/// </param>
/// <returns><c>true</c> if at least one matching word with a bounding box was found.</returns>
public bool FindWord(string searchWord, out Point position)
        {
            bool  result      = false;
            float minDistance = float.MaxValue;
            Point center      = new Point(bitmap.Width / 2, bitmap.Height / 2);

            position = new Point(0, 0);

            debugText += page.GetText();

            // Evaluated once: a null/empty search term can never produce a match,
            // but the words are still traced into debugText.
            bool hasSearchTerm = !string.IsNullOrEmpty(searchWord);

            // The iterator is disposable; release it even if an exception escapes.
            using (var iterator = page.GetIterator())
            {
                iterator.Begin();
                do
                {
                    string word = iterator.GetText(PageIteratorLevel.Word);
                    if (word == null)
                    {
                        continue;
                    }

                    debugText += "\r\nWord: '" + word + "'";

                    // Culture-independent, allocation-free case-insensitive containment test.
                    if (!hasSearchTerm || word.IndexOf(searchWord, StringComparison.OrdinalIgnoreCase) < 0)
                    {
                        continue;
                    }

                    debugText += "\r\n*** Word match!";
                    Rect rect;
                    if (!iterator.TryGetBoundingBox(PageIteratorLevel.Word, out rect))
                    {
                        debugText += "\r\n*** Error: Failed to get bounding box!";
                        continue;
                    }

                    Point wordCenter = new Point((rect.X1 + rect.X2) / 2, (rect.Y1 + rect.Y2) / 2);
                    int   dx         = wordCenter.X - center.X;
                    int   dy         = wordCenter.Y - center.Y;
                    // Square in double so very large coordinates cannot overflow int.
                    float distance   = (float)Math.Sqrt((double)dx * dx + (double)dy * dy);

                    // "<=" means that among equally distant matches the last one wins.
                    if (distance <= minDistance)
                    {
                        debugText  += "\r\n*** Closest!";
                        position    = wordCenter;
                        result      = true;
                        minDistance = distance;
                    }
                } while (iterator.Next(PageIteratorLevel.Word));
            }

            return(result);
        }
Beispiel #2
0
        /// <summary>
        /// Builds one <c>CharBox</c> for every symbol of the page for which a bounding box
        /// is available. The symbol text is stored in <c>Char</c>; the bounding box, scaled
        /// by <c>Settings.Constants.Pdf2ImageResolutionRatio</c>, is stored in <c>R</c>.
        /// </summary>
        /// <param name="page">Recognized Tesseract page to read the symbols from.</param>
        /// <returns>The collected boxes in iteration order (empty if none were found).</returns>
        List <CharBox> getCharBoxs(Tesseract.Page page)
        {
            // NOTE(review): earlier experiments (block/paragraph/line level iteration and
            // auto-inserted " " / "\r\n" boxes at word and paragraph starts) used to live here
            // as commented-out code; only the plain symbol scan is active.
            var collected = new List <CharBox>();

            using (var symbols = page.GetIterator())
            {
                bool more = true;
                while (more)
                {
                    Rect bounds;
                    if (symbols.TryGetBoundingBox(PageIteratorLevel.Symbol, out bounds))
                    {
                        var box = new CharBox();
                        box.Char = symbols.GetText(PageIteratorLevel.Symbol);

                        // Scale factor applied to all four rectangle components.
                        var scale = Settings.Constants.Pdf2ImageResolutionRatio;
                        box.R = new RectangleF(
                            bounds.X1 * scale,
                            bounds.Y1 * scale,
                            bounds.Width * scale,
                            bounds.Height * scale);

                        collected.Add(box);
                    }

                    more = symbols.Next(PageIteratorLevel.Symbol);
                }
            }

            return collected;
        }
        /*
        int LookingTextMarker(RecognazeRule Rule, Page page, out ResultIterator BestLineIter, out int word)
        {
            word = -1;
            BestLineIter = null;
            int BestDistance = 10000;

            ResultIterator LineIter = page.GetIterator();
            string[] Words = Rule.TextMarker.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;
            LineIter.Begin();
            do
            {
                int CurrentWordNumber = -1;
                int CurrentBestDistance = 10000;
                string Line = LineIter.GetText(PageIteratorLevel.TextLine);
                if(Line == null)
                    continue;
                string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.None);
                if(WordsOfLine.Length < NumberOfWords)
                    continue;

                for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
                {
                    int PassDistance = 0;
                    for(int i = 0; i < NumberOfWords; i++)
                    {
                        PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
                                                                                      Words[i],
                                                                                      StringComparison.CurrentCultureIgnoreCase);
                    }
                    if(PassDistance < CurrentBestDistance)
                    {
                        CurrentBestDistance = PassDistance;
                        CurrentWordNumber = shift + 1;
                    }
                }
                if(CurrentBestDistance < BestDistance)
                {
                    AddToLog ("new best");
                    AddToLog (LineIter.GetText(PageIteratorLevel.Word));
                    word = CurrentWordNumber;
                    if(BestLineIter != null)
                        BestLineIter.Dispose();
                    BestLineIter = LineIter.Clone();
                    AddToLog (BestLineIter.GetText(PageIteratorLevel.TextLine));
                    BestDistance = CurrentBestDistance;
                }
            } while( LineIter.Next(PageIteratorLevel.TextLine));
            LineIter.Dispose();
            return BestDistance;
        } */
        /// <summary>
        /// Searches the page line by line for the run of words that is closest (by Levenshtein
        /// distance, ignoring case) to <paramref name="Text"/> and reports where that run starts.
        /// Candidate runs start at any word of a line and contain between one word and as many
        /// words as <paramref name="Text"/> has.
        /// </summary>
        /// <param name="Text">Marker text to look for.</param>
        /// <param name="page">Recognized page to search.</param>
        /// <param name="PosX">Left edge (X1) of the first word of the best run, or -1 if none was found.</param>
        /// <param name="PosY">Top edge (Y1) of the first word of the best run, or -1 if none was found.</param>
        /// <param name="AngleRad">
        /// Angle in radians of the baseline of that word; 0 if nothing was found or the
        /// baseline is unavailable.
        /// </param>
        /// <param name="AfterMarkerRules">
        /// Rules whose <c>AfterTextMarkerValue</c> is filled with the word located
        /// <c>ShiftWordsCount</c> words after the matched run when <c>NextAfterTextMarker</c>
        /// is set. May be null, in which case no rule is filled.
        /// </param>
        /// <returns>The distance of the best match; 10000 when no candidate was found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="Text"/> is null.</exception>
        int GetTextPosition(string Text, Page page, out int PosX, out int PosY, out double AngleRad, RecognazeRule[] AfterMarkerRules)
        {
            int BestDistance = 10000;
            PosX = -1;
            PosY = -1;
            AngleRad = 0;
            if(Text == null)
                throw new ArgumentNullException("Text");

            logger.Debug("Marker zone text:{0}", page.GetText());
            string[] Words = Text.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;

            // "using" guarantees the iterator is released even when an exception escapes.
            using(ResultIterator LineIter = page.GetIterator())
            {
                LineIter.Begin();
                do
                {
                    int CurrentWordNumber = -1;
                    int CurrentAfterWord = 0;
                    int CurrentBestDistance = 10000;
                    string Line = LineIter.GetText(PageIteratorLevel.TextLine);

                    if(String.IsNullOrEmpty(Line))
                        continue;
                    Line = Line.Trim();
                    string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);

                    if(WordsOfLine.Length == 0)
                        continue;

                    // Try every start word and every run length up to the marker's word count.
                    for(int shift = 0; shift < WordsOfLine.Length; shift++)
                    {
                        for(int i = 1; i <= NumberOfWords && i <= WordsOfLine.Length - shift; i++)
                        {
                            string passString = String.Join(" ", WordsOfLine, shift, i);

                            int PassDistance = FuzzyStringComparer.GetDistanceLevenshtein(passString,
                                Text,
                                StringComparison.CurrentCultureIgnoreCase);
                            if(PassDistance < CurrentBestDistance)
                            {
                                CurrentBestDistance = PassDistance;
                                CurrentWordNumber = shift;
                                CurrentAfterWord = shift + i;
                            }
                        }
                    }

                    if(CurrentBestDistance >= BestDistance)
                        continue;

                    // Move to the first word of the best run. The two-level Next() refuses to
                    // step past the last word of the current line, so the outer line loop can
                    // never skip a line if the token count and the OCR word count disagree.
                    for(int i = 0; i < CurrentWordNumber; i++)
                    {
                        if(!LineIter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                            break;
                    }

                    Rect Box;
                    if(!LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box))
                    {
                        // Without a bounding box there is no position to report; do not accept
                        // this candidate instead of returning a bogus (0,0) location.
                        logger.Debug("Failed to get bounding box for the matched word, candidate skipped");
                        continue;
                    }

                    logger.Debug("new best");
                    logger.Debug(Line);
                    BestDistance = CurrentBestDistance;

                    // Fill the rule fields with the data that follows the marker.
                    if(AfterMarkerRules != null)
                    {
                        foreach(RecognazeRule rule in AfterMarkerRules)
                        {
                            if(!rule.NextAfterTextMarker)
                                continue;
                            var valueIndex = CurrentAfterWord + rule.ShiftWordsCount;
                            if(valueIndex >= 0 && valueIndex < WordsOfLine.Length)
                            {
                                rule.AfterTextMarkerValue = WordsOfLine[valueIndex];
                            }
                        }
                    }

                    PosX = Box.X1;
                    PosY = Box.Y1;
                    logger.Debug("Position X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);

                    Rect BaseLine;
                    if(LineIter.TryGetBaseline(PageIteratorLevel.Word, out BaseLine))
                    {
                        logger.Debug("BaseLine X1:{0} Y1:{1} X2:{2} Y2:{3}", BaseLine.X1, BaseLine.Y1, BaseLine.X2, BaseLine.Y2);
                        // Tilt angle of the baseline.
                        AngleRad = Math.Atan2(BaseLine.Y2 - BaseLine.Y1, BaseLine.X2 - BaseLine.X1);
                    }
                    else
                    {
                        logger.Debug("Failed to get baseline for the matched word, angle set to 0");
                        AngleRad = 0;
                    }
                    double AngleGrad = AngleRad * (180/Math.PI);
                    logger.Debug("Angle rad:{0} grad:{1}", AngleRad, AngleGrad);

                } while( LineIter.Next(PageIteratorLevel.TextLine));
            }
            return BestDistance;
        }
Beispiel #4
0
        /*
        int LookingTextMarker(RecognazeRule Rule, Page page, out ResultIterator BestLineIter, out int word)
        {
            word = -1;
            BestLineIter = null;
            int BestDistance = 10000;

            ResultIterator LineIter = page.GetIterator();
            string[] Words = Rule.TextMarker.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;
            LineIter.Begin();
            do
            {
                int CurrentWordNumber = -1;
                int CurrentBestDistance = 10000;
                string Line = LineIter.GetText(PageIteratorLevel.TextLine);
                if(Line == null)
                    continue;
                string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.None);
                if(WordsOfLine.Length < NumberOfWords)
                    continue;

                for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
                {
                    int PassDistance = 0;
                    for(int i = 0; i < NumberOfWords; i++)
                    {
                        PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
                                                                                      Words[i],
                                                                                      StringComparison.CurrentCultureIgnoreCase);
                    }
                    if(PassDistance < CurrentBestDistance)
                    {
                        CurrentBestDistance = PassDistance;
                        CurrentWordNumber = shift + 1;
                    }
                }
                if(CurrentBestDistance < BestDistance)
                {
                    AddToLog ("new best");
                    AddToLog (LineIter.GetText(PageIteratorLevel.Word));
                    word = CurrentWordNumber;
                    if(BestLineIter != null)
                        BestLineIter.Dispose();
                    BestLineIter = LineIter.Clone();
                    AddToLog (BestLineIter.GetText(PageIteratorLevel.TextLine));
                    BestDistance = CurrentBestDistance;
                }
            } while( LineIter.Next(PageIteratorLevel.TextLine));
            LineIter.Dispose();
            return BestDistance;
        } */
        /// <summary>
        /// Searches the page line by line for the run of consecutive words that is closest to
        /// the words of <paramref name="Text"/> (sum of per-word Levenshtein distances, ignoring
        /// case) and reports the top-left corner of the first word of that run.
        /// </summary>
        /// <param name="Text">Marker text to look for; split into words on spaces.</param>
        /// <param name="page">Recognized page to search.</param>
        /// <param name="PosX">Left edge (X1) of the first matched word, or -1 if nothing was found.</param>
        /// <param name="PosY">Top edge (Y1) of the first matched word, or -1 if nothing was found.</param>
        /// <returns>
        /// The summed distance of the best match; 10000 when no candidate was found
        /// (including when <paramref name="Text"/> contains no words).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="Text"/> is null.</exception>
        int GetTextPosition(string Text, Page page, out int PosX, out int PosY)
        {
            int BestDistance = 10000;
            PosX = -1;
            PosY = -1;
            if(Text == null)
                throw new ArgumentNullException("Text");

            string[] Words = Text.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;
            // A marker without words would "match" every line with distance 0; treat it as not found.
            if(NumberOfWords == 0)
                return BestDistance;

            // "using" guarantees the iterator is released even when an exception escapes.
            using(ResultIterator LineIter = page.GetIterator())
            {
                LineIter.Begin();
                do
                {
                    int CurrentWordNumber = -1;
                    int CurrentBestDistance = 10000;
                    string Line = LineIter.GetText(PageIteratorLevel.TextLine);
                    if(Line == null)
                        continue;
                    Line = Line.Trim();
                    // Drop empty tokens so that token indexes line up with the OCR word sequence
                    // and repeated spaces do not distort the distance.
                    string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
                    if(WordsOfLine.Length < NumberOfWords)
                        continue;

                    // Slide a window of NumberOfWords words over the line.
                    for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
                    {
                        int PassDistance = 0;
                        for(int i = 0; i < NumberOfWords; i++)
                        {
                            PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
                                                                                       Words[i],
                                                                                       StringComparison.CurrentCultureIgnoreCase);
                        }
                        if(PassDistance < CurrentBestDistance)
                        {
                            CurrentBestDistance = PassDistance;
                            CurrentWordNumber = shift;
                        }
                    }

                    if(CurrentBestDistance >= BestDistance)
                        continue;

                    // Move to the first word of the best window. The two-level Next() refuses to
                    // step past the last word of the current line, so the outer line loop can
                    // never skip a line.
                    for(int i = 0; i < CurrentWordNumber; i++)
                    {
                        if(!LineIter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                            break;
                    }

                    Rect Box;
                    if(!LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box))
                    {
                        // No bounding box means no usable position; skip this candidate instead
                        // of reporting a bogus (0,0) location.
                        AddToLog ("failed to get bounding box, candidate skipped");
                        continue;
                    }

                    AddToLog ("new best");
                    AddToLog (Line);
                    BestDistance = CurrentBestDistance;
                    PosX = Box.X1;
                    PosY = Box.Y1;
                } while( LineIter.Next(PageIteratorLevel.TextLine));
            }
            return BestDistance;
        }