コード例 #1
0
        private void RecognizeTess()
        {
            using (var engine = new TesseractEngine(@"./tessdata", "rus", EngineMode.Default))
            {
                TextMarker Marker = Doc.Template.Markers[0];
                Pixbuf PixBox;

                logger.Info("Вычисляем сдвиг");
                Pixbuf WorkImage = Images[0];
                Marker.SetTarget(WorkImage.Width, WorkImage.Height);

                RelationalRectangle WorkZone = Marker.Zone.Clone();
                for (int i = 1; i <= 7; i++)
                {
                    logger.Debug("Попытка {0}, box: x={1},y={2},w={3},h={4}", i, WorkZone.PosX, WorkZone.PosY, WorkZone.Width, WorkZone.Heigth);
                    PixBox = new Pixbuf(WorkImage, WorkZone.PosX, WorkZone.PosY, WorkZone.Width, WorkZone.Heigth);
                    using (var img = RecognizeHelper.PixbufToPix(PixBox))
                    {
                        using (var page = engine.Process(img, PageSegMode.Auto))
                        {
                            int Distance, MarkerPosX, MarkerPosY;
                            double MarkerSkew;
                            RecognazeRule[] rules = new RecognazeRule[]{ Doc.Template.NumberRule, Doc.Template.DateRule};
                            Distance = GetTextPosition(Marker.Text, page, out MarkerPosX, out MarkerPosY, out MarkerSkew, rules);
                            if (Distance < 5)
                            {
                                logger.Debug("TextMarker <{0}> Distance: {1} Try:{2}", Marker.Text, Distance, i);
                                Marker.ActualPosX = MarkerPosX + WorkZone.PosX;
                                Marker.ActualPosY = MarkerPosY + WorkZone.PosY;
                                Marker.ActualSkew = MarkerSkew;
                                logger.Debug("Image Shift X: {0}", Marker.ShiftX);
                                logger.Debug("Image Shift Y: {0}", Marker.ShiftY);
                                break;
                            }
                            else if (i == 7 ||
                                (WorkZone.RelativePosX == 0 && WorkZone.RelativePosY == 0 && WorkZone.RelativeHeigth == 1 && WorkZone.RelativeWidth == 1) )
                            {
                                Marker.ActualPosX = (int)(Marker.PatternPosX * Marker.TargetWidth);
                                Marker.ActualPosY = (int)(Marker.PatternPosY * Marker.TargetHeigth);
                                Marker.ActualSkew = 0;
                                ShowImage(PixBox, "Маркер не найден. Зона маркера.");
                            }
                            else
                            { //Увеличиваем размер зоны поиска.
                                ExpandRelativeZone(WorkZone);
                            }
                        }
                    }
                }
                RecognazeRule CurRule;

                logger.Info("Распознаем номер");
                if (Doc.Template.NumberRule != null)
                {
                    CurRule = Doc.Template.NumberRule;

                    CurRule.Box.SetShiftByMarker(Marker);
                    logger.Debug("Number Shift X: {0}", CurRule.Box.ShiftX);
                    logger.Debug("Number Shift Y: {0}", CurRule.Box.ShiftY);
                    WorkZone = CurRule.Box.Clone();
                    //FIXME С этим пока работает лучше
                    WorkZone.RelativePosX += WorkZone.RelativeWidth * 0.2;
                    WorkZone.RelativePosY += WorkZone.RelativeHeigth * 0.2;
                    WorkZone.RelativeWidth *= 0.6;
                    WorkZone.RelativeHeigth *= 0.6;

                    Doc.DocNumberConfidence = 0;
                    for (int i = 1; i <= 5; i++)
                    {
                        PixBox = new Pixbuf(WorkImage, WorkZone.PosX, WorkZone.PosY, WorkZone.Width, WorkZone.Heigth);
                        using (var img = RecognizeHelper.PixbufToPix(PixBox))
                        {
                            using (var page = engine.Process(img, PageSegMode.SingleLine))
                            {

                                string FieldText = page.GetText().Trim();
                                if (page.GetMeanConfidence() > Doc.DocNumberConfidence)
                                {
                                    Doc.DocNumber = FieldText;
                                    Doc.DocNumberConfidence = page.GetMeanConfidence();
                                    logger.Debug("Try: {0}", i);
                                    logger.Debug("Found Field Value: {0}", FieldText);
                                    logger.Debug("Recognize confidence: {0}", page.GetMeanConfidence());
                                    if(CurRule.NextAfterTextMarker)
                                        logger.Debug("By after market detection: {0}", CurRule.AfterTextMarkerValue);
                                    if (FieldText == "")
                                        ShowImage(PixBox, "Номер пустой. Зона номера документа.");
                                    else
                                        ShowImage(PixBox, "Зона номера документа.");
                                }
                                if (Doc.DocNumber == CurRule.AfterTextMarkerValue)
                                {
                                    Doc.DocNumberConfidence = 1;
                                    break;
                                }
                                if(CurRule.Validate == ValidationTypes.IsNumber)
                                {
                                    int tempInt;
                                    bool AfterIsNumber = int.TryParse (CurRule.AfterTextMarkerValue, out tempInt);
                                    bool BoxIsNumber = int.TryParse (Doc.DocNumber, out tempInt);
                                    if(BoxIsNumber)
                                    {
                                        break;
                                    }
                                    else if (AfterIsNumber)
                                    {
                                        Doc.DocNumber = CurRule.AfterTextMarkerValue;
                                        //Doc.DocNumberConfidence = ;
                                        break;
                                    }
                                    else
                                    {
                                        Doc.DocNumberConfidence = 0;
                                    }
                                }
                                if (Doc.DocNumberConfidence > 0.8)
                                {
                                    break;
                                }
                                else
                                {//Увеличиваем размер зоны поиска.
                                    ExpandRelativeZone(WorkZone);
                                }
                            }
                        }
                    }
                }

                logger.Info("Распознаем Дату");
                if(Doc.Template.DateRule != null)
                {
                    CurRule = Doc.Template.DateRule;

                    CurRule.Box.SetShiftByMarker(Marker);

                    PixBox = new Pixbuf(WorkImage, CurRule.Box.PosX, CurRule.Box.PosY, CurRule.Box.Width, CurRule.Box.Heigth);
                    using (var img = RecognizeHelper.PixbufToPix(PixBox)) {
                        using (var page = engine.Process(img, PageSegMode.SingleLine)) {

                            string FieldText = page.GetText().Trim();
                            DateTime TempDate;
                            string[] words = FieldText.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
                            string Date = "";
                            foreach(string word in words)
                            {
                                if(DateTime.TryParse(word, out TempDate))
                                   Date = word;
                            }

                            if(DateTime.TryParse(Date, out TempDate))
                            {
                                Doc.DocDate = TempDate;
                                Doc.DocDateConfidence = page.GetMeanConfidence();
                            }
                            else
                            {
                                Doc.DocDateConfidence = -2;
                            }
                            logger.Debug("Found Field Value: {0}", FieldText);
                            logger.Debug("Split date Value: {0}", Date);
                            logger.Debug("Recognize confidence: {0}", page.GetMeanConfidence());
                            if(FieldText == "")
                                ShowImage(PixBox, "Дата пустая. Зона даты документа.");
                            else
                                ShowImage(PixBox, "Зона даты документа.");
                        }
                    }
                }

                //FIXME Добавить распознование дополнительных полей.
            }
        }
コード例 #2
0
ファイル: DocsType.cs プロジェクト: Badou03080/earchive
        protected void OnRecognizeActionActivated(object sender, EventArgs e)
        {
            if(DocTemplate == null)
            {
                DocTemplate = new RecognizeTemplate();
                labelTemplateName.LabelProp = "Новый";
                //FIXME Убрать тестовый документ
                DocTemplate.Name = "Торг12-тест";
                RecognazeRule testrule = new RecognazeRule();
                testrule.Box = new RelationalRectangle(0.357468643, 0.321774194, 0.088939567, 0.026612903);
                DocTemplate.NumberRule = testrule;

                testrule = new RecognazeRule();
                testrule.Box = new RelationalRectangle(0.444412771, 0.323387097, 0.08608894, 0.021774194);
                DocTemplate.DateRule = testrule;

                TextMarker Marker = new TextMarker();
                Marker.Text = "ТОВАРНАЯ НАКЛАДНАЯ";
                Marker.PatternPosX = 0.206100342;
                Marker.PatternPosY = 0.324193548;
                Marker.Zone = new RelationalRectangle(0.181299886, 0.292741935, 0.196693273, 0.12);
                DocTemplate.Markers = new TextMarker[]{Marker};
            }
            //FIXME Открыть диалог настройки шаблона.
        }
コード例 #3
0
        /*
        int LookingTextMarker(RecognazeRule Rule, Page page, out ResultIterator BestLineIter, out int word)
        {
            word = -1;
            BestLineIter = null;
            int BestDistance = 10000;

            ResultIterator LineIter = page.GetIterator();
            string[] Words = Rule.TextMarker.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;
            LineIter.Begin();
            do
            {
                int CurrentWordNumber = -1;
                int CurrentBestDistance = 10000;
                string Line = LineIter.GetText(PageIteratorLevel.TextLine);
                if(Line == null)
                    continue;
                string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.None);
                if(WordsOfLine.Length < NumberOfWords)
                    continue;

                for(int shift = 0; shift <= WordsOfLine.Length - NumberOfWords; shift++)
                {
                    int PassDistance = 0;
                    for(int i = 0; i < NumberOfWords; i++)
                    {
                        PassDistance += FuzzyStringComparer.GetDistanceLevenshtein(WordsOfLine[shift + i],
                                                                                      Words[i],
                                                                                      StringComparison.CurrentCultureIgnoreCase);
                    }
                    if(PassDistance < CurrentBestDistance)
                    {
                        CurrentBestDistance = PassDistance;
                        CurrentWordNumber = shift + 1;
                    }
                }
                if(CurrentBestDistance < BestDistance)
                {
                    AddToLog ("new best");
                    AddToLog (LineIter.GetText(PageIteratorLevel.Word));
                    word = CurrentWordNumber;
                    if(BestLineIter != null)
                        BestLineIter.Dispose();
                    BestLineIter = LineIter.Clone();
                    AddToLog (BestLineIter.GetText(PageIteratorLevel.TextLine));
                    BestDistance = CurrentBestDistance;
                }
            } while( LineIter.Next(PageIteratorLevel.TextLine));
            LineIter.Dispose();
            return BestDistance;
        } */
        int GetTextPosition(string Text, Page page, out int PosX, out int PosY, out double AngleRad, RecognazeRule[] AfterMarkerRules)
        {
            int BestDistance = 10000;
            PosX = -1;
            PosY = -1;
            AngleRad = 0;
            logger.Debug("Marker zone text:{0}", page.GetText());
            ResultIterator LineIter = page.GetIterator();
            string[] Words = Text.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
            int NumberOfWords = Words.Length;
            LineIter.Begin();
            do
            {
                int CurrentWordNumber = -1;
                int CurrentAfterWord = 0;
                int CurrentBestDistance = 10000;
                string Line = LineIter.GetText(PageIteratorLevel.TextLine);

                if(Line == null || Line == "")
                    continue;
                Line = Line.Trim();
                string[] WordsOfLine = Line.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);

                if(WordsOfLine.Length == 0)
                    continue;
                for(int shift = 0; shift < WordsOfLine.Length; shift++)
                {
                    for(int i = 1; i <= NumberOfWords && i <= WordsOfLine.Length - shift; i++)
                    {
                        string passString = String.Join(" ", WordsOfLine, shift, i);

                        int PassDistance = FuzzyStringComparer.GetDistanceLevenshtein(passString,
                            Text,
                            StringComparison.CurrentCultureIgnoreCase);
                        if(PassDistance < CurrentBestDistance)
                        {
                            CurrentBestDistance = PassDistance;
                            CurrentWordNumber = shift;
                            CurrentAfterWord = shift + i;
                        }
                    }
                }
                if(CurrentBestDistance < BestDistance)
                {
                    logger.Debug("new best");
                    logger.Debug(LineIter.GetText(PageIteratorLevel.TextLine).Trim());
                    //Заполняем поля данными после маркера.
                    foreach(RecognazeRule rule in AfterMarkerRules)
                    {
                        if(rule.NextAfterTextMarker && WordsOfLine.Length > CurrentAfterWord + rule.ShiftWordsCount)
                        {
                            rule.AfterTextMarkerValue = WordsOfLine[CurrentAfterWord + rule.ShiftWordsCount];
                        }
                    }

                    BestDistance = CurrentBestDistance;
                    for(int i = 0; i < CurrentWordNumber; i++)
                    {
                        LineIter.Next(PageIteratorLevel.Word);
                    }
                    Rect Box;
                    LineIter.TryGetBoundingBox(PageIteratorLevel.Word, out Box);
                    PosX = Box.X1;
                    PosY = Box.Y1;
                    logger.Debug("Position X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);
                    LineIter.TryGetBaseline(PageIteratorLevel.Word, out Box);
                    logger.Debug("BaseLine X1:{0} Y1:{1} X2:{2} Y2:{3}", Box.X1, Box.Y1, Box.X2, Box.Y2);
                    AngleRad = Math.Atan2(Box.Y2 - Box.Y1, Box.X2 - Box.X1); //угл наклона базовой линии.
                    double AngleGrad = AngleRad * (180/Math.PI);
                    logger.Debug("Angle rad:{0} grad:{1}", AngleRad, AngleGrad);
                }

            } while( LineIter.Next(PageIteratorLevel.TextLine));
            LineIter.Dispose();
            return BestDistance;
        }
コード例 #4
0
        private void RecognizeTess()
        {
            using (var engine = new TesseractEngine(@"./tessdata", "rus", EngineMode.Default))
            {
                TextMarker Marker = Doc.Template.Markers[0];
                Pixbuf     PixBox;

                logger.Info("Вычисляем сдвиг");
                Pixbuf WorkImage = Images[0];
                Marker.SetTarget(WorkImage.Width, WorkImage.Height);

                RelationalRectangle WorkZone = Marker.Zone.Clone();
                for (int i = 1; i <= 7; i++)
                {
                    logger.Debug("Попытка {0}, box: x={1},y={2},w={3},h={4}", i, WorkZone.PosX, WorkZone.PosY, WorkZone.Width, WorkZone.Heigth);
                    PixBox = new Pixbuf(WorkImage, WorkZone.PosX, WorkZone.PosY, WorkZone.Width, WorkZone.Heigth);
                    using (var img = RecognizeHelper.PixbufToPix(PixBox))
                    {
                        using (var page = engine.Process(img, PageSegMode.Auto))
                        {
                            int             Distance, MarkerPosX, MarkerPosY;
                            double          MarkerSkew;
                            RecognazeRule[] rules = new RecognazeRule[] { Doc.Template.NumberRule, Doc.Template.DateRule };
                            Distance = GetTextPosition(Marker.Text, page, out MarkerPosX, out MarkerPosY, out MarkerSkew, rules);
                            if (Distance < 5)
                            {
                                logger.Debug("TextMarker <{0}> Distance: {1} Try:{2}", Marker.Text, Distance, i);
                                Marker.ActualPosX = MarkerPosX + WorkZone.PosX;
                                Marker.ActualPosY = MarkerPosY + WorkZone.PosY;
                                Marker.ActualSkew = MarkerSkew;
                                Marker.Confidence = page.GetMeanConfidence();
                                logger.Debug("Image Shift X: {0}", Marker.ShiftX);
                                logger.Debug("Image Shift Y: {0}", Marker.ShiftY);
                                break;
                            }
                            else if (i == 7 ||
                                     (WorkZone.RelativePosX == 0 && WorkZone.RelativePosY == 0 && WorkZone.RelativeHeigth == 1 && WorkZone.RelativeWidth == 1))
                            {
                                Marker.ActualPosX = (int)(Marker.PatternPosX * Marker.TargetWidth);
                                Marker.ActualPosY = (int)(Marker.PatternPosY * Marker.TargetHeigth);
                                Marker.ActualSkew = 0;
                                ShowImage(PixBox, "Маркер не найден. Зона маркера.");
                            }
                            else
                            {                             //Увеличиваем размер зоны поиска.
                                ExpandRelativeZone(WorkZone);
                            }
                        }
                    }
                }
                RecognazeRule CurRule;

                logger.Info("Распознаем номер");
                if (Doc.Template.NumberRule != null)
                {
                    CurRule = Doc.Template.NumberRule;

                    if (CurRule.Box != null)
                    {
                        CurRule.Box.SetShiftByMarker(Marker);
                        logger.Debug("Number Shift X: {0}", CurRule.Box.ShiftX);
                        logger.Debug("Number Shift Y: {0}", CurRule.Box.ShiftY);
                        WorkZone = CurRule.Box.Clone();
                        //FIXME С этим пока работает лучше
                        WorkZone.RelativePosX   += WorkZone.RelativeWidth * 0.2;
                        WorkZone.RelativePosY   += WorkZone.RelativeHeigth * 0.2;
                        WorkZone.RelativeWidth  *= 0.6;
                        WorkZone.RelativeHeigth *= 0.6;

                        Doc.DocNumberConfidence = 0;
                        for (int i = 1; i <= 5; i++)
                        {
                            PixBox = new Pixbuf(WorkImage, WorkZone.PosX, WorkZone.PosY, WorkZone.Width, WorkZone.Heigth);
                            using (var img = RecognizeHelper.PixbufToPix(PixBox)) {
                                using (var page = engine.Process(img, PageSegMode.SingleLine)) {
                                    string FieldText = page.GetText().Trim();
                                    if (page.GetMeanConfidence() > Doc.DocNumberConfidence)
                                    {
                                        Doc.DocNumber           = FieldText;
                                        Doc.DocNumberConfidence = page.GetMeanConfidence();
                                        logger.Debug("Try: {0}", i);
                                        logger.Debug("Found Field Value: {0}", FieldText);
                                        logger.Debug("Recognize confidence: {0}", page.GetMeanConfidence());
                                        if (FieldText == "")
                                        {
                                            ShowImage(PixBox, "Номер пустой. Зона номера документа.");
                                        }
                                        else
                                        {
                                            ShowImage(PixBox, "Зона номера документа.");
                                        }
                                    }
                                    if (CurRule.Validate == ValidationTypes.IsNumber)
                                    {
                                        int  tempInt;
                                        bool BoxIsNumber = int.TryParse(Doc.DocNumber, out tempInt);
                                        if (BoxIsNumber)
                                        {
                                            break;
                                        }
                                        else
                                        {
                                            Doc.DocNumberConfidence = 0;
                                        }
                                    }
                                    if (Doc.DocNumberConfidence > 0.8)
                                    {
                                        break;
                                    }
                                    else                                        //Увеличиваем размер зоны поиска.
                                    {
                                        ExpandRelativeZone(WorkZone);
                                    }
                                }
                            }
                        }
                    }
                    else if (CurRule.NextAfterTextMarker)
                    {
                        logger.Debug("By after market detection: {0}", CurRule.AfterTextMarkerValue);
                        Doc.DocNumber           = CurRule.AfterTextMarkerValue;
                        Doc.DocNumberConfidence = CurRule.AfterTextMarkerConfidence;
                    }
                }

                logger.Info("Распознаем Дату");
                if (Doc.Template.DateRule != null)
                {
                    CurRule = Doc.Template.DateRule;
                    if (CurRule.Box != null)
                    {
                        CurRule.Box.SetShiftByMarker(Marker);

                        PixBox = new Pixbuf(WorkImage, CurRule.Box.PosX, CurRule.Box.PosY, CurRule.Box.Width, CurRule.Box.Heigth);
                        using (var img = RecognizeHelper.PixbufToPix(PixBox)) {
                            using (var page = engine.Process(img, PageSegMode.SingleLine)) {
                                string    FieldText = page.GetText().Trim();
                                DateTime  TempDate;
                                string [] words = FieldText.Split(new char [] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                                string    Date  = "";
                                foreach (string word in words)
                                {
                                    if (DateTime.TryParse(word, out TempDate))
                                    {
                                        Date = word;
                                    }
                                }

                                if (DateTime.TryParse(Date, out TempDate))
                                {
                                    Doc.DocDate           = TempDate;
                                    Doc.DocDateConfidence = page.GetMeanConfidence();
                                }
                                else
                                {
                                    Doc.DocDateConfidence = -2;
                                }
                                logger.Debug("Found Field Value: {0}", FieldText);
                                logger.Debug("Split date Value: {0}", Date);
                                logger.Debug("Recognize confidence: {0}", page.GetMeanConfidence());
                                if (FieldText == "")
                                {
                                    ShowImage(PixBox, "Дата пустая. Зона даты документа.");
                                }
                                else
                                {
                                    ShowImage(PixBox, "Зона даты документа.");
                                }
                            }
                        }
                    }
                    else
                    {
                        logger.Debug("By after market detection: {0}", CurRule.AfterTextMarkerValue);
                        if (DateTime.TryParse(CurRule.AfterTextMarkerValue, out Doc.DocDate))
                        {
                            logger.Debug("Date parsed:{0}", Doc.DocDate);
                            Doc.DocDateConfidence = CurRule.AfterTextMarkerConfidence;
                        }
                        else
                        {
                            Doc.DocDateConfidence = -2;
                        }
                    }
                }

                //FIXME Добавить распознование дополнительных полей.
            }
        }