Пример #1
0
        /// <summary>
        /// Initializes a new instance: stores the supplied document data in
        /// <c>PageOcr</c> and then runs <c>Init()</c>.
        /// </summary>
        /// <param name="pageOcr">Document data assigned to <c>PageOcr</c> before <c>Init()</c> is called.</param>
        public DocumentCandidateNavigation(DocumentData pageOcr)
        {
            // PageOcr must be assigned first; Init() runs after it.
            this.PageOcr = pageOcr;
            this.Init();
        }
Пример #2
0
        /// <summary>
        /// Initializes a new instance: stores the supplied document data in
        /// <c>PageOcr</c> and then runs <c>Init()</c>.
        /// </summary>
        /// <param name="pageOcr">Document data assigned to <c>PageOcr</c> before <c>Init()</c> is called.</param>
        public DocumentDataNavigation(DocumentData pageOcr)
        {
            // PageOcr must be assigned first; Init() runs after it.
            this.PageOcr = pageOcr;
            this.Init();
        }
Пример #3
0
        /// <summary>
        /// Reads an OCR result file and appends its lines and words to <paramref name="doc"/>.
        /// </summary>
        /// <remarks>
        /// Each OCR word is passed through <c>SplitWord</c>; every resulting piece becomes a
        /// <c>MappedWord</c> that is linked to its neighbouring pieces via <c>SplitLeft</c> /
        /// <c>SplitRight</c>. An OCR word whose rectangle exactly matches one already imported
        /// from this page is skipped. Lines that end up without words are not added.
        /// After import, all lines in <c>doc.Lines</c> are numbered by ascending top edge and
        /// all words in <c>doc.Words</c> are numbered by line ID, then by X.
        /// </remarks>
        /// <param name="doc">Document whose <c>Words</c> and <c>Lines</c> collections are filled.</param>
        /// <param name="ocrFile">Path of the OCR result file (*.prd).</param>
        /// <param name="deskew">
        /// When true, each word rectangle is passed through <c>page.Deskew</c>; the unmodified
        /// rectangle is stored in <c>OriginRectangle</c> in either case.
        /// </param>
        private static void LoadOcrData(DocumentData doc, string ocrFile, bool deskew)
        {
            var page  = TPage.LoadFromPRD(ocrFile);
            var data  = doc.Words;
            var lines = doc.Lines;

            // Only word rectangles are deskewed below; line rectangles and document fields are left untouched.

            // Rectangles of OCR words already imported, used to drop exact duplicates.
            List<TOCRRect> seenRects = new List<TOCRRect>();

            foreach (TLine line in page.Lines)
            {
                OcrLine ocrLine = new OcrLine
                {
                    Rectangle = new Rect(line.Rect.Left, line.Rect.Top, line.Rect.Width, line.Rect.Height)
                };

                foreach (TWord word in line.Words)
                {
                    bool alreadyImported = seenRects.Exists(seen =>
                        seen.Left == word.Rect.Left &&
                        seen.Top == word.Rect.Top &&
                        seen.Right == word.Rect.Right &&
                        seen.Bottom == word.Rect.Bottom);

                    if (alreadyImported)
                    {
                        continue;
                    }

                    seenRects.Add(word.Rect);

                    // Convert every piece of the split word into a MappedWord.
                    List<MappedWord> pieces = new List<MappedWord>();

                    foreach (var part in SplitWord(word))
                    {
                        MappedWord mapped = new MappedWord();
                        mapped.Confidence = part.Confidance;
                        mapped.Contents   = string.Concat(part.Chars.Select(c => c.CharData).ToArray());

                        TOCRRect placedRect = deskew ? page.Deskew(part.Rect) : part.Rect;

                        mapped.Rectangle       = new Rect(placedRect.Left, placedRect.Top, placedRect.Width, placedRect.Height);
                        mapped.OriginRectangle = new Rect(part.Rect.Left, part.Rect.Top, part.Rect.Width, part.Rect.Height);

                        pieces.Add(mapped);
                    }

                    // Link each piece to its neighbours and register it with the document and the line.
                    int pieceCount = pieces.Count;

                    for (int position = 0; position < pieceCount; position++)
                    {
                        MappedWord current = pieces[position];

                        if (position != 0)
                        {
                            pieces[position - 1].SplitRight = current;
                        }

                        if (position < pieceCount - 1)
                        {
                            pieces[position + 1].SplitLeft = current;
                        }

                        doc.Words.Add(current);
                        current.Line = ocrLine;
                        ocrLine.Words.Add(current);
                    }
                }

                // Lines that received no words are dropped.
                if (!ocrLine.Words.Any())
                {
                    continue;
                }

                lines.Add(ocrLine);
            }

            // Number the lines from top to bottom.
            var linesTopDown = lines.OrderBy(l => l.Rectangle.Top).ToList();

            for (int lineId = 0; lineId < linesTopDown.Count; lineId++)
            {
                linesTopDown[lineId].ID = lineId;
            }

            // Number the words in reading order: by line ID, then left to right.
            var wordsInOrder = data.OrderBy(w => w.Line.ID).ThenBy(w => w.Rectangle.X).ToList();
            int nextWordId   = 0;

            foreach (OcrWord ocrWord in wordsInOrder)
            {
                ocrWord.ID = nextWordId;
                nextWordId++;
            }
        }
Пример #4
0
        /// <summary>
        /// Groups the words of <paramref name="cluster"/> into lines and returns them in creation order.
        /// </summary>
        /// <remarks>
        /// For every word of the cluster (sorted by top edge) the document navigator is queried
        /// with a band that spans the cluster's width at the word's vertical extent. Candidates
        /// are kept when they belong to the same cluster, are not yet assigned to a line, and are
        /// either the word itself or satisfy <c>Top + (Height / 2) * 1.2 &lt; word.Bottom</c>.
        /// Lines are then built repeatedly from a seed word: the first remaining word whose height
        /// is below twice the value returned by <c>CalcWordHeight</c>, or the first remaining word
        /// if there is none. Each word placed in a line has its <c>Clusterline</c> set to that line.
        /// </remarks>
        /// <param name="doc">Document whose <c>WordsNavigator</c> is used for the spatial queries.</param>
        /// <param name="cluster">Cluster whose <c>Fields</c> are distributed over lines.</param>
        /// <returns>The created lines, with IDs starting at 0; words inside a line are ordered by left edge.</returns>
        /// <exception cref="InvalidOperationException">
        /// A word has no same-line candidates ("A"), the seed word's candidates are all already
        /// assigned ("B"), or a line would contain no words ("C").
        /// </exception>
        public static List <FieldClusterLine> CreateLines(DocumentData doc, fieldClusterModel cluster)
        {
            DocumentDataNavigation docNavigate = doc.WordsNavigator;

            var wordList   = cluster.Fields;
            var wordHeight = CalcWordHeight(wordList);

            List <FieldClusterLine> result = new List <FieldClusterLine>();

            wordList = wordList.OrderBy(a => a.Rectangle.Top).ToList();
            List <MappedWord> newWordList = new List <MappedWord>(wordList);

            // For each word: the unassigned words of the same cluster that share its horizontal band.
            Dictionary <MappedWord, List <MappedWord> > wordLines = new Dictionary <MappedWord, List <MappedWord> >();

            foreach (var word in newWordList)
            {
                var inTheSameLine = docNavigate.GetWords(new System.Windows.Rect(cluster.Area.Left, word.Rectangle.Top, cluster.Area.Width, word.Rectangle.Height))
                                    .Where(x => x.Cluster.ID == word.Cluster.ID && x.Clusterline == null)
                                    .Where(x => x == word || x.Rectangle.Top + (x.Rectangle.Height / 2) * 1.2 < word.Rectangle.Bottom)
                                    .ToList();

                if (inTheSameLine.Count == 0)
                {
                    throw new InvalidOperationException("A) No words in word line");
                }

                wordLines.Add(word, inTheSameLine);
            }

            int index = 0;

            while (newWordList.Count > 0)
            {
                // Prefer a seed of ordinary height; fall back to the top-most remaining word.
                MappedWord wordTop = newWordList.FirstOrDefault(x => x.Rectangle.Height < wordHeight * 2);

                if (wordTop == null)
                {
                    wordTop = newWordList.First();
                }

                var baseWords = wordLines[wordTop].Where(x => x.Clusterline == null).ToList();

                if (baseWords.Count == 0)
                {
                    // Bad word
                    throw new InvalidOperationException("B) No words in word line");
                }

                // Hash lookup instead of List.Contains inside the predicate; uses the same default
                // equality the wordLines dictionary already relies on.
                var baseWordSet = new HashSet <MappedWord>(baseWords);

                List <MappedWord> lineWords = wordLines.Where(x => baseWordSet.Contains(x.Key))
                                              .SelectMany(x => x.Value)
                                              .Where(x => x.Clusterline == null)
                                              .Distinct()
                                              .OrderBy(x => x.Rectangle.Left)
                                              .ToList();

                if (lineWords.Count == 0)
                {
                    // Without this guard nothing would be removed or marked below and the loop would never end.
                    throw new InvalidOperationException("C) No words in word line");
                }

                FieldClusterLine fieldLine = new FieldClusterLine();
                fieldLine.ID     = index++;
                fieldLine.Fields = lineWords;

                var lineWordSet = new HashSet <MappedWord>(lineWords);
                newWordList.RemoveAll(a => lineWordSet.Contains(a));

                // Mark the words as assigned so later iterations skip them.
                lineWords.ForEach(a => a.Clusterline = fieldLine);
                result.Add(fieldLine);
            }

            return(result);
        }
        /// <summary>
        /// Builds the clusters of a document from its words.
        /// </summary>
        /// <remarks>
        /// Starts with one cluster per word, merges clusters for which <c>AddIfIntersect</c>
        /// succeeds until no merge happens, then does the same with <c>AddIfInside</c>.
        /// The surviving clusters receive consecutive IDs from 0, every word's <c>Cluster</c>
        /// is set to the first cluster whose <c>Fields</c> contains it, and finally each
        /// cluster's <c>lines</c> is filled by <c>LineEngine.CreateLines</c>.
        /// </remarks>
        /// <param name="doc">Document whose <c>Words</c> are clustered.</param>
        /// <returns>The non-empty clusters that remain after both merge phases.</returns>
        public static List <fieldClusterModel> ClusterListFromDoc(DocumentData doc)
        {
            List<fieldClusterModel> clusters = new List<fieldClusterModel>();

            // One single-word cluster per document word.
            foreach (MappedWord seed in doc.Words)
            {
                clusters.Add(new fieldClusterModel(seed));
            }

            // Phase 1: merge intersecting clusters. Phase 2: merge clusters lying inside another.
            MergeUntilStable(clusters, (target, source) => target.AddIfIntersect(source));
            MergeUntilStable(clusters, (target, source) => target.AddIfInside(source));

            for (int id = 0; id < clusters.Count; id++)
            {
                clusters[id].ID = id;
            }

            // Every word must know its cluster before any lines are created.
            foreach (MappedWord word in doc.Words)
            {
                word.Cluster = clusters.FirstOrDefault(c => c.Fields.Contains(word));
            }

            foreach (fieldClusterModel cluster in clusters)
            {
                cluster.lines = LineEngine.CreateLines(doc, cluster);
            }

            return clusters;
        }

        // Repeats full merge passes over the list until a pass performs no merge.
        // In each pass, every non-empty cluster is offered to the other non-empty clusters in
        // list order; the first one that accepts it (tryAbsorb returns true) causes the offered
        // cluster to be cleared. Empty clusters are removed at the end of each pass.
        private static void MergeUntilStable(List<fieldClusterModel> clusters, System.Func<fieldClusterModel, fieldClusterModel, bool> tryAbsorb)
        {
            bool mergedInPass;

            do
            {
                mergedInPass = false;

                for (int sourceIndex = 0; sourceIndex < clusters.Count; sourceIndex++)
                {
                    fieldClusterModel source = clusters[sourceIndex];

                    if (source.isEmpty)
                    {
                        continue;
                    }

                    for (int targetIndex = 0; targetIndex < clusters.Count; targetIndex++)
                    {
                        if (targetIndex == sourceIndex)
                        {
                            continue;
                        }

                        fieldClusterModel target = clusters[targetIndex];

                        if (target.isEmpty)
                        {
                            continue;
                        }

                        if (tryAbsorb(target, source))
                        {
                            source.Clear();
                            mergedInPass = true;
                            break;
                        }
                    }
                }

                clusters.RemoveAll(c => c.isEmpty);
            } while (mergedInPass);
        }