Exemplo n.º 1
0
        /// <summary>
        /// Merges neighbouring words of the same line into single words when they look like
        /// pieces of one value that was split apart.
        /// </summary>
        /// <remarks>
        /// Only words whose text matches <paramref name="pattern"/> and does not match
        /// <c>datePattern</c> are considered. Candidates are grouped with <c>GroupInLines</c>,
        /// ordered by the X coordinate of their bounding rectangle, and two consecutive words are
        /// joined (with the <c>+</c> operator of <see cref="Word"/>) when
        /// <c>CalculateDistance</c> for the pair is smaller than the smaller of their heights.
        /// </remarks>
        /// <param name="words">All words to process; the list instance itself is not modified.</param>
        /// <param name="pattern">Regular expression selecting the words that may be merged.</param>
        /// <returns>
        /// A new list holding the words that were not merged, followed by the merged words.
        /// </returns>
        private static List <Word> MergeSplitValues(List <Word> words, string pattern)
        {
            var patternWords = words.Where(x => Regex.IsMatch(x.Text, pattern) && !Regex.IsMatch(x.Text, datePattern))
                               .ToList();

            // Collected across all lines so the input list is rebuilt only once at the end.
            var wordsToRemove = new List <Word>();
            var newWords      = new List <Word>();

            foreach (var line in GroupInLines(patternWords))
            {
                var orderedLine = line.OrderBy(x => x.BoundingRect.X).ToList();

                // Run of merged words currently being built; null while no run is open.
                Word buffer = null;

                // Lines with fewer than two words have no pair to inspect and fall straight through.
                for (var i = 1; i < orderedLine.Count; i++)
                {
                    var firstWord  = orderedLine[i - 1];
                    var secondWord = orderedLine[i];

                    var isClose = CalculateDistance(firstWord, secondWord) <
                                  Math.Min(firstWord.BoundingRect.Height, secondWord.BoundingRect.Height);

                    if (isClose)
                    {
                        if (buffer == null)
                        {
                            // Start a new run with both words of the pair.
                            buffer = firstWord + secondWord;
                            wordsToRemove.Add(firstWord);
                            wordsToRemove.Add(secondWord);
                        }
                        else
                        {
                            // firstWord is already part of the run; extend it with secondWord.
                            buffer += secondWord;
                            wordsToRemove.Add(secondWord);
                        }
                    }
                    else if (buffer != null)
                    {
                        // The gap closes the pending run. The loop index is deliberately left
                        // alone: secondWord has not been merged yet, so the next pair
                        // (secondWord and its right neighbour) must still be examined.
                        newWords.Add(buffer);
                        buffer = null;
                    }
                }

                // A run that reaches the end of the line is still pending here.
                if (buffer != null)
                {
                    newWords.Add(buffer);
                }
            }

            // Enumerable.Except is a set difference, so it yields distinct elements of words.
            return words.Except(wordsToRemove).Concat(newWords).ToList();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Converts every sufficiently confident word of <paramref name="response"/> into a
        /// <see cref="Word"/> and then merges values that were split across several words.
        /// </summary>
        /// <remarks>
        /// Words whose confidence is below 0.7 are skipped. The text of a word is the
        /// concatenation of the text of its symbols. Its rectangle starts at the first vertex of
        /// the word's bounding box, is as wide as the distance to the X of the third vertex, and
        /// as high as the distance to the median Y of the symbols' third vertices.
        /// The resulting list is passed through <c>MergeSplitDates</c> and then through
        /// <c>MergeSplitValues</c> twice, first with the postal code pattern and then with the
        /// monetary pattern.
        /// </remarks>
        /// <param name="response">Response object whose text annotations are converted.</param>
        /// <returns>The converted words after all merge passes.</returns>
        public static List <Word> ConvertGoogleResponse(ResponseObject response)
        {
            const string postalCodePattern = @"\d{2}|[-]|\d{2}[-]|^\d{3}";
            const string monetaryPattern   = @"\d+[.,]?\d+|[.,]|\d+";

            var converted = new List <Word>();

            foreach (var annotatedImage in response.Responses)
            {
                foreach (var page in annotatedImage.FullTextAnnotation.Pages)
                {
                    foreach (var block in page.Blocks)
                    {
                        foreach (var paragraph in block.Paragraphs)
                        {
                            foreach (var candidate in paragraph.Words)
                            {
                                // Low-confidence words are dropped entirely.
                                if (candidate.Confidence < 0.7)
                                {
                                    continue;
                                }

                                // Rebuild the word text symbol by symbol.
                                var text = string.Empty;
                                foreach (var symbol in candidate.Symbols)
                                {
                                    text += symbol.Text;
                                }

                                var firstVertex = candidate.BoundingBox.Vertices[0];
                                var left        = firstVertex.X;
                                var top         = firstVertex.Y;

                                // The lower edge comes from the median of the symbols' third
                                // vertices rather than from the word's own bounding box.
                                var symbolBottoms = candidate.Symbols
                                                    .Select(s => s.BoundingBox.Vertices[2].Y)
                                                    .ToList();
                                var bottom = MathTools.Median(symbolBottoms);

                                var width  = candidate.BoundingBox.Vertices[2].X - left;
                                var height = bottom - top;

                                converted.Add(new Word(text, new Rect(left, top, width, height)));
                            }
                        }
                    }
                }
            }

            // Merge passes run in a fixed order: dates, postal codes, monetary values.
            var datesMerged       = MergeSplitDates(converted);
            var postalCodesMerged = MergeSplitValues(datesMerged, postalCodePattern);

            return MergeSplitValues(postalCodesMerged, monetaryPattern);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Returns the distance, as computed by <c>MathTools.GetDistance</c>, between the central
 /// points of the bounding rectangles of two words.
 /// </summary>
 /// <param name="w1">First word.</param>
 /// <param name="w2">Second word.</param>
 /// <returns>The distance between the two central points.</returns>
 private static double CalculateDistance(Word w1, Word w2)
 {
     var firstCenter  = w1.BoundingRect.GetCentralPoint();
     var secondCenter = w2.BoundingRect.GetCentralPoint();

     return MathTools.GetDistance(firstCenter, secondCenter);
 }