Пример #1
0
        /// <summary>
        /// Builds a new text block that mirrors the paragraph / line / word structure of
        /// <paramref name="textBlock"/>, passing every bounding box through <paramref name="boxConverter"/>.
        /// Word text and accuracy are copied unchanged.
        /// </summary>
        /// <param name="textBlock">Source text block to copy from.</param>
        /// <param name="boxConverter">Function applied to each bounding box before it is stored in the copy.</param>
        /// <returns>The newly created text block.</returns>
        private static GMTextBlock TransformTextBlock(GMTextBlock textBlock, Func <BoundingBox, BoundingBox> boxConverter)
        {
            // Single place describing how a word is copied; used for both line words and standalone words.
            Func<GMWord, GMWord> convertWord =
                source => new GMWord(boxConverter(source.BoundingBox), source.Text, source.Accuracy);

            var result = new GMTextBlock(boxConverter(textBlock.BoundingBox));

            foreach (var sourceParagraph in textBlock.Paragraphs())
            {
                var paragraphCopy = result.AddParagraph(boxConverter(sourceParagraph.BoundingBox));

                foreach (var sourceLine in sourceParagraph.Lines())
                {
                    var lineCopy = paragraphCopy.AddLine(boxConverter(sourceLine.BoundingBox));

                    foreach (var sourceWord in sourceLine.Words())
                    {
                        lineCopy.AddWord(convertWord(sourceWord));
                    }
                }
            }

            // Standalone words are attached directly to the block, outside any paragraph.
            foreach (var standalone in textBlock.StandaloneWords())
            {
                result.AddStandaloneWord(convertWord(standalone));
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Tells whether the text of <paramref name="word"/> matches <c>validWordPattern</c>.
        /// </summary>
        /// <param name="word">Word whose text is tested.</param>
        /// <returns><c>true</c> when the pattern matches the word's text; otherwise <c>false</c>.</returns>
        private static bool IsWordValidForAngleDetection(GMWord word)
        {
            return validWordPattern.IsMatch(word.Text);
        }
Пример #3
0
        /// <summary>
        /// Assuming that firstWord and secondWord are words of the same line (edges of their
        /// bounding boxes are aligned to the X and Y axes), this method detects the angle between
        /// the line and the positive direction of the X-axis.
        /// The angle is detected from the direction of the line connecting the centers of the given bounding rectangles.
        /// </summary>
        /// <param name="firstWord">Word whose bounding box center is the start of the direction vector.</param>
        /// <param name="secondWord">Word whose bounding box center is the end of the direction vector.</param>
        /// <returns>
        /// Angle in radians in the range [-PI, PI), measured with the Y-axis pointing up.
        /// An exactly leftward direction yields -PI; coinciding centers yield 0.
        /// </returns>
        private static double DetectAngle(GMWord firstWord, GMWord secondWord)
        {
            var firstBBox  = firstWord.BoundingBox;
            var secondBBox = secondWord.BoundingBox;

            // Page coordinates have origin in top-left corner,
            // positive direction of Y-axis is oriented to down,
            // positive direction of X-axis is oriented to right.
            // Min + Max is twice the center coordinate; the common factor 2 does not change the direction.
            Func <BoundingBox, int> selectorX = (bbox => (bbox.XMin + bbox.XMax));
            Func <BoundingBox, int> selectorY = (bbox => (bbox.YMin + bbox.YMax));

            // Here are coordinates of vector of text direction
            var dirX = selectorX(secondBBox) - selectorX(firstBBox);
            var dirY = selectorY(secondBBox) - selectorY(firstBBox);

            dirY = -dirY;             // To invert Y-axis direction

            double angle = 0;

            if (dirX < 0)
            {
                // Fold the vector into the right half-plane and compensate with a half turn.
                angle = (dirY > 0) ? Math.PI : -Math.PI;
                dirX  = -dirX;
                dirY  = -dirY;
            }

            // dirX >= 0 here, so Atan2 returns a value in [-PI/2, PI/2].
            // Atan2 is used instead of hand-written Atan branches because it is correct for
            // steep downward directions (dirY < -dirX) and returns 0 (not NaN) for the zero vector.
            angle += Math.Atan2(dirY, dirX);

            return angle;
        }
        /// <summary>
        /// Creates a copy of <paramref name="textBlock"/> without empty elements:
        /// words whose text is empty after trimming, lines left without words and
        /// paragraphs left without lines are not copied.
        /// Standalone words with non-empty trimmed text are copied once each.
        /// </summary>
        /// <param name="textBlock">Source text block; it is only read through its accessors here.</param>
        /// <returns>A new text block with the same bounding box and only non-empty content.</returns>
        private static GMTextBlock RemoveEmptyElements(this GMTextBlock textBlock)
        {
            var textBlockNew = new GMTextBlock(textBlock.BoundingBox);

            foreach (var paragraph in textBlock.Paragraphs())
            {
                var paragraphNew = new GMParagraph(paragraph.BoundingBox);
                foreach (var line in paragraph.Lines())
                {
                    var lineNew = new GMLine(line.BoundingBox);
                    foreach (var word in line.Words())
                    {
                        if (word.Text.Trim().Length > 0)
                        {
                            var wordNew = new GMWord(word.BoundingBox, word.Text, word.Accuracy);
                            lineNew.AddWord(wordNew);
                        }
                    }
                    // Keep the line only if at least one word survived the filter.
                    if (lineNew.Words().Any())
                    {
                        paragraphNew.AddLine(lineNew);
                    }
                }
                // Keep the paragraph only if at least one line survived the filter.
                if (paragraphNew.Lines().Any())
                {
                    textBlockNew.AddParagraph(paragraphNew);
                }
            }

            // Standalone words belong to the block, not to a paragraph, so they are copied
            // outside the paragraph loop: exactly once, and also when there are no paragraphs.
            foreach (var word in textBlock.StandaloneWords())
            {
                if (word.Text.Trim().Length > 0)
                {
                    var wordNew = new GMWord(word.BoundingBox, word.Text, word.Accuracy);
                    textBlockNew.AddStandaloneWord(wordNew);
                }
            }

            return textBlockNew;
        }
Пример #5
0
 /// <summary>
 /// Adds <paramref name="word"/> to the <c>words</c> collection by calling its <c>Add</c> method.
 /// </summary>
 /// <param name="word">The word to add.</param>
 public void AddWord(GMWord word)
 {
     words.Add(word);
 }
Пример #6
0
        /// <summary>
        /// Alternative approach to angle detection: this method uses the line connecting those vertices of the
        /// words' bounding rectangles which are the closest to the base line of the text.
        /// </summary>
        /// <param name="firstWord">Word whose selected bounding box vertex is the start of the direction vector.</param>
        /// <param name="secondWord">Word whose selected bounding box vertex is the end of the direction vector.</param>
        /// <returns>
        /// Angle in radians in the range [-PI, PI), measured with the Y-axis pointing up.
        /// An exactly leftward direction yields -PI; coinciding vertices yield 0.
        /// </returns>
        private static double DetectAngle3(GMWord firstWord, GMWord secondWord)
        {
            var firstBBox  = firstWord.BoundingBox;
            var secondBBox = secondWord.BoundingBox;
            Func <BoundingBox, int> selectorX;
            Func <BoundingBox, int> selectorY;

            // Page coordinates have origin in top-left corner,
            // positive direction of Y-axis is oriented to down,
            // positive direction of X-axis is oriented to right
            if (secondBBox.XMax < firstBBox.XMax)
            {
                // Page is rotated upside-down (words in line go from right to left)
                if (firstBBox.YMin >= secondBBox.YMin)
                {
                    // Text goes from down-right corner to upper-left corner
                    selectorX = (bbox => bbox.XMin);
                    selectorY = (bbox => bbox.YMin);
                }
                else
                {
                    // Text goes from upper-right corner to down-left corner
                    selectorX = (bbox => bbox.XMax);
                    selectorY = (bbox => bbox.YMin);
                }
            }
            else
            {
                // Page is properly rotated (words in line go from left to right)
                if (firstBBox.YMax >= secondBBox.YMax)
                {
                    // Text goes from down-left corner to upper-right corner
                    selectorX = (bbox => bbox.XMin);
                    selectorY = (bbox => bbox.YMax);
                }
                else
                {
                    // Text goes from upper-left corner to down-right corner
                    selectorX = (bbox => bbox.XMax);
                    selectorY = (bbox => bbox.YMax);
                }
            }

            // Here are coordinates of vector of text direction
            var dirX = selectorX(secondBBox) - selectorX(firstBBox);
            var dirY = selectorY(secondBBox) - selectorY(firstBBox);

            dirY = -dirY;             // To invert Y-axis direction

            double angle = 0;

            if (dirX < 0)
            {
                // Fold the vector into the right half-plane and compensate with a half turn.
                angle = (dirY > 0) ? Math.PI : -Math.PI;
                dirX  = -dirX;
                dirY  = -dirY;
            }

            // dirX >= 0 here, so Atan2 returns a value in [-PI/2, PI/2].
            // Atan2 is used instead of hand-written Atan branches because it is correct for
            // steep downward directions (dirY < -dirX) and returns 0 (not NaN) for the zero vector.
            angle += Math.Atan2(dirY, dirX);

            return angle;
        }
Пример #7
0
 /// <summary>
 /// Adds <paramref name="word"/> to the <c>words</c> collection by calling its <c>Add</c> method.
 /// </summary>
 /// <param name="word">The standalone word to add.</param>
 public void AddStandaloneWord(GMWord word)
 {
     words.Add(word);
 }