Esempio n. 1
0
        /// <summary>
        /// Finds the word nearest to <paramref name="pivot"/> among the words whose angle to the pivot
        /// lies within [<paramref name="angleStart"/>, <paramref name="angleEnd"/>], and returns the pivot's
        /// most frequent letter font size together with the distance to that nearest word.
        /// </summary>
        /// <param name="words">Candidate words. The pivot may be part of this array; it is removed from the candidates.</param>
        /// <param name="pivot">The word from which the search is done.</param>
        /// <param name="funcPivotDist">Selects the point of the pivot's bounding box used for distance measurement.</param>
        /// <param name="funcPivotAngle">Selects the point of the pivot's bounding box used for angle measurement.</param>
        /// <param name="funcPointsDist">Selects the point of a candidate's bounding box used for distance measurement.</param>
        /// <param name="funcPointsAngle">Selects the point of a candidate's bounding box used for angle measurement.</param>
        /// <param name="angleStart">Inclusive lower bound of the accepted angle, in the unit returned by <c>Distances.Angle</c>.</param>
        /// <param name="angleEnd">Inclusive upper bound of the accepted angle, in the unit returned by <c>Distances.Angle</c>.</param>
        /// <param name="finalDistMEasure">Distance function used for the reported value. The nearest word itself
        /// is always selected with <c>Distances.Euclidean</c>.</param>
        /// <returns>
        /// A two-element array: index 0 is the mode of the pivot's letter font sizes, index 1 is
        /// <paramref name="finalDistMEasure"/> evaluated between the pivot point and the nearest candidate point.
        /// <c>null</c> when no candidate passes the angle filter or no nearest candidate is found.
        /// </returns>
        private double[] GetNearestPointData(Word[] words, Word pivot,
                                             Func<PdfRectangle, PdfPoint> funcPivotDist, Func<PdfRectangle, PdfPoint> funcPivotAngle,
                                             Func<PdfRectangle, PdfPoint> funcPointsDist, Func<PdfRectangle, PdfPoint> funcPointsAngle,
                                             double angleStart, double angleEnd,
                                             Func<PdfPoint, PdfPoint, double> finalDistMEasure)
        {
            var pivotDistPoint = funcPivotDist(pivot.BoundingBox);

            // The pivot's angle reference point is the same for every candidate: compute it once
            // instead of once per word inside the filter.
            var pivotAnglePoint = funcPivotAngle(pivot.BoundingBox);

            // Keep only the words within the accepted angle range (bounds inclusive).
            var filtered = words.Where(w =>
            {
                var angle = Distances.Angle(pivotAnglePoint, funcPointsAngle(w.BoundingBox));
                return angle >= angleStart && angle <= angleEnd;
            }).ToList();

            // The pivot must not be matched with itself.
            filtered.Remove(pivot);

            if (filtered.Count == 0)
            {
                return null;
            }

            // Candidate distance points, index-aligned with 'filtered'.
            var candidatePoints = filtered.Select(w => funcPointsDist(w.BoundingBox)).ToList();

            int index = pivotDistPoint.FindIndexNearest(candidatePoints, Distances.Euclidean, out double _);

            // A negative index is treated as "no nearest candidate".
            if (index < 0)
            {
                return null;
            }

            return new double[]
            {
                (double)pivot.Letters.Select(l => l.FontSize).Mode(),
                finalDistMEasure(pivotDistPoint, candidatePoints[index])
            };
        }
Esempio n. 2
0
        /// <summary>
        /// Groups words into text lines using nearest-neighbour clustering, then orders the words of each line.
        /// </summary>
        /// <param name="words">The words to group. An empty list yields no lines.</param>
        /// <param name="maxDist">Distance threshold handed to the clustering algorithm for every (pivot, candidate) pair.</param>
        /// <param name="withinLine">Inclusive bounds for the angle between a pivot's bottom-right corner and a
        /// candidate's bottom-left corner for the two words to be considered on the same line.</param>
        /// <returns>One <see cref="TextLine"/> per cluster. Words are ordered according to the text direction
        /// of the first word in <paramref name="words"/>.</returns>
        private static IEnumerable<TextLine> GetLines(List<Word> words, double maxDist, AngleBounds withinLine)
        {
            // Nothing to cluster; this also protects the words[0] access below.
            if (words.Count == 0)
            {
                yield break;
            }

            // The text direction of the first word decides the ordering used for every line.
            TextDirection textDirection = words[0].TextDirection;

            var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(words, Distances.Euclidean,
                (pivot, candidate) => maxDist,
                pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft,
                pivot => true,
                (pivot, candidate) =>
                {
                    // Compare bottom right with bottom left for angle
                    var withinLineAngle = Distances.Angle(pivot.BoundingBox.BottomRight, candidate.BoundingBox.BottomLeft);

                    return withinLineAngle >= withinLine.Lower && withinLineAngle <= withinLine.Upper;
                }).ToList();

            Func<IEnumerable<Word>, IReadOnlyList<Word>> orderFunc;

            switch (textDirection)
            {
                case TextDirection.Rotate180:
                    orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Right).ToList();
                    break;

                case TextDirection.Rotate90:
                    orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Top).ToList();
                    break;

                case TextDirection.Rotate270:
                    orderFunc = l => l.OrderBy(x => x.BoundingBox.Bottom).ToList();
                    break;

                default:
                    // Any other direction: order by the left edge, ascending.
                    orderFunc = l => l.OrderBy(x => x.BoundingBox.Left).ToList();
                    break;
            }

            foreach (var group in groupedIndexes)
            {
                yield return new TextLine(orderFunc(group.Select(i => words[i])));
            }
        }
            /// <summary>
            /// Returns the obstacle whose centroid is nearest (Euclidean distance) to the centroid of
            /// <c>Bound</c>. Falls back to the first obstacle when the nearest-index search reports -1.
            /// </summary>
            public PdfRectangle GetPivot()
            {
                var obstacleCentroids = Obstacles.Select(obstacle => obstacle.Centroid).ToList();

                // The distance to the nearest centroid is not needed, only its index.
                int nearestIndex = Distances.FindIndexNearest(Bound.Centroid,
                                                              obstacleCentroids,
                                                              point => point, point => point,
                                                              Distances.Euclidean, out double _);

                if (nearestIndex == -1)
                {
                    return Obstacles.First();
                }

                return Obstacles.ElementAt(nearestIndex);
            }
Esempio n. 4
0
        /// <summary>
        /// Computes an oriented bounding box for the given lines: the minimum-area rectangle enclosing the
        /// corners of all line bounding boxes, with its corners assigned so that its rotation is closest
        /// to the baseline angle of the last line.
        /// </summary>
        /// <param name="lines">The lines to enclose. The last element is used as the orientation reference.</param>
        /// <returns>The candidate rectangle whose rotation differs the least from the baseline angle.</returns>
        private PdfRectangle GetBoundingBoxOther(IReadOnlyList<TextLine> lines)
        {
            var corners = lines.SelectMany(line => new[]
            {
                line.BoundingBox.BottomLeft,
                line.BoundingBox.BottomRight,
                line.BoundingBox.TopLeft,
                line.BoundingBox.TopRight
            });

            // First candidate: the minimum-area rectangle as returned.
            var best = Geometry.GeometryExtensions.MinimumAreaRectangle(corners);

            // Other candidates: the same four corners passed to the constructor in different orders.
            var alternatives = new[]
            {
                new PdfRectangle(best.BottomLeft, best.TopLeft, best.BottomRight, best.TopRight),
                new PdfRectangle(best.BottomRight, best.BottomLeft, best.TopRight, best.TopLeft),
                new PdfRectangle(best.TopRight, best.BottomRight, best.TopLeft, best.BottomLeft)
            };

            // The orientation reference is the baseline of the last line (assumes line order is correct).
            var referenceBox  = lines[lines.Count - 1].BoundingBox;
            var baseLineAngle = Distances.BoundAngle180(Distances.Angle(referenceBox.BottomLeft, referenceBox.BottomRight));

            double smallestDelta = Math.Abs(Distances.BoundAngle180(best.Rotation - baseLineAngle));

            // Strict comparison: on a tie the earlier candidate is kept.
            foreach (var candidate in alternatives)
            {
                double delta = Math.Abs(Distances.BoundAngle180(candidate.Rotation - baseLineAngle));

                if (delta < smallestDelta)
                {
                    smallestDelta = delta;
                    best          = candidate;
                }
            }

            return best;
        }
Esempio n. 5
0
        /// <summary>
        /// Measures the distance from the pivot to its nearest word, considering only words whose angle
        /// to the pivot is accepted by <paramref name="angleBounds"/>.
        /// </summary>
        /// <param name="words">Candidate words. The pivot instance itself (by reference) is skipped.</param>
        /// <param name="pivot">The word from which the search is done.</param>
        /// <param name="funcPivotDist">Selects the point of the pivot's bounding box used for distance measurement.</param>
        /// <param name="funcPivotAngle">Selects the point of the pivot's bounding box used for angle measurement.</param>
        /// <param name="funcPointsDist">Selects the point of a candidate's bounding box used for distance measurement.</param>
        /// <param name="funcPointsAngle">Selects the point of a candidate's bounding box used for angle measurement.</param>
        /// <param name="angleBounds">The angle range a candidate must fall in.</param>
        /// <param name="finalDistanceMeasure">Distance function used for the returned value. The nearest
        /// candidate itself is selected with <c>Distances.Euclidean</c>.</param>
        /// <returns>The measured distance, or <c>null</c> when there is no candidate within the angle range
        /// or the nearest-index search does not return a valid index.</returns>
        private double? GetNearestPointDistance(List<Word> words, Word pivot,
                                                Func<PdfRectangle, PdfPoint> funcPivotDist, Func<PdfRectangle, PdfPoint> funcPivotAngle,
                                                Func<PdfRectangle, PdfPoint> funcPointsDist, Func<PdfRectangle, PdfPoint> funcPointsAngle,
                                                AngleBounds angleBounds,
                                                Func<PdfPoint, PdfPoint, double> finalDistanceMeasure)
        {
            var distanceOrigin = funcPivotDist(pivot.BoundingBox);
            var angleOrigin    = funcPivotAngle(pivot.BoundingBox);

            // Distance points of every word, other than the pivot, lying within the angle range.
            var candidatePoints = words
                                  .Where(w => !ReferenceEquals(w, pivot) &&
                                              angleBounds.Contains(Distances.Angle(angleOrigin, funcPointsAngle(w.BoundingBox))))
                                  .Select(w => funcPointsDist(w.BoundingBox))
                                  .ToList();

            if (candidatePoints.Count == 0)
            {
                return null;
            }

            var nearest = distanceOrigin.FindIndexNearest(candidatePoints, p => p,
                                                          p => p, Distances.Euclidean, out _);

            // Defensive: only use the index when it points inside the candidate list.
            bool isValidIndex = nearest >= 0 && nearest < candidatePoints.Count;

            if (!isValidIndex)
            {
                return null;
            }

            return finalDistanceMeasure(distanceOrigin, candidatePoints[nearest]);
        }
Esempio n. 6
0
        /// <summary>
        /// Build lines via transitive closure.
        /// </summary>
        /// <param name="words">The words to group. An empty list yields no lines.</param>
        /// <param name="maxDist">Distance threshold handed to the clustering algorithm for every (pivot, candidate) pair.</param>
        /// <param name="withinLine">Inclusive bounds for the angle between a pivot's bottom-right corner and a
        /// candidate's bottom-left corner for the two words to be considered on the same line.</param>
        /// <returns>One <see cref="TextLine"/> per group. Words are ordered according to the text direction
        /// of the first word in <paramref name="words"/>.</returns>
        private static IEnumerable<TextLine> GetLines(List<Word> words, double maxDist, AngleBounds withinLine)
        {
            /***************************************************************************************************
            * /!\ WARNING: Given how FindIndexNearest() works, if 'maxDist' > 'word Width', the algo might not
            * work as the FindIndexNearest() function might pair the pivot with itself (the pivot's right point
            * (distance = width) is closer than other words' left point).
            * -> Solution would be to find more than one nearest neighbours. Use KDTree?
            ***************************************************************************************************/

            // Nothing to group; this also protects the words[0] access below.
            if (words.Count == 0)
            {
                yield break;
            }

            // The text direction of the first word decides the ordering used for every line.
            TextDirection textDirection = words[0].TextDirection;

            var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(words, Distances.Euclidean,
                (pivot, candidate) => maxDist,
                pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft,
                pivot => true,
                (pivot, candidate) =>
                {
                    // Compare bottom right with bottom left for angle
                    var withinLineAngle = Distances.Angle(pivot.BoundingBox.BottomRight, candidate.BoundingBox.BottomLeft);

                    return withinLineAngle >= withinLine.Lower && withinLineAngle <= withinLine.Upper;
                }).ToList();

            Func<IEnumerable<Word>, IReadOnlyList<Word>> orderFunc;

            switch (textDirection)
            {
                case TextDirection.Rotate180:
                    orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Right).ToList();
                    break;

                case TextDirection.Rotate90:
                    orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Top).ToList();
                    break;

                case TextDirection.Rotate270:
                    orderFunc = l => l.OrderBy(x => x.BoundingBox.Bottom).ToList();
                    break;

                default:
                    // Any other direction: order by the left edge, ascending.
                    orderFunc = l => l.OrderBy(x => x.BoundingBox.Left).ToList();
                    break;
            }

            foreach (var group in groupedIndexes)
            {
                yield return new TextLine(orderFunc(group.Select(i => words[i])));
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Groups text lines into text blocks using a transitive closure over a custom line-to-line distance.
        /// </summary>
        /// <param name="lines">The lines to group.</param>
        /// <param name="maxDist">Distance threshold handed to the clustering algorithm for every (pivot, candidate) pair.</param>
        /// <returns>One <see cref="TextBlock"/> per group of line indexes returned by the clustering algorithm.</returns>
        private static IEnumerable<TextBlock> GetLinesGroups(TextLine[] lines, double maxDist)
        {
            /**************************************************************************************************
            * Distance between two lines:
            *  - Check whether the two lines overlap horizontally.
            *  - If they do, take the middle of the overlapping range as a common X coordinate and return the
            *    Euclidean distance between the two points at that X, one on each line.
            *  - If they do not, return the maximum distance.
            *
            * /!\ WARNING: Given how FindIndexNearest() works, if 'maxDist' > 'line Height', the algo won't
            * work as the FindIndexNearest() function will always pair the pivot with itself (the pivot's top
            * point (distance = height) is closer than other lines' top point).
            * -> Solution would be to find more than one nearest neighbours. Use KDTree?
            **************************************************************************************************/

            Func<PdfLine, PdfLine, double> overlapMiddleDistance = (first, second) =>
            {
                var overlapStart = Math.Max(first.Point1.X, second.Point1.X);
                var overlapEnd   = Math.Min(first.Point2.X, second.Point2.X);
                var overlapWidth = overlapEnd - overlapStart;

                if (overlapWidth < 0)
                {
                    // not overlapping -> max distance
                    return double.MaxValue;
                }

                var middleX = overlapStart + overlapWidth / 2;

                return Distances.Euclidean(new PdfPoint(middleX, first.Point1.Y),
                                           new PdfPoint(middleX, second.Point1.Y));
            };

            // The pivot is represented by its bottom edge, the candidate by its top edge.
            var groups = ClusteringAlgorithms.SimpleTransitiveClosure(lines,
                                                                      overlapMiddleDistance,
                                                                      (current, other) => maxDist,
                                                                      current => new PdfLine(current.BoundingBox.BottomLeft, current.BoundingBox.BottomRight),
                                                                      other => new PdfLine(other.BoundingBox.TopLeft, other.BoundingBox.TopRight),
                                                                      current => true, (current, other) => true).ToList();

            foreach (var group in groups)
            {
                yield return new TextBlock(group.Select(index => lines[index]).ToList());
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Groups text lines into text blocks using nearest-neighbour clustering over a custom
        /// line-to-line distance.
        /// </summary>
        /// <param name="lines">The lines to group.</param>
        /// <param name="maxDist">Distance threshold handed to the clustering algorithm for every (pivot, candidate) pair.</param>
        /// <param name="maxDegreeOfParallelism">Forwarded unchanged to <c>ClusteringAlgorithms.ClusterNearestNeighbours</c>.</param>
        /// <returns>One <see cref="TextBlock"/> per group of line indexes returned by the clustering algorithm.</returns>
        private static IEnumerable<TextBlock> GetLinesGroups(TextLine[] lines, double maxDist, int maxDegreeOfParallelism)
        {
            /**************************************************************************************************
            * Distance between two lines:
            *  - Check whether the two lines overlap horizontally.
            *  - If they do, take the middle of the overlapping range as a common X coordinate and return the
            *    Euclidean distance between the two points at that X, one on each line.
            *  - If they do not, return the maximum distance.
            **************************************************************************************************/

            Func<PdfLine, PdfLine, double> overlapMiddleDistance = (first, second) =>
            {
                var overlapStart = Math.Max(first.Point1.X, second.Point1.X);
                var overlapEnd   = Math.Min(first.Point2.X, second.Point2.X);
                var overlapWidth = overlapEnd - overlapStart;

                if (overlapWidth < 0)
                {
                    // not overlapping -> max distance
                    return double.MaxValue;
                }

                var middleX = overlapStart + overlapWidth / 2;

                return Distances.Euclidean(new PdfPoint(middleX, first.Point1.Y),
                                           new PdfPoint(middleX, second.Point1.Y));
            };

            // The pivot is represented by its bottom edge, the candidate by its top edge.
            var groups = ClusteringAlgorithms.ClusterNearestNeighbours(lines,
                                                                       overlapMiddleDistance,
                                                                       (current, other) => maxDist,
                                                                       current => new PdfLine(current.BoundingBox.BottomLeft, current.BoundingBox.BottomRight),
                                                                       other => new PdfLine(other.BoundingBox.TopLeft, other.BoundingBox.TopRight),
                                                                       current => true, (current, other) => true,
                                                                       maxDegreeOfParallelism).ToList();

            foreach (var group in groups)
            {
                yield return new TextBlock(group.Select(index => lines[index]).ToList());
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Computes an oriented bounding box for the given words. A straight line is fitted through the
        /// bottom corners of the word bounding boxes to obtain the orientation; the box is built in the
        /// rotated frame, rotated back, and finally its corners are assigned so that its rotation is
        /// closest to the angle between the first word's bottom-left and the last word's bottom-right corner.
        /// </summary>
        /// <param name="words">The words to enclose. The first and last elements are used as the
        /// orientation reference (assumes word order is correct).</param>
        /// <returns>The candidate rectangle whose rotation differs the least from the baseline angle.</returns>
        private static PdfRectangle GetBoundingBoxOther(IReadOnlyList<Word> words)
        {
            // Below this sum of squared X deviations the baseline points are treated as vertically aligned.
            const double verticalLineThreshold = 1e-3;

            var baseLinePoints = words.SelectMany(r => new[]
            {
                r.BoundingBox.BottomLeft,
                r.BoundingBox.BottomRight,
            }).ToList();

            // Fitting a line through the base lines points
            // to find the orientation (slope)
            double x0              = baseLinePoints.Average(p => p.X);
            double y0              = baseLinePoints.Average(p => p.Y);
            double sumProduct      = 0;
            double sumDiffSquaredX = 0;

            foreach (var point in baseLinePoints)
            {
                var xDiff = point.X - x0;
                var yDiff = point.Y - y0;
                sumProduct      += xDiff * yDiff;
                sumDiffSquaredX += xDiff * xDiff;
            }

            // Defaults describe a vertical line.
            double cos = 0;
            double sin = 1;

            if (sumDiffSquaredX > verticalLineThreshold)
            {
                // not a vertical line
                double angleRad = Math.Atan(sumProduct / sumDiffSquaredX); // -π/2 ≤ θ ≤ π/2
                cos = Math.Cos(angleRad);
                sin = Math.Sin(angleRad);
            }

            // Rotate the points to build the axis-aligned bounding box (AABB)
            var inverseRotation = new TransformationMatrix(
                cos, -sin, 0,
                sin, cos, 0,
                0, 0, 1);

            // Materialized once: the query is read four times below (min/max of X and Y).
            var transformedPoints = words.SelectMany(r => new[]
            {
                r.BoundingBox.BottomLeft,
                r.BoundingBox.BottomRight,
                r.BoundingBox.TopLeft,
                r.BoundingBox.TopRight
            }).Distinct().Select(p => inverseRotation.Transform(p)).ToList();

            var aabb = new PdfRectangle(transformedPoints.Min(p => p.X),
                                        transformedPoints.Min(p => p.Y),
                                        transformedPoints.Max(p => p.X),
                                        transformedPoints.Max(p => p.Y));

            // Rotate back the AABB to obtain the oriented bounding box (OBB)
            var rotateBack = new TransformationMatrix(
                cos, sin, 0,
                -sin, cos, 0,
                0, 0, 1);

            // First candidate: the rotated-back box as is.
            var obb = rotateBack.Transform(aabb);

            // Other candidates: the same four corners passed to the constructor in different orders.
            var alternatives = new[]
            {
                new PdfRectangle(obb.BottomLeft, obb.TopLeft, obb.BottomRight, obb.TopRight),
                new PdfRectangle(obb.BottomRight, obb.BottomLeft, obb.TopRight, obb.TopLeft),
                new PdfRectangle(obb.TopRight, obb.BottomRight, obb.TopLeft, obb.BottomLeft)
            };

            // Find the orientation of the OBB, using the baseline angle
            // Assumes word order is correct
            var firstWord = words[0];
            var lastWord  = words[words.Count - 1];

            var baseLineAngle = Distances.Angle(firstWord.BoundingBox.BottomLeft, lastWord.BoundingBox.BottomRight);

            double deltaAngle = Math.Abs(Distances.BoundAngle180(obb.Rotation - baseLineAngle));

            // Strict comparison: on a tie the earlier candidate is kept.
            foreach (var candidate in alternatives)
            {
                double candidateDelta = Math.Abs(Distances.BoundAngle180(candidate.Rotation - baseLineAngle));

                if (candidateDelta < deltaAngle)
                {
                    deltaAngle = candidateDelta;
                    obb        = candidate;
                }
            }

            return obb;
        }