/// <summary>
        /// Groups words into text lines using <c>ClusteringAlgorithms.ClusterNearestNeighbours</c>, then
        /// orders the words of every line according to the text direction.
        /// </summary>
        /// <param name="words">The words to group. The text direction of the first word is used for all of them.</param>
        /// <param name="maxDist">
        /// Value handed to the clustering call for every pivot/candidate pair
        /// (presumably the maximum linking distance; confirm against the clustering implementation).
        /// </param>
        /// <param name="withinLine">
        /// Inclusive bounds that the angle between a pivot's bottom-right corner and a candidate's
        /// bottom-left corner must satisfy for the pair to be accepted.
        /// </param>
        /// <returns>
        /// One <see cref="TextLine"/> per group returned by the clustering call; an empty sequence when
        /// <paramref name="words"/> is null or empty. Evaluation is deferred until enumeration.
        /// </returns>
        private static IEnumerable<TextLine> GetLines(List<Word> words, double maxDist, AngleBounds withinLine)
        {
            // Nothing to group. Without this guard, words[0] below throws as soon as the sequence is enumerated.
            if (words == null || words.Count == 0)
            {
                yield break;
            }

            // The first word's direction decides how every line is ordered.
            TextDirection textDirection = words[0].TextDirection;

            var groupedIndexes = ClusteringAlgorithms.ClusterNearestNeighbours(words, Distances.Euclidean,
                (pivot, candidate) => maxDist,
                pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft,
                pivot => true,
                (pivot, candidate) =>
                {
                    // Compare bottom right with bottom left for angle
                    var withinLineAngle = Distances.Angle(pivot.BoundingBox.BottomRight, candidate.BoundingBox.BottomLeft);

                    return withinLineAngle >= withinLine.Lower && withinLineAngle <= withinLine.Upper;
                }).ToList();

            // Choose the reading-order sort that matches the text direction.
            Func<IEnumerable<Word>, IReadOnlyList<Word>> orderFunc;
            switch (textDirection)
            {
                case TextDirection.Rotate180:
                    orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Right).ToList();
                    break;

                case TextDirection.Rotate90:
                    orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Top).ToList();
                    break;

                case TextDirection.Rotate270:
                    orderFunc = l => l.OrderBy(x => x.BoundingBox.Bottom).ToList();
                    break;

                default:
                    orderFunc = l => l.OrderBy(x => x.BoundingBox.Left).ToList();
                    break;
            }

            // Each group holds indexes into 'words'.
            foreach (var indexGroup in groupedIndexes)
            {
                yield return new TextLine(orderFunc(indexGroup.Select(i => words[i])));
            }
        }
Example #2
0
        /// <summary>
        /// Finds the word nearest to a pivot word among the words lying within an angle range, and
        /// reports the pivot's modal font size together with a distance to that nearest word.
        /// </summary>
        /// <param name="words">The words to search. The first entry equal to <paramref name="pivot"/> is excluded after angle filtering.</param>
        /// <param name="pivot">The reference word.</param>
        /// <param name="funcPivotDist">Selects, from the pivot's bounding box, the point used for distance measurement.</param>
        /// <param name="funcPivotAngle">Selects, from the pivot's bounding box, the point used for angle measurement.</param>
        /// <param name="funcPointsDist">Selects, from a candidate's bounding box, the point used for distance measurement.</param>
        /// <param name="funcPointsAngle">Selects, from a candidate's bounding box, the point used for angle measurement.</param>
        /// <param name="angleStart">Inclusive lower bound of the accepted angle.</param>
        /// <param name="angleEnd">Inclusive upper bound of the accepted angle.</param>
        /// <param name="finalDistMEasure">Distance function applied to the pivot point and the nearest candidate's point to produce the reported distance.</param>
        /// <returns>
        /// A two-element array: index 0 is the mode of the font sizes of the pivot's letters, index 1 is the
        /// reported distance. Returns <c>null</c> when no candidate remains after filtering or no nearest
        /// index is found.
        /// </returns>
        private double[] GetNearestPointData(Word[] words, Word pivot,
                                             Func<PdfRectangle, PdfPoint> funcPivotDist, Func<PdfRectangle, PdfPoint> funcPivotAngle,
                                             Func<PdfRectangle, PdfPoint> funcPointsDist, Func<PdfRectangle, PdfPoint> funcPointsAngle,
                                             double angleStart, double angleEnd,
                                             Func<PdfPoint, PdfPoint, double> finalDistMEasure)
        {
            var pivotDistancePoint = funcPivotDist(pivot.BoundingBox);

            // Keep only the words whose angle relative to the pivot falls inside [angleStart, angleEnd].
            var candidates = words.Where(candidate =>
            {
                var angle = Distances.Angle(funcPivotAngle(pivot.BoundingBox), funcPointsAngle(candidate.BoundingBox));
                return angle >= angleStart && angle <= angleEnd;
            }).ToList();

            // The pivot must not be matched with itself.
            candidates.Remove(pivot);

            if (candidates.Count == 0)
            {
                return null;
            }

            var candidatePoints = candidates.Select(candidate => funcPointsDist(candidate.BoundingBox)).ToList();
            int nearestIndex = pivotDistancePoint.FindIndexNearest(candidatePoints, Distances.Euclidean, out double _);

            if (nearestIndex < 0)
            {
                return null;
            }

            var nearestWord = candidates[nearestIndex];

            double pivotFontSizeMode = (double)pivot.Letters.Select(letter => letter.FontSize).Mode();
            double reportedDistance = finalDistMEasure(pivotDistancePoint, funcPointsDist(nearestWord.BoundingBox));

            return new double[] { pivotFontSizeMode, reportedDistance };
        }
Example #3
0
        /// <summary>
        /// Computes an oriented bounding box for a set of text lines: takes the minimum-area rectangle
        /// of all line corners, then picks, among four corner orderings of that rectangle, the one whose
        /// rotation is closest to the baseline angle of the last line.
        /// </summary>
        /// <param name="lines">The lines to enclose. The last line is used to determine the baseline angle, so the line order is assumed to be correct.</param>
        /// <returns>The candidate rectangle whose rotation differs least from the baseline angle; earlier candidates win ties.</returns>
        private PdfRectangle GetBoundingBoxOther(IReadOnlyList<TextLine> lines)
        {
            // All four corners of every line's bounding box.
            var corners = lines.SelectMany(line =>
            {
                var box = line.BoundingBox;
                return new[] { box.BottomLeft, box.BottomRight, box.TopLeft, box.TopRight };
            });

            var minimumAreaBox = Geometry.GeometryExtensions.MinimumAreaRectangle(corners);

            // The same rectangle described with its corners in three other orders.
            var alternatives = new[]
            {
                new PdfRectangle(minimumAreaBox.BottomLeft, minimumAreaBox.TopLeft, minimumAreaBox.BottomRight, minimumAreaBox.TopRight),
                new PdfRectangle(minimumAreaBox.BottomRight, minimumAreaBox.BottomLeft, minimumAreaBox.TopRight, minimumAreaBox.TopLeft),
                new PdfRectangle(minimumAreaBox.TopRight, minimumAreaBox.BottomRight, minimumAreaBox.TopLeft, minimumAreaBox.BottomLeft)
            };

            // Baseline angle of the last line (assumes the line order is correct).
            var referenceBox = lines[lines.Count - 1].BoundingBox;
            var baseLineAngle = Distances.BoundAngle180(Distances.Angle(referenceBox.BottomLeft, referenceBox.BottomRight));

            var best = minimumAreaBox;
            double bestDelta = Math.Abs(Distances.BoundAngle180(minimumAreaBox.Rotation - baseLineAngle));

            foreach (var alternative in alternatives)
            {
                double delta = Math.Abs(Distances.BoundAngle180(alternative.Rotation - baseLineAngle));

                // Strictly smaller only, so an earlier candidate is kept on ties.
                if (delta < bestDelta)
                {
                    bestDelta = delta;
                    best = alternative;
                }
            }

            return best;
        }
        /// <summary>
        /// Measures the distance from a pivot word to its nearest neighbour among the words that lie
        /// within the given angle bounds.
        /// </summary>
        /// <param name="words">The words to search. The pivot instance itself (by reference) is ignored.</param>
        /// <param name="pivot">The reference word.</param>
        /// <param name="funcPivotDist">Selects, from the pivot's bounding box, the point used for distance measurement.</param>
        /// <param name="funcPivotAngle">Selects, from the pivot's bounding box, the point used for angle measurement.</param>
        /// <param name="funcPointsDist">Selects, from a candidate's bounding box, the point used for distance measurement.</param>
        /// <param name="funcPointsAngle">Selects, from a candidate's bounding box, the point used for angle measurement.</param>
        /// <param name="angleBounds">Bounds that the pivot-to-candidate angle must be contained in.</param>
        /// <param name="finalDistanceMeasure">Distance function applied to the pivot point and the nearest candidate point to produce the result.</param>
        /// <returns>
        /// The distance to the nearest accepted candidate, or <c>null</c> when no candidate lies within the
        /// angle bounds or the nearest-index lookup yields an index outside the candidate list.
        /// </returns>
        private double? GetNearestPointDistance(List<Word> words, Word pivot,
                                                Func<PdfRectangle, PdfPoint> funcPivotDist, Func<PdfRectangle, PdfPoint> funcPivotAngle,
                                                Func<PdfRectangle, PdfPoint> funcPointsDist, Func<PdfRectangle, PdfPoint> funcPointsAngle,
                                                AngleBounds angleBounds,
                                                Func<PdfPoint, PdfPoint, double> finalDistanceMeasure)
        {
            var pivotDistancePoint = funcPivotDist(pivot.BoundingBox);
            var pivotAnglePoint = funcPivotAngle(pivot.BoundingBox);

            // Distance points of every word (other than the pivot) whose angle is within bounds.
            var candidatePoints = new List<PdfPoint>();

            foreach (var candidate in words)
            {
                if (!ReferenceEquals(candidate, pivot)
                    && angleBounds.Contains(Distances.Angle(pivotAnglePoint, funcPointsAngle(candidate.BoundingBox))))
                {
                    candidatePoints.Add(funcPointsDist(candidate.BoundingBox));
                }
            }

            if (candidatePoints.Count == 0)
            {
                return null;
            }

            var nearestIndex = pivotDistancePoint.FindIndexNearest(candidatePoints, p => p,
                                                                   p => p, Distances.Euclidean, out _);

            bool indexIsValid = nearestIndex >= 0 && nearestIndex < candidatePoints.Count;

            if (!indexIsValid)
            {
                return null;
            }

            return finalDistanceMeasure(pivotDistancePoint, candidatePoints[nearestIndex]);
        }
        /// <summary>
        /// Build lines via transitive closure: groups words using
        /// <c>ClusteringAlgorithms.SimpleTransitiveClosure</c>, then orders the words of every line
        /// according to the text direction.
        /// </summary>
        /// <param name="words">The words to group. The text direction of the first word is used for all of them.</param>
        /// <param name="maxDist">
        /// Value handed to the clustering call for every pivot/candidate pair
        /// (presumably the maximum linking distance; confirm against the clustering implementation).
        /// </param>
        /// <param name="withinLine">
        /// Inclusive bounds that the angle between a pivot's bottom-right corner and a candidate's
        /// bottom-left corner must satisfy for the pair to be accepted.
        /// </param>
        /// <returns>
        /// One <see cref="TextLine"/> per group returned by the clustering call; an empty sequence when
        /// <paramref name="words"/> is null or empty. Evaluation is deferred until enumeration.
        /// </returns>
        private static IEnumerable<TextLine> GetLines(List<Word> words, double maxDist, AngleBounds withinLine)
        {
            /***************************************************************************************************
            * /!\ WARNING (from the original author): given how FindIndexNearest() works, if 'maxDist' is
            * greater than a word's width, the algorithm might not work, because FindIndexNearest() might pair
            * the pivot with itself (the pivot's right point, at distance = width, is closer than the other
            * words' left points).
            * -> A solution would be to find more than one nearest neighbour. Use a KD-tree?
            ***************************************************************************************************/

            // Nothing to group. Without this guard, words[0] below throws as soon as the sequence is enumerated.
            if (words == null || words.Count == 0)
            {
                yield break;
            }

            // The first word's direction decides how every line is ordered.
            TextDirection textDirection = words[0].TextDirection;

            var groupedIndexes = ClusteringAlgorithms.SimpleTransitiveClosure(words, Distances.Euclidean,
                (pivot, candidate) => maxDist,
                pivot => pivot.BoundingBox.BottomRight, candidate => candidate.BoundingBox.BottomLeft,
                pivot => true,
                (pivot, candidate) =>
                {
                    // Compare bottom right with bottom left for angle
                    var withinLineAngle = Distances.Angle(pivot.BoundingBox.BottomRight, candidate.BoundingBox.BottomLeft);

                    return withinLineAngle >= withinLine.Lower && withinLineAngle <= withinLine.Upper;
                }).ToList();

            // Choose the reading-order sort that matches the text direction.
            Func<IEnumerable<Word>, IReadOnlyList<Word>> orderFunc;
            switch (textDirection)
            {
                case TextDirection.Rotate180:
                    orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Right).ToList();
                    break;

                case TextDirection.Rotate90:
                    orderFunc = l => l.OrderByDescending(x => x.BoundingBox.Top).ToList();
                    break;

                case TextDirection.Rotate270:
                    orderFunc = l => l.OrderBy(x => x.BoundingBox.Bottom).ToList();
                    break;

                default:
                    orderFunc = l => l.OrderBy(x => x.BoundingBox.Left).ToList();
                    break;
            }

            // Each group holds indexes into 'words'.
            foreach (var indexGroup in groupedIndexes)
            {
                yield return new TextLine(orderFunc(indexGroup.Select(i => words[i])));
            }
        }
Example #6
0
        /// <summary>
        /// Computes an oriented bounding box (OBB) for a sequence of words. A straight line is fitted
        /// through the words' bottom corners to estimate the baseline slope; the word corners are rotated
        /// by the inverse of that slope, enclosed in an axis-aligned box, and the box is rotated back.
        /// Among four corner orderings of the result, the one whose rotation is closest to the angle from
        /// the first word's bottom-left corner to the last word's bottom-right corner is returned.
        /// </summary>
        /// <param name="words">The words to enclose. The first and last entries are used to determine the baseline angle, so the word order is assumed to be correct.</param>
        /// <returns>The candidate rectangle whose rotation differs least from the baseline angle; earlier candidates win ties.</returns>
        private static PdfRectangle GetBoundingBoxOther(IReadOnlyList<Word> words)
        {
            var baseLinePoints = words.SelectMany(w => new[]
            {
                w.BoundingBox.BottomLeft,
                w.BoundingBox.BottomRight,
            }).ToList();

            // Fit a line through the baseline points to find the orientation:
            // slope = sum((x - x0) * (y - y0)) / sum((x - x0)^2).
            double x0 = baseLinePoints.Average(p => p.X);
            double y0 = baseLinePoints.Average(p => p.Y);
            double sumProduct = 0;
            double sumDiffSquaredX = 0;

            foreach (var point in baseLinePoints)
            {
                var xDiff = point.X - x0;
                var yDiff = point.Y - y0;
                sumProduct += xDiff * yDiff;
                sumDiffSquaredX += xDiff * xDiff;
            }

            // Defaults describe a vertical baseline (angle of π/2), used when the x spread is negligible.
            double cos = 0;
            double sin = 1;

            if (sumDiffSquaredX > 1e-3)
            {
                // not a vertical line
                double angleRad = Math.Atan(sumProduct / sumDiffSquaredX); // -π/2 ≤ θ ≤ π/2
                cos = Math.Cos(angleRad);
                sin = Math.Sin(angleRad);
            }

            // Rotate the points to build the axis-aligned bounding box (AABB)
            var inverseRotation = new TransformationMatrix(
                cos, -sin, 0,
                sin, cos, 0,
                0, 0, 1);

            // Materialised once: the points are scanned four times below (two Min, two Max), and a
            // deferred query would redo the corner expansion, Distinct and transform on every scan.
            var transformedPoints = words.SelectMany(w => new[]
            {
                w.BoundingBox.BottomLeft,
                w.BoundingBox.BottomRight,
                w.BoundingBox.TopLeft,
                w.BoundingBox.TopRight
            }).Distinct().Select(p => inverseRotation.Transform(p)).ToList();

            var aabb = new PdfRectangle(transformedPoints.Min(p => p.X),
                                        transformedPoints.Min(p => p.Y),
                                        transformedPoints.Max(p => p.X),
                                        transformedPoints.Max(p => p.Y));

            // Rotate back the AABB to obtain the oriented bounding box (OBB)
            var rotateBack = new TransformationMatrix(
                cos, sin, 0,
                -sin, cos, 0,
                0, 0, 1);

            var obb = rotateBack.Transform(aabb);

            // The same rectangle described with its corners in three other orders.
            var alternatives = new[]
            {
                new PdfRectangle(obb.BottomLeft, obb.TopLeft, obb.BottomRight, obb.TopRight),
                new PdfRectangle(obb.BottomRight, obb.BottomLeft, obb.TopRight, obb.TopLeft),
                new PdfRectangle(obb.TopRight, obb.BottomRight, obb.TopLeft, obb.BottomLeft)
            };

            // Find the orientation of the OBB, using the baseline angle.
            // Assumes word order is correct.
            var firstWord = words[0];
            var lastWord = words[words.Count - 1];

            var baseLineAngle = Distances.Angle(firstWord.BoundingBox.BottomLeft, lastWord.BoundingBox.BottomRight);

            var best = obb;
            double bestDelta = Math.Abs(Distances.BoundAngle180(obb.Rotation - baseLineAngle));

            foreach (var alternative in alternatives)
            {
                double delta = Math.Abs(Distances.BoundAngle180(alternative.Rotation - baseLineAngle));

                // Strictly smaller only, so an earlier candidate is kept on ties.
                if (delta < bestDelta)
                {
                    bestDelta = delta;
                    best = alternative;
                }
            }

            return best;
        }