Example #1
0
        // Compares a rectangle covering x 0..10, y 0..5 with one covering x 10..20, y 0..10.
        // The first one must compare as smaller than the second.
        public void TestCompareVerticalOverlapRectangle()
        {
            var first  = new TableRectangle(new PdfRectangle(0, 0, 10, 5));   // previous arguments: 5f, 0f, 10f, 10f
            var second = new TableRectangle(new PdfRectangle(10, 0, 20, 10)); // previous arguments: 0f, 10f, 10f, 10f

            int order = first.CompareTo(second);

            Assert.True(order < 0);
        }
Example #2
0
        // Two rectangles sharing the same x-range (0..10): one spans y 10..20, the other y 20..30.
        // The rectangle with the larger y-values is expected to come first (reading order), so
        // comparing the y 10..20 rectangle against it must give a positive result.
        public void TestCompareAlignedVerticalRectangle()
        {
            var below = new TableRectangle(new PdfRectangle(0, 10, 10, 20)); // previous arguments: 10f, 0f, 10f, 10f
            var above = new TableRectangle(new PdfRectangle(0, 20, 10, 30)); // previous arguments: 20f, 0f, 10f, 10f

            int order = below.CompareTo(above);

            // 'above' precedes 'below' in reading order (this assertion used to be "< 0")
            Assert.True(order > 0);
        }
Example #3
0
        // One rectangle spans y 0..10 and the other y 9.8..19.8, so they share only the thin
        // band y 9.8..10. The higher rectangle is expected to come first (reading order), so
        // comparing the y 0..10 rectangle against it must give a positive result.
        public void TestCompareVerticalOverlapLessThresholdRectangle()
        {
            var below = new TableRectangle(new PdfRectangle(10, 0, 20, 10));    // previous arguments: 0f, 10f, 10f, 10f
            var above = new TableRectangle(new PdfRectangle(0, 9.8, 10, 19.8)); // previous arguments: 9.8f, 0f, 10f, 10f

            int order = below.CompareTo(above);

            // 'above' precedes 'below' in reading order (this assertion used to be "< 0")
            Assert.True(order > 0);
        }
Example #4
0
        // Two rectangles with the same y-range (0..10) placed side by side: x 10..20 and x 20..30.
        // The one with the smaller x-values must compare as smaller.
        public void TestCompareAlignedHorizontalRectangle()
        {
            var leftmost  = new TableRectangle(new PdfRectangle(10, 0, 20, 10)); // previous arguments: 0f, 10f, 10f, 10f
            var rightmost = new TableRectangle(new PdfRectangle(20, 0, 30, 10)); // previous arguments: 0f, 20f, 10f, 10f

            int order = leftmost.CompareTo(rightmost);

            Assert.True(order < 0);
        }
Example #5
0
        // Covers a case of wrong sorting: the expected text was "AARON, JOSHUA, N"
        // but the result was "AARON JOSHUA N , ,".
        // Six glyph rectangles are created and their sizes checked, then a shuffled list of them is
        // sorted with TableRectangle.ILL_DEFINED_ORDER and compared with the expected order.
        public void TestQuickSortRectangleList()
        {
            // "A" - previous arguments: 172.92999267578125f, 51.47999954223633f, 4.0f, 4.309999942779541f
            var letterA = new TableRectangle(new PdfRectangle(
                51.47999954223633,
                172.92999267578125,
                51.47999954223633 + 4.0,
                172.92999267578125 + 4.309999942779541));

            Assert.Equal(4, letterA.Width);
            Assert.Equal(4.309999942779541, letterA.Height);

            // "," - previous arguments: 175.72000122070312f, 72.72000122070312f, 1.6699999570846558f, 1.5199999809265137f
            var commaOne = new TableRectangle(new PdfRectangle(
                72.72000122070312,
                175.72000122070312,
                72.72000122070312 + 1.6699999570846558,
                175.72000122070312 + 1.5199999809265137));

            Assert.Equal(1.6699999570846558, commaOne.Width);
            Assert.Equal(1.5199999809265137, commaOne.Height);

            // "A" - previous arguments: 172.92999267578125f, 96.36000061035156f, 4.0f, 4.309999942779541f
            var secondA = new TableRectangle(new PdfRectangle(
                96.36000061035156,
                172.92999267578125,
                96.36000061035156 + 4.0,
                172.92999267578125 + 4.309999942779541));

            Assert.Equal(4.0, secondA.Width);
            Assert.Equal(4.309999942779541, secondA.Height);

            // "," - previous arguments: 175.72000122070312f, 100.31999969482422f, 1.6699999570846558f, 1.5199999809265137f
            var commaTwo = new TableRectangle(new PdfRectangle(
                100.31999969482422,
                175.72000122070312,
                100.31999969482422 + 1.6699999570846558,
                175.72000122070312 + 1.5199999809265137));

            Assert.Equal(1.6699999570846558, commaTwo.Width);
            Assert.Equal(1.5199999809265137, commaTwo.Height);

            // "N" - previous arguments: 172.92999267578125f, 103.68000030517578f, 4.329999923706055f, 4.309999942779541f
            var letterN = new TableRectangle(new PdfRectangle(
                103.68000030517578,
                172.92999267578125,
                103.68000030517578 + 4.329999923706055,
                172.92999267578125 + 4.309999942779541));

            Assert.Equal(4.329999923706055, letterN.Width);
            Assert.Equal(4.309999942779541, letterN.Height);

            // "R" - previous arguments: 169.2100067138672f, 161.16000366210938f, 4.329999923706055f, 4.309999942779541f
            var letterR = new TableRectangle(new PdfRectangle(
                161.16000366210938,
                169.2100067138672,
                161.16000366210938 + 4.329999923706055,
                169.2100067138672 + 4.309999942779541));

            Assert.Equal(4.329999923706055, letterR.Width);
            Assert.Equal(4.309999942779541, letterR.Height);

            // "R" used to be expected right after the first "A"; it is now expected last,
            // which follows reading order.
            var expectedList = new List <TableRectangle>
            {
                letterA,
                commaOne,
                secondA,
                commaTwo,
                letterN,
                letterR,
            };

            var toSortList = new List <TableRectangle>
            {
                letterR,
                commaOne,
                secondA,
                letterN,
                letterA,
                commaTwo
            };

            // previously: Collections.sort(toSortList, TableRectangle.ILL_DEFINED_ORDER);
            Utils.Sort(toSortList, new TableRectangle.ILL_DEFINED_ORDER());

            Assert.Equal(expectedList, toSortList);
        }
Example #6
0
        // Two 10x10 rectangles that only meet at the corner point (10, 10) must not be reported
        // as horizontally overlapping, and their overlap ratio must be 0.
        public void TestGetHorizontalOverlapShouldReturnZero()
        {
            TableRectangle one = new TableRectangle(new PdfRectangle(0, 0, 10, 10));   // previous arguments: 0f, 0f, 10f, 10f
            TableRectangle two = new TableRectangle(new PdfRectangle(10, 10, 20, 20)); // previous arguments: 10f, 10f, 10f, 10f

            Assert.False(one.HorizontallyOverlaps(two));

            // The third argument of xUnit's Assert.Equal(double, double, int) is the number of decimal
            // places both values are rounded to before comparing. With 0, any ratio that rounds to 0
            // (anything up to 0.5) passed, so compare to 6 decimal places instead.
            Assert.Equal(0.0, one.OverlapRatio(two), 6);
        }
Example #7
0
        // Two default-constructed rectangles must be equal, whichever of them Equals is called on.
        public void TestCompareEqualsRectangles()
        {
            var left  = new TableRectangle();
            var right = new TableRectangle();

            bool leftEqualsRight = left.Equals(right);
            Assert.True(leftEqualsRight);

            bool rightEqualsLeft = right.Equals(left);
            Assert.True(rightEqualsLeft);
        }
Example #8
0
        // The bounds of the RULINGS fixture must start at (0, 0) and be 3 wide and 3 high.
        // NOTE(review): if these calls bind to xUnit's Assert.Equal(double, double, int precision),
        // the trailing 0 means "round to 0 decimal places", not "zero tolerance" - confirm intent.
        public void TestBoundsOfTwoRulings()
        {
            var bounds = new TableRectangle(Utils.Bounds(RULINGS)); // previously: RULINGS.ToList()

            // origin
            Assert.Equal(0, bounds.MinX, 0);
            Assert.Equal(0, bounds.MinY, 0);

            // size
            Assert.Equal(3, bounds.Width, 0);
            Assert.Equal(3, bounds.Height, 0);
        }
Example #9
0
        // The bounds of a list holding a single rectangle must be equal to that rectangle.
        public void TestBoundsOfOneRectangle()
        {
            List <TableRectangle> shapes = new List <TableRectangle>
            {
                new TableRectangle(new PdfRectangle(0, 0, 20, 40))
            };

            TableRectangle r = Utils.Bounds(shapes);

            // xUnit's Assert.Equal takes (expected, actual): the known input rectangle is the expected
            // value and the computed bounds are the actual one, so a failure message labels them correctly.
            Assert.Equal(shapes[0], r);
        }
Example #10
0
        // A rectangle added to a spatial index must be returned when the index is queried
        // with that same rectangle.
        public void TestIntersects()
        {
            var rectangle = new TableRectangle(new PdfRectangle());
            var index     = new RectangleSpatialIndex <TableRectangle>();

            index.Add(rectangle);

            var hits = index.Intersects(rectangle);

            Assert.True(hits.Count > 0);
        }
Example #11
0
        // Two 10x10 rectangles offset by (5, 5) share a 5x5 square: they overlap in both directions,
        // each overlap is 5 long, and the expected overlap ratio is 25 / 175.
        public void TestGetOverlapShouldReturnMoreThanZero()
        {
            TableRectangle one = new TableRectangle(new PdfRectangle(0, 0, 10, 10)); // previous arguments: 0f, 0f, 10f, 10f
            TableRectangle two = new TableRectangle(new PdfRectangle(5, 5, 15, 15)); // previous arguments: 5f, 5f, 10f, 10f

            Assert.True(one.HorizontallyOverlaps(two));
            Assert.True(one.VerticallyOverlaps(two));

            // The third argument of xUnit's Assert.Equal(double, double, int) is the number of decimal
            // places both values are rounded to before comparing. With 0, the expected ratio 25 / 175
            // itself rounded to 0, so the ratio assertion accepted any value up to 0.5. Compare to
            // 6 decimal places so the values are actually checked.
            Assert.Equal(5.0, one.HorizontalOverlap(two), 6);
            Assert.Equal(5.0, one.VerticalOverlap(two), 6);
            Assert.Equal(25.0 / 175, one.OverlapRatio(two), 6);
        }
Example #12
0
        // The rectangles span y 15..25 and y 20..30, so they share the band y 20..25 (5 long, half of
        // each rectangle's height). Their x-ranges (10..20 and 0..10) only touch, so the overall
        // overlap ratio is expected to be 0.
        public void TestGetVerticalOverlapShouldReturnMoreThanZero()
        {
            TableRectangle lower = new TableRectangle(new PdfRectangle(10, 15, 20, 25)); // previous arguments: 15f, 10f, 10f, 10f
            TableRectangle upper = new TableRectangle(new PdfRectangle(0, 20, 10, 30));  // previous arguments: 20f, 0f, 10f, 10f

            double overlap = lower.VerticalOverlap(upper);

            // The third argument of xUnit's Assert.Equal(double, double, int) is the number of decimal
            // places both values are rounded to before comparing. With 0, the expected ratio 0.5 was
            // rounded to a whole number first, so the ratio checks could not detect a wrong value.
            // Compare to 6 decimal places instead.
            Assert.Equal(5.0, overlap, 6);
            Assert.True(lower.VerticallyOverlaps(upper));
            Assert.Equal(0.5, lower.VerticalOverlapRatio(upper), 6);
            Assert.Equal(0.0, lower.OverlapRatio(upper), 6);
        }
Example #13
0
        // The bounding box of the rectangles (0, 0, 10, 10) and (30, 10, 40, 20) must be (0, 0, 40, 20).
        public void TestGetBoundingBox()
        {
            var first  = new TableRectangle(new PdfRectangle(0, 0, 10, 10));   // previous arguments: 0f, 0f, 10f, 10f
            var second = new TableRectangle(new PdfRectangle(30, 10, 40, 20)); // previous arguments: 20f, 30f, 10f, 10f
            var rectangles = new List <TableRectangle> { first, second };

            var expected = new TableRectangle(new PdfRectangle(0, 0, 40, 20)); // previous arguments: 0f, 0f, 40f, 30f

            TableRectangle boundingBoxOf = TableRectangle.BoundingBoxOf(rectangles);

            Assert.Equal(expected, boundingBoxOf);
        }
Example #14
0
        // Merging (5, 5, 15, 15) into (0, 0, 10, 10) must grow the target in place to cover both:
        // 15 wide, 15 high, left 0, bottom 0, top 15.
        // NOTE(review): if these calls bind to xUnit's Assert.Equal(double, double, int precision),
        // the trailing 0 means "round to 0 decimal places" - confirm intent.
        public void TestMergeOverlappingRectangles()
        {
            var target = new TableRectangle(new PdfRectangle(0, 0, 10, 10)); // previous arguments: 0f, 0f, 10f, 10f
            var other  = new TableRectangle(new PdfRectangle(5, 5, 15, 15)); // previous arguments: 5f, 5f, 10f, 10f

            target.Merge(other);

            // size after the merge
            Assert.Equal(15f, target.Width, 0);
            Assert.Equal(15f, target.Height, 0);

            // position after the merge
            Assert.Equal(0f, target.Left, 0);
            Assert.Equal(0f, target.Bottom, 0); // this check previously used one.getTop()
            Assert.Equal(15, target.Top, 0);
        }
Example #15
0
        // Merging (10, 0, 20, 10) into the adjacent (0, 0, 10, 10) must grow the target in place to
        // 20 wide and 10 high, with left 0, top 10, bottom 0 and an area of 200.
        // NOTE(review): if these calls bind to xUnit's Assert.Equal(double, double, int precision),
        // the trailing 0 means "round to 0 decimal places" - confirm intent.
        public void TestMergeNoOverlappingRectangles()
        {
            var target = new TableRectangle(new PdfRectangle(0, 0, 10, 10));  // previous arguments: 0f, 0f, 10f, 10f
            var other  = new TableRectangle(new PdfRectangle(10, 0, 20, 10)); // previous arguments: 0f, 10f, 10f, 10f

            target.Merge(other);

            // size after the merge
            Assert.Equal(20f, target.Width, 0);
            Assert.Equal(10f, target.Height, 0);

            // position after the merge
            Assert.Equal(0f, target.Left, 0);
            Assert.Equal(10, target.Top, 0);   // previously: 0f, one.getTop(), 0
            Assert.Equal(0, target.Bottom, 0); // previously: 10f, one.getBottom(), 0

            Assert.Equal(20f * 10f, target.Area, 0);
        }
Example #16
0
        // Builds the rectangles of a "," glyph and an "R" glyph, checks their sizes to two decimal
        // places, and expects the comma to compare as smaller than the "R".
        public void TestQuickSortOneUpperThanOther()
        {
            // "," (comma after AARON) - previous arguments: 175.72f, 72.72f, 1.67f, 1.52f
            var comma = new TableRectangle(new PdfRectangle(72.72, 175.72, 72.72 + 1.67, 175.72 + 1.52));

            Assert.Equal(1.67, comma.Width, 2);
            Assert.Equal(1.52, comma.Height, 2);

            // "R" (REGIONAL PULMONARY) - previous arguments: 169.21f, 161.16f, 4.33f, 4.31f
            var letterR = new TableRectangle(new PdfRectangle(161.16, 169.21, 161.16 + 4.33, 169.21 + 4.31));

            Assert.Equal(4.33, letterR.Width, 2);
            Assert.Equal(4.31, letterR.Height, 2);

            int order = comma.CompareTo(letterR);

            Assert.True(order < 0); // this assertion used to be "> 0"
        }
Example #17
0
        [Fact]        //(Skip = "Comparison is not transitive. Transitivity needs to be implemented.")]
        public void TestTransitiveComparison1()
        {
            // Three 2x2 rectangles stepping down and to the right, each one overlapping the next:
            //
            // +-------+
            // |       |
            // |   a   | +-------+
            // |       | |       |
            // +-------+ |   b   | +-------+
            //           |       | |       |
            //           +-------+ |   c   |
            //                     |       |
            //                     +-------+
            var a = new TableRectangle(new PdfRectangle(0, 2, 2, 4));
            var b = new TableRectangle(new PdfRectangle(1, 1, 3, 3));
            var c = new TableRectangle(new PdfRectangle(2, 0, 4, 2));

            int aVersusB = a.CompareTo(b);
            int bVersusC = b.CompareTo(c);
            int aVersusC = a.CompareTo(c);

            // a < b and b < c, so a < c must hold as well
            Assert.True(aVersusB < 0);
            Assert.True(bVersusC < 0);
            Assert.True(aVersusC < 0);
        }
Example #18
0
        // A rectangle built from PdfRectangle(20, 10, 50, 50) must be 30 wide and 40 high, and its
        // Points must be the four corners in the order (20, 10), (50, 10), (50, 50), (20, 50).
        public void TestRectangleGetPoints()
        {
            var rectangle = new TableRectangle(new PdfRectangle(20, 10, 50, 50)); // previous arguments: 10f, 20f, 30f, 40f

            Assert.Equal(30, rectangle.Width);
            Assert.Equal(40, rectangle.Height);

            PdfPoint[] expectedCorners =
            {
                new PdfPoint(20, 10),
                new PdfPoint(50, 10),
                new PdfPoint(50, 50),
                new PdfPoint(20, 50)
            };

            PdfPoint[] actualCorners = rectangle.Points;

            Assert.Equal(expectedCorners, actualCorners);
        }
Example #19
0
        // Transitivity check on three overlapping 2x2 rectangles: a < b and b < c must come
        // together with a < c.
        public void TestTransitiveComparison2()
        {
            // need to rewrite
            // NOTE(review): the coordinates below place 'c' at x 0..2, y 2..4 and 'a' at x 2..4, y 0..2,
            // which does not look like the layout drawn here - confirm which of the two is intended.

            //                     +-------+
            //                     |       |
            //           +-------+ |   C   |
            //           |       | |       |
            // +-------+ |   B   | +-------+
            // |       | |       |
            // |   A   | +-------+
            // |       |
            // +-------+
            var c = new TableRectangle(new PdfRectangle(0, 2, 2, 4)); // previous arguments: 2, 0, 2, 2 (then named a)
            var b = new TableRectangle(new PdfRectangle(1, 1, 3, 3)); // previous arguments: 1, 1, 2, 2
            var a = new TableRectangle(new PdfRectangle(2, 0, 4, 2)); // previous arguments: 0, 2, 2, 2 (then named c)

            int aVersusB = a.CompareTo(b);
            int bVersusC = b.CompareTo(c);
            int aVersusC = a.CompareTo(c);

            Assert.True(aVersusB < 0);
            Assert.True(bVersusC < 0);
            Assert.True(aVersusC < 0);
        }
Example #20
0
        // Loads rectangles from a CSV resource (columns read as top, left, width, height), sorts them
        // with TableRectangle.ILL_DEFINED_ORDER and checks that every rectangle compares as strictly
        // smaller than the one that follows it.
        public void TestNaturalOrderOfRectanglesOneMoreTime()
        {
            // The CSV is machine-written data: parse it with the invariant culture so the test does
            // not depend on the decimal separator of the current culture of the machine running it.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var parse = UtilsForTesting.LoadCsvLines("Resources/csv/TestBasicExtractor-RECTANGLE_TEST_NATURAL_ORDER.csv");
            List <TableRectangle> rectangles = new List <TableRectangle>();

            foreach (var record in parse)
            {
                var    top  = double.Parse(record[0], invariant);
                var    left = double.Parse(record[1], invariant);
                double w    = double.Parse(record[2], invariant);
                double h    = double.Parse(record[3], invariant);

                rectangles.Add(new TableRectangle(new PdfRectangle(left, top, left + w, top + h)));
            }

            Utils.Sort(rectangles, new TableRectangle.ILL_DEFINED_ORDER());

            // after sorting, each element must be strictly smaller than its successor
            for (int i = 0; i < rectangles.Count - 1; i++)
            {
                TableRectangle rectangle     = rectangles[i];
                TableRectangle nextRectangle = rectangles[i + 1];
                Assert.True(rectangle.CompareTo(nextRectangle) < 0);
            }
        }
Example #21
0
        /// <summary>
        /// Builds a table area from consecutive text rows that intersect enough of the relevant text edges,
        /// then extends it with nearby horizontal rulings and pads it by TABLE_PADDING_AMOUNT.
        /// </summary>
        /// <param name="lines">Text rows to scan. The distance between rows is computed as
        /// prevRow.Bottom - textRow.Bottom and asserted to be non-negative, so rows are presumably
        /// ordered by decreasing Bottom - confirm against the caller.</param>
        /// <param name="relevantEdges">Text edges tested against each row with IntersectsLine.
        /// It is iterated without a null check.</param>
        /// <param name="relevantEdgeCount">Number of relevant edges; a row is a table row candidate when it
        /// intersects at least this many edges (one fewer is tolerated when the count is above 3).</param>
        /// <param name="horizontalRulings">Horizontal rulings used to extend the table. They are walked
        /// backwards for the bottom and forwards for the top, stopping at the first ruling that is too far
        /// away, which presumably relies on a particular ordering - confirm against the caller.</param>
        /// <returns>The padded table area, or null when no row qualified (the area is still 0).</returns>
        private TableRectangle getTableFromText(List <TableLine> lines, List <TextEdge> relevantEdges, int relevantEdgeCount, List <Ruling> horizontalRulings)
        {
            TableRectangle table = new TableRectangle();

            TableLine prevRow       = null;
            TableLine firstTableRow = null;
            TableLine lastTableRow  = null;

            // number of row-to-row gaps measured so far and their summed size (used for averages)
            int    tableSpaceCount = 0;
            double totalRowSpacing = 0;

            // go through the lines and find the ones that have the correct count of the relevant edges
            foreach (TableLine textRow in lines)
            {
                int numRelevantEdges = 0;

                if (firstTableRow != null && tableSpaceCount > 0)
                {
                    // check to make sure this text row is within a line or so of the other lines already added
                    // if it's not, we should stop the table here
                    // (the limit is 2.5 times the average row spacing measured so far)
                    double tableLineThreshold = (totalRowSpacing / tableSpaceCount) * 2.5;
                    double lineDistance       = prevRow.Bottom - textRow.Bottom; // bobld: textRow.Top - prevRow.Top

                    System.Diagnostics.Debug.Assert(lineDistance >= 0);

                    if (lineDistance > tableLineThreshold)
                    {
                        lastTableRow = prevRow;
                        break;
                    }
                }

                // for larger tables, be a little lenient on the number of relevant rows the text intersects
                // for smaller tables, not so much - otherwise we'll end up treating paragraphs as tables too
                int relativeEdgeDifferenceThreshold = 1;
                if (relevantEdgeCount <= 3)
                {
                    relativeEdgeDifferenceThreshold = 0;
                }

                // count how many of the relevant edges this row intersects
                foreach (TextEdge edge in relevantEdges)
                {
                    if (textRow.IntersectsLine(edge.Line))
                    {
                        numRelevantEdges++;
                    }
                }

                // see if we have a candidate text row
                if (numRelevantEdges >= (relevantEdgeCount - relativeEdgeDifferenceThreshold))
                {
                    // keep track of table row spacing
                    if (prevRow != null && firstTableRow != null)
                    {
                        tableSpaceCount++;
                        totalRowSpacing += prevRow.Bottom - textRow.Bottom; // bobld: textRow.Top - prevRow.Top
                    }

                    // row is part of a table
                    if (table.Area == 0)
                    {
                        // first qualifying row: the table starts out as exactly this row
                        firstTableRow = textRow;
                        table.SetRect(textRow);
                    }
                    else
                    {
                        // later rows widen the table and move its bottom down; the top is left unchanged
                        table.SetLeft(Math.Min(table.Left, textRow.Left));
                        table.SetBottom(Math.Min(table.Bottom, textRow.Bottom)); // bobld: Max
                        table.SetRight(Math.Max(table.Right, textRow.Right));
                    }
                }
                else
                {
                    // no dice
                    // if we're at the end of the table, save the last row
                    if (firstTableRow != null && lastTableRow == null)
                    {
                        lastTableRow = prevRow;
                    }
                }

                prevRow = textRow;
            }

            // if we don't have a table now, we won't after the next step either
            if (table.Area == 0)
            {
                return(null);
            }

            if (lastTableRow == null)
            {
                // takes care of one-row tables or tables that end at the bottom of a page
                lastTableRow = prevRow;
            }

            // use the average row height and nearby horizontal lines to extend the table area
            double avgRowHeight;

            if (tableSpaceCount > 0)
            {
                System.Diagnostics.Debug.Assert(totalRowSpacing >= 0);
                avgRowHeight = totalRowSpacing / tableSpaceCount;
            }
            else
            {
                // no gap was measured, so fall back to the height of the last table row
                avgRowHeight = lastTableRow.Height;
            }

            double rowHeightThreshold = avgRowHeight * 1.5;

            // check lines after the bottom of the table
            //foreach (Ruling ruling in sortedHorizontalRulings) //Line2D.Float
            for (int i = horizontalRulings.Count - 1; i >= 0; i--) // reverse order
            {
                var ruling = horizontalRulings[i];
                // skip rulings whose Y1 lies above the current table bottom
                if (ruling.Y1 > table.Bottom) // bobld: <
                {
                    continue;
                }

                double distanceFromTable = table.Bottom - ruling.Y2; // bobld: Y1
                System.Diagnostics.Debug.Assert(distanceFromTable >= 0);
                if (distanceFromTable <= rowHeightThreshold)
                {
                    // use this ruling to help define the table
                    table.SetBottom(Math.Min(table.Bottom, ruling.Y2));  // bobld: Max Y1
                    table.SetLeft(Math.Min(table.Left, ruling.X1));
                    table.SetRight(Math.Max(table.Right, ruling.X2));
                }
                else
                {
                    // no use checking any further
                    break;
                }
            }

            // do the same for lines at the top, but make the threshold greater since table headings tend to be
            // larger to fit up to three-ish rows of text (at least but we don't want to grab too much)
            rowHeightThreshold = avgRowHeight * 3.8;

            //for (int i = horizontalRulings.Count - 1; i >= 0; i--)
            for (int i = 0; i < horizontalRulings.Count; i++)
            {
                Ruling ruling = horizontalRulings[i];

                // skip rulings whose Y1 lies below the current table top
                if (ruling.Y1 < table.Top) //bobld: >
                {
                    continue;
                }

                double distanceFromTable = ruling.Y1 - table.Top; // bobld: table.Top - ruling.Y1
                System.Diagnostics.Debug.Assert(distanceFromTable >= 0);
                if (distanceFromTable <= rowHeightThreshold)
                {
                    table.SetTop(Math.Max(table.Top, ruling.Y2));  // bobld: Min Y1
                    table.SetLeft(Math.Min(table.Left, ruling.X1));
                    table.SetRight(Math.Max(table.Right, ruling.X2));
                }
                else
                {
                    break;
                }
            }

            // add a bit of padding since the halved horizontal lines are a little fuzzy anyways
            // (every side is rounded outwards and then pushed out by TABLE_PADDING_AMOUNT)
            table.SetTop(Math.Ceiling(table.Top) + TABLE_PADDING_AMOUNT);       // bobld: Floor -
            table.SetBottom(Math.Floor(table.Bottom) - TABLE_PADDING_AMOUNT);   // bobld: Ceiling +
            table.SetLeft(Math.Floor(table.Left) - TABLE_PADDING_AMOUNT);
            table.SetRight(Math.Ceiling(table.Right) + TABLE_PADDING_AMOUNT);

            return(table);
        }
Example #22
0
        // The bounds of the RECTANGLES fixture must be equal to its second element.
        public void TestBoundsOfOneEmptyRectangleAndAnotherNonEmpty()
        {
            TableRectangle r = Utils.Bounds(RECTANGLES.ToList());

            // xUnit's Assert.Equal takes (expected, actual): the fixture element is the expected value
            // and the computed bounds are the actual one, so a failure message labels them correctly.
            Assert.Equal(RECTANGLES[1], r);
        }
Example #23
0
        /// <summary>
        /// Gets columns positions.
        /// <para>
        /// Each non-whitespace chunk of the first line starts a region. For every following line, chunks
        /// that horizontally overlap an existing region are merged into it; the chunks left over are grouped
        /// (consecutive overlapping chunks together) into new regions. The right edge of every region is
        /// a column position.
        /// </para>
        /// </summary>
        /// <param name="lines">Must be an array of lines sorted by their +top+ attribute.</param>
        /// <returns>a list of column boundaries (x axis), sorted in ascending order;
        /// empty when <paramref name="lines"/> contains no line.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="lines"/> is null.</exception>
        public static List <double> ColumnPositions(IReadOnlyList <TableLine> lines)
        {
            if (lines == null)
            {
                throw new System.ArgumentNullException(nameof(lines));
            }

            // lines[0] below would throw on an empty list; no lines simply means no columns
            if (lines.Count == 0)
            {
                return(new List <double>());
            }

            List <TableRectangle> regions = new List <TableRectangle>();

            // seed the regions with the non-whitespace chunks of the first line
            foreach (TextChunk tc in lines[0].TextElements)
            {
                if (tc.IsSameChar(TableLine.WHITE_SPACE_CHARS))
                {
                    continue;
                }
                TableRectangle r = new TableRectangle();
                r.SetRect(tc);
                regions.Add(r);
            }

            foreach (TableLine l in lines.SubList(1, lines.Count))
            {
                // non-whitespace chunks of this line that have not been assigned to a region yet
                List <TextChunk> lineTextElements = new List <TextChunk>();
                foreach (TextChunk tc in l.TextElements)
                {
                    if (!tc.IsSameChar(TableLine.WHITE_SPACE_CHARS))
                    {
                        lineTextElements.Add(tc);
                    }
                }

                foreach (TableRectangle cr in regions)
                {
                    // collect the overlapping chunks first: merging changes the region while we test against it
                    List <TextChunk> overlaps = new List <TextChunk>();
                    foreach (TextChunk te in lineTextElements)
                    {
                        if (cr.HorizontallyOverlaps(te))
                        {
                            overlaps.Add(te);
                        }
                    }

                    foreach (TextChunk te in overlaps)
                    {
                        cr.Merge(te);
                    }

                    // a chunk merged into a region is no longer a candidate for the remaining regions
                    foreach (var rem in overlaps)
                    {
                        lineTextElements.Remove(rem);
                    }
                }

                // added by bobld
                // We need more checks here

                /*
                 * foreach (TextChunk te in lineTextElements)
                 * {
                 *  TableRectangle r = new TableRectangle();
                 *  r.setRect(te);
                 *  regions.Add(r);
                 * }
                 */

                if (lineTextElements.Count > 0)
                {
                    // because testExtractColumnsCorrectly3() fails
                    // need to check here if the remaining te in lineTextElements do overlap among themselves
                    // might happen with multiline cell
                    TableRectangle r = new TableRectangle();
                    r.SetRect(lineTextElements[0]);
                    foreach (var rem in lineTextElements.SubList(1, lineTextElements.Count))
                    {
                        if (r.HorizontallyOverlaps(rem))
                        {
                            // they overlap!
                            // so this is multiline cell
                            r.Merge(rem);
                        }
                        else
                        {
                            regions.Add(r); // do not overlap (anymore), so add it
                            r = new TableRectangle();
                            r.SetRect(rem);
                            //regions.Add(r);
                        }
                    }
                    // the region still being built when the loop ends has not been added yet
                    regions.Add(r);
                }
                // end added
            }

            List <double> rv = new List <double>();

            foreach (TableRectangle r in regions)
            {
                rv.Add(r.Right);
            }

            rv.Sort(); //Collections.sort(rv);

            return(rv);
        }
Example #24
0
        /// <summary>
        /// Detects the tables in the page.
        /// </summary>
        /// <param name="page"></param>
        public List <TableRectangle> Detect(PageArea page)
        {
            // get horizontal & vertical lines
            // we get these from an image of the PDF and not the PDF itself because sometimes there are invisible PDF
            // instructions that are interpreted incorrectly as visible elements - we really want to capture what a
            // person sees when they look at the PDF
            // BobLd: hack here, we don't convert to an image
            var           pageRulings       = page.GetRulings();
            List <Ruling> horizontalRulings = this.getHorizontalRulings(pageRulings);
            List <Ruling> verticalRulings   = this.getVerticalRulings(pageRulings);
            // end hack here

            List <Ruling> allEdges = new List <Ruling>(horizontalRulings);

            allEdges.AddRange(verticalRulings);

            List <TableRectangle> tableAreas = new List <TableRectangle>();

            // if we found some edges, try to find some tables based on them
            if (allEdges.Count > 0)
            {
                // now we need to snap edge endpoints to a grid
                Utils.SnapPoints(allEdges, POINT_SNAP_DISTANCE_THRESHOLD, POINT_SNAP_DISTANCE_THRESHOLD);

                // normalize the rulings to make sure snapping didn't create any wacky non-horizontal/vertical rulings
                foreach (List <Ruling> rulings in new[] { horizontalRulings, verticalRulings }) //Arrays.asList(horizontalRulings, verticalRulings))
                {
                    //for (Iterator<Ruling> iterator = rulings.iterator(); iterator.hasNext();)
                    foreach (var ruling in rulings.ToList()) // use ToList to be able to remove
                    {
                        ruling.Normalize();
                        if (ruling.IsOblique)
                        {
                            rulings.Remove(ruling);
                        }
                    }
                }

                // merge the edge lines into rulings - this makes finding edges between crossing points in the next step easier
                // we use a larger pixel expansion than the normal spreadsheet extraction method to cover gaps in the
                // edge detection/pixel snapping steps
                horizontalRulings = Ruling.CollapseOrientedRulings(horizontalRulings, 5);
                verticalRulings   = Ruling.CollapseOrientedRulings(verticalRulings, 5);

                // use the rulings and points to find cells
                List <TableRectangle> cells = SpreadsheetExtractionAlgorithm.FindCells(horizontalRulings, verticalRulings).Cast <TableRectangle>().ToList();

                // then use those cells to make table areas
                tableAreas = getTableAreasFromCells(cells);
            }

            // next find any vertical rulings that intersect tables - sometimes these won't have completely been captured as
            // cells if there are missing horizontal lines (which there often are)
            // let's assume though that these lines should be part of the table
            foreach (Ruling verticalRuling in verticalRulings) // Line2D.Float
            {
                foreach (TableRectangle tableArea in tableAreas)
                {
                    if (verticalRuling.Intersects(tableArea) &&
                        !(tableArea.Contains(verticalRuling.P1) && tableArea.Contains(verticalRuling.P2)))
                    {
                        tableArea.SetTop(Math.Ceiling(Math.Max(tableArea.Top, verticalRuling.Y2)));     // bobld: Floor and Min, Y1
                        tableArea.SetBottom(Math.Floor(Math.Min(tableArea.Bottom, verticalRuling.Y1))); // bobld: Ceiling and Max, Y2
                        break;
                    }
                }
            }

            /* BobLd: not sure this is the case in tabula-sharp/PdfPig
             * // the tabula Page coordinate space is half the size of the PDFBox image coordinate space
             * // so halve the table area size before proceeding and add a bit of padding to make sure we capture everything
             * foreach (TableRectangle area in tableAreas)
             * {
             *  area.x = (float)Math.floor(area.x / 2) - TABLE_PADDING_AMOUNT;
             *  area.y = (float)Math.floor(area.y / 2) - TABLE_PADDING_AMOUNT;
             *  area.width = (float)Math.ceil(area.width / 2) + TABLE_PADDING_AMOUNT;
             *  area.height = (float)Math.ceil(area.height / 2) + TABLE_PADDING_AMOUNT;
             * }
             *
             * // we're going to want halved horizontal lines later too
             * foreach (Ruling ruling in horizontalRulings) // Line2D.Float
             * {
             *  ruling.x1 = ruling.x1 / 2;
             *  ruling.y1 = ruling.y1 / 2;
             *  ruling.x2 = ruling.x2 / 2;
             *  ruling.y2 = ruling.y2 / 2;
             * }
             */

            // now look at text rows to help us find more tables and flesh out existing ones
            List <TextChunk> textChunks = TextElement.MergeWords(page.GetText());
            List <TableLine> lines      = TextChunk.GroupByLines(textChunks);

            // first look for text rows that intersect an existing table - those lines should probably be part of the table
            foreach (TableLine textRow in lines)
            {
                foreach (TableRectangle tableArea in tableAreas)
                {
                    if (!tableArea.Contains(textRow) && textRow.Intersects(tableArea))
                    {
                        tableArea.SetLeft(Math.Floor(Math.Min(textRow.Left, tableArea.Left)));
                        tableArea.SetRight(Math.Ceiling(Math.Max(textRow.Right, tableArea.Right)));
                    }
                }
            }

            // get rid of tables that DO NOT intersect any text areas - these are likely graphs or some sort of graphic
            //for (Iterator<Rectangle> iterator = tableAreas.iterator(); iterator.hasNext();)
            foreach (TableRectangle table in tableAreas.ToList()) // use tolist to be able to remove
            {
                bool intersectsText = false;
                foreach (TableLine textRow in lines)
                {
                    if (table.Intersects(textRow))
                    {
                        intersectsText = true;
                        break;
                    }
                }

                if (!intersectsText)
                {
                    tableAreas.Remove(table);
                }
            }

            // lastly, there may be some tables that don't have any vertical rulings at all
            // we'll use text edges we've found to try and guess which text rows are part of a table

            // in his thesis nurminen goes through every row to try to assign a probability that the line is in a table
            // we're going to try a general heuristic instead, trying to find what type of edge (left/right/mid) intersects
            // the most text rows, and then use that magic number of "relevant" edges to decide what text rows should be
            // part of a table.

            bool foundTable;

            do
            {
                foundTable = false;

                // get rid of any text lines contained within existing tables, this allows us to find more tables
                //for (Iterator<TableLine> iterator = lines.iterator(); iterator.hasNext();)
                foreach (var textRow in lines.ToList())
                {
                    foreach (TableRectangle table in tableAreas)
                    {
                        if (table.Contains(textRow))
                        {
                            lines.Remove(textRow);
                            break;
                        }
                    }
                }

                // get text edges from remaining lines in the document
                TextEdges textEdges = getTextEdges(lines);
                //List<TextEdge> leftTextEdges = textEdges[TextEdge.LEFT];
                //List<TextEdge> midTextEdges = textEdges[TextEdge.MID];
                //List<TextEdge> rightTextEdges = textEdges[TextEdge.RIGHT];

                // find the relevant text edges (the ones we think define where a table is)
                RelevantEdges relevantEdgeInfo = getRelevantEdges(textEdges, lines);

                // we found something relevant so let's look for rows that fit our criteria
                if (relevantEdgeInfo.edgeType != -1)
                {
                    List <TextEdge> relevantEdges = null;
                    switch (relevantEdgeInfo.edgeType)
                    {
                    case TextEdge.LEFT:
                        relevantEdges = textEdges[TextEdge.LEFT];       // leftTextEdges;
                        break;

                    case TextEdge.MID:
                        relevantEdges = textEdges[TextEdge.MID];        // midTextEdges;
                        break;

                    case TextEdge.RIGHT:
                        relevantEdges = textEdges[TextEdge.RIGHT];      // rightTextEdges;
                        break;
                    }

                    TableRectangle table = getTableFromText(lines, relevantEdges, relevantEdgeInfo.edgeCount, horizontalRulings);

                    if (table != null)
                    {
                        foundTable = true;
                        tableAreas.Add(table);
                    }
                }
            } while (foundTable);

            // create a set of our current tables that will eliminate duplicate tables
            SortedSet <TableRectangle> tableSet = new SortedSet <TableRectangle>(new TreeSetComparer()); //Set<Rectangle> tableSet = new TreeSet<>(new Comparator<Rectangle>() {...

            foreach (var table in tableAreas.OrderByDescending(t => t.Area))
            {
                tableSet.Add(table);
            }

            return(tableSet.ToList());
        }