/// <summary>
/// Finds the tables on a page, using the cells formed by the page's horizontal and vertical rulings.
/// </summary>
/// <param name="page">The page to search for tables.</param>
/// <returns>
/// The detected table areas, sorted with <see cref="TableRectangle.ILL_DEFINED_ORDER"/>.
/// </returns>
public List<TableRectangle> Detect(PageArea page)
{
    var horizontalRulings = page.HorizontalRulings;
    var verticalRulings = page.VerticalRulings;
    List<Cell> ruledCells = SpreadsheetExtractionAlgorithm.FindCells(horizontalRulings, verticalRulings);

    // The spreadsheet finder works on plain rectangles, so hand it the cells as TableRectangle.
    List<TableRectangle> cellAreas = ruledCells.Cast<TableRectangle>().ToList();
    List<TableRectangle> detected = SpreadsheetExtractionAlgorithm.FindSpreadsheetsFromCells(cellAreas);

    // Tables are meant to come back ordered from the top of the page to the bottom.
    Utils.Sort(detected, new TableRectangle.ILL_DEFINED_ORDER());
    return detected;
}
/// <summary>
/// Checks that <c>SpreadsheetExtractionAlgorithm.FindSpreadsheetsFromCells</c>, fed the cells listed in
/// the CSV fixture, produces the rectangles in <c>EXPECTED_RECTANGLES</c>. Both lists are sorted with
/// the same comparer before being compared.
/// </summary>
[Fact]
public void TestFindSpreadsheetsFromCells()
{
    // The fixture is machine-written data: parse it with the invariant culture so the test does not
    // depend on the decimal separator of the machine running it.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var parse = UtilsForTesting.LoadCsvLines("Resources/csv/TestSpreadsheetExtractor-CELLS.csv");
    List<Cell> cells = new List<Cell>();
    foreach (var record in parse)
    {
        // Fixture columns: top, left, width, height.
        var top = double.Parse(record[0], invariant);
        var left = double.Parse(record[1], invariant);
        var width = double.Parse(record[2], invariant);
        var height = double.Parse(record[3], invariant);

        cells.Add(new Cell(new PdfRectangle(left, top, left + width, top + height)));
    }

    List<TableRectangle> expected = EXPECTED_RECTANGLES.ToList();
    Utils.Sort(expected, new TableRectangle.ILL_DEFINED_ORDER());

    List<TableRectangle> foundRectangles = SpreadsheetExtractionAlgorithm.FindSpreadsheetsFromCells(cells.Cast<TableRectangle>().ToList());
    Utils.Sort(foundRectangles, new TableRectangle.ILL_DEFINED_ORDER());

    // xUnit takes (expected, actual); keeping that order makes failure messages label the lists correctly.
    Assert.Equal(expected, foundRectangles);
}