/// <summary>
/// Joins the vertical and the horizontal lines and stores both results in
/// <c>JoinedVerticalLines</c> and <c>JoinedHorizontalLines</c>.
/// </summary>
/// <param name="allLines">All the lines.</param>
/// <returns>The union of the joined horizontal lines and the joined vertical lines.</returns>
private List<Line> JoinLines(List<Line> allLines)
{
    // Vertical lines are joined first, then the horizontal ones
    JoinedVerticalLines = JoinVerticalLines(allLines);
    JoinedHorizontalLines = JoinHorizontalLines(allLines);

    // Horizontal lines come first in the combined sequence
    var horizontalThenVertical = JoinedHorizontalLines.Union(JoinedVerticalLines);
    return horizontalThenVertical.ToList();
}
/// <summary>
/// Determines the table structures.
/// </summary>
/// <remarks>
/// Joins <c>AllLines</c>, adds a table to <c>Tables</c> for every horizontal line that
/// qualifies as a top border, splits every table into rows and columns, fixes their end
/// coordinates and indexes and finally calls <c>CreateContent</c> on each table.
/// </remarks>
public void DetermineTableStructures()
{
    JoinedLines = JoinLines(AllLines);

    // Find table borders
    foreach (Line topCandidate in JoinedHorizontalLines.OrderBy(line => line.StartPoint.Y))
    {
        // A horizontal line is taken as the top line of a new table when
        // 1. no table found so far contains the Y of the line
        // 2. a vertical line starts at the start point or at the end point of the line
        bool insideKnownTable = Tables.Any(table => table.Contains(topCandidate.StartPoint.Y));
        if (insideKnownTable)
        {
            continue;
        }

        // Matching vertical lines, the one with the largest (EndPoint.Y - StartPoint.Y) first
        var sideLines = JoinedVerticalLines
            .Where(line => line.StartPoint == topCandidate.StartPoint || line.StartPoint == topCandidate.EndPoint)
            .OrderByDescending(line => line.EndPoint.Y - line.StartPoint.Y)
            .ToList();
        if (sideLines.Count == 0)
        {
            continue;
        }

        Line tallestSide = sideLines[0];
        var newTable = new Table()
        {
            TopLeftPoint = topCandidate.StartPoint,
            BottomRightPoint = new Point(topCandidate.EndPoint.X, tallestSide.EndPoint.Y)
        };
        Tables.Add(newTable);
    }

    // Add the first row and the first column to all tables
    foreach (Table table in Tables)
    {
        var firstRow = new Row() { BeginY = table.TopLeftPoint.Y };
        table.Rows.Add(firstRow);

        var firstColumn = new Column() { BeginX = table.TopLeftPoint.X };
        table.Columns.Add(firstColumn);
    }

    // Find rows
    foreach (Line rowLine in JoinedHorizontalLines.OrderBy(line => line.StartPoint.Y))
    {
        var owner = Tables.FirstOrDefault(table => table.Contains(rowLine));
        if (owner == null)
        {
            // No table contains this line
            continue;
        }

        // Skip the line when a row already begins there (within tolerance)
        // or when the line is the bottom edge of the table
        if (owner.Rows.Any(row => Math.Abs(row.BeginY - rowLine.StartPoint.Y) < ContentExtractor.Tolerance)
            || owner.BottomRightPoint.Y - rowLine.StartPoint.Y < ContentExtractor.Tolerance)
        {
            continue;
        }

        owner.Rows.Add(new Row() { BeginY = rowLine.StartPoint.Y });
    }

    // Find columns
    foreach (Line columnLine in JoinedVerticalLines.OrderBy(line => line.StartPoint.X))
    {
        var owner = Tables.FirstOrDefault(table => table.Contains(columnLine));
        if (owner == null)
        {
            // No table contains this line
            continue;
        }

        // Skip the line when a column already begins there (within tolerance)
        // or when the line is the right edge of the table
        if (owner.Columns.Any(column => Math.Abs(column.BeginX - columnLine.StartPoint.X) < ContentExtractor.Tolerance)
            || owner.BottomRightPoint.X - columnLine.StartPoint.X < ContentExtractor.Tolerance)
        {
            continue;
        }

        owner.Columns.Add(new Column() { BeginX = columnLine.StartPoint.X });
    }

    // Fix EndX and EndY and indexes
    foreach (Table table in Tables)
    {
        // Every row ends slightly before the next one begins;
        // the last row ends at the bottom of the table
        var rows = table.Rows;
        for (int next = 1; next < rows.Count; next++)
        {
            rows[next - 1].EndY = rows[next].BeginY - ContentExtractor.Tolerance * 0.1f;
        }

        rows[rows.Count - 1].EndY = table.BottomRightPoint.Y;

        // Every column ends slightly before the next one begins;
        // the last column ends at the right side of the table
        var columns = table.Columns;
        for (int next = 1; next < columns.Count; next++)
        {
            columns[next - 1].EndX = columns[next].BeginX - ContentExtractor.Tolerance * 0.1f;
        }

        columns[columns.Count - 1].EndX = table.BottomRightPoint.X;

        // Columns are indexed by ascending BeginX
        var columnsByX = columns.OrderBy(column => column.BeginX).ToList();
        for (int position = 0; position < columnsByX.Count; position++)
        {
            columnsByX[position].Index = position;
        }

        // Rows are indexed by descending BeginY
        var rowsByYDescending = rows.OrderByDescending(row => row.BeginY).ToList();
        for (int position = 0; position < rowsByYDescending.Count; position++)
        {
            rowsByYDescending[position].Index = position;
        }

        table.CreateContent();
    }
}