/// <summary>
/// Derives a table rectangle from a sequence of text rows by keeping the rows that cross
/// enough of the relevant text edges, then widening the result with nearby horizontal rulings
/// and a fixed padding.
/// </summary>
/// <param name="lines">Text rows to scan, in iteration order. The spacing between consecutive
/// rows is computed as <c>previous.Bottom - current.Bottom</c> and asserted to be non-negative.</param>
/// <param name="relevantEdges">Text edges each row is tested against via <c>IntersectsLine</c>.</param>
/// <param name="relevantEdgeCount">Number of relevant edges a row is expected to cross; one miss
/// is tolerated only when this count is greater than 3.</param>
/// <param name="horizontalRulings">Horizontal rulings used to extend the table. They are walked
/// backwards for the lower side and forwards for the upper side, and each walk stops at the first
/// ruling that is too far away - presumably the list is sorted by Y; confirm against the caller.</param>
/// <returns>The padded table rectangle, or <c>null</c> when no row qualified (area is 0).</returns>
private TableRectangle getTableFromText(List<TableLine> lines, List<TextEdge> relevantEdges, int relevantEdgeCount, List<Ruling> horizontalRulings)
{
    TableRectangle table = new TableRectangle();

    TableLine previous = null;
    TableLine firstRow = null;
    TableLine lastRow = null;

    int spacingSamples = 0;
    double spacingSum = 0;

    // Large tables may miss one edge per row; small ones (3 edges or fewer) may not,
    // otherwise ordinary paragraphs would be picked up as tables.
    int edgeTolerance = relevantEdgeCount <= 3 ? 0 : 1;
    int requiredEdges = relevantEdgeCount - edgeTolerance;

    foreach (TableLine current in lines)
    {
        if (firstRow != null && spacingSamples > 0)
        {
            // Stop the table as soon as a row sits much further away than the
            // average spacing of the rows collected so far.
            double tableLineThreshold = (spacingSum / spacingSamples) * 2.5;
            double lineDistance = previous.Bottom - current.Bottom;
            System.Diagnostics.Debug.Assert(lineDistance >= 0);

            if (lineDistance > tableLineThreshold)
            {
                lastRow = previous;
                break;
            }
        }

        // Count how many of the relevant edges this row crosses.
        int crossedEdges = 0;
        for (int e = 0; e < relevantEdges.Count; e++)
        {
            if (current.IntersectsLine(relevantEdges[e].Line))
            {
                crossedEdges++;
            }
        }

        if (crossedEdges >= requiredEdges)
        {
            // Candidate row: record the spacing to the previous row once the table has started.
            if (firstRow != null && previous != null)
            {
                spacingSamples++;
                spacingSum += previous.Bottom - current.Bottom;
            }

            if (table.Area == 0)
            {
                // The rectangle is still empty, so this row seeds it.
                firstRow = current;
                table.SetRect(current);
            }
            else
            {
                // Grow the rectangle sideways and downwards to include this row.
                table.SetLeft(Math.Min(table.Left, current.Left));
                table.SetBottom(Math.Min(table.Bottom, current.Bottom));
                table.SetRight(Math.Max(table.Right, current.Right));
            }
        }
        else if (firstRow != null && lastRow == null)
        {
            // First non-matching row after the table started: remember where it ended.
            lastRow = previous;
        }

        previous = current;
    }

    // Nothing qualified, and the ruling pass below cannot create a table from nothing.
    if (table.Area == 0)
    {
        return null;
    }

    // Covers one-row tables and tables that run to the last row supplied.
    if (lastRow == null)
    {
        lastRow = previous;
    }

    // Average row height: mean spacing when available, else the height of the last row.
    double averageRowHeight;
    if (spacingSamples > 0)
    {
        System.Diagnostics.Debug.Assert(spacingSum >= 0);
        averageRowHeight = spacingSum / spacingSamples;
    }
    else
    {
        averageRowHeight = lastRow.Height;
    }

    // Lower side: walk the rulings backwards, skipping those whose Y1 is above the
    // table's Bottom, and absorb every ruling within 1.5 average row heights.
    double reach = averageRowHeight * 1.5;
    for (int index = horizontalRulings.Count - 1; index >= 0; index--)
    {
        Ruling lower = horizontalRulings[index];
        if (lower.Y1 > table.Bottom)
        {
            continue;
        }

        double gapBelow = table.Bottom - lower.Y2;
        System.Diagnostics.Debug.Assert(gapBelow >= 0);

        if (gapBelow <= reach)
        {
            table.SetBottom(Math.Min(table.Bottom, lower.Y2));
            table.SetLeft(Math.Min(table.Left, lower.X1));
            table.SetRight(Math.Max(table.Right, lower.X2));
        }
        else
        {
            // This ruling is out of reach; the scan gives up here.
            break;
        }
    }

    // Upper side: same idea walking forwards, with a larger reach because table
    // headings tend to be taller (up to roughly three rows of text).
    reach = averageRowHeight * 3.8;
    foreach (Ruling upper in horizontalRulings)
    {
        if (upper.Y1 < table.Top)
        {
            continue;
        }

        double gapAbove = upper.Y1 - table.Top;
        System.Diagnostics.Debug.Assert(gapAbove >= 0);

        if (gapAbove <= reach)
        {
            table.SetTop(Math.Max(table.Top, upper.Y2));
            table.SetLeft(Math.Min(table.Left, upper.X1));
            table.SetRight(Math.Max(table.Right, upper.X2));
        }
        else
        {
            break;
        }
    }

    // Round each side outwards and pad it, since the detected lines are a little fuzzy.
    table.SetTop(Math.Ceiling(table.Top) + TABLE_PADDING_AMOUNT);
    table.SetBottom(Math.Floor(table.Bottom) - TABLE_PADDING_AMOUNT);
    table.SetLeft(Math.Floor(table.Left) - TABLE_PADDING_AMOUNT);
    table.SetRight(Math.Ceiling(table.Right) + TABLE_PADDING_AMOUNT);

    return table;
}