/// <summary>
/// Renders a table cell hierarchy as a minimal HTML <c>&lt;table&gt;</c> string.
/// </summary>
/// <remarks>
/// One <c>&lt;td&gt;</c> is emitted for every (row, column) position, iterating
/// <c>table.Rows</c> by <c>table.Cols</c>. A table without children repeats its own
/// text in every cell. Otherwise children are looked up in row-major order
/// (<c>row * Cols + col</c>); a child that has children of its own is rendered
/// recursively as a nested table, and a leaf child contributes its text.
/// Leaf text is HTML-encoded so characters such as <c>&lt;</c> and <c>&amp;</c>
/// coming from the document cannot break or inject markup.
/// </remarks>
/// <param name="table">Root cell to render; may be <c>null</c>.</param>
/// <returns>
/// The HTML markup, or <c>null</c> when <paramref name="table"/> is <c>null</c>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The table has children but fewer than <c>Rows * Cols</c> of them.
/// </exception>
private string GetSimpleHTMLTable(PdfTableCell table)
{
    if (table == null)
    {
        return null;
    }

    var children = table.Children;

    // Invariant for the whole table, so evaluate it once. A missing child list
    // is treated the same as an empty one.
    bool hasChildren = children != null && children.Count > 0;

    var sb = new StringBuilder();
    sb.Append("<table>");

    for (int i = 0; i < table.Rows; i++)
    {
        sb.Append("<tr>");

        for (int j = 0; j < table.Cols; j++)
        {
            sb.Append("<td>");

            if (!hasChildren)
            {
                sb.Append(System.Net.WebUtility.HtmlEncode(table.Text));
            }
            else
            {
                // Children are addressed in row-major order.
                var idx = i * table.Cols + j;
                if (idx >= children.Count)
                {
                    // The table is malformed, which indicates a bug in the
                    // extraction step that produced it.
                    throw new InvalidOperationException("bad format");
                }

                var cell = children[idx];
                if (cell.Children == null || cell.Children.Count == 0)
                {
                    sb.Append(System.Net.WebUtility.HtmlEncode(cell.Text));
                }
                else
                {
                    // Nested output is already markup; do not encode it again.
                    sb.Append(GetSimpleHTMLTable(cell));
                }
            }

            sb.Append("</td>");
        }

        sb.Append("</tr>");
    }

    sb.Append("</table>");
    return sb.ToString();
}
/// <summary>
/// Builds a <see cref="PdfTableCell"/> hierarchy from a list of rectangles.
/// </summary>
/// <remarks>
/// The first rectangle supplies the anchor (its <c>Left</c> and <c>Top</c>). The
/// overall width is the sum of the widths of the rectangles whose top matches the
/// anchor's top, and the overall height is the sum of the heights of the rectangles
/// whose left matches the anchor's left; the bottom edge is computed as
/// <c>top - height</c>. Column boundaries (<c>Xs</c>) are found by repeatedly stepping
/// right by the widest rectangle whose left edge matches the current position, and
/// row boundaries (<c>Ys</c>) by stepping down by the tallest rectangle whose top edge
/// matches. All edge comparisons go through <c>NearlyEqual</c>.
/// When more than one rectangle is supplied and <paramref name="level"/> is below
/// <c>MaxHierarchy</c>, each grid position is turned into a child cell by recursing on
/// the rectangles that lie inside it; otherwise the cell's text is read through
/// <c>GetResultantText</c> for the cell's rectangle.
/// </remarks>
/// <param name="rects">Rectangles making up the table; may be <c>null</c> or empty.</param>
/// <param name="level">Current nesting depth; callers normally leave it at 0.</param>
/// <returns>
/// The constructed cell, or <c>null</c> when <paramref name="rects"/> is <c>null</c>
/// or empty.
/// </returns>
private PdfTableCell MakeTable(List<Rectangle> rects, int level = 0)
{
    if (rects == null || rects.Count == 0)
    {
        return null;
    }

    var cell = new PdfTableCell(Rotation);
    var x = rects[0].Left;
    var y = rects[0].Top;

    // Use the same tolerance as the boundary scans below; exact float equality
    // would drop rectangles whose edge differs only by rounding noise.
    var w = rects.Where(r => NearlyEqual(r.Top, y)).Sum(r => r.Width);
    var h = rects.Where(r => NearlyEqual(r.Left, x)).Sum(r => r.Height);
    cell.Rectangle = new Rectangle(x, y - h, x + w, y);

    // Column boundaries.
    cell.Xs.Add(x);
    while (true)
    {
        var column = rects.Where(r => NearlyEqual(r.Left, x)).ToList();
        if (column.Count == 0)
        {
            break;
        }

        var step = column.Max(r => r.Width);
        if (!(step > 0))
        {
            // Zero, negative or NaN width: the cursor would never move past
            // these rectangles and the scan would not terminate.
            break;
        }

        x += step;

        // Only record the boundary if the right border has not been reached yet.
        if (x + Variance < cell.Rectangle.Right)
        {
            cell.Xs.Add(x);
        }
    }

    // Row boundaries.
    cell.Ys.Add(y);
    while (true)
    {
        var row = rects.Where(r => NearlyEqual(r.Top, y)).ToList();
        if (row.Count == 0)
        {
            break;
        }

        var step = row.Max(r => r.Height);
        if (!(step > 0))
        {
            // Same termination guard as for the columns.
            break;
        }

        y = y - step;

        // Only record the boundary if the bottom border has not been reached yet.
        if (y - Variance > cell.Rectangle.Bottom)
        {
            cell.Ys.Add(y);
        }
    }

    cell.Children = new List<PdfTableCell>();

    if (rects.Count > 1 && level < MaxHierarchy)
    {
        for (int j = 0; j < cell.RealRows; j++)
        {
            for (int i = 0; i < cell.RealCols; i++)
            {
                var ix = cell.Xs[i];
                var iy = cell.Ys[j];

                // The last column/row extends to the outer border; the others
                // extend to the next recorded boundary.
                var iw = 0f;
                var ih = 0f;
                if (i == cell.RealCols - 1)
                {
                    iw = cell.Rectangle.Right - ix;
                }
                else
                {
                    iw = cell.Xs[i + 1] - ix;
                }

                if (j == cell.RealRows - 1)
                {
                    ih = iy - cell.Rectangle.Bottom;
                }
                else
                {
                    ih = iy - cell.Ys[j + 1];
                }

                var right = ix + iw;
                var bottom = iy - ih;

                // Rectangles fully inside this grid position, with tolerance on
                // every edge.
                var innerRects = rects.Where(r =>
                    (r.Left > ix || NearlyEqual(r.Left, ix))
                    && (r.Right < right || NearlyEqual(r.Right, right))
                    && (r.Top < iy || NearlyEqual(r.Top, iy))
                    && (r.Bottom > bottom || NearlyEqual(r.Bottom, bottom)))
                    .ToList();

                // NOTE(review): a grid position containing no rectangles adds no
                // child, so Children can hold fewer entries than
                // RealRows * RealCols. Confirm that index-based consumers of
                // Children expect this.
                if (innerRects.Count > 0)
                {
                    var innerCell = MakeTable(innerRects, level + 1);
                    cell.Children.Add(innerCell);
                }
            }
        }
    }
    else
    {
        cell.Text = GetResultantText(new RectangleSection(cell.Rectangle, Variance));
    }

    return cell;
}