예제 #1
0
        /// <summary>
        /// Renders a <see cref="PdfTableCell"/> as a minimal HTML <c>&lt;table&gt;</c> string.
        /// </summary>
        /// <remarks>
        /// Emits <c>table.Rows</c> rows of <c>table.Cols</c> cells. When the table has no children,
        /// its own text is written into every cell. Otherwise the child at index
        /// <c>row * table.Cols + col</c> supplies the cell content: its text when it has no children,
        /// or a nested table (via recursion) when it does. Leaf text is HTML-encoded so characters
        /// such as <c>&lt;</c> and <c>&amp;</c> cannot corrupt the generated markup.
        /// </remarks>
        /// <param name="table">The cell to render; may be <c>null</c>.</param>
        /// <returns>
        /// The HTML markup, or <c>null</c> when <paramref name="table"/> is <c>null</c>.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The table has children, but fewer than <c>Rows * Cols</c> positions require.
        /// </exception>
        private string GetSimpleHTMLTable(PdfTableCell table)
        {
            if (table == null)
            {
                return null;
            }

            var html = new StringBuilder();
            html.Append("<table>");

            for (int row = 0; row < table.Rows; row++)
            {
                html.Append("<tr>");

                for (int col = 0; col < table.Cols; col++)
                {
                    html.Append("<td>");

                    if (table.Children.Count == 0)
                    {
                        // A childless table repeats its own text in every cell.
                        html.Append(System.Net.WebUtility.HtmlEncode(table.Text));
                    }
                    else
                    {
                        // Children are addressed in row-major order.
                        var childIndex = row * table.Cols + col;
                        if (childIndex >= table.Children.Count)
                        {
                            // The table structure is inconsistent: there are fewer children
                            // than grid positions, which points at a bug in the extraction.
                            throw new InvalidOperationException("bad format");
                        }

                        var child = table.Children[childIndex];
                        if (child.Children.Count == 0)
                        {
                            html.Append(System.Net.WebUtility.HtmlEncode(child.Text));
                        }
                        else
                        {
                            // Nested markup is already HTML; it must not be encoded again.
                            html.Append(GetSimpleHTMLTable(child));
                        }
                    }

                    html.Append("</td>");
                }

                html.Append("</tr>");
            }

            html.Append("</table>");
            return html.ToString();
        }
예제 #2
0
        /// <summary>
        /// Builds a (possibly nested) <see cref="PdfTableCell"/> from a set of cell rectangles.
        /// </summary>
        /// <remarks>
        /// The first rectangle supplies the starting corner (its <c>Left</c> and <c>Top</c>).
        /// The overall width is the sum of the widths of the rectangles whose top matches that
        /// corner, and the overall height is the sum of the heights of the rectangles whose left
        /// matches it; both matches use <c>NearlyEqual</c>, the same tolerance used by the
        /// boundary scans. Column and row boundaries are then collected by repeatedly stepping
        /// from the corner by the largest width (or height) found at the current boundary.
        /// When more than one rectangle is given and <paramref name="level"/> is below
        /// <c>MaxHierarchy</c>, every grid position is filled recursively from the rectangles
        /// lying inside it; otherwise the cell text is read with <c>GetResultantText</c>.
        /// </remarks>
        /// <param name="rects">Rectangles that make up the table; may be <c>null</c> or empty.</param>
        /// <param name="level">Current nesting depth, incremented on each recursive call.</param>
        /// <returns>
        /// The constructed cell, or <c>null</c> when <paramref name="rects"/> is <c>null</c> or empty.
        /// </returns>
        private PdfTableCell MakeTable(List<Rectangle> rects, int level = 0)
        {
            if (rects == null || rects.Count == 0)
            {
                return null;
            }

            var cell = new PdfTableCell(Rotation);
            var x = rects[0].Left;
            var y = rects[0].Top;

            // Use the same tolerance as the boundary scans below; exact float equality
            // would drop rectangles whose edge differs only by rounding noise.
            var totalWidth = rects.Where(r => NearlyEqual(r.Top, y)).Sum(r => r.Width);
            var totalHeight = rects.Where(r => NearlyEqual(r.Left, x)).Sum(r => r.Height);

            // NOTE(review): argument order presumably (left, bottom, right, top) — confirm
            // against the Rectangle type in use.
            cell.Rectangle = new Rectangle(x, y - totalHeight, x + totalWidth, y);

            // Column boundaries: walk away from the starting left edge.
            cell.Xs.Add(x);
            while (true)
            {
                var atBoundary = rects.Where(r => NearlyEqual(r.Left, x)).ToList();
                if (atBoundary.Count == 0)
                {
                    break;
                }

                var next = x + atBoundary.Max(r => r.Width);
                if (!(next > x))
                {
                    // A zero (or otherwise non-advancing) width would select the same
                    // rectangles again on the next pass and never terminate.
                    break;
                }

                x = next;

                // Record the boundary only while it is still short of the outer border.
                if (x + Variance < cell.Rectangle.Right)
                {
                    cell.Xs.Add(x);
                }
            }

            // Row boundaries: walk away from the starting top edge by subtracting heights.
            cell.Ys.Add(y);
            while (true)
            {
                var atBoundary = rects.Where(r => NearlyEqual(r.Top, y)).ToList();
                if (atBoundary.Count == 0)
                {
                    break;
                }

                var next = y - atBoundary.Max(r => r.Height);
                if (!(next < y))
                {
                    // Same termination guard as for the columns.
                    break;
                }

                y = next;

                // Record the boundary only while it is still short of the outer border.
                if (y - Variance > cell.Rectangle.Bottom)
                {
                    cell.Ys.Add(y);
                }
            }

            cell.Children = new List<PdfTableCell>();
            if (rects.Count > 1 && level < MaxHierarchy)
            {
                // NOTE(review): RealRows / RealCols are defined elsewhere; the indexing below
                // assumes they do not exceed Ys.Count / Xs.Count — confirm.
                for (int row = 0; row < cell.RealRows; row++)
                {
                    for (int col = 0; col < cell.RealCols; col++)
                    {
                        var cellLeft = cell.Xs[col];
                        var cellTop = cell.Ys[row];

                        // The last column/row extends to the outer border; the others
                        // end at the next recorded boundary.
                        var cellWidth = col == cell.RealCols - 1
                            ? cell.Rectangle.Right - cellLeft
                            : cell.Xs[col + 1] - cellLeft;
                        var cellHeight = row == cell.RealRows - 1
                            ? cellTop - cell.Rectangle.Bottom
                            : cellTop - cell.Ys[row + 1];

                        var cellRight = cellLeft + cellWidth;
                        var cellBottom = cellTop - cellHeight;

                        // Rectangles lying inside this grid position, with tolerance on every edge.
                        var innerRects = rects.Where(r =>
                            (r.Left > cellLeft || NearlyEqual(r.Left, cellLeft)) &&
                            (r.Right < cellRight || NearlyEqual(r.Right, cellRight)) &&
                            (r.Top < cellTop || NearlyEqual(r.Top, cellTop)) &&
                            (r.Bottom > cellBottom || NearlyEqual(r.Bottom, cellBottom))).ToList();

                        // NOTE(review): an empty grid position adds no child, so Children can
                        // hold fewer entries than RealRows * RealCols; code that indexes
                        // Children by row and column should confirm this is intended.
                        if (innerRects.Count > 0)
                        {
                            cell.Children.Add(MakeTable(innerRects, level + 1));
                        }
                    }
                }
            }
            else
            {
                cell.Text = GetResultantText(new RectangleSection(cell.Rectangle, Variance));
            }

            return cell;
        }