Пример #1
0
        /// <summary>
        /// Collects one <see cref="TablePos"/> per scanned page for a table that starts at
        /// <paramref name="tableNamePos"/> and ends at <paramref name="tableEndPos"/>.
        /// </summary>
        /// <remarks>
        /// Side effect: <c>tableNamePos.AxisValue</c> is overwritten with
        /// <c>tableNamePos.AxisValueWithLineHeight</c> before any page is scanned.
        /// Pages for which <c>GetTablePos</c> returns <c>null</c> contribute nothing to the result.
        /// When the start page yields a table, its vertical lines are handed to every later page
        /// and those pages are flagged as subsequent pages.
        /// </remarks>
        /// <param name="tableNamePos">Position of the table name; its page is the first page scanned.</param>
        /// <param name="tableEndPos">Position of the table end; its page is the last page scanned.</param>
        /// <returns>The table positions found, in the order the pages were scanned; never <c>null</c>.</returns>
        private List<TablePos> GetTablePoses(LinePos tableNamePos, LinePos tableEndPos)
        {
            List<TablePos> found = new List<TablePos>();

            tableNamePos.AxisValue = tableNamePos.AxisValueWithLineHeight;

            int lastPage  = tableEndPos.PageNum;
            int firstPage = tableNamePos.PageNum;

            // Table starts and ends on the same page: a single lookup is enough.
            if (lastPage == firstPage)
            {
                TablePos onlyPage = GetTablePos(firstPage, tableNamePos, tableEndPos, false, false, null);
                if (onlyPage != null)
                {
                    found.Add(onlyPage);
                }
                return found;
            }

            // Start page: from the table name down to the bottom of that page.
            LinePos  firstPageBottom = pdfTronHelper.GetBottomPosOfPage(firstPage, false, -1, -1);
            TablePos head            = GetTablePos(firstPage, tableNamePos, firstPageBottom, false, false, null);

            bool hasHead = head != null;
            SortedDictionary<double, FormLineList> headVerticalLines = null;
            if (hasHead)
            {
                found.Add(head);
                headVerticalLines = head.VerticalLines;
            }

            // Pages strictly between the start page and the end page: scanned down to the page bottom.
            for (int page = firstPage + 1; page < lastPage; page++)
            {
                LinePos  pageBottom = pdfTronHelper.GetBottomPosOfPage(page, false, -1, -1);
                TablePos middle     = GetTablePos(page, null, pageBottom, false, hasHead, headVerticalLines);
                if (middle != null)
                {
                    found.Add(middle);
                }
            }

            // End page: scanned down to the table end position.
            TablePos tail = GetTablePos(lastPage, null, tableEndPos, true, hasHead, headVerticalLines);
            if (tail != null)
            {
                found.Add(tail);
            }

            return found;
        }
Пример #2
0
        /// <summary>
        /// Adds a highlight to the page of <paramref name="tablePos"/> for every horizontal and
        /// vertical line recorded in it.
        /// </summary>
        /// <remarks>
        /// Each line becomes a thin rectangle that starts at the line's start point. Its thickness is
        /// 3 when <c>IsExistent</c> is true and 1 otherwise. The rectangle is passed through
        /// <c>PdfTronHelper.RevertTransportRect</c> before being handed to <c>CreateHighlight</c>.
        /// </remarks>
        /// <param name="tablePos">Table lines and the page number they belong to.</param>
        /// <param name="pdfDoc">Document that owns the page and receives the highlights.</param>
        void CreateTablePosHighlight(TablePos tablePos, PDFDoc pdfDoc)
        {
            Page page = pdfDoc.GetPage(tablePos.PageNum);

            // The helper depends only on pdfDoc, so one instance serves every line
            // instead of allocating a new one per iteration.
            PdfTronHelper rectHelper = new PdfTronHelper(pdfDoc);

            foreach (FormLine line in tablePos.HorizontialLines.Concat(tablePos.VerticalLines).SelectMany(pair => pair.Value))
            {
                double lineWidth = line.IsExistent ? 3 : 1;

                // Transverse lines are thickened along y, all other lines along x.
                Rect rect = line.IsTransverseLine
                    ? new Rect(line.StartPoint.x, line.StartPoint.y, line.EndPoint.x, line.StartPoint.y + lineWidth)
                    : new Rect(line.StartPoint.x, line.StartPoint.y, line.StartPoint.x + lineWidth, line.EndPoint.y);

                // NOTE(review): presumably adjusts rect in place back to page coordinates;
                // any return value is ignored here, as in the original code - confirm.
                rectHelper.RevertTransportRect(tablePos.PageNum, rect);
                CreateHighlight(pdfDoc, page, rect);
            }
        }
Пример #3
0
        /// <summary>
        /// Builds the <see cref="TablePos"/> of one page from the form lines that
        /// <c>FormLineSearcher.GetFormLines</c> reports between two positions.
        /// </summary>
        /// <remarks>
        /// Side effect: when <paramref name="endPos"/> is not <c>null</c>, its <c>AxisValue</c> is
        /// overwritten with its <c>AxisValueWithLineHeight</c> before the search runs.
        /// </remarks>
        /// <param name="pageNum">Page to search; also stored in the result.</param>
        /// <param name="startPos">Start position forwarded to the searcher; may be <c>null</c>.</param>
        /// <param name="endPos">End position forwarded to the searcher; may be <c>null</c>.</param>
        /// <param name="isEndTablePos">Not used by this method.</param>
        /// <param name="isSubsequentPage">Forwarded to the searcher unchanged.</param>
        /// <param name="lastPageVerticalLines">Forwarded to the searcher unchanged; may be <c>null</c>.</param>
        /// <returns>
        /// A table position whose horizontal lines are element 0 and whose vertical lines are element 1
        /// of the searcher's result, or <c>null</c> when the searcher returns <c>null</c>.
        /// </returns>
        private TablePos GetTablePos(int pageNum, LinePos startPos, LinePos endPos, bool isEndTablePos, bool isSubsequentPage, SortedDictionary<double, FormLineList> lastPageVerticalLines)
        {
            FormLineSearcher lineSearcher = new FormLineSearcher(pdfDoc);

            if (endPos != null)
            {
                endPos.AxisValue = endPos.AxisValueWithLineHeight;
            }

            SortedDictionary<double, FormLineList>[] linesByOrientation =
                lineSearcher.GetFormLines(pageNum, startPos, endPos, isSubsequentPage, lastPageVerticalLines);

            // NOTE: a RevertAxisTransform(...) call on the result is commented out in this method.
            return linesByOrientation == null
                ? null
                : new TablePos
                {
                    PageNum          = pageNum,
                    HorizontialLines = linesByOrientation[0],
                    VerticalLines    = linesByOrientation[1]
                };
        }
        /// <summary>
        /// Extracts the text of a PDF table, described by its ruling lines, into a <see cref="FreeTable"/>.
        /// </summary>
        /// <remarks>
        /// Every group of vertical lines is collapsed into one zero-width rectangle at its key, and
        /// every group of horizontal lines into one zero-height rectangle at its key. The horizontal
        /// rectangles are then walked in reverse enumeration order. Each pair of consecutive
        /// horizontal lines yields one row, so the result has one row fewer than there are
        /// horizontal line groups. Cell text is read with
        /// <c>PageTextExtractor.SearchTextWithStrictMode</c>.
        /// </remarks>
        /// <param name="doc">Document that contains the page <c>tablePos.PageNum</c>.</param>
        /// <param name="tablePos">Horizontal and vertical lines of the table on one page.</param>
        /// <returns>The extracted table; a row may be empty when no vertical line bounds a cell in it.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="doc"/> or <paramref name="tablePos"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="tablePos"/> has no vertical or no horizontal lines.
        /// </exception>
        public static FreeTable Extract(pdftron.PDF.PDFDoc doc, TablePos tablePos)
        {
            if (doc == null)
            {
                throw new ArgumentNullException("doc");
            }
            if (tablePos == null)
            {
                throw new ArgumentNullException("tablePos");
            }

            // Slack used by every coordinate comparison below, in the units of the line data.
            const double Tolerance = 5;

            PageTextExtractor pdfPageProcess = new PageTextExtractor(doc.GetPage(tablePos.PageNum));

            var vLines = new List<System.Windows.Rect>(tablePos.VerticalLines.Count);
            var hLines = new List<System.Windows.Rect>(tablePos.HorizontialLines.Count);

            // One zero-width rectangle per vertical group: x = key, y range = full extent of the group.
            foreach (var kv in tablePos.VerticalLines)
            {
                var top    = kv.Value.Min(n => n.StartPoint.y);
                var bottom = kv.Value.Max(n => n.EndPoint.y);
                vLines.Add(new System.Windows.Rect(kv.Key, top, 0, bottom - top));
            }

            // One zero-height rectangle per horizontal group: y = key, x range = full extent of the group.
            foreach (var kv in tablePos.HorizontialLines)
            {
                var left  = kv.Value.Min(n => n.StartPoint.x);
                var right = kv.Value.Max(n => n.EndPoint.x);
                hLines.Add(new System.Windows.Rect(left, kv.Key, right - left, 0));
            }

            if (vLines.Count == 0 || hLines.Count == 0)
            {
                // A specific exception type instead of bare Exception; the message is unchanged.
                throw new ArgumentException("vertical or horizontal lines is empty");
            }

            hLines.Reverse();

            FreeTable freeTable = new FreeTable();

            for (int i = 1; i < hLines.Count; i++)
            {
                // hLines[i - 1] and hLines[i] are the two horizontal lines bounding this row.
                var bandStartY = hLines[i - 1].Y;
                var bandEndY   = hLines[i].Y;
                var rowLeft    = hLines[i - 1].Left;

                // Skip vertical lines lying at least Tolerance to the left of the row's first line.
                var prevCol = 0;
                while (prevCol < vLines.Count && !(vLines[prevCol].X > rowLeft - Tolerance))
                {
                    prevCol++;
                }

                FreeTableRow row = new FreeTableRow();

                // If every vertical line was skipped there is no left edge to start from, so the
                // row stays empty. Indexing vLines[prevCol] here used to throw
                // ArgumentOutOfRangeException.
                if (prevCol < vLines.Count)
                {
                    var cellLeft = vLines[prevCol].X;

                    for (int j = prevCol + 1; j < vLines.Count; j++)
                    {
                        var vline = vLines[j];

                        // A vertical line closes the current cell when its Bottom (max EndPoint.y of
                        // its group) exceeds this row's second line by more than Tolerance, or when
                        // it is the last vertical line.
                        if (vline.Bottom - bandEndY > Tolerance || j == vLines.Count - 1)
                        {
                            // Extend the cell across further rows until a horizontal line is found
                            // that starts more than Tolerance to the left of this vertical line,
                            // stopping at the last horizontal line at the latest.
                            int spanEnd = i;

                            for (; spanEnd < hLines.Count - 1; spanEnd++)
                            {
                                if (vline.X - hLines[spanEnd].Left > Tolerance)
                                {
                                    break;
                                }
                            }

                            var data = pdfPageProcess.SearchTextWithStrictMode(
                                new System.Windows.Rect(cellLeft, hLines[spanEnd].Y, vline.X - cellLeft, bandStartY - hLines[spanEnd].Y));

                            row.Add(new FreeTableCell()
                            {
                                Value = data, ColSpan = j - prevCol, RowSpan = spanEnd - i + 1
                            });

                            // The closing line becomes the left edge of the next cell.
                            prevCol  = j;
                            cellLeft = vline.X;
                        }
                    }
                }

                freeTable.Add(row);
            }

            return freeTable;
        }