/// <summary>
/// Collects the per-page table positions for a table that begins at
/// <paramref name="tableNamePos"/> and finishes at <paramref name="tableEndPos"/>.
/// </summary>
/// <remarks>
/// <c>tableNamePos.AxisValue</c> is overwritten with <c>tableNamePos.AxisValueWithLineHeight</c>
/// before any page is searched. A page for which <c>GetTablePos</c> yields null adds nothing
/// to the result.
/// </remarks>
/// <param name="tableNamePos">Position of the table name; its page is the first page searched.</param>
/// <param name="tableEndPos">Position of the table end; its page is the last page searched.</param>
/// <returns>The positions found, in the order the pages were visited. Never null, may be empty.</returns>
private List<TablePos> GetTablePoses(LinePos tableNamePos, LinePos tableEndPos)
{
    List<TablePos> found = new List<TablePos>();
    tableNamePos.AxisValue = tableNamePos.AxisValueWithLineHeight;

    int firstPage = tableNamePos.PageNum;
    int lastPage = tableEndPos.PageNum;

    // Table confined to one page: a single search bounded by both positions.
    if (lastPage == firstPage)
    {
        TablePos onlyPos = GetTablePos(firstPage, tableNamePos, tableEndPos, false, false, null);
        if (onlyPos != null)
        {
            found.Add(onlyPos);
        }
        return found;
    }

    // First page: search from the table name to the position reported by GetBottomPosOfPage.
    LinePos firstPageBottom = pdfTronHelper.GetBottomPosOfPage(firstPage, false, -1, -1);
    TablePos startTablePos = GetTablePos(firstPage, tableNamePos, firstPageBottom, false, false, null);

    // Later pages are only treated as continuations (and only receive the first page's
    // vertical lines) when the first page actually produced a table position.
    bool hasStartPos = startTablePos != null;
    SortedDictionary<double, FormLineList> carriedVerticalLines = null;
    if (hasStartPos)
    {
        found.Add(startTablePos);
        carriedVerticalLines = startTablePos.VerticalLines;
    }

    // Pages strictly between the first and the last one: no start position is passed.
    for (int page = firstPage + 1; page < lastPage; page++)
    {
        LinePos pageBottom = pdfTronHelper.GetBottomPosOfPage(page, false, -1, -1);
        TablePos middlePos = GetTablePos(page, null, pageBottom, false, hasStartPos, carriedVerticalLines);
        if (middlePos != null)
        {
            found.Add(middlePos);
        }
    }

    // Last page: bounded by the table end position.
    TablePos lastPos = GetTablePos(lastPage, null, tableEndPos, true, hasStartPos, carriedVerticalLines);
    if (lastPos != null)
    {
        found.Add(lastPos);
    }

    return found;
}
/// <summary>
/// Adds a highlight for every horizontal and vertical line stored in
/// <paramref name="tablePos"/>, on page <c>tablePos.PageNum</c> of <paramref name="pdfDoc"/>.
/// </summary>
/// <remarks>
/// Each line becomes a thin rectangle anchored at the line's start point: lines flagged
/// <c>IsExistent</c> are 3 units thick, all others 1 unit. The rectangle is passed through
/// <c>PdfTronHelper.RevertTransportRect</c> before <c>CreateHighlight</c> is called.
/// </remarks>
/// <param name="tablePos">Table lines to highlight and the page they belong to.</param>
/// <param name="pdfDoc">Document that receives the highlights.</param>
void CreateTablePosHighlight(TablePos tablePos, PDFDoc pdfDoc)
{
    Page page = pdfDoc.GetPage(tablePos.PageNum);

    // The helper depends only on pdfDoc, so build it once instead of once per line.
    PdfTronHelper rectHelper = new PdfTronHelper(pdfDoc);

    foreach (FormLine line in tablePos.HorizontialLines.Concat(tablePos.VerticalLines).SelectMany(pair => pair.Value))
    {
        double thickness = line.IsExistent ? 3 : 1;

        // Transverse lines are thickened along y, all other lines along x.
        Rect rect = line.IsTransverseLine
            ? new Rect(line.StartPoint.x, line.StartPoint.y, line.EndPoint.x, line.StartPoint.y + thickness)
            : new Rect(line.StartPoint.x, line.StartPoint.y, line.StartPoint.x + thickness, line.EndPoint.y);

        rectHelper.RevertTransportRect(tablePos.PageNum, rect);
        CreateHighlight(pdfDoc, page, rect);
    }
}
/// <summary>
/// Searches one page for table lines and wraps them in a <see cref="TablePos"/>.
/// </summary>
/// <remarks>
/// When <paramref name="endPos"/> is not null, its <c>AxisValue</c> is overwritten with its
/// <c>AxisValueWithLineHeight</c> before the search runs.
/// </remarks>
/// <param name="pageNum">Page to search.</param>
/// <param name="startPos">Start position handed to the line searcher; may be null.</param>
/// <param name="endPos">End position handed to the line searcher; may be null.</param>
/// <param name="isEndTablePos">Not read by this method.</param>
/// <param name="isSubsequentPage">Forwarded unchanged to <c>FormLineSearcher.GetFormLines</c>.</param>
/// <param name="lastPageVerticalLines">Forwarded unchanged to <c>FormLineSearcher.GetFormLines</c>; may be null.</param>
/// <returns>
/// A <see cref="TablePos"/> holding the horizontal lines (element 0 of the search result) and
/// the vertical lines (element 1), or null when the searcher returns null.
/// </returns>
private TablePos GetTablePos(int pageNum, LinePos startPos, LinePos endPos, bool isEndTablePos, bool isSubsequentPage, SortedDictionary<double, FormLineList> lastPageVerticalLines)
{
    FormLineSearcher lineSearcher = new FormLineSearcher(pdfDoc);

    if (endPos != null)
    {
        endPos.AxisValue = endPos.AxisValueWithLineHeight;
    }

    SortedDictionary<double, FormLineList>[] linesByDirection =
        lineSearcher.GetFormLines(pageNum, startPos, endPos, isSubsequentPage, lastPageVerticalLines);

    // NOTE(review): no axis transform is reverted here; a RevertAxisTransform call on the
    // result used to sit at this point but was commented out.
    return linesByDirection == null
        ? null
        : new TablePos
        {
            PageNum = pageNum,
            HorizontialLines = linesByDirection[0],
            VerticalLines = linesByDirection[1]
        };
}
/// <summary>
/// Extracts the text of a table on one PDF page into a <see cref="FreeTable"/>, using the
/// horizontal and vertical lines of <paramref name="tablePos"/> as the cell grid.
/// </summary>
/// <param name="doc">Document containing page <c>tablePos.PageNum</c>.</param>
/// <param name="tablePos">
/// Table lines: <c>VerticalLines</c> keyed by x coordinate, <c>HorizontialLines</c> keyed by y coordinate.
/// </param>
/// <returns>
/// A table with one row per pair of consecutive horizontal lines. A row is empty when no
/// vertical line bounds a cell in it.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="doc"/> or <paramref name="tablePos"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// <paramref name="tablePos"/> has no vertical lines or no horizontal lines.
/// </exception>
public static FreeTable Extract(pdftron.PDF.PDFDoc doc, TablePos tablePos)
{
    if (doc == null)
    {
        throw new ArgumentNullException("doc");
    }
    if (tablePos == null)
    {
        throw new ArgumentNullException("tablePos");
    }

    // Slack applied whenever two line coordinates are compared.
    const double Tolerance = 5;

    PageTextExtractor pdfPageProcess = new PageTextExtractor(doc.GetPage(tablePos.PageNum));

    // Collapse each group of line segments into one zero-width (vertical) or
    // zero-height (horizontal) rectangle spanning the whole group.
    var vLines = new List<System.Windows.Rect>(tablePos.VerticalLines.Count);
    var hLines = new List<System.Windows.Rect>(tablePos.HorizontialLines.Count);
    foreach (var kv in tablePos.VerticalLines)
    {
        var top = kv.Value.Min(n => n.StartPoint.y);
        var bottom = kv.Value.Max(n => n.EndPoint.y);
        vLines.Add(new System.Windows.Rect(kv.Key, top, 0, bottom - top));
    }
    foreach (var kv in tablePos.HorizontialLines)
    {
        var left = kv.Value.Min(n => n.StartPoint.x);
        var right = kv.Value.Max(n => n.EndPoint.x);
        hLines.Add(new System.Windows.Rect(left, kv.Key, right - left, 0));
    }

    if (vLines.Count == 0 || hLines.Count == 0)
    {
        throw new InvalidOperationException("vertical or horizontal lines is empty");
    }

    // Rows are walked in the reverse of the dictionary's enumeration order.
    // NOTE(review): presumably the dictionary is sorted by ascending y, so this runs from
    // the largest y to the smallest - confirm against TablePos.
    hLines.Reverse();

    FreeTable freeTable = new FreeTable();
    for (int i = 1; i < hLines.Count; i++)
    {
        var y = hLines[i].Y;
        var y2 = hLines[i - 1].Y;
        var x = hLines[i - 1].Left;

        // Find the first vertical line that is not clearly left of the previous
        // horizontal line's left end; it is the left edge of the row's first cell.
        var prej = 0;
        while (prej < vLines.Count)
        {
            if (vLines[prej].X > x - Tolerance)
            {
                break;
            }
            prej++;
        }

        FreeTableRow row = new FreeTableRow();

        // Every vertical line lies left of this row: there is no left edge to start from.
        // Emit an empty row so row indexes stay aligned, rather than indexing past the
        // end of vLines.
        if (prej >= vLines.Count)
        {
            freeTable.Add(row);
            continue;
        }

        var x1 = vLines[prej].X;
        for (int j = prej + 1; j < vLines.Count; j++)
        {
            var vline = vLines[j];

            // A vertical line closes the current cell when it reaches more than the
            // tolerance past this row's line, or when it is the last vertical line.
            if (vline.Bottom - y > Tolerance || j == vLines.Count - 1)
            {
                // Advance to the first horizontal line that starts left of this vertical
                // line; the lines skipped on the way make the cell span several rows.
                int ii = i;
                for (; ii < hLines.Count - 1; ii++)
                {
                    if (vline.X - hLines[ii].Left > Tolerance)
                    {
                        break;
                    }
                }

                var data = pdfPageProcess.SearchTextWithStrictMode(
                    new System.Windows.Rect(x1, hLines[ii].Y, vline.X - x1, y2 - hLines[ii].Y));
                row.Add(new FreeTableCell() { Value = data, ColSpan = j - prej, RowSpan = ii - i + 1 });

                // The closing line becomes the left edge of the next cell.
                prej = j;
                x1 = vline.X;
            }
        }

        freeTable.Add(row);
    }

    return freeTable;
}