///////////////////////////////////////////////////////////////////////
// ParseTable
//
// Writes the content of one table to a CSV file named
// outDir + "\ExtractTables" + tableIndex + ".csv"; tableIndex is
// incremented on every call, so each table goes to its own file.
//
//   table  - table element whose cells are exported
//   outDir - directory the CSV file is created in
//
// Each table row becomes one line, fields are separated by ','.
// A cell with non-zero row span, non-zero column span and at least one
// child is written as a double-quoted field: its text children are
// passed to ParseText and consecutive children are separated by a
// single space. Any other cell (including a missing one) is written as
// an empty field so that columns stay aligned.
///////////////////////////////////////////////////////////////////////
private static void ParseTable(PdeTable table, String outDir)
{
    // 'using' guarantees the writer is flushed and closed even when one of
    // the SDK calls or ParseText throws part-way through the table.
    using (StreamWriter file = new System.IO.StreamWriter(outDir + "\\ExtractTables" + tableIndex++ + ".csv"))
    {
        int rowCount = table.GetNumRows();
        int colCount = table.GetNumCols();

        for (int row = 0; row < rowCount; row++)
        {
            for (int col = 0; col < colCount; col++)
            {
                PdeCell cell = (PdeCell)table.GetCell(row, col);

                // A missing cell still occupies its column: skip only the
                // content, never the separator written below.
                if (cell != null)
                {
                    int rowSpan = cell.GetRowSpan();
                    int colSpan = cell.GetColSpan();
                    int count = cell.GetNumChildren();

                    // Cells with a zero span produce no content.
                    // NOTE(review): presumably these are positions covered by a
                    // neighbouring merged cell - confirm against the SDK docs.
                    if ((rowSpan != 0) && (colSpan != 0) && (count > 0))
                    {
                        file.Write("\"");
                        for (int i = 0; i < count; i++)
                        {
                            PdeElement child = cell.GetChild(i);
                            if (child != null && (child.GetType_() == PdfElementType.kPdeText))
                            {
                                ParseText((PdeText)child, file, false);
                            }

                            // Space between children, but not after the last one.
                            if (i < count - 1)
                            {
                                file.Write(" ");
                            }
                        }
                        file.Write("\"");
                    }
                }

                // Separator goes between fields only; the last column must not
                // be followed by a comma, otherwise every row gains an empty
                // trailing field.
                if (col < colCount - 1)
                {
                    file.Write(",");
                }
            }

            // Every row, including the last one, is terminated by a newline.
            file.Write("\n");
        }
    }
}
///////////////////////////////////////////////////////////////////////
// ParseElement
//
// Walks the element tree depth-first looking for tables. A table
// element is handed to ParseTable and its children are not visited;
// for any other element type the search continues in its children.
//
//   element - root of the sub-tree to search
//   outDir  - output directory forwarded to ParseTable
///////////////////////////////////////////////////////////////////////
private static void ParseElement(PdeElement element, String outDir)
{
    // Tables are exported as a whole; do not descend into them.
    if (element.GetType_() == PdfElementType.kPdeTable)
    {
        ParseTable((PdeTable)element, outDir);
        return;
    }

    int childCount = element.GetNumChildren();
    for (int index = 0; index < childCount; index++)
    {
        // GetChild is treated as nullable elsewhere in this file (see the
        // child check in ParseTable), so skip missing children instead of
        // dereferencing null in the recursive call.
        PdeElement child = element.GetChild(index);
        if (child != null)
        {
            ParseElement(child, outDir);
        }
    }
}
///////////////////////////////////////////////////////////////////////
// ParseElement
//
// Walks the element tree depth-first looking for text. A text element
// is handed to ParseText together with the output writer and its
// children are not visited; for any other element type the search
// continues in its children.
//
//   element - root of the sub-tree to search
//   file    - writer forwarded to ParseText
///////////////////////////////////////////////////////////////////////
private static void ParseElement(PdeElement element, StreamWriter file)
{
    // Text elements are written out directly; do not descend into them.
    if (element.GetType_() == PdfElementType.kPdeText)
    {
        ParseText((PdeText)element, file);
        return;
    }

    int childCount = element.GetNumChildren();
    for (int index = 0; index < childCount; index++)
    {
        // GetChild is treated as nullable elsewhere in this file (see the
        // child check in ParseTable), so skip missing children instead of
        // dereferencing null in the recursive call.
        PdeElement child = element.GetChild(index);
        if (child != null)
        {
            ParseElement(child, file);
        }
    }
}