/// <summary>
/// Parses a PDF from an already-loaded document object: extracts the text and the
/// tables of every page, then passes the assembled model to <c>PdfTextFormater</c>.
/// </summary>
/// <param name="pdfDocument">The PDF document to parse.</param>
/// <param name="tableContainType">Table containment style; forwarded unchanged to <c>PdfTextFormater</c>.</param>
/// <returns>
/// The value returned by <c>PdfTextFormater</c> for a model that holds one
/// <c>PdfPageModel</c> per page yielded by the page iterator (1-based page number,
/// page text and detected tables) and the total page count.
/// </returns>
public static PDFModel Parser(PDDocument pdfDocument, TableContainType tableContainType)
{
    ObjectExtractor extractor = new ObjectExtractor(pdfDocument);
    PageIterator pageIterator = extractor.extract();
    SpreadsheetExtractionAlgorithm tableExtractor = new SpreadsheetExtractionAlgorithm();

    // Have the stripper emit pageEndMark after each page so that the full text
    // can be cut back into per-page segments below.
    PDFTextStripper pdfStripper = new PDFTextStripper();
    pdfStripper.setPageEnd(pageEndMark);

    // The marker is written into the text verbatim, so it must be matched as a
    // literal: escape it before handing it to the regex engine, otherwise any
    // metacharacter in the marker would break the page split.
    string[] pageTexts = Regex.Split(
        pdfStripper.getText(pdfDocument),
        Regex.Escape(pageEndMark),
        RegexOptions.IgnoreCase);

    PDFModel pdfModel = new PDFModel();
    pdfModel.Pages = new List<PdfPageModel>();

    int pageIndex = 0;
    while (pageIterator.hasNext())
    {
        Page page = pageIterator.next();

        PdfPageModel pdfPage = new PdfPageModel();
        pdfPage.CurrentPage = pageIndex + 1;

        // Guard against the iterator yielding more pages than the split produced
        // text segments; such pages get empty text instead of an out-of-range error.
        pdfPage.Text = pageIndex < pageTexts.Length ? pageTexts[pageIndex] : string.Empty;

        List<Table> tables = new List<Table>();
        object[] pageTables = tableExtractor.extract(page).toArray();
        if (pageTables != null)
        {
            foreach (object candidate in pageTables)
            {
                // Skip anything that is not a Table rather than storing a null entry.
                Table table = candidate as Table;
                if (table != null)
                {
                    tables.Add(table);
                }
            }
        }

        pdfPage.Tables = tables;
        pdfModel.Pages.Add(pdfPage);
        pageIndex++;
    }

    pdfModel.PageNumber = pdfModel.Pages.Count;
    return PdfTextFormater(pdfModel, tableContainType);
}