/// <summary>
/// Reads the hard-coded PDF, extracts the text of every page and stores the
/// resulting (page number, page text) pairs in <c>document</c>.
/// </summary>
void LoadDocument()
{
    List<Tuple<int, string>> contents = new List<Tuple<int, string>>();

    // The stream, reader and document are all disposable; release them
    // deterministically, including when extraction throws part-way through.
    using (MemoryStream memory = new MemoryStream(convertDocToByteArray(@"D:\Code Project\Subrip\Document\HSK Book.pdf")))
    using (iText.Kernel.Pdf.PdfReader iTextReader = new iText.Kernel.Pdf.PdfReader(memory))
    using (iText.Kernel.Pdf.PdfDocument pdfDoc = new iText.Kernel.Pdf.PdfDocument(iTextReader))
    {
        int numberOfPages = pdfDoc.GetNumberOfPages();

        // iText page numbers are 1-based.
        for (int page = 1; page <= numberOfPages; page++)
        {
            // A fresh strategy per page, so text from earlier pages is not carried over.
            iText.Kernel.Pdf.Canvas.Parser.Listener.ITextExtractionStrategy strategy =
                new iText.Kernel.Pdf.Canvas.Parser.Listener.LocationTextExtractionStrategy();
            string currentText = iText.Kernel.Pdf.Canvas.Parser.PdfTextExtractor.GetTextFromPage(pdfDoc.GetPage(page), strategy);

            // UTF-8 round-trip kept from the original code; the extra
            // UTF-8 -> UTF-8 Convert step it contained was an identity and is dropped.
            currentText = Encoding.UTF8.GetString(Encoding.UTF8.GetBytes(currentText));

            contents.Add(new Tuple<int, string>(page, currentText));
        }
    }

    // Only publish the result once every page has been read successfully.
    document = contents;
}
/// <summary>
/// Writes the raw content bytes of every page of the given PDF, decoded as UTF-8,
/// to a text file named "[file name]_Page_[page number].txt" in the PDF's own directory.
/// </summary>
/// <param name="file">The PDF file to read.</param>
/// <returns>
/// The list of stock advices. Nothing is added to it in this method, so it is
/// currently always empty.
/// </returns>
public IEnumerable<StockAdvice> ReadStockAdvice(FileInfo file)
{
    var stockAdvices = new List<StockAdvice>();

    // using guarantees the file handle is released even if a page fails to dump.
    using (var reader = new iText.Kernel.Pdf.PdfReader(file))
    using (var doc = new iText.Kernel.Pdf.PdfDocument(reader))
    {
        var pageCount = doc.GetNumberOfPages();
        var folder = file.Directory;

        // Pages are 1-based, so the upper bound must be inclusive;
        // the previous "< pageCount" silently skipped the last page.
        for (int pageNum = 1; pageNum <= pageCount; pageNum++)
        {
            var page = doc.GetPage(pageNum);
            var pageData = page.GetContentBytes();
            string pageContent = Encoding.UTF8.GetString(pageData);

            var fileName = Path.Combine(folder.FullName, file.Name + "_Page_" + pageNum + ".txt");
            WritePageContentToFile(fileName, pageContent);
        }
    }

    return stockAdvices;
}
/// <summary>
/// Parses an in-memory PDF and returns one <c>Page</c> per PDF page, carrying the
/// rotated page size and the blocks collected by a <c>CustomEventListener</c>.
/// </summary>
/// <param name="contents">The raw bytes of the PDF.</param>
/// <returns>The pages in document order.</returns>
public Page[] GetBlocks(byte[] contents)
{
    var result = new List<Page>();

    using (var input = new System.IO.MemoryStream(contents))
    using (var reader = new iText.Kernel.Pdf.PdfReader(input))
    using (iText.Kernel.Pdf.PdfDocument pdf = new iText.Kernel.Pdf.PdfDocument(reader))
    {
        int total = pdf.GetNumberOfPages();
        int pageNumber = 1; // iText pages are 1-based

        while (pageNumber <= total)
        {
            var source = pdf.GetPage(pageNumber);
            var current = new Page();

            // Size of the page with its rotation applied.
            var size = source.GetPageSizeWithRotation();
            current.Height = size.GetHeight();
            current.Width = size.GetWidth();

            // Run the page content through the listener to collect its blocks.
            var listener = new CustomEventListener();
            new PdfCanvasProcessor(listener).ProcessPageContent(source);

            var blocks = listener.Blocks;
            current.Blocks = blocks.ToArray();

            result.Add(current);
            pageNumber++;
        }
    }

    return result.ToArray();
}
/// <summary>
/// Reads PDF file by a given path.
/// </summary>
/// <param name="path">The path to the file</param>
/// <param name="pageCount">
/// The number of pages to read (0=all, 1 by default). Values larger than the document
/// are capped at the document's page count; negative values are treated like 1.
/// </param>
/// <returns>A <c>DocumentTree</c> with the extracted text of each page read, added in page order.</returns>
public static DocumentTree PdfToText(string path, int pageCount = 1)
{
    var pages = new DocumentTree();

    // Both objects are closed by their using blocks; no explicit Close() is needed.
    using (iText.Kernel.Pdf.PdfReader reader = new iText.Kernel.Pdf.PdfReader(path))
    using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(reader))
    {
        int totalPages = pdfDocument.GetNumberOfPages();

        // set up pages to read
        int pagesToRead = pageCount > 0 ? pageCount : 1;
        if (pageCount == 0 || pagesToRead > totalPages)
        {
            pagesToRead = totalPages;
        }

        // for each page to read...
        for (int i = 1; i <= pagesToRead; ++i)
        {
            // A location strategy accumulates the text chunks it is fed, so it must
            // be created per page; sharing one instance made every page's text also
            // contain the text of all previously read pages.
            var strategy = new iText.Kernel.Pdf.Canvas.Parser.Listener.LocationTextExtractionStrategy();

            // get the page and save it
            var page = pdfDocument.GetPage(i);
            var txt = iText.Kernel.Pdf.Canvas.Parser.PdfTextExtractor.GetTextFromPage(page, strategy);
            pages.Add(txt);
        }
    }

    return pages;
}
/// <summary>
/// Builds a book record for a PDF file by scanning its leading pages for an ISBN.
/// </summary>
/// <param name="filepath">Path of the PDF file.</param>
/// <returns>
/// When the file does not exist: the base implementation's result.
/// When an ISBN is matched: the base result with <c>Isbnsearch</c> set to the match.
/// When no ISBN is matched: the base result with <c>SearchPhrase</c> set to at most
/// the first 500 characters of the last page scanned.
/// When an <c>IOException</c> is caught: a new <c>PocoBook</c> for the path.
/// </returns>
public override BookAtHome GetPocoBook(string filepath)
{
    const int extracted_phrase_len = 500;
    const string pattern = @"ISBN(-1(?:(0)|3))?:?\x20(\s)*[0-9]+[- ][0-9]+[- ][0-9]+[- ][0-9]*[- ]*[xX0-9]";

    if (!File.Exists(filepath))
    {
        return base.GetPocoBook(filepath);
    }

    try
    {
        // Nested usings dispose the document first and the reader second, each
        // exactly once (previously the reader was disposed before the document
        // and then closed a second time).
        using (var reader = new iText.Kernel.Pdf.PdfReader(filepath))
        using (var pDoc = new iText.Kernel.Pdf.PdfDocument(reader))
        {
            int nPages = pDoc.GetNumberOfPages();

            // NOTE(review): documents with fewer than 15 pages are scanned completely,
            // longer ones only up to page 10 - thresholds kept as they were; confirm intent.
            int maxsearch = nPages < 15 ? nPages : 10;
            string currentText = string.Empty;

            for (int i = 1; i <= maxsearch; i++)
            {
                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                // The extracted text is already a .NET string. The former round-trip
                // through Encoding.Default could only lose characters on systems whose
                // default code page is not UTF-8, so it has been removed.
                currentText = PdfTextExtractor.GetTextFromPage(pDoc.GetPage(i), strategy);

                // Ordinal, case-insensitive check: the earlier culture-sensitive
                // ToLower() breaks under cultures with special casing rules for 'I'
                // (e.g. Turkish), where "ISBN" does not lower-case to "isbn".
                if (!currentText.Contains("isbn", System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Colons are dropped and dashes become spaces before matching.
                Match m = Regex.Match(currentText.Replace(":", "").Replace("-", " "), pattern);
                if (m.Success)
                {
                    PocoBook retBook = base.GetPocoBook(filepath) as PocoBook;
                    retBook.Isbnsearch = m.Value;
                    return retBook;
                }
            }

            // No ISBN found: fall back to a snippet of the last page that was scanned.
            PocoBook abook = base.GetPocoBook(filepath) as PocoBook;
            int len = currentText.Length < extracted_phrase_len ? currentText.Length : extracted_phrase_len;
            abook.SearchPhrase = currentText.Substring(0, len);
            return abook;
        }
    }
    catch (IOException)
    {
        return new PocoBook(filepath);
    }
}
/// <summary>
/// Generates text signatures with iText 7: places four signature images and four
/// text labels at fixed positions on page 1 of the source PDF and writes the result
/// to the target PDF.
/// </summary>
public void ConvertPdf1()
{
    string sourcePath = $"C:\\test\\source.pdf";
    string targetPath = $"C:\\test\\target.pdf";
    string fontPath = $"C:\\Windows\\Fonts\\simkai.ttf";

    // Signature image files, top-most signature first.
    string[] signPaths =
    {
        @"C:\Users\Administrator\Desktop\a.png",
        @"C:\Users\Administrator\Desktop\b.png",
        @"C:\Users\Administrator\Desktop\c.png",
        @"C:\Users\Administrator\Desktop\d.png",
    };

    // Bottom coordinate of each image and of the matching text label.
    float[] imageBottoms = { 93, 73, 53, 33 };
    string[] labels = { "签名1", "签名2", "签名3", "签名4" };
    float[] labelBottoms = { 90, 70, 50, 30 };

    // Input PDF, output PDF, the PDF object and the layout Document on top of it.
    using (iText.Kernel.Pdf.PdfReader reader = new iText.Kernel.Pdf.PdfReader(sourcePath))
    using (iText.Kernel.Pdf.PdfWriter writer = new iText.Kernel.Pdf.PdfWriter(targetPath))
    using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(reader, writer))
    using (iText.Layout.Document document = new iText.Layout.Document(pdfDocument))
    {
        // Load every image from its physical file before anything is drawn.
        var images = new iText.Layout.Element.Image[signPaths.Length];
        for (int i = 0; i < signPaths.Length; i++)
        {
            images[i] = new iText.Layout.Element.Image(iText.IO.Image.ImageDataFactory.Create(signPaths[i]));
        }

        // Draw each image at absolute coordinates on the PDF, scaling it as well.
        // The coordinates are almost the same as those of the text, slightly to the left and up.
        // The scaled width matches the width passed afterwards (200 in this example).
        // The scaled height is the vertical distance between two signatures, e.g. 93-73=20.
        // Note: the sample signature images are 400px * 150px; images of a similar size work best.
        for (int i = 0; i < images.Length; i++)
        {
            document.Add(images[i].ScaleToFit(200, 20).SetFixedPosition(1, 3089, imageBottoms[i], 200));
        }

        // Load the font.
        iText.Kernel.Font.PdfFont font = iText.Kernel.Font.PdfFontFactory.CreateFont(fontPath, iText.IO.Font.PdfEncodings.IDENTITY_H, true);

        // Add the text labels.
        for (int i = 0; i < labels.Length; i++)
        {
            document.Add(new iText.Layout.Element.Paragraph(labels[i]).SetFont(font).SetFontSize(12).SetFixedPosition(1, 3090, labelBottoms[i], 200));
        }
    }
}
/// <summary>
/// Compares two PDF files by catalog and writes every reported difference to the test context.
/// </summary>
/// <param name="testContext">Test context that receives the diff output.</param>
/// <param name="filenameOutput">Path of the PDF that was produced.</param>
/// <param name="filenameShall">Path of the PDF holding the expected content.</param>
/// <param name="nAcceptedDifferences">Maximum number of differences that still counts as a match.</param>
/// <returns><c>true</c> when the number of differences does not exceed <paramref name="nAcceptedDifferences"/>.</returns>
public static bool CompareAgainstPdf(TestContext testContext, string filenameOutput, string filenameShall, int nAcceptedDifferences = 0)
{
    // Readers and documents hold the underlying files open; dispose them so the
    // files are released after the comparison, even if it throws.
    using (var pdfReaderOutput = new iText.Kernel.Pdf.PdfReader(filenameOutput))
    using (var pdfOutput = new iText.Kernel.Pdf.PdfDocument(pdfReaderOutput))
    using (var pdfReaderShall = new iText.Kernel.Pdf.PdfReader(filenameShall))
    using (var pdfShall = new iText.Kernel.Pdf.PdfDocument(pdfReaderShall))
    {
        var ct = new iText.Kernel.Utils.CompareTool();
        var result = ct.CompareByCatalog(pdfOutput, pdfShall);
        var differences = result.GetDifferences();

        testContext.WriteLine(string.Format("Diff of {0} <-> {1}", filenameOutput, filenameShall));
        foreach (var dif in differences)
        {
            testContext.WriteLine(dif.Value);
        }

        return differences.Count <= nAcceptedDifferences;
    }
}
//[Benchmark]
//public void iText_Split_125Mb_gt_7500pages_10_pages()
//{
//    RuniTextBenchmark("sample_125Mb_gt_7500pages.pdf", 10);
//}

/// <summary>
/// Splits a PDF from the root folder into parts of a fixed page count using
/// <c>CustomFileSplitter</c> and closes every resulting document.
/// </summary>
/// <param name="fileToSplit">File name of the PDF, relative to the root folder.</param>
/// <param name="splitByPagesNumber">Number of pages per split document.</param>
/// <param name="pagesCountToProcess">Not used by this method; kept so existing callers still compile.</param>
public void RuniTextBenchmark(string fileToSplit, int splitByPagesNumber, int? pagesCountToProcess = null)
{
    var srcFile = Path.Combine(_rootFolder, fileToSplit);

    // Unlike Substring(0, LastIndexOf(".")), this does not throw when the
    // file name has no extension.
    var name = Path.GetFileNameWithoutExtension(srcFile);

    // Both the reader and the source document are released even if splitting throws.
    using (var reader = new iText.Kernel.Pdf.PdfReader(srcFile))
    using (var doc = new iText.Kernel.Pdf.PdfDocument(reader))
    {
        var splitter = new CustomFileSplitter(doc, _resultsiTextFolder, name);
        var splittedDocuments = splitter.SplitByPageCount(splitByPagesNumber);

        // Each split document has to be closed.
        foreach (var sd in splittedDocuments)
        {
            sd.Close();
        }
    }
}
/// <summary>
/// Opens a PDF from a memory stream with PdfSharp. If PdfSharp throws a
/// <c>PdfReaderException</c>, the PDF is first rewritten through iText
/// (writer set to PDF version 1.4, AcroForm fields flattened) into a new memory
/// stream, and that rewritten copy is opened with PdfSharp instead.
/// </summary>
/// <param name="inputStream">Stream holding the PDF; its position is reset to 0 before each read.</param>
/// <param name="openMode">The PdfSharp open mode to use.</param>
/// <returns>The opened PdfSharp document.</returns>
public static PdfDocument CompatibleOpen(MemoryStream inputStream, PdfDocumentOpenMode openMode)
{
    inputStream.Position = 0;

    try
    {
        return PdfReader.Open(inputStream, openMode);
    }
    catch (PdfSharp.Pdf.IO.PdfReaderException)
    {
        // PdfSharp could not read the file: rewind and rewrite it through iText.
        inputStream.Position = 0;
        MemoryStream rewritten = new MemoryStream();

        var properties = new iText.Kernel.Pdf.WriterProperties();
        properties.SetPdfVersion(iText.Kernel.Pdf.PdfVersion.PDF_1_4);

        var sourceReader = new iText.Kernel.Pdf.PdfReader(inputStream);
        var targetWriter = new iText.Kernel.Pdf.PdfWriter(rewritten, properties);
        var stamper = new iText.Kernel.Pdf.PdfDocument(sourceReader, targetWriter);

        var form = iText.Forms.PdfAcroForm.GetAcroForm(stamper, true);
        if (form != null)
        {
            form.FlattenFields();
        }

        // NOTE(review): full compression is switched off only after the writer and
        // document have been constructed; the order is kept as found - confirm intent.
        properties.SetFullCompressionMode(false);

        // Keep the output stream open after Close() so it can be read back below.
        stamper.GetWriter().SetCloseStream(false);
        stamper.Close();

        return PdfReader.Open(rewritten, openMode);
    }
}