/// <summary>
/// Scratch routine: loads the PDF at <paramref name="input"/>, reads a handful of
/// stripper properties and then extracts the text of the whole document.
/// </summary>
/// <param name="input">Path of the PDF file to load.</param>
private static void KamilPdfTest(string input)
{
    PDDocument document = null;
    try
    {
        document = PDDocument.load(input);
        PDFTextStripper textStripper = new PDFTextStripper();

        // These values are read before any text has been extracted.
        Matrix textLineMatrix = textStripper.getTextLineMatrix();
        PDPage currentPage = textStripper.getCurrentPage();
        Matrix textMatrix = textStripper.getTextMatrix();
        int characterCount = textStripper.getTotalCharCnt();
        string articleStart = textStripper.getArticleStart();
        string articleEnd = textStripper.getArticleEnd();

        // Puts the whole text into a single string - works.
        string fullText = textStripper.getText(document);
        characterCount = fullText.Length;
    }
    finally
    {
        if (document != null)
        {
            document.close();
        }
    }
}
/// <summary>
/// Get a thumbnail of the document, if possible. The thumbnail is rendered from the
/// first page of the PDF.
/// </summary>
/// <param name="sizeX">The maximum X size of the thumbnail. NOTE: currently not applied; the page is rendered at its default size.</param>
/// <param name="sizeY">The maximum Y size of the thumbnail. NOTE: currently not applied; the page is rendered at its default size.</param>
/// <param name="forceFullSize">True if the thumbnail should be exactly XxY pixels and False if the thumbnail
/// should fit inside a XxY box but should maintain its aspect ratio. NOTE: currently not applied.</param>
/// <returns>A JPEG byte thumbnail or null if the thumbnail cannot be generated</returns>
public override byte[] GetThumbnail(int sizeX, int sizeY, bool forceFullSize)
{
    // If we have no bytes then we can't do anything.
    if (Bytes == null || Bytes.Length == 0)
    {
        return null;
    }

    PDDocument doc = null;
    try
    {
        java.io.ByteArrayInputStream byteStream = new java.io.ByteArrayInputStream(Bytes);
        doc = PDDocument.load(byteStream);
        java.util.List pages = doc.getDocumentCatalog().getAllPages();
        if (pages.size() > 0)
        {
            // Render the first page of the loaded document. The previous code asked a
            // freshly constructed PageDrawer for its page, which had never been assigned.
            PDPage page = (PDPage)pages.get(0);
            java.awt.image.BufferedImage image = page.convertToImage();
            java.io.ByteArrayOutputStream os = new java.io.ByteArrayOutputStream();
            ImageIO.write(image, "jpg", os);
            return os.toByteArray();
        }
    }
    catch (Exception e)
    {
        log.Error("Failed to get the thumbnail from the PDF file " + Name, e);
    }
    finally
    {
        // Always release the document; this method must never throw, so a failure
        // while closing is only logged.
        if (doc != null)
        {
            try
            {
                doc.close();
            }
            catch (Exception e)
            {
                log.Error("Failed to close the PDF file " + Name, e);
            }
        }
    }

    return null;
}
/// <summary>
/// Copies a bookmark entry (title and page destination) onto a PDFBox outline item and
/// recursively appends one child outline item per nested bookmark.
/// </summary>
/// <param name="bookmarentry">Bookmark to copy; its page number is 1-based.</param>
/// <param name="document">Document that owns the destination pages.</param>
/// <param name="outline">Outline item that receives the title, destination and children.</param>
/// <returns>True when no exception occurred; false otherwise.</returns>
internal static bool AddBookmarkTooutline(PDFBookmarkItem bookmarentry, PDDocument document, PDOutlineItem outline)
{
    try
    {
        // Title and destination are only applied when the page number is not past the end.
        if (bookmarentry.BookMarkPage <= document.getNumberOfPages())
        {
            PDPage targetPage = document.getPage(bookmarentry.BookMarkPage - 1);
            PDPageFitWidthDestination destination = new PDPageFitWidthDestination();
            destination.setPage(targetPage);
            outline.setDestination(destination);
            outline.setTitle(bookmarentry.BookmarkTitle);
        }

        if (bookmarentry.BookmarkItems != null && bookmarentry.BookmarkItems.Count > 0)
        {
            foreach (PDFBookmarkItem child in bookmarentry.BookmarkItems)
            {
                PDOutlineItem childOutline = new PDOutlineItem();
                // The result of the recursive call is intentionally not inspected.
                PDFHelper.AddBookmarkTooutline(child, document, childOutline);
                outline.addLast(childOutline);
            }
        }

        return true;
    }
    catch (Exception)
    {
        return false;
    }
}
/// <summary>
/// Draws <paramref name="text"/> diagonally across <paramref name="page"/> as a semi-transparent
/// red watermark. Adapted from the PDFBox example at
/// https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/AddWatermarkText.java?revision=1873147&amp;view=markup
/// </summary>
/// <param name="doc">Document that owns the page.</param>
/// <param name="page">Page whose content is appended to.</param>
/// <param name="font">Font used for the watermark text.</param>
/// <param name="text">Watermark text.</param>
static void addWatermarkText(PDDocument doc, PDPage page, PDFont font, string text)
{
    using (PDPageContentStream content = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true, true))
    {
        // Arbitrary size that works for short text.
        float fontSize = 100;

        float pageWidth = page.getMediaBox().getWidth();
        float pageHeight = page.getMediaBox().getHeight();
        float textWidth = font.getStringWidth(text) / 1000 * fontSize;
        float diagonal = (float)System.Math.Sqrt(pageWidth * pageWidth + pageHeight * pageHeight);
        float rotation = (float)System.Math.Atan2(pageHeight, pageWidth);

        // Coordinates in the rotated coordinate system: centre the text along the
        // diagonal and lower it slightly (the divisor 4 was found by trial and error).
        float offsetX = (diagonal - textWidth) / 2;
        float offsetY = -fontSize / 4;

        content.transform(Matrix.getRotateInstance(rotation, 0, 0));
        content.setFont(font, fontSize);

        PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
        graphicsState.setNonStrokingAlphaConstant(new Float(0.2f));
        graphicsState.setStrokingAlphaConstant(new Float(0.2f));
        graphicsState.setBlendMode(BlendMode.MULTIPLY);
        graphicsState.setLineWidth(new Float(3f));
        content.setGraphicsStateParameters(graphicsState);

        content.setNonStrokingColor(Color.red);
        content.setStrokingColor(Color.red);

        content.beginText();
        content.newLineAtOffset(offsetX, offsetY);
        content.showText(text);
        content.endText();
    }
}
/// <summary>
/// Adds a new page to the PDF being built and draws the given image on it with a
/// 20 unit margin. BMP files are converted through <c>ImageToPDFConverter.BmpToPng</c>,
/// PNG files are used as they are, and any other format is re-saved as PNG first.
/// </summary>
/// <param name="imagePath">Path of the image file to embed.</param>
/// <returns>Always true; failures are reported through <c>PDFToolkitException</c>.</returns>
private bool ConvertOthersToPDF(string imagePath)
{
    try
    {
        PDPage pDPage = new PDPage();
        // Intermediate PNG written to the current working directory.
        string pngPath = "temp.png";
        this.pdfFile.addPage(pDPage);

        string extension = Path.GetExtension(imagePath);
        if (string.Equals(extension, ".bmp", StringComparison.OrdinalIgnoreCase))
        {
            // Dispose both GDI+ objects so the source file is not left locked.
            // NOTE(review): assumes BmpToPng does not keep the bitmap after returning.
            using (Image source = Image.FromFile(imagePath))
            using (Bitmap bitmap = new Bitmap(source))
            {
                ImageToPDFConverter.BmpToPng(bitmap, pngPath);
            }
        }
        else if (string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase))
        {
            pngPath = imagePath;
        }
        else
        {
            using (Image source = Image.FromFile(imagePath))
            {
                source.Save(pngPath, ImageFormat.Png);
            }
        }

        PDImageXObject pDImageXObject = PDImageXObject.createFromFile(pngPath, this.pdfFile);

        // Size the page to the image plus a 20 unit margin on every side.
        PDRectangle pDRectangle = new PDRectangle((float)(pDImageXObject.getWidth() + 40), (float)(pDImageXObject.getHeight() + 40));
        pDPage.setMediaBox(pDRectangle);

        PDPageContentStream pDPageContentStream = new PDPageContentStream(this.pdfFile, pDPage);
        try
        {
            pDPageContentStream.drawImage(pDImageXObject, 20f, 20f);
        }
        finally
        {
            // Close the stream even when drawing fails.
            pDPageContentStream.close();
        }
    }
    catch (Exception exception)
    {
        throw new PDFToolkitException(exception.Message, exception);
    }

    return true;
}
/// <summary>
/// Command line entry point: stamps every page of a PDF with a watermark text.
/// </summary>
/// <param name="args">
/// args[0] is the original PDF filename, args[1] the watermark text and the optional
/// args[2] the output filename. Without args[2] the result is written next to the
/// original file with a "watermarked_" filename prefix.
/// </param>
static void Main(string[] args)
{
    if (args == null || args.Length < 2)
    {
        System.Console.WriteLine("Usage: " + AppDomain.CurrentDomain.FriendlyName + " <original PDF filename> <watermark text> [new PDF filename]" + Environment.NewLine + " For example: " + AppDomain.CurrentDomain.FriendlyName + " myDoc.pdf \"This is a Draft\"");
        return;
    }

    string origName = args[0];
    string watermarkTxt = args[1];
    if (!System.IO.File.Exists(origName))
    {
        System.Console.WriteLine("Error: cannot find the original PDF file(" + origName + "). Please correct the filename or the path and try again.");
        return;
    }

    // Honour the optional third argument. Otherwise prefix only the file name, so that
    // an input such as "docs/a.pdf" yields "docs/watermarked_a.pdf".
    string newName;
    if (args.Length > 2 && !string.IsNullOrEmpty(args[2]))
    {
        newName = args[2];
    }
    else
    {
        string directory = System.IO.Path.GetDirectoryName(origName) ?? string.Empty;
        newName = System.IO.Path.Combine(directory, "watermarked_" + System.IO.Path.GetFileName(origName));
    }

    // NOTE: PDDocument.load() only takes java.io.File, not System.IO.File from C#.Net
    PDDocument origDoc = PDDocument.load(new java.io.File(origName));
    try
    {
        PDPageTree allPages = origDoc.getPages();
        PDFont font = PDType1Font.HELVETICA_BOLD;
        for (int i = 0, len = allPages.getCount(); i < len; ++i)
        {
            PDPage pg = (PDPage)allPages.get(i);
            // Use the text supplied on the command line.
            addWatermarkText(origDoc, pg, font, watermarkTxt);
        }

        origDoc.save(newName);
    }
    finally
    {
        // Release the document even when stamping or saving fails.
        origDoc.close();
    }
}
/// <summary>
/// Splits a TIFF file into separate images via <c>ImageToPDFConverter.SplitTiff</c> and
/// adds one PDF page per image, each sized to its image plus a 20 unit margin.
/// </summary>
/// <param name="imagePath">Path of the TIFF file.</param>
/// <returns>Always true; failures are reported through <c>PDFToolkitException</c>.</returns>
private bool ConvertTiffToPDF(string imagePath)
{
    try
    {
        foreach (string framePath in ImageToPDFConverter.SplitTiff(imagePath))
        {
            PDPage pDPage = new PDPage();
            this.pdfFile.addPage(pDPage);

            PDImageXObject pDImageXObject = PDImageXObject.createFromFile(framePath, this.pdfFile);
            PDRectangle pDRectangle = new PDRectangle((float)(pDImageXObject.getWidth() + 40), (float)(pDImageXObject.getHeight() + 40));
            pDPage.setMediaBox(pDRectangle);

            PDPageContentStream pDPageContentStream = new PDPageContentStream(this.pdfFile, pDPage);
            try
            {
                pDPageContentStream.drawImage(pDImageXObject, 20f, 20f);
            }
            finally
            {
                // Close the stream even when drawing fails.
                pDPageContentStream.close();
            }
        }
    }
    catch (Exception exception)
    {
        throw new PDFToolkitException(exception.Message, exception);
    }

    return true;
}
/// <summary>
/// Loads a PDF from raw bytes and extracts the character items of its first page,
/// together with the page dimensions and the document keywords.
/// </summary>
/// <param name="bytes">Raw PDF content.</param>
/// <returns>The extracted items, the media box size (0 when there is no media box) and the keywords.</returns>
/// <exception cref="PdfReadException">
/// Thrown as-is when raised while reading; any other failure is wrapped in a new instance.
/// </exception>
public PdfOcrResult Execute(byte[] bytes)
{
    PDDocument pdf = null;
    try
    {
        LoadPdf(bytes, ref pdf);

        List pageList = pdf.getDocumentCatalog().getAllPages();
        if (pageList.size() == 0)
        {
            throw new PdfNotReadableException("Pdf contains no readable content");
        }

        // Only the first page is processed.
        PDPage firstPage = (PDPage)pageList.get(0);
        if (firstPage.getContents() == null)
        {
            throw new PdfNotReadableException("Pdf contains no readable content");
        }

        var characters = new PdfToCharacters().GetItems(firstPage, firstPage.findResources(), firstPage.getContents().getStream());
        if (characters.Count == 0)
        {
            throw new PdfNotReadableException("Pdf contains no readable content");
        }

        var box = firstPage.findMediaBox();
        var pageHeight = box?.getHeight() ?? 0;
        var pageWidth = box?.getWidth() ?? 0;
        var characterArray = characters.ToArray();

        // Best effort: it is unknown whether PDFBox can fail here (e.g. when there are
        // no keywords), and the keywords are optional, so any failure is ignored.
        var documentKeywords = "";
        try
        {
            documentKeywords = pdf.getDocumentInformation()?.getKeywords();
        }
        catch (Exception)
        {
        }

        return new PdfOcrResult()
        {
            Items = characterArray,
            Height = pageHeight,
            Width = pageWidth,
            Keywords = documentKeywords
        };
    }
    catch (PdfReadException)
    {
        throw;
    }
    catch (Exception e)
    {
        throw new PdfReadException("Pdf could not be loaded. It is not a redable pdf.", e);
    }
    finally
    {
        pdf?.close();
    }
}
/// <summary>
/// Step: opens the identity page and updates the first name to <paramref name="value"/>,
/// passing along the second entry of the "Customer" configuration.
/// </summary>
/// <param name="value">New first name.</param>
public void WhenIUpdateMyAccountTo(string value)
{
    _driver.Url = test_url + "/index.php?controller=identity";

    PDPage identityPage = new PDPage(_driver);
    var customer = this.config.Get("Customer");

    identityPage.UpdateFirstNameOnly(value, customer[1]);
}
/// <summary>
/// Extracts the text of one page. With <paramref name="format"/> set, the text is read
/// position-sorted from a region covering the whole page; otherwise the call is
/// forwarded to the two-argument <c>GetText</c> overload.
/// </summary>
/// <param name="pdfDocument">Document to read from.</param>
/// <param name="pageNumber">1-based page number.</param>
/// <param name="format">True to extract position-sorted text from the full page area.</param>
/// <returns>The extracted text, prefixed with a trial notice when <c>PDFHelper.AddStamp</c> is set.</returns>
/// <exception cref="PDFToolkitException">Wraps any failure during formatted extraction.</exception>
internal string GetText(PDDocument pdfDocument, int pageNumber, bool format)
{
    if (!format)
    {
        return this.GetText(pdfDocument, pageNumber);
    }

    string text = "";
    try
    {
        PDFHelper.DisplayTrialPopupIfNecessary();
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.setSortByPosition(true);
        if (PDFHelper.AddStamp)
        {
            text = string.Concat(text, "You are using a trial license of PDF Toolkit, as a result only the first three pages would be extracted.");
        }

        PDPage page = pdfDocument.getPage(pageNumber - 1);
        PDRectangle cropBox = page.getCropBox();
        int rotation = page.getRotation();
        if (cropBox == null)
        {
            cropBox = page.getMediaBox();
        }

        // Round directly instead of formatting and re-parsing the number, which
        // depended on the current culture.
        int boxWidth = (int)Math.Round((double)cropBox.getWidth());
        int boxHeight = (int)Math.Round((double)cropBox.getHeight());

        // For pages rotated by 90 or 270 degrees the region uses swapped dimensions.
        bool swapped = rotation % 180 != 0;
        int regionWidth = swapped ? boxHeight : boxWidth;
        int regionHeight = swapped ? boxWidth : boxHeight;

        stripper.addRegion("class1", new java.awt.Rectangle(0, 0, regionWidth, regionHeight));
        stripper.extractRegions(page);
        text = string.Concat(text, stripper.getTextForRegion("class1"));
    }
    catch (Exception exception)
    {
        // Keep the failing exception itself as the inner exception so the cause is not lost.
        throw new PDFToolkitException(exception.Message, exception);
    }

    return text;
}
/// <summary>
/// Wraps a PDFBox outline item, recording its title and the 1-based number of the page
/// it points to, and starts with an empty list of child bookmarks.
/// </summary>
/// <param name="pdfBoxBookmark">Outline item to wrap.</param>
/// <param name="doc">Document used to resolve the destination page.</param>
internal PDFBookmarkItem(PDOutlineItem pdfBoxBookmark, PDDocument doc)
{
    this.PDFBoxBookmark = pdfBoxBookmark;

    PDPage destinationPage = pdfBoxBookmark.findDestinationPage(doc);
    int zeroBasedIndex = doc.getPages().indexOf(destinationPage);
    this.BookMarkPage = zeroBasedIndex + 1;

    this.bookmarkTitle = pdfBoxBookmark.getTitle();
    this.BookmarkItems = new List <PDFBookmarkItem>();
}
/// <summary>
/// Step: opens the identity page and asserts that the displayed first name equals
/// <paramref name="value"/>.
/// </summary>
/// <param name="value">Expected first name.</param>
public void ThenMyAccountShouldBe(string value)
{
    _driver.Url = test_url + "/index.php?controller=identity";
    PDPage pdPage = new PDPage(_driver);
    var actual = pdPage.GetFirstName();

    // The observed value goes first and the expectation into the constraint, so that the
    // framework's own "Expected / But was" output is not reversed.
    Assert.That(actual, Is.EqualTo(value), "Firstname does not match:" + $"Expected: {value} but got {actual}");
}
/// <summary>
/// Returns the creation date recorded in the document information of <c>pdfDoc</c>
/// as a string.
/// </summary>
/// <returns>The creation date text, or an empty string when no creation date is present.</returns>
public String tempTest()
{
    // A PDF without a creation date yields null here; return the empty default
    // instead of failing on the dereference.
    var creationDate = pdfDoc.getDocumentInformation().getCreationDate();
    if (creationDate == null)
    {
        return "";
    }

    return creationDate.toString();
}
/// <summary>
/// Precondition step: sets the first name on the identity page to <paramref name="value"/>,
/// reloads the page and verifies that the change is shown.
/// </summary>
/// <param name="value">First name to set.</param>
/// <exception cref="Exception">Thrown when the page does not show the requested first name afterwards.</exception>
public void GivenMyAccountIs(string value)
{
    _driver.Url = test_url + "/index.php?controller=identity";
    PDPage identityPage = new PDPage(_driver);
    var customer = this.config.Get("Customer");
    identityPage.UpdateFirstNameOnly(value, customer[1]);

    // Reload the page before reading the value back.
    _driver.Url = test_url + "/index.php?controller=identity";
    var shownFirstName = identityPage.GetFirstName();
    if (shownFirstName == value)
    {
        return;
    }

    throw new Exception("Precondtion Error: Unable to set firstname default");
}
/// <summary>
/// Splits the document into one PDF per top-level bookmark. Each output file holds the
/// pages from a bookmark's destination page up to (not including) the next bookmark's
/// destination page; the last one runs to the end of the document. Files are named
/// "{OutputFileName} [{index}].pdf" inside <c>OutputFilePath</c>.
/// </summary>
/// <exception cref="PDFToolkitException">Wraps any failure while splitting or saving.</exception>
public void SplitByTopLevelBookmarks()
{
    if (!this.CheckOutput())
    {
        return;
    }

    PDFHelper.DisplayTrialPopupIfNecessary();
    try
    {
        PDDocument source = this.pdfDocument.PDFBoxDocument;
        PDDocumentCatalog documentCatalog = source.getDocumentCatalog();
        PDDocumentOutline documentOutline = documentCatalog.getDocumentOutline();
        if (documentOutline == null)
        {
            Console.WriteLine("This document does not contain any bookmarks.");
            return;
        }

        PDPageTree pages = documentCatalog.getPages();

        // Zero-based start page of every top-level bookmark, followed by the page count
        // as the end marker of the last section.
        List <int> boundaries = new List <int>();
        for (PDOutlineItem bookmark = documentOutline.getFirstChild(); bookmark != null; bookmark = bookmark.getNextSibling())
        {
            PDPage destination = bookmark.findDestinationPage(source);
            if (destination == null)
            {
                // A bookmark without a page destination cannot start a section; skip it
                // instead of aborting the whole split.
                continue;
            }

            int index = pages.indexOf(destination);
            if (index >= 0)
            {
                boundaries.Add(index);
            }
        }

        boundaries.Add(pages.getCount());

        for (int i = 0; i < boundaries.Count - 1; i++)
        {
            int first = boundaries[i];
            int end = boundaries[i + 1];
            PDDocument part = new PDDocument();
            try
            {
                for (int j = first; j < end; j++)
                {
                    part.addPage(source.getPage(j));
                }

                part = PDFHelper.AddTrialStampIfNecessary(part);
                string fileName = string.Format("{0} [{1}].pdf", this.OutputFileName, i);
                part.save(Path.Combine(this.OutputFilePath, fileName));
            }
            finally
            {
                // Release the output document even when saving fails.
                if (part != null)
                {
                    part.close();
                }
            }
        }
    }
    catch (Exception exception)
    {
        // Keep the failing exception itself as the inner exception so the cause is not lost.
        throw new PDFToolkitException(exception.Message, exception);
    }
}
/// <summary>
/// Splits the document into one PDF per comma-separated range in
/// <paramref name="pageRanges"/>. Each range is interpreted by <c>ConfigureRange</c>;
/// ranges it rejects are reported on the console and skipped. Files are named
/// "{OutputFileName} [{range}].pdf" inside <c>OutputFilePath</c>.
/// </summary>
/// <param name="pageRanges">Comma-separated list of page ranges.</param>
/// <exception cref="PDFToolkitException">Wraps any failure while splitting or saving.</exception>
public void SplitByPageRanges(string pageRanges)
{
    int rangeStart;
    int rangeEnd;
    int firstOffset;
    int stride;

    if (!this.CheckOutput())
    {
        return;
    }

    PDFHelper.DisplayTrialPopupIfNecessary();
    try
    {
        string[] ranges = pageRanges.Split(new char[] { ',' });
        List allPages = IteratorUtils.toList(this.pdfDocument.PDFBoxDocument.getPages().iterator());
        int pageCount = allPages.size();

        foreach (string range in ranges)
        {
            if (!this.ConfigureRange(range, pageCount, out rangeStart, out rangeEnd, out firstOffset, out stride))
            {
                Console.WriteLine("Invalid range: {0}", range);
                continue;
            }

            List selected = allPages.subList(rangeStart, rangeEnd);
            PDDocument part = new PDDocument();
            try
            {
                // Walk the selected pages starting at firstOffset, advancing by stride.
                for (int j = firstOffset; j < selected.size(); j = j + stride)
                {
                    PDPage page = (PDPage)selected.get(j);
                    // Result unused; call kept from the original code.
                    // NOTE(review): presumably a leftover - confirm it can be removed.
                    page.getResources();
                    part.addPage(page);
                }

                part = PDFHelper.AddTrialStampIfNecessary(part);
                string fileName = string.Format("{0} [{1}].pdf", this.OutputFileName, range);
                part.save(Path.Combine(this.OutputFilePath, fileName));
            }
            finally
            {
                // The output document was previously never closed.
                if (part != null)
                {
                    part.close();
                }
            }
        }
    }
    catch (Exception exception)
    {
        // Keep the failing exception itself as the inner exception so the cause is not lost.
        throw new PDFToolkitException(exception.Message, exception);
    }
}
/// <summary>
/// Adds a new page to the PDF being built and draws the given JPEG on it; the page is
/// sized to the image plus a 20 unit margin on every side.
/// </summary>
/// <param name="imagePath">Path of the image file to embed.</param>
/// <returns>Always true; failures are reported through <c>PDFToolkitException</c>.</returns>
private bool ConvertJPGToPDF(string imagePath)
{
    try
    {
        PDPage pDPage = new PDPage();
        this.pdfFile.addPage(pDPage);

        PDImageXObject pDImageXObject = PDImageXObject.createFromFile(imagePath, this.pdfFile);
        PDRectangle pDRectangle = new PDRectangle((float)(pDImageXObject.getWidth() + 40), (float)(pDImageXObject.getHeight() + 40));
        pDPage.setMediaBox(pDRectangle);

        PDPageContentStream pDPageContentStream = new PDPageContentStream(this.pdfFile, pDPage);
        try
        {
            pDPageContentStream.drawImage(pDImageXObject, 20f, 20f);
        }
        finally
        {
            // Close the stream even when drawing fails.
            pDPageContentStream.close();
        }
    }
    catch (Exception exception)
    {
        throw new PDFToolkitException(exception.Message, exception);
    }

    return true;
}
/// <summary>
/// Extracts the images referenced by the resources of a single page.
/// </summary>
/// <param name="outputFolder">Folder checked through <c>CheckOutputFolder</c> before extraction.</param>
/// <param name="prefix">Passed through to <c>ProcessResources</c>.</param>
/// <param name="addKey">Passed through to <c>ProcessResources</c>.</param>
/// <param name="pageNumber">1-based page number; values outside the document are reported on the console.</param>
/// <exception cref="PDFToolkitException">Wraps any failure during extraction.</exception>
public void ExtractImages(string outputFolder, string prefix, bool addKey, int pageNumber)
{
    PDFHelper.DisplayTrialPopupIfNecessary();
    try
    {
        this.CheckOutputFolder(outputFolder);

        // Reject page numbers below 1 as well, instead of failing later on a negative index.
        if (pageNumber < 1 || pageNumber > this.pdfDocument.getNumberOfPages())
        {
            Console.WriteLine("Invalid page number.");
            return;
        }

        PDPage page = this.pdfDocument.getPage(pageNumber - 1);
        this.ProcessResources(page.getResources(), prefix, addKey);
    }
    catch (Exception exception)
    {
        throw new PDFToolkitException(exception.Message, exception);
    }
}
/// <summary>
/// Extracts the text inside a rectangular region of one page, with all carriage returns
/// and line feeds removed.
/// </summary>
/// <param name="pdfDocument">Document to read from.</param>
/// <param name="X">X coordinate of the region.</param>
/// <param name="Y">Y coordinate of the region.</param>
/// <param name="width">Region width; must be greater than zero.</param>
/// <param name="height">Region height; must be greater than zero.</param>
/// <param name="pageNumber">1-based page number.</param>
/// <returns>
/// The region text, prefixed with a trial notice when <c>PDFHelper.AddStamp</c> is set;
/// an empty string when the size is invalid or an exception occurs.
/// </returns>
internal string GetTextByArea(PDDocument pdfDocument, double X, double Y, double width, double height, int pageNumber)
{
    try
    {
        PDFHelper.DisplayTrialPopupIfNecessary();

        string notice = string.Empty;
        if (PDFHelper.AddStamp)
        {
            notice = string.Concat(notice, "You are using a trial license of PDF Toolkit, as a result only the first three pages would be extracted.");
        }

        if (width <= 0 || height <= 0)
        {
            // An invalid size yields an empty result, without the trial notice.
            Console.WriteLine("Sorry the length and width you provided are not greater than zero, no text will be extracted.");
            return string.Empty;
        }

        PDPage targetPage = pdfDocument.getPage(pageNumber - 1);
        string regionName = "region";
        Rectangle2D area = new Rectangle2D.Double(X, Y, width, height);

        PDFTextStripperByArea areaStripper = new PDFTextStripperByArea();
        areaStripper.addRegion(regionName, area);
        areaStripper.extractRegions(targetPage);

        return string.Concat(notice, areaStripper.getTextForRegion(regionName).Replace("\r", string.Empty).Replace("\n", string.Empty));
    }
    catch (Exception exception)
    {
        Console.WriteLine("Sorry an exception occured when the text was being extracted\n{0}", exception.Message);
        return string.Empty;
    }
}
/// <summary>
/// Runs the content stream of a page through <c>processStream</c> and returns the
/// <c>_items</c> collection afterwards.
/// </summary>
/// <param name="aPage">Page the stream belongs to.</param>
/// <param name="resources">Resources used while processing the stream.</param>
/// <param name="cosStream">Content stream to process.</param>
/// <returns>The <c>_items</c> collection of this instance.</returns>
public List <CharItem> GetItems(PDPage aPage, PDResources resources, COSStream cosStream)
{
    this.processStream(aPage, resources, cosStream);

    return _items;
}
/// <summary>
/// Loads a PDF and feeds its content to <c>WordEx</c>: a title derived from the file
/// name (everything after the first underscore), every page image not already seen
/// according to <c>ContainsDocImage</c>, and finally the complete document text.
/// </summary>
/// <param name="pdfFilePath">Path of the PDF file to convert.</param>
/// <param name="tempDir">Not used by this method.</param>
public static void ConvertToDoc(string pdfFilePath, string tempDir)
{
    var pdfHeader = System.IO.Path.GetFileName(pdfFilePath);
    var underscoreIndex = pdfHeader.IndexOf('_');
    pdfHeader = pdfHeader.Remove(0, underscoreIndex + 1);
    WordEx.AddTitle(pdfHeader);

    // Load directly; the previous code first allocated an empty PDDocument that was
    // immediately replaced and never closed.
    _pdfDoc = null;
    try
    {
        _pdfDoc = PDDocument.load(pdfFilePath);
    }
    catch
    {
        MessageBox.Show("Cant load pdf, try re-downloading:" + Environment.NewLine + pdfFilePath, "PDF Error");
        return;
    }

    try
    {
        var pagelist = _pdfDoc.getDocumentCatalog().getAllPages();
        for (int x = 0; x < pagelist.size(); x++)
        {
            PDPage page = (PDPage)pagelist.get(x);
            PDResources pdResources = page.getResources();
            if (pdResources == null)
            {
                // Page without a resource dictionary: nothing to extract.
                continue;
            }

            Map pageImages = pdResources.getImages();
            if (pageImages == null)
            {
                continue;
            }

            Iterator imageIter = pageImages.keySet().iterator();
            while (imageIter.hasNext())
            {
                String key = (String)imageIter.next();
                PDXObjectImage pdxObjectImage = (PDXObjectImage)pageImages.get(key);
                var buffImage = pdxObjectImage.getRGBImage();
                Bitmap theImage = buffImage.getBitmap();
                if (!ContainsDocImage(theImage))
                {
                    // No per-page header is available here, so the document header is
                    // passed for both header arguments.
                    WordEx.AddImage(theImage, pdfHeader, pdfHeader);
                }
                else
                {
                    theImage.Dispose();
                }
            }
        }

        string docText = _stripper.getText(_pdfDoc);
        WordEx.AddText(docText);
    }
    finally
    {
        // Release the cached bitmaps and the document even when extraction fails.
        foreach (Bitmap btmap in _docImages)
        {
            btmap.Dispose();
        }

        _docImages.Clear();
        _pdfDoc.close();
        _pdfDoc = null;
    }
}
/// <summary>
/// Calls <c>AddToPageList</c>, advances the page counter by one and then lets the
/// base class process the page.
/// </summary>
/// <param name="page">Page to process.</param>
public override void processPage(PDPage page)
{
    this.AddToPageList();
    this.pageNo += 1;

    base.processPage(page);
}
/// <summary>
/// Creates a page wrapper around a new, default-constructed PDFBox page.
/// </summary>
public PDFPage()
{
    page = new PDPage();
}
/// <summary>
/// Creates a page wrapper around a new PDFBox page and sets its media box to the
/// rectangle <c>PDFHelper.GetPDFRectangle</c> returns for <paramref name="pageSize"/>.
/// </summary>
/// <param name="pageSize">Page size to apply.</param>
public PDFPage(PageSize pageSize)
{
    page = new PDPage();
    MediaBox = PDFHelper.GetPDFRectangle(pageSize);
}
/// <summary>
/// Creates a page wrapper around a new PDFBox page and uses
/// <paramref name="rectangle"/> as its media box.
/// </summary>
/// <param name="rectangle">Media box to apply.</param>
public PDFPage(PDFRectangle rectangle)
{
    page = new PDPage();
    MediaBox = rectangle;
}
/// <summary>
/// Renders the text file <c>textFile</c> into a PDF saved at <paramref name="pdfFile"/>.
/// Lines are word-wrapped to the page width minus a 40 unit margin on each side, a new
/// page is started when the cursor drops below the bottom margin, and a form feed
/// character inside a word forces a page break.
/// </summary>
/// <param name="pdfFile">Path of the PDF file to write.</param>
/// <exception cref="PDFToolkitException">Wraps a failure while saving the document.</exception>
internal void createPDFFromText(string pdfFile)
{
    PDFHelper.DisplayTrialPopupIfNecessary();
    PDFHelper.CheckOutputFolder(Path.GetDirectoryName(pdfFile));

    // Created after the helper calls so that nothing leaks if one of them throws.
    PDDocument document = new PDDocument();
    try
    {
        this.pdfFileName = pdfFile;
        int margin = 40;
        if (this.font == null)
        {
            this.font = PDFType1Font.TIMES_ROMAN;
        }

        PDRectangle pageSize = PDRectangle.LETTER;
        if (this.isLandescape)
        {
            pageSize = new PDRectangle(pageSize.getHeight(), pageSize.getWidth());
        }

        // Line advance: font bounding box height scaled to the font size plus 5% leading.
        float lineHeight = this.font.PDFBoxFont.getBoundingBox().getHeight() / 1000f;
        lineHeight = lineHeight * this.fontSize * 1.05f;

        // The file is read here only; the previous code also opened a FileReader that
        // was never used or closed.
        string[] lines = System.IO.File.ReadAllLines(this.textFile);

        PDPage page = new PDPage(pageSize);
        PDPageContentStream content = null;
        // Negative start value forces a first page to be opened before any text is written.
        float cursorY = -1f;
        float maxLineWidth = page.getMediaBox().getWidth() - (float)(2 * margin);

        foreach (string rawLine in lines)
        {
            // Tabs and stray line terminators are dropped; words are split on single spaces.
            string[] words = Regex.Replace(rawLine, "\\t|\\n|\\r", "").Split(new char[] { ' ' });
            int wordIndex = 0;
            while (wordIndex < words.Length)
            {
                StringBuilder lineText = new StringBuilder();
                float projectedWidth = 0f;
                bool formFeed = false;
                bool keepFilling;

                // Collect words until the next one would not fit, the words run out,
                // or a form feed is encountered.
                do
                {
                    string word;
                    string afterFormFeed = "";
                    int formFeedAt = words[wordIndex].IndexOf('\f');
                    if (formFeedAt != -1)
                    {
                        formFeed = true;
                        word = words[wordIndex].Substring(0, formFeedAt);
                        afterFormFeed = words[wordIndex].Substring(formFeedAt + 1);
                    }
                    else
                    {
                        word = words[wordIndex];
                    }

                    if (word.Length > 0 || !formFeed)
                    {
                        lineText.Append(word);
                        lineText.Append(" ");
                    }

                    if (formFeed && afterFormFeed.Length != 0)
                    {
                        // Text after the form feed is handled as its own word next round.
                        words[wordIndex] = afterFormFeed;
                    }
                    else
                    {
                        wordIndex++;
                    }

                    if (formFeed)
                    {
                        break;
                    }

                    if (wordIndex < words.Length)
                    {
                        string nextWord = words[wordIndex];
                        formFeedAt = nextWord.IndexOf('\f');
                        if (formFeedAt != -1)
                        {
                            nextWord = nextWord.Substring(0, formFeedAt);
                        }

                        string candidate = string.Concat(lineText.ToString(), " ", nextWord);
                        try
                        {
                            projectedWidth = this.font.PDFBoxFont.getStringWidth(candidate) / 1000f * this.fontSize;
                        }
                        catch (Exception)
                        {
                            // Best effort kept from the original code: when the width cannot
                            // be measured the previous estimate is reused.
                        }
                    }

                    keepFilling = wordIndex < words.Length && projectedWidth < maxLineWidth;
                }
                while (keepFilling);

                if (cursorY < (float)margin)
                {
                    this.BeginTextPage(document, pageSize, margin, lineHeight, ref page, ref content, ref cursorY);
                }

                if (content == null)
                {
                    throw new java.io.IOException("Error:Expected non-null content stream.");
                }

                content.newLineAtOffset(0f, -lineHeight);
                cursorY = cursorY - lineHeight;
                try
                {
                    content.showText(lineText.ToString());
                    if (formFeed)
                    {
                        this.BeginTextPage(document, pageSize, margin, lineHeight, ref page, ref content, ref cursorY);
                    }
                }
                catch (Exception)
                {
                    // Best effort kept from the original code: a line that cannot be
                    // written is skipped instead of aborting the conversion.
                }
            }
        }

        if (lines.Length == 0)
        {
            // Empty input still produces a document with one blank page.
            document.addPage(page);
        }

        if (content != null)
        {
            content.endText();
            content.close();
        }

        if (PDFHelper.AddStamp)
        {
            document = PDFHelper.AddTrialStampIfNecessary(document);
        }

        try
        {
            document.save(pdfFile);
        }
        catch (Exception exception)
        {
            // Keep the failing exception itself as the inner exception so the cause is not lost.
            throw new PDFToolkitException(exception.Message, exception);
        }
    }
    finally
    {
        // Close on success as well as on failure; exceptions propagate with their
        // original stack trace.
        if (document != null)
        {
            document.close();
        }
    }
}

/// <summary>
/// Adds a fresh page to the document, finishes the previous content stream (if any)
/// and opens a text block positioned at the top-left margin of the new page.
/// </summary>
private void BeginTextPage(PDDocument document, PDRectangle pageSize, int margin, float lineHeight, ref PDPage page, ref PDPageContentStream content, ref float cursorY)
{
    page = new PDPage(pageSize);
    document.addPage(page);
    if (content != null)
    {
        content.endText();
        content.close();
    }

    content = new PDPageContentStream(document, page);
    content.setFont(this.font.PDFBoxFont, this.fontSize);
    content.beginText();
    cursorY = page.getMediaBox().getHeight() - (float)margin + lineHeight;
    content.newLineAtOffset((float)margin, cursorY);
}