/**
 * Downloads the Saratoga (TID=SAR) chart PDF for the given day, feeds every page's
 * content stream through an SBTextRenderer, and, when the shared `pages` collection
 * is non-empty afterwards, runs a DataHandler over it on a worker thread.
 *
 * @param dateTime        the day whose chart is requested
 * @param m_dbConnection  SQLite connection handed to the DataHandler
 */
public static void collectDataforDay(DateTime dateTime, SQLiteConnection m_dbConnection)
{
    // The DT query parameter is MM/dd/yyyy with zero-padded month and day.
    string zeroMonth = dateTime.Month < 10 ? "0" : "";
    string zeroDay = dateTime.Day < 10 ? "0" : "";
    string date = zeroMonth + dateTime.Month + "/" + zeroDay + dateTime.Day + "/" + dateTime.Year;
    string url = "http://www.equibase.com/premium/eqbPDFChartPlus.cfm?RACE=A&BorP=P&TID=SAR&CTRY=USA&DT=" + date + "&DAY=D&STYLE=EQB";

    PdfReader reader;
    try
    {
        reader = new PdfReader(url);
    }
    catch (Exception)
    {
        // The first attempt failed: prompt the operator, wait for two key presses,
        // then retry exactly once. A second failure propagates to the caller.
        Console.WriteLine("CAPTCHA TIME");
        Console.ReadKey();
        Console.ReadKey();
        reader = new PdfReader(url);
    }

    try
    {
        StringBuilder builder = new StringBuilder();
        for (int x = 1; x <= reader.NumberOfPages; x++)
        {
            IRenderListener listener = new SBTextRenderer(builder);
            PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
            PdfDictionary pageDic = reader.GetPageN(x);
            PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
            processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, x), resourcesDic);
        }

        // NOTE(review): `pages` is declared outside this method; presumably the
        // render listener fills it while the pages are processed - confirm.
        if (pages.Count != 0)
        {
            DataHandler handler = new DataHandler(dateTime, pages, m_dbConnection);
            Thread thread = new Thread(new ThreadStart(handler.extractPdfData));
            thread.Start();
            thread.Join();
            pages.Clear();
        }
        else
        {
            // If there were no races on this particular day, simply skip it.
            Console.WriteLine("Invalid Date: " + date);
        }
    }
    finally
    {
        // Release the reader on every path, including "no races" and exceptions.
        reader.Dispose();
    }
}
// Regression test: a DocumentFont can be constructed for a Type0 font with
// /Identity-H encoding and no ToUnicode map.
virtual public void TestConstructionForType0WithoutToUnicodeMap()
{
    string pdfPath = TestResourceUtils.GetResourceAsTempFile(TEST_RESOURCES_PATH, "type0FontWithoutToUnicodeMap.pdf");
    PdfReader reader = new PdfReader(new RandomAccessFileOrArray(pdfPath), null);
    PdfName fontKey = new PdfName("TT9");
    try
    {
        // The font under test lives in the resources of page 2.
        PdfDictionary pageResources = reader.GetPageN(2).GetAsDict(PdfName.RESOURCES);
        PdfDictionary fonts = pageResources.GetAsDict(PdfName.FONT);

        PdfDictionary resolvedFont = fonts.GetAsDict(fontKey);
        PRIndirectReference fontReference = (PRIndirectReference)fonts.Get(fontKey);

        Assert.AreEqual(PdfName.TYPE0, resolvedFont.GetAsName(PdfName.SUBTYPE));
        Assert.AreEqual("/Identity-H", resolvedFont.GetAsName(PdfName.ENCODING).ToString());
        Assert.IsNull(resolvedFont.Get(PdfName.TOUNICODE), "This font should not have a ToUnicode map");

        // This construction used to throw a NullReferenceException.
        new DocumentFont(fontReference);
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Sets the same crop box on every page of a fixed input PDF and writes the result
/// to a fixed output path.
/// </summary>
public static void CropPdf()
{
    // NOTE(review): these are passed straight to the four-argument PdfRectangle
    // constructor; if that takes corner coordinates, w/h are really the upper-right
    // x/y rather than a width and height - confirm.
    var xll = 200;
    var yll = 170;
    var w = 800;
    var h = 800;

    var reader = new iTextSharp.text.pdf.PdfReader(@"C:\Projects\31g\trunk\temp\pdf\20140208110036_20.pdf");
    try
    {
        var pfgRect = new iTextSharp.text.pdf.PdfRectangle(xll, yll, w, h);
        var n = reader.NumberOfPages;
        for (var i = 1; i <= n; i++)
        {
            reader.GetPageN(i).Put(iTextSharp.text.pdf.PdfName.CROPBOX, pfgRect);
        }

        // The original wrapped this path in string.Format without any placeholders,
        // which returned it unchanged; the literal is used directly.
        using (var output = new System.IO.FileStream(@"C:\Projects\31g\trunk\Notes\misc\Maps\Europe_565BCE.pdf", FileMode.Create))
        {
            // Closing the stamper is what writes the modified document.
            var stamper = new iTextSharp.text.pdf.PdfStamper(reader, output);
            stamper.Close();
        }
    }
    finally
    {
        // Release the input file even when stamping fails.
        reader.Close();
    }
}
/// <summary>
/// Removes layers from a PDF document.
/// </summary>
/// <remarks>
/// Each page is parsed with an <c>OCGParser</c> built from the layer names, its
/// <c>PieceInfo</c> entry is dropped, and annotations and properties that refer to
/// the layers are removed. The names are then removed from the catalog's optional
/// content properties, if the document has any.
/// </remarks>
/// <param name="reader"> a PdfReader containing a PDF document </param>
/// <param name="layers"> a sequence of names of OCG layers </param>
/// <exception cref="IOException"> </exception>
public virtual void RemoveLayers(PdfReader reader, params string[] layers)
{
    ICollection<string> ocgs = new HashSet2<string>();
    foreach (string layer in layers)
    {
        ocgs.Add(layer);
    }

    OCGParser parser = new OCGParser(ocgs);
    int n = reader.NumberOfPages;
    for (int i = 1; i <= n; i++)
    {
        PdfDictionary page = reader.GetPageN(i);
        Parse(parser, page);
        page.Remove(new PdfName("PieceInfo"));
        RemoveAnnots(page, ocgs);
        RemoveProperties(page, ocgs);
    }

    PdfDictionary root = reader.Catalog;
    PdfDictionary ocproperties = root.GetAsDict(PdfName.OCPROPERTIES);
    // A document without optional content has no /OCProperties entry; skip the
    // catalog clean-up instead of dereferencing null.
    if (ocproperties != null)
    {
        RemoveOCGsFromArray(ocproperties, PdfName.OCGS, ocgs);
        PdfDictionary d = ocproperties.GetAsDict(PdfName.D);
        if (d != null)
        {
            RemoveOCGsFromArray(d, PdfName.ON, ocgs);
            RemoveOCGsFromArray(d, PdfName.OFF, ocgs);
            RemoveOCGsFromArray(d, PdfName.LOCKED, ocgs);
            RemoveOCGsFromArray(d, PdfName.RBGROUPS, ocgs);
            RemoveOCGsFromArray(d, PdfName.ORDER, ocgs);
            RemoveOCGsFromArray(d, PdfName.AS, ocgs);
        }
    }

    reader.RemoveUnusedObjects();
}
/// <summary>
/// Reports whether a PDF looks like a scan, i.e. none of its pages declares a
/// font dictionary in its resources.
/// </summary>
/// <param name="pdfFilePath">Path of the PDF to inspect.</param>
/// <returns><c>false</c> as soon as one page has a /Font resource dictionary; otherwise <c>true</c>.</returns>
public static bool IsScannedPdf(string pdfFilePath)
{
    PdfReader reader = new PdfReader(pdfFilePath);
    try
    {
        int pageCount = reader.NumberOfPages;
        for (int pageNo = 1; pageNo <= pageCount; pageNo++)
        {
            PdfDictionary pageResources = reader.GetPageN(pageNo).GetAsDict(PdfName.RESOURCES);
            if (pageResources == null)
            {
                continue;
            }

            // Any font resource means the page carries real text.
            if (pageResources.GetAsDict(PdfName.FONT) != null)
            {
                return false;
            }
        }

        return true;
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Extracts one image per page from a decrypted PDF, re-encodes it as JPEG, encrypts
/// the JPEG bytes with a per-page token and writes them under <paramref name="pagePath"/>.
/// A new <c>Page</c> is saved and appended to <paramref name="pages"/> for every image.
/// </summary>
/// <param name="password">Used together with the page id to derive the page token.</param>
/// <param name="key">Key passed to <c>AES.DecryptFile</c> for the source document.</param>
/// <param name="docPath">Path of the encrypted source document.</param>
/// <param name="pagePath">Directory that receives the encrypted page files.</param>
/// <param name="pages">Collection that receives the created pages.</param>
public static void ExtractImagesFromPDF(string password, string key, string docPath, string pagePath, PageCollection pages)
{
    // NOTE: This will only get the first image it finds per page.
    PdfReader pdf = new PdfReader(Utility.Security.AES.DecryptFile(key, docPath));
    try
    {
        for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
        {
            PdfDictionary pg = pdf.GetPageN(pageNumber);

            // Recursively search pages, forms and groups for images.
            PdfObject obj = FindImageInPDFDictionary(pg);
            if (obj == null)
            {
                continue;
            }

            int xrefIndex = ((PRIndirectReference)obj).Number;
            PdfObject pdfObj = pdf.GetPdfObject(xrefIndex);
            byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfObj);
            if (bytes == null)
            {
                continue;
            }

            // The source stream must stay open for as long as the image is used.
            using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
            using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
            using (System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1))
            {
                Page page = new Page();
                page.Order = pages.Count;
                page.Save();
                page.Token = Utility.Security.AES.GetToken(page.Id, password);

                parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                System.Drawing.Imaging.ImageCodecInfo jpegEncoder = System.Drawing.Imaging.ImageCodecInfo.GetImageEncoders().FirstOrDefault(e => e.FormatDescription == "JPEG");

                using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
                {
                    img.Save(ms, jpegEncoder, parms);
                    System.IO.File.WriteAllBytes(System.IO.Path.Combine(pagePath, page.Filename), SoftFluent.Samples.GED.Utility.Security.AES.EncryptStream(page.Token, ms.ToArray()).ToArray());
                }

                pages.Add(page);
            }
        }
    }
    finally
    {
        pdf.Close();
    }
}
/**
 * Returns the content stream bytes of one page.
 * @param reader  the reader that holds the document
 * @param pageNum the number of the page whose content stream is wanted
 * @return the bytes produced for the page's /Contents entry, or an empty
 *         array when the page has no /Contents entry
 * @throws IOException
 * @since 5.0.1
 */
public static byte[] GetContentBytesForPage(PdfReader reader, int pageNum)
{
    PdfObject contents = reader.GetPageN(pageNum).Get(PdfName.CONTENTS);
    return contents == null
        ? new byte[0]
        : ContentByteUtils.GetContentBytesFromContentObject(contents);
}
// ---------------------------------------------------------------------------
/**
 * Creates a HashSet containing information about the fonts in the src PDF file.
 * Every page's resource dictionary is handed to ProcessResource, which fills the set.
 * @param src the PDF file
 * @return the set filled by ProcessResource
 *
 * HashSet only available in .NET >= 3.5
 */
public HashSet<String> ListFonts(byte[] src)
{
    HashSet<String> set = new HashSet<String>();
    PdfReader reader = new PdfReader(src);
    try
    {
        for (int k = 1; k <= reader.NumberOfPages; ++k)
        {
            PdfDictionary resources = reader.GetPageN(k).GetAsDict(PdfName.RESOURCES);
            ProcessResource(set, resources);
        }
    }
    finally
    {
        // The original never closed the reader.
        reader.Close();
    }
    return set;
}
/// <summary>
/// Runs the content stream of one page through a content stream processor that
/// reports to the given extraction strategy.
/// </summary>
/// <param name="reader">Reader holding the document.</param>
/// <param name="page">Number of the page to process.</param>
/// <param name="strategy">Strategy handed to the processor.</param>
public static void ProcessContentPage(PdfReader reader, int page, Test_iTextSharp.ITextExtractionStrategy strategy)
{
    // The original also built a PdfReaderContentParser that was never used; it is dropped.
    PdfDictionary pageDic = reader.GetPageN(page);
    PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
    Test_iTextSharp.PdfContentStreamProcessor processor = new Test_iTextSharp.PdfContentStreamProcessor(strategy);
    byte[] bytes = ContentByteUtils.GetContentBytesForPage(reader, page);
    processor.ProcessContent(bytes, resourcesDic);
}
/// <summary>
/// Extracts one image per page from a PDF and saves it as
/// <c>{pageNumber}.jpg</c> in <paramref name="outputPath"/>.
/// </summary>
/// <param name="sourcePdf">Path of the PDF to read.</param>
/// <param name="outputPath">Target directory; created when an image is found and it does not exist yet.</param>
public static void ExtractImagesFromPDF(string sourcePdf, string outputPath)
{
    // NOTE: This will only get the first image it finds per page.
    // The original also opened a RandomAccessFileOrArray on the same file and never
    // used it; that extra file handle is gone.
    PdfReader pdf = new PdfReader(sourcePdf);
    try
    {
        for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
        {
            PdfDictionary pg = pdf.GetPageN(pageNumber);

            // Recursively search pages, forms and groups for images.
            PdfObject obj = FindImageInPDFDictionary(pg);
            if (obj == null)
            {
                continue;
            }

            int xrefIndex = ((PRIndirectReference)obj).Number;
            PdfObject pdfObj = pdf.GetPdfObject(xrefIndex);
            byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfObj);
            if (bytes == null)
            {
                continue;
            }

            // The source stream must stay open until the image has been saved.
            using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
            using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
            using (System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1))
            {
                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(outputPath);
                string path = Path.Combine(outputPath, String.Format(@"{0}.jpg", pageNumber));

                parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                System.Drawing.Imaging.ImageCodecInfo jpegEncoder = GetImageEncoder("JPEG");
                img.Save(path, jpegEncoder, parms);
            }
        }
    }
    finally
    {
        pdf.Close();
    }
}
/// <summary>
/// Sets /Rotate to 270 on the first page of a fixed input PDF and writes the result
/// to a fixed output path.
/// </summary>
public static void RotatePdf()
{
    var reader = new iTextSharp.text.pdf.PdfReader(@"C:\Projects\31g\trunk\temp\pdf\Europe_565BCE.pdf");
    try
    {
        var pageDict = reader.GetPageN(1);
        pageDict.Put(iTextSharp.text.pdf.PdfName.ROTATE, new iTextSharp.text.pdf.PdfNumber(270));

        using (var output = new System.IO.FileStream(@"C:\Projects\31g\trunk\Notes\misc\Maps\Europe_565BCE.pdf", FileMode.Create))
        {
            // Closing the stamper is what writes the modified document.
            var stamper = new iTextSharp.text.pdf.PdfStamper(reader, output);
            stamper.Close();
        }
    }
    finally
    {
        // Release the input file even when stamping fails.
        reader.Close();
    }
}
// Checks that GetPageResources gives matching key collections whether it is
// called with a page number or with that page's dictionary.
public void testPageResources()
{
    string pdfPath = TestResourceUtils.GetResourceAsTempFile(TEST_RESOURCES_PATH, "getLinkTest2.pdf");
    RandomAccessFileOrArray source = new RandomAccessFileOrArray(pdfPath);
    PdfReader rdr = new PdfReader(source, new byte[0]);

    PdfDictionary byNumber = rdr.GetPageResources(1);
    PdfDictionary byDictionary = rdr.GetPageResources(rdr.GetPageN(1));

    // same size & keys
    bool sameKeys = byNumber.Keys.Equals(byDictionary.Keys);
    Assert.IsTrue(sameKeys);

    rdr.Close();
}
/// <summary>
/// Processes the content stream of the first page of <paramref name="src"/> with a
/// <c>MyTextRenderListener</c> that is given a writer on <paramref name="dest"/>.
/// </summary>
/// <param name="src">Path of the PDF to read.</param>
/// <param name="dest">Path of the output file; created or overwritten.</param>
public void extractSnippets(String src, String dest)
{
    // Disposing the writer flushes it and closes the underlying file stream.
    using (TextWriter output = new StreamWriter(new FileStream(dest, FileMode.Create)))
    {
        PdfReader reader = new PdfReader(src);
        try
        {
            IRenderListener listener = new MyTextRenderListener(output);
            PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
            PdfDictionary pageDic = reader.GetPageN(1);
            PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
            processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, 1), resourcesDic);
            output.Flush();
        }
        finally
        {
            reader.Close();
        }
    }
}
/// <summary>
/// Replaces the media box of the first page of the input PDF with
/// <c>PageLayoutA4.GetLabelRect(0)</c> and writes the document to
/// <paramref name="outputStream"/>, which is left open.
/// </summary>
/// <param name="outputStream">Receives the modified PDF; not closed by this method.</param>
/// <param name="inputStream">Source PDF.</param>
/// <param name="pageLayout">
/// Not read by this method. NOTE(review): the rectangle always comes from
/// <c>PageLayoutA4</c>; confirm whether this parameter was meant to supply it.
/// </param>
public void TrimPDFFile(Stream outputStream, Stream inputStream, PageLayout pageLayout)
{
    using (var reader = new PdfReader(inputStream))
    {
        // The original also fetched the page size into a local that was never read.
        var inputPage = reader.GetPageN(1);
        inputPage.Put(PdfName.MEDIABOX, new PdfRectangle(PageLayoutA4.GetLabelRect(0)));

        using (var stamper = new PdfStamper(reader, outputStream))
        {
            // Keep the caller's stream usable after the stamper is disposed.
            stamper.Writer.CloseStream = false;
            stamper.MarkUsed(inputPage);
        }
    }
}
// ---------------------------------------------------------------------------
/**
 * Sets the crop box of every page to (55, 76, 560, 816) and returns the
 * resulting document.
 * @param src the original PDF
 * @return the bytes of the manipulated PDF
 */
public byte[] ManipulatePdf(byte[] src)
{
    PdfReader reader = new PdfReader(src);
    try
    {
        PdfRectangle rect = new PdfRectangle(55, 76, 560, 816);
        int n = reader.NumberOfPages;
        for (int i = 1; i <= n; i++)
        {
            PdfDictionary pageDict = reader.GetPageN(i);
            pageDict.Put(PdfName.CROPBOX, rect);
        }

        using (MemoryStream ms = new MemoryStream())
        {
            // Disposing the stamper writes the modified document into ms.
            using (PdfStamper stamper = new PdfStamper(reader, ms))
            {
            }
            return ms.ToArray();
        }
    }
    finally
    {
        // The original never closed the reader.
        reader.Close();
    }
}
// ---------------------------------------------------------------------------
/**
 * Adds 90 to the rotation of every page and returns the resulting document.
 * @param src the original PDF
 * @return the bytes of the manipulated PDF
 */
public byte[] ManipulatePdf(byte[] src)
{
    PdfReader reader = new PdfReader(src);
    try
    {
        int n = reader.NumberOfPages;
        for (int i = 1; i <= n; i++)
        {
            int rot = reader.GetPageRotation(i);
            PdfDictionary pageDict = reader.GetPageN(i);
            pageDict.Put(PdfName.ROTATE, new PdfNumber(rot + 90));
        }

        using (MemoryStream ms = new MemoryStream())
        {
            // Disposing the stamper writes the modified document into ms.
            using (PdfStamper stamper = new PdfStamper(reader, ms))
            {
            }
            return ms.ToArray();
        }
    }
    finally
    {
        // The original never closed the reader.
        reader.Close();
    }
}
/// <summary>
/// Takes the first XObject of page 1, scales it to the page size and stamps it
/// over the content of page 1, writing the result to <paramref name="dest"/>.
/// </summary>
/// <param name="src">Path of the source PDF.</param>
/// <param name="dest">Path of the output PDF; created or overwritten.</param>
public void ManipulatePdf(string src, string dest)
{
    PdfReader reader = new PdfReader(src);
    try
    {
        // We assume that there's a single large picture on the first page
        PdfDictionary page = reader.GetPageN(1);
        PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);
        PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT);

        // Take the first key only; foreach also disposes the enumerator.
        PdfName imgName = null;
        foreach (PdfName name in xobjects.Keys)
        {
            imgName = name;
            break;
        }

        Image img = Image.GetInstance((PRIndirectReference) xobjects.GetAsIndirectObject(imgName));
        img.SetAbsolutePosition(0, 0);
        img.ScaleAbsolute(reader.GetPageSize(1));

        using (FileStream output = new FileStream(dest, FileMode.Create))
        {
            PdfStamper stamper = new PdfStamper(reader, output);
            stamper.GetOverContent(1).AddImage(img);
            stamper.Close();
        }
    }
    finally
    {
        // Release the source file even when stamping fails.
        reader.Close();
    }
}
// ---------------------------------------------------------------------------
/**
 * Parses object and content information of a PDF into a text report.
 * For every page the report holds the page dictionary detail, an XObject
 * summary, the raw content stream and the extracted text.
 * @param pdf the original PDF
 * @return the report text
 *
 * this method uses code from;
 * PdfContentReaderTool.ListContentStreamForPage()
 * so i can pass in a byte array instead of file path
 */
public string InspectPdf(byte[] pdf)
{
    PdfReader reader = new PdfReader(pdf);
    try
    {
        int maxPageNum = reader.NumberOfPages;
        StringBuilder sb = new StringBuilder();
        for (int pageNum = 1; pageNum <= maxPageNum; pageNum++)
        {
            sb.AppendLine("==============Page " + pageNum + "====================");
            sb.AppendLine("- - - - - Dictionary - - - - - -");
            PdfDictionary pageDictionary = reader.GetPageN(pageNum);
            sb.AppendLine(PdfContentReaderTool.GetDictionaryDetail(pageDictionary));

            sb.AppendLine("- - - - - XObject Summary - - - - - -");
            sb.AppendLine(PdfContentReaderTool.GetXObjectDetail(pageDictionary.GetAsDict(PdfName.RESOURCES)));

            sb.AppendLine("- - - - - Content Stream - - - - - -");
            byte[] contentBytes;
            RandomAccessFileOrArray f = reader.SafeFile;
            try
            {
                contentBytes = reader.GetPageContent(pageNum, f);
            }
            finally
            {
                // Close the file handle even when reading the content fails.
                f.Close();
            }
            // Each byte is appended as one char, as in the original tool.
            foreach (byte b in contentBytes)
            {
                sb.Append((char)b);
            }

            sb.AppendLine("- - - - - Text Extraction - - - - - -");
            String extractedText = PdfTextExtractor.GetTextFromPage(reader, pageNum, new LocationTextExtractionStrategy());
            if (extractedText.Length != 0)
            {
                sb.AppendLine(extractedText);
            }
            else
            {
                sb.AppendLine("No text found on page " + pageNum);
            }
            sb.AppendLine();
        }
        return sb.ToString();
    }
    finally
    {
        // The original never closed the reader.
        reader.Close();
    }
}
// ---------------------------------------------------------------------------
/**
 * Adds the indirect reference of every page annotation to the /Fields array
 * of the document's AcroForm and returns the resulting document.
 * @param src the original PDF
 * @return the bytes of the manipulated PDF
 */
public byte[] ManipulatePdf(string src)
{
    PdfReader reader = new PdfReader(src);
    try
    {
        // NOTE(review): a document without /AcroForm or /Fields still fails on the
        // lines below, exactly as before - confirm whether callers rely on that.
        PdfDictionary root = reader.Catalog;
        PdfDictionary form = root.GetAsDict(PdfName.ACROFORM);
        PdfArray fields = form.GetAsArray(PdfName.FIELDS);

        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            PdfDictionary page = reader.GetPageN(i);
            PdfArray annots = page.GetAsArray(PdfName.ANNOTS);
            if (annots == null)
            {
                // A page without annotations has no /Annots entry.
                continue;
            }

            for (int j = 0; j < annots.Size; j++)
            {
                PdfIndirectReference annotRef = annots.GetAsIndirectObject(j);
                if (annotRef != null)
                {
                    // Entries that are not indirect references cannot be added as fields.
                    fields.Add(annotRef);
                }
            }
        }

        using (MemoryStream ms = new MemoryStream())
        {
            // Disposing the stamper writes the modified document into ms.
            using (PdfStamper stamper = new PdfStamper(reader, ms))
            {
            }
            return ms.ToArray();
        }
    }
    finally
    {
        // The original never closed the reader.
        reader.Close();
    }
}
// ---------------------------------------------------------------------------
/**
 * Extracts the files of all FileAttachment annotations from an existing PDF.
 * @param src the existing PDF
 * @param zip the ZipFile object that receives one entry per embedded file
 */
public void ExtractAttachments(byte[] src, ZipFile zip)
{
    PdfReader reader = new PdfReader(src);
    try
    {
        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            PdfArray array = reader.GetPageN(i).GetAsArray(PdfName.ANNOTS);
            if (array == null)
            {
                continue;
            }

            for (int j = 0; j < array.Size; j++)
            {
                PdfDictionary annot = array.GetAsDict(j);
                if (annot == null || !PdfName.FILEATTACHMENT.Equals(annot.GetAsName(PdfName.SUBTYPE)))
                {
                    continue;
                }

                // Skip annotations that lack a file specification or embedded files
                // instead of failing on a null dictionary.
                PdfDictionary fs = annot.GetAsDict(PdfName.FS);
                PdfDictionary refs = fs == null ? null : fs.GetAsDict(PdfName.EF);
                if (refs == null)
                {
                    continue;
                }

                foreach (PdfName name in refs.Keys)
                {
                    // The entry name is the string stored in the file specification
                    // under the same key as the embedded stream.
                    zip.AddEntry(
                        fs.GetAsString(name).ToString(),
                        PdfReader.GetStreamBytes((PRStream)refs.GetAsStream(name))
                    );
                }
            }
        }
    }
    finally
    {
        // The original never closed the reader.
        reader.Close();
    }
}
// Writes a one-paragraph document containing an underlined chunk with leading and
// trailing spaces, reads it back and checks the length of the text reported by
// the render listener.
public virtual void SpaceTrimColumnTextTest()
{
    Document doc = new Document(PageSize.A4, 50, 30, 50, 30);
    PdfWriter writer = PdfWriter.GetInstance(doc, new FileStream(OUTSPTRIMCT, FileMode.Create));
    doc.Open();

    Phrase under = new Phrase();
    under.Font = new Font(Font.FontFamily.TIMES_ROMAN, 12, Font.UNDERLINE);
    under.Add(new Chunk(" 1                                                      1                                                                                                                             9      "));

    Paragraph underlineTest = new Paragraph(under);
    underlineTest.KeepTogether = true;
    doc.Add(underlineTest);

    doc.Close();
    writer.Close();

    PdfReader reader = new PdfReader(OUTSPTRIMCT);
    try
    {
        MyTextRenderListener listener = new MyTextRenderListener();
        PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
        PdfDictionary pageDic = reader.GetPageN(1);
        PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
        processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, 1), resourcesDic);

        // AreEqual reports the actual length when the check fails.
        Assert.AreEqual(60, listener.GetText().Length, "Unexpected text length");
    }
    finally
    {
        // The original never closed the reader.
        reader.Close();
    }
}
/// <summary>Parses images from pdf document.</summary>
/// <remarks>
/// Only indirectly referenced XObjects whose subtype is Image are considered.
/// Any exception raised while walking the pages is written to the console and the
/// images collected so far are returned. The memory stream behind each image is
/// left open and handed back in <c>ParsedImage.ImageStream</c>.
/// </remarks>
/// <param name="filePath">The pdf-file full path.</param>
/// <returns>Collection of images and streams that are associated with them.</returns>
public static List<ParsedImage> ParseImages(string filePath)
{
    var imgList = new List<ParsedImage>();
    var raf = new RandomAccessFileOrArray(filePath);
    PdfReader reader;
    try
    {
        reader = new PdfReader(raf, null);
    }
    catch
    {
        // Do not leak the file handle when the document cannot be opened.
        raf.Close();
        throw;
    }

    try
    {
        for (var pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
        {
            var pg = reader.GetPageN(pageNumber);
            var size = reader.GetPageSize(pageNumber);

            var res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
            if (res == null)
            {
                // A page without resources used to abort the whole parse with a
                // NullReferenceException; it is now skipped.
                continue;
            }

            var xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
            if (xobj == null)
            {
                continue;
            }

            foreach (var name in xobj.Keys)
            {
                var obj = xobj.Get(name);
                if (!obj.IsIndirect())
                {
                    continue;
                }

                var tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                var type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
                if (!PdfName.IMAGE.Equals(type))
                {
                    continue;
                }

                var refIndex = ((PRIndirectReference)obj).Number;
                var pdfObj = reader.GetPdfObject(refIndex);
                var bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfObj);
                if (bytes == null)
                {
                    continue;
                }

                // Deliberately not disposed: the stream travels with the image.
                var memStream = new MemoryStream(bytes) { Position = 0 };
                var img = Image.FromStream(memStream);
                imgList.Add(new ParsedImage
                {
                    Image = img,
                    ImageStream = memStream,
                    Format = img.RawFormat,
                    Width = size.Width,
                    Height = size.Height,
                    PerformedRotation = RotateFlipType.RotateNoneFlipNone
                });
            }
        }
    }
    catch (Exception exception)
    {
        Console.WriteLine(exception.Message);
    }
    finally
    {
        reader.Close();
        raf.Close();
    }

    return imgList;
}
/// <summary>
/// Copies <paramref name="src"/> to <paramref name="dest"/> while passing the
/// structure tree root to <c>Manipulate</c>, attaching an XMP metadata stream to
/// page 1 and setting the document XMP metadata with added <c>pdfaid</c>
/// attributes (part 1, conformance A) and optional custom metadata.
/// </summary>
/// <param name="src">Path of the source PDF.</param>
/// <param name="dest">Path of the output PDF; its file name without extension becomes the title.</param>
/// <param name="md">Supplies creator, author and the optional custom metadata bytes.</param>
void ManipulatePdf(string src, string dest, MetaData md)
{
    using (var reader = new ip.PdfReader(src))
    {
        var catalog = reader.Catalog;
        var structTreeRoot = catalog.GetAsDict(ip.PdfName.STRUCTTREEROOT);
        Manipulate(structTreeRoot);

        using (var stamper = new ip.PdfStamper(reader, new FileStream(dest, FileMode.Create)))
        {
            var page = reader.GetPageN(1);
            using (var ms = new MemoryStream())
            {
                var dic = new ip.PdfDictionary();

                // Reuse the creation date from the info dictionary when present;
                // otherwise fall back to the current time.
                DateTime time = DateTime.Now;
                var creationDateKey = ip.PdfName.CREATIONDATE.ToString().Substring(1);
                if (reader.Info.ContainsKey(creationDateKey))
                {
                    // Drop the two-character prefix, turn the apostrophes of the
                    // offset into colons and cut the trailing one so that the
                    // value matches "yyyyMMddHHmmsszzz".
                    var temp = reader.Info[creationDateKey].Substring(2).Replace('\'', ':');
                    temp = temp.Substring(0, temp.Length - 1);
                    time = DateTime.ParseExact(temp, "yyyyMMddHHmmsszzz", CultureInfo.InvariantCulture);
                }

                dic.Put(ip.PdfName.PRODUCER, new ip.PdfString(md.Creator));
                dic.Put(ip.PdfName.TITLE, new ip.PdfString(Path.GetFileNameWithoutExtension(dest)));
                dic.Put(ip.PdfName.CREATOR, new ip.PdfString(md.Creator));
                dic.Put(ip.PdfName.AUTHOR, new ip.PdfString(md.Author));
                dic.Put(ip.PdfName.CREATIONDATE, new ip.PdfDate(time));

                var xmp = new XmpWriter(ms, dic);
                xmp.Close();

                // The page-level metadata stream gets the XMP as first written.
                var reference = stamper.Writer.AddToBody(new ip.PdfStream(ms.ToArray()));
                page.Put(ip.PdfName.METADATA, reference.IndirectReference);

                var xml = new XmlDocument();
                xml.LoadXml(Encoding.UTF8.GetString(ms.ToArray()));

                // Second-level element below the document element.
                var node = xml.DocumentElement.FirstChild;
                node = node != null ? node.FirstChild : null;
                if (node != null)
                {
                    var attrId = xml.CreateAttribute("xmlns:pdfaid");
                    attrId.Value = "http://www.aiim.org/pdfa/ns/id/";
                    node.Attributes.Append(attrId);

                    var attrPart = xml.CreateAttribute("pdfaid:part", "http://www.aiim.org/pdfa/ns/id/");
                    attrPart.Value = "1";
                    node.Attributes.Append(attrPart);

                    var attrConf = xml.CreateAttribute("pdfaid:conformance", "http://www.aiim.org/pdfa/ns/id/");
                    attrConf.Value = "A";
                    node.Attributes.Append(attrConf);

                    if (md.CustomMetadata != null && md.CustomMetadata.Length > 0)
                    {
                        var dataNode = node.OwnerDocument.CreateElement("CustomMetaData");
                        node.AppendChild(dataNode);
                        dataNode.InnerText = System.Convert.ToBase64String(md.CustomMetadata);
                    }
                }

                // Truncate before saving: rewinding alone leaves stale trailing
                // bytes of the first packet when the re-serialized XML is shorter.
                ms.SetLength(0);
                xml.Save(ms);

                stamper.XmpMetadata = ms.ToArray();
                stamper.Close();
                reader.Close();
            }
        }
    }
}
/**
 * Lists the document fonts used on one page. Every entry of the returned list is an
 * <CODE>Object[]{String,PRIndirectReference}</CODE> holding the font name and the
 * indirect reference to the font.
 * @param reader the document whose fonts are listed
 * @param page the number of the page to inspect
 * @return the list of fonts and references
 */
public static List<object[]> GetDocumentFonts(PdfReader reader, int page)
{
    List<object[]> result = new List<object[]>();
    IntHashtable visited = new IntHashtable();
    PdfDictionary pageDictionary = reader.GetPageN(page);
    RecourseFonts(pageDictionary, visited, result, 1);
    return result;
}
/**
 * Writes information about a specific page from PdfReader to the specified output stream.
 * The output holds the page dictionary detail, an XObject summary, the raw content
 * stream and the extracted text.
 * @since 2.1.5
 * @param reader the PdfReader to read the page content from
 * @param pageNum the page number to read
 * @param outp the writer to send the content to
 * @throws IOException
 */
public static void ListContentStreamForPage(PdfReader reader, int pageNum, TextWriter outp)
{
    outp.WriteLine("==============Page " + pageNum + "====================");
    outp.WriteLine("- - - - - Dictionary - - - - - -");
    PdfDictionary pageDictionary = reader.GetPageN(pageNum);
    outp.WriteLine(GetDictionaryDetail(pageDictionary));

    outp.WriteLine("- - - - - XObject Summary - - - - - -");
    outp.WriteLine(GetXObjectDetail(pageDictionary.GetAsDict(PdfName.RESOURCES)));

    outp.WriteLine("- - - - - Content Stream - - - - - -");
    byte[] contentBytes;
    RandomAccessFileOrArray f = reader.SafeFile;
    try
    {
        contentBytes = reader.GetPageContent(pageNum, f);
    }
    finally
    {
        // Close the file handle even when reading the content fails.
        f.Close();
    }

    outp.Flush();
    // Each byte is written as one char.
    foreach (byte b in contentBytes)
    {
        outp.Write((char)b);
    }
    outp.Flush();

    outp.WriteLine("- - - - - Text Extraction - - - - - -");
    String extractedText = PdfTextExtractor.GetTextFromPage(reader, pageNum, new LocationTextExtractionStrategy());
    if (extractedText.Length != 0)
    {
        outp.WriteLine(extractedText);
    }
    else
    {
        outp.WriteLine("No text found on page " + pageNum);
    }
    outp.WriteLine();
}
// Copies page 1 of SOURCE18 as a tagged page and checks that the form XObject
// "Fm0" of the copied page still carries the font "T1_0" in its resources.
virtual public void CopyTaggedPdf19()
{
    InitializeDocument("19");

    PdfReader reader = new PdfReader(SOURCE18);
    copy.AddPage(copy.GetImportedPage(reader, 1, true));
    document.Close();
    reader.Close();

    reader = new PdfReader(output);
    PdfDictionary pageResources = reader.GetPageN(1).GetAsDict(PdfName.RESOURCES);
    PdfDictionary xobjects = pageResources.GetAsDict(PdfName.XOBJECT);
    PdfStream form = xobjects.GetAsStream(new PdfName("Fm0"));
    PdfDictionary formFonts = form.GetAsDict(PdfName.RESOURCES).GetAsDict(PdfName.FONT);
    PdfDictionary font = formFonts.GetAsDict(new PdfName("T1_0"));
    Assert.NotNull(font);
    reader.Close();
}
/**
 * Lists all document fonts. Every entry of the returned list is an
 * <CODE>Object[]{String,PRIndirectReference}</CODE> holding the font name and the
 * indirect reference to the font.
 * @param reader the document whose fonts are listed
 * @return the list of fonts and references
 */
public static List<object[]> GetDocumentFonts(PdfReader reader)
{
    List<object[]> result = new List<object[]>();
    // Shared across pages and passed to every RecourseFonts call.
    IntHashtable visited = new IntHashtable();
    int pageCount = reader.NumberOfPages;
    for (int pageNo = 1; pageNo <= pageCount; ++pageNo)
    {
        PdfDictionary pageDictionary = reader.GetPageN(pageNo);
        RecourseFonts(pageDictionary, visited, result, 1);
    }
    return result;
}
/**
 * Lists all document fonts. Every entry of the returned <CODE>ArrayList</CODE> is an
 * <CODE>Object[]{String,PRIndirectReference}</CODE> holding the font name and the
 * indirect reference to the font.
 * @param reader the document whose fonts are listed
 * @return the list of fonts and references
 */
public static ArrayList GetDocumentFonts(PdfReader reader)
{
    ArrayList result = new ArrayList();
    // Shared across pages and passed to every RecourseFonts call.
    IntHashtable visited = new IntHashtable();
    int pageCount = reader.NumberOfPages;
    for (int pageNo = 1; pageNo <= pageCount; ++pageNo)
    {
        PdfDictionary pageDictionary = reader.GetPageN(pageNo);
        RecourseFonts(pageDictionary, visited, result, 1);
    }
    return result;
}
/**
 * Retrieves comments from a file.
 * Collects the /Contents string of every annotation whose subtype is Text, each
 * followed by " | ".
 * @param path the PDF to read
 * @return the concatenated comments, or an empty string when there are none
 */
private string GetComments(string path)
{
    PdfReader pdfReader = new PdfReader(path);
    try
    {
        System.Text.StringBuilder txt = new System.Text.StringBuilder();
        for (int page = 1; page <= pdfReader.NumberOfPages; ++page)
        {
            PdfDictionary pagedic = pdfReader.GetPageN(page);
            PdfArray annotarray = (PdfArray)PdfReader.GetPdfObject(pagedic.Get(PdfName.ANNOTS));
            if (annotarray == null || annotarray.Size == 0)
            {
                continue;
            }

            foreach (PdfObject entry in annotarray.ArrayList)
            {
                PdfDictionary annotation = (PdfDictionary)PdfReader.GetPdfObject(entry);
                if (annotation == null)
                {
                    continue;
                }

                // Comparing from the constant side keeps an annotation without a
                // subtype from raising a NullReferenceException.
                PdfString contents = annotation.GetAsString(PdfName.CONTENTS);
                if (contents != null && PdfName.TEXT.Equals(annotation.Get(PdfName.SUBTYPE)))
                {
                    txt.Append(contents.ToString()).Append(" | ");
                }
            }
        }
        return txt.ToString();
    }
    finally
    {
        // Release the file even when reading an annotation fails.
        pdfReader.Close();
    }
}
/**
 * Lists the document fonts used on one page. Every entry of the returned
 * <CODE>ArrayList</CODE> is an <CODE>Object[]{String,PRIndirectReference}</CODE>
 * holding the font name and the indirect reference to the font.
 * @param reader the document whose fonts are listed
 * @param page the number of the page to inspect
 * @return the list of fonts and references
 */
public static ArrayList GetDocumentFonts(PdfReader reader, int page)
{
    ArrayList result = new ArrayList();
    IntHashtable visited = new IntHashtable();
    PdfDictionary pageDictionary = reader.GetPageN(page);
    RecourseFonts(pageDictionary, visited, result, 1);
    return result;
}
// Copies one tagged page from SOURCE10 and one untagged page from SOURCE19, then
// checks that the output has two pages and that both page dictionaries exist.
virtual public void CopyTaggedPdf17()
{
    InitializeDocument("17");

    PdfReader taggedSource = new PdfReader(SOURCE10);
    PdfReader plainSource = new PdfReader(SOURCE19);

    PdfImportedPage taggedPage = copy.GetImportedPage(taggedSource, 1, true);
    copy.AddPage(taggedPage);
    PdfImportedPage plainPage = copy.GetImportedPage(plainSource, 1, false);
    copy.AddPage(plainPage);

    document.Close();
    taggedSource.Close();
    plainSource.Close();

    PdfReader result = new PdfReader(output);
    Assert.AreEqual(2, result.NumberOfPages);
    for (int pageNo = 1; pageNo <= 2; pageNo++)
    {
        Assert.NotNull(result.GetPageN(pageNo));
    }
    result.Close();
}
/// <summary>
/// Writes the pages of <paramref name="document"/> into a new PDF at the path that
/// <c>SafeFilePath</c> returns for the document name.
/// </summary>
/// <remarks>
/// For a page whose source file has a .PDF extension, the page is imported, its UI
/// strings are stamped over the content and its rotation is set. For a page that
/// has an image stream, a new page holding the scaled image is added. IO errors
/// and other exceptions are reported through <c>Toolbox</c> message boxes and are
/// not rethrown.
/// </remarks>
/// <param name="document">View model providing the document name and the pages.</param>
public override void SaveDocument(ViewModels.DocumentViewModel document)
{
    string targetFilePath = SafeFilePath(document.DocName);
    Stream stream = null;
    try
    {
        string targetDirectory = Path.GetDirectoryName(targetFilePath);
        if (!Directory.Exists(targetDirectory))
        {
            Directory.CreateDirectory(targetDirectory);
        }

        stream = new FileStream(targetFilePath, FileMode.Create);
        iTextSharp.text.Document doc = new iTextSharp.text.Document();
        PdfCopy targetPdf = new PdfCopy(doc, stream);
        doc.Open();

        foreach (ViewModels.PageViewModel vm in document.Pages)
        {
            string srcDocPath = FileIO.ToTempFileName(vm.DocName);

            // Copy the page from the source PDF.
            if (Path.GetExtension(srcDocPath).ToUpperInvariant() == ".PDF")
            {
                PdfReader srcReader = new iTextSharp.text.pdf.PdfReader(srcDocPath);
                try
                {
                    PdfDictionary pageDict = srcReader.GetPageN(vm.Number);
                    PdfImportedPage importedPage = targetPdf.GetImportedPage(srcReader, vm.Number);
                    PdfCopy.PageStamp pageStamp = targetPdf.CreatePageStamp(importedPage);

                    // Add any strings. The vertical position is measured from the
                    // top of the imported page, shifted by 75% of the string height.
                    foreach (Common.UIString str in vm.Strings)
                    {
                        ColumnText.ShowTextAligned(pageStamp.GetOverContent(), iTextSharp.text.Element.ALIGN_LEFT, new iTextSharp.text.Phrase(str.String), (float)str.X, (float)(importedPage.Height - str.Y - (str.Height * 0.75)), 0);
                    }

                    // Apply any added rotation.
                    pageDict.Put(PdfName.ROTATE, new PdfNumber((vm.FlatRotation) % 360f));
                    pageStamp.AlterContents();
                    targetPdf.AddPage(importedPage);
                    targetPdf.FreeReader(srcReader);
                }
                finally
                {
                    // Release the source file even when copying the page fails.
                    srcReader.Close();
                }
            }

            if (vm.ImageStream != null && targetPdf.NewPage())
            {
                PdfContentByte contentByte = new PdfContentByte(targetPdf);
                iTextSharp.text.Image image = iTextSharp.text.Image.GetInstance(vm.ImageStream);
                // Scale by 72 / DpiX, expressed as a percentage.
                image.ScalePercent(72f / image.DpiX * 100);
                image.SetAbsolutePosition(0, 0);
                contentByte.AddImage(image);
                contentByte.ToPdf(targetPdf);
            }
        }

        targetPdf.Close();
        doc.Close();
        stream.Close();
    }
    catch (System.IO.IOException e)
    {
        Toolbox.MessageBox(e.Message);
    }
    catch (Exception e)
    {
        Toolbox.MessageBoxException(e);
    }
    finally
    {
        // Release the target file on every path, so a failure part-way through does
        // not leave it open. Closing an already closed stream does nothing.
        if (stream != null)
        {
            stream.Close();
        }
    }
}
// Merges SOURCE73 with field merging enabled and checks the /DecodeParms array of
// image "Im0" on page 1: two entries, the first an indirect reference that
// resolves to PdfNull.
virtual public void CopyTaggedPdf21()
{
    InitializeDocument("21");
    copy.SetMergeFields();

    PdfReader source = new PdfReader(SOURCE73);
    copy.AddDocument(source);
    document.Close();
    source.Close();

    PdfReader reader = new PdfReader(output);
    PdfDictionary xobjects = reader.GetPageN(1)
        .GetAsDict(PdfName.RESOURCES)
        .GetAsDict(PdfName.XOBJECT);
    PdfArray decodeParms = xobjects.GetAsStream(new PdfName("Im0")).GetAsArray(PdfName.DECODEPARMS);
    Assert.AreEqual(2, decodeParms.Size);

    PdfObject first = decodeParms[0];
    Assert.IsTrue(first is PdfIndirectReference);
    int objectNumber = ((PdfIndirectReference) first).Number;
    Assert.IsTrue(reader.GetPdfObjectRelease(objectNumber) is PdfNull);
    reader.Close();
}
// Asserts that the named XObject is present (or absent) in the resources of the
// given page of the generated output, checking both the raw entry and the
// resolved object.
virtual protected void TestXObject(bool shouldExist, int page, String xObjectName)
{
    PdfReader reader = new PdfReader(new RandomAccessFileOrArray(pdfContent[output]), null);
    try
    {
        PdfDictionary pageResources = (PdfDictionary)reader.GetPageN(page).Get(PdfName.RESOURCES);
        PdfDictionary xobjects = (PdfDictionary)pageResources.Get(PdfName.XOBJECT);

        PdfName key = new PdfName(xObjectName);
        PdfObject resolved = xobjects.GetDirectObject(key);
        PdfObject raw = xobjects.Get(key);

        if (!shouldExist)
        {
            Assert.IsNull(raw);
            Assert.IsNull(resolved);
            return;
        }

        Assert.NotNull(raw);
        Assert.NotNull(resolved);
    }
    finally
    {
        reader.Close();
    }
}