GetPageN() public method

public GetPageN ( int pageNum ) : PdfDictionary
pageNum int
return PdfDictionary
        /**
         * Downloads the chart PDF for the given day, feeds every page through a
         * PdfContentStreamProcessor backed by an SBTextRenderer and, when the
         * externally declared "pages" collection is not empty afterwards, runs a
         * DataHandler over it on a worker thread and waits for it to finish.
         * The reader is always disposed, including when the day has no data.
         *
         * @param dateTime        the day to collect; formatted as MM/dd/yyyy in the request URL
         * @param m_dbConnection  the connection handed to the DataHandler
         */
        public static void collectDataforDay(DateTime dateTime, SQLiteConnection m_dbConnection)
        {
            string zeroMonth = dateTime.Month < 10 ? "0" : "";
            string zeroDay = dateTime.Day < 10 ? "0" : "";
            string date = zeroMonth + dateTime.Month + "/" + zeroDay + dateTime.Day + "/" + dateTime.Year;
            string url = "http://www.equibase.com/premium/eqbPDFChartPlus.cfm?RACE=A&BorP=P&TID=SAR&CTRY=USA&DT=" + date + "&DAY=D&STYLE=EQB";

            PdfReader reader;
            try
            {
                reader = new PdfReader(url);
            }
            catch (Exception)
            {
                // First attempt failed: prompt the operator, wait for two key presses, then retry once.
                // A failure of the retry propagates to the caller.
                Console.WriteLine("CAPTCHA TIME");
                Console.ReadKey();
                Console.ReadKey();

                reader = new PdfReader(url);
            }

            try
            {
                StringBuilder builder = new StringBuilder();

                for (int x = 1; x <= reader.NumberOfPages; x++)
                {
                    IRenderListener listener = new SBTextRenderer(builder);
                    PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
                    PdfDictionary pageDic = reader.GetPageN(x);
                    PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
                    processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, x), resourcesDic);
                }

                // "pages" is declared outside this method; presumably it is filled while the
                // content is processed above - TODO confirm against SBTextRenderer.
                if (pages.Count != 0)
                {
                    DataHandler handler = new DataHandler(dateTime, pages, m_dbConnection);
                    Thread thread = new Thread(new ThreadStart(handler.extractPdfData));

                    thread.Start();
                    thread.Join();
                    pages.Clear();
                }
                else
                {
                    // If there were no races on this particular day, simply skip it! :D
                    Console.WriteLine("Invalid Date: " + date);
                }
            }
            finally
            {
                reader.Dispose();
            }
        }
示例#2
0
        /// <summary>
        /// Verifies that a DocumentFont can be constructed from the Type0 font "TT9" on page 2 of
        /// type0FontWithoutToUnicodeMap.pdf, which has Identity-H encoding and no ToUnicode entry.
        /// </summary>
        virtual public void TestConstructionForType0WithoutToUnicodeMap()
        {
            const int targetPage = 2;
            PdfName fontKey = new PdfName("TT9");

            string pdfPath = TestResourceUtils.GetResourceAsTempFile(TEST_RESOURCES_PATH, "type0FontWithoutToUnicodeMap.pdf");
            RandomAccessFileOrArray source = new RandomAccessFileOrArray(pdfPath);
            PdfReader reader = new PdfReader(source, null);

            try
            {
                PdfDictionary pageResources = reader.GetPageN(targetPage).GetAsDict(PdfName.RESOURCES);
                PdfDictionary fonts = pageResources.GetAsDict(PdfName.FONT);
                PdfDictionary resolvedFont = fonts.GetAsDict(fontKey);
                PRIndirectReference fontReference = (PRIndirectReference)fonts.Get(fontKey);

                // Preconditions: the fixture really is a Type0 / Identity-H font without ToUnicode.
                Assert.AreEqual(PdfName.TYPE0, resolvedFont.GetAsName(PdfName.SUBTYPE));
                Assert.AreEqual("/Identity-H", resolvedFont.GetAsName(PdfName.ENCODING).ToString());
                Assert.IsNull(resolvedFont.Get(PdfName.TOUNICODE), "This font should not have a ToUnicode map");

                // The actual check: construction must not throw (it used to throw an NPE).
                new DocumentFont(fontReference);
            }
            finally
            {
                reader.Close();
            }
        }
示例#3
0
        /// <summary>
        /// Puts the same CropBox rectangle on every page of a fixed input PDF and writes the
        /// stamped result to a fixed output path. The reader and the output stream are released
        /// even when an exception is thrown part-way through.
        /// </summary>
        public static void CropPdf()
        {
            // NOTE(review): these four values are passed straight to PdfRectangle; if its
            // parameters are (llx, lly, urx, ury), "w"/"h" are really the upper-right corner - confirm.
            var xll    = 200;
            var yll    = 170;
            var w      = 800;
            var h      = 800;
            var reader = new iTextSharp.text.pdf.PdfReader(@"C:\Projects\31g\trunk\temp\pdf\20140208110036_20.pdf");

            try
            {
                var n       = reader.NumberOfPages;
                var pfgRect = new iTextSharp.text.pdf.PdfRectangle(xll, yll, w, h);

                for (var i = 1; i <= n; i++)
                {
                    iTextSharp.text.pdf.PdfDictionary pageDict = reader.GetPageN(i);
                    pageDict.Put(iTextSharp.text.pdf.PdfName.CROPBOX, pfgRect);
                }

                // The path contains no format placeholders, so it is used verbatim.
                var output = new System.IO.FileStream(@"C:\Projects\31g\trunk\Notes\misc\Maps\Europe_565BCE.pdf", FileMode.Create);
                try
                {
                    var stamper = new iTextSharp.text.pdf.PdfStamper(reader, output);
                    stamper.Close();
                }
                finally
                {
                    // Safe to call even if the stamper already closed the stream.
                    output.Dispose();
                }
            }
            finally
            {
                reader.Close();
            }
        }
示例#4
0
 /// <summary>
 /// Removes layers from a PDF document: every page is parsed with an OCGParser for the given
 /// layer names, its "PieceInfo" entry is removed and matching annotations and properties are
 /// stripped; then the layers are removed from the catalog's optional-content arrays and
 /// unused objects are dropped from the reader. Documents whose catalog has no OCProperties
 /// entry are handled without error. </summary>
 /// <param name="reader">	a PdfReader containing a PDF document </param>
 /// <param name="layers">	a sequence of names of OCG layers </param>
 /// <exception cref="IOException"> </exception>
 public virtual void RemoveLayers(PdfReader reader, params string[] layers)
 {
     int n = reader.NumberOfPages;
     ICollection<string> ocgs = new HashSet2<string>();
     foreach (string layer in layers)
     {
         ocgs.Add(layer);
     }
     OCGParser parser = new OCGParser(ocgs);
     for (int i = 1; i <= n; i++)
     {
         PdfDictionary page = reader.GetPageN(i);
         Parse(parser, page);
         page.Remove(new PdfName("PieceInfo"));
         RemoveAnnots(page, ocgs);
         RemoveProperties(page, ocgs);
     }
     PdfDictionary root = reader.Catalog;
     PdfDictionary ocproperties = root.GetAsDict(PdfName.OCPROPERTIES);
     // The catalog entry is absent when the document has no layers at all.
     if (ocproperties != null)
     {
         RemoveOCGsFromArray(ocproperties, PdfName.OCGS, ocgs);
         PdfDictionary d = ocproperties.GetAsDict(PdfName.D);
         if (d != null)
         {
             RemoveOCGsFromArray(d, PdfName.ON, ocgs);
             RemoveOCGsFromArray(d, PdfName.OFF, ocgs);
             RemoveOCGsFromArray(d, PdfName.LOCKED, ocgs);
             RemoveOCGsFromArray(d, PdfName.RBGROUPS, ocgs);
             RemoveOCGsFromArray(d, PdfName.ORDER, ocgs);
             RemoveOCGsFromArray(d, PdfName.AS, ocgs);
         }
     }
     reader.RemoveUnusedObjects();
 }
        /// <summary>
        /// Reports whether no page of the PDF declares a font dictionary in its resources.
        /// </summary>
        /// <param name="pdfFilePath">Path handed to the PdfReader constructor.</param>
        /// <returns>
        /// <c>false</c> as soon as one page has a /Font entry in its /Resources;
        /// <c>true</c> when no page has one.
        /// </returns>
        public static bool IsScannedPdf(string pdfFilePath)
        {
            PdfReader reader = new PdfReader(pdfFilePath);

            try
            {
                for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                {
                    PdfDictionary pageResources = reader.GetPageN(pageNumber).GetAsDict(PdfName.RESOURCES);

                    // Any embedded font means the page carries real text, i.e. it is not a scan.
                    bool declaresFonts = pageResources != null
                                         && pageResources.GetAsDict(PdfName.FONT) != null;
                    if (declaresFonts)
                    {
                        return false;
                    }
                }

                return true;
            }
            finally
            {
                reader.Close();
            }
        }
        /// <summary>
        /// Decrypts the PDF at <paramref name="docPath"/> and, for every page in which
        /// FindImageInPDFDictionary finds an image, re-encodes that image as JPEG, encrypts it with
        /// the token of a newly saved Page, writes it under <paramref name="pagePath"/> and appends
        /// the Page to <paramref name="pages"/>. Only the first image found per page is extracted.
        /// </summary>
        /// <param name="password">Passed to AES.GetToken together with the new page id.</param>
        /// <param name="key">Key passed to AES.DecryptFile for the source document.</param>
        /// <param name="docPath">Path of the encrypted source PDF.</param>
        /// <param name="pagePath">Directory the encrypted page images are written to.</param>
        /// <param name="pages">Collection that receives one Page per extracted image.</param>
        public static void ExtractImagesFromPDF(string password, string key, string docPath, string pagePath, PageCollection pages)
        {
            // NOTE:  This will only get the first image it finds per page.
            PdfReader pdf = new PdfReader(Utility.Security.AES.DecryptFile(key, docPath));

            try
            {
                for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
                {
                    PdfDictionary pg = pdf.GetPageN(pageNumber);

                    // recursively search pages, forms and groups for images.
                    PdfObject obj = FindImageInPDFDictionary(pg);
                    if (obj == null)
                    {
                        continue;
                    }

                    int xrefIndex = ((PRIndirectReference)obj).Number;
                    PdfStream pdfStream = (PdfStream)pdf.GetPdfObject(xrefIndex);
                    byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfStream);
                    if (bytes == null)
                    {
                        continue;
                    }

                    // The source stream must stay open for as long as the Image is in use,
                    // so it is the outermost resource and the Image is disposed before it.
                    using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
                    using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
                    {
                        Page page = new Page();
                        page.Order = pages.Count;
                        page.Save();
                        page.Token = Utility.Security.AES.GetToken(page.Id, password);

                        System.Drawing.Imaging.ImageCodecInfo jpegEncoder = System.Drawing.Imaging.ImageCodecInfo.GetImageEncoders().FirstOrDefault(e => e.FormatDescription == "JPEG");

                        using (System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1))
                        using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
                        {
                            parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                            img.Save(ms, jpegEncoder, parms);
                            System.IO.File.WriteAllBytes(System.IO.Path.Combine(pagePath, page.Filename), SoftFluent.Samples.GED.Utility.Security.AES.EncryptStream(page.Token, ms.ToArray()).ToArray());
                        }

                        pages.Add(page);
                    }
                }
            }
            finally
            {
                pdf.Close();
            }
        }
示例#7
0
        /**
         * Returns the content stream bytes of one page.
         * @param reader  the reader that holds the document
         * @param pageNum   the number of the page whose content stream is wanted
         * @return  an empty array when the page dictionary has no /Contents entry, otherwise
         *          whatever GetContentBytesFromContentObject produces for that entry
         * @throws IOException
         * @since 5.0.1
         */
        public static byte[] GetContentBytesForPage(PdfReader reader, int pageNum)
        {
            PdfObject contents = reader.GetPageN(pageNum).Get(PdfName.CONTENTS);

            return contents == null
                ? new byte[0]
                : ContentByteUtils.GetContentBytesFromContentObject(contents);
        }
示例#8
0
// ---------------------------------------------------------------------------    
    /**
     * Creates a HashSet containing information about the fonts in the src PDF file.
     * The resources dictionary of every page is handed to ProcessResource, which
     * fills the set. The reader is closed before returning, also when a page fails.
     * @param src the PDF file
     * @return the set filled by ProcessResource
     * 
     * HashSet only available in .NET >= 3.5
     */
    public HashSet<String> ListFonts(byte[] src) {
      HashSet<String> set = new HashSet<String>();
      PdfReader reader = new PdfReader(src);
      try {
        for (int k = 1; k <= reader.NumberOfPages; ++k) {
          PdfDictionary resources = reader.GetPageN(k).GetAsDict(PdfName.RESOURCES);
          ProcessResource(set, resources);
        }
      }
      finally {
        reader.Close();
      }
      return set;
    }
示例#9
0
        /// <summary>
        /// Runs the content stream of one page through a Test_iTextSharp.PdfContentStreamProcessor
        /// that reports to the given strategy.
        /// </summary>
        /// <param name="reader">Reader holding the document.</param>
        /// <param name="page">Number of the page to process.</param>
        /// <param name="strategy">Listener that receives the render events.</param>
        public static void ProcessContentPage(PdfReader reader, int page, Test_iTextSharp.ITextExtractionStrategy strategy)
        {
            PdfDictionary pageDic = reader.GetPageN(page);
            PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);

            Test_iTextSharp.PdfContentStreamProcessor processor = new Test_iTextSharp.PdfContentStreamProcessor(strategy);
            byte[] bytes = ContentByteUtils.GetContentBytesForPage(reader, page);
            processor.ProcessContent(bytes, resourcesDic);
        }
        /// <summary>
        /// Saves, for every page in which FindImageInPDFDictionary finds an image, that image as
        /// "&lt;pageNumber&gt;.jpg" in <paramref name="outputPath"/>. Only the first image found
        /// per page is extracted; the output directory is created when missing.
        /// </summary>
        /// <param name="sourcePdf">Path of the PDF to read.</param>
        /// <param name="outputPath">Directory that receives the JPEG files.</param>
        public static void ExtractImagesFromPDF(string sourcePdf, string outputPath)
        {
            // NOTE:  This will only get the first image it finds per page.
            PdfReader pdf = new PdfReader(sourcePdf);

            try
            {
                for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
                {
                    PdfDictionary pg = pdf.GetPageN(pageNumber);

                    // recursively search pages, forms and groups for images.
                    PdfObject obj = FindImageInPDFDictionary(pg);
                    if (obj == null)
                    {
                        continue;
                    }

                    int xrefIndex = ((PRIndirectReference)obj).Number;
                    PdfStream pdfStream = (PdfStream)pdf.GetPdfObject(xrefIndex);
                    byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfStream);
                    if (bytes == null)
                    {
                        continue;
                    }

                    // The source stream must stay open while the Image is in use, so the
                    // Image is the inner resource and is disposed first.
                    using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
                    using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
                    using (System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1))
                    {
                        if (!Directory.Exists(outputPath))
                        {
                            Directory.CreateDirectory(outputPath);
                        }

                        string path = Path.Combine(outputPath, String.Format(@"{0}.jpg", pageNumber));
                        parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                        System.Drawing.Imaging.ImageCodecInfo jpegEncoder = GetImageEncoder("JPEG");
                        img.Save(path, jpegEncoder, parms);
                    }
                }
            }
            finally
            {
                pdf.Close();
            }
        }
示例#11
0
        /// <summary>
        /// Sets /Rotate to 270 on the first page of a fixed input PDF and writes the stamped
        /// result to a fixed output path. The reader and the output stream are released even
        /// when an exception is thrown part-way through.
        /// </summary>
        public static void RotatePdf()
        {
            var reader = new iTextSharp.text.pdf.PdfReader(@"C:\Projects\31g\trunk\temp\pdf\Europe_565BCE.pdf");

            try
            {
                var pageDict = reader.GetPageN(1);

                pageDict.Put(iTextSharp.text.pdf.PdfName.ROTATE, new iTextSharp.text.pdf.PdfNumber(270));

                var output = new System.IO.FileStream(@"C:\Projects\31g\trunk\Notes\misc\Maps\Europe_565BCE.pdf", FileMode.Create);
                try
                {
                    var stamper = new iTextSharp.text.pdf.PdfStamper(reader, output);
                    stamper.Close();
                }
                finally
                {
                    // Safe to call even if the stamper already closed the stream.
                    output.Dispose();
                }
            }
            finally
            {
                reader.Close();
            }
        }
示例#12
0
        /// <summary>
        /// Checks that GetPageResources gives matching key collections whether page 1 of
        /// getLinkTest2.pdf is addressed by number or by its page dictionary.
        /// </summary>
        public void testPageResources()
        {
            String pdfPath = TestResourceUtils.GetResourceAsTempFile(TEST_RESOURCES_PATH, "getLinkTest2.pdf");
            PdfReader rdr = new PdfReader(new RandomAccessFileOrArray(pdfPath), new byte[0]);

            PdfDictionary byNumber = rdr.GetPageResources(1);
            PdfDictionary byDictionary = rdr.GetPageResources(rdr.GetPageN(1));

            // NOTE(review): this relies on Equals of the key collection type; if that is not
            // overridden it is a reference comparison rather than a content comparison - confirm.
            bool sameKeys = byNumber.Keys.Equals(byDictionary.Keys);
            Assert.IsTrue(sameKeys);

            rdr.Close();
        }
 /// <summary>
 /// Processes the content stream of page 1 of <paramref name="src"/> with a
 /// MyTextRenderListener that writes to the file <paramref name="dest"/>.
 /// The output file and the reader are closed even when processing fails.
 /// </summary>
 /// <param name="src">Path of the PDF to read.</param>
 /// <param name="dest">Path of the text file to create (overwritten if present).</param>
 public void extractSnippets(String src, String dest)
 {
     using (TextWriter output = new StreamWriter(new FileStream(dest, FileMode.Create)))
     {
         PdfReader reader = new PdfReader(src);
         try
         {
             IRenderListener listener = new MyTextRenderListener(output);
             PdfContentStreamProcessor processor =
                 new PdfContentStreamProcessor(listener);
             PdfDictionary pageDic = reader.GetPageN(1);
             PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
             processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, 1), resourcesDic);
             output.Flush();
         }
         finally
         {
             reader.Close();
         }
     }
 }
示例#14
0
 /// <summary>
 /// Replaces the MediaBox of the first page of the input PDF with the rectangle returned by
 /// <c>PageLayoutA4.GetLabelRect(0)</c> and stamps the result to
 /// <paramref name="outputStream"/>.
 /// </summary>
 /// <param name="outputStream">Receives the stamped PDF; the writer's CloseStream flag is
 /// set to false, so presumably the stream is left open for the caller.</param>
 /// <param name="inputStream">The PDF to read.</param>
 /// <param name="pageLayout">Not read by this method. NOTE(review): the rectangle is taken
 /// from PageLayoutA4 rather than from this argument - confirm that this is intended.</param>
 public void TrimPDFFile(Stream outputStream, Stream inputStream, PageLayout pageLayout)
 {
     using (var reader = new PdfReader(inputStream))
     {
         var inputPage = reader.GetPageN(1);
         inputPage.Put(PdfName.MEDIABOX, new PdfRectangle(PageLayoutA4.GetLabelRect(0)));
         using (var stamper = new PdfStamper(reader, outputStream))
         {
             stamper.Writer.CloseStream = false;
             stamper.MarkUsed(inputPage);
         }
     }
 }
示例#15
0
// --------------------------------------------------------------------------- 
    /**
     * Puts the crop box (55, 76, 560, 816) on every page of the given PDF and
     * returns the document produced by a PdfStamper.
     * @param src the original PDF
     * @return the bytes written to the in-memory output stream
     */
    public byte[] ManipulatePdf(byte[] src) {
      PdfReader reader = new PdfReader(src);
      int pageCount = reader.NumberOfPages;
      PdfRectangle cropBox = new PdfRectangle(55, 76, 560, 816);
      int pageNumber = 1;
      while (pageNumber <= pageCount) {
        reader.GetPageN(pageNumber).Put(PdfName.CROPBOX, cropBox);
        pageNumber++;
      }
      using (MemoryStream output = new MemoryStream()) {
        // Nothing is added through the stamper; it is only opened and disposed.
        using (PdfStamper stamper = new PdfStamper(reader, output)) {
        }
        return output.ToArray();
      }
    }
示例#16
0
// --------------------------------------------------------------------------- 
    /**
     * Adds 90 to the rotation of every page of the given PDF and returns the
     * document produced by a PdfStamper.
     * @param src the original PDF
     * @return the bytes written to the in-memory output stream
     */
    public byte[] ManipulatePdf(byte[] src) {
      PdfReader reader = new PdfReader(src);
      int pageCount = reader.NumberOfPages;
      int pageNumber = 1;
      while (pageNumber <= pageCount) {
        int currentRotation = reader.GetPageRotation(pageNumber);
        PdfDictionary pageDictionary = reader.GetPageN(pageNumber);
        pageDictionary.Put(PdfName.ROTATE, new PdfNumber(currentRotation + 90));
        pageNumber++;
      }
      using (MemoryStream output = new MemoryStream()) {
        // Nothing is added through the stamper; it is only opened and disposed.
        using (PdfStamper stamper = new PdfStamper(reader, output)) {
        }
        return output.ToArray();
      }
    }
示例#17
0
 /// <summary>
 /// Takes the first XObject listed in the resources of page 1, scales it to the page size and
 /// adds it again at (0, 0) on top of page 1, writing the result to <paramref name="dest"/>.
 /// The reader and the output stream are released even when an exception is thrown.
 /// </summary>
 /// <param name="src">Path of the PDF to read.</param>
 /// <param name="dest">Path of the PDF to create (overwritten if present).</param>
 public void ManipulatePdf(string src, string dest)
 {
     PdfReader reader = new PdfReader(src);
     try
     {
         // We assume that there's a single large picture on the first page
         PdfDictionary page = reader.GetPageN(1);
         PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);
         PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT);
         Dictionary<PdfName, PdfObject>.KeyCollection.Enumerator enumerator = xobjects.Keys.GetEnumerator();
         enumerator.MoveNext();
         PdfName imgName = enumerator.Current;
         Image img = Image.GetInstance((PRIndirectReference) xobjects.GetAsIndirectObject(imgName));
         img.SetAbsolutePosition(0, 0);
         img.ScaleAbsolute(reader.GetPageSize(1));

         FileStream output = new FileStream(dest, FileMode.Create);
         try
         {
             PdfStamper stamper = new PdfStamper(reader, output);
             stamper.GetOverContent(1).AddImage(img);
             stamper.Close();
         }
         finally
         {
             // Safe to call even if the stamper already closed the stream.
             output.Dispose();
         }
     }
     finally
     {
         reader.Close();
     }
 }
        // ---------------------------------------------------------------------------
        /**
         * Builds a textual report of a PDF: for every page the page dictionary,
         * an XObject summary, the raw content stream and the extracted text.
         * @param pdf the original PDF
         * @return the report as a single string
         *
         * The structure follows PdfContentReaderTool.ListContentStreamForPage(),
         * but takes a byte array instead of a file path.
         */
        public string InspectPdf(byte[] pdf)
        {
            PdfReader reader = new PdfReader(pdf);
            int lastPage = reader.NumberOfPages;
            StringBuilder report = new StringBuilder();

            for (int page = 1; page <= lastPage; page++)
            {
                report.AppendLine("==============Page " + page + "====================");

                report.AppendLine("- - - - - Dictionary - - - - - -");
                PdfDictionary dictionary = reader.GetPageN(page);
                string dictionaryDetail = PdfContentReaderTool.GetDictionaryDetail(dictionary);
                report.AppendLine(dictionaryDetail);

                report.AppendLine("- - - - - XObject Summary - - - - - -");
                PdfDictionary pageResources = dictionary.GetAsDict(PdfName.RESOURCES);
                report.AppendLine(PdfContentReaderTool.GetXObjectDetail(pageResources));

                report.AppendLine("- - - - - Content Stream - - - - - -");
                RandomAccessFileOrArray file = reader.SafeFile;
                byte[] content = reader.GetPageContent(page, file);
                file.Close();

                // Each byte is appended as one character, without any decoding.
                for (int index = 0; index < content.Length; index++)
                {
                    report.Append((char)content[index]);
                }

                report.AppendLine("- - - - - Text Extraction - - - - - -");
                String text = PdfTextExtractor.GetTextFromPage(
                    reader, page, new LocationTextExtractionStrategy());
                report.AppendLine(text.Length == 0 ? "No text found on page " + page : text);
                report.AppendLine();
            }

            return report.ToString();
        }
示例#19
0
// ---------------------------------------------------------------------------
    /**
     * Adds every annotation of every page to the /Fields array of the document's
     * AcroForm and returns the document produced by a PdfStamper.
     * Pages without an /Annots array are skipped.
     * @param src the original PDF
     * @return the bytes written to the in-memory output stream
     */
    public byte[] ManipulatePdf(string src) {
      PdfReader reader = new PdfReader(src);
      PdfDictionary root = reader.Catalog;
      // NOTE(review): a document without /AcroForm or /Fields still fails below - confirm
      // whether callers can pass such documents.
      PdfDictionary form = root.GetAsDict(PdfName.ACROFORM);
      PdfArray fields = form.GetAsArray(PdfName.FIELDS);
      for (int i = 1; i <= reader.NumberOfPages; i++) {
        PdfDictionary page = reader.GetPageN(i);
        PdfArray annots = page.GetAsArray(PdfName.ANNOTS);
        if (annots == null) continue;
        for (int j = 0; j < annots.Size; j++) {
          fields.Add(annots.GetAsIndirectObject(j));
        }
      }
      using (MemoryStream ms = new MemoryStream()) {
        using (PdfStamper stamper = new PdfStamper(reader, ms)) {
        }
        return ms.ToArray();
      }
    }
示例#20
0
// ---------------------------------------------------------------------------    
    /**
     * Adds the embedded files of all file-attachment annotations of a PDF to a zip.
     * For every key of an annotation's /EF dictionary one entry is added, named
     * after the string stored under the same key in the /FS dictionary.
     * @param src the bytes of the existing PDF
     * @param zip the ZipFile object that receives the extracted files
     */
    public void ExtractAttachments(byte[] src, ZipFile zip) {
      PdfReader reader = new PdfReader(src);
      for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++) {
        PdfArray annotations = reader.GetPageN(pageNumber).GetAsArray(PdfName.ANNOTS);
        if (annotations != null) {
          for (int index = 0; index < annotations.Size; index++) {
            PdfDictionary annotation = annotations.GetAsDict(index);
            PdfName subtype = annotation.GetAsName(PdfName.SUBTYPE);
            if (!PdfName.FILEATTACHMENT.Equals(subtype)) {
              continue;
            }
            PdfDictionary fileSpec = annotation.GetAsDict(PdfName.FS);
            PdfDictionary embeddedFiles = fileSpec.GetAsDict(PdfName.EF);
            foreach (PdfName entryKey in embeddedFiles.Keys) {
              string entryName = fileSpec.GetAsString(entryKey).ToString();
              PRStream entryStream = (PRStream)embeddedFiles.GetAsStream(entryKey);
              zip.AddEntry(entryName, PdfReader.GetStreamBytes(entryStream));
            }
          }
        }
      }
    }
示例#21
0
        /// <summary>
        /// Writes a one-paragraph PDF whose underlined text is mostly spaces, reads page 1 back
        /// through a MyTextRenderListener and expects the collected text to be 60 characters long.
        /// </summary>
        public virtual void SpaceTrimColumnTextTest() {
            Document document = new Document(PageSize.A4, 50, 30, 50, 30);
            PdfWriter pdfWriter = PdfWriter.GetInstance(document, new FileStream(OUTSPTRIMCT, FileMode.Create));
            document.Open();

            Phrase underlined = new Phrase();
            underlined.Font = new Font(Font.FontFamily.TIMES_ROMAN, 12, Font.UNDERLINE);
            underlined.Add(new Chunk(" 1                                                      1                                                                                                                             9      "));

            Paragraph paragraph = new Paragraph(underlined);
            paragraph.KeepTogether = true;
            document.Add(paragraph);

            document.Close();
            pdfWriter.Close();

            // Read the generated file back and collect the rendered text of page 1.
            PdfReader reader = new PdfReader(OUTSPTRIMCT);
            MyTextRenderListener textListener = new MyTextRenderListener();
            PdfDictionary pageResources = reader.GetPageN(1).GetAsDict(PdfName.RESOURCES);
            byte[] pageContent = ContentByteUtils.GetContentBytesForPage(reader, 1);
            new PdfContentStreamProcessor(textListener).ProcessContent(pageContent, pageResources);

            Assert.IsTrue(textListener.GetText().Length == 60, "Unexpected text length");
        }
示例#22
0
        /// <summary>Parses images from pdf document.</summary>
        /// <remarks>
        /// Only indirectly referenced XObjects whose subtype is Image are returned. Pages without
        /// resources or without XObjects are skipped. Any exception raised while reading is written
        /// to the console and ends the scan; the images collected so far are still returned.
        /// Each returned stream is left open because its image was created from it.
        /// </remarks>
        /// <param name="filePath">The pdf-file full path.</param>
        /// <returns>Collection of images and streams that are associated with them.</returns>
        public static List<ParsedImage> ParseImages(string filePath)
        {
            var imgList = new List<ParsedImage>();
            var raf = new RandomAccessFileOrArray(filePath);
            var reader = new PdfReader(raf, null);

            try
            {
                for (var pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                {
                    var pg = reader.GetPageN(pageNumber);
                    var size = reader.GetPageSize(pageNumber);
                    var res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
                    if (res == null)
                    {
                        // A page without resources has no images; it must not abort the later pages.
                        continue;
                    }

                    var xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
                    if (xobj == null)
                    {
                        continue;
                    }

                    foreach (var name in xobj.Keys)
                    {
                        var obj = xobj.Get(name);
                        if (!obj.IsIndirect())
                        {
                            continue;
                        }

                        var tg = (PdfDictionary)PdfReader.GetPdfObject(obj);

                        var type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));

                        if (!PdfName.IMAGE.Equals(type))
                        {
                            continue;
                        }

                        var refIndex = ((PRIndirectReference)obj).Number;
                        var pdfStream = (PdfStream)reader.GetPdfObject(refIndex);
                        var bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfStream);
                        if (bytes == null)
                        {
                            continue;
                        }

                        // Not disposed here on purpose: the stream is handed out with the image.
                        var memStream = new MemoryStream(bytes) { Position = 0 };
                        var img = Image.FromStream(memStream);
                        imgList.Add(new ParsedImage
                        {
                            Image = img,
                            ImageStream = memStream,
                            Format = img.RawFormat,
                            Width = size.Width,
                            Height = size.Height,
                            PerformedRotation = RotateFlipType.RotateNoneFlipNone
                        });
                    }
                }
            }
            catch (Exception exception)
            {
                Console.WriteLine(exception.Message);
            }
            finally
            {
                reader.Close();
                raf.Close();
            }

            return imgList;
        }
示例#23
0
        /// <summary>
        /// Reads the PDF at <paramref name="src"/>, passes its structure tree root to Manipulate,
        /// builds an XMP packet from the metadata in <paramref name="md"/>, attaches it to page 1
        /// and to the stamper, and writes the result to <paramref name="dest"/>.
        /// </summary>
        /// <param name="src">Path of the PDF to read.</param>
        /// <param name="dest">Path of the PDF to create; its file name without extension becomes the title.</param>
        /// <param name="md">Supplies Creator, Author and the optional CustomMetadata bytes.</param>
        void ManipulatePdf(string src, string dest, MetaData md)
        {
            using (var reader = new ip.PdfReader(src))
            {
                var catalog        = reader.Catalog;
                var structTreeRoot = catalog.GetAsDict(ip.PdfName.STRUCTTREEROOT);

                // Manipulate is defined outside this block; it receives whatever GetAsDict returned.
                Manipulate(structTreeRoot);
                using (var stamper = new ip.PdfStamper(reader, new FileStream(dest, FileMode.Create)))
                {
                    var page = reader.GetPageN(1);
                    using (var ms = new MemoryStream())
                    {
                        var dic = new ip.PdfDictionary();

                        // Falls back to the current local time when the source has no creation date.
                        DateTime time = DateTime.Now;

                        // The Info key is the PdfName text minus its first character
                        // (presumably the leading '/' - confirm).
                        if (reader.Info.ContainsKey(ip.PdfName.CREATIONDATE.ToString().Substring(1)))
                        {
                            // Drops the first two characters (presumably the "D:" prefix - confirm),
                            // turns every apostrophe into ':' and then drops the last character,
                            // so that the value fits the "yyyyMMddHHmmsszzz" pattern below.
                            var temp = reader.Info[ip.PdfName.CREATIONDATE.ToString().Substring(1)].Substring(2).Replace('\'', ':');
                            temp = temp.Substring(0, temp.Length - 1);
                            time = DateTime.ParseExact(temp, "yyyyMMddHHmmsszzz", CultureInfo.InvariantCulture);
                        }

                        // Both PRODUCER and CREATOR are filled from md.Creator.
                        dic.Put(ip.PdfName.PRODUCER, new ip.PdfString(md.Creator));
                        dic.Put(ip.PdfName.TITLE, new ip.PdfString(Path.GetFileNameWithoutExtension(dest)));
                        dic.Put(ip.PdfName.CREATOR, new ip.PdfString(md.Creator));
                        dic.Put(ip.PdfName.AUTHOR, new ip.PdfString(md.Author));
                        dic.Put(ip.PdfName.CREATIONDATE, new ip.PdfDate(time));


                        // Write the XMP packet for dic into ms; the packet as it is at this point
                        // is what gets added to the body and referenced from page 1.
                        var xmp = new XmpWriter(ms, dic);
                        xmp.Close();
                        var reference = stamper.Writer.AddToBody(new ip.PdfStream(ms.ToArray()));
                        page.Put(ip.PdfName.METADATA, reference.IndirectReference);

                        // ms is the using variable above, so this condition always holds here.
                        if (ms != null)
                        {
                            var d   = Encoding.UTF8.GetString(ms.ToArray());
                            var xml = new XmlDocument();
                            xml.LoadXml(d);
                            // Second-level element: first child of the first child of the root.
                            var node = xml.DocumentElement.FirstChild;
                            node = node.FirstChild;

                            if (node != null)
                            {
                                // Declare the pdfaid namespace and add part="1", conformance="A".
                                //node.AppendAttribute("xmlns:pdfaid", "http://www.aiim.org/pdfa/ns/id/");
                                var attrId = xml.CreateAttribute("xmlns:pdfaid");
                                attrId.Value = "http://www.aiim.org/pdfa/ns/id/";
                                node.Attributes.Append(attrId);

                                var attrPart = xml.CreateAttribute("pdfaid:part", "http://www.aiim.org/pdfa/ns/id/");
                                attrPart.Value = "1";
                                node.Attributes.Append(attrPart);

                                var attrConf = xml.CreateAttribute("pdfaid:conformance", "http://www.aiim.org/pdfa/ns/id/");
                                attrConf.Value = "A";
                                node.Attributes.Append(attrConf);

                                // Optional custom payload, stored Base64-encoded in a child element.
                                if (md.CustomMetadata != null && md.CustomMetadata.Length > 0)
                                {
                                    var dataNode = node.OwnerDocument.CreateElement("CustomMetaData");
                                    node.AppendChild(dataNode);
                                    dataNode.InnerText = System.Convert.ToBase64String(md.CustomMetadata);
                                }
                            }

                            // Overwrites the stream from the start without truncating it.
                            // NOTE(review): if the saved XML were ever shorter than the original
                            // packet, stale trailing bytes would remain in ms - confirm.
                            ms.Position = 0;
                            xml.Save(ms);
                            // d is reassigned here but not read again in this method.
                            d = Encoding.UTF8.GetString(ms.ToArray());
                        }

                        // The (possibly modified) packet becomes the stamper's XMP metadata.
                        stamper.XmpMetadata = ms.ToArray();

                        // Explicit closes inside the using blocks; both objects are disposed
                        // again when the enclosing using statements end.
                        stamper.Close();
                        reader.Close();
                    }
                }
            }
        }
示例#24
0
 /**
 * Lists the fonts used by one page of a document. Per the original
 * documentation, each entry of the returned list is an
 * <CODE>Object[]{String,PRIndirectReference}</CODE> holding the font name
 * and the indirect reference to it (the entries are produced by
 * <CODE>RecourseFonts</CODE>).
 * @param reader the document whose fonts are to be listed
 * @param page the number of the page to inspect
 * @return the list of fonts and references found for that page
 */
 public static List<object[]> GetDocumentFonts(PdfReader reader, int page) {
     List<object[]> pageFonts = new List<object[]>();
     IntHashtable hitTable = new IntHashtable();
     PdfDictionary pageDictionary = reader.GetPageN(page);
     // RecourseFonts fills pageFonts in place, starting at level 1.
     RecourseFonts(pageDictionary, hitTable, pageFonts, 1);
     return pageFonts;
 }
示例#25
0
        /**
         * Writes information about a specific page from PdfReader to the specified writer:
         * the page dictionary, an XObject summary of the page resources, the raw content
         * stream bytes and finally the text extracted from the page.
         * @since 2.1.5
         * @param reader    the PdfReader to read the page content from
         * @param pageNum   the page number to read
         * @param outp      the text writer to send the content to
         * @throws IOException
         */
        public static void ListContentStreamForPage(PdfReader reader, int pageNum, TextWriter outp) {
            outp.WriteLine("==============Page " + pageNum + "====================");
            outp.WriteLine("- - - - - Dictionary - - - - - -");
            PdfDictionary pageDictionary = reader.GetPageN(pageNum);
            outp.WriteLine(GetDictionaryDetail(pageDictionary));

            outp.WriteLine("- - - - - XObject Summary - - - - - -");
            outp.WriteLine(GetXObjectDetail(pageDictionary.GetAsDict(PdfName.RESOURCES)));

            outp.WriteLine("- - - - - Content Stream - - - - - -");
            RandomAccessFileOrArray f = reader.SafeFile;

            byte[] contentBytes;
            try {
                contentBytes = reader.GetPageContent(pageNum, f);
            }
            finally {
                // Release the file handle even when reading the page content fails.
                f.Close();
            }

            outp.Flush();

            // Each content byte is emitted as the character with the same code.
            foreach (byte b in contentBytes) {
                outp.Write((char)b);
            }

            outp.Flush();

            outp.WriteLine("- - - - - Text Extraction - - - - - -");
            String extractedText = PdfTextExtractor.GetTextFromPage(reader, pageNum, new LocationTextExtractionStrategy());
            if (extractedText.Length != 0) {
                outp.WriteLine(extractedText);
            }
            else {
                outp.WriteLine("No text found on page " + pageNum);
            }

            outp.WriteLine();

        }
示例#26
0
        virtual public void CopyTaggedPdf19() {
            InitializeDocument("19");

            // Copy page 1 of the source into the output document and finish it.
            PdfReader source = new PdfReader(SOURCE18);
            copy.AddPage(copy.GetImportedPage(source, 1, true));

            document.Close();
            source.Close();

            // Re-open the result and walk page 1 -> Resources -> XObject -> Fm0
            // -> Resources -> Font; the T1_0 font dictionary must be present there.
            PdfReader result = new PdfReader(output);

            PdfDictionary pageResources = result.GetPageN(1).GetAsDict(PdfName.RESOURCES);
            PdfStream form = pageResources.GetAsDict(PdfName.XOBJECT).GetAsStream(new PdfName("Fm0"));
            PdfDictionary formFonts = form.GetAsDict(PdfName.RESOURCES).GetAsDict(PdfName.FONT);
            PdfDictionary t1Font = formFonts.GetAsDict(new PdfName("T1_0"));
            Assert.NotNull(t1Font);

            result.Close();
        }
示例#27
0
 /**
 * Lists the fonts of every page of a document. Per the original
 * documentation, each entry of the returned list is an
 * <CODE>Object[]{String,PRIndirectReference}</CODE> holding the font name
 * and the indirect reference to it (the entries are produced by
 * <CODE>RecourseFonts</CODE>).
 * @param reader the document whose fonts are to be listed
 * @return the list of fonts and references
 */
 public static List<object[]> GetDocumentFonts(PdfReader reader) {
     List<object[]> allFonts = new List<object[]>();
     // One hit table is shared by the calls for all pages.
     IntHashtable hitTable = new IntHashtable();
     int pageCount = reader.NumberOfPages;
     int pageNumber = 1;
     while (pageNumber <= pageCount) {
         RecourseFonts(reader.GetPageN(pageNumber), hitTable, allFonts, 1);
         ++pageNumber;
     }
     return allFonts;
 }
示例#28
0
 /**
 * Lists the fonts of every page of a document. Per the original
 * documentation, each element of the returned <CODE>ArrayList</CODE> is an
 * <CODE>Object[]{String,PRIndirectReference}</CODE> holding the font name
 * and the indirect reference to it (the entries are produced by
 * <CODE>RecourseFonts</CODE>).
 * @param reader the document whose fonts are to be listed
 * @return the list of fonts and references
 */
 public static ArrayList GetDocumentFonts(PdfReader reader) {
     ArrayList collected = new ArrayList();
     // The same hit table is handed to RecourseFonts for each page.
     IntHashtable hitTable = new IntHashtable();
     int lastPage = reader.NumberOfPages;
     for (int pageNumber = 1; pageNumber <= lastPage; pageNumber++) {
         PdfDictionary pageDictionary = reader.GetPageN(pageNumber);
         RecourseFonts(pageDictionary, hitTable, collected, 1);
     }
     return collected;
 }
示例#29
0
        /**
         * Retrieves comments from a file.
         * Walks the /Annots array of every page and collects the /Contents of each
         * annotation whose /Subtype is /Text; every collected entry is followed by " | ".
         * Entries that are not dictionaries, or that lack a subtype or contents, are skipped.
         * @param path the PDF file to open (passed to the PdfReader constructor)
         * @return the concatenated comments, or an empty string when none were found
         */
        private string GetComments(string path)
        {
            PdfReader pdfReader = new PdfReader(path);
            System.Text.StringBuilder txt = new System.Text.StringBuilder();
            try
            {
                for (int page = 1; page <= pdfReader.NumberOfPages; ++page)
                {
                    PdfDictionary pagedic = pdfReader.GetPageN(page);
                    PdfArray annotarray = (PdfArray)PdfReader.GetPdfObject(pagedic.Get(PdfName.ANNOTS));
                    if (annotarray == null || annotarray.Size == 0)
                    {
                        continue;
                    }

                    foreach (PdfObject annotation in annotarray.ArrayList)
                    {
                        PdfDictionary annotationDictionary = PdfReader.GetPdfObject(annotation) as PdfDictionary;
                        if (annotationDictionary == null)
                        {
                            continue;
                        }

                        // Comparing from the constant side keeps this safe when the
                        // annotation has no /Subtype entry at all.
                        if (!PdfName.TEXT.Equals(annotationDictionary.Get(PdfName.SUBTYPE)))
                        {
                            continue;
                        }

                        PdfString contents = annotationDictionary.GetAsString(PdfName.CONTENTS);
                        if (contents != null)
                        {
                            txt.Append(contents.ToString()).Append(" | ");
                        }
                    }
                }
            }
            finally
            {
                // Close the reader even when a malformed page makes the scan throw.
                pdfReader.Close();
            }
            return txt.ToString();
        }
示例#30
0
 /**
 * Lists the fonts used by one page of a document. Per the original
 * documentation, each element of the returned <CODE>ArrayList</CODE> is an
 * <CODE>Object[]{String,PRIndirectReference}</CODE> holding the font name
 * and the indirect reference to it (the entries are produced by
 * <CODE>RecourseFonts</CODE>).
 * @param reader the document whose fonts are to be listed
 * @param page the number of the page to inspect
 * @return the list of fonts and references found for that page
 */
 public static ArrayList GetDocumentFonts(PdfReader reader, int page) {
     ArrayList pageFonts = new ArrayList();
     IntHashtable hitTable = new IntHashtable();
     PdfDictionary pageDictionary = reader.GetPageN(page);
     // RecourseFonts fills pageFonts in place, starting at level 1.
     RecourseFonts(pageDictionary, hitTable, pageFonts, 1);
     return pageFonts;
 }
示例#31
0
        virtual public void CopyTaggedPdf17() {
            InitializeDocument("17");

            // Page 1 of each source is imported; the third argument is true for
            // the first source and false for the second.
            PdfReader firstSource = new PdfReader(SOURCE10);
            PdfReader secondSource = new PdfReader(SOURCE19);
            copy.AddPage(copy.GetImportedPage(firstSource, 1, true));
            copy.AddPage(copy.GetImportedPage(secondSource, 1, false));

            document.Close();
            firstSource.Close();
            secondSource.Close();

            // The output must contain exactly two pages, both retrievable.
            PdfReader result = new PdfReader(output);
            Assert.AreEqual(2, result.NumberOfPages);
            for (int pageNumber = 1; pageNumber <= 2; pageNumber++) {
                Assert.NotNull(result.GetPageN(pageNumber));
            }
            result.Close();
        }
示例#32
0
        /**
         * Saves the given document as one PDF at the path derived from its DocName.
         * For every page view model: when the page's source file has a .pdf extension,
         * the page is imported from it, its strings are drawn over the content and its
         * rotation is set to FlatRotation modulo 360; when the page carries an image
         * stream, a new page is started and the image is added at position (0, 0).
         * IO errors are reported through Toolbox.MessageBox and any other exception
         * through Toolbox.MessageBoxException; they are not rethrown from here.
         * @param document the document view model whose pages are written out
         */
        public override void SaveDocument(ViewModels.DocumentViewModel document)
        {
            string targetFilePath = SafeFilePath(document.DocName);
            Stream stream = null;

            try
            {
                // A bare file name has no directory part; only create one when present.
                string targetDirectory = Path.GetDirectoryName(targetFilePath);
                if (!string.IsNullOrEmpty(targetDirectory) && !Directory.Exists(targetDirectory))
                {
                    Directory.CreateDirectory(targetDirectory);
                }

                stream = new FileStream(targetFilePath, FileMode.Create);

                iTextSharp.text.Document doc = new iTextSharp.text.Document();
                PdfCopy targetPdf = new PdfCopy(doc, stream);
                doc.Open();

                foreach (ViewModels.PageViewModel vm in document.Pages)
                {
                    string srcDocPath = FileIO.ToTempFileName(vm.DocName);

                    // Copy the page from the source PDF...
                    if (string.Equals(Path.GetExtension(srcDocPath), ".pdf", StringComparison.OrdinalIgnoreCase))
                    {
                        PdfReader srcReader = new iTextSharp.text.pdf.PdfReader(srcDocPath);
                        try
                        {
                            PdfDictionary pageDict = srcReader.GetPageN(vm.Number);
                            PdfImportedPage importedPage = targetPdf.GetImportedPage(srcReader, vm.Number);
                            PdfCopy.PageStamp pageStamp = targetPdf.CreatePageStamp(importedPage);

                            // Add any strings; Y is measured from the top of the page,
                            // so it is flipped against the imported page height.
                            foreach (Common.UIString str in vm.Strings)
                            {
                                ColumnText.ShowTextAligned(pageStamp.GetOverContent(),
                                                           iTextSharp.text.Element.ALIGN_LEFT,
                                                           new iTextSharp.text.Phrase(str.String),
                                                           (float)str.X,
                                                           (float)(importedPage.Height - str.Y - (str.Height * 0.75)),
                                                           0);
                            }

                            // Apply any added rotation.
                            pageDict.Put(PdfName.ROTATE, new PdfNumber((vm.FlatRotation) % 360f));
                            pageStamp.AlterContents();
                            targetPdf.AddPage(importedPage);

                            targetPdf.FreeReader(srcReader);
                        }
                        finally
                        {
                            // Release the source file even when stamping or importing fails.
                            srcReader.Close();
                        }
                    }

                    if (vm.ImageStream != null && targetPdf.NewPage())
                    {
                        PdfContentByte contentByte = new PdfContentByte(targetPdf);

                        iTextSharp.text.Image image = iTextSharp.text.Image.GetInstance(vm.ImageStream);

                        // Scale to 72 dpi; skip when the image reports no resolution,
                        // because dividing by a zero DpiX would give an infinite scale.
                        if (image.DpiX > 0)
                        {
                            image.ScalePercent(72f / image.DpiX * 100);
                        }
                        image.SetAbsolutePosition(0, 0);

                        contentByte.AddImage(image);
                        // NOTE(review): the value returned by ToPdf is not used here;
                        // confirm the image really ends up on the new page.
                        contentByte.ToPdf(targetPdf);
                    }
                }
                targetPdf.Close();
                doc.Close();
                stream.Close();
            }
            catch (System.IO.IOException e)
            {
                Toolbox.MessageBox(e.Message);
            }
            catch (Exception e)
            {
                Toolbox.MessageBoxException(e);
            }
            finally
            {
                // Make sure the output file handle is released on every path;
                // disposing a stream that is already closed is harmless.
                if (stream != null)
                {
                    stream.Dispose();
                }
            }
        }
示例#33
0
        virtual public void CopyTaggedPdf21() {
            InitializeDocument("21");
            copy.SetMergeFields();

            // Merge the whole source document into the output.
            PdfReader source = new PdfReader(SOURCE73);
            copy.AddDocument(source);
            document.Close();
            source.Close();

            // Locate the DecodeParms array of image Im0 on page 1 of the result.
            PdfReader result = new PdfReader(output);
            PdfDictionary xObjects = result.GetPageN(1)
                                           .GetAsDict(PdfName.RESOURCES)
                                           .GetAsDict(PdfName.XOBJECT);
            PdfArray decodeParms = xObjects.GetAsStream(new PdfName("Im0"))
                                           .GetAsArray(PdfName.DECODEPARMS);
            Assert.AreEqual(2, decodeParms.Size);

            // The first entry must be an indirect reference that resolves to PdfNull.
            PdfObject firstParm = decodeParms[0];
            Assert.IsTrue(firstParm is PdfIndirectReference);
            int referencedNumber = ((PdfIndirectReference) firstParm).Number;
            Assert.IsTrue(result.GetPdfObjectRelease(referencedNumber) is PdfNull);

            result.Close();
        }
示例#34
0
        /**
         * Checks whether the named XObject is present in the resources of the
         * given page of the generated output, both as the raw entry and as the
         * resolved direct object.
         * @param shouldExist true to assert both lookups are non-null, false to assert both are null
         * @param page the page number whose resources are inspected
         * @param xObjectName the name of the XObject entry to look up
         */
        virtual protected void TestXObject(bool shouldExist, int page, String xObjectName)
        {
            RandomAccessFileOrArray source = new RandomAccessFileOrArray(pdfContent[output]);
            PdfReader pdf = new PdfReader(source, null);
            try
            {
                PdfDictionary pageDictionary = pdf.GetPageN(page);
                PdfDictionary pageResources = (PdfDictionary)pageDictionary.Get(PdfName.RESOURCES);
                PdfDictionary xObjects = (PdfDictionary)pageResources.Get(PdfName.XOBJECT);

                PdfName key = new PdfName(xObjectName);
                PdfObject resolvedEntry = xObjects.GetDirectObject(key);
                PdfObject rawEntry = xObjects.Get(key);

                if (!shouldExist)
                {
                    Assert.IsNull(rawEntry);
                    Assert.IsNull(resolvedEntry);
                    return;
                }

                Assert.NotNull(rawEntry);
                Assert.NotNull(resolvedEntry);
            }
            finally
            {
                // The reader is closed whether or not the assertions pass.
                pdf.Close();
            }
        }