Exemplo n.º 1
0
        /**
         * Copies the output intent dictionary from another document to this one.
         * Only the first entry of the other document's OutputIntents array is examined,
         * and it is accepted only when its S entry equals GTS_PDFA1.
         * @param reader the other document
         * @param checkExistence <CODE>true</CODE> to just check for the existence of a valid output intent
         * dictionary, <CODE>false</CODE> to insert the dictionary if it exists
         * @throws IOException on error
         * @return <CODE>true</CODE> if the output intent dictionary exists, <CODE>false</CODE>
         * otherwise
         */
        public override bool SetOutputIntents(PdfReader reader, bool checkExistence)
        {
            PdfDictionary catalog = reader.Catalog;
            PdfArray      outs    = catalog.GetAsArray(PdfName.OUTPUTINTENTS);

            if (outs == null || outs.Size == 0)
            {
                return(false);
            }

            PdfDictionary outa = outs.GetAsDict(0);

            // The first array entry may not resolve to a dictionary in a damaged file;
            // report "no output intent" instead of failing on a null reference below.
            if (outa == null)
            {
                return(false);
            }

            PdfObject obj = PdfReader.GetPdfObject(outa.Get(PdfName.S));

            if (obj == null || !PdfName.GTS_PDFA1.Equals(obj))
            {
                return(false);
            }
            if (checkExistence)
            {
                // Caller only wanted to know whether a usable output intent is present.
                return(true);
            }

            // The destination profile is optional; a missing stream is passed on as null.
            PRStream stream      = (PRStream)PdfReader.GetPdfObject(outa.Get(PdfName.DESTOUTPUTPROFILE));
            byte[]   destProfile = null;

            if (stream != null)
            {
                destProfile = PdfReader.GetStreamBytes(stream);
            }
            SetOutputIntents(GetNameString(outa, PdfName.OUTPUTCONDITIONIDENTIFIER), GetNameString(outa, PdfName.OUTPUTCONDITION),
                             GetNameString(outa, PdfName.REGISTRYNAME), GetNameString(outa, PdfName.INFO), destProfile);
            return(true);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Updates the pagination in the footer.
        /// </summary>
        /// <remarks>
        /// For every page whose contents resolve to a single <c>PRStream</c>, the text matched by the
        /// footer pattern is looked up, its bracketed number is replaced by the 1-based page index,
        /// and the stream data is rewritten. The modified document is then written to
        /// <paramref name="outputPdf"/> through a <c>PdfStamper</c>.
        /// NOTE(review): the stream bytes are round-tripped through UTF-8; bytes that are not valid
        /// UTF-8 would not survive the rewrite - confirm the inputs only use ASCII-compatible content.
        /// </remarks>
        /// <param name="inputPdf">The pdf to modify.</param>
        /// <param name="outputPdf">The pdf created with updated pagination.</param>
        public static void UpdateFooterPagination(string inputPdf, string outputPdf)
        {
            const string footerPattern = @"\[\(Seite \)\]TJ.*\[\(";

            PdfReader reader = new PdfReader(inputPdf);

            try
            {
                // The using block releases the output file even when processing fails half-way.
                using (FileStream fs = new FileStream(outputPdf, FileMode.Create, FileAccess.Write))
                {
                    int n = reader.NumberOfPages;

                    for (int i = 1; i <= n; i++)
                    {
                        PdfDictionary dict = reader.GetPageN(i);
                        PdfObject     obj  = dict.GetDirectObject(PdfName.CONTENTS);

                        // Pages without contents (null) or with a contents array are left untouched.
                        if (obj == null || obj.GetType() != typeof(PRStream))
                        {
                            continue;
                        }

                        PRStream stream = (PRStream)obj;
                        byte[]   data   = PdfReader.GetStreamBytes(stream);
                        String   oldStr = System.Text.Encoding.UTF8.GetString(data);

                        //Get the string matching the pagination
                        String pageString = CommonUtils.MatchRegex(oldStr, footerPattern);
                        if (pageString == null)
                        {
                            // Nothing to update on this page.
                            continue;
                        }

                        //Regex replacement of page string with updated page number
                        String updatedPageString = Regex.Replace(pageString, @"\[\(\d+\)\]", "[(" + i + ")]");

                        // A MatchEvaluator inserts the text literally; passing page content as a
                        // replacement pattern would interpret any '$' sequences it contains.
                        String newString = Regex.Replace(oldStr, footerPattern, m => updatedPageString, RegexOptions.Singleline);
                        stream.SetData(System.Text.Encoding.UTF8.GetBytes(newString));
                    }

                    PdfStamper stamper = new PdfStamper(reader, fs);
                    stamper.Close();
                }
            }
            finally
            {
                reader.Close();
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Scans every object of the PDF and returns the bytes of the largest image found,
        /// ignoring images whose byte length equals <c>WatermarkSize</c>.
        /// </summary>
        /// <param name="stream">Stream containing the PDF document.</param>
        /// <returns>The bytes of the largest qualifying image, or an empty array when none is found.</returns>
        public static byte[] GetPictureFromPdf(Stream stream)
        {
            byte[] imageData = new byte[0];

            using (PdfReader pdfReader = new PdfReader(stream))
            {
                for (int i = 0; i < pdfReader.XrefSize; i++)
                {
                    PdfObject po = pdfReader.GetPdfObject(i);

                    if (po == null || !po.IsStream()) //object not found or not a stream, so continue
                    {
                        continue;
                    }

                    PRStream  pst  = (PRStream)po;
                    PdfObject type = pst.Get(PdfName.SUBTYPE); //get the object type

                    //only image XObjects are of interest
                    if (type == null || !type.ToString().Equals(PdfName.IMAGE.ToString()))
                    {
                        continue;
                    }

                    // Decode once and reuse the result for both the size test and the assignment.
                    byte[] candidate = new PdfImageObject(pst).GetImageAsBytes();
                    if (candidate == null)
                    {
                        continue;
                    }

                    if (candidate.Length != WatermarkSize && candidate.Length > imageData.Length)
                    {
                        imageData = candidate;
                    }
                }
            }

            return(imageData);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Replaces the text strings found in the PDF stream's data array.
        /// </summary>
        /// <param name="obj">The object to rewrite; it is cast to <c>PRStream</c>.</param>
        private void ReplacePRStream(PdfObject obj)
        {
            var target = (PRStream)obj;

            // Read the stream bytes, run them through ReplaceValues and store the result back.
            byte[] replaced = ReplaceValues(PdfReader.GetStreamBytes(target));
            target.SetData(replaced);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Loads a test PDF, decodes the image XObject named <paramref name="objectid"/> from the
        /// resources of the given page and asserts that the decoded bytes are non-null and contain
        /// at least one zero byte.
        /// </summary>
        /// <param name="filename">Name of the PDF resource to open.</param>
        /// <param name="page">Page number whose resources are inspected.</param>
        /// <param name="objectid">Name of the XObject entry to decode.</param>
        private void TestFile(String filename, int page, String objectid)
        {
            PdfReader pdfReader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, filename);

            try
            {
                PdfDictionary resources = pdfReader.GetPageResources(page);
                Assert.NotNull(resources);

                PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT);
                Assert.NotNull(xobjects);

                PdfIndirectReference objRef = xobjects.GetAsIndirectObject(new PdfName(objectid));
                if (objRef == null)
                {
                    // Join the keys so the message lists them rather than the collection object itself.
                    throw new NullReferenceException("Reference " + objectid + " not found - Available keys are " + String.Join(", ", xobjects.Keys));
                }
                PRStream      stream        = (PRStream)PdfReader.GetPdfObject(objRef);
                PdfDictionary colorSpaceDic = resources.GetAsDict(PdfName.COLORSPACE);

                PdfImageObject img    = new PdfImageObject(stream, colorSpaceDic);
                byte[]         result = img.GetImageAsBytes();
                Assert.NotNull(result);

                int zeroCount = 0;
                foreach (byte b in result)
                {
                    if (b == 0)
                    {
                        zeroCount++;
                    }
                }
                Assert.IsTrue(zeroCount > 0);
            }
            finally
            {
                pdfReader.Close();
            }
        }
        /// <summary>
        /// Recursively collects every stream reachable from <paramref name="o"/> by walking
        /// arrays and following indirect references through <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Reader used to resolve indirect references by object number.</param>
        /// <param name="o">Object to inspect; <c>null</c> yields an empty result.</param>
        /// <returns>The streams found; empty when there are none.</returns>
        /// <remarks>
        /// NOTE(review): there is no cycle detection; an array that refers back to itself
        /// through an indirect reference would recurse without end.
        /// </remarks>
        private static List <PdfObject> SearchAllStreams(PdfReader reader, PdfObject o)
        {
            List <PdfObject> results = new List <PdfObject>();

            // A reference that cannot be resolved arrives here as null.
            if (o == null)
            {
                return(results);
            }

            if (o.IsArray())
            {
                foreach (PdfObject item in ((PdfArray)o).ArrayList)
                {
                    results.AddRange(SearchAllStreams(reader, item));
                }
            }
            else if (o.IsIndirect())
            {
                int       num   = ((PdfIndirectReference)o).Number;
                PdfObject child = reader.GetPdfObject(num);
                results.AddRange(SearchAllStreams(reader, child));
            }
            else if (o.IsStream())
            {
                results.Add((PRStream)o);
            }

            return(results);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Appends every string token found in a page's content to <paramref name="sb"/>.
        /// </summary>
        /// <param name="reader">Reader used to resolve the content reference.</param>
        /// <param name="sb">Builder receiving the string tokens, in order of appearance.</param>
        /// <param name="content">
        /// The page's contents entry: an indirect reference to a stream, or an array of such
        /// references. <c>null</c> is ignored.
        /// </param>
        private static void ExtractLines(PdfReader reader, StringBuilder sb, PdfObject content)
        {
            if (content == null)
            {
                // Page without contents: nothing to extract.
                return;
            }

            if (content.IsArray())
            {
                // Contents split over several streams: process each part in order.
                foreach (PdfObject part in ((PdfArray)content).ArrayList)
                {
                    ExtractLines(reader, sb, part);
                }
                return;
            }

            var ir    = (PRIndirectReference)content;
            var value = reader.GetPdfObject(ir.Number);

            if (value == null || !value.IsStream())
            {
                return;
            }

            PRStream stream      = (PRStream)value;
            var      streamBytes = PdfReader.GetStreamBytes(stream);
            var      tokenizer   = new PRTokeniser(new RandomAccessFileOrArray(streamBytes));

            try
            {
                while (tokenizer.NextToken())
                {
                    if (tokenizer.TokenType == PRTokeniser.TK_STRING)
                    {
                        sb.Append(tokenizer.StringValue);
                    }
                }
            }
            finally
            {
                tokenizer.Close();
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Runs the given OCGParser over a single page.
        /// </summary>
        /// <param name="parser">The OCGParser to run.</param>
        /// <param name="page">The page dictionary of the page that needs to be parsed.</param>
        /// <exception cref="IOException"> </exception>
        private void Parse(OCGParser parser, PdfDictionary page)
        {
            // Hand the page's content stream and its resources dictionary straight to the parser.
            parser.Parse((PRStream)page.GetAsStream(PdfName.CONTENTS), page.GetAsDict(PdfName.RESOURCES));
        }
Exemplo n.º 9
0
// ---------------------------------------------------------------------------

        /**
         * Extracts document level attachments.
         * The EmbeddedFiles name array is read as (name, file specification) pairs; for every
         * key in a specification's EF dictionary the referenced stream is added to the zip
         * under the file name stored for the same key. A document without attachments is a no-op.
         * @param pdf the PDF from which document level attachments will be extracted
         * @param zip the ZipFile object to add the extracted files to
         */
        public void ExtractDocLevelAttachments(byte[] pdf, ZipFile zip)
        {
            PdfReader reader = new PdfReader(pdf);

            try
            {
                PdfDictionary root          = reader.Catalog;
                PdfDictionary documentnames = root.GetAsDict(PdfName.NAMES);
                if (documentnames == null)
                {
                    return;
                }

                PdfDictionary embeddedfiles = documentnames.GetAsDict(PdfName.EMBEDDEDFILES);
                if (embeddedfiles == null)
                {
                    return;
                }

                PdfArray filespecs = embeddedfiles.GetAsArray(PdfName.NAMES);
                if (filespecs == null)
                {
                    return;
                }

                // Entries come in pairs: even index = name, odd index = file specification.
                // The bound skips a trailing unpaired entry instead of indexing past the end.
                for (int i = 0; i + 1 < filespecs.Size; i += 2)
                {
                    PdfDictionary filespec = filespecs.GetAsDict(i + 1);
                    PdfDictionary refs     = filespec == null ? null : filespec.GetAsDict(PdfName.EF);
                    if (refs == null)
                    {
                        continue;
                    }

                    foreach (PdfName key in refs.Keys)
                    {
                        PRStream stream = (PRStream)PdfReader.GetPdfObject(
                            refs.GetAsIndirectObject(key)
                            );
                        zip.AddEntry(
                            filespec.GetAsString(key).ToString(),
                            PdfReader.GetStreamBytes(stream)
                            );
                    }
                }
            }
            finally
            {
                // Release the reader on every path, including failures.
                reader.Close();
            }
        }
Exemplo n.º 10
0
        /**
         * Collects the OCSP responses stored in the Document Security Store.
         * Entries whose response status is not 0 are skipped.
         * @return	a list of BasicOCSPResp objects; empty when there is no DSS or no OCSPs array
         * @throws IOException
         * @throws GeneralSecurityException when a response object cannot be obtained
         */
        virtual public List <BasicOcspResp> GetOCSPResponsesFromDSS()
        {
            List <BasicOcspResp> responses = new List <BasicOcspResp>();
            PdfArray             entries   = (dss == null) ? null : dss.GetAsArray(PdfName.OCSPS);

            if (entries != null)
            {
                for (int idx = 0; idx < entries.Size; idx++)
                {
                    byte[]   encoded  = PdfReader.GetStreamBytes((PRStream)entries.GetAsStream(idx));
                    OcspResp response = new OcspResp(encoded);

                    if (response.Status != 0)
                    {
                        continue;
                    }

                    try {
                        responses.Add((BasicOcspResp)response.GetResponseObject());
                    } catch (OcspException e) {
                        throw new GeneralSecurityException(e.ToString());
                    }
                }
            }
            return(responses);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Detects whether the PDF is a portfolio (has an EmbeddedFiles entry in its Names
        /// dictionary) and, if so, uploads every embedded file to <paramref name="tempfolder"/>.
        /// </summary>
        /// <param name="pdf">The PDF document bytes.</param>
        /// <param name="filename">Base name used when composing the uploaded file names.</param>
        /// <param name="weitergabe">Value stored in the "Weitergabe" field of every uploaded item.</param>
        /// <param name="tempfolder">Folder whose file collection receives the uploads.</param>
        /// <param name="tempuploadurl">URL prefix placed in front of every uploaded file name.</param>
        /// <param name="DocID">Document id used as prefix of every uploaded file name.</param>
        /// <returns><c>true</c> when the document has an EmbeddedFiles entry, otherwise <c>false</c>.</returns>
        public Boolean PDFPortfolio(byte[] pdf, string filename, Boolean weitergabe, SPFolder tempfolder, string tempuploadurl, string DocID)
        {
            using (PdfReader reader = new PdfReader(pdf))
            {
                PdfReader.unethicalreading = true;

                PdfDictionary catalog       = reader.Catalog;
                PdfDictionary documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
                if (documentNames == null)
                {
                    // First check: no portfolio present.
                    return(false);
                }

                PdfDictionary embeddedFiles = (PdfDictionary)PdfReader.GetPdfObject(documentNames.Get(PdfName.EMBEDDEDFILES));
                if (embeddedFiles == null)
                {
                    // Second check: no portfolio after all.
                    return(false);
                }

                // Second check passed: the portfolio really exists.
                PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
                if (filespecs != null)
                {
                    // The array alternates name / file specification, so the specifications sit
                    // at the odd indexes. Starting at 1 and stepping by 2 visits exactly those
                    // and cannot run past the end of an odd-length array.
                    for (int i = 1; i < filespecs.Size; i += 2)
                    {
                        PdfDictionary fileArray = filespecs.GetAsDict(i);
                        PdfDictionary file      = fileArray == null ? null : fileArray.GetAsDict(PdfName.EF);
                        if (file == null)
                        {
                            continue;
                        }

                        foreach (PdfName key in file.Keys)
                        {
                            PRStream stream = (PRStream)PdfReader.GetPdfObject(file.GetAsIndirectObject(key));

                            // The name depends only on the array index, so several keys of the
                            // same specification map to the same upload (overwrite is enabled).
                            string     attachedFileName     = DocID + "_" + filename + "_ExportFromPortfolio_" + i + ".pdf";
                            byte[]     attachedFileBytes    = PdfReader.GetStreamBytes(stream);
                            SPFile     attachmentuploadfile = tempfolder.Files.Add(tempuploadurl + attachedFileName, attachedFileBytes, true);
                            SPListItem attachmentitem       = attachmentuploadfile.Item;
                            attachmentitem["Weitergabe"] = weitergabe;
                            attachmentitem.SystemUpdate();
                        }
                    }
                }

                return(true);
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Creates <c>imageObject</c> from the stream referenced by <c>refi</c> the first time it
        /// is needed; later calls do nothing.
        /// </summary>
        private void PrepareImageObject()
        {
            if (imageObject == null)
            {
                imageObject = new PdfImageObject((PRStream)PdfReader.GetPdfObject(refi));
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Builds the description of an image XObject: pixel size, format, filter, raw bytes,
        /// and the physical size and resolution derived from the transformation matrix.
        /// </summary>
        /// <remarks>
        /// <c>IsImage</c> stays <c>false</c> when the resolved object has no Subtype entry; in
        /// that case none of the remaining properties are filled in.
        /// </remarks>
        /// <param name="pdfObject">Object (or reference to it) holding the image dictionary.</param>
        /// <param name="matrix">Transformation matrix; its I11 and I22 entries are used as width and height.</param>
        /// <param name="pageUnits">Factor applied to the pixel/size ratio to obtain the resolution.</param>
        /// <param name="pageNumber">Number of the page the image was found on.</param>
        /// <param name="renderInfo">Render information kept for later use.</param>
        public ImageInfo(PdfObject pdfObject, Matrix matrix, float pageUnits, int pageNumber, ImageRenderInfo renderInfo)
        {
            IsImage = false;
            ID      = Guid.NewGuid();

            PageNumber = pageNumber;

            _matrix     = matrix;
            _pageUnits  = pageUnits;
            _renderInfo = renderInfo;
            _imgObject  = (PdfDictionary)PdfReader.GetPdfObject(pdfObject);

            // Without a Subtype entry the object is not treated as an image.
            if (_imgObject.Get(PdfName.SUBTYPE) == null)
            {
                return;
            }

            _stream = _imgObject as PRStream;

            // Prefer an explicit mask stream and fall back to the soft mask.
            PRStream explicitMask = (PRStream)_imgObject.GetAsStream(PdfName.MASK);
            _maskStream = explicitMask != null ? explicitMask : (PRStream)_imgObject.GetAsStream(PdfName.SMASK);
            IsMask      = _maskStream != null;

            _fltr = _imgObject.Get(PdfName.FILTER);

            WidthPx      = _imgObject.GetAsNumber(PdfName.WIDTH).IntValue;
            HeightPx     = _imgObject.GetAsNumber(PdfName.HEIGHT).IntValue;
            BitsPerPixel = _imgObject.GetAsNumber(PdfName.BITSPERCOMPONENT).IntValue;

            // These helpers run only after the fields and properties above have been assigned.
            PixelFormat   = _DetectPixelFormat();
            ImageFilter   = _DetectImageFilter();
            ImageFileType = _GetFileType();
            ImageBytes    = _GetImageBytes();

            var ctmWidth  = matrix[Matrix.I11];
            var ctmHeight = matrix[Matrix.I22];

            // Pixels per unit of rendered size, in single precision.
            float pixelWidth       = WidthPx;
            float pixelHeight      = HeightPx;
            float renderedWidth    = ctmWidth;
            float renderedHeight   = ctmHeight;
            float horizontalScale  = pixelWidth / renderedWidth;
            float verticalScale    = pixelHeight / renderedHeight;

            Dpix = horizontalScale * pageUnits;
            DpiY = verticalScale * pageUnits;

            WidthMm  = Utilities.PointsToMillimeters(ctmWidth);
            HeightMm = Utilities.PointsToMillimeters(ctmHeight);

            IsImage = true;
        }
        /// <summary>
        /// Rewrites the content of a form XObject: its content bytes are passed through
        /// <c>_modifier</c> together with the form's resources and the result replaces the
        /// stream's data.
        /// </summary>
        /// <param name="stream">The form stream; it must be a <c>PRStream</c> so its data can be replaced.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="System.ArgumentException"><paramref name="stream"/> is not a <c>PRStream</c>.</exception>
        private void Do_Form(PdfStream stream)
        {
            if (stream == null)
            {
                throw new System.ArgumentNullException(nameof(stream));
            }

            // Validate before doing any work: only a PRStream offers the SetData call used below.
            PRStream prStream = stream as PRStream;
            if (prStream == null)
            {
                throw new System.ArgumentException("The form stream must be a PRStream so that its data can be replaced.", nameof(stream));
            }

            PdfDictionary resources = stream.GetAsDict(PdfName.RESOURCES);

            byte[] contentBytes = ContentByteUtils.GetContentBytesFromContentObject(stream);

            contentBytes = _modifier.Modify(contentBytes, resources);

            prStream.SetData(contentBytes);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Compares the bytes of two streams. When the first stream's Filter entry equals
        /// FlateDecode, both streams are decoded before the comparison; otherwise the raw bytes
        /// are compared.
        /// </summary>
        /// <param name="outStream">The first stream; its filter decides whether decoding takes place.</param>
        /// <param name="cmpStream">The stream to compare against.</param>
        /// <returns><c>true</c> when the compared byte arrays are equal.</returns>
        public virtual bool CompareStreams(PRStream outStream, PRStream cmpStream)
        {
            bool isFlate = PdfName.FLATEDECODE.Equals(outStream.Get(PdfName.FILTER));

            byte[] left  = PdfReader.GetStreamBytesRaw(outStream);
            byte[] right = PdfReader.GetStreamBytesRaw(cmpStream);

            if (!isFlate)
            {
                return(Util.ArraysAreEqual(left, right));
            }

            left  = PdfReader.DecodeBytes(left, outStream);
            right = PdfReader.DecodeBytes(right, cmpStream);
            return(Util.ArraysAreEqual(left, right));
        }
Exemplo n.º 16
0
 /**
  * Creates a PdfImage object.
  * The stream is first read in decoded form; when that fails the raw bytes are read
  * instead, and when that fails too the failure is ignored.
  * @param stream a PRStream
  * @throws IOException
  */
 public PdfImageObject(PRStream stream)
 {
     this.dictionary = stream;

     // First attempt: decoded bytes. The flag is set only when this succeeds.
     try {
         streamBytes = PdfReader.GetStreamBytes(stream);
         decoded     = true;
         return;
     }
     catch {
         // Decoding failed; fall through to the raw bytes.
     }

     // Second attempt: raw bytes, best effort.
     try {
         streamBytes = PdfReader.GetStreamBytesRaw(stream);
     }
     catch {
         // Ignored on purpose: streamBytes is simply left unassigned by this constructor.
     }
 }
Exemplo n.º 17
0
        /// <summary>
        /// Reads the embedded file named <c>EDICON_EMBED_FILENAME</c> (case-insensitive) from a
        /// PDF and returns its content decoded as UTF-8 text.
        /// </summary>
        /// <param name="srcEdiconPdfFileName">Path of the PDF file to read.</param>
        /// <returns>
        /// The attachment text, or <c>null</c> when the document has embedded files but none of
        /// them carries the expected name.
        /// </returns>
        /// <exception cref="EdiconException">
        /// The document has no embedded-files name array, or reading it failed.
        /// </exception>
        public static string ExtractEdiconData(string srcEdiconPdfFileName)
        {
            const string invalidAttachmentMessage = "Soubor neobsahuje platnou přílohu Edicon";

            PdfReader reader = null;

            try
            {
                reader = new PdfReader(srcEdiconPdfFileName);

                PdfDictionary root          = reader.Catalog;
                PdfDictionary documentnames = root.GetAsDict(PdfName.NAMES);
                PdfDictionary embeddedfiles = documentnames == null ? null : documentnames.GetAsDict(PdfName.EMBEDDEDFILES);
                PdfArray      filespecs     = embeddedfiles == null ? null : embeddedfiles.GetAsArray(PdfName.NAMES);

                // Report the missing structure explicitly rather than through a caught null reference.
                if (filespecs == null)
                {
                    throw new EdiconException(invalidAttachmentMessage);
                }

                // Entries alternate name / file specification; the specifications are at odd indexes.
                for (int i = 1; i < filespecs.Size; i += 2)
                {
                    PdfDictionary filespec = filespecs.GetAsDict(i);
                    PdfDictionary refs     = filespec == null ? null : filespec.GetAsDict(PdfName.EF);
                    if (refs == null)
                    {
                        continue;
                    }

                    foreach (PdfName key in refs.Keys)
                    {
                        // A key without a matching name string must not abort the search
                        // through the remaining attachments.
                        PdfString nameEntry = filespec.GetAsString(key);
                        if (nameEntry == null)
                        {
                            continue;
                        }

                        if (nameEntry.ToString().Equals(EDICON_EMBED_FILENAME, StringComparison.OrdinalIgnoreCase))
                        {
                            PRStream stream       = (PRStream)PdfReader.GetPdfObject(refs.GetAsIndirectObject(key));
                            byte[]   outByteArray = PdfReader.GetStreamBytes(stream);
                            return(Encoding.UTF8.GetString(outByteArray));
                        }
                    }
                }
            }
            catch (EdiconException)
            {
                throw;
            }
            catch (Exception)
            {
                // Any other failure is reported to callers as an invalid attachment.
                throw new EdiconException(invalidAttachmentMessage);
            }
            finally
            {
                if (reader != null)
                {
                    reader.Close();
                }
            }
            return(null);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Replaces the content and the dictionary entries of <paramref name="orig"/> with those
        /// of <paramref name="stream"/>.
        /// </summary>
        /// <param name="orig">The stream to overwrite; it is cleared first.</param>
        /// <param name="stream">The stream whose written content and entries are copied.</param>
        public static void ReplaceStream(PRStream orig, PdfStream stream)
        {
            orig.Clear();

            using (MemoryStream ms = new MemoryStream())
            {
                stream.WriteContent(ms);
                orig.SetData(ms.ToArray(), false);
            }

            // Copy the entries after SetData so that the source stream's values win.
            foreach (System.Collections.Generic.KeyValuePair <PdfName, PdfObject> keyValuePair in stream)
            {
                orig.Put(keyValuePair.Key, keyValuePair.Value);
            }
        }
Exemplo n.º 19
0
 /// <summary>
 /// Tries to decode the stream as an image and show it in <c>picImage</c>, reporting the
 /// stream length in <c>tsMessage</c>.
 /// </summary>
 /// <param name="stream">The stream to decode.</param>
 /// <returns><c>true</c> when the image was shown, <c>false</c> when any step threw.</returns>
 private bool TryToReadImage(PRStream stream)
 {
     bool shown;

     try
     {
         PdfImageObject decodedImage = new PdfImageObject(stream);

         // The picture box is cleared before the new image is assigned.
         picImage.Image = null;
         picImage.Image = decodedImage.GetDrawingImage();
         tsMessage.Text = "Image Size = " + stream.Length;
         shown          = true;
     }
     catch (Exception)
     {
         shown = false;
     }

     return(shown);
 }
Exemplo n.º 20
0
        /// <summary>
        /// Creates <c>imageObject</c> on first use, either from the stream referenced by
        /// <c>refi</c> or, when there is no reference, from the inline image information.
        /// Does nothing when the object already exists or neither source is available.
        /// </summary>
        private void PrepareImageObject()
        {
            if (imageObject != null)
            {
                return;
            }

            if (refi == null)
            {
                if (inlineImageInfo != null)
                {
                    imageObject = new PdfImageObject(inlineImageInfo.ImageDictionary, inlineImageInfo.Samples, colorSpaceDictionary);
                }
                return;
            }

            PRStream referenced = (PRStream)PdfReader.GetPdfObject(refi);
            imageObject = new PdfImageObject(referenced, colorSpaceDictionary);
        }
Exemplo n.º 21
0
        /// <summary>
        /// Lazily builds <c>imageObject</c>: from the referenced stream when <c>refi</c> is set,
        /// otherwise from the inline image data when that is available.
        /// </summary>
        private void PrepareImageObject()
        {
            bool alreadyPrepared = imageObject != null;
            if (alreadyPrepared)
            {
                return;
            }

            bool hasReference = refi != null;
            if (hasReference)
            {
                imageObject = new PdfImageObject((PRStream)iTextSharp.text.pdf.PdfReader.GetPdfObject(refi), colorSpaceDictionary);
                return;
            }

            if (inlineImageInfo != null)
            {
                // NOTE(review): the original author recorded that the
                // PdfImageObject(PdfDictionary, byte[], PdfDictionary) constructor is inaccessible
                // due to its protection level - confirm this call compiles against the referenced library.
                imageObject = new PdfImageObject(inlineImageInfo.ImageDictionary, inlineImageInfo.Samples, colorSpaceDictionary);
            }
        }
Exemplo n.º 22
0
        /// <summary>
        /// Reads every page of the PDF, collects the string tokens of the page contents (one per
        /// line) and writes the collected text to the console.
        /// </summary>
        /// <param name="stream">Stream containing the PDF document.</param>
        /// <returns>
        /// A list of tables. Nothing in this method adds to it, so it is returned empty.
        /// </returns>
        public List <DataTable> Load(MemoryStream stream)
        {
            var tables = new List <DataTable>();
            var sb     = new StringBuilder();
            var reader = new PdfReader(stream);

            try
            {
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    var cpage = reader.GetPageN(page);
                    AppendContentStrings(reader, cpage.Get(PdfName.CONTENTS), sb);
                }
            }
            finally
            {
                // Release the reader on every path, including failures.
                reader.Close();
            }

            Console.WriteLine(sb.ToString());
            return(tables);
        }

        /// <summary>
        /// Appends each string token of a contents entry to <paramref name="sb"/> as its own line.
        /// Handles a missing entry, a single stream reference and an array of references.
        /// </summary>
        private static void AppendContentStrings(PdfReader reader, PdfObject content, StringBuilder sb)
        {
            if (content == null)
            {
                return;
            }

            if (content.IsArray())
            {
                foreach (PdfObject part in ((PdfArray)content).ArrayList)
                {
                    AppendContentStrings(reader, part, sb);
                }
                return;
            }

            var ir    = (PRIndirectReference)content;
            var value = reader.GetPdfObject(ir.Number);

            if (value == null || !value.IsStream())
            {
                return;
            }

            var streamBytes = PdfReader.GetStreamBytes((PRStream)value);
            var tokenizer   = new PRTokeniser(new RandomAccessFileOrArray(streamBytes));

            try
            {
                while (tokenizer.NextToken())
                {
                    if (tokenizer.TokenType == PRTokeniser.TK_STRING)
                    {
                        sb.AppendLine(tokenizer.StringValue);
                    }
                }
            }
            finally
            {
                tokenizer.Close();
            }
        }
        /// <summary>
        /// Parses the ToUnicode CMap of a font used on a page.
        /// </summary>
        /// <param name="pdfReader">Reader of the document.</param>
        /// <param name="pageNumber">Number of the page whose resources are searched.</param>
        /// <param name="fontKey">Key passed to <c>FindFontDictionary</c> to locate the font.</param>
        /// <returns>
        /// The parsed CMap, or <c>null</c> when the font is not found or its ToUnicode entry is
        /// missing or is not a stream.
        /// </returns>
        public static CMapToUnicode GetFontCMapToUnicode(PdfReader pdfReader, int pageNumber, string fontKey)
        {
            PdfDictionary resources = pdfReader.GetPageN(pageNumber).GetAsDict(PdfName.RESOURCES);
            var           fontDict  = FindFontDictionary(resources, fontKey);

            if (fontDict == null)
            {
                return(null);
            }

            // "as" rather than a hard cast: an entry that is not a stream (for example a name)
            // means there is no embedded CMap to parse, which is reported as null.
            PRStream toUnicodeStream = PdfReader.GetPdfObject(fontDict.Get(PdfName.TOUNICODE)) as PRStream;

            if (toUnicodeStream == null)
            {
                return(null);
            }
            string toUnicode = Encoding.UTF8.GetString(PdfReader.GetStreamBytes(toUnicodeStream));

            return(CMapToUnicode.Parse(toUnicode));
        }
Exemplo n.º 24
0
        /// <summary>
        /// Parses a stream object and removes OCGs.
        /// </summary>
        /// <remarks>
        /// XObjects whose OC dictionary names a layer contained in <c>ocgs</c> are removed from
        /// the resources first. The content stream is then parsed operator by operator through
        /// <c>ProcessOperator</c>, and the bytes collected in <c>baos</c> replace the stream's data.
        /// </remarks>
        /// <param name="stream">	a stream object </param>
        /// <param name="resources">	the resources dictionary of that object (containing info about the OCGs) </param>
        public virtual void Parse(PRStream stream, PdfDictionary resources)
        {
            baos       = new MemoryStream();
            properties = resources.GetAsDict(PdfName.PROPERTIES);
            xobj       = new HashSet2 <PdfName>();
            PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT);

            if (xobjects != null)
            {
                // remove XObject (form or image) that belong to an OCG that needs to be removed
                foreach (PdfName name in xobjects.Keys)
                {
                    PRStream xobject = (PRStream)xobjects.GetAsStream(name);
                    if (xobject == null)
                    {
                        // Entry does not resolve to a stream; nothing to inspect.
                        continue;
                    }

                    PdfDictionary oc = xobject.GetAsDict(PdfName.OC);
                    if (oc != null)
                    {
                        PdfString ocname = oc.GetAsString(PdfName.NAME);
                        if (ocname != null && ocgs.Contains(ocname.ToString()))
                        {
                            xobj.Add(name);
                        }
                    }
                }
                // Removal happens in a second pass so the key collection is not modified while enumerated.
                foreach (PdfName name in xobj)
                {
                    xobjects.Remove(name);
                }
            }
            // parse the content stream
            byte[]           contentBytes = PdfReader.GetStreamBytes(stream);
            PRTokeniser      tokeniser    = new PRTokeniser(new RandomAccessFileOrArray(contentBytes));
            PdfContentParser ps           = new PdfContentParser(tokeniser);
            List <PdfObject> operands     = new List <PdfObject>();

            while (ps.Parse(operands).Count > 0)
            {
                PdfLiteral @operator = (PdfLiteral)operands[operands.Count - 1];
                ProcessOperator(this, @operator, operands);
            }
            baos.Flush();
            baos.Close();

            // ToArray returns exactly the bytes written; GetBuffer would hand over the whole
            // internal buffer, including unused capacity after the written content.
            stream.SetData(baos.ToArray());
        }
Exemplo n.º 25
0
        /// <summary>
        /// Copies the document-level attachments of a PDF file into the document produced by
        /// <paramref name="write"/>. Each attachment is written to a temporary file in
        /// <paramref name="folderName"/>, embedded from there, and the temporary file is deleted.
        /// </summary>
        /// <param name="file_name">Path of the PDF whose attachments are read.</param>
        /// <param name="folderName">
        /// Prefix for the temporary files; it is concatenated with the file name as is.
        /// </param>
        /// <param name="write">Writer that receives the attachments.</param>
        internal void ExtractAttachments(string file_name, string folderName, PdfWriter write)
        {
            PdfReader reader = new PdfReader(file_name);

            try
            {
                PdfDictionary catalog       = reader.Catalog;
                PdfDictionary documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
                if (documentNames == null)
                {
                    return;
                }

                PdfDictionary embeddedFiles = (PdfDictionary)PdfReader.GetPdfObject(documentNames.Get(PdfName.EMBEDDEDFILES));
                if (embeddedFiles == null)
                {
                    return;
                }

                PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
                if (filespecs == null)
                {
                    return;
                }

                // Entries alternate name / file specification; the specifications are at odd indexes.
                for (int i = 1; i < filespecs.Size; i += 2)
                {
                    PdfDictionary fileArray = filespecs.GetAsDict(i);
                    PdfDictionary file      = fileArray == null ? null : fileArray.GetAsDict(PdfName.EF);
                    if (file == null)
                    {
                        continue;
                    }

                    foreach (PdfName key in file.Keys)
                    {
                        PdfString nameEntry = fileArray.GetAsString(key);
                        if (nameEntry == null)
                        {
                            // No file name stored for this key; nothing to name the attachment with.
                            continue;
                        }

                        string   displayName       = nameEntry.ToString();
                        PRStream stream            = (PRStream)PdfReader.GetPdfObject(file.GetAsIndirectObject(key));
                        byte[]   attachedFileBytes = PdfReader.GetStreamBytes(stream);

                        // The name comes from the document, so directory parts are stripped before
                        // it is used in a path; the temporary file therefore stays under folderName.
                        string attachedFileName = folderName + System.IO.Path.GetFileName(displayName);

                        // save the extracted attachment
                        System.IO.File.WriteAllBytes(attachedFileName, attachedFileBytes);
                        try
                        {
                            // attach it to the output document
                            PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(write, attachedFileName, displayName, null);
                            write.AddFileAttachment(pfs);
                        }
                        finally
                        {
                            // delete the extracted file, even when embedding failed
                            System.IO.File.Delete(attachedFileName);
                        }
                    }
                }
            }
            finally
            {
                reader.Close();
            }
        }
        /// <summary>
        /// Replaces a page label inside a content stream. When the stream text contains
        /// <c>(initialLabel)Tj</c>, that operand is replaced by <c>(newPageLabel)Tj</c>, every
        /// occurrence of <c>/F2 1</c> is replaced by <c>/F1 1</c>, and the stream data is rewritten.
        /// A stream without the label is left untouched.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the stream bytes are round-tripped through UTF-8; bytes that are not
        /// valid UTF-8 would not survive a rewrite - confirm the streams are ASCII-compatible.
        /// </remarks>
        /// <param name="stream">The content stream to fix.</param>
        /// <param name="initialLabel">The label currently shown.</param>
        /// <param name="newPageLabel">The label to show instead.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        private static void FixPageNumberOnStream(PRStream stream, string initialLabel, string newPageLabel)
        {
            if (stream == null)
            {
                // ArgumentNullException derives from Exception, so existing handlers keep working.
                throw new ArgumentNullException(nameof(stream), "The stream is null");
            }

            byte[] data           = PdfReader.GetStreamBytes(stream);
            var    utf8           = new UTF8Encoding();
            string originalString = utf8.GetString(data);

            if (!originalString.Contains($"({initialLabel})Tj"))
            {
                return;
            }

            string newString = originalString
                               .Replace($"({initialLabel})Tj", $"({newPageLabel})Tj")
                               .Replace("/F2 1", "/F1 1");

            stream.SetData(utf8.GetBytes(newString));
        }
Exemplo n.º 27
0
        /**
         * Searches for a tag in a page and writes the text that belongs to it.
         * A number is treated as a marked-content id whose text is extracted from the page,
         * an array is processed element by element, and a dictionary is followed to its
         * MCID and PG entries.
         *
         * @param tag
         *            the name of the tag
         * @param obj
         *            an identifier to find the marked content
         * @param page
         *            a page dictionary
         * @throws IOException
         */
        public void ParseTag(String tag, PdfObject obj, PdfDictionary page)
        {
            PRStream stream = (PRStream)page.GetAsStream(PdfName.CONTENTS);

            // a number identifies the marked content directly, so it can be extracted right away
            PdfNumber mcid = obj as PdfNumber;
            if (mcid != null)
            {
                RenderFilter               mcidFilter = new MarkedContentRenderFilter(mcid.IntValue);
                ITextExtractionStrategy    extraction = new SimpleTextExtractionStrategy();
                FilteredTextRenderListener listener   = new FilteredTextRenderListener(extraction, new RenderFilter[] { mcidFilter });
                PdfContentStreamProcessor  processor  = new PdfContentStreamProcessor(listener);

                byte[]        contentBytes  = PdfReader.GetStreamBytes(stream);
                PdfDictionary pageResources = page.GetAsDict(PdfName.RESOURCES);
                processor.ProcessContent(contentBytes, pageResources);

                outp.Write(SimpleXMLParser.EscapeXML(listener.GetResultantText(), true));
                return;
            }

            // an array is handled by calling this method for every element,
            // with a line break between consecutive elements
            PdfArray arr = obj as PdfArray;
            if (arr != null)
            {
                int n = arr.Size;
                for (int i = 0; i < n; i++)
                {
                    if (i > 0)
                    {
                        outp.WriteLine();
                    }
                    ParseTag(tag, arr[i], page);
                }
                return;
            }

            // a dictionary supplies both the identifier and the page to look in
            PdfDictionary mcr = obj as PdfDictionary;
            if (mcr != null)
            {
                ParseTag(tag, mcr.GetDirectObject(PdfName.MCID), mcr.GetAsDict(PdfName.PG));
            }
        }
        /// <summary>
        /// Prints the value of every string token found in the page content of a PDF to the console.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments. When at least one is given, the first is used as the path
        /// of the PDF to read; otherwise the default path "C:\mypdf.pdf" is used.
        /// </param>
        static void Main(string[] args)
        {
            string        pdfPath = (args != null && args.Length > 0) ? args[0] : "C:\\mypdf.pdf";
            StringBuilder sb      = new StringBuilder();
            PdfReader     reader  = new PdfReader(pdfPath);

            try
            {
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    // Ask the reader for the page content instead of casting /Contents to an
                    // indirect reference: that cast fails when /Contents is a direct object or
                    // missing, and a /Contents array was previously skipped altogether.
                    byte[] pageBytes = reader.GetPageContent(page);
                    var    tokenizer = new PRTokeniser(new RandomAccessFileOrArray(pageBytes));

                    try
                    {
                        while (tokenizer.NextToken())
                        {
                            // Only string tokens are collected; every other token is dropped.
                            if (tokenizer.TokenType == PRTokeniser.TK_STRING)
                            {
                                sb.Append(tokenizer.StringValue);
                            }
                        }
                    }
                    finally
                    {
                        tokenizer.Close();
                    }
                }
            }
            finally
            {
                // Release the reader even when reading or tokenizing a page throws.
                reader.Close();
            }

            Console.Write("PDF Content:" + Environment.NewLine);
            Console.Write(sb.ToString());
            Console.Write(Environment.NewLine + "--EOF--");
        }
Exemplo n.º 29
0
        /**
         * Reads the content bytes behind a content object. The object may be an
         * indirect reference, a stream, or an array of further content objects;
         * any other type is rejected with an InvalidOperationException.
         * @param contentObject the object to read bytes from
         * @return the content bytes
         * @throws IOException
         */
        public static byte[] GetContentBytesFromContentObject(PdfObject contentObject)
        {
            switch (contentObject.Type)
            {
            case PdfObject.INDIRECT:
            {
                // Resolve the reference, then read whatever it points to.
                PRIndirectReference reference = (PRIndirectReference)contentObject;
                PdfObject           target    = PdfReader.GetPdfObject(reference);
                return(GetContentBytesFromContentObject(target));
            }

            case PdfObject.STREAM:
            {
                PRStream contentStream = (PRStream)PdfReader.GetPdfObject(contentObject);
                return(PdfReader.GetStreamBytes(contentStream));
            }

            case PdfObject.ARRAY:
            {
                // All parts are joined into one buffer up front, because
                // ProcessContent() resets state and so must be handed the
                // complete content in a single call.
                MemoryStream             joined = new MemoryStream();
                PdfArray                 parts  = (PdfArray)contentObject;
                ListIterator <PdfObject> cursor = parts.GetListIterator();
                while (cursor.HasNext())
                {
                    byte[] partBytes = GetContentBytesFromContentObject(cursor.Next());
                    joined.Write(partBytes, 0, partBytes.Length);
                    // a single space follows every part, the last one included
                    joined.WriteByte((byte)' ');
                }
                return(joined.ToArray());
            }

            default:
                throw new InvalidOperationException("Unable to handle Content of type " + contentObject.GetType());
            }
        }
Exemplo n.º 30
0
        /// <summary>
        /// Returns the content of a PDF stream object as a string.
        /// </summary>
        /// <param name="obj">The PDF object whose content is wanted.</param>
        /// <returns>
        /// For a stream, a string built from its bytes; for every other kind of object
        /// (dictionaries included) the placeholder text "yet unsupported:(".
        /// </returns>
        private string PdfObjectContents(PdfObject obj)
        {
            // Only streams are handled so far. Dictionaries used to have their own branch,
            // but it merely declared an unused local and ended at this same placeholder.
            if (!obj.IsStream())
            {
                return("yet unsupported:(");
            }

            PRStream stream = (PRStream)obj;
            byte[]   b;
            try
            {
                b = PdfReader.GetStreamBytes(stream);
            }
            catch (UnsupportedPdfException)
            {
                // GetStreamBytes could not handle this stream; use the raw bytes instead.
                b = PdfReader.GetStreamBytesRaw(stream);
            }

            // NOTE(review): ToCharArray() on byte[] is not a framework member - presumably a
            // project extension method; confirm how it maps bytes to chars.
            return(new string(b.ToCharArray()));
        }
Exemplo n.º 31
0
 /**
  * Builds a PdfImageObject from a stream and a color space dictionary.
  * Delegates to the three-argument constructor, handing it the bytes
  * returned by PdfReader.GetStreamBytesRaw(stream).
  * @param stream a PRStream
  * @param colorSpaceDic a color space dictionary
  * @throws IOException
  */
 public PdfImageObject(PRStream stream, PdfDictionary colorSpaceDic)
     : this(stream, PdfReader.GetStreamBytesRaw(stream), colorSpaceDic)
 {
 }
Exemplo n.º 32
0
 /**
  * Builds a PdfImageObject from a stream alone.
  * Delegates to the three-argument constructor, handing it the bytes
  * returned by PdfReader.GetStreamBytesRaw(stream) and null in place of
  * a color space dictionary.
  * @param stream a PRStream
  * @throws IOException
  */
 public PdfImageObject(PRStream stream)
     : this(stream, PdfReader.GetStreamBytesRaw(stream), null)
 {
 }