Esempio n. 1
        /// <summary>
        /// Looks for markup annotations on EVERY page of <c>MyReader</c> by inspecting the
        /// entries of each page's /Annots array.
        /// </summary>
        /// <remarks>
        /// This excerpt carries no braces and stops in the middle of the annotation test,
        /// so what happens after the condition (including how the count is updated and
        /// returned) is not visible here.
        /// </remarks>
        /// <returns>Count of apparent markup annotations</returns>
        private int ScanLinearizedPdf()
            int CountAnnots = 0;

            // NOTE(review): GetPageN is first called with P = 1 and the loop stops while P is
            // still below NumberOfPages, so the page numbered NumberOfPages is never fetched.
            // Confirm whether the bound should be "<=" to honour "EVERY page".
            for (int P = 1; P < MyReader.NumberOfPages; P++)
                PdfDictionary Next       = MyReader.GetPageN(P);
                PdfArray      annotArray = Next.GetAsArray(PdfName.ANNOTS);

                // Pages without an /Annots array are skipped.
                if (annotArray != null)
                    // NOTE(review): the "Size - 1" bound leaves the last array entry
                    // unvisited; confirm whether that is intentional.
                    for (int i = 0; i < annotArray.Size - 1; i++)
                        PdfDictionary NextAnnot = annotArray.GetAsDict(i);
                        // The second alternative also requires POPUP, so it can only be
                        // true when the first alternative already is.
                        if (NextAnnot.Contains(PdfName.POPUP) ||
                            (NextAnnot.Contains(PdfName.T) && NextAnnot.Contains(PdfName.POPUP)) ||

Esempio n. 2
        // Copies page 1 of "hello_signed1.pdf" twice into a new document through PdfCopy,
        // then reopens the written file and looks up two dictionaries by object number.
        // NOTE(review): no Close/Dispose calls and no assertions are visible in this excerpt;
        // presumably they were lost when the snippet was extracted — verify against the original test.
        public void CopySignedDocuments()
            string file = RESOURCES + "hello_signed1.pdf";

            Document pdfDocument = new Document();
            PdfCopy  copier      = new PdfCopy(pdfDocument, new FileStream("PdfCopyTest/CopySignedDocuments.pdf", FileMode.Create));


            PdfReader reader1 = new PdfReader(file);

            // First copy of page 1.
            copier.AddPage(copier.GetImportedPage(reader1, 1));

            // A fresh reader over the same file supplies the second copy of page 1.
            reader1 = new PdfReader(file);
            copier.AddPage(copier.GetImportedPage(reader1, 1));


            // Reopen the file that was just written.
            PdfReader     reader = new PdfReader("PdfCopyTest/CopySignedDocuments.pdf");
            // Object 9 is cast to a dictionary and the first entry of its /Reference array is
            // fetched; presumably this is the first copied signature dictionary — TODO confirm.
            PdfDictionary sig    = (PdfDictionary)reader.GetPdfObject(9);
            PdfDictionary sigRef = sig.GetAsArray(PdfName.REFERENCE).GetAsDict(0);

            // Same lookup for object 21 (presumably the second copy — TODO confirm).
            sig    = (PdfDictionary)reader.GetPdfObject(21);
            sigRef = sig.GetAsArray(PdfName.REFERENCE).GetAsDict(0);
Esempio n. 3
 // Conformance check applied to a single PDF object. Depending on the runtime type of obj1
 // it enforces the limits held in maxRealValue, maxStringLength, maxArrayLength and
 // maxDictionaryLength, and applies extra rules to the document catalog dictionary.
 // Every violation is reported by throwing PdfAConformanceException.
 // NOTE(review): several message keys are the empty string in this excerpt; presumably the
 // real keys were lost during extraction — verify against the original source.
 protected override void CheckPdfObject(PdfWriter writer, int key, Object obj1)
     if (obj1 is PdfNumber)
         PdfNumber number = (PdfNumber)obj1;
         // Only numbers whose text form contains a '.' are subject to the magnitude limit.
         if (Math.Abs(number.DoubleValue) > maxRealValue && number.ToString().Contains("."))
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
     else if (obj1 is PdfString)
         PdfString str = (PdfString)obj1;
         // The limit is applied to the byte length of the string.
         if (str.GetBytes().Length > maxStringLength)
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
     else if (obj1 is PdfArray)
         PdfArray array = (PdfArray)obj1;
         if (array.Size > maxArrayLength)
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
     else if (obj1 is PdfDictionary)
         PdfDictionary dictionary = (PdfDictionary)obj1;
         if (dictionary.Size > maxDictionaryLength)
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
         // Additional rules for a dictionary whose /Type is Catalog.
         if (PdfName.CATALOG.Equals(dictionary.GetAsName(PdfName.TYPE)))
             // The catalog must not carry an /AA entry.
             if (dictionary.Contains(PdfName.AA))
                 throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
             // When CheckStructure(conformanceLevel) is true, the catalog needs a MarkInfo
             // dictionary whose Marked entry is true, and a Lang entry.
             if (CheckStructure(conformanceLevel))
                 PdfDictionary markInfo = dictionary.GetAsDict(PdfName.MARKINFO);
                 if (markInfo == null || markInfo.GetAsBoolean(PdfName.MARKED) == null || markInfo.GetAsBoolean(PdfName.MARKED).BooleanValue == false)
                     throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.shall.include.a.markinfo.dictionary.whose.entry.marked.shall.have.a.value.of.true"));
                 if (!dictionary.Contains(PdfName.LANG))
                     throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.should.contain.lang.entry"));
        // Calls Contains(null) on an empty PdfDictionary and stores the result.
        // NOTE(review): the method name says the result should be false, but no assertion on
        // "contained" is visible in this excerpt — presumably it was lost; verify.
        public void PdfDictionaryContainsReturnsFalseIfKeyIsNull()
            PdfDictionary dictionary = new PdfDictionary();

            bool contained = dictionary.Contains(null);

        // Opens a PDF from inputStream and, when page 1 has an /Annots entry, asserts that
        // the annotation array holds exactly expectedAnnotationsSize entries.
        // NOTE(review): as shown, a page without /Annots passes without any assertion and the
        // reader is never closed; a branch may have been lost in extraction — verify.
        private void CheckAnnotationSize(Stream inputStream, int expectedAnnotationsSize)
            PdfReader     reader         = new PdfReader(inputStream);
            PdfDictionary pageDictionary = reader.GetPageN(1);

            if (pageDictionary.Contains(PdfName.ANNOTS))
                PdfArray annotations = pageDictionary.GetAsArray(PdfName.ANNOTS);
                Assert.True(annotations.Size == expectedAnnotationsSize);
Esempio n. 6
        // Conformance check for a graphics state dictionary. Each rule that fails is reported
        // by throwing PdfAConformanceException.
        // NOTE(review): this excerpt is damaged — one Get call has lost its argument and several
        // throw statements have lost their message argument; restore from the original source.
        protected override void CheckGState(PdfWriter writer, int key, Object obj1)
            if (obj1 is PdfDictionary)
                PdfDictionary gs  = (PdfDictionary)obj1;
                // Blend mode: when present it must equal BM_NORMAL or BM_COMPATIBLE.
                PdfObject     obj = gs.Get(PdfName.BM);
                if (obj != null && !PdfGState.BM_NORMAL.Equals(obj) && !PdfGState.BM_COMPATIBLE.Equals(obj))
                    throw new PdfAConformanceException(obj1,
                                                       MessageLocalization.GetComposedMessage("blend.mode.1.not.allowed", obj.ToString()));
                // /CA, when present, must be exactly 1.0.
                obj = gs.Get(PdfName.CA);
                double v = 0.0;
                if (obj != null && (v = ((PdfNumber)obj).DoubleValue) != 1.0)
                    throw new PdfAConformanceException(obj1,
                                                       MessageLocalization.GetComposedMessage("", v.ToString()));
                // NOTE(review): the key passed to Get is missing here; the same "must be 1.0"
                // test is then applied to whatever entry was originally requested.
                obj = gs.Get(;
                v   = 0.0;
                if (obj != null && (v = ((PdfNumber)obj).DoubleValue) != 1.0)
                    throw new PdfAConformanceException(obj1,
                                                       MessageLocalization.GetComposedMessage("", v.ToString()));

                // /TR must not be present.
                if (gs.Contains(PdfName.TR))
                    throw new PdfAConformanceException(obj1,
                // /TR2, when present as a name, must equal PdfName.DEFAULT.
                PdfName tr2 = gs.GetAsName(PdfName.TR2);
                if (tr2 != null && !tr2.Equals(PdfName.DEFAULT))
                    throw new PdfAConformanceException(obj1,
                // /RI, when present as a name, must be one of the four names listed below.
                PdfName ri = gs.GetAsName(PdfName.RI);
                if (ri != null &&
                    !(PdfName.RELATIVECOLORIMETRIC.Equals(ri) || PdfName.ABSOLUTECOLORIMETRIC.Equals(ri) ||
                      PdfName.PERCEPTUAL.Equals(ri) || PdfName.SATURATION.Equals(ri)))
                    throw new PdfAConformanceException(obj1,
                                                       MessageLocalization.GetComposedMessage("", ri.ToString()));
                // /SMask, when present, must be the name PdfName.NONE.
                if (gs.Get(PdfName.SMASK) != null && !PdfName.NONE.Equals(gs.GetAsName(PdfName.SMASK)))
                    throw new PdfAConformanceException(obj1,
        /**
         * Parses the samples of the image from the underlying content parser, ignoring all filters.
         * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
         * The parser will be left positioned immediately following the EI operator.
         * This is primarily useful if no filters have been applied.
         * @param imageDictionary the dictionary of the inline image
         * @param colorSpaceDic the color space dictionary, forwarded to ComputeBytesPerRow
         * @param ps the content parser
         * @return the samples of the image
         * @throws IOException if anything bad happens during parsing
         */
        private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps)
            // special case:  when no filter is specified, we just read the number of bits
            // per component, multiplied by the width and height.
            // A dictionary that does declare a filter is rejected up front.
            if (imageDictionary.Contains(PdfName.FILTER))
                throw new ArgumentException("Dictionary contains filters");

            PdfNumber h = imageDictionary.GetAsNumber(PdfName.HEIGHT);

            // Total size = bytes per row (from ComputeBytesPerRow) times the /Height value.
            int bytesToRead = ComputeBytesPerRow(imageDictionary, colorSpaceDic) * h.IntValue;

            byte[]      bytes     = new byte[bytesToRead];
            PRTokeniser tokeniser = ps.GetTokeniser();

            int shouldBeWhiteSpace = tokeniser.Read(); // skip next character (which better be a whitespace character - I suppose we could check for this)
            // from the PDF spec:  Unless the image uses ASCIIHexDecode or ASCII85Decode as one of its filters, the ID operator shall be followed by a single white-space character, and the next character shall be interpreted as the first byte of image data.
            // unfortunately, we've seen some PDFs where there is no space following the ID, so we have to capture this case and handle it
            int startIndex = 0;

            // NOTE(review): startIndex is never changed in this excerpt, so when the byte read
            // above is kept as bytes[0], the loop below still begins at index 0 and overwrites it.
            // Presumably an increment of startIndex was lost in extraction — verify.
            if (!PRTokeniser.IsWhitespace(shouldBeWhiteSpace) || shouldBeWhiteSpace == 0)  // tokeniser treats 0 as whitespace, but for our purposes, we shouldn't)
                bytes[0] = (byte)shouldBeWhiteSpace;
            // Read the remaining sample bytes one at a time; -1 from Read() is treated as a
            // premature end of the content stream.
            for (int i = startIndex; i < bytesToRead; i++)
                int ch = tokeniser.Read();
                if (ch == -1)
                    throw new InlineImageParseException("End of content stream reached before end of image data");

                bytes[i] = (byte)ch;
            // The next parsed object is expected to be the EI operator.
            PdfObject ei = ps.ReadPRObject();

            if (!ei.ToString().Equals("EI"))
                // Some PDF producers seem to add another non-whitespace character after the image data.
                // Let's try to handle that case here.
                PdfObject ei2 = ps.ReadPRObject();
                if (!ei2.ToString().Equals("EI"))
                    throw new InlineImageParseException("EI not found after end of image data");

Esempio n. 8
        // Conformance check for a file specification dictionary; violations are reported by
        // throwing PdfAConformanceException.
        // NOTE(review): this excerpt is damaged — the first condition ends with a dangling "||"
        // and two throw statements have lost their message argument.
        protected override void CheckFileSpec(PdfWriter writer, int key, Object obj1)
            if (obj1 is PdfFileSpecification)
                PdfDictionary fileSpec = (PdfFileSpecification)obj1;
                // Both /UF and /F are required; the message key also mentions a "desc" entry,
                // whose test is presumably the operand missing after the trailing "||".
                if (!fileSpec.Contains(PdfName.UF) || !fileSpec.Contains(PdfName.F) ||
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("file.specification.dictionary.shall.contain.f.uf.and.desc.entries"));

                PdfObject obj = fileSpec.Get(PdfName.AFRELATIONSHIP);

                // /AFRelationship must be present, be a name, and be listed in allowedAFRelationships.
                if (obj == null || !obj.IsName() || !allowedAFRelationships.Contains(obj as PdfName))
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("file.specification.dictionary.shall.contain.correct.afrelationship.key"));

                // When /EF is present it must resolve to a dictionary that has an /F entry,
                // and that /F entry must itself resolve to a dictionary.
                if (fileSpec.Contains(PdfName.EF))
                    PdfDictionary dict = GetDirectDictionary(fileSpec.Get(PdfName.EF));
                    if (dict == null || !dict.Contains(PdfName.F))
                        throw new PdfAConformanceException(obj1,

                    PdfDictionary embeddedFile = GetDirectDictionary(dict.Get(PdfName.F));
                    if (embeddedFile == null)
                        throw new PdfAConformanceException(obj1,

Esempio n. 9
 // Opens the PDF at "file" and writes one trace line per cross-reference slot
 // (0 .. XrefSize - 1): dictionaries are traced with their /Type and /Subtype values,
 // other non-null objects with their runtime type only.
 public static void Test_TraceObjects(string file)
     Trace.WriteLine($"pdf \"{file}\"");
     using (PdfReader pdfReader = new PdfReader(file))
         int objectCount = pdfReader.XrefSize;
         for (int i = 0; i < objectCount; i++)
             PdfObject obj = pdfReader.GetPdfObject(i);
             //if (obj != null)
             if (obj is PdfDictionary)
                 PdfDictionary objDic = (PdfDictionary)obj;
                 // type / subtype stay null when the dictionary lacks the corresponding key.
                 string        type   = null;
                 if (objDic.Contains(PdfName.TYPE))
                     type = objDic.Get(PdfName.TYPE).ToString();
                 string subtype = null;
                 if (objDic.Contains(PdfName.SUBTYPE))
                     subtype = objDic.Get(PdfName.SUBTYPE).ToString();
                 // The label printed is i + 1, right-aligned in a field of width 3.
                 Trace.WriteLine($"object {i + 1,3} object type {obj.GetType()} dictionary type {type} subtype {subtype}");
             else if (obj != null)
                 Trace.WriteLine($"object {i + 1,3} object type {obj.GetType()}");
                 // NOTE(review): the "null" line below presumably belonged to a final else
                 // branch that was lost in extraction — verify.
                 Trace.WriteLine($"object {i + 1,3} null");
Esempio n. 10
        // Builds the info object for one page: stores the reader and the page dictionary
        // obtained from reader.GetPageN(pageNumber), creates empty ImagesInfo / TextsInfo
        // holders, derives the page dimensions in millimetres and then calls _RenderPage.
        public PageInfo(PdfReader reader, int pageNumber)
            PageNumber = pageNumber;
            ImagesInfo = new ImagesInfo();
            TextsInfo  = new TextsInfo();
            _reader    = reader;
            _page      = reader.GetPageN(pageNumber);

            // Uses the page's /UserUnit value when present; otherwise falls back to 72.
            PageUnits = _page.Contains(PdfName.USERUNIT) ? _page.GetAsNumber(PdfName.USERUNIT).FloatValue : 72;

            Rectangle mediabox = reader.GetPageSize(_page);

            // Width and height of the page size rectangle, converted from points to millimetres.
            PageWidth  = Utilities.PointsToMillimeters(mediabox.Width);
            PageHeight = Utilities.PointsToMillimeters(mediabox.Height);

            _RenderPage(PageNumber, PageUnits);
Esempio n. 11
        // Scans every cross-reference slot of pdfReader for dictionaries whose /Type prints as
        // "/XObject" and whose /Subtype prints as "/Image".
        // NOTE(review): the excerpt ends at the test itself; the statement that updates "count"
        // and the return are not visible here.
        public static int GetImagesCount(PdfReader pdfReader)
            int objectCount = pdfReader.XrefSize;
            int count       = 0;

            for (int i = 0; i < objectCount; i++)
                PdfObject obj = pdfReader.GetPdfObject(i);
                if (obj is PdfDictionary)
                    PdfDictionary objDic = (PdfDictionary)obj;
                    if (objDic.Contains(PdfName.TYPE) && objDic.Get(PdfName.TYPE).ToString() == "/XObject" && objDic.Contains(PdfName.SUBTYPE) && objDic.Get(PdfName.SUBTYPE).ToString() == "/Image")
Esempio n. 12
        // Reads the object at cross-reference slot "index" from the PDF at "file", checks that
        // it is an image XObject, Flate-decodes its raw stream bytes, builds an
        // iTextSharp.text.Image from them and traces the image's size and resolution.
        // NOTE(review): nothing is written to imageFile in this excerpt, and no early exit
        // follows the two "object is not ..." traces — presumably lines were lost; verify.
        public static void Test_ExtractImage(string file, int index, string imageFile)
            Trace.WriteLine($"extract image index {index} from pdf \"{file}\" to \"{imageFile}\"");
            // A relative imageFile is resolved against the directory of the PDF.
            if (!zPath.IsPathRooted(imageFile))
                imageFile = zPath.Combine(zPath.GetDirectoryName(file), imageFile);
            using (PdfReader pdfReader = new PdfReader(file))
                PdfObject obj = pdfReader.GetPdfObject(index);
                if (!(obj is PdfDictionary))
                    Trace.WriteLine("object is not dictionary");
                PdfDictionary objDic = (PdfDictionary)obj;
                if (!objDic.Contains(PdfName.TYPE) || objDic.Get(PdfName.TYPE).ToString() != "/XObject" || !objDic.Contains(PdfName.SUBTYPE) || objDic.Get(PdfName.SUBTYPE).ToString() != "/Image")
                    Trace.WriteLine("object is not an image");
                //PdfImage pdfImage = new PdfImage();
                // The raw stream bytes are passed through FlateDecode unconditionally.
                // NOTE(review): the stream's /Filter is not inspected first — confirm this is
                // only used on Flate-encoded images.
                byte[] bytes = PdfReader.FlateDecode(PdfReader.GetStreamBytesRaw((PRStream)obj), true);
                //byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)obj);
                // error : The byte array is not a recognized imageformat
                iTextSharp.text.Image image = iTextSharp.text.Image.GetInstance(bytes);
                Trace.WriteLine($"width {image.Width} height {image.Height} dpiX {image.DpiX} dpiY {image.DpiY}");

Esempio n. 13
        /**
         * Parses the samples of the image from the underlying content parser, ignoring all filters.
         * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
         * The parser will be left positioned immediately following the EI operator.
         * This is primarily useful if no filters have been applied.
         * @param imageDictionary the dictionary of the inline image
         * @param ps the content parser
         * @return the samples of the image
         * @throws IOException if anything bad happens during parsing
         */
        private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfContentParser ps)
            // special case:  when no filter is specified, we just read the number of bits
            // per component, multiplied by the width and height.
            // A dictionary that does declare a filter is rejected up front.
            if (imageDictionary.Contains(PdfName.FILTER))
                throw new ArgumentException("Dictionary contains filters");

            PdfNumber h = imageDictionary.GetAsNumber(PdfName.HEIGHT);

            // Total size = bytes per row (from ComputeBytesPerRow) times the /Height value.
            int bytesToRead = ComputeBytesPerRow(imageDictionary) * h.IntValue;

            byte[]      bytes     = new byte[bytesToRead];
            PRTokeniser tokeniser = ps.GetTokeniser();

            tokeniser.Read(); // skip next character (which better be a whitespace character - I suppose we could check for this)
            // Read exactly bytesToRead sample bytes; -1 from Read() is treated as a premature
            // end of the content stream.
            for (int i = 0; i < bytesToRead; i++)
                int ch = tokeniser.Read();
                if (ch == -1)
                    throw new InlineImageParseException("End of content stream reached before end of image data");

                bytes[i] = (byte)ch;
            // The next parsed object must be the EI operator.
            PdfObject ei = ps.ReadPRObject();

            if (!ei.ToString().Equals("EI"))
                throw new InlineImageParseException("EI not found after end of image data");

Esempio n. 14
        /**
         * Extracts locations from the redact annotations contained in the document and applied to the given page.
         */
        private IList <PdfCleanUpLocation> ExtractLocationsFromRedactAnnots(int page, PdfDictionary pageDict)
            // Accumulates the locations produced for every redact annotation on the page.
            List <PdfCleanUpLocation> locations = new List <PdfCleanUpLocation>();

            // Pages without an /Annots entry contribute nothing.
            if (pageDict.Contains(PdfName.ANNOTS))
                PdfArray annotsArray = pageDict.GetAsArray(PdfName.ANNOTS);

                for (int i = 0; i < annotsArray.Size; ++i)
                    // Each entry is fetched both as an indirect reference and as a dictionary.
                    PdfIndirectReference annotIndirRef = annotsArray.GetAsIndirectObject(i);
                    PdfDictionary        annotDict     = annotsArray.GetAsDict(i);
                    PdfName annotSubtype = annotDict.GetAsName(PdfName.SUBTYPE);

                    // NOTE(review): annotSubtype and annotIndirRef are dereferenced without a
                    // null check; presumably the Get* helpers return null for a missing
                    // /Subtype or a direct (non-indirect) entry — verify.
                    if (annotSubtype.Equals(PdfName.REDACT))
                        // Record the reference of the redact annotation, then collect its locations.
                        SaveRedactAnnotIndirRef(page, annotIndirRef.ToString());
                        locations.AddRange(ExtractLocationsFromRedactAnnot(page, i, annotDict));

Esempio n. 15
 /// <summary>
 /// Opens a PDF and writes one trace line for every image XObject found in its
 /// cross-reference table (width, height, filter and bits per component).
 /// </summary>
 /// <remarks>
 /// /Filter and /BitsPerComponent are optional entries of an image dictionary, so they are
 /// read null-safely and reported as "none" when absent instead of raising a
 /// NullReferenceException.
 /// </remarks>
 /// <param name="file">Path of the PDF file to inspect.</param>
 public static void Test_TraceImages(string file)
 {
     Trace.WriteLine($"pdf \"{file}\"");
     using (PdfReader pdfReader = new PdfReader(file))
     {
         int objectCount = pdfReader.XrefSize;
         for (int i = 0; i < objectCount; i++)
         {
             PdfDictionary objDic = pdfReader.GetPdfObject(i) as PdfDictionary;
             if (objDic == null)
             {
                 // Free slots and non-dictionary objects cannot be images.
                 continue;
             }

             bool isImage = objDic.Contains(PdfName.TYPE) && objDic.Get(PdfName.TYPE).ToString() == "/XObject"
                            && objDic.Contains(PdfName.SUBTYPE) && objDic.Get(PdfName.SUBTYPE).ToString() == "/Image";
             if (!isImage)
             {
                 continue;
             }

             // Optional keys: absent on unfiltered images and on image masks respectively.
             string filter = objDic.Get(PdfName.FILTER)?.ToString() ?? "none";
             string bpp    = objDic.Get(PdfName.BITSPERCOMPONENT)?.ToString() ?? "none";
             int    width  = int.Parse(objDic.Get(PdfName.WIDTH).ToString());
             int    height = int.Parse(objDic.Get(PdfName.HEIGHT).ToString());
             Trace.WriteLine($"object {i + 1,3} image width {width} height {height} filter {filter} bits per component {bpp}");
         }
     }
 }
        /**
         * Parses the samples of the image from the underlying content parser, accounting for filters
         * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
         * The parser will be left positioned immediately following the EI operator.
         * <b>Note:</b>This implementation does not actually apply the filters at this time
         * @param imageDictionary the dictionary of the inline image
         * @param colorSpaceDic the color space dictionary, forwarded to ParseUnfilteredSamples
         * @param ps the content parser
         * @return the samples of the image
         * @throws IOException if anything bad happens during parsing
         */
        private static byte[] ParseInlineImageSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps)
            // by the time we get to here, we have already parsed the ID operator

            // Images without a /Filter entry are delegated to ParseUnfilteredSamples.
            if (!imageDictionary.Contains(PdfName.FILTER))
                return(ParseUnfilteredSamples(imageDictionary, colorSpaceDic, ps));

            // read all content until we reach an EI operator surrounded by whitespace.
            // The following algorithm has two potential issues: what if the image stream
            // contains <ws>EI<ws> ?
            // Plus, there are some streams that don't have the <ws> before the EI operator
            // it sounds like we would have to actually decode the content stream, which
            // I'd rather avoid right now.
            // baos collects bytes written so far; accumulated is the buffer whose contents are
            // copied into baos below. "found" tracks progress through the
            // <ws> 'E' 'I' <ws> sequence (0 = nothing matched yet, 3 = "<ws>EI" seen).
            MemoryStream baos        = new MemoryStream();
            MemoryStream accumulated = new MemoryStream();
            int          ch;
            int          found     = 0;
            PRTokeniser  tokeniser = ps.GetTokeniser();

            byte[] ff = null;

            // NOTE(review): most branch bodies of this loop (updates to "found", writes to
            // "accumulated", resets and the return of the collected bytes) are not visible in
            // this excerpt; the remaining lines cannot be read as the complete algorithm.
            while ((ch = tokeniser.Read()) != -1)
                if (found == 0 && PRTokeniser.IsWhitespace(ch))
                else if (found == 1 && ch == 'E')
                else if (found == 1 && PRTokeniser.IsWhitespace(ch))
                    // this clause is needed if we have a white space character that is part of the image data
                    // followed by a whitespace character that precedes the EI operator.  In this case, we need
                    // to flush the first whitespace, then treat the current whitespace as the first potential
                    // character for the end of stream check.  Note that we don't increment 'found' here.
                    baos.Write(ff = accumulated.ToArray(), 0, ff.Length);
                else if (found == 2 && ch == 'I')
                else if (found == 3 && PRTokeniser.IsWhitespace(ch))
                    // Candidate end of image: the bytes collected so far are checked with
                    // InlineImageStreamBytesAreComplete before the match is accepted.
                    byte[] tmp = baos.ToArray();
                    if (InlineImageStreamBytesAreComplete(tmp, imageDictionary))
                    byte[] accumulatedArr = accumulated.ToArray();
                    baos.Write(accumulatedArr, 0, accumulatedArr.Length);

                    found = 0;
                    baos.Write(ff = accumulated.ToArray(), 0, ff.Length);

                    found = 0;
            // Reached when the tokeniser runs out of input (Read() returned -1).
            throw new InlineImageParseException("Could not find image data or EI");
Esempio n. 17
 // Conformance check for form fields and annotations; each failed rule is reported by
 // throwing PdfAConformanceException.
 // NOTE(review): several message keys are the empty string in this excerpt; presumably the
 // real keys were lost during extraction — verify against the original source.
 protected override void CheckAnnotation(PdfWriter writer, int key, Object obj1)
     if (obj1 is PdfFormField)
         PdfFormField field = (PdfFormField)obj1;
         // NOTE(review): the statement controlled by the next "if" is not visible here
         // (presumably an early return for fields without /Subtype) — verify.
         if (!field.Contains(PdfName.SUBTYPE))
         // A field must not carry /AA or /A.
         if (field.Contains(PdfName.AA) || field.Contains(PdfName.A))
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("widget.annotation.dictionary.or.field.dictionary.shall.not.include.a.or.aa.entry"));
     if (obj1 is PdfAnnotation)
         PdfAnnotation annot   = (PdfAnnotation)obj1;
         // A subtype, when present as a name, must be listed in allowedAnnotTypes.
         PdfName       subtype = annot.Get(PdfName.SUBTYPE) as PdfName;
         if (subtype != null && !allowedAnnotTypes.Contains(subtype))
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("annotation.type.1.not.allowed", subtype.ToString()));
         // /CA, when present, must be exactly 1.0.
         PdfNumber ca = annot.GetAsNumber(PdfName.CA);
         if (ca != null && ca.FloatValue != 1.0)
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
         // The /F (flags) entry is mandatory.
         PdfNumber f = annot.GetAsNumber(PdfName.F);
         if (f == null)
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("an.annotation.dictionary.shall.contain.the.f.key"));
         int flags = f.IntValue;
         // FLAGS_PRINT must pass CheckFlag; FLAGS_HIDDEN, FLAGS_INVISIBLE and FLAGS_NOVIEW must not.
         if (CheckFlag(flags, PdfAnnotation.FLAGS_PRINT) == false || CheckFlag(flags, PdfAnnotation.FLAGS_HIDDEN) ||
             CheckFlag(flags, PdfAnnotation.FLAGS_INVISIBLE) || CheckFlag(flags, PdfAnnotation.FLAGS_NOVIEW))
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
         // Text annotations additionally need both FLAGS_NOZOOM and FLAGS_NOROTATE.
         if (PdfName.TEXT.Equals(annot.GetAsName(PdfName.SUBTYPE)))
             if (CheckFlag(flags, PdfAnnotation.FLAGS_NOZOOM) == false || CheckFlag(flags, PdfAnnotation.FLAGS_NOROTATE) == false)
                 throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
         // When /C or /IC is present, four ASCII characters at byte offset 16 of the
         // writer's colour profile data are decoded and compared with "RGB".
         if (annot.Contains(PdfName.C) || annot.Contains(PdfName.IC))
             ICC_Profile colorProfile = ((PdfAWriter)writer).ColorProfile;
             String      cs           = "";
             cs = System.Text.Encoding.ASCII.GetString(colorProfile.Data, 16, 4);
             // NOTE(review): the decoded string has four characters while the literal has
             // three, so as written this equality looks like it cannot hold — confirm
             // (e.g. whether a trailing space should be trimmed). colorProfile is also
             // dereferenced without a null check.
             if (!"RGB".Equals(cs.ToUpper()))
                 throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
         // Appearance dictionary: /R and /D are not allowed, and /N must be an indirect reference.
         PdfDictionary ap = GetDirectDictionary(annot.Get(PdfName.AP));
         if (ap != null)
             if (ap.Contains(PdfName.R) || ap.Contains(PdfName.D))
                 throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
             PdfObject n = ap.Get(PdfName.N);
             if (!(n is PdfIndirectReference))
                 throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
         // Widget annotations must not carry /AA or /A.
         if (PdfName.WIDGET.Equals(annot.GetAsName(PdfName.SUBTYPE)) && (annot.Contains(PdfName.AA) || annot.Contains(PdfName.A)))
             throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("widget.annotation.dictionary.or.field.dictionary.shall.not.include.a.or.aa.entry"));
         // When CheckStructure(conformanceLevel) is true, subtypes listed in
         // contentAnnotations require a /Contents entry.
         if (CheckStructure(conformanceLevel))
             if (contentAnnotations.Contains(subtype) && !annot.Contains(PdfName.CONTENTS))
                 throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("annotation.of.type.1.should.have.contents.key", subtype.ToString()));
Esempio n. 18
        // Conformance check applied to a single PDF object. Depending on the runtime type of
        // obj1 it enforces the limits held in maxRealValue, maxStringLength, maxArrayLength and
        // maxDictionaryLength, and applies extra rules to Catalog, Page and OutputIntent
        // dictionaries. Every violation is reported by throwing PdfAConformanceException.
        // NOTE(review): this excerpt is damaged — several message keys are empty, two throw
        // statements have lost their message argument, and the nesting of the OutputIntent
        // branch cannot be recovered reliably from what is shown.
        protected override void CheckPdfObject(PdfWriter writer, int key, Object obj1)
            if (obj1 is PdfNumber)
                PdfNumber number = (PdfNumber)obj1;
                // Only numbers whose text form contains a '.' are subject to the magnitude limit.
                if (Math.Abs(number.DoubleValue) > maxRealValue && number.ToString().Contains("."))
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
            else if (obj1 is PdfString)
                PdfString str = (PdfString)obj1;
                // The limit is applied to the byte length of the string.
                if (str.GetBytes().Length > maxStringLength)
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
            else if (obj1 is PdfArray)
                PdfArray array = (PdfArray)obj1;
                if (array.Size > maxArrayLength)
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
            else if (obj1 is PdfDictionary)
                PdfDictionary dictionary = (PdfDictionary)obj1;
                if (dictionary.Size > maxDictionaryLength)
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
                // Further rules depend on the dictionary's /Type name.
                PdfName type = dictionary.GetAsName(PdfName.TYPE);
                if (PdfName.CATALOG.Equals(type))
                    // The catalog must contain /Metadata.
                    if (!dictionary.Contains(PdfName.METADATA))
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.contain.metadata"));

                    // The catalog must not contain /AA.
                    if (dictionary.Contains(PdfName.AA))
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));

                    // A /Names dictionary, when present, must not contain /EmbeddedFiles.
                    if (dictionary.Contains(PdfName.NAMES))
                        PdfDictionary names = GetDirectDictionary(dictionary.Get(PdfName.NAMES));
                        if (names != null && names.Contains(PdfName.EMBEDDEDFILES))
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.not.include.embeddedfiles.names.entry"));

                    // When CheckStructure(conformanceLevel) is true, the catalog needs a MarkInfo
                    // dictionary whose Marked entry is true, and a Lang entry.
                    if (CheckStructure(conformanceLevel))
                        PdfDictionary markInfo = GetDirectDictionary(dictionary.Get(PdfName.MARKINFO));
                        if (markInfo == null || markInfo.GetAsBoolean(PdfName.MARKED) == null || markInfo.GetAsBoolean(PdfName.MARKED).BooleanValue == false)
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.shall.include.a.markinfo.dictionary.whose.entry.marked.shall.have.a.value.of.true"));
                        if (!dictionary.Contains(PdfName.LANG))
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.should.contain.lang.entry"));
                else if (PdfName.PAGE.Equals(type))
                    // A page dictionary must not contain /AA.
                    if (dictionary.Contains(PdfName.AA))
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("page.dictionary.shall.not.include.aa.entry"));
                else if (PdfName.OUTPUTINTENT.Equals(type))
                    // Compares this intent's /DestOutputProfile with the one remembered in
                    // pdfaDestOutputIntent (by IndRef) and then stores the current one.
                    PdfObject destOutputIntent = dictionary.Get(PdfName.DESTOUTPUTPROFILE);
                    if (destOutputIntent != null && pdfaDestOutputIntent != null)
                        if (pdfaDestOutputIntent.IndRef != destOutputIntent.IndRef)
                            throw new PdfAConformanceException(obj1,
                        pdfaDestOutputIntent = destOutputIntent;

                    // For an intent whose /S is GTS_PDFA1, pdfaOutputIntentColorSpace is set from
                    // four ASCII characters at byte offset 16 of the writer's colour profile data.
                    PdfName gts = dictionary.GetAsName(PdfName.S);
                    if (pdfaDestOutputIntent != null)
                        if (PdfName.GTS_PDFA1.Equals(gts))
                            if (pdfaOutputIntentColorSpace != null)
                                throw new PdfAConformanceException(obj1,
                            pdfaOutputIntentColorSpace = "";
                            ICC_Profile icc_profile = writer.ColorProfile;
                            pdfaOutputIntentColorSpace = Encoding.GetEncoding("US-ASCII").GetString(icc_profile.Data, 16, 4);
                        // NOTE(review): the condition guarding this throw is not visible here.
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("outputintent.shall.have.gtspdfa1.and.destoutputintent"));
Esempio n. 19
        protected override void CheckPdfObject(PdfWriter writer, int key, Object obj1)
            if (obj1 is PdfNumber)
                PdfNumber number = (PdfNumber)obj1;
                if (Math.Abs(number.DoubleValue) > maxRealValue && number.ToString().Contains("."))
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
            else if (obj1 is PdfString)
                PdfString str = (PdfString)obj1;
                if (str.GetBytes().Length > maxStringLength)
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
            else if (obj1 is PdfArray)
                PdfArray array = (PdfArray)obj1;
                if (array.Size > maxArrayLength)
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
            else if (obj1 is PdfDictionary)
                PdfDictionary dictionary = (PdfDictionary)obj1;
                if (dictionary.Size > maxDictionaryLength)
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
                PdfName type = dictionary.GetAsName(PdfName.TYPE);
                if (PdfName.CATALOG.Equals(type))
                    if (!dictionary.Contains(PdfName.METADATA))
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.contain.metadata"));

                    if (dictionary.Contains(PdfName.AA))
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));

                    if (dictionary.Contains(PdfName.NAMES))
                        PdfDictionary names = GetDirectDictionary(dictionary.Get(PdfName.NAMES));
                        if (names != null && names.Contains(PdfName.EMBEDDEDFILES))
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.not.include.embeddedfiles.names.entry"));

                    if (CheckStructure(conformanceLevel))
                        PdfDictionary markInfo = GetDirectDictionary(dictionary.Get(PdfName.MARKINFO));
                        if (markInfo == null || markInfo.GetAsBoolean(PdfName.MARKED) == null || markInfo.GetAsBoolean(PdfName.MARKED).BooleanValue == false)
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.shall.include.a.markinfo.dictionary.whose.entry.marked.shall.have.a.value.of.true"));
                        if (!dictionary.Contains(PdfName.LANG))
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.should.contain.lang.entry"));

                    PdfArray outputIntents          = GetDirectArray(dictionary.Get(PdfName.OUTPUTINTENTS));
                    bool     pdfa1OutputIntentFound = false;
                    if (outputIntents != null && outputIntents.Size > 0)
                        for (int i = 0; i < outputIntents.Size; i++)
                            PdfDictionary outputIntentDictionary = GetDirectDictionary(outputIntents[i]);
                            PdfName       gts = outputIntentDictionary.GetAsName(PdfName.S);
                            if (PdfName.GTS_PDFA1.Equals(gts))
                                if (pdfa1OutputIntentFound)
                                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
                                pdfa1OutputIntentFound = true;
                            if (outputIntentDictionary != null)
                                PdfObject destOutputIntent = outputIntentDictionary.Get(PdfName.DESTOUTPUTPROFILE);
                                if (destOutputIntent == null && PdfName.GTS_PDFA1.Equals(gts))
                                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("outputintent.shall.have.gtspdfa1.and.destoutputintent"));

                    if ((rgbUsed || cmykUsed || grayUsed) && !pdfa1OutputIntentFound)
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
                else if (PdfName.PAGE.Equals(type))
                    if (dictionary.Contains(PdfName.AA))
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("page.dictionary.shall.not.include.aa.entry"));
                else if (PdfName.OUTPUTINTENT.Equals(type))
                    PdfObject iccProfileStream = dictionary.Get(PdfName.DESTOUTPUTPROFILE);
                    String    inputColorSpace  = "";
                    if (iccProfileStream != null)
                        ICC_Profile icc_profile = writer.ColorProfile;
                        inputColorSpace = Encoding.GetEncoding("US-ASCII").GetString(icc_profile.Data, 16, 4);
                    PdfName gts = dictionary.GetAsName(PdfName.S);
                    if (!PdfName.GTS_PDFA1.Equals(gts))
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("outputintent.shall.have.gtspdfa1.and.destoutputintent"));
                    if ("RGB ".Equals(inputColorSpace))
                        if (cmykUsed)
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
                    else if ("CMYK".Equals(inputColorSpace))
                        if (rgbUsed)
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
                        if (cmykUsed)
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
                        if (rgbUsed)
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(""));
Esempio n. 20
 /**
  * Parses the samples of the image from the underlying content parser, ignoring all filters.
  * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
  * The parser will be left positioned immediately following the EI operator.
  * This is primarily useful if no filters have been applied.
  * @param imageDictionary the dictionary of the inline image
  * @param colorSpaceDic the color space dictionary, passed on to ComputeBytesPerRow
  * @param ps the content parser
  * @return the samples of the image
  * @throws IOException if anything bad happens during parsing
  */
 private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps) {
     // Reads the raw sample bytes of an inline image that has no /Filter entry.
     // The number of bytes is fixed up front (bytes per row * height), so the data can be
     // consumed without scanning for the EI operator; EI is only verified afterwards.
     // Throws ArgumentException if the dictionary declares a filter, and
     // InlineImageParseException if the stream ends early or EI does not follow the data.

     // special case:  when no filter is specified, we just read the number of bits
     // per component, multiplied by the width and height.
     if (imageDictionary.Contains(PdfName.FILTER)) {
         throw new ArgumentException("Dictionary contains filters");
     }

     // NOTE(review): assumes the dictionary carries a /Height entry; GetAsNumber presumably
     // returns null otherwise, which would fail on h.IntValue - confirm against callers.
     PdfNumber h = imageDictionary.GetAsNumber(PdfName.HEIGHT);
     int bytesToRead = ComputeBytesPerRow(imageDictionary, colorSpaceDic) * h.IntValue;
     byte[] bytes = new byte[bytesToRead];
     PRTokeniser tokeniser = ps.GetTokeniser();

     int shouldBeWhiteSpace = tokeniser.Read(); // skip next character (which better be a whitespace character)
     if (shouldBeWhiteSpace == -1) {
         // EOF right after ID: do not let -1 be mistaken for the data byte 0xFF below.
         throw new InlineImageParseException("End of content stream reached before end of image data");
     }

     // from the PDF spec:  Unless the image uses ASCIIHexDecode or ASCII85Decode as one of its filters, the ID operator shall be followed by a single white-space character, and the next character shall be interpreted as the first byte of image data.
     // unfortunately, we've seen some PDFs where there is no space following the ID, so we have to capture this case and handle it
     int startIndex = 0;
     if (!PRTokeniser.IsWhitespace(shouldBeWhiteSpace) || shouldBeWhiteSpace == 0) { // tokeniser treats 0 as whitespace, but for our purposes, we shouldn't
         bytes[0] = (byte)shouldBeWhiteSpace;
         // The character just read was already image data; advance so the loop below
         // neither overwrites it nor reads one byte past the end of the image.
         startIndex++;
     }

     for (int i = startIndex; i < bytesToRead; i++) {
         int ch = tokeniser.Read();
         if (ch == -1) {
             throw new InlineImageParseException("End of content stream reached before end of image data");
         }
         bytes[i] = (byte)ch;
     }

     // The very next token must be the EI operator that terminates the inline image.
     PdfObject ei = ps.ReadPRObject();
     if (!ei.ToString().Equals("EI")) {
         throw new InlineImageParseException("EI not found after end of image data");
     }

     return bytes;
 }
Esempio n. 21
        /**
         * Parses the samples of the image from the underlying content parser, accounting for filters
         * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
         * The parser will be left positioned immediately following the EI operator.
         * <b>Note:</b>This implementation does not actually apply the filters at this time
         * @param imageDictionary the dictionary of the inline image
         * @param colorSpaceDic the color space dictionary, passed on to ParseUnfilteredSamples and PdfImageObject
         * @param ps the content parser
         * @return the samples of the image
         * @throws IOException if anything bad happens during parsing
         */
        private static byte[] ParseInlineImageSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps) {
            // Collects the (still encoded) bytes of a filtered inline image by scanning the
            // content stream for the terminating <ws>EI<ws> sequence. Unfiltered images are
            // delegated to ParseUnfilteredSamples. Throws InlineImageParseException when the
            // stream ends before an acceptable EI is found.

            // by the time we get to here, we have already parsed the ID operator
            if (!imageDictionary.Contains(PdfName.FILTER)) {
                return ParseUnfilteredSamples(imageDictionary, colorSpaceDic, ps);
            }

            // read all content until we reach an EI operator surrounded by whitespace.
            // The following algorithm has two potential issues: what if the image stream
            // contains <ws>EI<ws> ?
            // Plus, there are some streams that don't have the <ws> before the EI operator
            // it sounds like we would have to actually decode the content stream, which
            // I'd rather avoid right now.
            MemoryStream baos = new MemoryStream();        // bytes confirmed to be image data
            MemoryStream accumulated = new MemoryStream(); // bytes of a tentative <ws>EI match
            int ch;
            int found = 0; // number of characters of <ws>E I matched so far (0..3)
            PRTokeniser tokeniser = ps.GetTokeniser();

            while ((ch = tokeniser.Read()) != -1) {
                if (found == 0 && PRTokeniser.IsWhitespace(ch)) {
                    found++;
                    accumulated.WriteByte((byte)ch);
                } else if (found == 1 && ch == 'E') {
                    found++;
                    accumulated.WriteByte((byte)ch);
                } else if (found == 1 && PRTokeniser.IsWhitespace(ch)) {
                    // this clause is needed if we have a white space character that is part of the image data
                    // followed by a whitespace character that precedes the EI operator.  In this case, we need
                    // to flush the first whitespace, then treat the current whitespace as the first potential
                    // character for the end of stream check.  Note that we don't increment 'found' here.
                    accumulated.WriteTo(baos);
                    accumulated.SetLength(0);
                    accumulated.WriteByte((byte)ch);
                } else if (found == 2 && ch == 'I') {
                    found++;
                    accumulated.WriteByte((byte)ch);
                } else if (found == 3 && PRTokeniser.IsWhitespace(ch)) {
                    // Saw <ws>EI<ws>. It may still be a coincidence inside the image data, so
                    // probe the candidate by constructing an image object from it.
                    try {
                        byte[] candidate = baos.ToArray();
                        new PdfImageObject(imageDictionary, candidate, colorSpaceDic);
                        return candidate;
                    } catch (Exception) {
                        // Deliberately swallowed: a failure here is taken to mean the match was
                        // a false positive, so the tentative bytes are image data. Keep scanning.
                        accumulated.WriteTo(baos);
                        accumulated.SetLength(0);
                        baos.WriteByte((byte)ch);
                        found = 0;
                    }
                } else {
                    // The tentative match (if any) fell through: everything is image data.
                    accumulated.WriteTo(baos);
                    accumulated.SetLength(0);
                    baos.WriteByte((byte)ch);
                    found = 0;
                }
            }

            throw new InlineImageParseException("Could not find image data or EI");
        }
Esempio n. 22
 /**
  * Determine if an MCID is available
  * @return true if the MCID is available, false otherwise
  */
 virtual public bool HasMcid()
Esempio n. 23
        public static void Test_ExtractImages(string file, string imageDirectory)
            // from

            Trace.WriteLine($"extract images from pdf \"{file}\" to \"{imageDirectory}\"");
            if (!zPath.IsPathRooted(imageDirectory))
                imageDirectory = zPath.Combine(zPath.GetDirectoryName(file), imageDirectory);
            using (PdfReader pdfReader = new PdfReader(file))
                int index       = 1;
                int objectCount = pdfReader.XrefSize;
                for (int i = 0; i < objectCount; i++)
                    PdfObject obj = pdfReader.GetPdfObject(i);
                    if (obj is PdfDictionary)
                        PdfDictionary objDic = (PdfDictionary)obj;
                        if (objDic.Contains(PdfName.TYPE) && objDic.Get(PdfName.TYPE).ToString() == "/XObject" && objDic.Contains(PdfName.SUBTYPE) && objDic.Get(PdfName.SUBTYPE).ToString() == "/Image")
                            string filter = objDic.Get(PdfName.FILTER).ToString();
                            int    width  = int.Parse(objDic.Get(PdfName.WIDTH).ToString());
                            int    height = int.Parse(objDic.Get(PdfName.HEIGHT).ToString());
                            string bpp    = objDic.Get(PdfName.BITSPERCOMPONENT).ToString();
                            Trace.WriteLine($"object {i + 1} image width {width} height {height} filter {filter} bits per component {bpp}");
                            if (filter == "/FlateDecode")
                                byte[] arr = PdfReader.FlateDecode(PdfReader.GetStreamBytesRaw((PRStream)obj), true);
                                Trace.WriteLine($"  bytes count {arr.Length}");
                                // PixelFormat.Format24bppRgb
                                // System.Drawing.Imaging.PixelFormat 8 bits
                                Bitmap bmp = new Bitmap(width, height, PixelFormat.Format8bppIndexed);
                                // PixelFormat.Format24bppRgb
                                BitmapData bmpData = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, PixelFormat.Format8bppIndexed);
                                Marshal.Copy(arr, 0, bmpData.Scan0, arr.Length);
                                //bmp.Save(zPath.Combine(imageDirectory, $"image-{index++:000}.jpeg"), ImageFormat.Jpeg);
                                bmp.Save(zPath.Combine(imageDirectory, $"image-{index++:000}.png"), ImageFormat.Png);
                    //if (obj != null && obj.IsStream())
                    //    PdfDictionary objDic = (PdfDictionary)obj;
                    //    if (objDic.Contains(PdfName.SUBTYPE) && objDic.Get(PdfName.SUBTYPE).ToString() == "/Image")
                    //    {
                    //        string filter = objDic.Get(PdfName.FILTER).ToString();
                    //        int width = int.Parse(objDic.Get(PdfName.WIDTH).ToString());
                    //        int height = int.Parse(objDic.Get(PdfName.HEIGHT).ToString());
                    //        string bpp = objDic.Get(PdfName.BITSPERCOMPONENT).ToString();
                    //        Trace.WriteLine($"object {i + 1} image width {width} height {height} filter {filter} bits per component {bpp}");
                    //        //string extent = ".";
                    //        //byte[] img = null;
                    //        switch (filter)
                    //        {
                    //            case "/FlateDecode":
                    //                byte[] arr = PdfReader.FlateDecode(PdfReader.GetStreamBytesRaw((PRStream)obj), true);
                    //                Bitmap bmp = new Bitmap(width, height, PixelFormat.Format24bppRgb);
                    //                BitmapData bmpData = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, PixelFormat.Format24bppRgb);
                    //                Marshal.Copy(arr, 0, bmpData.Scan0, arr.Length);
                    //                bmp.UnlockBits(bmpData);
                    //                //bmp.Save("c:\\temp\\bmp1.png", ImageFormat.Png);
                    //                bmp.Save(zPath.Combine(imageDirectory, $"image-{index++:000}.jpeg"), ImageFormat.Jpeg);
                    //                break;
                    //            default:
                    //                break;
                    //        }
                    //    }