/// <summary>
/// Looks for markup annotations on EVERY page of <c>MyReader</c>.
/// </summary>
/// <remarks>
/// An annotation is counted when its dictionary contains a <c>PdfName.POPUP</c> entry or a
/// <c>PdfName.INTENT</c> entry. The earlier test also had a "T and POPUP" clause, but that
/// clause is implied by the plain POPUP test, so dropping it does not change the result.
/// </remarks>
/// <returns>Count of apparent markup annotations</returns>
private int ScanLinearizedPdf()
{
    int CountAnnots = 0;

    // Page numbers are 1-based, so the last valid page number is NumberOfPages itself.
    for (int P = 1; P <= MyReader.NumberOfPages; P++)
    {
        PdfDictionary Next = MyReader.GetPageN(P);
        if (Next == null)
        {
            continue;
        }

        PdfArray annotArray = Next.GetAsArray(PdfName.ANNOTS);
        if (annotArray == null)
        {
            continue;
        }

        // Visit every entry, including the last one.
        for (int i = 0; i < annotArray.Size; i++)
        {
            // GetAsDict yields null for entries that are not dictionaries; skip those.
            PdfDictionary NextAnnot = annotArray.GetAsDict(i);
            if (NextAnnot == null)
            {
                continue;
            }

            if (NextAnnot.Contains(PdfName.POPUP) || NextAnnot.Contains(PdfName.INTENT))
            {
                CountAnnots++;
            }
        }
    }

    return CountAnnots;
}
/// <summary>
/// Copies page 1 of a signed PDF into a new document twice (each time through a fresh reader)
/// and verifies that, in the copy, the signature reference dictionaries of objects 9 and 21
/// are of type SigRef and carry no Data entry.
/// </summary>
public void CopySignedDocuments()
{
    const string outputPath = "PdfCopyTest/CopySignedDocuments.pdf";

    string file = RESOURCES + "hello_signed1.pdf";
    Directory.CreateDirectory("PdfCopyTest/");

    Document pdfDocument = new Document();
    PdfCopy copier = new PdfCopy(pdfDocument, new FileStream(outputPath, FileMode.Create));
    pdfDocument.Open();

    // Import the same page twice, each time from its own reader instance.
    for (int pass = 0; pass < 2; pass++)
    {
        PdfReader source = new PdfReader(file);
        copier.AddPage(copier.GetImportedPage(source, 1));
        copier.FreeReader(source);
    }

    pdfDocument.Close();

    PdfReader reader = new PdfReader(outputPath);
    foreach (int objectNumber in new int[] { 9, 21 })
    {
        PdfDictionary sig = (PdfDictionary)reader.GetPdfObject(objectNumber);
        PdfDictionary sigRef = sig.GetAsArray(PdfName.REFERENCE).GetAsDict(0);
        Assert.True(PdfName.SIGREF.Equals(sigRef.GetAsName(PdfName.TYPE)));
        Assert.False(sigRef.Contains(PdfName.DATA));
    }
}
/// <summary>
/// Validates a single PDF object: size limits for numbers, strings, arrays and dictionaries,
/// plus the document-catalog rules (no AA entry and, when structure checking is enabled for
/// the conformance level, a MarkInfo dictionary with Marked = true and a Lang entry).
/// </summary>
/// <param name="writer">The writer being checked (not read by this method).</param>
/// <param name="key">The check key (not read by this method).</param>
/// <param name="obj1">The object to validate; other object kinds are ignored.</param>
/// <exception cref="PdfAConformanceException">Thrown on the first violated rule.</exception>
protected override void CheckPdfObject(PdfWriter writer, int key, Object obj1)
{
    PdfNumber asNumber = obj1 as PdfNumber;
    if (asNumber != null)
    {
        // Only numbers written with a decimal point are treated as reals.
        if (Math.Abs(asNumber.DoubleValue) > maxRealValue && asNumber.ToString().Contains("."))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("real.number.is.out.of.range"));
        }
        return;
    }

    PdfString asString = obj1 as PdfString;
    if (asString != null)
    {
        if (asString.GetBytes().Length > maxStringLength)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.string.is.too.long"));
        }
        return;
    }

    PdfArray asArray = obj1 as PdfArray;
    if (asArray != null)
    {
        if (asArray.Size > maxArrayLength)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.array.is.out.of.bounds"));
        }
        return;
    }

    PdfDictionary asDictionary = obj1 as PdfDictionary;
    if (asDictionary == null)
    {
        return;
    }

    if (asDictionary.Size > maxDictionaryLength)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.dictionary.is.out.of.bounds"));
    }

    // The remaining rules apply to the document catalog only.
    if (!PdfName.CATALOG.Equals(asDictionary.GetAsName(PdfName.TYPE)))
    {
        return;
    }

    if (asDictionary.Contains(PdfName.AA))
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.not.include.an.aa.entry"));
    }

    if (!CheckStructure(conformanceLevel))
    {
        return;
    }

    PdfDictionary markInfo = asDictionary.GetAsDict(PdfName.MARKINFO);
    PdfBoolean marked = markInfo == null ? null : markInfo.GetAsBoolean(PdfName.MARKED);
    if (marked == null || !marked.BooleanValue)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.shall.include.a.markinfo.dictionary.whose.entry.marked.shall.have.a.value.of.true"));
    }

    if (!asDictionary.Contains(PdfName.LANG))
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.should.contain.lang.entry"));
    }
}
/// <summary>
/// Verifies that <c>PdfDictionary.Contains</c> reports false when asked about a null key.
/// </summary>
public void PdfDictionaryContainsReturnsFalseIfKeyIsNull()
{
    PdfDictionary emptyDictionary = new PdfDictionary();

    Assert.False(emptyDictionary.Contains(null));
}
/// <summary>
/// Asserts that page 1 of the PDF read from <paramref name="inputStream"/> has exactly
/// <paramref name="expectedAnnotationsSize"/> entries in its Annots array.
/// Nothing is asserted when the page has no Annots entry at all.
/// </summary>
/// <param name="inputStream">Stream containing the PDF to inspect.</param>
/// <param name="expectedAnnotationsSize">Expected number of annotations on page 1.</param>
private void CheckAnnotationSize(Stream inputStream, int expectedAnnotationsSize)
{
    PdfDictionary firstPage = new PdfReader(inputStream).GetPageN(1);
    if (!firstPage.Contains(PdfName.ANNOTS))
    {
        return;
    }

    int actualSize = firstPage.GetAsArray(PdfName.ANNOTS).Size;
    Assert.True(actualSize == expectedAnnotationsSize);
}
/// <summary>
/// Validates an extended graphics state dictionary: blend mode, CA/ca opacity, the TR and TR2
/// transfer-function keys, the rendering intent and the soft mask.
/// </summary>
/// <param name="writer">The writer being checked (not read by this method).</param>
/// <param name="key">The check key (not read by this method).</param>
/// <param name="obj1">The object to validate; ignored unless it is a <c>PdfDictionary</c>.</param>
/// <exception cref="PdfAConformanceException">Thrown on the first violated rule.</exception>
protected override void CheckGState(PdfWriter writer, int key, Object obj1)
{
    PdfDictionary gs = obj1 as PdfDictionary;
    if (gs == null)
    {
        return;
    }

    // Blend mode: only Normal and Compatible are accepted.
    PdfObject blendMode = gs.Get(PdfName.BM);
    bool blendModeAccepted = blendMode == null
        || PdfGState.BM_NORMAL.Equals(blendMode)
        || PdfGState.BM_COMPATIBLE.Equals(blendMode);
    if (!blendModeAccepted)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("blend.mode.1.not.allowed", blendMode.ToString()));
    }

    // Stroking (CA) and non-stroking (ca) opacity must both be exactly 1 when present.
    foreach (PdfName opacityKey in new PdfName[] { PdfName.CA, PdfName.ca })
    {
        PdfObject opacity = gs.Get(opacityKey);
        if (opacity == null)
        {
            continue;
        }

        double v = ((PdfNumber)opacity).DoubleValue;
        if (v != 1.0)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("transparency.is.not.allowed.ca.eq.1", v.ToString()));
        }
    }

    if (gs.Contains(PdfName.TR))
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("an.extgstate.dictionary.shall.not.contain.the.tr.key"));
    }

    PdfName tr2 = gs.GetAsName(PdfName.TR2);
    if (tr2 != null && !tr2.Equals(PdfName.DEFAULT))
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(
            "an.extgstate.dictionary.shall.not.contain.the.TR2.key.with.a.value.other.than.default"));
    }

    // Rendering intent must be one of the four named intents.
    PdfName ri = gs.GetAsName(PdfName.RI);
    if (ri != null
        && !PdfName.RELATIVECOLORIMETRIC.Equals(ri)
        && !PdfName.ABSOLUTECOLORIMETRIC.Equals(ri)
        && !PdfName.PERCEPTUAL.Equals(ri)
        && !PdfName.SATURATION.Equals(ri))
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("1.value.of.ri.key.is.not.allowed", ri.ToString()));
    }

    // A soft mask is accepted only when it is the name None.
    if (gs.Get(PdfName.SMASK) != null && !PdfName.NONE.Equals(gs.GetAsName(PdfName.SMASK)))
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.smask.key.is.not.allowed.in.extgstate"));
    }
}
/**
 * Reads the raw samples of an unfiltered inline image straight from the content parser.
 * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
 * The parser will be left positioned immediately following the EI operator.
 * The number of bytes read is the bytes-per-row value (from ComputeBytesPerRow) times the image height.
 * @param imageDictionary the dictionary of the inline image
 * @param colorSpaceDic the color space dictionary, handed on to ComputeBytesPerRow
 * @param ps the content parser
 * @return the samples of the image
 * @throws ArgumentException if the image dictionary declares a filter
 * @throws InlineImageParseException if the stream ends early or EI cannot be found
 */
private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps)
{
    if (imageDictionary.Contains(PdfName.FILTER))
    {
        throw new ArgumentException("Dictionary contains filters");
    }

    PdfNumber height = imageDictionary.GetAsNumber(PdfName.HEIGHT);
    int sampleByteCount = ComputeBytesPerRow(imageDictionary, colorSpaceDic) * height.IntValue;
    byte[] samples = new byte[sampleByteCount];
    PRTokeniser tokeniser = ps.GetTokeniser();

    // Per the PDF spec, ID is followed by exactly one white-space character and the image
    // data starts right after it. Some producers omit that separator, in which case the
    // character just read is already the first data byte. The tokeniser classifies 0 as
    // whitespace, but here a 0 byte has to count as data.
    int afterId = tokeniser.Read();
    int filled = 0;
    bool separatorPresent = PRTokeniser.IsWhitespace(afterId) && afterId != 0;
    if (!separatorPresent)
    {
        samples[0] = (byte)afterId;
        filled = 1;
    }

    while (filled < sampleByteCount)
    {
        int next = tokeniser.Read();
        if (next == -1)
        {
            throw new InlineImageParseException("End of content stream reached before end of image data");
        }
        samples[filled] = (byte)next;
        filled++;
    }

    // Some PDF producers add one extra non-whitespace token after the image data, so EI is
    // accepted either as the first or as the second object that follows the samples.
    if (!ps.ReadPRObject().ToString().Equals("EI") && !ps.ReadPRObject().ToString().Equals("EI"))
    {
        throw new InlineImageParseException("EI not found after end of image data");
    }

    return samples;
}
/// <summary>
/// Validates a file specification dictionary: it must contain UF, F and Desc entries and an
/// allowed AFRelationship name; when it has an EF entry, that entry must resolve to a
/// dictionary whose F entry resolves to an embedded-file dictionary, which is then checked
/// with <c>CheckEmbeddedFile</c>.
/// </summary>
/// <param name="writer">The writer being checked (not read by this method).</param>
/// <param name="key">The check key (not read by this method).</param>
/// <param name="obj1">The object to validate; ignored unless it is a <c>PdfFileSpecification</c>.</param>
/// <exception cref="PdfAConformanceException">Thrown on the first violated rule.</exception>
protected override void CheckFileSpec(PdfWriter writer, int key, Object obj1)
{
    if (!(obj1 is PdfFileSpecification))
    {
        return;
    }

    PdfDictionary fileSpec = (PdfFileSpecification)obj1;

    bool hasMandatoryKeys = fileSpec.Contains(PdfName.UF)
        && fileSpec.Contains(PdfName.F)
        && fileSpec.Contains(PdfName.DESC);
    if (!hasMandatoryKeys)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("file.specification.dictionary.shall.contain.f.uf.and.desc.entries"));
    }

    PdfObject relationship = fileSpec.Get(PdfName.AFRELATIONSHIP);
    bool relationshipValid = relationship != null
        && relationship.IsName()
        && allowedAFRelationships.Contains(relationship as PdfName);
    if (!relationshipValid)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("file.specification.dictionary.shall.contain.correct.afrelationship.key"));
    }

    if (!fileSpec.Contains(PdfName.EF))
    {
        return;
    }

    // Both a missing/invalid EF dictionary and an unresolvable F entry are reported with the
    // same message, so the two failure cases are folded into one null test.
    PdfDictionary efDictionary = GetDirectDictionary(fileSpec.Get(PdfName.EF));
    PdfDictionary embeddedFile = null;
    if (efDictionary != null && efDictionary.Contains(PdfName.F))
    {
        embeddedFile = GetDirectDictionary(efDictionary.Get(PdfName.F));
    }
    if (embeddedFile == null)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("ef.key.of.file.specification.dictionary.shall.contain.dictionary.with.valid.f.key"));
    }

    CheckEmbeddedFile(embeddedFile);
}
/// <summary>
/// Traces one line per cross-reference slot of the given PDF: the runtime type of the object
/// and, for dictionaries, their Type and Subtype entries. Empty slots are traced as "null".
/// </summary>
/// <param name="file">Path of the PDF file to inspect.</param>
public static void Test_TraceObjects(string file)
{
    Trace.WriteLine($"pdf \"{file}\"");
    using (PdfReader pdfReader = new PdfReader(file))
    {
        int objectCount = pdfReader.XrefSize;
        for (int i = 0; i < objectCount; i++)
        {
            PdfObject obj = pdfReader.GetPdfObject(i);
            if (obj == null)
            {
                Trace.WriteLine($"object {i + 1,3} null");
                continue;
            }

            PdfDictionary objDic = obj as PdfDictionary;
            if (objDic == null)
            {
                Trace.WriteLine($"object {i + 1,3} object type {obj.GetType()}");
                continue;
            }

            // Type and Subtype stay null (traced as empty) when the entry is absent.
            string type = objDic.Contains(PdfName.TYPE) ? objDic.Get(PdfName.TYPE).ToString() : null;
            string subtype = objDic.Contains(PdfName.SUBTYPE) ? objDic.Get(PdfName.SUBTYPE).ToString() : null;
            Trace.WriteLine($"object {i + 1,3} object type {obj.GetType()} dictionary type {type} subtype {subtype}");
        }
    }
}
/// <summary>
/// Captures information about one page: its number, its units (the page's UserUnit entry when
/// present, otherwise 72), its media size converted to millimeters, and then renders the page
/// via <c>_RenderPage</c>.
/// </summary>
/// <param name="reader">Reader of the document the page belongs to.</param>
/// <param name="pageNumber">Number of the page, as understood by <c>PdfReader.GetPageN</c>.</param>
public PageInfo(PdfReader reader, int pageNumber)
{
    PageNumber = pageNumber;
    ImagesInfo = new ImagesInfo();
    TextsInfo = new TextsInfo();

    _reader = reader;
    _page = reader.GetPageN(pageNumber);

    if (_page.Contains(PdfName.USERUNIT))
    {
        PageUnits = _page.GetAsNumber(PdfName.USERUNIT).FloatValue;
    }
    else
    {
        PageUnits = 72;
    }

    Rectangle pageSize = reader.GetPageSize(_page);
    PageWidth = Utilities.PointsToMillimeters(pageSize.Width);
    PageHeight = Utilities.PointsToMillimeters(pageSize.Height);

    _RenderPage(PageNumber, PageUnits);
}
/// <summary>
/// Counts the dictionary objects in the reader's cross-reference table whose Type is
/// "/XObject" and whose Subtype is "/Image".
/// </summary>
/// <param name="pdfReader">Reader of the document to scan.</param>
/// <returns>The number of image XObjects found.</returns>
public static int GetImagesCount(PdfReader pdfReader)
{
    int imageCount = 0;
    int objectCount = pdfReader.XrefSize;
    for (int index = 0; index < objectCount; index++)
    {
        PdfDictionary candidate = pdfReader.GetPdfObject(index) as PdfDictionary;
        if (candidate == null)
        {
            continue;
        }

        if (!candidate.Contains(PdfName.TYPE) || candidate.Get(PdfName.TYPE).ToString() != "/XObject")
        {
            continue;
        }

        if (!candidate.Contains(PdfName.SUBTYPE) || candidate.Get(PdfName.SUBTYPE).ToString() != "/Image")
        {
            continue;
        }

        imageCount++;
    }

    return imageCount;
}
/// <summary>
/// Loads the object with the given number from a PDF, checks that it is an image XObject,
/// inflates its raw stream bytes, builds an iTextSharp image from them and traces the
/// image's dimensions and resolution.
/// </summary>
/// <param name="file">Path of the PDF file.</param>
/// <param name="index">Object number passed to <c>PdfReader.GetPdfObject</c>.</param>
/// <param name="imageFile">Target image path; a relative path is resolved against the PDF's directory.</param>
public static void Test_ExtractImage(string file, int index, string imageFile)
{
    Trace.WriteLine($"extract image index {index} from pdf \"{file}\" to \"{imageFile}\"");
    if (!zPath.IsPathRooted(imageFile))
    {
        imageFile = zPath.Combine(zPath.GetDirectoryName(file), imageFile);
    }

    using (PdfReader pdfReader = new PdfReader(file))
    {
        PdfObject obj = pdfReader.GetPdfObject(index);
        PdfDictionary objDic = obj as PdfDictionary;
        if (objDic == null)
        {
            Trace.WriteLine("object is not dictionary");
            return;
        }

        bool isImage = objDic.Contains(PdfName.TYPE)
            && objDic.Get(PdfName.TYPE).ToString() == "/XObject"
            && objDic.Contains(PdfName.SUBTYPE)
            && objDic.Get(PdfName.SUBTYPE).ToString() == "/Image";
        if (!isImage)
        {
            Trace.WriteLine("object is not an image");
            return;
        }

        // The raw stream bytes on their own are rejected by Image.GetInstance
        // ("The byte array is not a recognized imageformat"), so they are inflated first.
        byte[] rawBytes = PdfReader.GetStreamBytesRaw((PRStream)obj);
        byte[] bytes = PdfReader.FlateDecode(rawBytes, true);
        iTextSharp.text.Image image = iTextSharp.text.Image.GetInstance(bytes);
        Trace.WriteLine($"width {image.Width} height {image.Height} dpiX {image.DpiX} dpiY {image.DpiY}");
    }
}
/**
 * Reads the raw samples of an unfiltered inline image straight from the content parser.
 * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
 * The parser will be left positioned immediately following the EI operator.
 * The number of bytes read is the bytes-per-row value (from ComputeBytesPerRow) times the image height.
 * @param imageDictionary the dictionary of the inline image
 * @param ps the content parser
 * @return the samples of the image
 * @throws ArgumentException if the image dictionary declares a filter
 * @throws InlineImageParseException if the stream ends early or EI does not follow the samples
 */
private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfContentParser ps)
{
    if (imageDictionary.Contains(PdfName.FILTER))
    {
        throw new ArgumentException("Dictionary contains filters");
    }

    PdfNumber height = imageDictionary.GetAsNumber(PdfName.HEIGHT);
    int sampleByteCount = ComputeBytesPerRow(imageDictionary) * height.IntValue;
    byte[] samples = new byte[sampleByteCount];

    PRTokeniser tokeniser = ps.GetTokeniser();

    // Discard the single character after ID; it is expected to be whitespace but is not verified.
    tokeniser.Read();

    int filled = 0;
    while (filled < sampleByteCount)
    {
        int next = tokeniser.Read();
        if (next == -1)
        {
            throw new InlineImageParseException("End of content stream reached before end of image data");
        }
        samples[filled] = (byte)next;
        filled++;
    }

    PdfObject terminator = ps.ReadPRObject();
    if (!terminator.ToString().Equals("EI"))
    {
        throw new InlineImageParseException("EI not found after end of image data");
    }

    return samples;
}
/**
 * Extracts locations from the redact annotations contained in the document and applied to the given page.
 * Entries of the page's Annots array that are not dictionaries, or whose Subtype is missing or
 * is not Redact, are skipped.
 * @param page the page number the annotations belong to
 * @param pageDict the page dictionary whose Annots array is scanned
 * @return the clean-up locations of all redact annotations on the page (empty when there are none)
 */
private IList<PdfCleanUpLocation> ExtractLocationsFromRedactAnnots(int page, PdfDictionary pageDict)
{
    List<PdfCleanUpLocation> locations = new List<PdfCleanUpLocation>();

    // GetAsArray yields null both when Annots is absent and when it is not an array.
    PdfArray annotsArray = pageDict.GetAsArray(PdfName.ANNOTS);
    if (annotsArray == null)
    {
        return locations;
    }

    for (int i = 0; i < annotsArray.Size; ++i)
    {
        PdfDictionary annotDict = annotsArray.GetAsDict(i);
        if (annotDict == null)
        {
            continue;
        }

        // Comparing from the constant side keeps an annotation without a Subtype from
        // causing a null dereference.
        if (!PdfName.REDACT.Equals(annotDict.GetAsName(PdfName.SUBTYPE)))
        {
            continue;
        }

        // An annotation stored directly in the array has no indirect reference to record;
        // its locations are still extracted.
        PdfIndirectReference annotIndirRef = annotsArray.GetAsIndirectObject(i);
        if (annotIndirRef != null)
        {
            SaveRedactAnnotIndirRef(page, annotIndirRef.ToString());
        }

        locations.AddRange(ExtractLocationsFromRedactAnnot(page, i, annotDict));
    }

    return locations;
}
/// <summary>
/// Traces one line per image XObject found in the cross-reference table of the given PDF:
/// width, height, filter and bits per component.
/// </summary>
/// <remarks>
/// Filter and BitsPerComponent may be absent, and Width/Height may be stored as indirect
/// references, so every entry is read defensively. A missing Filter is traced as "(none)";
/// any other missing entry is traced as "?".
/// </remarks>
/// <param name="file">Path of the PDF file to inspect.</param>
public static void Test_TraceImages(string file)
{
    Trace.WriteLine($"pdf \"{file}\"");
    using (PdfReader pdfReader = new PdfReader(file))
    {
        int objectCount = pdfReader.XrefSize;
        for (int i = 0; i < objectCount; i++)
        {
            PdfDictionary objDic = pdfReader.GetPdfObject(i) as PdfDictionary;
            if (objDic == null)
            {
                continue;
            }

            bool isImage = objDic.Contains(PdfName.TYPE)
                && objDic.Get(PdfName.TYPE).ToString() == "/XObject"
                && objDic.Contains(PdfName.SUBTYPE)
                && objDic.Get(PdfName.SUBTYPE).ToString() == "/Image";
            if (!isImage)
            {
                continue;
            }

            string filter = objDic.Get(PdfName.FILTER)?.ToString() ?? "(none)";

            // GetAsNumber resolves indirect references, unlike parsing the raw entry's text.
            string width = objDic.GetAsNumber(PdfName.WIDTH)?.IntValue.ToString() ?? "?";
            string height = objDic.GetAsNumber(PdfName.HEIGHT)?.IntValue.ToString() ?? "?";
            string bpp = objDic.Get(PdfName.BITSPERCOMPONENT)?.ToString() ?? "?";

            Trace.WriteLine($"object {i + 1,3} image width {width} height {height} filter {filter} bits per component {bpp}");
        }
    }
}
/**
 * Parses the samples of the image from the underlying content parser, accounting for filters.
 * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
 * The parser will be left positioned immediately following the EI operator.
 * <b>Note:</b> this implementation does not actually apply the filters; for filtered images it
 * returns the bytes found before the first whitespace-EI-whitespace sequence at which
 * InlineImageStreamBytesAreComplete accepts the collected data.
 * @param imageDictionary the dictionary of the inline image
 * @param colorSpaceDic the color space dictionary, handed on to ParseUnfilteredSamples
 * @param ps the content parser
 * @return the samples of the image
 * @throws InlineImageParseException if the content ends before a usable EI is found
 */
private static byte[] ParseInlineImageSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps)
{
    // By the time we get here, the ID operator has already been parsed.
    if (!imageDictionary.Contains(PdfName.FILTER))
    {
        return ParseUnfilteredSamples(imageDictionary, colorSpaceDic, ps);
    }

    // Scan for <ws>EI<ws>. Known weaknesses: the image data itself may contain that
    // sequence, and some streams have no whitespace before EI. Handling those properly
    // would require decoding the stream, which is deliberately not done here.
    MemoryStream imageData = new MemoryStream();   // bytes accepted as image data so far
    MemoryStream pending = new MemoryStream();     // bytes that might turn out to be "<ws>EI"
    PRTokeniser tokeniser = ps.GetTokeniser();
    int matched = 0;                               // how many characters of "<ws>EI" have been matched
    int ch;

    while ((ch = tokeniser.Read()) != -1)
    {
        bool isWhitespace = PRTokeniser.IsWhitespace(ch);

        switch (matched)
        {
            case 0:
                if (isWhitespace)
                {
                    pending.WriteByte((byte)ch);
                    matched = 1;
                    continue;
                }
                break;

            case 1:
                if (ch == 'E')
                {
                    pending.WriteByte((byte)ch);
                    matched = 2;
                    continue;
                }
                if (isWhitespace)
                {
                    // A whitespace byte of image data followed by the whitespace that may
                    // precede EI: flush the first one and keep the current one as the new
                    // candidate. The match count intentionally stays at 1.
                    pending.WriteTo(imageData);
                    pending.SetLength(0);
                    pending.WriteByte((byte)ch);
                    continue;
                }
                break;

            case 2:
                if (ch == 'I')
                {
                    pending.WriteByte((byte)ch);
                    matched = 3;
                    continue;
                }
                break;

            case 3:
                if (isWhitespace)
                {
                    byte[] candidate = imageData.ToArray();
                    if (InlineImageStreamBytesAreComplete(candidate, imageDictionary))
                    {
                        return candidate;
                    }
                }
                break;
        }

        // No (complete) match: everything held back is image data after all, and so is
        // the current character. Start matching again from scratch.
        pending.WriteTo(imageData);
        pending.SetLength(0);
        imageData.WriteByte((byte)ch);
        matched = 0;
    }

    throw new InlineImageParseException("Could not find image data or EI");
}
/// <summary>
/// Validates a form field or annotation dictionary: allowed subtype, CA value, the F flag
/// bits, colour usage (C / IC) against the output colour profile, the appearance dictionary,
/// forbidden A / AA entries and, when structure checking is enabled for the conformance
/// level, the presence of Contents for content annotations.
/// </summary>
/// <param name="writer">The writer being checked; cast to <c>PdfAWriter</c> to read its colour profile when the annotation has a C or IC entry.</param>
/// <param name="key">The check key (not read by this method).</param>
/// <param name="obj1">The object to validate; ignored unless it is a <c>PdfFormField</c> or <c>PdfAnnotation</c>.</param>
/// <exception cref="PdfAConformanceException">Thrown on the first violated rule.</exception>
protected override void CheckAnnotation(PdfWriter writer, int key, Object obj1)
{
    if (obj1 is PdfFormField)
    {
        PdfFormField field = (PdfFormField)obj1;
        // A field without a Subtype is not checked any further.
        if (!field.Contains(PdfName.SUBTYPE))
        {
            return;
        }
        if (field.Contains(PdfName.AA) || field.Contains(PdfName.A))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("widget.annotation.dictionary.or.field.dictionary.shall.not.include.a.or.aa.entry"));
        }
    }

    if (obj1 is PdfAnnotation)
    {
        PdfAnnotation annot = (PdfAnnotation)obj1;

        PdfName subtype = annot.Get(PdfName.SUBTYPE) as PdfName;
        if (subtype != null && !allowedAnnotTypes.Contains(subtype))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("annotation.type.1.not.allowed", subtype.ToString()));
        }

        PdfNumber ca = annot.GetAsNumber(PdfName.CA);
        if (ca != null && ca.FloatValue != 1.0)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("an.annotation.dictionary.shall.not.contain.the.ca.key.with.a.value.other.than.1"));
        }

        PdfNumber f = annot.GetAsNumber(PdfName.F);
        if (f == null)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("an.annotation.dictionary.shall.contain.the.f.key"));
        }

        int flags = f.IntValue;
        if (CheckFlag(flags, PdfAnnotation.FLAGS_PRINT) == false
            || CheckFlag(flags, PdfAnnotation.FLAGS_HIDDEN)
            || CheckFlag(flags, PdfAnnotation.FLAGS_INVISIBLE)
            || CheckFlag(flags, PdfAnnotation.FLAGS_NOVIEW))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.f.keys.print.flag.bit.shall.be.set.to.1.and.its.hidden.invisible.and.noview.flag.bits.shall.be.set.to.0"));
        }

        if (PdfName.TEXT.Equals(annot.GetAsName(PdfName.SUBTYPE)))
        {
            if (CheckFlag(flags, PdfAnnotation.FLAGS_NOZOOM) == false || CheckFlag(flags, PdfAnnotation.FLAGS_NOROTATE) == false)
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("text.annotations.should.set.the.nozoom.and.norotate.flag.bits.of.the.f.key.to.1"));
            }
        }

        if (annot.Contains(PdfName.C) || annot.Contains(PdfName.IC))
        {
            ICC_Profile colorProfile = ((PdfAWriter)writer).ColorProfile;

            // Bytes 16..19 of the profile data hold a 4-character colour-space signature,
            // which for RGB is "RGB " with a trailing space. The signature therefore has to
            // be trimmed before it is compared, otherwise the comparison can never succeed.
            // A missing profile is treated as "not RGB" instead of dereferencing null.
            string cs = colorProfile == null
                ? ""
                : System.Text.Encoding.ASCII.GetString(colorProfile.Data, 16, 4).Trim();
            if (!"RGB".Equals(cs, StringComparison.OrdinalIgnoreCase))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("destoutputprofile.in.the.pdfa1.outputintent.dictionary.shall.be.rgb"));
            }
        }

        PdfDictionary ap = GetDirectDictionary(annot.Get(PdfName.AP));
        if (ap != null)
        {
            if (ap.Contains(PdfName.R) || ap.Contains(PdfName.D))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("appearance.dictionary.shall.contain.only.the.n.key.with.stream.value"));
            }
            // The N entry has to be an indirect reference.
            PdfObject n = ap.Get(PdfName.N);
            if (!(n is PdfIndirectReference))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("appearance.dictionary.shall.contain.only.the.n.key.with.stream.value"));
            }
        }

        if (PdfName.WIDGET.Equals(annot.GetAsName(PdfName.SUBTYPE)) && (annot.Contains(PdfName.AA) || annot.Contains(PdfName.A)))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("widget.annotation.dictionary.or.field.dictionary.shall.not.include.a.or.aa.entry"));
        }

        if (CheckStructure(conformanceLevel))
        {
            if (contentAnnotations.Contains(subtype) && !annot.Contains(PdfName.CONTENTS))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("annotation.of.type.1.should.have.contents.key", subtype.ToString()));
            }
        }
    }
}
/// <summary>
/// Validates a single PDF object: size limits for numbers, strings, arrays and dictionaries,
/// plus type-specific rules for the document catalog, page dictionaries and output intent
/// dictionaries.
/// </summary>
/// <remarks>
/// The output-intent branch reads and updates the fields <c>pdfaDestOutputIntent</c> and
/// <c>pdfaOutputIntentColorSpace</c>, so the outcome depends on output intents seen earlier.
/// </remarks>
/// <param name="writer">The writer being checked; its colour profile is read for PDF/A-1 output intents.</param>
/// <param name="key">The check key (not read by this method).</param>
/// <param name="obj1">The object to validate; other object kinds are ignored.</param>
/// <exception cref="PdfAConformanceException">Thrown on the first violated rule.</exception>
protected override void CheckPdfObject(PdfWriter writer, int key, Object obj1)
{
    PdfNumber asNumber = obj1 as PdfNumber;
    if (asNumber != null)
    {
        // Only numbers written with a decimal point are treated as reals.
        if (Math.Abs(asNumber.DoubleValue) > maxRealValue && asNumber.ToString().Contains("."))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("real.number.is.out.of.range"));
        }
        return;
    }

    PdfString asString = obj1 as PdfString;
    if (asString != null)
    {
        if (asString.GetBytes().Length > maxStringLength)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.string.is.too.long"));
        }
        return;
    }

    PdfArray asArray = obj1 as PdfArray;
    if (asArray != null)
    {
        if (asArray.Size > maxArrayLength)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.array.is.out.of.bounds"));
        }
        return;
    }

    PdfDictionary dictionary = obj1 as PdfDictionary;
    if (dictionary == null)
    {
        return;
    }

    if (dictionary.Size > maxDictionaryLength)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.dictionary.is.out.of.bounds"));
    }

    PdfName type = dictionary.GetAsName(PdfName.TYPE);

    if (PdfName.CATALOG.Equals(type))
    {
        if (!dictionary.Contains(PdfName.METADATA))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.contain.metadata"));
        }

        if (dictionary.Contains(PdfName.AA))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.not.include.an.aa.entry"));
        }

        if (dictionary.Contains(PdfName.NAMES))
        {
            PdfDictionary names = GetDirectDictionary(dictionary.Get(PdfName.NAMES));
            if (names != null && names.Contains(PdfName.EMBEDDEDFILES))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.not.include.embeddedfiles.names.entry"));
            }
        }

        if (!CheckStructure(conformanceLevel))
        {
            return;
        }

        PdfDictionary markInfo = GetDirectDictionary(dictionary.Get(PdfName.MARKINFO));
        PdfBoolean marked = markInfo == null ? null : markInfo.GetAsBoolean(PdfName.MARKED);
        if (marked == null || !marked.BooleanValue)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.shall.include.a.markinfo.dictionary.whose.entry.marked.shall.have.a.value.of.true"));
        }

        if (!dictionary.Contains(PdfName.LANG))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.should.contain.lang.entry"));
        }
        return;
    }

    if (PdfName.PAGE.Equals(type))
    {
        if (dictionary.Contains(PdfName.AA))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("page.dictionary.shall.not.include.aa.entry"));
        }
        return;
    }

    if (!PdfName.OUTPUTINTENT.Equals(type))
    {
        return;
    }

    // When a destination profile has already been remembered and this intent has one too,
    // both must be the same indirect object; otherwise remember this intent's profile
    // (which may be null).
    PdfObject destOutputIntent = dictionary.Get(PdfName.DESTOUTPUTPROFILE);
    if (destOutputIntent == null || pdfaDestOutputIntent == null)
    {
        pdfaDestOutputIntent = destOutputIntent;
    }
    else if (pdfaDestOutputIntent.IndRef != destOutputIntent.IndRef)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage(
            "if.outputintents.array.more.than.one.entry.the.same.indirect.object"));
    }

    PdfName gts = dictionary.GetAsName(PdfName.S);
    if (pdfaDestOutputIntent == null)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("outputintent.shall.have.gtspdfa1.and.destoutputintent"));
    }

    if (!PdfName.GTS_PDFA1.Equals(gts))
    {
        return;
    }

    // A non-null colour space means a PDF/A-1 output intent was already registered.
    if (pdfaOutputIntentColorSpace != null)
    {
        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("a.pdfa.file.may.have.only.one.pdfa.outputintent"));
    }

    // Mark the intent as seen before reading the profile, then store the 4-character
    // signature found at bytes 16..19 of the profile data.
    pdfaOutputIntentColorSpace = "";
    ICC_Profile icc_profile = writer.ColorProfile;
    pdfaOutputIntentColorSpace = Encoding.GetEncoding("US-ASCII").GetString(icc_profile.Data, 16, 4);
}
/// <summary>
/// Validates a single PDF object: size limits for numbers, strings, arrays and dictionaries,
/// plus type-specific rules for the document catalog (including its OutputIntents array),
/// page dictionaries and output intent dictionaries.
/// </summary>
/// <remarks>
/// The colour checks read the fields <c>rgbUsed</c>, <c>cmykUsed</c> and <c>grayUsed</c>.
/// </remarks>
/// <param name="writer">The writer being checked; its colour profile is read for output intents that have a DestOutputProfile.</param>
/// <param name="key">The check key (not read by this method).</param>
/// <param name="obj1">The object to validate; other object kinds are ignored.</param>
/// <exception cref="PdfAConformanceException">Thrown on the first violated rule.</exception>
protected override void CheckPdfObject(PdfWriter writer, int key, Object obj1)
{
    if (obj1 is PdfNumber)
    {
        PdfNumber number = (PdfNumber)obj1;
        // Only numbers written with a decimal point are treated as reals.
        if (Math.Abs(number.DoubleValue) > maxRealValue && number.ToString().Contains("."))
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("real.number.is.out.of.range"));
        }
    }
    else if (obj1 is PdfString)
    {
        PdfString str = (PdfString)obj1;
        if (str.GetBytes().Length > maxStringLength)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.string.is.too.long"));
        }
    }
    else if (obj1 is PdfArray)
    {
        PdfArray array = (PdfArray)obj1;
        if (array.Size > maxArrayLength)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.array.is.out.of.bounds"));
        }
    }
    else if (obj1 is PdfDictionary)
    {
        PdfDictionary dictionary = (PdfDictionary)obj1;
        if (dictionary.Size > maxDictionaryLength)
        {
            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("pdf.dictionary.is.out.of.bounds"));
        }

        PdfName type = dictionary.GetAsName(PdfName.TYPE);
        if (PdfName.CATALOG.Equals(type))
        {
            if (!dictionary.Contains(PdfName.METADATA))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.contain.metadata"));
            }

            if (dictionary.Contains(PdfName.AA))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.not.include.an.aa.entry"));
            }

            if (dictionary.Contains(PdfName.NAMES))
            {
                PdfDictionary names = GetDirectDictionary(dictionary.Get(PdfName.NAMES));
                if (names != null && names.Contains(PdfName.EMBEDDEDFILES))
                {
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("the.document.catalog.dictionary.shall.not.include.embeddedfiles.names.entry"));
                }
            }

            if (CheckStructure(conformanceLevel))
            {
                PdfDictionary markInfo = GetDirectDictionary(dictionary.Get(PdfName.MARKINFO));
                if (markInfo == null || markInfo.GetAsBoolean(PdfName.MARKED) == null || markInfo.GetAsBoolean(PdfName.MARKED).BooleanValue == false)
                {
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.shall.include.a.markinfo.dictionary.whose.entry.marked.shall.have.a.value.of.true"));
                }
                if (!dictionary.Contains(PdfName.LANG))
                {
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("document.catalog.dictionary.should.contain.lang.entry"));
                }
            }

            PdfArray outputIntents = GetDirectArray(dictionary.Get(PdfName.OUTPUTINTENTS));
            bool pdfa1OutputIntentFound = false;
            if (outputIntents != null && outputIntents.Size > 0)
            {
                for (int i = 0; i < outputIntents.Size; i++)
                {
                    // An entry that does not resolve to a dictionary cannot be inspected;
                    // skip it before anything is read from it.
                    PdfDictionary outputIntentDictionary = GetDirectDictionary(outputIntents[i]);
                    if (outputIntentDictionary == null)
                    {
                        continue;
                    }

                    PdfName gts = outputIntentDictionary.GetAsName(PdfName.S);
                    bool isPdfA1Intent = PdfName.GTS_PDFA1.Equals(gts);
                    if (isPdfA1Intent)
                    {
                        if (pdfa1OutputIntentFound)
                        {
                            throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("a.pdfa.file.may.have.only.one.pdfa.outputintent"));
                        }
                        pdfa1OutputIntentFound = true;
                    }

                    // A PDF/A-1 output intent must carry a destination profile.
                    PdfObject destOutputIntent = outputIntentDictionary.Get(PdfName.DESTOUTPUTPROFILE);
                    if (destOutputIntent == null && isPdfA1Intent)
                    {
                        throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("outputintent.shall.have.gtspdfa1.and.destoutputintent"));
                    }
                }
            }

            if ((rgbUsed || cmykUsed || grayUsed) && !pdfa1OutputIntentFound)
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("if.device.rgb.cmyk.gray.used.in.file.that.file.shall.contain.pdfa.outputintent"));
            }
        }
        else if (PdfName.PAGE.Equals(type))
        {
            if (dictionary.Contains(PdfName.AA))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("page.dictionary.shall.not.include.aa.entry"));
            }
        }
        else if (PdfName.OUTPUTINTENT.Equals(type))
        {
            // The 4-character signature at bytes 16..19 of the profile data names the
            // profile's colour space; it stays empty when the intent has no profile.
            PdfObject iccProfileStream = dictionary.Get(PdfName.DESTOUTPUTPROFILE);
            String inputColorSpace = "";
            if (iccProfileStream != null)
            {
                ICC_Profile icc_profile = writer.ColorProfile;
                inputColorSpace = Encoding.GetEncoding("US-ASCII").GetString(icc_profile.Data, 16, 4);
            }

            PdfName gts = dictionary.GetAsName(PdfName.S);
            if (!PdfName.GTS_PDFA1.Equals(gts))
            {
                throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("outputintent.shall.have.gtspdfa1.and.destoutputintent"));
            }

            if ("RGB ".Equals(inputColorSpace))
            {
                if (cmykUsed)
                {
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("devicecmyk.may.be.used.only.if.the.file.has.a.cmyk.pdfa.outputIntent"));
                }
            }
            else if ("CMYK".Equals(inputColorSpace))
            {
                if (rgbUsed)
                {
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("devicergb.may.be.used.only.if.the.file.has.a.rgb.pdfa.outputIntent"));
                }
            }
            else
            {
                // Neither an RGB nor a CMYK profile: no device colour space is acceptable.
                if (cmykUsed)
                {
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("devicecmyk.may.be.used.only.if.the.file.has.a.cmyk.pdfa.outputIntent"));
                }
                if (rgbUsed)
                {
                    throw new PdfAConformanceException(obj1, MessageLocalization.GetComposedMessage("devicergb.may.be.used.only.if.the.file.has.a.rgb.pdfa.outputIntent"));
                }
            }
        }
    }
}
/**
 * Reads the raw samples of an unfiltered inline image straight from the content parser.
 * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
 * The parser will be left positioned immediately following the EI operator.
 * The number of bytes read is the bytes-per-row value (from ComputeBytesPerRow) times the image height.
 * @param imageDictionary the dictionary of the inline image
 * @param colorSpaceDic the color space dictionary, handed on to ComputeBytesPerRow
 * @param ps the content parser
 * @return the samples of the image
 * @throws ArgumentException if the image dictionary declares a filter
 * @throws InlineImageParseException if the stream ends early or EI does not follow the samples
 */
private static byte[] ParseUnfilteredSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps)
{
    if (imageDictionary.Contains(PdfName.FILTER))
    {
        throw new ArgumentException("Dictionary contains filters");
    }

    PdfNumber height = imageDictionary.GetAsNumber(PdfName.HEIGHT);
    int sampleByteCount = ComputeBytesPerRow(imageDictionary, colorSpaceDic) * height.IntValue;
    byte[] samples = new byte[sampleByteCount];
    PRTokeniser tokeniser = ps.GetTokeniser();

    // Per the PDF spec, ID is followed by exactly one white-space character and the image
    // data starts right after it. Some producers omit that separator, in which case the
    // character just read is already the first data byte. The tokeniser classifies 0 as
    // whitespace, but here a 0 byte has to count as data.
    int afterId = tokeniser.Read();
    int filled = 0;
    bool separatorPresent = PRTokeniser.IsWhitespace(afterId) && afterId != 0;
    if (!separatorPresent)
    {
        samples[0] = (byte)afterId;
        filled = 1;
    }

    while (filled < sampleByteCount)
    {
        int next = tokeniser.Read();
        if (next == -1)
        {
            throw new InlineImageParseException("End of content stream reached before end of image data");
        }
        samples[filled] = (byte)next;
        filled++;
    }

    PdfObject terminator = ps.ReadPRObject();
    if (!terminator.ToString().Equals("EI"))
    {
        throw new InlineImageParseException("EI not found after end of image data");
    }

    return samples;
}
/**
 * Reads the sample bytes of an inline image from the content parser, taking a possible filter entry into account.
 * The parser must be positioned immediately after the ID operator that ends the inline image's dictionary.
 * On a successful return the terminating "EI" and the whitespace character after it have been consumed.
 * <b>Note:</b> the filters are not applied here; the bytes are returned as they appear in the content.
 * @param imageDictionary the dictionary of the inline image
 * @param colorSpaceDic the color space dictionary, forwarded to ParseUnfilteredSamples and PdfImageObject
 * @param ps the content parser
 * @return the samples of the image
 * @throws InlineImageParseException if the content ends before an acceptable EI marker was found
 * @throws IOException if anything bad happens during parsing
 */
private static byte[] ParseInlineImageSamples(PdfDictionary imageDictionary, PdfDictionary colorSpaceDic, PdfContentParser ps) {
    // The ID operator has already been parsed when we get here.
    if (!imageDictionary.Contains(PdfName.FILTER)) {
        return ParseUnfilteredSamples(imageDictionary, colorSpaceDic, ps);
    }

    // Scan for <ws>EI<ws>. Two known weaknesses of this approach: the image data itself may contain
    // <ws>EI<ws>, and some streams have no whitespace in front of EI. Handling those properly would
    // mean decoding the content stream, which is deliberately avoided here.
    MemoryStream imageData = new MemoryStream();  // bytes accepted as image data
    MemoryStream pending = new MemoryStream();    // bytes that may turn out to be the end marker
    PRTokeniser tokeniser = ps.GetTokeniser();

    // Number of marker characters matched so far: 0 = none, 1 = <ws>, 2 = <ws>E, 3 = <ws>EI.
    int matched = 0;

    for (int ch = tokeniser.Read(); ch != -1; ch = tokeniser.Read()) {
        bool isWhitespace = PRTokeniser.IsWhitespace(ch);
        bool extendsMarker = false;

        switch (matched) {
            case 0:
                extendsMarker = isWhitespace;
                break;

            case 1:
                if (ch == 'E') {
                    extendsMarker = true;
                } else if (isWhitespace) {
                    // A whitespace that belongs to the image data is followed by another whitespace
                    // that may precede EI: release the first one and keep the current one as the
                    // new candidate start. The match count intentionally stays at 1.
                    byte[] released = pending.ToArray();
                    imageData.Write(released, 0, released.Length);
                    pending.SetLength(0);
                    pending.WriteByte((byte)ch);
                    continue;
                }
                break;

            case 2:
                extendsMarker = (ch == 'I');
                break;

            case 3:
                if (isWhitespace) {
                    try {
                        // Accept the marker only if the bytes collected so far build a valid image.
                        byte[] candidate = imageData.ToArray();
                        new PdfImageObject(imageDictionary, candidate, colorSpaceDic);
                        return candidate;
                    } catch (Exception) {
                        // Not a complete image yet: the marker was part of the data, so fall
                        // through to the common path that treats everything pending as data.
                    }
                }
                break;
        }

        if (extendsMarker) {
            matched++;
            pending.WriteByte((byte)ch);
            continue;
        }

        // No (further) match: whatever was held back is image data, and so is the current byte.
        byte[] held = pending.ToArray();
        imageData.Write(held, 0, held.Length);
        pending.SetLength(0);
        imageData.WriteByte((byte)ch);
        matched = 0;
    }

    throw new InlineImageParseException("Could not find image data or EI");
}
/**
 * Tells whether the underlying dictionary carries an MCID entry.
 * @return true if the dictionary contains the PdfName.MCID key, false otherwise
 */
virtual public bool HasMcid() {
    bool mcidPresent = dictionary.Contains(PdfName.MCID);
    return mcidPresent;
}
/// <summary>
/// Walks over every object of a PDF file and writes each Flate-compressed image XObject
/// to <paramref name="imageDirectory"/> as "image-NNN.png".
/// </summary>
/// <param name="file">Path of the PDF file to read.</param>
/// <param name="imageDirectory">
/// Target directory; a non-rooted path is combined with the directory of <paramref name="file"/>.
/// </param>
/// <remarks>
/// Only objects whose /Type is /XObject, whose /Subtype is /Image and whose /Filter prints as
/// "/FlateDecode" are exported. The decoded bytes are always copied into an 8-bit indexed bitmap,
/// whatever the image's real color space is.
/// NOTE(review): images with another sample layout will therefore not render correctly.
/// </remarks>
public static void Test_ExtractImages(string file, string imageDirectory)
{
    // from http://stackoverflow.com/questions/802269/extract-images-using-itextsharp/804392#804392
    Trace.WriteLine($"extract images from pdf \"{file}\" to \"{imageDirectory}\"");
    if (!zPath.IsPathRooted(imageDirectory))
    {
        imageDirectory = zPath.Combine(zPath.GetDirectoryName(file), imageDirectory);
    }

    using (PdfReader pdfReader = new PdfReader(file))
    {
        int index = 1;
        int objectCount = pdfReader.XrefSize;
        for (int i = 0; i < objectCount; i++)
        {
            PdfObject obj = pdfReader.GetPdfObject(i);
            PdfDictionary objDic = obj as PdfDictionary;
            if (objDic == null)
            {
                continue;
            }

            bool isImageXObject =
                objDic.Contains(PdfName.TYPE) && objDic.Get(PdfName.TYPE).ToString() == "/XObject"
                && objDic.Contains(PdfName.SUBTYPE) && objDic.Get(PdfName.SUBTYPE).ToString() == "/Image";
            if (!isImageXObject)
            {
                continue;
            }

            // GetAsNumber returns null instead of throwing when the entry is absent or not a number,
            // unlike the previous int.Parse on the entry's text.
            PdfNumber widthNumber = objDic.GetAsNumber(PdfName.WIDTH);
            PdfNumber heightNumber = objDic.GetAsNumber(PdfName.HEIGHT);
            if (widthNumber == null || heightNumber == null)
            {
                Trace.WriteLine($"object {i + 1} image skipped, width or height is missing");
                continue;
            }

            int width = widthNumber.IntValue;
            int height = heightNumber.IntValue;

            // /Filter and /BitsPerComponent may be absent; a missing entry is logged as empty text.
            string filter = objDic.Get(PdfName.FILTER)?.ToString();
            string bpp = objDic.Get(PdfName.BITSPERCOMPONENT)?.ToString();
            Trace.WriteLine($"object {i + 1} image width {width} height {height} filter {filter} bits per component {bpp}");

            PRStream stream = obj as PRStream;
            if (filter != "/FlateDecode" || stream == null)
            {
                continue;
            }
            if (width <= 0 || height <= 0)
            {
                // The Bitmap constructor rejects non-positive dimensions.
                Trace.WriteLine($"object {i + 1} image skipped, invalid size");
                continue;
            }

            byte[] arr = PdfReader.FlateDecode(PdfReader.GetStreamBytesRaw(stream), true);
            Trace.WriteLine($"  bytes count {arr.Length}");

            using (Bitmap bmp = new Bitmap(width, height, PixelFormat.Format8bppIndexed))
            {
                BitmapData bmpData = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, PixelFormat.Format8bppIndexed);
                try
                {
                    // Copy row by row: a bitmap row occupies Stride bytes, which can be larger than
                    // width, and the decoded data can be shorter or longer than width * height.
                    // A single bulk copy would skew the rows or write outside the locked buffer.
                    int rowLength = System.Math.Min(width, System.Math.Abs(bmpData.Stride));
                    for (int row = 0; row < height; row++)
                    {
                        long sourceOffset = (long)row * width;
                        if (sourceOffset >= arr.Length)
                        {
                            break;
                        }
                        int count = (int)System.Math.Min((long)rowLength, arr.Length - sourceOffset);
                        System.IntPtr target = System.IntPtr.Add(bmpData.Scan0, row * bmpData.Stride);
                        Marshal.Copy(arr, (int)sourceOffset, target, count);
                    }
                }
                finally
                {
                    bmp.UnlockBits(bmpData);
                }

                bmp.Save(zPath.Combine(imageDirectory, $"image-{index++:000}.png"), ImageFormat.Png);
            }
        }
    }
}