/// <summary> /// Parses a stream object and removes OCGs. </summary> /// <param name="stream"> a stream object </param> /// <param name="resources"> the resources dictionary of that object (containing info about the OCGs) </param> public virtual void Parse(PRStream stream, PdfDictionary resources) { baos = new MemoryStream(); properties = resources.GetAsDict(PdfName.PROPERTIES); xobj = new HashSet2 <PdfName>(); PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT); if (xobjects != null) { // remove XObject (form or image) that belong to an OCG that needs to be removed foreach (PdfName name in xobjects.Keys) { PRStream xobject = (PRStream)xobjects.GetAsStream(name); PdfDictionary oc = xobject.GetAsDict(PdfName.OC); if (oc != null) { PdfString ocname = oc.GetAsString(PdfName.NAME); if (ocname != null && ocgs.Contains(ocname.ToString())) { xobj.Add(name); } } } foreach (PdfName name in xobj) { xobjects.Remove(name); } } // parse the content stream byte[] contentBytes = PdfReader.GetStreamBytes(stream); PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(contentBytes)); PdfContentParser ps = new PdfContentParser(tokeniser); List <PdfObject> operands = new List <PdfObject>(); while (ps.Parse(operands).Count > 0) { PdfLiteral @operator = (PdfLiteral)operands[operands.Count - 1]; ProcessOperator(this, @operator, operands); } baos.Flush(); baos.Close(); stream.SetData(baos.GetBuffer()); }
/// <summary> /// Parses a stream object and removes OCGs. </summary> /// <param name="stream"> a stream object </param> /// <param name="resources"> the resources dictionary of that object (containing info about the OCGs) </param> public virtual void Parse(PRStream stream, PdfDictionary resources) { baos = new MemoryStream(); properties = resources.GetAsDict(PdfName.PROPERTIES); xobj = new HashSet2 <PdfName>(); PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT); if (xobjects != null) { // remove XObject (form or image) that belong to an OCG that needs to be removed foreach (PdfName name in xobjects.Keys) { PRStream xobject = (PRStream)xobjects.GetAsStream(name); PdfDictionary oc = xobject.GetAsDict(PdfName.OC); if (oc != null) { PdfString ocname = oc.GetAsString(PdfName.NAME); if (ocname != null && ocgs.Contains(ocname.ToString())) { xobj.Add(name); } } } foreach (PdfName name in xobj) { xobjects.Remove(name); } } // parse the content stream byte[] contentBytes = PdfReader.GetStreamBytes(stream); PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(contentBytes)); PdfContentParser ps = new PdfContentParser(tokeniser); List <PdfObject> operands = new List <PdfObject>(); while (ps.Parse(operands).Count > 0) { PdfLiteral @operator = (PdfLiteral)operands[operands.Count - 1]; ProcessOperator(this, @operator, operands); if ("BI".Equals(@operator.ToString())) { int found = 0; int ch; bool immediateAfterBI = true; while ((ch = tokeniser.Read()) != -1) { if (!immediateAfterBI || !PRTokeniser.IsWhitespace(ch)) { baos.WriteByte((byte)ch); } immediateAfterBI = false; if (found == 0 && PRTokeniser.IsWhitespace(ch)) { found++; } else if (found == 1 && ch == 'E') { found++; } else if (found == 1 && PRTokeniser.IsWhitespace(ch)) { // this clause is needed if we have a white space character that is part of the image data // followed by a whitespace character that precedes the EI operator. In this case, we need // to flush the first whitespace, then treat the current whitespace as the first potential // character for the end of stream check. Note that we don't increment 'found' here. } else if (found == 2 && ch == 'I') { found++; } else if (found == 3 && PRTokeniser.IsWhitespace(ch)) { break; } else { found = 0; } } } } baos.Flush(); baos.Close(); stream.SetData(baos.GetBuffer()); }
private Bitmap GetImagesFromPdfDict(PdfDictionary dict) { PdfDictionary res = (PdfDictionary)(PdfReader.GetPdfObject(dict.Get(PdfName.RESOURCES))); PdfDictionary xobj = (PdfDictionary)(PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT))); Bitmap bm = null; if (xobj != null) { foreach (PdfName name in xobj.Keys) { PdfObject obj = xobj.Get(name); if (obj.IsIndirect()) { PdfDictionary tg = (PdfDictionary)(PdfReader.GetPdfObject(obj)); PdfName subtype = (PdfName)(PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE))); if (PdfName.IMAGE.Equals(subtype)) { int xrefIdx = ((PRIndirectReference)obj).Number; PdfObject pdfObj = this.reader.GetPdfObject(xrefIdx); PRStream str = (PRStream)(pdfObj); PdfArray decode = tg.GetAsArray(PdfName.DECODE); int width = tg.GetAsNumber(PdfName.WIDTH).IntValue; int height = tg.GetAsNumber(PdfName.HEIGHT).IntValue; int bpc = tg.GetAsNumber(PdfName.BITSPERCOMPONENT).IntValue; var filter = tg.Get(PdfName.FILTER); if (filter.Equals(PdfName.FLATEDECODE)) { var imageBytes = PdfReader.GetStreamBytesRaw(str); var decodedBytes = PdfReader.FlateDecode(imageBytes); //decode the raw image var streamBytes = PdfReader.DecodePredictor(decodedBytes, str.GetAsDict(PdfName.DECODEPARMS)); //decode predict to filter the bytes var pixelFormat = PixelFormat.Format1bppIndexed; switch (bpc) //determine the BPC { case 1: pixelFormat = PixelFormat.Format1bppIndexed; break; case 8: pixelFormat = PixelFormat.Format8bppIndexed; break; case 24: pixelFormat = PixelFormat.Format24bppRgb; break; } bm = new Bitmap(width, height, pixelFormat); { var bmpData = bm.LockBits(new System.Drawing.Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, pixelFormat); var length = (int)Math.Ceiling(width * bpc / 8.0); for (int i = 0; i < height; i++) { int offset = i * length; int scanOffset = i * bmpData.Stride; Marshal.Copy(streamBytes, offset, new IntPtr(bmpData.Scan0.ToInt32() + scanOffset), length); } bm.UnlockBits(bmpData); } } else { iTextSharp.text.pdf.parser.PdfImageObject pdfImage = new iTextSharp.text.pdf.parser.PdfImageObject(str); bm = (System.Drawing.Bitmap)pdfImage.GetDrawingImage(); } int yDPI = bm.Height / 11; int xDPI = (bm.Width * 2) / 17; xDPI = Math.Abs(xDPI - 300) < 10 ? 300 : xDPI; yDPI = Math.Abs(yDPI - 300) < 10 ? 300 : yDPI; xDPI = Math.Abs(xDPI - 600) < 10 ? 600 : xDPI; yDPI = Math.Abs(yDPI - 600) < 10 ? 600 : yDPI; if (xDPI == yDPI) { bm.SetResolution(xDPI, yDPI); } else { } break; } else if (PdfName.FORM.Equals(subtype) || PdfName.GROUP.Equals(subtype)) { GetImagesFromPdfDict(tg); } } } } return(bm); }