/// <summary>
/// Follows a chain of indirect references until something that is not a
/// <c>PdfIndirectReference</c> is reached, or the chain cannot be followed further.
/// </summary>
/// <param name="obj">Object to resolve; may be <c>null</c>.</param>
/// <returns>
/// <c>null</c> when <paramref name="obj"/> is <c>null</c>; otherwise the last object
/// reached. This can still be a reference when a lookup yields <c>null</c> or the
/// hop limit is hit.
/// </returns>
virtual protected PdfObject GetDirectObject(PdfObject obj) {
    if (obj == null) {
        return null;
    }

    // Hop counter: protects against references that point at each other in a cycle.
    int hops = 0;
    PdfObject current = obj;

    while (current is PdfIndirectReference) {
        PdfObject target;
        if (!current.IsIndirect()) {
            // Not resolvable through the reader: look it up in the local object cache.
            cachedObjects.TryGetValue(new RefKey((PdfIndirectReference) current), out target);
        } else {
            target = PdfReader.GetPdfObject(current);
        }

        if (target == null) {
            // Nothing behind the reference: hand back the reference itself.
            break;
        }
        current = target;

        // Give up once the counter passes 10.
        if (hops++ > 10) {
            break;
        }
    }

    return current;
}
/// <summary>
/// Builds a comma-separated description of the kind flags reported by a PDF object.
/// </summary>
/// <param name="pdfObject">Object to describe; may be <c>null</c>.</param>
/// <returns>
/// <c>"null"</c> for a <c>null</c> argument; otherwise the matching labels joined by
/// <c>", "</c> (an empty string when no flag is set).
/// </returns>
public static string GetObjectInfo(PdfObject pdfObject) {
    if (pdfObject == null) {
        return "null";
    }

    // Flags are evaluated in the same order as their labels below.
    bool[] flags = {
        pdfObject.IsBoolean(),
        pdfObject.IsNumber(),
        pdfObject.IsString(),
        pdfObject.IsLiteral(),
        pdfObject.IsArray(),
        pdfObject.IsDictionary(),
        pdfObject.IsName(),
        pdfObject.IsStream(),
        pdfObject.IsIndirect(),
        pdfObject.IsIndirectReference(),
        pdfObject.IsModified(),
        pdfObject.IsNull()
    };
    string[] labels = {
        ", bool",
        ", number",
        ", string",
        ", literal",
        ", array",
        ", dictionary",
        ", name",
        ", stream",
        ", indirect",
        ", indirect reference",
        ", modified",
        ", null"
    };

    StringBuilder description = new StringBuilder();
    for (int k = 0; k < labels.Length; k++) {
        if (flags[k]) {
            description.Append(labels[k]);
        }
    }

    // Every label carries a leading ", "; drop the first separator.
    if (description.Length > 0) {
        description.Remove(0, 2);
    }
    return description.ToString();
}
/// <summary>
/// Removes from <paramref name="array"/> every indirect entry whose object number
/// equals the number of <paramref name="refo"/>.
/// </summary>
/// <param name="array">Array to clean up in place.</param>
/// <param name="refo">Reference to remove; ignored when <c>null</c> or not indirect.</param>
/// <returns>The size of the array after removal.</returns>
private int RemoveRefFromArray(PdfArray array, PdfObject refo) {
    bool isUsableReference = refo != null && refo.IsIndirect();
    if (isUsableReference) {
        PdfIndirectReference wanted = (PdfIndirectReference)refo;
        int index = 0;
        while (index < array.Size) {
            PdfObject entry = array[index];
            bool matches = entry.IsIndirect()
                && ((PdfIndirectReference)entry).Number == wanted.Number;
            if (matches) {
                // Do not advance: the next entry has shifted into this slot.
                array.Remove(index);
            } else {
                index++;
            }
        }
    }
    return array.Size;
}
/// <summary>
/// Processes a PDF: for each page, the first JPEG (<c>/DCTDecode</c>) image XObject
/// found is passed through <c>Globals.ResizeImage</c> and, when its dimensions
/// change, replaced in the document by the re-encoded JPEG.
/// </summary>
/// <param name="fileStream">Stream containing the source PDF. It is read but not closed.</param>
/// <returns>
/// A <see cref="MemoryStream"/> holding the stamped document, or <c>null</c> when the
/// PDF was not opened with full permissions.
/// </returns>
private static MemoryStream PDFCompress2(Stream fileStream)
{
    // The BinaryReader is deliberately not disposed: disposing it would also close
    // the caller's stream.
    BinaryReader br = new BinaryReader(fileStream);
    byte[] byt = br.ReadBytes((int)fileStream.Length);

    PdfReader pdf = new PdfReader(byt);
    try
    {
        if (!pdf.IsOpenedWithFullPermissions)
        {
            return null;
        }

        MemoryStream ms = new MemoryStream();
        PdfStamper stp = new PdfStamper(pdf, ms);
        PdfWriter writer = stp.Writer;

        int page_count = pdf.NumberOfPages;
        for (int i = 1; i <= page_count; i++)
        {
            PdfDictionary pg = pdf.GetPageN(i);
            PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
            if (res == null)
            {
                // Page without a resource dictionary: nothing to compress.
                continue;
            }

            PdfDictionary xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
            if (xobj == null)
            {
                continue;
            }

            foreach (PdfName name in xobj.Keys)
            {
                PdfObject obj = xobj.Get(name);
                if (!obj.IsIndirect())
                {
                    continue;
                }

                // This is sometimes null when the same image was already handled
                // earlier (its indirect object has been killed).
                PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                if (tg == null)
                {
                    continue;
                }

                PdfName type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
                if (!PdfName.IMAGE.Equals(type))
                {
                    continue;
                }

                // The filter prints either as "[/DCTDecode]" or as "/DCTDecode",
                // hence the substring test.
                PdfObject filterObj = tg.Get(PdfName.FILTER);
                string filter = filterObj != null ? filterObj.ToString() : string.Empty;
                if (!filter.Contains("/DCTDecode"))
                {
                    continue;
                }

                int xrefIdx = ((PRIndirectReference)obj).Number;
                PRStream str = (PRStream)pdf.GetPdfObject(xrefIdx);

                // Exceptions are left to propagate untouched so the original stack
                // trace is preserved.
                PdfImageObject pdfImage = new PdfImageObject(str);
                using (System.Drawing.Image imgOriginal = pdfImage.GetDrawingImage())
                using (System.Drawing.Image img2 = Globals.ResizeImage(imgOriginal))
                {
                    if (img2.Width != imgOriginal.Width || img2.Height != imgOriginal.Height)
                    {
                        using (var stream = new System.IO.MemoryStream())
                        {
                            img2.Save(stream, ImageFormat.Jpeg);
                            stream.Position = 0;

                            PdfReader.KillIndirect(obj);
                            iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(stream);
                            writer.AddDirectImageSimple(img, (PRIndirectReference)obj);
                        }
                    }
                }

                // Only the first JPEG image of each page is handled; the remaining
                // XObjects of the page are skipped.
                break;
            }
        }

        // Keep the MemoryStream usable by the caller after the stamper is closed.
        stp.Writer.CloseStream = false;
        stp.FormFlattening = true;
        stp.Close();
        return ms;
    }
    finally
    {
        // Close the reader on every path, including the early return and failures.
        pdf.Close();
    }
}
/// <summary>
/// Walks every page of a PDF and hands each indirect XObject that declares both a
/// width and a height to the image-saving overload of <c>ExtractImageOLD</c>.
/// Files are named <c>{pdf name}_{page:000}_{index}.png</c> inside
/// <paramref name="imgPath"/>.
/// </summary>
/// <param name="pdfFile">Path of the PDF to read.</param>
/// <param name="imgPath">Directory prefix for the extracted images.</param>
/// <param name="progressBar1">Progress bar updated once per processed page.</param>
/// <returns><c>true</c> when all pages were processed; <c>false</c> when any exception occurred.</returns>
private bool ExtractImageOLD(String pdfFile, string imgPath, ProgressBar progressBar1)
{
    bool ret = true;
    string imgname = System.IO.Path.GetFileNameWithoutExtension(pdfFile);

    progressBar1.Visible = true;
    progressBar1.Minimum = 1;

    PdfReader pdfReader = null;
    try
    {
        pdfReader = new PdfReader(pdfFile);
        progressBar1.Maximum = pdfReader.NumberOfPages;

        for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
        {
            PdfDictionary pg = pdfReader.GetPageNRelease(pageNumber);
            PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));

            // A page may have no resources or no XObjects at all; such a page is
            // skipped instead of aborting the whole extraction.
            PdfDictionary xobj = res == null
                ? null
                : (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));

            int pageRotation = pdfReader.GetPageRotation(pageNumber);

            if (xobj != null)
            {
                // Image counter restarts on every page.
                int imgNo = 1;
                foreach (PdfName name in xobj.Keys)
                {
                    PdfObject obj = xobj.Get(name);
                    if (!obj.IsIndirect())
                    {
                        continue;
                    }

                    PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                    if (tg == null)
                    {
                        continue;
                    }

                    // Only XObjects that declare both dimensions are extracted.
                    if (tg.Get(PdfName.WIDTH) == null || tg.Get(PdfName.HEIGHT) == null)
                    {
                        continue;
                    }

                    ImageRenderInfo imgRI = ImageRenderInfo.CreateForXObject(new GraphicsState(), (PRIndirectReference)obj, tg);
                    PdfImageObject pdfimage = imgRI.GetImage();
                    ExtractImageOLD(pdfimage, imgPath + "\\" + imgname + pageNumber.ToString("_000") + "_" + imgNo + ".png", pageRotation);
                    imgNo++;
                }
            }

            progressBar1.Value = pageNumber;
            // Keep the UI responsive while the loop runs.
            Application.DoEvents();
        }
    }
    catch (Exception ee)
    {
        // Best effort: report failure through the return value only.
        System.Diagnostics.Debug.WriteLine(ee.StackTrace);
        ret = false;
    }
    finally
    {
        // Release the reader even when extraction failed part-way.
        if (pdfReader != null)
        {
            pdfReader.Close();
        }
    }

    progressBar1.Visible = false;
    return ret;
}
/// <summary>
/// Runs a Ghostscript PDF job with Zip compression for color/gray and monochrome
/// images and resampling switched off, then checks filter, width and computed byte
/// size of the image XObjects on page 1.
/// </summary>
public void CompressionZipNoResample()
{
    // Color and grayscale images: Zip, no resampling.
    _th.Profile.PdfSettings.CompressColorAndGray.Enabled = true;
    _th.Profile.PdfSettings.CompressColorAndGray.Compression = CompressionColorAndGray.Zip;
    _th.Profile.PdfSettings.CompressColorAndGray.JpegCompressionFactor = 25;
    _th.Profile.PdfSettings.CompressColorAndGray.Resampling = false;
    _th.Profile.PdfSettings.CompressColorAndGray.Dpi = 24;

    // Monochrome images: Zip, no resampling.
    _th.Profile.PdfSettings.CompressMonochrome.Enabled = true;
    _th.Profile.PdfSettings.CompressMonochrome.Compression = CompressionMonochrome.Zip;
    _th.Profile.PdfSettings.CompressMonochrome.Resampling = false;
    _th.Profile.PdfSettings.CompressMonochrome.Dpi = 24;

    _th.GenerateGsJob(PSfiles.PDFCreatorTestpage, OutputFormat.Pdf);
    _th.RunGsJob();

    var reader = new PdfReader(_th.Job.OutputFiles[0]);
    var firstPage = reader.GetPageN(1);
    var resources = (PdfDictionary)PdfReader.GetPdfObject(firstPage.Get(PdfName.RESOURCES));
    var xObjects = (PdfDictionary)PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT));

    var actualFilters = new string[4];
    var actualWidths = new string[4];
    var actualSizes = new string[4];

    int slot = 0;
    foreach (PdfName key in xObjects.Keys)
    {
        PdfObject entry = xObjects.Get(key);
        if (!entry.IsIndirect())
        {
            continue;
        }

        var image = (PdfDictionary)PdfReader.GetPdfObject(entry);

        PdfObject filter = image.Get(PdfName.FILTER);
        if (filter != null)
        {
            actualFilters[slot] = filter.ToString();
        }

        actualWidths[slot] = image.Get(PdfName.WIDTH).ToString();
        string height = image.Get(PdfName.HEIGHT).ToString();

        var byteSize = CalculateImageByteSize(float.Parse(actualWidths[slot]), float.Parse(height), entry, image);
        actualSizes[slot] = byteSize.ToString(CultureInfo.InvariantCulture);

        slot++;
    }

    var expectedFilters = new[] { "/FlateDecode", "/FlateDecode", "/FlateDecode", "/FlateDecode" };
    var expectedWidths = new[] { "200", "200", "200", "475" };
    var expectedSizes = new[] { "1995", "29961", "89669", "11481" };

    // Same assertion order as before: all filters, then all widths, then all sizes.
    for (int k = 0; k < expectedFilters.Length; k++)
    {
        Assert.AreEqual(expectedFilters[k], actualFilters[k]);
    }
    for (int k = 0; k < expectedWidths.Length; k++)
    {
        Assert.AreEqual(expectedWidths[k], actualWidths[k]);
    }
    for (int k = 0; k < expectedSizes.Length; k++)
    {
        Assert.AreEqual(expectedSizes[k], actualSizes[k]);
    }
}
/// <summary>
/// Runs a Ghostscript PDF job with <c>JpegMaximum</c> compression for color/gray
/// images (no resampling) and monochrome compression disabled, then checks filter,
/// width and computed byte size of the image XObjects on page 1.
/// </summary>
public void CompressionFactorJpegMaximum_NoResample()
{
    // Color and grayscale images: maximum JPEG compression, no resampling.
    _th.Profile.PdfSettings.CompressColorAndGray.Enabled = true;
    _th.Profile.PdfSettings.CompressColorAndGray.Compression = CompressionColorAndGray.JpegMaximum;
    _th.Profile.PdfSettings.CompressColorAndGray.JpegCompressionFactor = 25;
    _th.Profile.PdfSettings.CompressColorAndGray.Resampling = false;
    _th.Profile.PdfSettings.CompressColorAndGray.Dpi = 8;

    // Monochrome compression is switched off; the other values are still set.
    _th.Profile.PdfSettings.CompressMonochrome.Enabled = false;
    _th.Profile.PdfSettings.CompressMonochrome.Compression = CompressionMonochrome.CcittFaxEncoding;
    _th.Profile.PdfSettings.CompressMonochrome.Resampling = true;
    _th.Profile.PdfSettings.CompressMonochrome.Dpi = 8;

    _th.GenerateGsJob(PSfiles.PDFCreatorTestpage, OutputFormat.Pdf);
    _th.RunGsJob();

    var reader = new PdfReader(_th.Job.OutputFiles[0]);
    PdfDictionary firstPage = reader.GetPageN(1);
    var resources = (PdfDictionary)PdfReader.GetPdfObject(firstPage.Get(PdfName.RESOURCES));
    var xObjects = (PdfDictionary)PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT));

    var actualFilters = new string[4];
    var actualWidths = new int[4];
    var actualSizes = new int[4];

    int slot = 0;
    foreach (PdfName key in xObjects.Keys)
    {
        PdfObject entry = xObjects.Get(key);
        if (!entry.IsIndirect())
        {
            continue;
        }

        var image = (PdfDictionary)PdfReader.GetPdfObject(entry);

        PdfObject filter = image.Get(PdfName.FILTER);
        if (filter != null)
        {
            actualFilters[slot] = filter.ToString();
        }

        actualWidths[slot] = Convert.ToInt32(image.Get(PdfName.WIDTH).ToString());
        string height = image.Get(PdfName.HEIGHT).ToString();
        actualSizes[slot] = CalculateImageByteSize(actualWidths[slot], float.Parse(height), entry, image);

        slot++;
    }

    // The first image is expected to carry no filter entry at all.
    var expectedFilters = new string[] { null, "/DCTDecode", "/DCTDecode", "/DCTDecode" };
    var expectedWidths = new[] { 200, 200, 200, 475 };
    var expectedSizes = new[] { 1995, 4202, 6179, 4197 };

    // Same assertion order as before: all filters, then all widths, then all sizes.
    for (int k = 0; k < expectedFilters.Length; k++)
    {
        Assert.AreEqual(expectedFilters[k], actualFilters[k]);
    }
    for (int k = 0; k < expectedWidths.Length; k++)
    {
        Assert.AreEqual(expectedWidths[k], actualWidths[k]);
    }
    for (int k = 0; k < expectedSizes.Length; k++)
    {
        Assert.AreEqual(expectedSizes[k], actualSizes[k]);
    }
}
/// <summary>
/// Runs a Ghostscript PDF job with Zip compression for color/gray images and CCITT
/// fax encoding for monochrome images, both resampled to 24 dpi, then checks filter,
/// width and computed byte size of the image XObjects on page 1.
/// </summary>
public void CompressionZipFaxResample24Dpi()
{
    // Color and grayscale images: Zip, resampled to 24 dpi.
    _th.Profile.PdfSettings.CompressColorAndGray.Enabled = true;
    _th.Profile.PdfSettings.CompressColorAndGray.Compression = CompressionColorAndGray.Zip;
    _th.Profile.PdfSettings.CompressColorAndGray.JpegCompressionFactor = 25;
    _th.Profile.PdfSettings.CompressColorAndGray.Resampling = true;
    _th.Profile.PdfSettings.CompressColorAndGray.Dpi = 24;

    // Monochrome images: CCITT fax encoding, resampled to 24 dpi.
    _th.Profile.PdfSettings.CompressMonochrome.Enabled = true;
    _th.Profile.PdfSettings.CompressMonochrome.Compression = CompressionMonochrome.CcittFaxEncoding;
    _th.Profile.PdfSettings.CompressMonochrome.Resampling = true;
    _th.Profile.PdfSettings.CompressMonochrome.Dpi = 24;

    _th.GenerateGsJob(PSfiles.PDFCreatorTestpage, OutputFormat.Pdf);
    _th.RunGsJob();

    var reader = new PdfReader(_th.Job.OutputFiles[0]);
    PdfDictionary firstPage = reader.GetPageN(1);
    var resources = (PdfDictionary)PdfReader.GetPdfObject(firstPage.Get(PdfName.RESOURCES));
    var xObjects = (PdfDictionary)PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT));

    var actualFilters = new string[4];
    var actualWidths = new int[4];
    var actualSizes = new int[4];

    int slot = 0;
    foreach (PdfName key in xObjects.Keys)
    {
        PdfObject entry = xObjects.Get(key);
        if (!entry.IsIndirect())
        {
            continue;
        }

        var image = (PdfDictionary)PdfReader.GetPdfObject(entry);

        PdfObject filter = image.Get(PdfName.FILTER);
        if (filter != null)
        {
            actualFilters[slot] = filter.ToString();
        }

        actualWidths[slot] = Convert.ToInt32(image.Get(PdfName.WIDTH).ToString());
        string height = image.Get(PdfName.HEIGHT).ToString();
        actualSizes[slot] = CalculateImageByteSize(actualWidths[slot], float.Parse(height), entry, image);

        slot++;
    }

    var expectedFilters = new[] { "/CCITTFaxDecode", "/FlateDecode", "/FlateDecode", "/FlateDecode" };
    var expectedWidths = new[] { 48, 48, 48, 79 };
    var expectedSizes = new[] { 289, 1930, 5548, 634 };

    // Same assertion order as before: all filters, then all widths, then all sizes.
    for (int k = 0; k < expectedFilters.Length; k++)
    {
        Assert.AreEqual(expectedFilters[k], actualFilters[k]);
    }
    for (int k = 0; k < expectedWidths.Length; k++)
    {
        Assert.AreEqual(expectedWidths[k], actualWidths[k]);
    }

    // Sizes are compared with an allowed delta (difference) of a few bytes.
    for (int k = 0; k < expectedSizes.Length; k++)
    {
        Assert.AreEqual(expectedSizes[k], actualSizes[k], 30.0);
    }
}
/// <summary>
/// Returns the first image found among the XObjects of <paramref name="dict"/>'s
/// resources, descending into Form/Group XObjects when they are encountered first.
/// </summary>
/// <param name="dict">A page or form dictionary that may carry a /Resources entry.</param>
/// <returns>
/// The decoded bitmap, or <c>null</c> when the dictionary has no resources, no
/// XObjects, or no image could be decoded.
/// </returns>
private Bitmap GetImagesFromPdfDict(PdfDictionary dict)
{
    PdfDictionary res = (PdfDictionary)(PdfReader.GetPdfObject(dict.Get(PdfName.RESOURCES)));
    if (res == null)
    {
        // Form XObjects and pages are not required to carry their own resources.
        return null;
    }

    PdfDictionary xobj = (PdfDictionary)(PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)));
    if (xobj == null)
    {
        return null;
    }

    Bitmap bm = null;
    foreach (PdfName name in xobj.Keys)
    {
        PdfObject obj = xobj.Get(name);
        if (!obj.IsIndirect())
        {
            continue;
        }

        PdfDictionary tg = (PdfDictionary)(PdfReader.GetPdfObject(obj));
        if (tg == null)
        {
            continue;
        }

        PdfName subtype = (PdfName)(PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)));
        if (PdfName.IMAGE.Equals(subtype))
        {
            int xrefIdx = ((PRIndirectReference)obj).Number;
            PRStream str = (PRStream)(reader.GetPdfObject(xrefIdx));

            // Compare from the constant side so an image without a /Filter entry
            // falls through to the generic decoder instead of dereferencing null.
            PdfObject filter = tg.Get(PdfName.FILTER);
            if (PdfName.FLATEDECODE.Equals(filter))
            {
                bm = DecodeFlateImage(tg, str);
            }
            else
            {
                iTextSharp.text.pdf.parser.PdfImageObject pdfImage = new iTextSharp.text.pdf.parser.PdfImageObject(str);
                bm = (System.Drawing.Bitmap)pdfImage.GetDrawingImage();
            }

            if (bm != null)
            {
                ApplyEstimatedResolution(bm);
            }
            // Only the first image is returned.
            break;
        }
        else if (PdfName.FORM.Equals(subtype) || PdfName.GROUP.Equals(subtype))
        {
            // Keep the nested result; previously it was computed and thrown away.
            Bitmap nested = GetImagesFromPdfDict(tg);
            if (nested != null)
            {
                bm = nested;
                break;
            }
        }
    }
    return bm;
}

/// <summary>
/// Decodes a /FlateDecode image stream and copies its rows into a new bitmap.
/// </summary>
private static Bitmap DecodeFlateImage(PdfDictionary tg, PRStream str)
{
    int width = tg.GetAsNumber(PdfName.WIDTH).IntValue;
    int height = tg.GetAsNumber(PdfName.HEIGHT).IntValue;
    int bpc = tg.GetAsNumber(PdfName.BITSPERCOMPONENT).IntValue;

    byte[] imageBytes = PdfReader.GetStreamBytesRaw(str);
    byte[] decodedBytes = PdfReader.FlateDecode(imageBytes); // inflate the raw stream
    byte[] streamBytes = PdfReader.DecodePredictor(decodedBytes, str.GetAsDict(PdfName.DECODEPARMS)); // undo the predictor

    // NOTE(review): the pixel format is chosen from bits-per-component alone; the
    // color space (number of components) is not consulted - confirm this is intended.
    PixelFormat pixelFormat = PixelFormat.Format1bppIndexed;
    switch (bpc)
    {
        case 1:
            pixelFormat = PixelFormat.Format1bppIndexed;
            break;
        case 8:
            pixelFormat = PixelFormat.Format8bppIndexed;
            break;
        case 24:
            pixelFormat = PixelFormat.Format24bppRgb;
            break;
    }

    Bitmap bm = new Bitmap(width, height, pixelFormat);
    BitmapData bmpData = bm.LockBits(new System.Drawing.Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, pixelFormat);
    try
    {
        // Bytes per source row; the destination row pitch is bmpData.Stride.
        int length = (int)Math.Ceiling(width * bpc / 8.0);
        for (int i = 0; i < height; i++)
        {
            int offset = i * length;
            // 64-bit arithmetic: IntPtr.ToInt32 throws when the address does not fit
            // in 32 bits.
            long scanAddress = bmpData.Scan0.ToInt64() + (long)i * bmpData.Stride;
            Marshal.Copy(streamBytes, offset, new IntPtr(scanAddress), length);
        }
    }
    finally
    {
        // Always release the lock, even when a row copy fails.
        bm.UnlockBits(bmpData);
    }
    return bm;
}

/// <summary>
/// Derives a resolution from the pixel size (width / 8.5 and height / 11, presumably
/// a US Letter page in inches - confirm), snaps values within 10 of 300 or 600 to
/// those figures, and applies it only when both axes agree.
/// </summary>
private static void ApplyEstimatedResolution(Bitmap bm)
{
    int yDPI = bm.Height / 11;
    int xDPI = (bm.Width * 2) / 17;

    xDPI = Math.Abs(xDPI - 300) < 10 ? 300 : xDPI;
    yDPI = Math.Abs(yDPI - 300) < 10 ? 300 : yDPI;
    xDPI = Math.Abs(xDPI - 600) < 10 ? 600 : xDPI;
    yDPI = Math.Abs(yDPI - 600) < 10 ? 600 : yDPI;

    // A zero resolution (very small images) is not a valid value to set.
    if (xDPI == yDPI && xDPI > 0)
    {
        bm.SetResolution(xDPI, yDPI);
    }
}
/// <summary>
/// Saves the page range <paramref name="startPage"/>..<paramref name="endPage"/> of
/// <paramref name="srcFile"/> to <paramref name="dstFile"/> with full compression,
/// re-encoding image XObjects that carry no /Filter entry.
/// </summary>
/// <param name="srcFile">Path of the source PDF.</param>
/// <param name="dstFile">Path of the file to create (overwritten if present).</param>
/// <param name="startPage">First page to keep; out-of-range values fall back to 1.</param>
/// <param name="endPage">Last page to keep; out-of-range values fall back to the last page.</param>
/// <exception cref="Exception">Thrown when the source document has no pages.</exception>
public void SavePart(string srcFile, string dstFile, int startPage, int endPage)
{
    PdfReader pdfReader = null;
    try
    {
        Cursor.Current = Cursors.WaitCursor;
        pdfReader = new PdfReader(srcFile);
        pdfReader.RemoveUnusedObjects();
        if (pdfReader.NumberOfPages == 0)
        {
            throw new Exception("В документе отсутствуют страницы. Операция сохранения невозможна!");
        }

        startPage = (startPage <= 0 || startPage > pdfReader.NumberOfPages) ? 1 : startPage;
        endPage = (endPage <= 0 || endPage < startPage || endPage > pdfReader.NumberOfPages) ? pdfReader.NumberOfPages : endPage;
        pdfReader.SelectPages(startPage.ToString() + "-" + endPage.ToString());

        using (var file_stream = new FileStream(dstFile, FileMode.Create, FileAccess.Write, FileShare.None))
        using (var stamper = new PdfStamper(pdfReader, file_stream))
        {
            stamper.SetFullCompression();

            // After SelectPages the reader only holds the kept pages, numbered from 1,
            // so iterate over that range rather than the original page numbers.
            int selectedPageCount = pdfReader.NumberOfPages;
            for (int numberPage = 1; numberPage <= selectedPageCount; numberPage++)
            {
                PdfDictionary page = pdfReader.GetPageN(numberPage);
                var resources = (PdfDictionary)PdfReader.GetPdfObject(page.Get(PdfName.RESOURCES));
                if (resources == null)
                {
                    continue;
                }

                var xobject = (PdfDictionary)PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT));
                if (xobject == null)
                {
                    continue;
                }

                foreach (PdfName pdname in xobject.Keys)
                {
                    PdfObject obj = xobject.Get(pdname);
                    if (!obj.IsIndirect())
                    {
                        continue;
                    }

                    var tg = (PdfDictionary)PdfReader.GetPdfObject(obj); // resolve indirect reference
                    if (tg == null)
                    {
                        continue;
                    }

                    var subType = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
                    if (!PdfName.IMAGE.Equals(subType))
                    {
                        continue;
                    }

                    int xrefIndex = ((PRIndirectReference)obj).Number;
                    var imgPRStream = (PRStream)pdfReader.GetPdfObject(xrefIndex);

                    byte[] bytes = PdfReader.GetStreamBytesRaw(imgPRStream);
                    if (bytes == null || bytes.Length == 0)
                    {
                        continue;
                    }

                    try
                    {
                        var pdfImage = new iTextSharp.text.pdf.parser.PdfImageObject(imgPRStream);

                        // Images that already have a filter are left untouched; check
                        // this before paying for a full decode.
                        if (pdfImage.Get(PdfName.FILTER) != null)
                        {
                            continue;
                        }

                        using (Image img = pdfImage.GetDrawingImage())
                        {
                            if (img == null)
                            {
                                continue;
                            }

                            System.Drawing.Imaging.ImageFormat format;
                            byte[] updatedImgBytes = Controls.PdfViewControl.PDFView.ConvertImageToBytes(img, 75, out format);
                            iTextSharp.text.Image compressedImage = iTextSharp.text.Image.GetInstance(updatedImgBytes);

                            // Value comparison: ImageFormat does not overload ==.
                            if (System.Drawing.Imaging.ImageFormat.Png.Equals(format))
                            {
                                compressedImage.Deflated = true;
                            }

                            // Replace the original image object with the re-encoded one.
                            PdfReader.KillIndirect(obj);
                            stamper.Writer.AddDirectImageSimple(compressedImage, (PRIndirectReference)obj);
                        }
                    }
                    catch (Exception e)
                    {
                        // Best effort: a failing image is logged and left as it was.
                        Console.WriteLine(e.ToString());
                        continue;
                    }
                }
            }
            stamper.Close();
        }
    }
    finally
    {
        if (pdfReader != null)
        {
            pdfReader.Close();
        }
        Cursor.Current = Cursors.Default;
    }
}