/// <summary>
/// Walks every entry of the PDF cross-reference table and returns the bytes of the
/// largest image stream whose byte length is not equal to <c>WatermarkSize</c>.
/// </summary>
/// <param name="stream">Stream the PDF document is read from.</param>
/// <returns>
/// The bytes reported by <c>PdfImageObject.GetImageAsBytes()</c> for the selected image,
/// or an empty array when no image qualifies. On equal lengths the earlier image wins.
/// </returns>
public static byte[] GetPictureFromPdf(Stream stream)
{
    byte[] imageData = new byte[0];

    using (PdfReader pdfReader = new PdfReader(stream))
    {
        for (int i = 0; i < pdfReader.XrefSize; i++)
        {
            PdfObject po = pdfReader.GetPdfObject(i);
            if (po == null || !po.IsStream())
            {
                // Object not found, or not a stream: nothing to inspect.
                continue;
            }

            PRStream pst = (PRStream)po;
            PdfObject type = pst.Get(PdfName.SUBTYPE);
            if (type == null || !type.ToString().Equals(PdfName.IMAGE.ToString()))
            {
                // Only image streams are of interest.
                continue;
            }

            // Read the image once and reuse the buffer for both the size test and the result.
            byte[] candidate = new PdfImageObject(pst).GetImageAsBytes();
            if (candidate.Length != WatermarkSize && candidate.Length > imageData.Length)
            {
                imageData = candidate;
            }
        }
    }

    return imageData;
}
/// <summary>
/// Stores a generated name in <c>ImageNames</c> and a copy of the image bytes in
/// <c>Images</c> for the image described by <paramref name="renderInfo"/>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// Exceptions (including <see cref="IOException"/>) propagate to the caller with their
/// original stack trace; the former <c>catch (IOException ex) { throw ex; }</c> only reset it.
/// </remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();
    if (image == null)
    {
        return;
    }

    // Build both values before touching either list so a failure cannot leave
    // ImageNames and Images with different lengths.
    // NOTE(review): GetRef() is dereferenced unconditionally - confirm it cannot be null
    // here (e.g. for inline images).
    string imageName = string.Format(
        "Image{0}.{1}", renderInfo.GetRef().Number, image.GetFileType()
    );

    byte[] imageBytes;
    using (MemoryStream ms = new MemoryStream(image.GetImageAsBytes()))
    {
        imageBytes = ms.ToArray();
    }

    ImageNames.Add(imageName);
    Images.Add(imageBytes);
}
/// <summary>
/// Stores a generated name in <c>ImageNames</c> and a copy of the image bytes in
/// <c>Images</c> for the image described by <paramref name="renderInfo"/>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// Any exception is logged through <c>WFLogger.NLogger</c> and not rethrown. The original
/// author's note gives unsupported image types such as JBIG2 as the expected cause.
/// </remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    try
    {
        PdfImageObject image = renderInfo.GetImage();
        if (image == null)
        {
            return;
        }

        // Compute name and bytes first: because exceptions are swallowed below, adding the
        // name before the bytes were read could leave the two lists permanently out of step.
        string imageName = string.Format(
            "Image{0}.{1}", renderInfo.GetRef().Number, image.GetFileType()
        );

        byte[] imageBytes;
        using (MemoryStream ms = new MemoryStream(image.GetImageAsBytes()))
        {
            imageBytes = ms.ToArray();
        }

        ImageNames.Add(imageName);
        Images.Add(imageBytes);
    }
    catch (Exception ie)
    {
        WFLogger.NLogger.ErrorException("ERROR: RenderImage failed!", ie);
    }
}
/// <summary>
/// Re-encodes the image held in <paramref name="pdfImageObject"/> (compress, shrink,
/// optionally convert to a paletted format) and writes the result over the indirect
/// object <paramref name="pdfObject"/> through <paramref name="stamper"/>.
/// </summary>
/// <param name="pdfObject">Indirect reference of the image; killed and reused for the new image.</param>
/// <param name="pdfImageObject">Image stream dictionary; must be a <c>PRStream</c>.</param>
/// <param name="stamper">Stamper whose writer receives the replacement image.</param>
/// <param name="pageNum">Page number, forwarded to <c>CompressImage</c>.</param>
/// <remarks>
/// Returns without modifying the document when <c>CompressImage</c> yields no bitmap or
/// <c>ShrinkImage</c> yields null.
/// </remarks>
public void Compress(PdfObject pdfObject, PdfDictionary pdfImageObject, PdfStamper stamper, int pageNum)
{
    var image = new PdfImageObject((PRStream)pdfImageObject);
    var oldBytes = image.GetImageAsBytes();
    var compressedBitmapInfo = CompressImage(oldBytes, CompressionQuality.Low, pageNum);

    Bitmap bitmap = compressedBitmapInfo.Item1;
    if (bitmap == null)
    {
        return;
    }

    iTextSharp.text.Image compressedImage;
    try
    {
        using (var shrinkedBitmap = ShrinkImage(bitmap, _recommendedCompression))
        {
            if (shrinkedBitmap == null)
            {
                // The outer finally still releases the source bitmap on this path.
                return;
            }

            Image palettedImage = shrinkedBitmap;
            try
            {
                if (compressedBitmapInfo.Item2 != PixelFormat.Undefined)
                {
                    var bitsPerPixel = GetIntPalette(compressedBitmapInfo.Item2);
                    if (bitsPerPixel > 0)
                    {
                        palettedImage = GdiPaletteConverter.ConvertBitmapTo1Or8Bpp(shrinkedBitmap, bitsPerPixel);
                    }
                }

                using (var msInternal = new MemoryStream())
                {
                    // Original author's note: for the tested files PNG took up to 10% less
                    // space than JPEG.
                    palettedImage.Save(msInternal, _imageOutputFormat);
                    compressedImage = iTextSharp.text.Image.GetInstance(msInternal.ToArray());
                }
            }
            finally
            {
                // shrinkedBitmap is owned by the enclosing using; only dispose a separate
                // converted image here.
                if (palettedImage != null && !ReferenceEquals(palettedImage, shrinkedBitmap))
                {
                    palettedImage.Dispose();
                }
            }
        }
    }
    finally
    {
        bitmap.Dispose();
    }

    PdfReader.KillIndirect(pdfObject);
    var mask = compressedImage.ImageMask;
    if (mask != null)
    {
        stamper.Writer.AddDirectImageSimple(mask);
    }

    stamper.Writer.AddDirectImageSimple(compressedImage, (PRIndirectReference)pdfObject);
}
/// <summary>
/// Loads a test PDF, decodes the XObject image named <paramref name="objectid"/> on
/// <paramref name="page"/>, and asserts that the decoded bytes are non-null and contain
/// at least one zero byte.
/// </summary>
/// <param name="filename">Resource file name under <c>TEST_RESOURCES_PATH</c>.</param>
/// <param name="page">Page whose resources are searched.</param>
/// <param name="objectid">Name of the XObject entry to decode.</param>
/// <exception cref="NullReferenceException">
/// Thrown when no XObject with the given name exists; the message lists the available keys.
/// </exception>
private void TestFile(String filename, int page, String objectid)
{
    PdfReader pdfReader = TestResourceUtils.GetResourceAsPdfReader(TEST_RESOURCES_PATH, filename);
    try
    {
        PdfDictionary resources = pdfReader.GetPageResources(page);
        PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT);
        PdfIndirectReference objRef = xobjects.GetAsIndirectObject(new PdfName(objectid));
        if (objRef == null)
        {
            // Join the keys explicitly: concatenating the key collection itself only prints
            // its type name. The exception type is kept for existing callers.
            throw new NullReferenceException(
                "Reference " + objectid + " not found - Available keys are " + string.Join(", ", xobjects.Keys));
        }

        PRStream stream = (PRStream)PdfReader.GetPdfObject(objRef);

        // resources was already dereferenced above, so it cannot be null here.
        PdfDictionary colorSpaceDic = resources.GetAsDict(PdfName.COLORSPACE);
        PdfImageObject img = new PdfImageObject(stream, colorSpaceDic);
        byte[] result = img.GetImageAsBytes();

        Assert.NotNull(result);
        Assert.IsTrue(Array.IndexOf(result, (byte)0) >= 0);
    }
    finally
    {
        pdfReader.Close();
    }
}
/// <summary>
/// Writes the bytes of the rendered image to a file under <c>OutputPath</c> and registers
/// a <c>PageImageIndex</c> entry for it in <c>ImagesList</c>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// The file name is built from <c>PageIndex</c>, the current <c>ImagesList.Count</c> and the
/// file extension of the image's byte type. Bytes are written directly rather than through a
/// drawing image (original author's note: GDI+ errors sometimes occurred on that path).
/// </remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();

    string imageName = string.Format("{0:0000}_{1:0000}.{2}", PageIndex, ImagesList.Count, image.GetImageBytesType().FileExtension);
    var pageImageIndex = new PageImageIndex
    {
        ImageName = imageName,
        ImageIndex = ImagesList.Count,
        PageIndex = PageIndex
    };

    string pathToSave = string.Format(@"{0}\{1}", OutputPath, imageName);

    // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
    Directory.CreateDirectory(OutputPath);

    var bytes = image.GetImageAsBytes();
    File.WriteAllBytes(pathToSave, bytes);

    ImagesList.Add(pageImageIndex, null);
}
/// <summary>
/// Stores the name <c>qrcode.&lt;file type&gt;</c> in <c>ImageNames</c> and a copy of the
/// image bytes in <c>Images</c> for the image described by <paramref name="renderInfo"/>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// Exceptions (including <see cref="IOException"/>) propagate to the caller with their
/// original stack trace; the former <c>catch (IOException ie) { throw ie; }</c> only reset it.
/// </remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();
    if (image == null)
    {
        return;
    }

    // Build both values before touching either list so a failure cannot leave
    // ImageNames and Images with different lengths.
    string imageName = string.Format("qrcode.{0}", image.GetFileType());

    byte[] imageBytes;
    using (MemoryStream ms = new MemoryStream(image.GetImageAsBytes()))
    {
        imageBytes = ms.ToArray();
    }

    ImageNames.Add(imageName);
    Images.Add(imageBytes);
}
/// <summary>
/// Adds the drawing image of the rendered PDF image to <c>Images</c> when its byte length
/// exceeds 1600. Failures while reading the image are logged and the image is skipped.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    Image extracted;

    try
    {
        PdfImageObject pdfImage = renderInfo.GetImage();

        // Smallest image we can OCR is 40 x 40
        bool largeEnough = pdfImage.GetImageAsBytes().Length > 1600;
        extracted = largeEnough ? pdfImage.GetDrawingImage() : null;
    }
    catch (Exception ex)
    {
        _log.Error("Exception in GetImage or GetDrawingImage: {0}", ex);
        extracted = null;
    }

    if (extracted == null)
    {
        return;
    }

    this.Images.Add(extracted);
}
/// <summary>
/// Stores a generated name in <c>_imageNames</c> and the image bytes in <c>_myImages</c>
/// for the image described by <paramref name="renderInfo"/>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// JBIG2 images are skipped explicitly; the original author's note states that the JBIG2
/// decoder is not fully implemented and would otherwise produce a corrupt image. Any
/// exception is swallowed on purpose so other unsupported image types are skipped too.
/// </remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    try
    {
        PdfImageObject image = renderInfo.GetImage();
        if (image == null
            || image.GetImageBytesType() == PdfImageObject.ImageBytesType.JBIG2)
        {
            return;
        }

        // Compute name and bytes first: because exceptions are swallowed below, adding the
        // name before the bytes were read could leave the two lists permanently out of step.
        string imageName = string.Format(
            "Image{0}.{1}", renderInfo.GetRef().Number, image.GetFileType()
        );
        var imageBytes = image.GetImageAsBytes();

        _imageNames.Add(imageName);
        _myImages.Add(imageBytes);
    }
    catch
    {
        // Deliberate best-effort: pass through any other unsupported image types.
    }
}
/// <summary>
/// Records the rendered image: appends a name built from <c>PageIndex</c>, the current
/// number of recorded names and the byte type's file extension to <c>_imageNames</c>,
/// then appends the image bytes to <c>_ImageBytes</c>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject pdfImage = renderInfo.GetImage();

    var extension = pdfImage.GetImageBytesType().FileExtension;
    string generatedName = string.Format("{0:0000}_{1:0000}.{2}", PageIndex, _imageNames.Count, extension);

    _imageNames.Add(generatedName);
    _ImageBytes.Add(pdfImage.GetImageAsBytes());
}
/// <summary>
/// Writes the bytes of the rendered image to a file in <c>_outputFolder</c> and advances
/// the running image counter <c>_imgNo</c>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// The file name combines <c>_imgname</c>, <c>_currentPage</c> (three digits), <c>_imgNo</c>
/// and the file type reported by the image.
/// </remarks>
public void RenderImageOrg(ImageRenderInfo renderInfo)
{
    PdfImageObject source = renderInfo.GetImage();
    string extension = source.GetFileType();

    string targetFile = _outputFolder + "\\"
        + String.Format("{0}_{1:000}_{2}.{3}", _imgname, _currentPage, _imgNo, extension);

    File.WriteAllBytes(targetFile, source.GetImageAsBytes());
    _imgNo++;
}
// Output format for re-encoded images (original note: chosen for the T_.. and Sthlm.. files).
private ImageFormat _imageOutputFormat = ImageFormat.Png;

/// <summary>
/// Loads the image held in <paramref name="pdfImageObject"/> through FreeImage, shrinks it,
/// optionally converts it to 1 bpp, and writes the result over the indirect object
/// <paramref name="pdfObject"/> through <paramref name="stamper"/>.
/// </summary>
/// <param name="pdfObject">Indirect reference of the image; killed and reused for the new image.</param>
/// <param name="pdfImageObject">Image stream dictionary; must be a <c>PRStream</c>.</param>
/// <param name="stamper">Stamper whose writer receives the replacement image.</param>
/// <param name="pageNum">Page number; not used by this overload.</param>
/// <remarks>
/// Returns without modifying the document when <c>ShrinkImage</c> yields null. The FreeImage
/// handle and the intermediate bitmaps are released on every path, including that early
/// return and any exception.
/// </remarks>
public void Compress(PdfObject pdfObject, PdfDictionary pdfImageObject, PdfStamper stamper, int pageNum)
{
    var stream = (PRStream)pdfImageObject;
    var image = new PdfImageObject(stream);
    var imageBytes = image.GetImageAsBytes();

    using (var ms = new MemoryStream(imageBytes))
    {
        var fileType = FreeImage.GetFileTypeFromStream(ms);
        var sourceBitmap = FreeImage.LoadFromStream(ms, FREE_IMAGE_LOAD_FLAGS.PNG_IGNOREGAMMA, ref fileType);

        iTextSharp.text.Image compressedImage;
        try
        {
            using (var dotnetBitmap = FreeImage.GetBitmap(sourceBitmap))
            using (var shrinkedBitmap = ShrinkImage(dotnetBitmap, _recommendedCompression))
            {
                if (shrinkedBitmap == null)
                {
                    return;
                }

                System.Drawing.Image newImage = shrinkedBitmap;
                try
                {
                    if (DefineBitPerPixel(sourceBitmap) <= 8)
                    {
                        newImage = GdiPaletteConverter.ConvertBitmapTo1Or8Bpp(shrinkedBitmap, 1);
                    }

                    using (var msInternal = new MemoryStream())
                    {
                        newImage.Save(msInternal, _imageOutputFormat);
                        compressedImage = iTextSharp.text.Image.GetInstance(msInternal.ToArray());
                    }
                }
                finally
                {
                    // shrinkedBitmap is owned by the enclosing using; only dispose a separate
                    // converted image here.
                    if (newImage != null && !ReferenceEquals(newImage, shrinkedBitmap))
                    {
                        newImage.Dispose();
                    }
                }
            }
        }
        finally
        {
            FreeImage.Unload(sourceBitmap);
            sourceBitmap.SetNull();
        }

        PdfReader.KillIndirect(pdfObject);
        stamper.Writer.CompressionLevel = PdfStream.BEST_COMPRESSION;
        var mask = compressedImage.ImageMask;
        if (mask != null)
        {
            stamper.Writer.AddDirectImageSimple(mask);
        }

        stamper.Writer.AddDirectImageSimple(compressedImage, (PRIndirectReference)pdfObject);
    }
}
/// <summary>
/// Handles an image during clean-up. When no areas need cleaning, a placeholder chunk is
/// recorded. Otherwise the image bytes are passed through <c>ProcessImage</c>; an inline
/// image is redrawn on the context canvas, and a referenced image is recorded as a chunk
/// only when <c>ProcessImage</c> returned a different array instance.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
public virtual void RenderImage(ImageRenderInfo renderInfo)
{
    IList<Rectangle> cleanAreas = GetImageAreasToBeCleaned(renderInfo);
    if (cleanAreas == null)
    {
        chunks.Add(new PdfCleanUpContentChunk.Image(false, null));
        return;
    }

    PdfImageObject sourceImage = renderInfo.GetImage();
    byte[] processedBytes = ProcessImage(sourceImage.GetImageAsBytes(), cleanAreas);

    // A missing reference means the image is inline.
    bool isInline = renderInfo.GetRef() == null;

    if (isInline && sourceImage != null)
    {
        PdfDictionary imageDict = sourceImage.GetDictionary();

        // The mask flag is looked up under IMAGEMASK first, then under IM.
        PdfObject maskFlag = imageDict.Get(PdfName.IMAGEMASK) ?? imageDict.Get(PdfName.IM);

        Image replacement = Image.GetInstance(processedBytes);
        if (maskFlag != null && maskFlag.Equals(PdfBoolean.PDFTRUE))
        {
            replacement.MakeMask();
        }

        Context.Canvas.AddImage(replacement, 1, 0, 0, 1, 0, 0, true);
        return;
    }

    // Reference comparison on purpose: a chunk is added only when the processed buffer is a
    // different array instance from the one the image reports.
    if (sourceImage != null && !object.ReferenceEquals(processedBytes, sourceImage.GetImageAsBytes()))
    {
        chunks.Add(new PdfCleanUpContentChunk.Image(true, processedBytes));
    }
}
/// <summary>
/// Stores a generated name in <c>_imageNames</c> and the image bytes in <c>_myImages</c>
/// for the image described by <paramref name="renderInfo"/>. Null images and JBIG2 images
/// are skipped.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>Any exception is swallowed, so a failing image is silently skipped.</remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    try
    {
        PdfImageObject image = renderInfo.GetImage();
        if (image == null || image.GetImageBytesType() == PdfImageObject.ImageBytesType.JBIG2)
        {
            return;
        }

        // Compute name and bytes first: because exceptions are swallowed below, adding the
        // name before the bytes were read could leave the two lists permanently out of step.
        string imageName = string.Format("Image{0}.{1}", renderInfo.GetRef().Number, image.GetFileType());
        var imageBytes = image.GetImageAsBytes();

        _imageNames.Add(imageName);
        _myImages.Add(imageBytes);
    }
    catch
    {
        // Best-effort extraction: an image that cannot be read is skipped.
        // NOTE(review): consider logging here so skipped images are visible.
    }
}
/// <summary>
/// Extracts every image stream of at least 50 x 50 pixels from a PDF and returns each one
/// as a <see cref="MemoryStream"/> over its image bytes.
/// </summary>
/// <param name="sourcePdf">The PDF document as a byte array.</param>
/// <param name="log">Writer that receives the message of any exception raised while scanning.</param>
/// <returns>
/// The extracted images in cross-reference order. If an exception occurs mid-scan it is
/// logged and the images collected so far are returned.
/// </returns>
public static List<Stream> ExtractImagesFromPDF(byte[] sourcePdf, TraceWriter log)
{
    List<Stream> imgList = new List<Stream>();
    PdfReader reader = new PdfReader(sourcePdf);

    try
    {
        int n = reader.XrefSize;
        try
        {
            for (int i = 0; i < n; i++)
            {
                PdfObject pdfObject = reader.GetPdfObject(i);
                if (pdfObject == null || !pdfObject.IsStream())
                {
                    continue;
                }

                PRStream prStream = (PRStream)pdfObject;
                PdfObject type = prStream.Get(PdfName.SUBTYPE);
                if (type == null || !type.ToString().Equals(PdfName.IMAGE.ToString()))
                {
                    continue;
                }

                PdfImageObject pdfImgObject = new PdfImageObject(prStream);

                // The drawing image is only needed to read the dimensions; dispose it right away.
                using (var image = pdfImgObject.GetDrawingImage())
                {
                    // only add images larger than 50x50 for OCR processing
                    if (image.Height >= 50 && image.Width >= 50)
                    {
                        byte[] imgdata = pdfImgObject.GetImageAsBytes();
                        imgList.Add(new MemoryStream(imgdata));
                    }
                }
            }
        }
        catch (Exception e)
        {
            log.Error(e.Message);
        }
    }
    finally
    {
        // The returned streams wrap in-memory byte arrays, so the reader can be closed here.
        reader.Close();
    }

    return imgList;
}
/// <summary>
/// Reads every signature field of the given PDF, decodes the signature object embedded in
/// each field's image, and adds one tree node per successfully decoded signature to
/// <c>treeView1</c> with its details as child nodes.
/// </summary>
/// <param name="pdf_filename">Path of the PDF file to inspect.</param>
public void loadSignInfo(string pdf_filename)
{
    PdfReader reader = new PdfReader(pdf_filename);
    try
    {
        AcroFields fields = reader.AcroFields;
        int sigIndex = 1;
        SignatureImageExtractor extractor = new SignatureImageExtractor(reader);

        foreach (string sigFieldName in fields.GetSignatureNames())
        {
            PdfImageObject image = extractor.extractImage(sigFieldName);
            if (image == null)
            {
                // NOTE(review): presumably a field without an appearance image yields null -
                // confirm against SignatureImageExtractor. Such a field is skipped.
                continue;
            }

            SigObj sig = new SigObj();
            ReadEncodedBitmapResult result = sig.ReadEncodedBitmap(image.GetImageAsBytes());
            if (result != ReadEncodedBitmapResult.ReadEncodedBitmapOK)
            {
                continue;
            }

            treeView1.BeginUpdate();
            try
            {
                // Use the node returned by Add instead of indexing Nodes[sigIndex - 1], which
                // points at the wrong node when the tree already contains entries.
                var node = treeView1.Nodes.Add("Signature " + sigIndex);
                node.Nodes.Add("Name: " + sig.Who);
                node.Nodes.Add("Reason: " + sig.Why);
                node.Nodes.Add("Timestamp: " + sig.When);
                node.Nodes.Add("Digitizer: " + sig.get_AdditionalData(CaptData.CaptDigitizer));
                node.Nodes.Add("Digitizer Driver: " + sig.get_AdditionalData(CaptData.CaptDigitizerDriver));
                node.Nodes.Add("Machine OS: " + sig.get_AdditionalData(CaptData.CaptMachineOS));
                node.Nodes.Add("Network Card: " + sig.get_AdditionalData(CaptData.CaptNetworkCard));
                node.Nodes.Add("Signature Covers whole document: " + fields.SignatureCoversWholeDocument(sigFieldName).ToString());
                node.Nodes.Add("Document Revision: " + fields.GetRevision(sigFieldName).ToString() + " of " + fields.TotalRevisions.ToString());

                PdfPKCS7 pkcs7 = fields.VerifySignature(sigFieldName);
                node.Nodes.Add("Integrity Check OK? " + pkcs7.Verify().ToString());
            }
            finally
            {
                // Always pair BeginUpdate with EndUpdate, even if a lookup above throws.
                treeView1.EndUpdate();
            }

            sigIndex = sigIndex + 1;
        }
    }
    finally
    {
        reader.Close();
    }

    treeView1.ExpandAll();
}
/// <summary>
/// Saves the rendered image to <c>outputPath</c> under a numbered file name and appends a
/// text chunk of the form <c>[filename]</c>, positioned by the image's transformation
/// matrix, to the chunk list obtained through <c>field</c>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
public override void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject pdfImage = renderInfo.GetImage();
    if (pdfImage != null)
    {
        // Take the current counter value for this image, then advance it.
        int sequence = counter;
        counter = sequence + 1;

        String filename = name + "-" + sequence + "." + pdfImage.GetFileType();
        String destination = outputPath + filename;
        File.WriteAllBytes(destination, pdfImage.GetImageAsBytes());

        // Map the unit line through the image matrix to get the marker's start and end points.
        LineSegment placement = UNIT_LINE.TransformBy(renderInfo.GetImageCTM());
        TextChunk marker = new TextChunk("[" + filename + "]", placement.GetStartPoint(), placement.GetEndPoint(), 0f);

        // The chunk list is read through the reflected field handle.
        var collected = (List<TextChunk>)field.GetValue(this);
        collected.Add(marker);
    }
}
/// <summary>
/// Collects descriptive information about the rendered image (byte count, filter, file
/// type and, when a drawing image is available, dimensions, pixel format and vertical
/// resolution) and appends it to <c>Images</c>.
/// </summary>
/// <param name="info">Render information for the image being processed.</param>
/// <remarks>
/// Width, height and resolution stay at -1 and the format stays <c>Undefined</c> for images
/// whose filter is <c>/JBIG2Decode</c> or <c>/JPXDecode</c>; no drawing image is requested
/// for those.
/// </remarks>
public void RenderImage(ImageRenderInfo info)
{
    PdfImageObject image = info.GetImage();
    var fileType = image.GetFileType();
    var imgBytes = image.GetImageAsBytes();

    var imgInfo = "Unknown";
    var filter = image.Get(PdfName.FILTER);
    if (filter != null)
    {
        // Commas are replaced with spaces in the filter text.
        imgInfo = filter.ToString().Replace(',', ' ');
    }

    int imgWidth = -1;
    int imgHeight = -1;
    int imgResolution = -1;
    PixelFormat imgFormat = PixelFormat.Undefined;

    if (imgInfo != "/JBIG2Decode" && imgInfo != "/JPXDecode")
    {
        // using guarantees the drawing image is released even if a property read throws.
        using (var img = image.GetDrawingImage())
        {
            imgWidth = img.Width;
            imgHeight = img.Height;
            imgFormat = img.PixelFormat;
            imgResolution = Convert.ToInt32(img.VerticalResolution);
        }
    }

    Images.Add(new PDFImageInfo()
    {
        ImageBytes = imgBytes.Length,
        ImageFormat = imgFormat.ToString(),
        ImageHeight = imgHeight,
        ImageWidth = imgWidth,
        ImageResolution = imgResolution,
        ImageInfo = imgInfo,
        ImageType = fileType
    });
}
/// <summary>
/// Decodes the rendered image into a bitmap, stores a clone of it in <c>Image</c>, and
/// sets the bitmap resolution when the horizontal and vertical estimates agree.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// The resolution estimate divides the height by 11 and the width by 8.5 (presumably a
/// letter-size page scan - confirm), then snaps values within 10 of 300 or 600 to those
/// values. An <see cref="IOException"/> is swallowed; the original author's note gives
/// unsupported image types such as JBIG2 as the expected cause.
/// </remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    try
    {
        PdfImageObject image = renderInfo.GetImage();
        if (image == null)
        {
            return;
        }

        using (MemoryStream ms = new MemoryStream(image.GetImageAsBytes()))
        {
            // using guarantees the decoded bitmap is released even if Clone throws.
            using (System.Drawing.Bitmap decoded = (System.Drawing.Bitmap)Bitmap.FromStream(ms))
            {
                Image = (System.Drawing.Bitmap)decoded.Clone();
            }

            int yDPI = Image.Height / 11;
            int xDPI = (Image.Width * 2) / 17;

            xDPI = Math.Abs(xDPI - 300) < 10 ? 300 : xDPI;
            yDPI = Math.Abs(yDPI - 300) < 10 ? 300 : yDPI;
            xDPI = Math.Abs(xDPI - 600) < 10 ? 600 : xDPI;
            yDPI = Math.Abs(yDPI - 600) < 10 ? 600 : yDPI;

            // Only apply the estimate when both axes agree; otherwise keep the decoded resolution.
            if (xDPI == yDPI)
            {
                Image.SetResolution(xDPI, yDPI);
            }
        }
    }
    catch (IOException)
    {
        // Deliberate pass-through: image type not supported by iText[Sharp]; e.g. jbig2
    }
}
/// <summary>
/// Stores a copy of the image bytes in <c>Images</c> and the image file type in
/// <c>ImageExt</c> for the image described by <paramref name="renderInfo"/>.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// An <see cref="IOException"/> is swallowed and the image skipped; the original author's
/// note gives unsupported image types such as JBIG2 as the expected cause.
/// </remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    try
    {
        // GetImage is called inside the try so the catch below covers it; an extra call
        // before the try would bypass the intended pass-through.
        PdfImageObject image = renderInfo.GetImage();
        if (image == null)
        {
            return;
        }

        // Read both values before touching either list so a swallowed exception cannot
        // leave Images and ImageExt with different lengths.
        byte[] imageBytes;
        using (MemoryStream ms = new MemoryStream(image.GetImageAsBytes()))
        {
            imageBytes = ms.ToArray();
        }
        var fileType = image.GetFileType();

        Images.Add(imageBytes);
        ImageExt.Add(fileType);
    }
    catch (IOException)
    {
        // Deliberate pass-through: image type not supported by iText[Sharp]; e.g. jbig2
    }
}
/// <summary>
/// Decodes the rendered image with SkiaSharp and stores the resulting <c>SKImage</c> in
/// <c>Images</c> together with a file extension derived from the stream filter.
/// Images without a filter entry are ignored.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
/// <remarks>
/// The extension is "." followed by the JPG, JP2, PNG or CCITT extension for the DCT, JPX,
/// Flate and LZW filters respectively, and just "." for any other filter.
/// NOTE(review): the filter entry is cast to <c>PdfName</c>; confirm whether images whose
/// filter is an array need to be supported here.
/// </remarks>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();
    PdfName filter = (PdfName)image.Get(PdfName.FILTER);
    if (filter == null)
    {
        return;
    }

    byte[] encodedBytes = image.GetImageAsBytes();

    // Compare with Equals so the match does not depend on both sides being the same
    // PdfName instance.
    string extension = ".";
    if (filter.Equals(PdfName.DCTDECODE))
    {
        extension += PdfImageObject.ImageBytesType.JPG.FileExtension;
    }
    else if (filter.Equals(PdfName.JPXDECODE))
    {
        extension += PdfImageObject.ImageBytesType.JP2.FileExtension;
    }
    else if (filter.Equals(PdfName.FLATEDECODE))
    {
        extension += PdfImageObject.ImageBytesType.PNG.FileExtension;
    }
    else if (filter.Equals(PdfName.LZWDECODE))
    {
        extension += PdfImageObject.ImageBytesType.CCITT.FileExtension;
    }

    // The bytes from GetImageAsBytes() are handed to SkiaSharp for decoding. The
    // intermediate bitmap is only needed to build the SKImage, so it is disposed here.
    using (var skbit = SkiaSharp.SKBitmap.Decode(encodedBytes))
    {
        var skimg = SKImage.FromBitmap(skbit);
        this.Images.Add(skimg, extension);
    }
}
/// <summary>
/// Produces the image bytes for the current image (or its mask, when <c>IsMask</c> is set),
/// choosing the extraction strategy from <c>ImageFilter</c>.
/// </summary>
/// <param name="obj">Not read by this method.</param>
/// <returns>The extracted image bytes; never null.</returns>
/// <exception cref="Exception">
/// Thrown when no bytes were produced, including when <c>ImageFilter</c> is not one of the
/// handled values.
/// </exception>
private byte[] _GetBytesFromObject(PdfDictionary obj)
{
    PRStream source = IsMask ? _maskStream : _stream;
    byte[] bytes = null;

    switch (ImageFilter)
    {
        case ImageFilter.FlateDecode:
            try
            {
                bytes = _renderInfo.GetImage().GetImageAsBytes();
            }
            catch (UnsupportedPdfException)
            {
                // Fallback: inflate the raw stream manually and post-process the result.
                byte[] raw = PdfReader.GetStreamBytesRaw(source);
                bytes = PdfReader.FlateDecode(raw, true);
                _PostProcessBytes(ref bytes);
            }
            break;

        case ImageFilter.CCITTFaxDecode:
            try
            {
                bytes = _renderInfo.GetImage().GetImageAsBytes();
            }
            catch (UnsupportedPdfException)
            {
                // Fallback: pass the raw stream bytes through _GetTiff.
                bytes = _GetTiff(PdfReader.GetStreamBytesRaw(source));
            }
            break;

        case ImageFilter.None:
        case ImageFilter.DCTDecode:
        case ImageFilter.JPXDECODE:
        case ImageFilter.JBIG2DECODE:
        case ImageFilter.ASCII85DECODE:
            // 1- and 4-bit images are returned as raw stream bytes; everything else goes
            // through PdfImageObject.
            bytes = (BitsPerPixel == 1 || BitsPerPixel == 4)
                ? PdfReader.GetStreamBytesRaw(source)
                : new PdfImageObject(source).GetImageAsBytes();
            break;
    }

    if (bytes != null)
    {
        return bytes;
    }

    throw new Exception("Cant Get Image Bytes");
}
/// <summary>
/// Extracts the non-blank, trimmed text lines of every page of a PDF and, as a side effect,
/// stores the largest qualifying embedded image in <c>resume.PictureData</c>.
/// </summary>
/// <param name="resume">
/// Resume whose <c>PictureData</c> is replaced by any image that is larger than the current
/// value and whose length is not 14593. When <paramref name="resume"/> or its
/// <c>PictureData</c> is null the comparison is false and no picture is stored.
/// </param>
/// <param name="stream">Stream the PDF document is read from.</param>
/// <returns>The collected text split on <see cref="Environment.NewLine"/>.</returns>
/// <remarks>
/// Exceptions raised while scanning for images are written to the console and end the scan;
/// the text result is still returned.
/// </remarks>
public static string[] GetContentFromPdf(Resume resume, Stream stream)
{
    StringBuilder stringBuilder = new StringBuilder();

    using (PdfReader pdfReader = new PdfReader(stream))
    {
        for (int i = 1; i <= pdfReader.NumberOfPages; i++)
        {
            string thePage = PdfTextExtractor.GetTextFromPage(pdfReader, i, new SimpleTextExtractionStrategy());
            foreach (string theLine in thePage.Split("\n"))
            {
                // IsNullOrWhiteSpace already covers null and empty strings.
                if (!string.IsNullOrWhiteSpace(theLine))
                {
                    stringBuilder.AppendLine(theLine.Trim());
                }
            }
        }

        int n = pdfReader.XrefSize; // number of objects in pdf document

        // NOTE(review): hard-coded developer path used only for the debug image dump below;
        // consider removing the dump or making the location configurable.
        String path = @"C:\Users\rka\Desktop\ResumeParserApp\ResumeParserApp\ResumeParserApp";

        try
        {
            for (int i = 0; i < n; i++)
            {
                PdfObject po = pdfReader.GetPdfObject(i);
                if (po == null || !po.IsStream())
                {
                    continue;
                }

                PRStream pst = (PRStream)po;
                PdfObject type = pst.Get(PdfName.SUBTYPE);
                if (type == null || !type.ToString().Equals(PdfName.IMAGE.ToString()))
                {
                    continue;
                }

                // Read the image once and reuse the buffer.
                byte[] candidate = new PdfImageObject(pst).GetImageAsBytes();

                // 14593 is the watermark size produced when converting Doc to Pdf (original note).
                if (candidate.Length != 14593 && candidate.Length > resume?.PictureData?.Length)
                {
                    // Assign before the debug dump so a failed file write cannot prevent
                    // the picture from being captured.
                    resume.PictureData = candidate;

                    try
                    {
                        File.WriteAllBytes(path + "image" + i + ".jpg", candidate);
                    }
                    catch (IOException e)
                    {
                        // The dump is best-effort; keep scanning the remaining objects.
                        Console.WriteLine(e.Message);
                    }
                    catch (UnauthorizedAccessException e)
                    {
                        Console.WriteLine(e.Message);
                    }
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
        }
    }

    // AppendLine terminates lines with Environment.NewLine, so split on the same value;
    // a literal "\r\n" only matches on Windows.
    return stringBuilder.ToString().Split(Environment.NewLine);
}
/// <summary>
/// Counts the rendered image, flags that a title is expected next, and writes the image
/// bytes to a file whose name is produced from <c>format</c>, the image number and the
/// image file type. When the image cannot be obtained a message is written to the console.
/// </summary>
/// <param name="renderInfo">Render information for the image being processed.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    imageNumber++;
    expectingTitle = true;

    PdfImageObject pdfImage = renderInfo.GetImage();
    if (pdfImage != null)
    {
        string targetFile = string.Format(format, imageNumber, pdfImage.GetFileType());
        File.WriteAllBytes(targetFile, pdfImage.GetImageAsBytes());
        return;
    }

    Console.WriteLine("Image {0} could not be read.", imageNumber);
}