/// <summary>
/// Extracts the image XObjects referenced by each page of a PDF file and re-encodes
/// the ones that meet the minimum size in the requested format.
/// </summary>
/// <remarks>
/// BT = Beginning of a text object operator
/// ET = End of a text object operator
/// Td move to the start of next line
/// 5 Ts = superscript
/// -5 Ts = subscript
/// </remarks>
/// <param name="inFileName">Path of the PDF file to read.</param>
/// <param name="imageFormat">Format used to encode every extracted image.</param>
/// <param name="minimumHeight">Images lower than this many pixels are skipped.</param>
/// <param name="minumumWidth">Images narrower than this many pixels are skipped.</param>
/// <returns>The encoded bytes of every image that passed the size filter.</returns>
public List<byte[]> ExtractImages(string inFileName, ImageFormat imageFormat, int minimumHeight, int minumumWidth)
{
    List<byte[]> extractedImages = new List<byte[]>();
    PdfReader pdfReader = new PdfReader(inFileName);
    try
    {
        for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
        {
            PdfDictionary pdfDictionary = pdfReader.GetPageN(pageNumber);
            PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(pdfDictionary.Get(PdfName.RESOURCES));
            if (res == null)
            {
                continue;
            }

            PdfDictionary xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
            if (xobj == null)
            {
                // The page references no XObjects at all.
                continue;
            }

            foreach (PdfName name in xobj.Keys)
            {
                PdfObject obj = xobj.Get(name);
                if (!obj.IsIndirect())
                {
                    continue;
                }

                PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                if (tg == null)
                {
                    continue;
                }

                PdfObject widthObj = tg.Get(PdfName.WIDTH);
                PdfObject heightObj = tg.Get(PdfName.HEIGHT);
                if (widthObj == null || heightObj == null)
                {
                    // Without both dimensions the render info below cannot be built.
                    continue;
                }

                string width = widthObj.ToString();
                string height = heightObj.ToString();
                ImageRenderInfo imgRI = ImageRenderInfo.CreateForXObject(
                    new Matrix(float.Parse(width), float.Parse(height)),
                    (PRIndirectReference)obj,
                    tg);
                PdfImageObject pdfImageObject = imgRI.GetImage();

                // Decode exactly once; the decoded image is disposed when the block exits.
                using (Image image = pdfImageObject.GetDrawingImage())
                {
                    if (image == null)
                    {
                        continue;
                    }

                    if (image.Height >= minimumHeight && image.Width >= minumumWidth)
                    {
                        using (MemoryStream ms = new MemoryStream())
                        {
                            image.Save(ms, imageFormat);
                            extractedImages.Add(ms.ToArray());
                        }
                    }
                }
            }
        }
    }
    finally
    {
        // Release the reader even when a page fails to parse or decode.
        pdfReader.Close();
    }

    return extractedImages;
}
/// <summary>
/// Builds a short description of a PDF object for display purposes.
/// </summary>
/// <param name="obj">The object to describe; may be null.</param>
/// <returns>
/// "unknown" for a null object, an empty string for a non-stream object,
/// "WxH type Image" for a stream that decodes as an image, and otherwise a
/// "key:value " listing of the stream dictionary.
/// </returns>
private string GetStreamType(PdfObject obj)
{
    if (obj == null)
    {
        return "unknown";
    }

    if (!obj.IsStream())
    {
        return string.Empty;
    }

    var stream = (PRStream)obj;
    try
    {
        var pdfImage = new PdfImageObject(stream);

        // The decoded image is only needed for its dimensions, so dispose it right away.
        using (var drawingImage = pdfImage.GetDrawingImage())
        {
            if (drawingImage != null)
            {
                return $"{drawingImage.Width}x{drawingImage.Height} {pdfImage.GetFileType()} Image";
            }
        }
    }
    catch (Exception)
    {
        // Not decodable as an image: fall through to the dictionary listing below.
    }

    var sb = new StringBuilder();
    foreach (var item in stream.Keys)
    {
        var streamKeyValue = stream.Get(item);
        sb.Append(item + ":" + streamKeyValue + " ");
    }

    return sb.ToString();
}
/// <summary>
/// Gets all the images in the given document
/// </summary>
/// <param name="file">Path of the PDF file to scan.</param>
/// <returns>
/// One decoded image per image stream found in the cross-reference table.
/// Streams whose decoder yields no image are skipped. The caller owns the
/// returned images and is responsible for disposing them.
/// </returns>
private List<Image> ExtractImages(string file)
{
    var imgList = new List<Image>();
    var randomAccess = new RandomAccessFileOrArray(file);
    var reader = new PdfReader(randomAccess, null);
    try
    {
        for (int i = 0; i < reader.XrefSize; i++)
        {
            PdfObject pdfObject = reader.GetPdfObject(i);
            if (pdfObject == null || !pdfObject.IsStream())
            {
                continue;
            }

            var pdfStream = (PdfStream)pdfObject;
            PdfObject subtype = pdfStream.Get(PdfName.SUBTYPE);
            if (subtype == null || subtype.ToString() != PdfName.IMAGE.ToString())
            {
                continue;
            }

            var pdfImageObj = new PdfImageObject((PRStream)pdfStream);
            Image decoded = pdfImageObj.GetDrawingImage();
            if (decoded != null)
            {
                imgList.Add(decoded);
            }
        }
    }
    finally
    {
        // Close the reader even if a stream fails to decode.
        reader.Close();
    }

    return imgList;
}
/// <summary>
/// Render-listener callback: decodes the rendered image, records it in
/// <c>Images</c> together with a file extension derived from its filter, and
/// writes it to "temp" + extension in the application base directory.
/// </summary>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();

    // /Filter may also be an array; `as` skips those images instead of throwing
    // InvalidCastException the way a hard cast does.
    var filter = image.Get(PdfName.FILTER) as PdfName;
    if (filter == null)
    {
        return;
    }

    System.Drawing.Image drawingImage = image.GetDrawingImage();
    if (drawingImage == null)
    {
        // Nothing to store or save.
        return;
    }

    // Value comparison via Equals, so a name instance created by the parser still matches.
    string extension = ".";
    if (PdfName.DCTDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.JPG.FileExtension;
    }
    else if (PdfName.JPXDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.JP2.FileExtension;
    }
    else if (PdfName.FLATEDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.PNG.FileExtension;
    }
    else if (PdfName.LZWDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.CCITT.FileExtension;
    }

    this.Images.Add(drawingImage, extension);

    // Path.Combine inserts the directory separator only when it is missing.
    string path = System.IO.Path.GetFullPath(System.AppDomain.CurrentDomain.BaseDirectory);
    drawingImage.Save(System.IO.Path.Combine(path, "temp" + extension), drawingImage.RawFormat);
}
/* ----------------------------------------------------------------- */
///
/// RenderImage
///
/// <summary>
/// Occurs when the specified image is rendered.
/// </summary>
///
/// <param name="info">Render information of the image.</param>
///
/// <remarks>
/// Images whose filter is not a single name, or that cannot be decoded,
/// are ignored. When the image carries a soft mask stream, the result
/// of Restore(image, mask) is stored; if Restore yields null, or the
/// mask entry is not a stream, the unmasked image is stored instead.
/// </remarks>
///
/* ----------------------------------------------------------------- */
public void RenderImage(ImageRenderInfo info)
{
    var obj = info.GetImage();
    if (!(obj.Get(PdfName.FILTER) is PdfName))
    {
        return;
    }

    var raw = obj.GetDrawingImage();
    if (raw == null)
    {
        return;
    }

    // Guard the cast: an SMASK entry that is absent or not a stream must not
    // reach the PdfImageObject constructor as null.
    var sm = obj.GetDictionary().GetDirectObject(PdfName.SMASK) as PRStream;
    if (sm == null)
    {
        _inner.Add(raw);
        return;
    }

    var mask = new PdfImageObject(sm).GetDrawingImage();
    var dest = Restore(raw as Bitmap, mask as Bitmap);
    _inner.Add(dest ?? raw);
}
/// <summary>
/// Render-listener callback: decodes the rendered image and records it in
/// <c>Images</c> together with a file extension derived from its filter.
/// </summary>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();

    // /Filter may also be an array; `as` skips those images instead of throwing
    // InvalidCastException the way a hard cast does.
    PdfName filter = image.Get(PdfName.FILTER) as PdfName;
    if (filter == null)
    {
        return;
    }

    System.Drawing.Image drawingImage = image.GetDrawingImage();

    // Value comparison via Equals, so a name instance created by the parser still matches.
    string extension = ".";
    if (PdfName.DCTDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.JPG.FileExtension;
    }
    else if (PdfName.JPXDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.JP2.FileExtension;
    }
    else if (PdfName.FLATEDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.PNG.FileExtension;
    }
    else if (PdfName.LZWDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.CCITT.FileExtension;
    }

    this.Images.Add(drawingImage, extension);
}
/// <summary>
/// Render-listener callback: decodes the rendered image when its byte payload
/// is large enough and appends it to <c>Images</c>. Failures while reading or
/// decoding are logged and the image is skipped.
/// </summary>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    Image decoded = null;
    try
    {
        PdfImageObject pdfImage = renderInfo.GetImage();
        int payloadLength = pdfImage.GetImageAsBytes().Length;

        // Smallest image we can OCR is 40 x 40
        bool largeEnough = payloadLength > 1600;
        if (largeEnough)
        {
            decoded = pdfImage.GetDrawingImage();
        }
    }
    catch (Exception ex)
    {
        _log.Error("Exception in GetImage or GetDrawingImage: {0}", ex);
    }

    if (decoded == null)
    {
        return;
    }

    this.Images.Add(decoded);
}
/// <summary>
/// Render-listener callback: records pixel size, pixel format and the effective
/// horizontal/vertical DPI of the rendered image in <c>ImagesInfo</c>.
/// </summary>
/// <remarks>
/// An <c>ImageInfo</c> entry is added for every image. When the image cannot be
/// decoded, the entry keeps its default values and a warning is logged.
/// </remarks>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();
    ImageInfo eii = new ImageInfo();
    Matrix m = renderInfo.GetImageCTM();
    try
    {
        // Only metadata is read, so the decoded image is disposed as soon as we are done.
        using (Image drawingImage = image.GetDrawingImage())
        {
            // Named constants for the same matrix elements the code previously read as m[0] and m[4].
            double hPoints = m[Matrix.I11];
            double vPoints = m[Matrix.I22];

            //72 Points = 1 inch so...
            double widthInches = hPoints / 72;
            double heightInches = vPoints / 72;
            double hDPI = drawingImage.Width / widthInches;
            double vDPI = drawingImage.Height / heightInches;

            eii.hDPI = Math.Round(hDPI);
            eii.vDPI = Math.Round(vDPI);
            eii.width = drawingImage.Width;
            eii.height = drawingImage.Height;
            eii.pixelFormat = drawingImage.PixelFormat;
        }
    }
    catch (Exception e)
    {
        //It was not possible to extract image with image.GetDrawingImage();
        //Don't throw exception to continue parsing the document
        log.Warn(e.Message, e);
    }

    ImagesInfo.Add(eii);
}
/// <summary>
/// Extract Image from PDF file and Store in Image Object
/// </summary>
/// <param name="pdfPath">Specify PDF Source Path</param>
/// <returns>
/// One decoded image per image stream found in the cross-reference table.
/// Streams whose decoder yields no image are skipped. The caller owns the
/// returned images and is responsible for disposing them.
/// </returns>
public static List<Image> ExtractImages(string pdfPath)
{
    var images = new List<Image>();
    var rafObj = new RandomAccessFileOrArray(pdfPath);
    var pdfReader = new PdfReader(rafObj, null);
    try
    {
        for (int i = 0; i < pdfReader.XrefSize; i++)
        {
            var pdfObject = pdfReader.GetPdfObject(i);
            if (pdfObject == null || !pdfObject.IsStream())
            {
                continue;
            }

            var pdfStream = (PdfStream)pdfObject;
            var subtype = pdfStream.Get(PdfName.SUBTYPE);
            if (subtype == null || subtype.ToString() != PdfName.IMAGE.ToString())
            {
                continue;
            }

            var pdfImageObj = new PdfImageObject((PRStream)pdfStream);
            var image = pdfImageObj.GetDrawingImage();
            if (image != null)
            {
                images.Add(image);
            }
        }
    }
    finally
    {
        // Close the reader even if a stream fails to decode.
        pdfReader.Close();
    }

    return images;
}
/// <summary>
/// Render-listener callback: decodes the rendered image and records it in
/// <c>Images</c> together with a file extension derived from its filter.
/// </summary>
/// <remarks>
/// Images are recorded whether or not a filter name is available; without one
/// the extension is just ".".
/// </remarks>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();
    if (image == null)
    {
        return;
    }

    // Reading the filter with `as` cannot throw (a hard cast fails when /Filter is
    // an array), so the filter no longer has to be forced to null and the
    // extension mapping below is live again.
    PdfName filter = image.Get(PdfName.FILTER) as PdfName;

    /* Rather than struggle with the image stream and try to figure out how to handle
     * BitMapData scan lines in various formats (like virtually every sample I've found
     * online), use the PdfImageObject.GetDrawingImage() method, which does the work for us.
     */
    System.Drawing.Image drawingImage = image.GetDrawingImage();

    // constant.Equals(filter) is a value comparison and is false for a null filter.
    string extension = ".";
    if (PdfName.DCTDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.JPG.FileExtension;
    }
    else if (PdfName.JPXDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.JP2.FileExtension;
    }
    else if (PdfName.FLATEDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.PNG.FileExtension;
    }
    else if (PdfName.LZWDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.CCITT.FileExtension;
    }

    // No try/catch here: the former `catch { throw ex; }` only reset the stack trace.
    this.Images.Add(drawingImage, extension);
}
/// <summary>
/// Render-listener callback: appends the decoded image to <c>Images</c>, or
/// reports on the console that the image object could not be obtained.
/// </summary>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject pdfImage = renderInfo.GetImage();
    if (pdfImage != null)
    {
        Images.Add(pdfImage.GetDrawingImage());
        return;
    }

    Console.WriteLine("Image {0} could not be read.", renderInfo.GetRef().Number);
}
/// <summary>
/// Tries to decode the stream as an image and show it in <c>picImage</c>.
/// </summary>
/// <param name="stream">The stream to decode.</param>
/// <returns>true when no exception occurred while decoding and displaying; otherwise false.</returns>
private bool TryToReadImage(PRStream stream)
{
    try
    {
        PdfImageObject decoder = new PdfImageObject(stream);

        // Clear the picture box first, then show the freshly decoded image.
        picImage.Image = null;
        picImage.Image = decoder.GetDrawingImage();
        tsMessage.Text = "Image Size = " + stream.Length;
    }
    catch (Exception)
    {
        return false;
    }

    return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Decodes every image stream found in the cross-reference table of a PDF file.
/// </summary>
/// <param name="PDFSourcePath">Path of the PDF file to scan.</param>
/// <returns>
/// The decoded images. Streams that fail to decode are skipped. The caller owns
/// the returned images and is responsible for disposing them.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the file cannot be opened or scanned; the original failure is
/// available as <c>InnerException</c>.
/// </exception>
public static List<System.Drawing.Image> ExtractImages(string PDFSourcePath)
{
    List<System.Drawing.Image> imgList = new List<System.Drawing.Image>();
    PdfReader PDFReaderObj = null;
    try
    {
        RandomAccessFileOrArray RAFObj = new RandomAccessFileOrArray(PDFSourcePath);
        PDFReaderObj = new PdfReader(RAFObj, null);
        for (int i = 0; i < PDFReaderObj.XrefSize; i++)
        {
            PdfObject PDFObj = PDFReaderObj.GetPdfObject(i);
            if (PDFObj == null || !PDFObj.IsStream())
            {
                continue;
            }

            PdfStream PDFStremObj = (PdfStream)PDFObj;
            iTextSharp.text.pdf.PdfObject subtype = PDFStremObj.Get(PdfName.SUBTYPE);
            if (subtype == null || subtype.ToString() != PdfName.IMAGE.ToString())
            {
                continue;
            }

            try
            {
                PdfImageObject PdfImageObj = new PdfImageObject((PRStream)PDFStremObj);
                System.Drawing.Image ImgPDF = PdfImageObj.GetDrawingImage();
                if (ImgPDF != null)
                {
                    imgList.Add(ImgPDF);
                }
            }
            catch (Exception)
            {
                // Best effort: an image that cannot be decoded is skipped so the
                // remaining images are still returned.
            }
        }
    }
    catch (Exception ex)
    {
        // Same exception type and message as before, but the original exception
        // (with its stack trace) is now preserved as InnerException.
        throw new Exception(ex.Message, ex);
    }
    finally
    {
        // Close the reader on both the success and the failure path.
        if (PDFReaderObj != null)
        {
            PDFReaderObj.Close();
        }
    }

    return imgList;
}
/// <summary>
/// Render-listener callback: saves the rendered image to the output folder as
/// "{name}_{page:000}_{index}.{type}", rotating it to match the page rotation.
/// </summary>
/// <remarks>
/// The file type reported by the image is kept for jpg, png and gif; any other
/// type is written with a "png" extension. The image counter is advanced even
/// when decoding or saving fails.
/// </remarks>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject pdfimage = renderInfo.GetImage();

    string imgtp = pdfimage.GetFileType();
    bool isKnownType =
        "jpg".Equals(imgtp, StringComparison.OrdinalIgnoreCase) ||
        "png".Equals(imgtp, StringComparison.OrdinalIgnoreCase) ||
        "gif".Equals(imgtp, StringComparison.OrdinalIgnoreCase);
    if (!isKnownType)
    {
        imgtp = "png";
    }

    String imageFileName = String.Format("{0}_{1:000}_{2}.{3}", _imgname, _currentPage, _imgNo, imgtp);
    imageFileName = _outputFolder + "\\" + imageFileName;

    try
    {
        using (Image dotnetImg = pdfimage.GetDrawingImage())
        {
            if (dotnetImg != null)
            {
                switch (_pageRotation)
                {
                    case 270:
                        dotnetImg.RotateFlip(RotateFlipType.Rotate270FlipNone);
                        break;
                    case 90:
                        dotnetImg.RotateFlip(RotateFlipType.Rotate90FlipNone);
                        break;
                    case 180:
                        dotnetImg.RotateFlip(RotateFlipType.Rotate180FlipNone);
                        break;
                }

                dotnetImg.Save(imageFileName);
            }
        }
    }
    catch (Exception ee)
    {
        // Best effort: keep extracting the remaining images. ToString() carries the
        // exception type and message in addition to the stack trace.
        System.Diagnostics.Debug.WriteLine(ee.ToString());
    }

    _imgNo++;
}
/// <summary>
/// Decodes the image and pairs it with a file extension chosen from its filter.
/// </summary>
/// <param name="filter">The filter name of the image; unrecognised values yield the extension ".".</param>
/// <param name="pdfImageObject">The image to decode.</param>
/// <returns>A tuple of (extension including the leading dot, decoded image).</returns>
private static Tuple<string, System.Drawing.Image> GetImage(PdfName filter, PdfImageObject pdfImageObject)
{
    System.Drawing.Image decoded = pdfImageObject.GetDrawingImage();

    string traceMessage = null;
    string suffix = string.Empty;

    if (Equals(filter, PdfName.DCTDECODE))
    {
        traceMessage = "JPG image detected";
        suffix = PdfImageObject.ImageBytesType.JPG.FileExtension;
    }
    else if (Equals(filter, PdfName.JBIG2DECODE))
    {
        traceMessage = "JBIG2 extension detected";
        suffix = PdfImageObject.ImageBytesType.JBIG2.FileExtension;
    }
    else if (Equals(filter, PdfName.JPXDECODE))
    {
        traceMessage = "JP2 extension detected";
        suffix = PdfImageObject.ImageBytesType.JP2.FileExtension;
    }
    else if (Equals(filter, PdfName.FLATEDECODE))
    {
        traceMessage = "PNG image detected";
        suffix = PdfImageObject.ImageBytesType.PNG.FileExtension;
    }
    else if (Equals(filter, PdfName.LZWDECODE))
    {
        traceMessage = "LZWDECODE extension detected";
        suffix = PdfImageObject.ImageBytesType.CCITT.FileExtension;
    }
    else if (Equals(filter, PdfName.CCITTFAXDECODE))
    {
        traceMessage = "CCITTFAXDECODE extension detected";
        suffix = PdfImageObject.ImageBytesType.CCITT.FileExtension;
    }

    if (traceMessage != null)
    {
        Trace.TraceInformation(traceMessage);
    }
    else
    {
        // Unrecognised filter: report it on both the debug and the trace channel.
        Debug.WriteLine("Unknown type: " + filter);
        Trace.TraceInformation("Unknown type: " + filter);
    }

    string extension = "." + suffix;
    return new Tuple<string, System.Drawing.Image>(extension, decoded);
}
/// <summary>
/// Collects the encoded bytes of every image in the PDF that is at least
/// 50x50 pixels, each wrapped in its own <see cref="MemoryStream"/>.
/// </summary>
/// <param name="sourcePdf">The PDF document as a byte array.</param>
/// <param name="log">Receives the message of any exception raised while scanning.</param>
/// <returns>
/// The streams collected so far. An exception stops the scan and is logged,
/// it is not rethrown.
/// </returns>
public static List<Stream> ExtractImagesFromPDF(byte[] sourcePdf, TraceWriter log)
{
    List<Stream> imgList = new List<Stream>();
    PdfReader reader = new PdfReader(sourcePdf);
    int n = reader.XrefSize;
    try
    {
        for (int i = 0; i < n; i++)
        {
            PdfObject pdfObject = reader.GetPdfObject(i);
            if (pdfObject == null || !pdfObject.IsStream())
            {
                continue;
            }

            PRStream prStream = (PRStream)pdfObject;
            PdfObject type = prStream.Get(PdfName.SUBTYPE);
            if (type == null || !type.ToString().Equals(PdfName.IMAGE.ToString()))
            {
                continue;
            }

            PdfImageObject pdfImgObject = new PdfImageObject(prStream);

            // The decoded image is only needed for the size check; dispose it afterwards.
            using (var image = pdfImgObject.GetDrawingImage())
            {
                if (image == null)
                {
                    // Skip this image rather than letting a NullReferenceException end the scan.
                    continue;
                }

                // only add images larger than 50x50 for OCR processing
                if (image.Height >= 50 && image.Width >= 50)
                {
                    byte[] imgdata = pdfImgObject.GetImageAsBytes();
                    imgList.Add(new MemoryStream(imgdata));
                }
            }
        }
    }
    catch (Exception e)
    {
        log.Error(e.Message);
    }
    finally
    {
        reader.Close();
    }

    return imgList;
}
/// <summary>
/// Decodes the rendered image and saves a bitmap copy of it to <c>imgPath</c>.
/// Images that cannot be decoded are ignored.
/// </summary>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
private void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();
    using (Dotnet dotnetImg = image.GetDrawingImage())
    {
        if (dotnetImg == null)
        {
            return;
        }

        // The former in-memory TIFF encode was dropped: its output was never read.
        // The bitmap copy is disposed once it has been written to disk.
        using (Bitmap copy = new Bitmap(dotnetImg))
        {
            copy.Save(imgPath);
        }
    }
}
/// <summary>
/// Render-listener callback: decodes every image that declares a filter and
/// appends it to <c>_images</c>. Images that fail to decode are skipped.
/// </summary>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();

    // Only the presence of /Filter matters here, so do not cast it: the entry may
    // be an array, and a (PdfName) cast would throw outside the try block.
    if (image.Get(PdfName.FILTER) == null)
    {
        return;
    }

    try
    {
        Image drawingImage = image.GetDrawingImage();
        _images.Add(drawingImage);
    }
    catch (Exception)
    {
        // Best effort: an image that cannot be decoded must not stop the
        // processing of the remaining content.
    }
}
/// <summary>
/// Render-listener callback: records size, pixel format, resolution, filter and
/// file type of the rendered image in <c>Images</c>.
/// </summary>
/// <remarks>
/// JBIG2 and JPX images are not decoded. For those, and for images whose
/// decoder yields nothing, width, height and resolution stay at -1 and the
/// pixel format stays <c>Undefined</c>.
/// </remarks>
/// <param name="info">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo info)
{
    PdfImageObject image = info.GetImage();
    var fileType = image.GetFileType();
    var imgBytes = image.GetImageAsBytes();

    var imgInfo = "Unknown";
    var filter = image.Get(PdfName.FILTER);
    if (filter != null)
    {
        imgInfo = filter.ToString().Replace(',', ' ');
    }

    int imgWidth = -1;
    int imgHeight = -1;
    int imgResolution = -1;
    PixelFormat imgFormat = PixelFormat.Undefined;

    if (imgInfo != "/JBIG2Decode" && imgInfo != "/JPXDecode")
    {
        // `using` disposes the decoded image even if a property read throws.
        using (var img = image.GetDrawingImage())
        {
            if (img != null)
            {
                imgWidth = img.Width;
                imgHeight = img.Height;
                imgFormat = img.PixelFormat;
                imgResolution = Convert.ToInt32(img.VerticalResolution);
            }
        }
    }

    Images.Add(new PDFImageInfo()
    {
        ImageBytes = imgBytes.Length,
        ImageFormat = imgFormat.ToString(),
        ImageHeight = imgHeight,
        ImageWidth = imgWidth,
        ImageResolution = imgResolution,
        ImageInfo = imgInfo,
        ImageType = fileType
    });
}
/// <summary>
/// Decodes the image, rotates it by 90, 180 or 270 degrees when the page
/// rotation asks for it, and saves it to <paramref name="path"/>.
/// Nothing is written when the image cannot be decoded.
/// </summary>
/// <param name="pdfimage">The image to decode.</param>
/// <param name="path">Destination file path.</param>
/// <param name="pageRotation">Page rotation in degrees; other values leave the image unrotated.</param>
private void ExtractImageOLD(PdfImageObject pdfimage, string path, int pageRotation)
{
    using (Image decoded = pdfimage.GetDrawingImage())
    {
        if (decoded == null)
        {
            return;
        }

        switch (pageRotation)
        {
            case 270:
                decoded.RotateFlip(RotateFlipType.Rotate270FlipNone);
                break;
            case 90:
                decoded.RotateFlip(RotateFlipType.Rotate90FlipNone);
                break;
            case 180:
                decoded.RotateFlip(RotateFlipType.Rotate180FlipNone);
                break;
        }

        decoded.Save(path);
    }
}
/// <summary>
/// Click handler: for every selected list entry that holds a PDF stream, shows
/// and saves the decoded image to the "output" folder next to the current file,
/// replaces the stream with a blank image, and then saves the reader.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnRemoveObject_Click(object sender, EventArgs e)
{
    foreach (var selected in lstSelectedImages.SelectedItems)
    {
        if (selected is KeyValuePair<string, object>)
        {
            var entry = (KeyValuePair<string, object>)selected;
            var pdfObject = entry.Value as PdfObject;
            if (pdfObject != null && pdfObject.IsStream())
            {
                var stream = (PRStream)pdfObject;

                // Read the stream bytes, falling back to the raw bytes when
                // decoding fails. The bytes themselves are not used further.
                try
                {
                    PdfReader.GetStreamBytes(stream);
                }
                catch (Exception)
                {
                    PdfReader.GetStreamBytesRaw(stream);
                }

                try
                {
                    var decoder = new PdfImageObject(stream);
                    picImage.Image = decoder.GetDrawingImage();

                    string outputFile = Path.GetDirectoryName(fileName) + "\\output\\" + DateTime.Now.Ticks.ToString() + "." + decoder.GetFileType();
                    picImage.Image.Save(outputFile);

                    PdfImage blank = new PdfImage(MakeBlankImg(), "", null);
                    ReplaceStream(stream, blank);
                }
                catch (Exception ex)
                {
                    tsMessage.Text = ex.Message;
                }
            }
        }

        // Runs once per selected item, whether or not the item was a stream.
        SaveReaderToOutput();
    }
}
/// <summary>
/// Collects the images referenced from the XObject resources of a dictionary,
/// descending into form and group XObjects.
/// https://stackoverflow.com/questions/802269/extract-images-using-itextsharp
/// </summary>
/// <param name="dict">A dictionary that may carry a /Resources entry (a page or a nested XObject).</param>
/// <param name="doc">The reader the indirect references are resolved against.</param>
/// <returns>The decoded images; empty when the dictionary has no resources or no XObjects.</returns>
internal static IList<Image> GetImagesFromPdfDict(PdfDictionary dict, PdfReader doc)
{
    var images = new List<Image>();

    var res = (PdfDictionary)(PdfReader.GetPdfObject(dict.Get(PdfName.RESOURCES)));
    if (res == null)
    {
        // A dictionary reached through the recursion below may have no /Resources at all.
        return images;
    }

    var xobj = (PdfDictionary)(PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)));
    if (xobj == null)
    {
        return images;
    }

    foreach (var name in xobj.Keys)
    {
        var obj = xobj.Get(name);
        if (!obj.IsIndirect())
        {
            continue;
        }

        var tg = (PdfDictionary)(PdfReader.GetPdfObject(obj));
        var subtype = (PdfName)(PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)));
        if (PdfName.IMAGE.Equals(subtype))
        {
            var xrefIdx = ((PRIndirectReference)obj).Number;
            var pdfObj = doc.GetPdfObject(xrefIdx);
            var str = (PdfStream)(pdfObj);
            var pdfImage = new PdfImageObject((PRStream)str);
            var img = pdfImage.GetDrawingImage();
            images.Add(img);
        }
        else if (PdfName.FORM.Equals(subtype) || PdfName.GROUP.Equals(subtype))
        {
            images.AddRange(GetImagesFromPdfDict(tg, doc));
        }
    }

    return images;
}
/// <summary>
/// Saves every image reported by <c>FindPageImages</c> for each page of the
/// source PDF into the target folder as "Image {page} - {index}.{type}".
/// </summary>
/// <param name="source">Path of the PDF file to read.</param>
/// <param name="target">Folder that receives the image files.</param>
private static void FindImages(string source, string target)
{
    using (var pdf = new PdfReader(source))
    {
        // imageNumber restarts at 1 on every page.
        for (int pageNumber = 1, imageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++, imageNumber = 1)
        {
            FindPageImages(pdf.GetPageN(pageNumber), obj =>
            {
                if (obj == null)
                {
                    return;
                }

                var pdfObj = pdf.GetPdfObject(((PRIndirectReference)obj).Number);
                if (pdfObj == null || !pdfObj.IsStream())
                {
                    return;
                }

                var stream = (PdfStream)pdfObj;
                var subtype = stream.Get(PdfName.SUBTYPE);
                if (subtype == null || !subtype.Equals(PdfName.IMAGE))
                {
                    return;
                }

                var imageObj = new PdfImageObject((PRStream)stream);
                using (var image = imageObj.GetDrawingImage())
                {
                    if (image == null)
                    {
                        // The decoder produced nothing for this stream; skip it
                        // without consuming an image number.
                        return;
                    }

                    image.Save(Path.Combine(target, $"Image {pageNumber} - {imageNumber++}.{imageObj.GetFileType()}"));
                }
            });
        }
    }
}
/// <summary>
/// Render-listener callback: decodes the rendered image and records it in
/// <c>Images</c> with the PNG file extension, whatever its original encoding.
/// </summary>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    // PdfImageObject.GetDrawingImage() does the decoding, so there is no need to
    // interpret the raw image stream or its scan lines by hand.
    System.Drawing.Image decoded = renderInfo.GetImage().GetDrawingImage();

    this.Images.Add(decoded, PdfImageObject.ImageBytesType.PNG.FileExtension);
}
/// <summary>
/// Processes a PDF: JPEG (DCTDecode) page images whose size changes when passed
/// through <c>Globals.ResizeImage</c> are re-encoded as JPEG and replaced in the
/// document, which is then written with form flattening enabled.
/// </summary>
/// <param name="fileStream">Stream containing the PDF; it is read in full and left open.</param>
/// <returns>
/// A stream holding the rewritten PDF, or null when the document was not opened
/// with full permissions.
/// </returns>
private static MemoryStream PDFCompress2(Stream fileStream)
{
    // The BinaryReader is deliberately not disposed: disposing it would also
    // close the caller's stream.
    BinaryReader br = new BinaryReader(fileStream);
    byte[] byt = br.ReadBytes((int)fileStream.Length);

    PdfReader pdf = new PdfReader(byt);
    if (!pdf.IsOpenedWithFullPermissions)
    {
        pdf.Close();
        return null;
    }

    MemoryStream ms = new MemoryStream();
    PdfStamper stp = new PdfStamper(pdf, ms);
    PdfWriter writer = stp.Writer;

    int page_count = pdf.NumberOfPages;
    for (int i = 1; i <= page_count; i++)
    {
        PdfDictionary pg = pdf.GetPageN(i);
        PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
        if (res == null)
        {
            // Page without resources: nothing to compress.
            continue;
        }

        PdfDictionary xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
        if (xobj == null)
        {
            continue;
        }

        foreach (PdfName name in xobj.Keys)
        {
            PdfObject obj = xobj.Get(name);
            if (!obj.IsIndirect())
            {
                continue;
            }

            PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
            if (tg == null)
            {
                // Sometimes, when the same image is handled more than once, this comes back null.
                continue;
            }

            PdfName type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
            if (!PdfName.IMAGE.Equals(type))
            {
                continue;
            }

            int xrefIdx = ((PRIndirectReference)obj).Number;
            PdfObject pdfObj = pdf.GetPdfObject(xrefIdx);
            PdfStream str = (PdfStream)pdfObj;

            PdfObject filterObj = tg.Get(PdfName.FILTER);
            string filter = filterObj != null ? filterObj.ToString() : string.Empty;

            // Sometimes the value is "[/DCTDecode]" and sometimes "/DCTDecode".
            if (!filter.Contains("/DCTDecode"))
            {
                continue;
            }

            // No try/catch here: the former `catch { throw ex; }` only reset the
            // stack trace of whatever failed below.
            PdfImageObject pdfImage = new PdfImageObject((PRStream)str);
            using (System.Drawing.Image imgOriginal = pdfImage.GetDrawingImage())
            using (System.Drawing.Image img2 = Globals.ResizeImage(imgOriginal))
            {
                if (img2.Width != imgOriginal.Width || img2.Height != imgOriginal.Height)
                {
                    var stream = new System.IO.MemoryStream();
                    img2.Save(stream, ImageFormat.Jpeg);
                    stream.Position = 0;

                    // Swap the original image object for the re-encoded one under the same reference.
                    PdfReader.KillIndirect(obj);
                    iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(stream);
                    writer.AddDirectImageSimple(img, (PRIndirectReference)obj);
                }
            }

            // NOTE(review): processing of this page stops after its first JPEG image;
            // kept as in the original — confirm whether this is intended.
            break;
        }
    }

    stp.Writer.CloseStream = false;
    stp.FormFlattening = true;
    stp.Close();
    pdf.Close();

    return ms;
}
// ---------------------------------------------------------------------------
/**
 * Manipulates a PDF file src with the byte array as result.
 *
 * Every stream tagged with the special id entry is decoded, scaled by FACTOR,
 * re-encoded as JPEG and written back in place as an 8-bit DeviceRGB image.
 * Tagged streams that cannot be decoded are left untouched.
 */
public byte[] ManipulatePdf(byte[] pdf)
{
    PdfName key = new PdfName("ITXT_SpecialId");
    PdfName value = new PdfName("123456789");

    // Read the file
    PdfReader reader = new PdfReader(pdf);
    int objectCount = reader.XrefSize;

    // Look for image and manipulate image prStream
    for (int index = 0; index < objectCount; index++)
    {
        PdfObject candidate = reader.GetPdfObject(index);
        if (candidate == null || !candidate.IsStream())
        {
            continue;
        }

        PRStream prStream = (PRStream)candidate;
        if (!value.Equals(prStream.Get(key)))
        {
            continue;
        }

        PdfImageObject image = new PdfImageObject(prStream);
        using (System.Drawing.Image original = image.GetDrawingImage())
        {
            if (original == null)
            {
                continue;
            }

            int width = (int)(original.Width * FACTOR);
            int height = (int)(original.Height * FACTOR);

            // Draw the original into a smaller bitmap and encode that as JPEG.
            byte[] jpegBytes;
            using (System.Drawing.Image thumb = new Bitmap(width, height))
            using (Graphics canvas = Graphics.FromImage(thumb))
            {
                canvas.DrawImage(original, 0, 0, width, height);
                using (MemoryStream encoded = new MemoryStream())
                {
                    thumb.Save(encoded, ImageFormat.Jpeg);
                    jpegBytes = encoded.ToArray();
                }
            }

            // Replace the stream content and describe the new image.
            prStream.Clear();
            prStream.SetData(jpegBytes, false, PRStream.NO_COMPRESSION);
            prStream.Put(PdfName.TYPE, PdfName.XOBJECT);
            prStream.Put(PdfName.SUBTYPE, PdfName.IMAGE);
            prStream.Put(key, value);
            prStream.Put(PdfName.FILTER, PdfName.DCTDECODE);
            prStream.Put(PdfName.WIDTH, new PdfNumber(width));
            prStream.Put(PdfName.HEIGHT, new PdfNumber(height));
            prStream.Put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
            prStream.Put(PdfName.COLORSPACE, PdfName.DEVICERGB);
        }
    }

    // Save altered PDF
    using (MemoryStream output = new MemoryStream())
    {
        using (PdfStamper stamper = new PdfStamper(reader, output))
        {
        }

        return output.ToArray();
    }
}
/// <summary>
/// Render-listener callback: records the decoded image in <c>Images</c>, writes
/// it to C:\Images\{pages}\im{n}.JPG and stores its bytes on the current invoice.
/// </summary>
/// <remarks>
/// <c>PdfImageExtractor.increment</c> is advanced for every rendered image.
/// Files are only written while that counter does not exceed the page count
/// returned by <c>Program.NumberOfPagesPdf</c>. I/O errors are written to the
/// console and otherwise ignored.
/// </remarks>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();

    /* Rather than struggle with the image stream and try to figure out how to handle
     * BitMapData scan lines in various formats (like virtually every sample I've found
     * online), use the PdfImageObject.GetDrawingImage() method, which does the work for us.
     *
     * The width/height/length/colour-space values that used to be read here were
     * never used, and reading them threw for images that lack one of those
     * entries, so those reads were removed.
     */
    Image drawingImage = image.GetDrawingImage();
    string extension = PdfImageObject.ImageBytesType.JPG.FileExtension;

    try
    {
        var pages = Program.NumberOfPagesPdf(Program.FilePhth);
        this.Images.Add(drawingImage, extension);

        string filename = @"C:\Images\" + pages + "\\";
        bool exists = System.IO.Directory.Exists(filename);
        PdfImageExtractor.increment++;
        if (!exists)
        {
            System.IO.Directory.CreateDirectory(filename);
        }

        if (PdfImageExtractor.increment <= pages)
        {
            string fullName2 = filename + "im" + PdfImageExtractor.increment + ".JPG";

            using (MemoryStream stream = new MemoryStream())
            {
                drawingImage.Save(stream, drawingImage.RawFormat);

                // NOTE(review): the file is named .JPG but encoded as GIF; kept as in
                // the original — confirm which format consumers expect.
                drawingImage.Save(fullName2, ImageFormat.Gif);

                if (!Program.invoice.images1.Any(m => m.ImageName == fullName2))
                {
                    // Save the image to the model.
                    Program.invoice.images1.Add(new Models.Images()
                    {
                        ImageName = fullName2,
                        ImageData = stream.ToArray()
                    });
                }
            }
        }
    }
    catch (IOException e)
    {
        Console.WriteLine(e);
    }
}
/// <summary>
/// Render-listener callback: decodes the rendered image and records it in
/// <c>images</c> together with its position and size (taken from the image CTM)
/// and a file extension derived from its filter.
/// </summary>
/// <param name="renderInfo">Render information supplied by the content parser.</param>
public void RenderImage(ImageRenderInfo renderInfo)
{
    PdfImageObject image = renderInfo.GetImage();

    // /Filter may also be an array; `as` skips those images instead of throwing
    // InvalidCastException the way a hard cast does.
    PdfName filter = image.Get(PdfName.FILTER) as PdfName;
    if (filter == null)
    {
        return;
    }

    Matrix matrix = renderInfo.GetImageCTM();
    System.Drawing.Image drawingImage = image.GetDrawingImage();

    float x = matrix[Matrix.I31];
    float y = matrix[Matrix.I32];
    float w = matrix[Matrix.I11];
    float h = matrix[Matrix.I22];

    // Value comparison via Equals, so a name instance created by the parser still matches.
    string extension = ".";
    if (PdfName.DCTDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.JPG.FileExtension;
    }
    else if (PdfName.JPXDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.JP2.FileExtension;
    }
    else if (PdfName.FLATEDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.PNG.FileExtension;
    }
    else if (PdfName.LZWDECODE.Equals(filter))
    {
        extension += PdfImageObject.ImageBytesType.CCITT.FileExtension;
    }

    images.Add(new PdfImage()
    {
        X = x,
        Y = y,
        Width = w,
        Height = h,
        DrawingImage = drawingImage,
        Extension = extension
    });
}
/// <summary>
/// Gets image from PDF and compresses it - Found on StackOverflow - asis
/// </summary>
/// <remarks>
/// Every image stream of the reader that can be decoded is re-encoded as a JPEG
/// of the given quality and written back in place as an 8-bit DeviceRGB image.
/// Images that cannot be decoded or re-encoded are left untouched. Unused
/// objects are removed from the reader once, after all images were processed.
/// </remarks>
/// <param name="reader">The reader whose image streams are rewritten in place.</param>
/// <param name="quality">JPEG quality passed to the encoder.</param>
public static void ReduceResolution(PdfReader reader, long quality)
{
    // Find the JPEG encoder by its format id instead of relying on its position
    // in the encoder list.
    System.Drawing.Imaging.ImageCodecInfo codec = null;
    foreach (System.Drawing.Imaging.ImageCodecInfo candidate in System.Drawing.Imaging.ImageCodecInfo.GetImageEncoders())
    {
        if (candidate.FormatID == System.Drawing.Imaging.ImageFormat.Jpeg.Guid)
        {
            codec = candidate;
            break;
        }
    }

    if (codec == null)
    {
        // Without a JPEG encoder nothing can be re-encoded.
        return;
    }

    int n = reader.XrefSize;
    try
    {
        for (int i = 0; i < n; i++)
        {
            PdfObject obj = reader.GetPdfObject(i);
            if (obj == null || !obj.IsStream())
            {
                continue;
            }

            PdfDictionary dict = (PdfDictionary)PdfReader.GetPdfObject(obj);
            PdfName subType = (PdfName)PdfReader.GetPdfObject(dict.Get(PdfName.SUBTYPE));
            if (!PdfName.IMAGE.Equals(subType))
            {
                continue;
            }

            PRStream stream = (PRStream)obj;
            try
            {
                PdfImageObject image = new PdfImageObject(stream);
                using (System.Drawing.Image img = image.GetDrawingImage())
                {
                    if (img == null)
                    {
                        continue;
                    }

                    int width = img.Width;
                    int height = img.Height;

                    using (System.Drawing.Bitmap dotnetImg = new System.Drawing.Bitmap(img))
                    using (System.Drawing.Imaging.EncoderParameters eParams = new System.Drawing.Imaging.EncoderParameters(1))
                    using (MemoryStream msImg = new MemoryStream())
                    {
                        // set parameters for image quality
                        eParams.Param[0] = new System.Drawing.Imaging.EncoderParameter(
                            System.Drawing.Imaging.Encoder.Quality,
                            quality);
                        dotnetImg.Save(msImg, codec, eParams);

                        // Store the JPEG bytes as-is (compress = false) and describe the new image.
                        stream.SetData(msImg.ToArray(), false, PRStream.BEST_COMPRESSION);
                        stream.Put(PdfName.TYPE, PdfName.XOBJECT);
                        stream.Put(PdfName.SUBTYPE, PdfName.IMAGE);
                        stream.Put(PdfName.FILTER, PdfName.DCTDECODE);
                        stream.Put(PdfName.WIDTH, new PdfNumber(width));
                        stream.Put(PdfName.HEIGHT, new PdfNumber(height));
                        stream.Put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
                        stream.Put(PdfName.COLORSPACE, PdfName.DEVICERGB);
                    }
                }
            }
            catch
            {
                // Deliberate best effort: iText[Sharp] can't handle all image types...
            }
        }
    }
    finally
    {
        // may or may not help
        reader.RemoveUnusedObjects();
    }
}
/// <summary>
/// For every page that has no text in <c>doc.DocBodyDic</c> yet, saves each page
/// image of at least 100x100 as a JPEG file, passes that file to
/// <c>RetryText</c>, and stores the first non-empty text result for the page.
/// </summary>
/// <remarks>
/// Image files are written to {localDirectory}\{pdf name}\Page_{n}.jpg, so
/// several images on one page overwrite each other. A failure on one page is
/// reported on the console and processing continues with the next page.
/// </remarks>
/// <param name="rotate">Not used by this method.</param>
/// <param name="docPath">Path of the PDF file.</param>
/// <param name="doc">Document whose <c>DocBodyDic</c> is filled with the text found per page.</param>
public void OCRPdf(bool rotate, string docPath, ref Documents doc)
{
    // One reader serves all pages and is always closed at the end.
    PdfReader pdfReader = new PdfReader(docPath);
    try
    {
        int totalPage = pdfReader.NumberOfPages;
        Console.WriteLine("Pdf file {0} contains {1} pages...", docPath, totalPage);

        List<int> pageNos = new List<int>();
        for (int i = 1; i <= totalPage; i++)
        {
            if (!doc.DocBodyDic.ContainsKey(i))
            {
                pageNos.Add(i);
            }
        }

        foreach (int pageNumber in pageNos)
        {
            try
            {
                Console.WriteLine("Working on page {0}...", pageNumber);

                PdfDictionary pg = pdfReader.GetPageN(pageNumber);
                PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
                if (res == null)
                {
                    continue;
                }

                PdfDictionary xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
                if (xobj == null)
                {
                    continue;
                }

                foreach (PdfName name in xobj.Keys)
                {
                    PdfObject obj = xobj.Get(name);
                    if (!obj.IsIndirect())
                    {
                        continue;
                    }

                    PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                    if (tg == null)
                    {
                        continue;
                    }

                    PdfObject widthObj = tg.Get(PdfName.WIDTH);
                    PdfObject heightObj = tg.Get(PdfName.HEIGHT);
                    float widthValue;
                    if (widthObj == null || heightObj == null || !float.TryParse(widthObj.ToString(), out widthValue))
                    {
                        // No usable dimensions: skip this XObject only, not the whole page.
                        continue;
                    }

                    // A height that is not a plain number falls back to the width.
                    float heightValue;
                    if (!float.TryParse(heightObj.ToString(), out heightValue))
                    {
                        heightValue = widthValue;
                    }

                    if (heightValue < 100 || widthValue < 100)
                    {
                        continue;
                    }

                    ImageRenderInfo imgRI = ImageRenderInfo.CreateForXObject(
                        new Matrix(widthValue, heightValue),
                        (PRIndirectReference)obj,
                        tg);
                    PdfImageObject image = imgRI.GetImage();

                    string imageFileName;
                    using (Image dotnetImg = image.GetDrawingImage())
                    {
                        if (dotnetImg == null)
                        {
                            // Nothing to save for this XObject.
                            continue;
                        }

                        string ocrFolder = string.Format("{0}\\{1}", this.localDirectory, Path.GetFileNameWithoutExtension(docPath));
                        if (!Directory.Exists(ocrFolder))
                        {
                            Directory.CreateDirectory(ocrFolder);
                        }

                        imageFileName = string.Format("{0}\\{1}\\Page_{2}.jpg", localDirectory, Path.GetFileNameWithoutExtension(docPath), pageNumber);
                        dotnetImg.Save(imageFileName);
                    }

                    string text = RetryText(imageFileName);
                    if ((!doc.DocBodyDic.ContainsKey(pageNumber)) && (!string.IsNullOrEmpty(text)))
                    {
                        doc.DocBodyDic.Add(pageNumber, text);
                    }
                    else
                    {
                        Console.ForegroundColor = ConsoleColor.Yellow;
                        Console.WriteLine("Page {0} could read...", pageNumber);
                        Console.ResetColor();
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort per page: report the failure and continue with the next page.
                Console.WriteLine("Failed to process page {0}: {1}", pageNumber, ex.Message);
            }
        }
    }
    finally
    {
        pdfReader.Close();
    }
}