/// <summary>
/// Saves the image located on each selected page of a PDF into <paramref name="outputPath"/>,
/// naming every file "{page number}.{codec extension}".
/// </summary>
/// <remarks>
/// NOTE(review): the call that locates the image object of a page (<c>Find</c>) is commented out,
/// so <c>obj</c> is always null and no image is written until that lookup is restored.
/// Failures on a single page are traced and do not stop the remaining pages.
/// </remarks>
/// <param name="src">Path of the PDF file to read.</param>
/// <param name="outputPath">Target directory; it is created right before an image is saved.</param>
/// <param name="page">Page number to process; 0 (the default) or a negative value processes pages 1..NumberOfPages.</param>
public static void Extract(string src, string outputPath, int page = 0)
{
    PdfReader pr = new PdfReader(src);
    RandomAccessFileOrArray file = null;
    try
    {
        file = new RandomAccessFileOrArray(src);
        int firstPage = page > 0 ? page : 1;
        int max = page > 0 ? page : pr.NumberOfPages;

        for (int i = firstPage; i <= max; ++i)
        {
            try
            {
                // TODO: restore the lookup — (PdfObject obj, PdfObject filter) = Find(pr.GetPageN(i));
                PdfObject obj = null;
                if (obj == null)
                {
                    continue;
                }

                // Resolve the indirect reference through the xref table and read the undecoded stream.
                int xrefIndex = ((PRIndirectReference)obj).Number;
                byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pr.GetPdfObject(xrefIndex));
                if (bytes == null)
                {
                    continue;
                }

                using (MemoryStream mem = new MemoryStream(bytes))
                using (Image img = Image.FromStream(mem, true, true))
                using (EncoderParameters prms = new EncoderParameters(1))
                {
                    // CreateDirectory is a no-op when the directory already exists.
                    Directory.CreateDirectory(outputPath);

                    ImageCodecInfo codec = ImageCodecInfo.GetImageEncoders()
                        .Single(e => e.FormatID.Equals(img.RawFormat.Guid));
                    prms.Param[0] = new EncoderParameter(Encoder.Compression, 0);

                    // FilenameExtension looks like "*.JPG;*.JPEG": keep the first entry and drop
                    // the "*." wildcard prefix, otherwise the file name contains an illegal '*'.
                    string extension = codec.FilenameExtension.Split(';')[0].TrimStart('*', '.');
                    img.Save(
                        Path.Combine(outputPath, String.Format(@"{0}.{1}", i, extension)),
                        codec,
                        prms);
                }
            }
            catch (Exception ex)
            {
                // Best effort per page: report the problem instead of hiding it, then go on.
                System.Diagnostics.Trace.WriteLine(
                    "Extract: page " + i + " of '" + src + "' skipped: " + ex.Message);
            }
        }
    }
    finally
    {
        // Release the reader and the file exactly once, after all pages were visited.
        if (file != null)
        {
            file.Close();
        }
        pr.Close();
    }
}
/// <summary>
/// Parses the metrics file supplied by <c>fontParser.GetMetricsFile()</c>.
/// The header keys fill <c>fontNames</c>, <c>fontMetrics</c>, <c>characterSet</c> and
/// <c>encodingScheme</c>; the character metrics fill <c>codeToGlyph</c>, <c>unicodeToGlyph</c>
/// and <c>avgWidth</c>; the optional kerning section fills <c>kernPairs</c>.
/// Finally <c>isFontSpecific</c> is derived from the encoding scheme.
/// </summary>
/// <remarks>
/// The metrics file is closed even when parsing fails.
/// Integer fields are parsed with the invariant culture, like the floating point ones.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
protected internal virtual void Process()
{
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    RandomAccessFileOrArray raf = fontParser.GetMetricsFile();
    try
    {
        String line;
        // Reused as a "section still open" flag for each of the parsing phases below.
        bool startKernPairs = false;

        // Phase 1: global font information, up to "StartCharMetrics".
        while (!startKernPairs && (line = raf.ReadLine()) != null)
        {
            StringTokenizer tok = new StringTokenizer(line, " ,\n\r\t\f");
            if (!tok.HasMoreTokens())
            {
                continue;
            }
            String ident = tok.NextToken();
            switch (ident)
            {
                case "FontName":
                {
                    // "\u00ff" never occurs as a delimiter here, so the rest of the line is taken;
                    // Substring(1) drops the separator that follows the key.
                    fontNames.SetFontName(tok.NextToken("\u00ff").Substring(1));
                    break;
                }
                case "FullName":
                {
                    String fullName = tok.NextToken("\u00ff").Substring(1);
                    fontNames.SetFullName(new String[][] { new String[] { "", "", "", fullName } });
                    break;
                }
                case "FamilyName":
                {
                    String familyName = tok.NextToken("\u00ff").Substring(1);
                    fontNames.SetFamilyName(new String[][] { new String[] { "", "", "", familyName } });
                    break;
                }
                case "Weight":
                {
                    fontNames.SetWeight(FontNames.ConvertFontWeight(tok.NextToken("\u00ff").Substring(1)));
                    break;
                }
                case "ItalicAngle":
                {
                    fontMetrics.SetItalicAngle(float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "IsFixedPitch":
                {
                    fontMetrics.SetIsFixedPitch(tok.NextToken().Equals("true"));
                    break;
                }
                case "CharacterSet":
                {
                    characterSet = tok.NextToken("\u00ff").Substring(1);
                    break;
                }
                case "FontBBox":
                {
                    int llx = (int)float.Parse(tok.NextToken(), invariant);
                    int lly = (int)float.Parse(tok.NextToken(), invariant);
                    int urx = (int)float.Parse(tok.NextToken(), invariant);
                    int ury = (int)float.Parse(tok.NextToken(), invariant);
                    fontMetrics.SetBbox(llx, lly, urx, ury);
                    break;
                }
                case "UnderlinePosition":
                {
                    fontMetrics.SetUnderlinePosition((int)float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "UnderlineThickness":
                {
                    fontMetrics.SetUnderlineThickness((int)float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "EncodingScheme":
                {
                    encodingScheme = tok.NextToken("\u00ff").Substring(1).Trim();
                    break;
                }
                case "CapHeight":
                {
                    fontMetrics.SetCapHeight((int)float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "XHeight":
                {
                    fontMetrics.SetXHeight((int)float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "Ascender":
                {
                    fontMetrics.SetTypoAscender((int)float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "Descender":
                {
                    fontMetrics.SetTypoDescender((int)float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "StdHW":
                {
                    fontMetrics.SetStemH((int)float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "StdVW":
                {
                    fontMetrics.SetStemV((int)float.Parse(tok.NextToken(), invariant));
                    break;
                }
                case "StartCharMetrics":
                {
                    startKernPairs = true;
                    break;
                }
            }
        }
        if (!startKernPairs)
        {
            String metricsPath = fontParser.GetAfmPath();
            if (metricsPath != null)
            {
                throw new iText.IO.IOException("missing.startcharmetrics.in.1").SetMessageParams(metricsPath);
            }
            else
            {
                throw new iText.IO.IOException("missing.startcharmetrics.in.the.metrics.file");
            }
        }

        // Phase 2: one glyph per line until "EndCharMetrics".
        avgWidth = 0;
        int widthCount = 0;
        while ((line = raf.ReadLine()) != null)
        {
            StringTokenizer tok = new StringTokenizer(line);
            if (!tok.HasMoreTokens())
            {
                continue;
            }
            String ident = tok.NextToken();
            if (ident.Equals("EndCharMetrics"))
            {
                startKernPairs = false;
                break;
            }
            // Defaults used when a field is absent from the line.
            int C = -1;
            int WX = 250;
            String N = "";
            int[] B = null;
            // A metrics line is a ';'-separated list of "key value..." entries.
            tok = new StringTokenizer(line, ";");
            while (tok.HasMoreTokens())
            {
                StringTokenizer tokc = new StringTokenizer(tok.NextToken());
                if (!tokc.HasMoreTokens())
                {
                    continue;
                }
                ident = tokc.NextToken();
                switch (ident)
                {
                    case "C":
                    {
                        C = System.Convert.ToInt32(tokc.NextToken(), invariant);
                        break;
                    }
                    case "WX":
                    {
                        WX = (int)float.Parse(tokc.NextToken(), invariant);
                        break;
                    }
                    case "N":
                    {
                        N = tokc.NextToken();
                        break;
                    }
                    case "B":
                    {
                        B = new int[]
                        {
                            System.Convert.ToInt32(tokc.NextToken(), invariant),
                            System.Convert.ToInt32(tokc.NextToken(), invariant),
                            System.Convert.ToInt32(tokc.NextToken(), invariant),
                            System.Convert.ToInt32(tokc.NextToken(), invariant)
                        };
                        break;
                    }
                }
            }
            int unicode = (int)AdobeGlyphList.NameToUnicode(N);
            Glyph glyph = new Glyph(C, WX, unicode, B);
            if (C >= 0)
            {
                codeToGlyph[C] = glyph;
            }
            if (unicode != -1)
            {
                unicodeToGlyph[unicode] = glyph;
            }
            avgWidth += WX;
            widthCount++;
        }
        if (widthCount != 0)
        {
            avgWidth /= widthCount;
        }
        // Still set here means the file ended before "EndCharMetrics" was seen.
        if (startKernPairs)
        {
            String metricsPath = fontParser.GetAfmPath();
            if (metricsPath != null)
            {
                throw new iText.IO.IOException("missing.endcharmetrics.in.1").SetMessageParams(metricsPath);
            }
            else
            {
                throw new iText.IO.IOException("missing.endcharmetrics.in.the.metrics.file");
            }
        }

        // From AdobeGlyphList:
        // nonbreakingspace;00A0
        // space;0020
        // When the font has no non-breaking space, reuse the metrics of the ordinary space.
        if (!unicodeToGlyph.ContainsKey(0x00A0))
        {
            Glyph space = null;
            if (unicodeToGlyph.ContainsKey(0x0020))
            {
                space = unicodeToGlyph.Get(0x0020);
            }
            if (space != null)
            {
                unicodeToGlyph[0x00A0] = new Glyph(space.GetCode(), space.GetWidth(), 0x00A0, space.GetBbox());
            }
        }

        // Phase 3: skip forward to either the end of the file or the kerning section.
        bool endOfMetrics = false;
        while ((line = raf.ReadLine()) != null)
        {
            StringTokenizer tok = new StringTokenizer(line);
            if (!tok.HasMoreTokens())
            {
                continue;
            }
            String ident = tok.NextToken();
            if (ident.Equals("EndFontMetrics"))
            {
                endOfMetrics = true;
                break;
            }
            else if (ident.Equals("StartKernPairs"))
            {
                startKernPairs = true;
                break;
            }
        }

        if (startKernPairs)
        {
            // Phase 4: "KPX first second width" lines until "EndKernPairs".
            while ((line = raf.ReadLine()) != null)
            {
                StringTokenizer tok = new StringTokenizer(line);
                if (!tok.HasMoreTokens())
                {
                    continue;
                }
                String ident = tok.NextToken();
                if (ident.Equals("KPX"))
                {
                    String first = tok.NextToken();
                    String second = tok.NextToken();
                    int? width = (int)float.Parse(tok.NextToken(), invariant);
                    int firstUni = (int)AdobeGlyphList.NameToUnicode(first);
                    int secondUni = (int)AdobeGlyphList.NameToUnicode(second);
                    if (firstUni != -1 && secondUni != -1)
                    {
                        // Both code points are packed into one 64-bit key.
                        long record = ((long)firstUni << 32) + secondUni;
                        kernPairs[record] = width;
                    }
                }
                else if (ident.Equals("EndKernPairs"))
                {
                    startKernPairs = false;
                    break;
                }
            }
        }
        else if (!endOfMetrics)
        {
            String metricsPath = fontParser.GetAfmPath();
            if (metricsPath != null)
            {
                throw new iText.IO.IOException("missing.endfontmetrics.in.1").SetMessageParams(metricsPath);
            }
            else
            {
                throw new iText.IO.IOException("missing.endfontmetrics.in.the.metrics.file");
            }
        }

        // Still set here means the kerning section was never terminated.
        if (startKernPairs)
        {
            String metricsPath = fontParser.GetAfmPath();
            if (metricsPath != null)
            {
                throw new iText.IO.IOException("missing.endkernpairs.in.1").SetMessageParams(metricsPath);
            }
            else
            {
                throw new iText.IO.IOException("missing.endkernpairs.in.the.metrics.file");
            }
        }
    }
    finally
    {
        // Previously skipped whenever one of the exceptions above was thrown.
        raf.Close();
    }
    isFontSpecific = !(encodingScheme.Equals("AdobeStandardEncoding") || encodingScheme.Equals("StandardEncoding"));
}
/// <summary>
/// Opens the font metrics as a <see cref="RandomAccessFileOrArray"/> positioned on AFM data.
/// The source is, in order of precedence: the embedded resource of a standard font,
/// the file at <c>afmPath</c> (".afm" as is, ".pfm" converted through <c>Pfm2afm</c>),
/// or the in-memory <c>afmData</c> (converted when <c>IsAfmFile</c> rejects it).
/// Also updates <c>isBuiltInFont</c>.
/// </summary>
/// <returns>A new reader over AFM data; closing it is left to the caller.</returns>
/// <exception cref="iText.IO.IOException">
/// The resource is missing, the path has an unsupported extension, or no usable data is available.
/// </exception>
public virtual RandomAccessFileOrArray GetMetricsFile()
{
    isBuiltInFont = false;

    if (StandardFonts.IsStandardFont(afmPath))
    {
        isBuiltInFont = true;
        byte[] buf = new byte[1024];
        Stream resource = null;
        try
        {
            String resourcePath = FontResources.AFMS + afmPath + ".afm";
            resource = ResourceUtil.GetResourceStream(resourcePath);
            if (resource == null)
            {
                throw new iText.IO.IOException("{0} was not found as resource.").SetMessageParams(resourcePath);
            }
            MemoryStream stream = new MemoryStream();
            int read;
            // NOTE(review): the loop ends only on a negative result, so the single-argument Read
            // is presumably a project extension that returns -1 at end of stream — confirm.
            while ((read = resource.Read(buf)) >= 0)
            {
                stream.Write(buf, 0, read);
            }
            buf = stream.ToArray();
        }
        finally
        {
            if (resource != null)
            {
                try
                {
                    resource.Dispose();
                }
                catch (Exception)
                {
                    // Deliberately ignored: a failing dispose must not mask the real outcome.
                }
            }
        }
        return new RandomAccessFileOrArray(sourceFactory.CreateSource(buf));
    }

    if (afmPath != null)
    {
        String lowerCasePath = afmPath.ToLowerInvariant();
        if (lowerCasePath.EndsWith(".afm"))
        {
            return new RandomAccessFileOrArray(sourceFactory.CreateBestSource(afmPath));
        }
        if (lowerCasePath.EndsWith(".pfm"))
        {
            MemoryStream ba = new MemoryStream();
            RandomAccessFileOrArray rf = new RandomAccessFileOrArray(sourceFactory.CreateBestSource(afmPath));
            try
            {
                Pfm2afm.Convert(rf, ba);
            }
            finally
            {
                // Close the PFM file even when the conversion throws.
                rf.Close();
            }
            return new RandomAccessFileOrArray(sourceFactory.CreateSource(ba.ToArray()));
        }
        throw new iText.IO.IOException(iText.IO.IOException._1IsNotAnAfmOrPfmFontFile).SetMessageParams(afmPath);
    }

    if (afmData != null)
    {
        RandomAccessFileOrArray rf = new RandomAccessFileOrArray(sourceFactory.CreateSource(afmData));
        if (IsAfmFile(rf))
        {
            return rf;
        }
        MemoryStream ba = new MemoryStream();
        try
        {
            Pfm2afm.Convert(rf, ba);
        }
        catch (Exception)
        {
            throw new iText.IO.IOException("Invalid afm or pfm font file.");
        }
        finally
        {
            rf.Close();
        }
        return new RandomAccessFileOrArray(sourceFactory.CreateSource(ba.ToArray()));
    }

    throw new iText.IO.IOException("Invalid afm or pfm font file.");
}
/// <summary>
/// Returns the font program read from the PFB file supplied by
/// <c>fontParser.GetPostscriptBinary()</c>, with the segment headers removed.
/// The result is cached in <c>fontStreamBytes</c> and the three segment lengths in
/// <c>fontStreamLengths</c>.
/// </summary>
/// <remarks>
/// Both fields are assigned only after all three segments were read successfully, so a
/// failed attempt no longer leaves a partially filled array that later calls would return.
/// </remarks>
/// <returns>
/// The font bytes, or null for a built-in font or when the file cannot be read
/// (the reason is written to the error log).
/// </returns>
public virtual byte[] GetFontStreamBytes()
{
    if (fontParser.IsBuiltInFont())
    {
        return null;
    }
    if (fontStreamBytes != null)
    {
        return fontStreamBytes;
    }

    RandomAccessFileOrArray raf = null;
    try
    {
        raf = fontParser.GetPostscriptBinary();
        int fileLength = (int)raf.Length();
        // 18 = three segments, each preceded by a 6-byte header
        // (marker byte, type byte, 4-byte length), as consumed by the loop below.
        byte[] bytes = new byte[fileLength - 18];
        int[] lengths = new int[3];
        int bytePtr = 0;

        for (int k = 0; k < 3; ++k)
        {
            if (raf.Read() != 0x80)
            {
                ILogger logger = LoggerFactory.GetLogger(typeof(iText.IO.Font.Type1Font));
                logger.Error(LogMessageConstant.START_MARKER_MISSING_IN_PFB_FILE);
                return null;
            }
            if (raf.Read() != PFB_TYPES[k])
            {
                ILogger logger = LoggerFactory.GetLogger(typeof(iText.IO.Font.Type1Font));
                logger.Error("incorrect.segment.type.in.pfb.file");
                return null;
            }

            // Segment length, least significant byte first.
            int size = raf.Read();
            size += raf.Read() << 8;
            size += raf.Read() << 16;
            size += raf.Read() << 24;
            lengths[k] = size;

            while (size != 0)
            {
                int got = raf.Read(bytes, bytePtr, size);
                if (got < 0)
                {
                    ILogger logger = LoggerFactory.GetLogger(typeof(iText.IO.Font.Type1Font));
                    logger.Error("premature.end.in.pfb.file");
                    return null;
                }
                bytePtr += got;
                size -= got;
            }
        }

        // Publish the result only now that the whole file was parsed.
        fontStreamLengths = lengths;
        fontStreamBytes = bytes;
        return fontStreamBytes;
    }
    catch (Exception)
    {
        ILogger logger = LoggerFactory.GetLogger(typeof(iText.IO.Font.Type1Font));
        logger.Error("type1.font.file.exception");
        return null;
    }
    finally
    {
        if (raf != null)
        {
            try
            {
                raf.Close();
            }
            catch (Exception)
            {
                // Deliberately ignored: closing is best effort.
            }
        }
    }
}
/// <summary>
/// Opens the font metrics as a <see cref="RandomAccessFileOrArray"/> positioned on AFM data.
/// The source is, in order of precedence: the embedded resource of one of the built-in
/// fonts, the file at <c>afmPath</c> (".afm" as is, ".pfm" converted through <c>Pfm2afm</c>),
/// or the in-memory <c>afmData</c> (converted when <c>IsAfmFile</c> rejects it).
/// Also updates <c>isBuiltInFont</c>.
/// </summary>
/// <returns>A new reader over AFM data; closing it is left to the caller.</returns>
/// <exception cref="System.IO.IOException"/>
public virtual RandomAccessFileOrArray GetMetricsFile()
{
    isBuiltInFont = false;

    if (FontConstants.BUILTIN_FONTS_14.Contains(afmPath))
    {
        isBuiltInFont = true;
        byte[] buf = new byte[1024];
        Stream resource = null;
        try
        {
            String resourcePath = FontConstants.AFM_RESOURCE_PATH + afmPath + ".afm";
            resource = ResourceUtil.GetResourceStream(resourcePath);
            if (resource == null)
            {
                throw new iText.IO.IOException("1.not.found.as.resource").SetMessageParams(resourcePath);
            }
            MemoryStream stream = new MemoryStream();
            int read;
            // NOTE(review): the loop ends only on a negative result, so the single-argument Read
            // is presumably a project extension that returns -1 at end of stream — confirm.
            while ((read = resource.Read(buf)) >= 0)
            {
                stream.Write(buf, 0, read);
            }
            buf = stream.ToArray();
        }
        finally
        {
            if (resource != null)
            {
                try
                {
                    resource.Close();
                }
                catch (Exception)
                {
                    // Deliberately ignored: a failing close must not mask the real outcome.
                }
            }
        }
        return new RandomAccessFileOrArray(sourceFactory.CreateSource(buf));
    }

    if (afmPath != null)
    {
        String lowerCasePath = afmPath.ToLower(System.Globalization.CultureInfo.InvariantCulture);
        if (lowerCasePath.EndsWith(".afm"))
        {
            return new RandomAccessFileOrArray(sourceFactory.CreateBestSource(afmPath));
        }
        if (lowerCasePath.EndsWith(".pfm"))
        {
            MemoryStream ba = new MemoryStream();
            RandomAccessFileOrArray rf = new RandomAccessFileOrArray(sourceFactory.CreateBestSource(afmPath));
            try
            {
                Pfm2afm.Convert(rf, ba);
            }
            finally
            {
                // Close the PFM file even when the conversion throws.
                rf.Close();
            }
            return new RandomAccessFileOrArray(sourceFactory.CreateSource(ba.ToArray()));
        }
        throw new iText.IO.IOException(iText.IO.IOException._1IsNotAnAfmOrPfmFontFile).SetMessageParams(afmPath);
    }

    if (afmData != null)
    {
        RandomAccessFileOrArray rf = new RandomAccessFileOrArray(sourceFactory.CreateSource(afmData));
        if (IsAfmFile(rf))
        {
            return rf;
        }
        MemoryStream ba = new MemoryStream();
        try
        {
            Pfm2afm.Convert(rf, ba);
        }
        catch (Exception)
        {
            throw new iText.IO.IOException("invalid.afm.or.pfm.font.file");
        }
        finally
        {
            rf.Close();
        }
        return new RandomAccessFileOrArray(sourceFactory.CreateSource(ba.ToArray()));
    }

    throw new iText.IO.IOException("invalid.afm.or.pfm.font.file");
}
/// <summary>Parses images from pdf document.</summary>
/// <remarks>
/// Only indirectly referenced XObjects whose subtype is Image are considered. Pages without
/// resources or XObjects, and references that cannot be resolved, are skipped.
/// Any other failure is written to the console and ends the scan; the images collected
/// up to that point are still returned.
/// Each returned <c>ImageStream</c> stays open because it is stored next to the image
/// created from it; disposing both is left to the caller.
/// </remarks>
/// <param name="filePath">The pdf-file full path.</param>
/// <returns>Collection of images and streams that are associated with them.</returns>
public static List<ParsedImage> ParseImages(string filePath)
{
    var imgList = new List<ParsedImage>();
    var raf = new RandomAccessFileOrArray(filePath);
    PdfReader reader;
    try
    {
        reader = new PdfReader(raf, null);
    }
    catch
    {
        // Do not leak the file when the document cannot be opened; the caller still sees the error.
        raf.Close();
        throw;
    }

    try
    {
        for (var pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
        {
            var pg = reader.GetPageN(pageNumber);
            var size = reader.GetPageSize(pageNumber);
            var res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
            if (res == null)
            {
                // A page without resources must not abort the remaining pages.
                continue;
            }
            var xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
            if (xobj == null)
            {
                continue;
            }

            foreach (var name in xobj.Keys)
            {
                var obj = xobj.Get(name);
                if (!obj.IsIndirect())
                {
                    continue;
                }
                var tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                if (tg == null)
                {
                    continue;
                }
                var type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
                if (!PdfName.IMAGE.Equals(type))
                {
                    continue;
                }

                // Look the stream up by its object number and read it undecoded.
                var refIndex = ((PRIndirectReference)obj).Number;
                var pdfStream = (PRStream)reader.GetPdfObject(refIndex);
                var bytes = PdfReader.GetStreamBytesRaw(pdfStream);
                if (bytes == null)
                {
                    continue;
                }

                var memStream = new MemoryStream(bytes) { Position = 0 };
                Image img;
                try
                {
                    img = Image.FromStream(memStream);
                }
                catch
                {
                    // The stream is only kept alive for a successfully created image.
                    memStream.Dispose();
                    throw;
                }

                imgList.Add(new ParsedImage
                {
                    Image = img,
                    ImageStream = memStream,
                    Format = img.RawFormat,
                    Width = size.Width,
                    Height = size.Height,
                    PerformedRotation = RotateFlipType.RotateNoneFlipNone
                });
            }
        }
    }
    catch (Exception exception)
    {
        Console.WriteLine(exception.Message);
    }
    finally
    {
        reader.Close();
        raf.Close();
    }
    return imgList;
}
/// <summary>
/// Writes a diagnostic dump of one page to <paramref name="outp"/>. Each section is
/// emitted only when its switch is set: <c>_outputDictionary</c>, <c>_outputXObject</c>,
/// <c>_outputContentStream</c>, <c>_outputText</c> and <c>_outputTextBlocks1..3</c>.
/// The page content is always run through the text extraction strategy, whatever the switches.
/// </summary>
/// <param name="reader">Reader of the document being dumped.</param>
/// <param name="pageNum">Number of the page to dump.</param>
/// <param name="outp">Destination of the dump.</param>
public static void xpdfPage(PdfReader reader, int pageNum, TextWriter outp)
{
    outp.WriteLine("==============Page " + pageNum + "====================");
    PdfDictionary pageDict = reader.GetPageN(pageNum);

    if (_outputDictionary)
    {
        outp.WriteLine("- - - - - Dictionary - - - - - -");
        string detail = GetDictionaryDetail(pageDict);
        outp.WriteLine(detail);
    }

    if (_outputXObject)
    {
        outp.WriteLine("- - - - - XObject summary - - - - - -");
        PdfDictionary resources = pageDict.GetAsDict(PdfName.RESOURCES);
        outp.WriteLine(PdfContentReaderTool.GetXObjectDetail(resources));
    }

    if (_outputContentStream)
    {
        outp.WriteLine("- - - - - Content stream - - - - - -");
        RandomAccessFileOrArray source = reader.SafeFile;
        byte[] raw = reader.GetPageContent(pageNum, source);
        source.Close();
        outp.Flush();
        // Each byte is emitted as the character with the same code.
        for (int k = 0; k < raw.Length; k++)
        {
            outp.Write((char)raw[k]);
        }
        outp.Flush();
    }

    // One extraction pass feeds the text section and the three block sections.
    Test_iTextSharp.LocationTextExtractionStrategy strategy = new Test_iTextSharp.LocationTextExtractionStrategy();
    Test_iTextSharp.PdfTools.ProcessContentPage(reader, pageNum, strategy);

    if (_outputText)
    {
        outp.WriteLine("- - - - - Text extraction - - - - - -");
        string pageText = strategy.GetResultantText();
        if (pageText.Length == 0)
        {
            outp.WriteLine("No text found on page " + pageNum);
        }
        else
        {
            outp.WriteLine(pageText);
            outp.WriteLine();
        }
    }

    if (_outputTextBlocks1)
    {
        outp.WriteLine("- - - - - Text blocks extraction 1 - - - - - -");
        foreach (Test_iTextSharp.TextBlock block in strategy.textBlocks)
        {
            PrintTextBlock(outp, block, 0);
        }
        outp.WriteLine();
    }

    if (_outputTextBlocks2)
    {
        outp.WriteLine("- - - - - Text blocks extraction 2 - - - - - -");
        foreach (Test_iTextSharp.TextBlock block in strategy.textBlocks)
        {
            outp.Write("block ");
            outp.WriteLine(block.GetText());
            if (block.childs.Count > 0)
            {
                outp.WriteLine(" **** warning childs blocks not printed ****");
            }
        }
        outp.WriteLine();
    }

    if (_outputTextBlocks3)
    {
        outp.WriteLine("- - - - - Text blocks extraction 3 - - - - - -");
        foreach (Test_iTextSharp.TextBlock block in strategy.textBlocks)
        {
            // The first line of a block carries the "block " label, the following ones a blank.
            string prefix = "block ";
            foreach (string textLine in block.GetTextByLines(_outputMaxCol))
            {
                outp.Write(prefix);
                prefix = " ";
                outp.WriteLine(textLine);
            }
            if (block.childs.Count > 0)
            {
                outp.WriteLine(" **** warning childs blocks not printed ****");
            }
        }
        outp.WriteLine();
    }

    outp.WriteLine();
}
/// <summary>
/// Builds the calculation report PDF in two passes and opens the result.
/// Pass 1 writes the tables of every element of <paramref name="Nervios"/> to
/// <paramref name="RutaInicial"/>, five tables per page, each page starting with the header.
/// Pass 2 copies to <paramref name="RutaFinal"/> only the pages whose content stream is
/// longer than the blank-page threshold, and then starts <paramref name="RutaFinal"/> as a process.
/// </summary>
/// <remarks>
/// Every failure is reported to the user through a single message box and is not rethrown.
/// All files are released even when a step fails, so a failed run does not keep them locked.
/// </remarks>
/// <param name="RutaInicial">Path of the intermediate PDF (overwritten).</param>
/// <param name="RutaFinal">Path of the final PDF (overwritten).</param>
/// <param name="Nervios">Elements whose tables are written to the report.</param>
public static void CrearPDF(string RutaInicial, string RutaFinal, List<cNervio> Nervios)
{
    const int TablasPorPagina = 5;
    // Pages whose content stream is at most this many bytes are treated as blank.
    const int EnBlanco = 100;

    try
    {
        // Pass 1: write the report.
        using (FileStream stream = new FileStream(RutaInicial, FileMode.Create))
        {
            Document Doc = new Document(PageSize.LETTER, 30, 25, 90, 50);
            PdfWriter Writer = PdfWriter.GetInstance(Doc, stream);
            Doc.AddTitle("Memorias de Cálculo");
            Doc.AddCreator("efe Prima Ce");
            Doc.Open();
            EncabezadoEfePrimaCe(Doc, Writer);

            List<PdfPTable> Tablas = new List<PdfPTable>();
            foreach (cNervio Nervio in Nervios)
            {
                Tablas.AddRange(CrearTablasNervio(Nervio));
            }

            for (int i = 0; i < Tablas.Count; i++)
            {
                // Start a new page, with its header, after every full group of tables.
                if (i > 0 && i % TablasPorPagina == 0)
                {
                    Doc.NewPage();
                    EncabezadoEfePrimaCe(Doc, Writer);
                }
                Doc.Add(Tablas[i]);
            }

            Doc.Close();
            Writer.Close();
        }

        // Pass 2: drop the blank pages.
        PdfReader reader = new PdfReader(RutaInicial);
        try
        {
            RandomAccessFileOrArray raf = new RandomAccessFileOrArray(RutaInicial);
            try
            {
                using (FileStream salida = new FileStream(RutaFinal, FileMode.Create))
                {
                    Document Doc2 = new Document(reader.GetPageSizeWithRotation(1));
                    PdfCopy Writer2 = new PdfCopy(Doc2, salida);
                    Doc2.Open();

                    int n = reader.NumberOfPages;
                    for (int i = 1; i <= n; i++)
                    {
                        byte[] Bites = reader.GetPageContent(i, raf);
                        if (Bites.Length > EnBlanco)
                        {
                            PdfImportedPage page = Writer2.GetImportedPage(reader, i);
                            Writer2.AddPage(page);
                        }
                    }

                    Doc2.Close();
                    Writer2.Close();
                }
            }
            finally
            {
                raf.Close();
            }
        }
        finally
        {
            reader.Close();
        }

        // Start the final file; disposing only releases the local process handle.
        using (System.Diagnostics.Process Proc = new System.Diagnostics.Process())
        {
            Proc.StartInfo.FileName = RutaFinal;
            Proc.Start();
        }
    }
    catch
    {
        cFunctionsProgram.VentanaEmergenteExclamacion("La acción no se puede completar porque el archivo esta abierto.");
    }
}