/// <summary>
/// Creates a CustomPdfReader that reads the whole document from a stream.
/// </summary>
/// <param name="isp">stream holding the PDF document</param>
/// <param name="certificate">certificate stored on the reader before the document is read</param>
/// <param name="certificateKey">key stored on the reader before the document is read</param>
public CustomPdfReader(Stream isp, X509Certificate certificate, ICipherParameters certificateKey)
{
    // Credentials are stored first so that ReadPdf() can see them.
    this.certificate = certificate;
    this.certificateKey = certificateKey;

    RandomAccessFileOrArray source = new RandomAccessFileOrArray(isp);
    this.tokens = new PRTokeniser(source);
    ReadPdf();
}
/// <summary>
/// Tokenises <paramref name="data"/> and asserts that the first valid token is a
/// NUMBER whose string value equals <paramref name="expectedValue"/>.
/// </summary>
/// <param name="data">text to tokenise</param>
/// <param name="expectedValue">expected string value of the first token</param>
private void CheckNumberValue(String data, String expectedValue)
{
    RandomAccessFileOrArray input = new RandomAccessFileOrArray(GetBytes(data));
    PRTokeniser tokeniser = new PRTokeniser(input);

    tokeniser.NextValidToken();

    Assert.AreEqual(PRTokeniser.TokType.NUMBER, tokeniser.TokenType, "Wrong type");
    Assert.AreEqual(expectedValue, tokeniser.StringValue, "Wrong multiple minus signs number handling");
}
/// <summary>
/// Checks whether <paramref name="line"/> begins with "number number obj".
/// </summary>
/// <param name="line">the bytes to inspect</param>
/// <returns>
/// a two-element array { first number, second number } when the pattern matches;
/// <c>null</c> otherwise, including when tokenising throws
/// </returns>
public static long[] CheckObjectStart(byte[] line)
{
    try
    {
        PRTokeniser tk = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(line)));
        if (tk.NextToken() && tk.TokenType == TokType.NUMBER)
        {
            int objectNumber = tk.IntValue;
            if (tk.NextToken() && tk.TokenType == TokType.NUMBER)
            {
                int generation = tk.IntValue;
                if (tk.NextToken() && tk.StringValue.Equals("obj"))
                {
                    return new long[] { objectNumber, generation };
                }
            }
        }
    }
    catch
    {
        // Best effort: any failure while tokenising means "not an object start".
    }
    return null;
}
/// <summary>
/// Checks whether <paramref name="line"/> begins with "number number obj".
/// </summary>
/// <param name="line">the bytes to inspect</param>
/// <returns>
/// a two-element array { first number, second number } when the pattern matches;
/// <c>null</c> otherwise, including when tokenising throws
/// </returns>
public static int[] CheckObjectStart(byte[] line)
{
    int[] header = null;
    try
    {
        PRTokeniser tk = new PRTokeniser(line);

        if (!(tk.NextToken() && tk.TokenType == TokType.NUMBER))
        {
            return header;
        }
        int first = tk.IntValue;

        if (!(tk.NextToken() && tk.TokenType == TokType.NUMBER))
        {
            return header;
        }
        int second = tk.IntValue;

        // The third token must exist and be the literal keyword.
        if (tk.NextToken() && tk.StringValue.Equals("obj"))
        {
            header = new int[] { first, second };
        }
        return header;
    }
    catch
    {
        // Best effort: a tokenising failure is reported as "no match".
    }
    return null;
}
/// <summary>
/// Tokenises <paramref name="data"/> and asserts that the successive valid tokens
/// have exactly the given types, in order.
/// </summary>
/// <param name="data">text to tokenise</param>
/// <param name="expectedTypes">expected token type at each position</param>
private void CheckTokenTypes(String data, params PRTokeniser.TokType[] expectedTypes)
{
    RandomAccessFileOrArray input = new RandomAccessFileOrArray(GetBytes(data));
    PRTokeniser tok = new PRTokeniser(input);

    int position = 0;
    foreach (PRTokeniser.TokType expected in expectedTypes)
    {
        tok.NextValidToken();
        Assert.AreEqual(expected, tok.TokenType, "Position " + position);
        position++;
    }
}
/// <summary>
/// Decodes ASCII-hex data: pairs of hex digits become bytes, whitespace is
/// skipped and '&gt;' ends the data. A dangling final digit is treated as the
/// high nibble of a byte whose low nibble is zero.
/// </summary>
/// <param name="inb">the encoded bytes</param>
/// <returns>the decoded bytes</returns>
public static byte[] ASCIIHexDecode(byte[] inb)
{
    MemoryStream decoded = new MemoryStream();
    bool haveHighNibble = false;
    int highNibble = 0;

    foreach (byte raw in inb)
    {
        int ch = raw;
        if (ch == '>')
        {
            break;
        }
        if (PRTokeniser.isWhitespace(ch))
        {
            continue;
        }

        int digit = PRTokeniser.getHex(ch);
        if (digit == -1)
        {
            throw new RuntimeException("Illegal character in ASCIIHexDecode.");
        }

        if (haveHighNibble)
        {
            decoded.WriteByte((byte)((highNibble << 4) + digit));
        }
        else
        {
            highNibble = digit;
        }
        haveHighNibble = !haveHighNibble;
    }

    // Odd number of digits: flush the pending high nibble.
    if (haveHighNibble)
    {
        decoded.WriteByte((byte)(highNibble << 4));
    }
    return decoded.ToArray();
}
/** Decodes an escaped name in the form "/AB#20CD" into "AB CD".
 * The first character (the leading '/') is always skipped. Decoding stops at a
 * truncated "#x" escape and returns what was decoded up to that point; a
 * <CODE>null</CODE> name yields an empty string.
 * @param name the name to decode
 * @return the decoded name
 */
public static string decodeName(string name)
{
    StringBuilder buf = new StringBuilder();
    if (name == null)
    {
        // Previously surfaced as a swallowed exception; same result, no throw.
        return buf.ToString();
    }

    int len = name.Length;
    for (int k = 1; k < len; ++k)
    {
        char c = name[k];
        if (c == '#')
        {
            // An escape needs two hex digits after '#'. A truncated escape used to
            // end decoding through a caught out-of-range exception; stop explicitly.
            if (k + 2 >= len)
            {
                break;
            }
            c = (char)((PRTokeniser.getHex(name[k + 1]) << 4) + PRTokeniser.getHex(name[k + 2]));
            k += 2;
        }
        buf.Append(c);
    }
    return buf.ToString();
}
/// <summary>
/// Reads objects out of an object stream into <c>xrefObj</c>. The stream begins
/// with N pairs of numbers (object number, offset); each offset is relative to
/// the stream's FIRST entry. Only indexes present as keys in
/// <paramref name="map"/> are read.
/// </summary>
/// <param name="stream">the object stream; nothing happens when it is null</param>
/// <param name="map">the indexes (0-based position in the stream) to load</param>
virtual protected internal void ReadObjStm(PRStream stream, IntHashtable map)
{
    if (stream == null)
    {
        return;
    }

    int first = stream.GetAsNumber(PdfName.FIRST).IntValue;
    int n = stream.GetAsNumber(PdfName.N).IntValue;
    byte[] b = GetStreamBytes(stream, tokens.File);

    // Swap in a tokeniser over the decoded stream; the finally block restores the original.
    PRTokeniser saveTokens = tokens;
    tokens = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(b)));
    try
    {
        int[] address = new int[n];
        int[] objNumber = new int[n];

        // Header: n pairs of "objectNumber offset".
        for (int k = 0; k < n; ++k)
        {
            if (!tokens.NextToken() || tokens.TokenType != PRTokeniser.TokType.NUMBER)
            {
                throw new InvalidPdfException(MessageLocalization.GetComposedMessage("error.reading.objstm"));
            }
            objNumber[k] = tokens.IntValue;

            if (!tokens.NextToken() || tokens.TokenType != PRTokeniser.TokType.NUMBER)
            {
                throw new InvalidPdfException(MessageLocalization.GetComposedMessage("error.reading.objstm"));
            }
            address[k] = tokens.IntValue + first;
        }

        for (int k = 0; k < n; ++k)
        {
            if (!map.ContainsKey(k))
            {
                continue;
            }

            tokens.Seek(address[k]);
            tokens.NextToken();

            PdfObject obj;
            if (tokens.TokenType == PRTokeniser.TokType.NUMBER)
            {
                // A number token is turned into a PdfNumber directly.
                obj = new PdfNumber(tokens.StringValue);
            }
            else
            {
                // Anything else: rewind and let the general object reader handle it.
                tokens.Seek(address[k]);
                obj = ReadPRObject();
            }
            xrefObj[objNumber[k]] = obj;
        }
    }
    finally
    {
        tokens = saveTokens;
    }
}
/**
 * Opens a PDF document in "partial" mode: the reader is flagged as partial and
 * <CODE>ReadPdfPartial()</CODE> is used instead of a full read, so parts of the
 * document are read as needed. The document is left open but may be closed at
 * any time with <CODE>PdfReader.Close()</CODE>; reopen is automatic.
 * @param raf the document location
 * @param ownerPassword the password or <CODE>null</CODE> for no password
 * @throws IOException on error
 */
public PdfReader(RandomAccessFileOrArray raf, byte[] ownerPassword)
{
    this.partial = true;
    this.password = ownerPassword;
    this.tokens = new PRTokeniser(raf);
    ReadPdfPartial();
}
/**
 * Reads and parses a PDF document located at a URI.
 * @param url the Uri of the document
 * @param ownerPassword the password to read the document
 * @throws IOException on error
 */
public PdfReader(Uri url, byte[] ownerPassword)
{
    this.password = ownerPassword;

    RandomAccessFileOrArray source = new RandomAccessFileOrArray(url);
    this.tokens = new PRTokeniser(source);
    ReadPdf();
}
/**
 * Reads and parses a PDF document held in memory.
 * @param pdfIn the byte array with the document
 * @param ownerPassword the password to read the document
 * @throws IOException on error
 */
public PdfReader(byte[] pdfIn, byte[] ownerPassword)
{
    this.password = ownerPassword;
    this.tokens = new PRTokeniser(pdfIn);
    ReadPdf();
}
/// <summary>
/// Parses a default-appearance (DA) string into a map from operator to the
/// operands that preceded it. The colour operators RG/G/K are stored under
/// <c>STROKE_COLOR</c> and rg/g/k under <c>FILL_COLOR</c>; any other operator is
/// stored under its own text. When an operator occurs more than once, the last
/// occurrence wins.
/// </summary>
/// <param name="DA">the default-appearance string</param>
/// <returns>operator key mapped to its operand list (numbers as PdfNumber, names as PdfName, anything else as string)</returns>
IDictionary<string, IList<object>> ParseDAParam(PdfString DA)
{
    IDictionary<string, IList<object>> commandArguments = new Dictionary<string, IList<object>>();
    PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(DA.GetBytes())));
    IList<object> currentArguments = new List<object>();

    while (tokeniser.NextToken())
    {
        switch (tokeniser.TokenType)
        {
            case PRTokeniser.TokType.OTHER:
            {
                // An operator closes the current operand list.
                String key = tokeniser.StringValue;
                if (key == "RG" || key == "G" || key == "K")
                {
                    key = STROKE_COLOR;
                }
                else if (key == "rg" || key == "g" || key == "k")
                {
                    key = FILL_COLOR;
                }
                // The indexer adds or overwrites, so no ContainsKey/Add pair is needed.
                commandArguments[key] = currentArguments;
                currentArguments = new List<object>();
                break;
            }
            case PRTokeniser.TokType.NUMBER:
                currentArguments.Add(new PdfNumber(tokeniser.StringValue));
                break;
            case PRTokeniser.TokType.NAME:
                currentArguments.Add(new PdfName(tokeniser.StringValue));
                break;
            default:
                currentArguments.Add(tokeniser.StringValue);
                break;
        }
    }
    return commandArguments;
}
/**
 * Reads and parses a PDF document held in memory.
 * @param pdfIn the byte array with the document
 * @throws IOException on error
 */
public PdfReader(byte[] pdfIn)
{
    this.tokens = new PRTokeniser(pdfIn);
    readPdf();
}
/**
 * Reads and parses a PDF document from a file.
 * @param filename the file name of the document
 * @throws IOException on error
 */
public PdfReader(string filename)
{
    this.tokens = new PRTokeniser(filename);
    readPdf();
}
/// <summary>
/// Decodes ASCII85 (base-85) data. Whitespace is skipped, '~' ends the data and
/// a 'z' at a group boundary stands for four zero bytes.
/// </summary>
/// <remarks>
/// A final partial group of k characters (2 to 4) encodes k - 1 bytes. It is
/// padded with 'u' (the highest digit) to a full group, decoded as a 32-bit
/// value, and only the leading k - 1 bytes are kept. The previous code combined
/// the partial digits without scaling them to a full group, which produced
/// wrong trailing bytes.
/// </remarks>
/// <param name="inb">the encoded bytes</param>
/// <returns>the decoded bytes</returns>
public static byte[] ASCII85Decode(byte[] inb)
{
    MemoryStream ostr = new MemoryStream();
    int state = 0;
    int[] chn = new int[5];

    for (int k = 0; k < inb.Length; ++k)
    {
        int ch = inb[k] & 0xff;
        if (ch == '~')
        {
            break;
        }
        if (PRTokeniser.isWhitespace(ch))
        {
            continue;
        }
        if (ch == 'z' && state == 0)
        {
            ostr.WriteByte(0);
            ostr.WriteByte(0);
            ostr.WriteByte(0);
            ostr.WriteByte(0);
            continue;
        }
        if (ch < '!' || ch > 'u')
        {
            throw new RuntimeException("Illegal character in ASCII85Decode.");
        }

        chn[state] = ch - '!';
        ++state;
        if (state == 5)
        {
            state = 0;
            // long: a full group can exceed Int32.MaxValue.
            long r = 0;
            for (int j = 0; j < 5; ++j)
            {
                r = r * 85 + chn[j];
            }
            ostr.WriteByte((byte)((r >> 24) & 0xff));
            ostr.WriteByte((byte)((r >> 16) & 0xff));
            ostr.WriteByte((byte)((r >> 8) & 0xff));
            ostr.WriteByte((byte)(r & 0xff));
        }
    }

    if (state == 1)
    {
        // A single leftover character cannot encode even one byte.
        throw new RuntimeException("Illegal length in ASCII85Decode.");
    }
    if (state > 1)
    {
        // Pad the partial group with the highest digit so that truncating the
        // result yields the original leading bytes.
        for (int j = state; j < 5; ++j)
        {
            chn[j] = 'u' - '!';
        }
        long r2 = 0;
        for (int j = 0; j < 5; ++j)
        {
            r2 = r2 * 85 + chn[j];
        }
        int byteCount = state - 1;
        for (int j = 0; j < byteCount; ++j)
        {
            ostr.WriteByte((byte)((r2 >> (24 - 8 * j)) & 0xff));
        }
    }
    return ostr.ToArray();
}
/// <summary>
/// Lets the user pick a PDF file and stores, per page, the concatenated string
/// tokens of that page's content in <c>_pdfPages</c>. Any failure is reported
/// with a message box. The current index is reset to 1 in every case.
/// </summary>
private void OpenPdf()
{
    _pdfPages.Clear();
    try
    {
        var openFileDialog = new OpenFileDialog
        {
            DefaultExt = ".pdf",
            Filter = "Pdf documents (.pdf)|*.pdf"
        };
        bool? result = openFileDialog.ShowDialog();
        if (result == true)
        {
            string filename = openFileDialog.FileName;
            var pdfReader = new PdfReader(filename);
            try
            {
                for (int i = 1; i <= pdfReader.NumberOfPages; i++)
                {
                    byte[] pagesBytes = pdfReader.GetPageContent(i);
                    var token = new PRTokeniser(pagesBytes);
                    var pageContent = new StringBuilder();
                    while (token.NextToken())
                    {
                        // Only string tokens are kept as page text.
                        if (token.TokenType == PRTokeniser.TokType.STRING)
                        {
                            pageContent.Append(token.StringValue);
                        }
                    }
                    _pdfPages.Add(pageContent.ToString());
                }
            }
            finally
            {
                // The reader was never closed before; release it even when a page fails.
                pdfReader.Close();
            }
        }
        RaisePropertyChanged("MaxIndex");
    }
    catch (Exception)
    {
        MessageBox.Show("Fail to load file");
    }
    CurrentIndex = 1;
}
/**
 * Processes PDF syntax: parses the content stream operator by operator and
 * dispatches each one. The resources are pushed for the duration of the call
 * and popped when parsing finishes.
 * @param contentBytes the bytes of a content stream
 * @param resources the resources that come with the content stream
 */
public void ProcessContent(byte[] contentBytes, PdfDictionary resources)
{
    this.resources.Push(resources);

    PdfContentParser parser = new PdfContentParser(new PRTokeniser(contentBytes));
    List<PdfObject> operands = new List<PdfObject>();

    while (parser.Parse(operands).Count > 0)
    {
        // The operator is the last element parsed; the operands precede it.
        PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];
        bool isInlineImage = oper.ToString() == "BI";

        if (!isInlineImage)
        {
            InvokeOperator(oper, operands);
            continue;
        }

        // we don't call invokeOperator for embedded images - this is one area of the PDF spec that is particularly nasty and inconsistent
        ImageRenderInfo renderInfo = ImageRenderInfo.CreatedForEmbeddedImage(Gs().ctm, InlineImageUtils.ParseInlineImage(parser));
        renderListener.RenderImage(renderInfo);
    }

    this.resources.Pop();
}
/**
 * Creates a content parser that reads from the supplied tokeniser.
 * The tokeniser is stored as given.
 * @param tokeniser the tokeniser with the content
 */
public PdfContentParser(PRTokeniser tokeniser)
{
    this.tokeniser = tokeniser;
}
/// <summary>
/// Extracts title and text from a crawled PDF response. Does nothing unless the
/// response status is OK and the content type is PDF. The title comes from the
/// document's "Title" info entry; the text is the concatenation of all string
/// tokens of all pages, separated by spaces.
/// </summary>
/// <param name="crawler">the crawler (must not be null)</param>
/// <param name="propertyBag">the crawl result; receives Title and Text</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    bool isOkPdf = propertyBag.StatusCode == HttpStatusCode.OK
        && IsPdfContent(propertyBag.ContentType);
    if (!isOkPdf)
    {
        return;
    }

    PdfReader pdfReader = new PdfReader(propertyBag.Response);
    try
    {
        object title = pdfReader.Info["Title"];
        if (!title.IsNull())
        {
            string pdfTitle = Convert.ToString(title, CultureInfo.InvariantCulture).Trim();
            if (!pdfTitle.IsNullOrEmpty())
            {
                propertyBag.Title = pdfTitle;
            }
        }

        StringBuilder text = new StringBuilder();
        // Following code from:
        // http://www.vbforums.com/showthread.php?t=475759
        int pageCount = 1;
        while (pageCount <= pdfReader.NumberOfPages)
        {
            byte[] pageBytes = pdfReader.GetPageContent(pageCount);
            pageCount++;
            if (pageBytes.IsNull())
            {
                continue;
            }

            PRTokeniser token = new PRTokeniser(pageBytes);
            while (token.NextToken())
            {
                int tknType = token.TokenType;
                string tknValue = token.StringValue;

                bool isString = tknType == PRTokeniser.TK_STRING;
                if (isString)
                {
                    text.Append(token.StringValue);
                }

                // NOTE(review): 1 and 10 are raw token-type codes; presumably the
                // number and "other" (operator) types - confirm against PRTokeniser
                // and replace with its named constants.
                bool isSeparator = isString
                    || (tknType == 1 && tknValue == "-600")
                    || (tknType == 10 && tknValue == "TJ");
                if (isSeparator)
                {
                    text.Append(" ");
                }
            }
        }

        propertyBag.Text = text.ToString();
    }
    finally
    {
        pdfReader.Close();
    }
}
// ---------------------------------------------------------------------------
/**
 * Parses the first page of a PDF using PRTokeniser and returns every string
 * token found in its content stream, one per line.
 * @param src the original PDF file
 * @return the string tokens of page 1, each followed by a line break
 */
public string ParsePdf(byte[] src)
{
    PdfReader reader = new PdfReader(src);
    try
    {
        // we can inspect the syntax of the imported page
        byte[] streamBytes = reader.GetPageContent(1);
        StringBuilder sb = new StringBuilder();
        PRTokeniser tokenizer = new PRTokeniser(streamBytes);
        while (tokenizer.NextToken())
        {
            if (tokenizer.TokenType == PRTokeniser.TokType.STRING)
            {
                sb.AppendLine(tokenizer.StringValue);
            }
        }
        return sb.ToString();
    }
    finally
    {
        // The reader was never closed before; release it on every path.
        reader.Close();
    }
}
/**
 * Reads and parses a PDF document from a file.
 * @param filename the file name of the document
 * @param ownerPassword the password to read the document
 * @throws IOException on error
 */
public PdfReader(String filename, byte[] ownerPassword)
{
    this.password = ownerPassword;
    this.tokens = new PRTokeniser(filename);
    ReadPdf();
}
/// <summary>
/// Reads a single object from an object stream. The stream header is walked up
/// to and including pair number <paramref name="idx"/> (0-based); the second
/// number of that pair plus the stream's FIRST entry is the object's position.
/// </summary>
/// <param name="stream">the object stream</param>
/// <param name="idx">0-based index of the object inside the stream</param>
/// <returns>the object read at the computed position</returns>
protected internal PdfObject ReadOneObjStm(PRStream stream, int idx)
{
    int first = stream.GetAsNumber(PdfName.FIRST).IntValue;
    byte[] b = GetStreamBytes(stream, tokens.File);

    // Swap in a tokeniser over the decoded stream; the finally block restores the original.
    PRTokeniser saveTokens = tokens;
    tokens = new PRTokeniser(b);
    try
    {
        int address = 0;
        int pairCount = idx + 1;
        for (int pair = 0; pair < pairCount; ++pair)
        {
            // Each header entry is two numbers; only the second one is used.
            for (int part = 0; part < 2; ++part)
            {
                if (!tokens.NextToken() || tokens.TokenType != PRTokeniser.TK_NUMBER)
                {
                    throw new InvalidPdfException(MessageLocalization.GetComposedMessage("error.reading.objstm"));
                }
            }
            address = tokens.IntValue + first;
        }

        tokens.Seek(address);
        return ReadPRObject();
    }
    finally
    {
        tokens = saveTokens;
    }
}
/**
 * Reads and parses a PDF document from a file, storing a certificate and its
 * key on the reader before the document is read.
 * @param filename the file name of the document
 * @param certificate the certificate to read the document
 * @param certificateKey the private key of the certificate
 * @throws IOException on error
 */
public PdfReader(String filename, X509Certificate certificate, ICipherParameters certificateKey)
{
    // Credentials are stored first so that ReadPdf() can see them.
    this.certificate = certificate;
    this.certificateKey = certificateKey;

    this.tokens = new PRTokeniser(filename);
    ReadPdf();
}
/// <summary>
/// Parses a stream object and removes OCGs: XObjects whose OC dictionary names
/// a layer listed in <c>ocgs</c> are removed from the resources, then the
/// content stream is run through <c>ProcessOperator</c> and the bytes collected
/// in <c>baos</c> replace the stream's data.
/// </summary>
/// <param name="stream"> a stream object </param>
/// <param name="resources"> the resources dictionary of that object (containing info about the OCGs) </param>
public virtual void Parse(PRStream stream, PdfDictionary resources)
{
    baos = new MemoryStream();
    properties = resources.GetAsDict(PdfName.PROPERTIES);
    xobj = new HashSet2<PdfName>();
    PdfDictionary xobjects = resources.GetAsDict(PdfName.XOBJECT);
    if (xobjects != null)
    {
        // remove XObject (form or image) that belong to an OCG that needs to be removed
        foreach (PdfName name in xobjects.Keys)
        {
            PRStream xobject = (PRStream) xobjects.GetAsStream(name);
            if (xobject == null)
            {
                // NOTE(review): presumably GetAsStream yields null for a non-stream
                // entry; skipping avoids a NullReferenceException on the next line.
                continue;
            }
            PdfDictionary oc = xobject.GetAsDict(PdfName.OC);
            if (oc != null)
            {
                PdfString ocname = oc.GetAsString(PdfName.NAME);
                if (ocname != null && ocgs.Contains(ocname.ToString()))
                {
                    xobj.Add(name);
                }
            }
        }
        // Removal happens in a second pass so the dictionary is not modified while enumerating it.
        foreach (PdfName name in xobj)
        {
            xobjects.Remove(name);
        }
    }
    // parse the content stream
    byte[] contentBytes = PdfReader.GetStreamBytes(stream);
    PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(contentBytes));
    PdfContentParser ps = new PdfContentParser(tokeniser);
    List<PdfObject> operands = new List<PdfObject>();
    while (ps.Parse(operands).Count > 0)
    {
        PdfLiteral @operator = (PdfLiteral) operands[operands.Count - 1];
        ProcessOperator(this, @operator, operands);
    }
    baos.Flush();
    baos.Close();
    // ToArray() returns exactly the bytes written and still works on a closed
    // MemoryStream. GetBuffer() returned the whole internal buffer, including
    // unused capacity, which padded the stream data with extra bytes.
    stream.SetData(baos.ToArray());
}
/**
 * Reads and parses a PDF document from a stream.
 * @param isp the <CODE>Stream</CODE> containing the document. The stream is read to the
 * end but is not closed
 * @param ownerPassword the password to read the document
 * @throws IOException on error
 */
public PdfReader(Stream isp, byte[] ownerPassword)
{
    this.password = ownerPassword;

    RandomAccessFileOrArray source = new RandomAccessFileOrArray(isp);
    this.tokens = new PRTokeniser(source);
    ReadPdf();
}
/**
 * Processes PDF syntax: parses the content stream operator by operator and
 * dispatches each one. Inline images ("BI") are parsed separately and handed
 * to the render listener, using the resources' colour-space dictionary when
 * resources are present.
 * <b>Note:</b> If you re-use a given {@link PdfContentStreamProcessor}, you must call {@link PdfContentStreamProcessor#reset()}
 * @param contentBytes the bytes of a content stream
 * @param resources the resources that come with the content stream
 */
public void ProcessContent(byte[] contentBytes, PdfDictionary resources)
{
    this.resources.Push(resources);

    RandomAccessFileOrArray content = new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(contentBytes));
    PdfContentParser parser = new PdfContentParser(new PRTokeniser(content));
    List<PdfObject> operands = new List<PdfObject>();

    while (parser.Parse(operands).Count > 0)
    {
        // The operator is the last element parsed; the operands precede it.
        PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];
        bool isInlineImage = oper.ToString() == "BI";

        if (!isInlineImage)
        {
            InvokeOperator(oper, operands);
            continue;
        }

        // we don't call invokeOperator for embedded images - this is one area of the PDF spec that is particularly nasty and inconsistent
        PdfDictionary colorSpaceDic = null;
        if (resources != null)
        {
            colorSpaceDic = resources.GetAsDict(PdfName.COLORSPACE);
        }
        ImageRenderInfo renderInfo = ImageRenderInfo.CreateForEmbeddedImage(Gs().ctm, InlineImageUtils.ParseInlineImage(parser, colorSpaceDic), colorSpaceDic);
        renderListener.RenderImage(renderInfo);
    }

    this.resources.Pop();
}
/// <summary>
/// Checks whether <paramref name="line"/> begins with "number number obj".
/// </summary>
/// <param name="line">the bytes to inspect</param>
/// <returns>
/// a two-element array { first number, second number } when the pattern matches;
/// <c>null</c> otherwise, including when tokenising throws
/// </returns>
public static long[] CheckObjectStart (byte[] line)
{
    try
    {
        PRTokeniser tk = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(line)));

        // The first two tokens must both be numbers.
        long[] header = new long[2];
        for (int i = 0; i < header.Length; ++i)
        {
            if (!tk.NextToken() || tk.TokenType != TokType.NUMBER)
            {
                return null;
            }
            header[i] = tk.IntValue;
        }

        // The third token must exist and be the literal keyword.
        if (!tk.NextToken() || !tk.StringValue.Equals("obj"))
        {
            return null;
        }
        return header;
    }
    catch
    {
        // Best effort: a tokenising failure is reported as "no match".
    }
    return null;
}
/** Creates an independent duplicate.
 * Scalar state is copied from the source reader; the cross-reference objects,
 * the trailer and the crypto reference are duplicated through
 * <CODE>DuplicatePdfObject</CODE> with this reader as the second argument.
 * Statement order matters: each duplication call receives this instance while
 * it is still being filled in.
 * @param reader the <CODE>PdfReader</CODE> to duplicate
 */
public PdfReader(PdfReader reader) {
    // Plain state copied by assignment.
    this.appendable = reader.appendable;
    this.consolidateNamedDestinations = reader.consolidateNamedDestinations;
    this.encrypted = reader.encrypted;
    this.rebuilt = reader.rebuilt;
    this.sharedStreams = reader.sharedStreams;
    this.tampered = reader.tampered;
    this.password = reader.password;
    this.pdfVersion = reader.pdfVersion;
    this.eofPos = reader.eofPos;
    this.freeXref = reader.freeXref;
    this.lastXref = reader.lastXref;
    this.newXrefType = reader.newXrefType;
    // A new tokeniser is built from the source tokeniser's SafeFile.
    this.tokens = new PRTokeniser(reader.tokens.SafeFile);
    // Encryption state, when present, is copy-constructed.
    if (reader.decrypt != null) this.decrypt = new PdfEncryption(reader.decrypt);
    this.pValue = reader.pValue;
    this.rValue = reader.rValue;
    // Copy the object list, then replace every entry with a duplicate made for this reader.
    this.xrefObj = new List<PdfObject>(reader.xrefObj);
    for (int k = 0; k < reader.xrefObj.Count; ++k) {
        this.xrefObj[k] = DuplicatePdfObject(reader.xrefObj[k], this);
    }
    this.pageRefs = new PageRefs(reader.pageRefs, this);
    this.trailer = (PdfDictionary)DuplicatePdfObject(reader.trailer, this);
    // Catalog and page root are looked up in the duplicated trailer, not taken from the source.
    this.catalog = trailer.GetAsDict(PdfName.ROOT);
    this.rootPages = catalog.GetAsDict(PdfName.PAGES);
    this.fileLength = reader.fileLength;
    this.partial = reader.partial;
    this.hybridXref = reader.hybridXref;
    // NOTE(review): objStmToOffset and xref are assigned, not duplicated - if they
    // are reference types they are shared with the source reader; confirm this is intended.
    this.objStmToOffset = reader.objStmToOffset;
    this.xref = reader.xref;
    this.cryptoRef = (PRIndirectReference)DuplicatePdfObject(reader.cryptoRef, this);
    this.ownerPasswordUsed = reader.ownerPasswordUsed;
}
/// <summary>
/// Reads a single object from an object stream. The stream header is walked up
/// to and including pair number <paramref name="idx"/> (0-based); the second
/// number of that pair plus the stream's FIRST entry is the object's position.
/// </summary>
/// <param name="stream">the object stream</param>
/// <param name="idx">0-based index of the object inside the stream</param>
/// <returns>
/// a PdfNumber when the token at the position is a number, otherwise the object
/// returned by <c>ReadPRObject()</c>
/// </returns>
virtual protected internal PdfObject ReadOneObjStm (PRStream stream, int idx)
{
    int first = stream.GetAsNumber(PdfName.FIRST).IntValue;
    byte[] b = GetStreamBytes(stream, tokens.File);

    // Swap in a tokeniser over the decoded stream; the finally block restores the original.
    PRTokeniser saveTokens = tokens;
    tokens = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(b)));
    try
    {
        int address = 0;
        int pairCount = idx + 1;
        for (int pair = 0; pair < pairCount; ++pair)
        {
            // Each header entry is two numbers; only the second one is used.
            for (int part = 0; part < 2; ++part)
            {
                if (!tokens.NextToken() || tokens.TokenType != PRTokeniser.TokType.NUMBER)
                {
                    throw new InvalidPdfException(MessageLocalization.GetComposedMessage("error.reading.objstm"));
                }
            }
            address = tokens.IntValue + first;
        }

        tokens.Seek(address);
        tokens.NextToken();
        if (tokens.TokenType == PRTokeniser.TokType.NUMBER)
        {
            // A number token is turned into a PdfNumber directly.
            return new PdfNumber(tokens.StringValue);
        }

        // Anything else: rewind and let the general object reader handle it.
        tokens.Seek(address);
        return ReadPRObject();
    }
    finally
    {
        tokens = saveTokens;
    }
}
/// <summary>
/// Splits a default-appearance string into font, size and colour. Operands are
/// collected until an operator arrives; "Tf" fills the font and size slots,
/// "g", "rg" and "k" fill the colour slot (a gray value of 0 leaves the colour
/// slot untouched). Operators with too few operands are ignored.
/// </summary>
/// <param name="da">the default-appearance string</param>
/// <returns>a three-element array indexed by DA_FONT, DA_SIZE and DA_COLOR; unset slots are null</returns>
public static Object[] SplitDAelements(String da)
{
    System.Globalization.NumberFormatInfo invariant = System.Globalization.NumberFormatInfo.InvariantInfo;
    PRTokeniser tk = new PRTokeniser(PdfEncodings.ConvertToBytes(da, null));
    ArrayList stack = new ArrayList();
    Object[] ret = new Object[3];

    while (tk.NextToken())
    {
        if (tk.TokenType == PRTokeniser.TK_COMMENT)
        {
            continue;
        }
        if (tk.TokenType != PRTokeniser.TK_OTHER)
        {
            // Operand: remember it until its operator shows up.
            stack.Add(tk.StringValue);
            continue;
        }

        String oper = tk.StringValue;
        int depth = stack.Count;

        if (oper.Equals("Tf") && depth >= 2)
        {
            ret[DA_FONT] = stack[depth - 2];
            ret[DA_SIZE] = float.Parse((String)stack[depth - 1], invariant);
        }
        else if (oper.Equals("g") && depth >= 1)
        {
            float gray = float.Parse((String)stack[depth - 1], invariant);
            if (gray != 0)
            {
                ret[DA_COLOR] = new GrayColor(gray);
            }
        }
        else if (oper.Equals("rg") && depth >= 3)
        {
            float red = float.Parse((String)stack[depth - 3], invariant);
            float green = float.Parse((String)stack[depth - 2], invariant);
            float blue = float.Parse((String)stack[depth - 1], invariant);
            ret[DA_COLOR] = new Color(red, green, blue);
        }
        else if (oper.Equals("k") && depth >= 4)
        {
            float cyan = float.Parse((String)stack[depth - 4], invariant);
            float magenta = float.Parse((String)stack[depth - 3], invariant);
            float yellow = float.Parse((String)stack[depth - 2], invariant);
            float black = float.Parse((String)stack[depth - 1], invariant);
            ret[DA_COLOR] = new CMYKColor(cyan, magenta, yellow, black);
        }

        // Every operator consumes the pending operands, recognised or not.
        stack.Clear();
    }
    return ret;
}
/**
 * Constructs a new PdfReader. This is the master constructor.
 * @param byteSource source of bytes for the reader
 * @param partialRead if true, the reader is opened in partial mode (PDF is parsed on demand), if false, the entire PDF is parsed into memory as the reader opens
 * @param ownerPassword the password or null if no password is required
 * @param certificate the certificate or null if no certificate is required
 * @param certificateKey the key or null if no certificate key is required
 * @param closeSourceOnConstructorError if true, the byteSource will be closed if there is an error during construction of this reader
 * @throws IOException rethrown unchanged when reading the document fails
 */
private PdfReader(IRandomAccessSource byteSource, bool partialRead, byte[] ownerPassword, X509Certificate certificate, ICipherParameters certificateKey, bool closeSourceOnConstructorError)
{
    this.certificate = certificate;
    this.certificateKey = certificateKey;
    this.password = ownerPassword;
    this.partial = partialRead;

    try
    {
        tokens = GetOffsetTokeniser(byteSource);

        if (partialRead)
        {
            ReadPdfPartial();
        }
        else
        {
            ReadPdf();
        }
    }
    catch (IOException)
    {
        if (closeSourceOnConstructorError)
        {
            byteSource.Close();
        }
        // "throw;" keeps the original stack trace; "throw e;" reset it.
        throw;
    }
    GetCounter().Read(fileLength);
}
/**
 * Processes PDF syntax: parses the content stream operator by operator and
 * dispatches each one. Inline images ("BI") are parsed separately and handed
 * to the render listener, using the resources' colour-space dictionary when
 * resources are present.
 * <b>Note:</b> If you re-use a given {@link PdfContentStreamProcessor}, you must call {@link PdfContentStreamProcessor#reset()}
 * @param contentBytes the bytes of a content stream
 * @param resources the resources that come with the content stream
 */
public void ProcessContent(byte[] contentBytes, PdfDictionary resources)
{
    this.resources.Push(resources);

    PdfContentParser parser = new PdfContentParser(new PRTokeniser(contentBytes));
    List<iTextSharp.text.pdf.PdfObject> operands = new List<iTextSharp.text.pdf.PdfObject>();

    while (parser.Parse(operands).Count > 0)
    {
        // The operator is the last element parsed; the operands precede it.
        PdfLiteral oper = (PdfLiteral)operands[operands.Count - 1];
        bool isInlineImage = oper.ToString() == "BI";

        if (!isInlineImage)
        {
            InvokeOperator(oper, operands);
            continue;
        }

        // we don't call invokeOperator for embedded images - this is one area of the PDF spec that is particularly nasty and inconsistent
        PdfDictionary colorSpaceDic = null;
        if (resources != null)
        {
            colorSpaceDic = resources.GetAsDict(PdfName.COLORSPACE);
        }
        ImageRenderInfo renderInfo = ImageRenderInfo.CreateForEmbeddedImage(Gs().ctm, InlineImageUtils.ParseInlineImage(parser, colorSpaceDic), colorSpaceDic);
        renderListener.RenderImage(renderInfo);
    }

    this.resources.Pop();
}
/**
 * Builds a tokeniser for the byte source, skipping any junk bytes that precede
 * the PDF header. When the header offset is 0 the tokeniser over the source is
 * returned as is; otherwise a tokeniser over a window of the source starting
 * at that offset is returned.
 * @param byteSource the source to check
 * @return a tokeniser that is guaranteed to start at the PDF header
 * @throws IOException if there is a problem reading the byte source
 */
private static PRTokeniser GetOffsetTokeniser(IRandomAccessSource byteSource)
{
    RandomAccessFileOrArray whole = new RandomAccessFileOrArray(byteSource);
    PRTokeniser probe = new PRTokeniser(whole);

    int offset = probe.GetHeaderOffset();
    if (offset == 0)
    {
        return probe;
    }

    IRandomAccessSource offsetSource = new WindowRandomAccessSource(byteSource, offset);
    return new PRTokeniser(new RandomAccessFileOrArray(offsetSource));
}
/**
 * Constructs a new PdfReader. This is the master constructor.
 * @param byteSource source of bytes for the reader
 * @param partialRead if true the document is read with <CODE>ReadPdfPartial()</CODE>, otherwise with <CODE>ReadPdf()</CODE>
 * @param ownerPassword the password stored on the reader
 * @param certificate the certificate stored on the reader
 * @param certificateKey the certificate key stored on the reader
 */
private PdfReader(IRandomAccessSource byteSource, bool partialRead, byte[] ownerPassword, X509Certificate certificate, ICipherParameters certificateKey)
{
    this.partial = partialRead;
    this.password = ownerPassword;
    this.certificate = certificate;
    this.certificateKey = certificateKey;

    this.tokens = GetOffsetTokeniser(byteSource);

    if (!partialRead)
    {
        ReadPdf();
    }
    else
    {
        ReadPdfPartial();
    }
}
/// <summary>
/// Checks whether <paramref name="line"/> begins with "number number obj".
/// </summary>
/// <param name="line">the bytes to inspect</param>
/// <returns>
/// a two-element array { first number, second number } when the pattern matches;
/// <c>null</c> otherwise, including when tokenising throws
/// </returns>
public static long[] CheckObjectStart (byte[] line)
{
    long[] match = null;
    try
    {
        PRTokeniser tk = new PRTokeniser(line);

        bool firstIsNumber = tk.NextToken() && tk.TokenType == TokType.NUMBER;
        if (firstIsNumber)
        {
            int objectNumber = tk.IntValue;

            bool secondIsNumber = tk.NextToken() && tk.TokenType == TokType.NUMBER;
            if (secondIsNumber)
            {
                int generation = tk.IntValue;

                // The third token must exist and be the literal keyword.
                if (tk.NextToken() && tk.StringValue.Equals("obj"))
                {
                    match = new long[] { objectNumber, generation };
                }
            }
        }
        return match;
    }
    catch
    {
        // Best effort: a tokenising failure is reported as "no match".
    }
    return null;
}
/// <summary>
/// Compares the first-page content streams of two PDF files token by token.
/// Number tokens match when they differ by at most 0.001; all other tokens
/// must have equal type and equal string value.
/// </summary>
/// <remarks>
/// NOTE(review): the loop is driven by the first document only, so extra tokens
/// at the end of the second document are not detected. Kept as is because
/// callers may rely on it - confirm whether that is intended.
/// </remarks>
/// <param name="path1">path of the first PDF file</param>
/// <param name="path2">path of the second PDF file</param>
/// <returns><c>true</c> when every token of the first document matches the corresponding token of the second</returns>
virtual public bool CompareInnerText(String path1, String path2)
{
    PdfReader reader1 = new PdfReader(path1);
    try
    {
        byte[] streamBytes1 = reader1.GetPageContent(1);
        PRTokeniser tokenizer1 = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(streamBytes1)));

        PdfReader reader2 = new PdfReader(path2);
        try
        {
            byte[] streamBytes2 = reader2.GetPageContent(1);
            PRTokeniser tokenizer2 = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(streamBytes2)));

            while (tokenizer1.NextToken())
            {
                if (!tokenizer2.NextToken())
                {
                    return false;
                }
                if (tokenizer1.TokenType != tokenizer2.TokenType)
                {
                    return false;
                }

                // Types are equal from here on.
                if (tokenizer2.TokenType == PRTokeniser.TokType.NUMBER)
                {
                    float value1 = float.Parse(tokenizer1.StringValue, CultureInfo.InvariantCulture);
                    float value2 = float.Parse(tokenizer2.StringValue, CultureInfo.InvariantCulture);
                    if (Math.Abs(value1 - value2) > 0.001)
                    {
                        return false;
                    }
                }
                else if (!tokenizer1.StringValue.Equals(tokenizer2.StringValue))
                {
                    return false;
                }
            }
            return true;
        }
        finally
        {
            reader2.Close();
        }
    }
    finally
    {
        // Nested finally blocks: reader1 is closed even when opening or reading the
        // second file fails, and reader2 is closed even if closing reader1 would throw.
        reader1.Close();
    }
}
/// <summary>
/// Reads objects out of an object stream into <c>xrefObj</c>. The stream begins
/// with N pairs of numbers (object number, offset); each offset is relative to
/// the stream's FIRST entry. Only indexes present as keys in
/// <paramref name="map"/> are read.
/// </summary>
/// <param name="stream">the object stream</param>
/// <param name="map">the indexes (0-based position in the stream) to load</param>
protected internal void ReadObjStm(PRStream stream, IntHashtable map)
{
    int first = stream.GetAsNumber(PdfName.FIRST).IntValue;
    int n = stream.GetAsNumber(PdfName.N).IntValue;
    byte[] b = GetStreamBytes(stream, tokens.File);

    // Swap in a tokeniser over the decoded stream; the finally block restores the original.
    PRTokeniser saveTokens = tokens;
    tokens = new PRTokeniser(b);
    try
    {
        int[] address = new int[n];
        int[] objNumber = new int[n];

        // Header: n pairs of "objectNumber offset".
        for (int k = 0; k < n; ++k)
        {
            if (!tokens.NextToken() || tokens.TokenType != PRTokeniser.TK_NUMBER)
            {
                throw new InvalidPdfException("Error reading ObjStm");
            }
            objNumber[k] = tokens.IntValue;

            if (!tokens.NextToken() || tokens.TokenType != PRTokeniser.TK_NUMBER)
            {
                throw new InvalidPdfException("Error reading ObjStm");
            }
            address[k] = tokens.IntValue + first;
        }

        for (int k = 0; k < n; ++k)
        {
            if (!map.ContainsKey(k))
            {
                continue;
            }
            tokens.Seek(address[k]);
            xrefObj[objNumber[k]] = ReadPRObject();
        }
    }
    finally
    {
        tokens = saveTokens;
    }
}