/// <summary>
/// Computes the MD5-based key for the object with the given identifier and
/// stores it in <c>_key</c>; <c>_keySize</c> is set to the encryption key
/// length plus five, capped at 16.
/// </summary>
/// <param name="id">Identifier whose object and generation numbers are mixed into the hash.</param>
public void SetHashKey(PdfObjectID id)
{
#if !NETFX_CORE
    // Low three bytes of the object number followed by the low two bytes
    // of the generation number.
    byte[] objectId =
    {
        (byte)id.ObjectNumber,
        (byte)(id.ObjectNumber >> 8),
        (byte)(id.ObjectNumber >> 16),
        (byte)id.GenerationNumber,
        (byte)(id.GenerationNumber >> 8)
    };

    _md5.Initialize();
    _md5.TransformBlock(_encryptionKey, 0, _encryptionKey.Length, _encryptionKey, 0);
    _md5.TransformBlock(objectId, 0, objectId.Length, objectId, 0);

    // The salt bytes are hashed only for revision 1; otherwise the hash is
    // finalized with an empty block.
    int saltCount = _document.revision == 1 ? salt.Length : 0;
    _md5.TransformFinalBlock(salt, 0, saltCount);

    _key = _md5.Hash;
    _md5.Initialize();

    int size = _encryptionKey.Length + 5;
    _keySize = size > 16 ? 16 : size;
#endif
}
/// <summary>
/// Adds a reference to the given table for every entry of <c>_header</c>
/// whose object identifier is not yet contained in the table.
/// </summary>
/// <param name="xrefTable">The table that receives the new references.</param>
internal void ReadReferences(PdfCrossReferenceTable xrefTable)
{
    for (int idx = 0; idx < _header.Length; idx++)
    {
        // The first element of each header entry is the object number. The
        // second element is not needed to create the reference.
        int objectNumber = _header[idx][0];

        // HACK: -1 indicates compressed object.
        PdfReference iref = new PdfReference(new PdfObjectID(objectNumber), -1);

        // An identifier that is already registered is left untouched.
        if (!xrefTable.Contains(iref.ObjectID))
        {
            xrefTable.Add(iref);
        }
    }
}
/// <summary>
/// Computes the MD5-based key for the object with the given identifier and
/// stores it in <c>_key</c>; <c>_keySize</c> is set to the encryption key
/// length plus five, capped at 16.
/// </summary>
/// <param name="id">Identifier whose object and generation numbers are mixed into the hash.</param>
internal void SetHashKey(PdfObjectID id)
{
#if !NETFX_CORE && !DNC10
    // Low three bytes of the object number followed by the low two bytes
    // of the generation number.
    byte[] objectId =
    {
        (byte)id.ObjectNumber,
        (byte)(id.ObjectNumber >> 8),
        (byte)(id.ObjectNumber >> 16),
        (byte)id.GenerationNumber,
        (byte)(id.GenerationNumber >> 8)
    };

    _md5.Initialize();
    _md5.TransformBlock(_encryptionKey, 0, _encryptionKey.Length, _encryptionKey, 0);
    _md5.TransformBlock(objectId, 0, objectId.Length, objectId, 0);

    bool isAes = _document._securitySettings.DocumentSecurityLevel == PdfDocumentSecurityLevel.Encrypted128BitAes;
    if (!isAes)
    {
        // Nothing more to hash: finalize with an empty block.
        _md5.TransformFinalBlock(objectId, 0, 0);
    }
    else
    {
        // Additional padding needed for AES encryption: the bytes of 'sAlT'.
        byte[] aesPadding = new byte[] { 0x73, 0x41, 0x6C, 0x54 };
        _md5.TransformFinalBlock(aesPadding, 0, aesPadding.Length);
    }

    _key = _md5.Hash;
    _md5.Initialize();

    int size = _encryptionKey.Length + 5;
    _keySize = size > 16 ? 16 : size;
#endif
}
/// <summary>
/// Creates a default parser for the given document and uses it to read the
/// object with the given identifier.
/// </summary>
/// <param name="owner">The document the object is read from; must not be null.</param>
/// <param name="objectID">The identifier of the object to read.</param>
/// <returns>The object returned by the parser.</returns>
/// <exception cref="ArgumentNullException"><paramref name="owner"/> is null.</exception>
public static PdfObject ReadObject(PdfDocument owner, PdfObjectID objectID)
{
    if (owner == null)
    {
        throw new ArgumentNullException("owner");
    }

    Parser defaultParser = new Parser(owner);
    PdfObject result = defaultParser.ReadObject(null, objectID, false);
    return result;
}
/// <summary>
/// Creates a reference that stores the given object identifier and file position.
/// </summary>
/// <param name="objectID">The identifier of the referenced object.</param>
/// <param name="position">The position value stored for the object.</param>
public PdfReference(PdfObjectID objectID, int position)
{
    _position = position;
    _objectID = objectID;
#if UNIQUE_IREF && DEBUG
    // Debug aid: every instance receives the next value of a running counter.
    s_counter += 1;
    _uid = s_counter;
#endif
}
/// <summary>
/// Creates a reference that stores the given object identifier and file position.
/// </summary>
/// <param name="objectID">The identifier of the referenced object.</param>
/// <param name="position">The position value stored for the object.</param>
public PdfReference(PdfObjectID objectID, int position)
{
    this.position = position;
    this.objectID = objectID;
#if UNIQUE_IREF && DEBUG
    // Debug aid: every instance receives the next value of a running counter.
    PdfReference.counter += 1;
    this.uid = PdfReference.counter;
#endif
}
/// <summary>
/// Creates a reference that stores the given object identifier and file position.
/// </summary>
/// <param name="objectID">The identifier of the referenced object.</param>
/// <param name="position">The position value stored for the object.</param>
public PdfReference(PdfObjectID objectID, int position)
{
    this.position = position;
    this.objectID = objectID;
#if UNIQUE_IREF && DEBUG
    // Debug aid: every instance receives the next value of a running counter.
    counter += 1;
    uid = counter;
#endif
}
/// <summary>
/// Looks up the cross reference entry registered for an object identifier.
/// </summary>
/// <param name="objectID">The identifier to look up in the object table.</param>
/// <returns>The matching reference, or null if the object table has no entry for the identifier.</returns>
public PdfReference this[PdfObjectID objectID]
{
    get
    {
        PdfReference found;
        return ObjectTable.TryGetValue(objectID, out found) ? found : null;
    }
}
/// <summary>
/// Reads the subsections of a cross-reference table into the given table and
/// returns the trailer dictionary that follows them.
/// </summary>
/// <param name="xrefTable">The table that receives a reference for every in-use entry.</param>
/// <returns>The trailer read after the last subsection.</returns>
/// <exception cref="PdfReaderException">
/// The first token is an integer (cross-reference streams are not handled here),
/// or a token other than an integer or the trailer keyword is found between subsections.
/// </exception>
PdfTrailer ReadXRefTableAndTrailer(PdfReferenceTable xrefTable)
{
    Debug.Assert(xrefTable != null);

    Symbol first = ScanNextToken();
    // Is it an xref stream?
    if (first == Symbol.Integer)
    {
        throw new PdfReaderException(PSSR.CannotHandleXRefStreams);
    }
    // TODO: It is very high on the todo list, but still undone
    Debug.Assert(first == Symbol.XRef);

    for (;;)
    {
        Symbol current = ScanNextToken();

        if (current == Symbol.Trailer)
        {
            ReadSymbol(Symbol.BeginDictionary);
            PdfTrailer trailer = new PdfTrailer(this.document);
            this.ReadDictionary(trailer, false);
            return trailer;
        }

        if (current != Symbol.Integer)
        {
            throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
        }

        // Subsection header: first object number and number of entries.
        int firstId = this.lexer.TokenToInteger;
        int entryCount = ReadInteger();
        int endId = firstId + entryCount;

        for (int id = firstId; id < endId; id++)
        {
            int position = ReadInteger();
            int generation = ReadInteger();
            ReadSymbol(Symbol.Keyword);
            string entryKind = lexer.Token;

            // Skip the start entry (object 0) and every entry not marked "n".
            if (id == 0 || entryKind != "n")
            {
                continue;
            }

            // Even it is restricted, an object can exist in more than one subsection
            // (PDF Reference Implementation Notes 15). The entry already in the
            // table is kept; a later one is ignored.
            PdfObjectID objectID = new PdfObjectID(id, generation);
            if (!xrefTable.Contains(objectID))
            {
                xrefTable.Add(new PdfReference(objectID, position));
            }
        }
    }
}
/// <summary>
/// Computes the MD5 hash of the encryption key followed by five bytes taken
/// from the object identifier and stores it in <c>_key</c>; <c>_keySize</c>
/// is set to the encryption key length plus five, capped at 16.
/// </summary>
/// <param name="id">Identifier whose object and generation numbers are mixed into the hash.</param>
internal void SetHashKey(PdfObjectID id)
{
    // Low three bytes of the object number followed by the low two bytes
    // of the generation number.
    byte[] objectId =
    {
        (byte)id.ObjectNumber,
        (byte)(id.ObjectNumber >> 8),
        (byte)(id.ObjectNumber >> 16),
        (byte)id.GenerationNumber,
        (byte)(id.GenerationNumber >> 8)
    };

    _md5.Initialize();
    _md5.TransformBlock(_encryptionKey, 0, _encryptionKey.Length, _encryptionKey, 0);
    _md5.TransformFinalBlock(objectId, 0, objectId.Length);
    _key = _md5.Hash;
    _md5.Initialize();

    int size = _encryptionKey.Length + 5;
    _keySize = size > 16 ? 16 : size;
}
/// <summary>
/// Pdf Reference 1.7, Chapter 7.6.2, Algorithm #1.
/// Computes the MD5 hash of the encryption key followed by five bytes taken
/// from the object identifier and stores it in <c>key</c>; <c>keySize</c>
/// is set to the encryption key length plus five, capped at 16.
/// </summary>
/// <param name="id">Identifier whose object and generation numbers are mixed into the hash.</param>
public void CreateHashKey(PdfObjectID id)
{
    // Low three bytes of the object number followed by the low two bytes
    // of the generation number.
    var objectId = new[]
    {
        (byte)id.ObjectNumber,
        (byte)(id.ObjectNumber >> 8),
        (byte)(id.ObjectNumber >> 16),
        (byte)id.GenerationNumber,
        (byte)(id.GenerationNumber >> 8)
    };

    md5.Initialize();
    md5.TransformBlock(encryptionKey, 0, encryptionKey.Length, encryptionKey, 0);
    // ?? incomplete
    md5.TransformFinalBlock(objectId, 0, objectId.Length);
    key = md5.Hash;
    md5.Initialize();

    var size = encryptionKey.Length + 5;
    keySize = size > 16 ? 16 : size;
}
/// <summary>
/// Computes the MD5 hash of the encryption key followed by five bytes taken
/// from the object identifier and stores it in <c>key</c>; <c>keySize</c>
/// is set to the encryption key length plus five, capped at 16.
/// The body is compiled out when SILVERLIGHT is defined.
/// </summary>
/// <param name="id">Identifier whose object and generation numbers are mixed into the hash.</param>
internal void SetHashKey(PdfObjectID id)
{
#if !SILVERLIGHT
    // Low three bytes of the object number followed by the low two bytes
    // of the generation number.
    byte[] objectId =
    {
        (byte)id.ObjectNumber,
        (byte)(id.ObjectNumber >> 8),
        (byte)(id.ObjectNumber >> 16),
        (byte)id.GenerationNumber,
        (byte)(id.GenerationNumber >> 8)
    };

    this.md5.Initialize();
    this.md5.TransformBlock(this.encryptionKey, 0, this.encryptionKey.Length, this.encryptionKey, 0);
    this.md5.TransformFinalBlock(objectId, 0, objectId.Length);
    this.key = this.md5.Hash;
    this.md5.Initialize();

    int size = this.encryptionKey.Length + 5;
    this.keySize = size > 16 ? 16 : size;
#endif
}
/// <summary>
/// Forwards the object identifier to both encryptors so that each one
/// creates its hash key for the specified object.
/// </summary>
/// <param name="id">The identifier passed on to the string and stream encryptors.</param>
internal void SetHashKey(PdfObjectID id)
{
    // The string encryptor is updated first, then the stream encryptor.
    stringEncryptor.CreateHashKey(id);

    streamEncryptor.CreateHashKey(id);
}
/// <summary>
/// Opens an existing PDF document.
/// </summary>
/// <param name="stream">The stream to read from. Its <c>Length</c> and <c>Position</c> are used.</param>
/// <param name="password">The password to validate if the trailer has an Encrypt entry; may be null.</param>
/// <param name="openmode">The mode the document is opened in. In Modify mode the document IDs and
/// modification date are updated and the reference table is compacted and renumbered.</param>
/// <param name="passwordProvider">Optional callback asked for another password whenever validation
/// fails or yields only the user password in Modify mode; may be null.</param>
/// <returns>The opened document, or null if the password provider sets <c>Abort</c>.</returns>
/// <exception cref="InvalidOperationException">The file version determined from the header is 0.</exception>
/// <exception cref="PdfReaderException">A password is required or invalid and no provider is given.</exception>
public static PdfDocument Open(Stream stream, string password, PdfDocumentOpenMode openmode, PdfPasswordProvider passwordProvider)
{
    PdfDocument document;
    try
    {
        Lexer lexer = new Lexer(stream);
        document = new PdfDocument(lexer);
        document._state |= DocumentState.Imported;
        document._openMode = openmode;
        document._fileSize = stream.Length;

        // Get file version.
        // Stream.Read may return fewer bytes than requested, so keep reading until
        // the header buffer is full or the end of the stream is reached.
        byte[] header = new byte[1024];
        stream.Position = 0;
        int headerLength = 0;
        int bytesRead;
        while (headerLength < header.Length &&
               (bytesRead = stream.Read(header, headerLength, header.Length - headerLength)) > 0)
        {
            headerLength += bytesRead;
        }
        document._version = GetPdfFileVersion(header);
        if (document._version == 0)
            throw new InvalidOperationException(PSSR.InvalidPdf);

        document._irefTable.IsUnderConstruction = true;
        Parser parser = new Parser(document);
        // Read all trailers or cross-reference streams, but no objects.
        document._trailer = parser.ReadTrailer();

        Debug.Assert(document._irefTable.IsUnderConstruction);
        document._irefTable.IsUnderConstruction = false;

        // Is document encrypted?
        PdfReference xrefEncrypt = document._trailer.Elements[PdfTrailer.Keys.Encrypt] as PdfReference;
        if (xrefEncrypt != null)
        {
            PdfObject encrypt = parser.ReadObject(null, xrefEncrypt.ObjectID, false, false);

            encrypt.Reference = xrefEncrypt;
            xrefEncrypt.Value = encrypt;
            PdfStandardSecurityHandler securityHandler = document.SecurityHandler;

        TryAgain:
            // Re-entered with the password supplied by the provider until it is
            // accepted or the provider aborts.
            PasswordValidity validity = securityHandler.ValidatePassword(password);
            if (validity == PasswordValidity.Invalid)
            {
                if (passwordProvider != null)
                {
                    PdfPasswordProviderArgs args = new PdfPasswordProviderArgs();
                    passwordProvider(args);
                    if (args.Abort)
                        return null;
                    password = args.Password;
                    goto TryAgain;
                }
                else
                {
                    if (password == null)
                        throw new PdfReaderException(PSSR.PasswordRequired);
                    else
                        throw new PdfReaderException(PSSR.InvalidPassword);
                }
            }
            else if (validity == PasswordValidity.UserPassword && openmode == PdfDocumentOpenMode.Modify)
            {
                // The user password is not sufficient for opening in Modify mode.
                if (passwordProvider != null)
                {
                    PdfPasswordProviderArgs args = new PdfPasswordProviderArgs();
                    passwordProvider(args);
                    if (args.Abort)
                        return null;
                    password = args.Password;
                    goto TryAgain;
                }
                else
                    throw new PdfReaderException(PSSR.OwnerPasswordRequired);
            }
        }
        else
        {
            if (password != null)
            {
                // Password specified but document is not encrypted.
                // ignore
            }
        }

        PdfReference[] irefs2 = document._irefTable.AllReferences;
        int count2 = irefs2.Length;

        // 3rd: Create iRefs for all compressed objects.
        // The dictionary is used as a set of object numbers that were already processed.
        Dictionary<int, object> objectStreams = new Dictionary<int, object>();
        for (int idx = 0; idx < count2; idx++)
        {
            PdfReference iref = irefs2[idx];
            PdfCrossReferenceStream xrefStream = iref.Value as PdfCrossReferenceStream;
            if (xrefStream != null)
            {
                for (int idx2 = 0; idx2 < xrefStream.Entries.Count; idx2++)
                {
                    PdfCrossReferenceStream.CrossReferenceStreamEntry item = xrefStream.Entries[idx2];
                    // Is type xref to compressed object?
                    if (item.Type == 2)
                    {
                        int objectNumber = (int)item.Field2;
                        if (!objectStreams.ContainsKey(objectNumber))
                        {
                            objectStreams.Add(objectNumber, null);
                            PdfObjectID objectID = new PdfObjectID((int)item.Field2);
                            parser.ReadIRefsFromCompressedObject(objectID);
                        }
                    }
                }
            }
        }

        // 4th: Read compressed objects.
        for (int idx = 0; idx < count2; idx++)
        {
            PdfReference iref = irefs2[idx];
            PdfCrossReferenceStream xrefStream = iref.Value as PdfCrossReferenceStream;
            if (xrefStream != null)
            {
                for (int idx2 = 0; idx2 < xrefStream.Entries.Count; idx2++)
                {
                    PdfCrossReferenceStream.CrossReferenceStreamEntry item = xrefStream.Entries[idx2];
                    // Is type xref to compressed object?
                    if (item.Type == 2)
                    {
                        // NOTE(review): irefNew is not used afterwards and the assertion checks
                        // iref rather than irefNew - confirm whether that is intended.
                        PdfReference irefNew = parser.ReadCompressedObject(new PdfObjectID((int)item.Field2), (int)item.Field3);
                        Debug.Assert(document._irefTable.Contains(iref.ObjectID));
                    }
                }
            }
        }

        PdfReference[] irefs = document._irefTable.AllReferences;
        int count = irefs.Length;

        // Read all indirect objects.
        for (int idx = 0; idx < count; idx++)
        {
            PdfReference iref = irefs[idx];
            if (iref.Value == null)
            {
#if DEBUG_
                if (iref.ObjectNumber == 1074)
                    iref.GetType();
#endif
                try
                {
                    Debug.Assert(document._irefTable.Contains(iref.ObjectID));
                    PdfObject pdfObject = parser.ReadObject(null, iref.ObjectID, false, false);
                    Debug.Assert(pdfObject.Reference == iref);
                    pdfObject.Reference = iref;
                    Debug.Assert(pdfObject.Reference.Value != null, "Something went wrong.");
                }
                catch (Exception ex)
                {
                    Debug.WriteLine(ex.Message);
                    // 4STLA rethrow exception to notify caller.
                    throw;
                }
            }
            else
            {
                Debug.Assert(document._irefTable.Contains(iref.ObjectID));
            }
            // Set maximum object number.
            document._irefTable._maxObjectNumber = Math.Max(document._irefTable._maxObjectNumber, iref.ObjectNumber);
        }

        // Encrypt all objects.
        // NOTE(review): EncryptDocument is called while loading; presumably the cipher is
        // symmetric so this decrypts the objects - confirm against the security handler.
        if (xrefEncrypt != null)
        {
            document.SecurityHandler.EncryptDocument();
        }

        // Fix references of trailer values and then objects and irefs are consistent.
        document._trailer.Finish();

#if DEBUG_
        // Some tests...
        PdfReference[] reachables = document.xrefTable.TransitiveClosure(document.trailer);
        reachables.GetType();
        reachables = document.xrefTable.AllXRefs;
        document.xrefTable.CheckConsistence();
#endif

        if (openmode == PdfDocumentOpenMode.Modify)
        {
            // Create new or change existing document IDs.
            if (document.Internals.SecondDocumentID == "")
                document._trailer.CreateNewDocumentIDs();
            else
            {
                byte[] agTemp = Guid.NewGuid().ToByteArray();
                document.Internals.SecondDocumentID = PdfEncoders.RawEncoding.GetString(agTemp, 0, agTemp.Length);
            }

            // Change modification date
            document.Info.ModificationDate = DateTime.Now;

            // Remove all unreachable objects
            int removed = document._irefTable.Compact();
            if (removed != 0)
                Debug.WriteLine("Number of deleted unreachable objects: " + removed);

            // Force flattening of page tree
            PdfPages pages = document.Pages;
            Debug.Assert(pages != null);

            document._irefTable.CheckConsistence();
            document._irefTable.Renumber();
            document._irefTable.CheckConsistence();
        }
    }
    catch (Exception ex)
    {
        Debug.WriteLine(ex.Message);
        throw;
    }
    return document;
}
/// <summary>
/// Reads the compressed object with the specified index from the object stream
/// that is registered under the specified object identifier.
/// </summary>
/// <param name="objectID">The identifier of the object stream; it must already be in the object table.</param>
/// <param name="index">The index passed on to the object stream.</param>
/// <returns>The reference returned by the object stream for the compressed object.</returns>
/// <exception cref="NotImplementedException">The object table has no entry for <paramref name="objectID"/>.</exception>
internal PdfReference ReadCompressedObject(PdfObjectID objectID, int index)
{
    // The reference of the object stream must have been created before this call.
    PdfReference streamRef;
    Debug.Assert(_document._irefTable.ObjectTable.ContainsKey(objectID));
    bool known = _document._irefTable.ObjectTable.TryGetValue(objectID, out streamRef);
    if (!known)
    {
        throw new NotImplementedException("This case is not coded or something else went wrong");
    }

    // Read in object stream object when we come here for the very first time.
    if (streamRef.Value == null)
    {
        try
        {
            Debug.Assert(_document._irefTable.Contains(streamRef.ObjectID));
            PdfDictionary dictionary = (PdfDictionary)ReadObject(null, streamRef.ObjectID, false, false);
            PdfObjectStream created = new PdfObjectStream(dictionary);
            // No explicit assignment of created.Reference is done; the assertions
            // check that it already refers to streamRef.
            Debug.Assert(created.Reference == streamRef);
            Debug.Assert(created.Reference.Value != null, "Something went wrong.");
        }
        catch (Exception ex)
        {
            Debug.WriteLine(ex.Message);
            throw;
        }
    }
    Debug.Assert(streamRef.Value != null);

    // The value may still be a plain dictionary; wrap it in an object stream then.
    PdfObjectStream objectStream = streamRef.Value as PdfObjectStream;
    if (objectStream == null)
    {
        Debug.Assert(((PdfDictionary)streamRef.Value).Elements.GetName("/Type") == "/ObjStm");

        objectStream = new PdfObjectStream((PdfDictionary)streamRef.Value);
        Debug.Assert(objectStream.Reference == streamRef);
        Debug.Assert(objectStream.Reference.Value != null, "Something went wrong.");
    }
    Debug.Assert(objectStream != null);

    if (objectStream == null)
    {
        throw new Exception("Something went wrong here.");
    }

    return objectStream.ReadCompressedObject(index);
}
/// <summary>
/// Reads either a cross-reference table with its trailer or a cross-reference stream.
/// </summary>
/// <param name="xrefTable">The table that receives a reference for every in-use entry.</param>
/// <returns>
/// The trailer following the cross-reference table, the result of
/// <c>ReadXRefStream</c> if the first token is an integer, or null if the
/// first token is neither the xref keyword nor an integer.
/// </returns>
private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
{
    Debug.Assert(xrefTable != null);

    Symbol symbol = ScanNextToken();

    // Is it a cross-reference stream?
    if (symbol == Symbol.Integer)
    {
        // Reference: 3.4.7 Cross-Reference Streams / Page 93
        // TODO: Handle PDF files larger than 2 GiB, see implementation note 21 in Appendix H.
        // The parsed integer is the object id of the cross-reference stream.
        return ReadXRefStream(xrefTable);
    }

    // Anything other than a cross-reference table yields no trailer.
    if (symbol != Symbol.XRef)
    {
        return null;
    }

    // Reference: 3.4.3 Cross-Reference Table / Page 93
    for (;;)
    {
        symbol = ScanNextToken();

        if (symbol == Symbol.Trailer)
        {
            ReadSymbol(Symbol.BeginDictionary);
            PdfTrailer trailer = new PdfTrailer(_document);
            ReadDictionary(trailer, false);
            return trailer;
        }

        if (symbol != Symbol.Integer)
        {
            // If the handler returns, scanning continues with the next token.
            ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
            continue;
        }

        // Subsection header: first object number and number of entries.
        int firstId = _lexer.TokenToInteger;
        int entryCount = ReadInteger();
        int endId = firstId + entryCount;

        for (int id = firstId; id < endId; id++)
        {
            int position = ReadInteger();
            int generation = ReadInteger();
            ReadSymbol(Symbol.Keyword);
            string entryKind = _lexer.Token;

            // Skip the start entry (object 0) and every entry not marked "n".
            if (id == 0 || entryKind != "n")
            {
                continue;
            }

            // Even it is restricted, an object can exist in more than one subsection
            // (PDF Reference Implementation Notes 15). The entry already in the
            // table is kept; a later one is ignored.
            PdfObjectID objectID = new PdfObjectID(id, generation);
            if (!xrefTable.Contains(objectID))
            {
                xrefTable.Add(new PdfReference(objectID, position));
            }
        }
    }
}
/// <summary>
/// Tells whether an entry exists for the specified external object identifier.
/// </summary>
/// <param name="externalID">The identifier whose string form is used as the lookup key.</param>
/// <returns>true if the key is present; otherwise false.</returns>
public bool Contains(PdfObjectID externalID)
{
    string key = externalID.ToString();
    return _externalIDs.ContainsKey(key);
}
/// <summary>
/// Scans tokens and pushes the corresponding PDF items onto the parser stack
/// until the specified stop symbol is scanned.
/// </summary>
/// <param name="stop">The symbol that ends parsing; it is consumed but not pushed.</param>
/// <exception cref="PdfReaderException">The end of file is reached before the stop symbol.</exception>
/// <exception cref="NotImplementedException">A begin-stream symbol is scanned.</exception>
void ParseObject(Symbol stop)
{
#if DEBUG_
    ParseObjectCounter++;
    Debug.WriteLine(ParseObjectCounter.ToString());
    if (ParseObjectCounter == 178)
    {
        GetType();
    }
#endif
    for (Symbol symbol = ScanNextToken(); symbol != Symbol.Eof; symbol = ScanNextToken())
    {
        if (symbol == stop)
        {
            return;
        }

        switch (symbol)
        {
            case Symbol.Comment:
                // Comments are ignored.
                break;

            case Symbol.Null:
                stack.Shift(PdfNull.Value);
                break;

            case Symbol.Boolean:
                stack.Shift(new PdfBoolean(lexer.TokenToBoolean));
                break;

            case Symbol.Integer:
                stack.Shift(new PdfInteger(lexer.TokenToInteger));
                break;

            case Symbol.UInteger:
                stack.Shift(new PdfUInteger(lexer.TokenToUInteger));
                break;

            case Symbol.Real:
                stack.Shift(new PdfReal(lexer.TokenToReal));
                break;

            case Symbol.String:
                stack.Shift(new PdfString(lexer.Token, PdfStringFlags.RawEncoding));
                break;

            case Symbol.UnicodeString:
                stack.Shift(new PdfString(lexer.Token, PdfStringFlags.Unicode));
                break;

            case Symbol.HexString:
                stack.Shift(new PdfString(lexer.Token, PdfStringFlags.HexLiteral));
                break;

            case Symbol.UnicodeHexString:
                stack.Shift(new PdfString(lexer.Token, PdfStringFlags.Unicode | PdfStringFlags.HexLiteral));
                break;

            case Symbol.Name:
                stack.Shift(new PdfName(lexer.Token));
                break;

            case Symbol.R:
                {
                    // The two topmost stack items are the object and generation number.
                    Debug.Assert(stack.GetItem(-1) is PdfInteger && stack.GetItem(-2) is PdfInteger);
                    PdfObjectID objectID = new PdfObjectID(stack.GetInteger(-2), stack.GetInteger(-1));

                    PdfReference iref = document.irefTable[objectID];
                    if (iref != null)
                    {
                        stack.Reduce(iref, 2);
                    }
                    else if (document.irefTable.IsUnderConstruction)
                    {
                        // If a document has more than one PdfXRefTable it is possible that the first
                        // trailer has indirect references to objects whose iref entry is not yet read
                        // in. XRefTable not complete when trailer is read. Create temporary irefs
                        // that are removed later in PdfTrailer.FixXRefs.
                        stack.Reduce(new PdfReference(objectID, 0), 2);
                    }
                    else
                    {
                        // PDF Reference section 3.2.9:
                        // An indirect reference to an undefined object is not an error;
                        // it is simply treated as a reference to the null object.
                        stack.Reduce(PdfNull.Value, 2);
                    }
                    break;
                }

            case Symbol.BeginArray:
                {
                    PdfArray newArray = new PdfArray(document);
                    ReadArray(newArray, false);
                    stack.Shift(newArray);
                    break;
                }

            case Symbol.BeginDictionary:
                {
                    PdfDictionary newDictionary = new PdfDictionary(document);
                    ReadDictionary(newDictionary, false);
                    stack.Shift(newDictionary);
                    break;
                }

            case Symbol.BeginStream:
                throw new NotImplementedException();

            default:
                {
                    // Any other token only triggers a debug assertion and is skipped.
                    string unexpected = lexer.Token;
                    Debug.Assert(false, "Unexpected: " + unexpected);
                    break;
                }
        }
    }

    throw new PdfReaderException("Unexpected end of file.");
}
/// <summary>
/// Moves the lexer to the position stored in the reference table for the specified object.
/// </summary>
/// <param name="objectID">The identifier looked up in the document's reference table.</param>
/// <returns>The position that was assigned to the lexer.</returns>
public int MoveToObject(PdfObjectID objectID)
{
    PdfReference target = _document._irefTable[objectID];
    int position = target.Position;
    _lexer.Position = position;
    return position;
}
/// <summary>
/// Stores the reference of a cloned object under the string form of its
/// external identifier, replacing any entry already stored under that key.
/// </summary>
/// <param name="externalID">The object identifier in the foreign object.</param>
/// <param name="iref">The cross reference to the clone of the foreign object, which belongs to
/// this document. In general the clone has a different object identifier.</param>
public void Add(PdfObjectID externalID, PdfReference iref)
{
    string key = externalID.ToString();
    _externalIDs[key] = iref;
}
/// <summary>
/// Positions the lexer at the given offset, reads the object found there under
/// the specified object number, and returns its reference.
/// The parser must be initialized with the stream of an object stream object.
/// </summary>
/// <param name="objectNumber">The object number of the compressed object.</param>
/// <param name="offset">The lexer position at which reading starts.</param>
/// <returns>The <c>Reference</c> of the object that was read.</returns>
internal PdfReference ReadCompressedObject(int objectNumber, int offset)
{
#if DEBUG__
    if (objectNumber == 1034)
        GetType();
#endif
    // Generation is always 0 for compressed objects, so only the number is supplied.
    PdfObjectID compressedID = new PdfObjectID(objectNumber);

    _lexer.Position = offset;
    return ReadObject(null, compressedID, false, true).Reference;
}
/// <summary>
/// Tells whether an entry exists for the specified external object identifier.
/// </summary>
/// <param name="externalID">The identifier whose string form is used as the lookup key.</param>
/// <returns>true if the key is present; otherwise false.</returns>
public bool Contains(PdfObjectID externalID)
{
    string key = externalID.ToString();
    return this.externalIDs.Contains(key);
}
/// <summary>
/// Returns the reference stored under the string form of the specified external identifier.
/// </summary>
/// <param name="externalID">The identifier whose string form is used as the lookup key.</param>
public PdfReference this[PdfObjectID externalID]
{
    get
    {
        string key = externalID.ToString();
        object entry = this.externalIDs[key];
        return (PdfReference)entry;
    }
}
/// <summary>
/// Stores the reference of a cloned object under the string form of its
/// external identifier, replacing any entry already stored under that key.
/// </summary>
/// <param name="externalID">The object identifier in the foreign object.</param>
/// <param name="iref">The cross reference to the clone of the foreign object, which belongs to
/// this document. In general the clone has a different object identifier.</param>
public void Add(PdfObjectID externalID, PdfReference iref)
{
    string key = externalID.ToString();
    this.externalIDs[key] = iref;
}
/// <summary>
/// Returns the reference stored under the string form of the specified external identifier.
/// </summary>
/// <param name="externalID">The identifier whose string form is used as the lookup key.</param>
public PdfReference this[PdfObjectID externalID]
{
    get
    {
        return (PdfReference)externalIDs[externalID.ToString()];
    }
}
/// <summary>
/// Creates the exception with a message that names the object identifier
/// which resolved with a negative position.
/// </summary>
/// <param name="id">The identifier that is formatted into the exception message.</param>
public PositionNotFoundException(PdfObjectID id)
    : base($"Object with ID {id} resolved with negative position ")
{
    // Nothing to initialize beyond the message passed to the base class.
}
/// <summary>
/// Opens an existing PDF document asynchronously.
/// </summary>
/// <param name="stream">The stream to read from. Its <c>Length</c> and <c>Position</c> are used.</param>
/// <param name="password">The password to validate if the trailer has an Encrypt entry; may be null.</param>
/// <param name="openmode">The mode the document is opened in. In Modify mode the document IDs and
/// modification date are updated and the reference table is compacted and renumbered.</param>
/// <param name="passwordProvider">Optional callback asked for another password whenever validation
/// fails or yields only the user password in Modify mode; may be null.</param>
/// <returns>A task yielding the opened document, or null if the password provider sets <c>Abort</c>.</returns>
/// <exception cref="InvalidOperationException">The file version determined from the header is 0.</exception>
/// <exception cref="PdfReaderException">A password is required or invalid and no provider is given.</exception>
public static async Task<PdfDocument> OpenAsync(
    Stream stream,
    string password = null,
    PdfDocumentOpenMode openmode = PdfDocumentOpenMode.Modify,
    PdfPasswordProvider passwordProvider = null)
{
    PdfDocument document;
#if !DEBUG
    try
#endif
    {
        Lexer lexer = new Lexer(stream);
        document = new PdfDocument(lexer);
        document._state |= DocumentState.Imported;
        document._openMode = openmode;
        document._fileSize = stream.Length;

        // Get file version.
        // Read asynchronously instead of blocking, and keep reading until the header
        // buffer is full or the end of the stream is reached, because a single read
        // may return fewer bytes than requested.
        byte[] header = new byte[1024];
        stream.Position = 0;
        int headerLength = 0;
        int bytesRead;
        while (headerLength < header.Length &&
               (bytesRead = await stream.ReadAsync(header, headerLength, header.Length - headerLength)) > 0)
        {
            headerLength += bytesRead;
        }
        document._version = GetPdfFileVersion(header);
        if (document._version == 0)
        {
            throw new InvalidOperationException(PSSR.InvalidPdf);
        }

        document._irefTable.IsUnderConstruction = true;
        Parser parser = new Parser(document);
        // Read all trailers or cross-reference streams, but no objects.
        document._trailer = await parser.ReadTrailerAsync();
        if (document._trailer == null)
        {
            ParserDiagnostics.ThrowParserException("Invalid PDF file: no trailer found."); // TODO L10N using PSSR.
        }

        Debug.Assert(document._irefTable.IsUnderConstruction);
        document._irefTable.IsUnderConstruction = false;

        // Is document encrypted?
        PdfReference xrefEncrypt = document._trailer.Elements[PdfTrailer.Keys.Encrypt] as PdfReference;
        if (xrefEncrypt != null)
        {
            PdfObject encrypt = await parser.ReadObjectAsync(null, xrefEncrypt.ObjectID, false, false);

            encrypt.Reference = xrefEncrypt;
            xrefEncrypt.Value = encrypt;
            PdfStandardSecurityHandler securityHandler = document.SecurityHandler;

        TryAgain:
            // Re-entered with the password supplied by the provider until it is
            // accepted or the provider aborts.
            PasswordValidity validity = securityHandler.ValidatePassword(password);
            if (validity == PasswordValidity.Invalid)
            {
                if (passwordProvider != null)
                {
                    PdfPasswordProviderArgs args = new PdfPasswordProviderArgs();
                    passwordProvider(args);
                    if (args.Abort)
                    {
                        return(null);
                    }

                    password = args.Password;
                    goto TryAgain;
                }
                else
                {
                    if (password == null)
                    {
                        throw new PdfReaderException(PSSR.PasswordRequired);
                    }
                    else
                    {
                        throw new PdfReaderException(PSSR.InvalidPassword);
                    }
                }
            }
            else if (validity == PasswordValidity.UserPassword && openmode == PdfDocumentOpenMode.Modify)
            {
                // The user password is not sufficient for opening in Modify mode.
                if (passwordProvider != null)
                {
                    PdfPasswordProviderArgs args = new PdfPasswordProviderArgs();
                    passwordProvider(args);
                    if (args.Abort)
                    {
                        return(null);
                    }

                    password = args.Password;
                    goto TryAgain;
                }
                else
                {
                    throw new PdfReaderException(PSSR.OwnerPasswordRequired);
                }
            }
        }
        else
        {
            if (password != null)
            {
                // Password specified but document is not encrypted.
                // ignore
            }
        }

        PdfReference[] irefs2 = document._irefTable.AllReferences;
        int count2 = irefs2.Length;

        // 3rd: Create iRefs for all compressed objects.
        // The dictionary is used as a set of object numbers that were already processed.
        Dictionary<int, object> objectStreams = new Dictionary<int, object>();
        for (int idx = 0; idx < count2; idx++)
        {
            PdfReference iref = irefs2[idx];
            if (iref.Value is PdfCrossReferenceStream xrefStream)
            {
                for (int idx2 = 0; idx2 < xrefStream.Entries.Count; idx2++)
                {
                    PdfCrossReferenceStream.CrossReferenceStreamEntry item = xrefStream.Entries[idx2];
                    // Is type xref to compressed object?
                    if (item.Type == 2)
                    {
                        int objectNumber = (int)item.Field2;
                        if (!objectStreams.ContainsKey(objectNumber))
                        {
                            objectStreams.Add(objectNumber, null);
                            PdfObjectID objectID = new PdfObjectID((int)item.Field2);
                            parser.ReadIRefsFromCompressedObject(objectID);
                        }
                    }
                }
            }
        }

        // 4th: Read compressed objects.
        for (int idx = 0; idx < count2; idx++)
        {
            PdfReference iref = irefs2[idx];
            if (iref.Value is PdfCrossReferenceStream xrefStream)
            {
                for (int idx2 = 0; idx2 < xrefStream.Entries.Count; idx2++)
                {
                    PdfCrossReferenceStream.CrossReferenceStreamEntry item = xrefStream.Entries[idx2];
                    // Is type xref to compressed object?
                    if (item.Type == 2)
                    {
                        // NOTE(review): irefNew is not used afterwards and the assertion checks
                        // iref rather than irefNew - confirm whether that is intended.
                        PdfReference irefNew = parser.ReadCompressedObject(new PdfObjectID((int)item.Field2), (int)item.Field3);
                        Debug.Assert(document._irefTable.Contains(iref.ObjectID));
                    }
                }
            }
        }

        PdfReference[] irefs = document._irefTable.AllReferences;
        int count = irefs.Length;

        // Read all indirect objects.
        for (int idx = 0; idx < count; idx++)
        {
            PdfReference iref = irefs[idx];
            if (iref.Value == null)
            {
#if DEBUG_
                if (iref.ObjectNumber == 1074)
                {
                    iref.GetType();
                }
#endif
                try
                {
                    Debug.Assert(document._irefTable.Contains(iref.ObjectID));
                    PdfObject pdfObject = await parser.ReadObjectAsync(null, iref.ObjectID, false, false);

                    Debug.Assert(pdfObject.Reference == iref);
                    pdfObject.Reference = iref;
                    Debug.Assert(pdfObject.Reference.Value != null, "Something went wrong.");
                }
                catch (Exception ex)
                {
                    Debug.WriteLine(ex.Message);
                    // 4STLA rethrow exception to notify caller.
                    throw;
                }
            }
            else
            {
                Debug.Assert(document._irefTable.Contains(iref.ObjectID));
            }
            // Set maximum object number.
            document._irefTable._maxObjectNumber = Math.Max(document._irefTable._maxObjectNumber, iref.ObjectNumber);
        }

        // Decrypt all objects.
        if (xrefEncrypt != null)
        {
            document.SecurityHandler.DecryptDocument();
        }

        // Fix references of trailer values and then objects and irefs are consistent.
        document._trailer.Finish();

#if DEBUG_
        // Some tests...
        PdfReference[] reachables = document.xrefTable.TransitiveClosure(document.trailer);
        reachables.GetType();
        reachables = document.xrefTable.AllXRefs;
        document.xrefTable.CheckConsistence();
#endif

        if (openmode == PdfDocumentOpenMode.Modify)
        {
            // Create new or change existing document IDs.
            if (document.Internals.SecondDocumentID == "")
            {
                document._trailer.CreateNewDocumentIDs();
            }
            else
            {
                byte[] agTemp = Guid.NewGuid().ToByteArray();
                document.Internals.SecondDocumentID = PdfEncoders.RawEncoding.GetString(agTemp, 0, agTemp.Length);
            }

            // Change modification date
            document.Info.ModificationDate = DateTime.Now;

            // Remove all unreachable objects
            int removed = document._irefTable.Compact();
            if (removed != 0)
            {
                Debug.WriteLine("Number of deleted unreachable objects: " + removed);
            }

            // Force flattening of page tree
            PdfPages pages = document.Pages;
            Debug.Assert(pages != null);

            document._irefTable.CheckConsistence();
            document._irefTable.Renumber();
            document._irefTable.CheckConsistence();
        }
    }
#if !DEBUG
    catch (Exception ex)
    {
        Debug.WriteLine(ex.Message);
        throw;
    }
#endif
    return(document);
}
/// <summary>
/// Returns the reference stored under the string form of the specified external identifier.
/// </summary>
/// <param name="externalID">The identifier whose string form is used as the lookup key.</param>
public PdfReference this[PdfObjectID externalID]
{
    get
    {
        string key = externalID.ToString();
        return _externalIDs[key];
    }
}
/// <summary>
/// Creates a default parser for the given document and uses it to read the
/// object with the given identifier.
/// </summary>
/// <param name="owner">The document the object is read from; must not be null.</param>
/// <param name="objectID">The identifier of the object to read.</param>
/// <returns>The object returned by the parser.</returns>
/// <exception cref="ArgumentNullException"><paramref name="owner"/> is null.</exception>
public static PdfObject ReadObject(PdfDocument owner, PdfObjectID objectID)
{
    if (owner == null)
    {
        throw new ArgumentNullException("owner");
    }

    Parser defaultParser = new Parser(owner);
    PdfObject result = defaultParser.ReadObject(null, objectID, false, false);
    return result;
}
/// <summary>
/// Reads the indirect PDF object with the specified ID from the input stream.
/// </summary>
/// <param name="pdfObject">Either the instance of a derived type or null. If it is null
/// an appropriate object is created. Only used for arrays and dictionaries; for all
/// simple objects (null, boolean, numbers, string, name) a new object is always created.</param>
/// <param name="objectID">The address of the object.</param>
/// <param name="includeReferences">If true, specifies that all indirect objects
/// are included recursively.</param>
/// <returns>The object that was read. Its object ID is always taken from
/// <paramref name="objectID"/>, not from the numbers found in the file.</returns>
/// <exception cref="NotImplementedException">The object starts with a token that is not supported here.</exception>
/// <exception cref="PdfReaderException">An array or dictionary object is not terminated by 'endobj'.</exception>
public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences)
{
    MoveToObject(objectID);

    // The two numbers must be consumed from the stream even though they are overwritten below.
    int objectNumber = ReadInteger();
    int generationNumber = ReadInteger();
#if DEBUG
    // The following assertion sometime failed (see below)
    //Debug.Assert(objectID == new PdfObjectID(objectNumber, generationNumber));
    if (objectID != new PdfObjectID(objectNumber, generationNumber))
    {
        // A special kind of bug? Or is this an undocumented PDF feature?
        // PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf'
        // The iref table of this file contains the following entries:
        //    iref
        //    0 148
        //    0000000000 65535 f
        //    0000000015 00000 n
        //    0000000346 00000 n
        //    ....
        //    0000083236 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000080334 00000 n
        //    ....
        // Object 84, 85, 86, and 87 maps to the same dictionary, but all PDF readers I tested
        // ignores this mismatch! The following assertion failed about 50 times with this file.
#if true_
        string message = String.Format("xref entry {0} {1} maps to object {2} {3}.",
            objectID.ObjectNumber, objectID.GenerationNumber, objectNumber, generationNumber);
        Debug.Assert(false, message);
#endif
    }
#endif
    // Always use object ID from iref table (see above)
    objectNumber = objectID.ObjectNumber;
    generationNumber = objectID.GenerationNumber;
#if true_
    Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber));
#endif
    ReadSymbol(Symbol.Obj);

    bool checkForStream = false;
    Symbol symbol = ScanNextToken();
    switch (symbol)
    {
        case Symbol.BeginArray:
            PdfArray array;
            if (pdfObject == null)
            {
                array = new PdfArray(this.document);
            }
            else
            {
                array = (PdfArray)pdfObject;
            }
            //PdfObject.RegisterObject(array, objectID, generation);
            pdfObject = ReadArray(array, includeReferences);
            pdfObject.SetObjectID(objectNumber, generationNumber);
            break;

        case Symbol.BeginDictionary:
            PdfDictionary dict;
            if (pdfObject == null)
            {
                dict = new PdfDictionary(this.document);
            }
            else
            {
                dict = (PdfDictionary)pdfObject;
            }
            //PdfObject.RegisterObject(dict, objectID, generation);
            // Only a dictionary may be followed by a stream.
            checkForStream = true;
            pdfObject = ReadDictionary(dict, includeReferences);
            pdfObject.SetObjectID(objectNumber, generationNumber);
            break;

        // Acrobat 6 Professional proudly presents: The Null object!
        // Even with a one-digit object number an indirect reference 'x 0 R' to this object is
        // one character larger than the direct use of 'null'. Probable this is the reason why
        // it is true that Acrobat Web Capture 6.0 creates this object, but obviously never
        // creates a reference to it!
        case Symbol.Null:
            pdfObject = new PdfNullObject(this.document);
            pdfObject.SetObjectID(objectNumber, generationNumber);
            ReadSymbol(Symbol.EndObj);
            return pdfObject;

        case Symbol.Boolean:
            // PDF keywords are not linguistic text: compare ordinally instead of using the
            // current culture (same as the overload that supports object streams).
            pdfObject = new PdfBooleanObject(this.document,
                String.Equals(this.lexer.Token, Boolean.TrueString, StringComparison.OrdinalIgnoreCase));
            pdfObject.SetObjectID(objectNumber, generationNumber);
            ReadSymbol(Symbol.EndObj);
            return pdfObject;

        case Symbol.Integer:
            pdfObject = new PdfIntegerObject(this.document, this.lexer.TokenToInteger);
            pdfObject.SetObjectID(objectNumber, generationNumber);
            ReadSymbol(Symbol.EndObj);
            return pdfObject;

        case Symbol.UInteger:
            pdfObject = new PdfUIntegerObject(this.document, this.lexer.TokenToUInteger);
            pdfObject.SetObjectID(objectNumber, generationNumber);
            ReadSymbol(Symbol.EndObj);
            return pdfObject;

        case Symbol.Real:
            pdfObject = new PdfRealObject(this.document, this.lexer.TokenToReal);
            pdfObject.SetObjectID(objectNumber, generationNumber);
            ReadSymbol(Symbol.EndObj);
            return pdfObject;

        case Symbol.String:
            pdfObject = new PdfStringObject(this.document, this.lexer.Token);
            pdfObject.SetObjectID(objectNumber, generationNumber);
            ReadSymbol(Symbol.EndObj);
            return pdfObject;

        case Symbol.Name:
            pdfObject = new PdfNameObject(this.document, this.lexer.Token);
            pdfObject.SetObjectID(objectNumber, generationNumber);
            ReadSymbol(Symbol.EndObj);
            return pdfObject;

        case Symbol.Keyword:
            // Should not come here anymore
            throw new NotImplementedException("Keyword");

        default:
            // Should not come here anymore
            throw new NotImplementedException("unknown token \"" + symbol + "\"");
    }

    // Only arrays and dictionaries get here; a dictionary may carry a stream.
    symbol = ScanNextToken();
    if (symbol == Symbol.BeginStream)
    {
        PdfDictionary dict = (PdfDictionary)pdfObject;
        Debug.Assert(checkForStream, "Unexpected stream...");
        int length = GetStreamLength(dict);
        byte[] bytes = this.lexer.ReadStream(length);
#if true_
        if (dict.Elements.GetString("/Filter") == "/FlateDecode")
        {
            if (dict.Elements["/Subtype"] == null)
            {
                try
                {
                    byte[] decoded = Filtering.FlateDecode.Decode(bytes);
                    if (decoded.Length == 0)
                    {
                        goto End;
                    }
                    string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
                    if (pageContent.Length > 100)
                    {
                        pageContent = pageContent.Substring(pageContent.Length - 100);
                    }
                    pageContent.GetType();
                    bytes = decoded;
                    dict.Elements.Remove("/Filter");
                    dict.Elements.SetInteger("/Length", bytes.Length);
                }
                catch
                {
                }
            }
        End:;
        }
#endif
        PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
        dict.Stream = stream;
        ReadSymbol(Symbol.EndStream);
        symbol = ScanNextToken();
    }

    if (symbol != Symbol.EndObj)
    {
        throw new PdfReaderException(PSSR.UnexpectedToken(this.lexer.Token));
    }
    return pdfObject;
}
/// <summary>
/// Scans tokens and pushes the corresponding PDF items onto the parser stack until
/// the <paramref name="stop"/> symbol is scanned.
/// </summary>
/// <param name="stop">The symbol that terminates parsing. It is consumed but not pushed.</param>
/// <remarks>
/// An unexpected token is reported to <c>ParserDiagnostics</c>, the input is skipped up to
/// <paramref name="stop"/>, and the method returns. Reaching the end of the file before
/// <paramref name="stop"/> is reported through <c>ParserDiagnostics.ThrowParserException</c>.
/// </remarks>
private void ParseObject(Symbol stop)
{
#if DEBUG_
    ParseObjectCounter++;
    Debug.WriteLine(ParseObjectCounter.ToString());
    if (ParseObjectCounter == 178)
        GetType();
#endif
    for (Symbol token = ScanNextToken(); token != Symbol.Eof; token = ScanNextToken())
    {
        if (token == stop)
        {
            return;
        }

        switch (token)
        {
            case Symbol.Comment:
                // Comments carry no content.
                break;

            case Symbol.Null:
                _stack.Shift(PdfNull.Value);
                break;

            case Symbol.Boolean:
                _stack.Shift(new PdfBoolean(_lexer.TokenToBoolean));
                break;

            case Symbol.Integer:
                _stack.Shift(new PdfInteger(_lexer.TokenToInteger));
                break;

            case Symbol.UInteger:
                _stack.Shift(new PdfUInteger(_lexer.TokenToUInteger));
                break;

            case Symbol.Real:
                _stack.Shift(new PdfReal(_lexer.TokenToReal));
                break;

            case Symbol.String:
                // Plain strings are kept in raw encoding.
                _stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.RawEncoding));
                break;

            case Symbol.UnicodeString:
                _stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.Unicode));
                break;

            case Symbol.HexString:
                _stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.HexLiteral));
                break;

            case Symbol.UnicodeHexString:
                _stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.Unicode | PdfStringFlags.HexLiteral));
                break;

            case Symbol.Name:
                _stack.Shift(new PdfName(_lexer.Token));
                break;

            case Symbol.R:
                {
                    // The two topmost stack items are the object and generation number.
                    Debug.Assert(_stack.GetItem(-1) is PdfInteger && _stack.GetItem(-2) is PdfInteger);
                    PdfObjectID referencedID = new PdfObjectID(_stack.GetInteger(-2), _stack.GetInteger(-1));

                    PdfReference known = _document._irefTable[referencedID];
                    if (known != null)
                    {
                        _stack.Reduce(known, 2);
                    }
                    else if (_document._irefTable.IsUnderConstruction)
                    {
                        // If a document has more than one cross reference table, the first trailer
                        // may refer to objects whose iref entry is not yet read in. Use a temporary
                        // iref (original note: these are removed later in PdfTrailer.FixXRefs).
                        _stack.Reduce(new PdfReference(referencedID, 0), 2);
                    }
                    else
                    {
                        // PDF Reference section 3.2.9: an indirect reference to an undefined
                        // object is not an error; it is treated as a reference to the null object.
                        _stack.Reduce(PdfNull.Value, 2);
                    }
                    break;
                }

            case Symbol.BeginArray:
                {
                    PdfArray nestedArray = new PdfArray(_document);
                    ReadArray(nestedArray, false);
                    _stack.Shift(nestedArray);
                    break;
                }

            case Symbol.BeginDictionary:
                {
                    PdfDictionary nestedDictionary = new PdfDictionary(_document);
                    ReadDictionary(nestedDictionary, false);
                    _stack.Shift(nestedDictionary);
                    break;
                }

            case Symbol.BeginStream:
                throw new NotImplementedException();

            // Everything else is not expected here: None, Keyword, EndStream, EndArray,
            // EndDictionary, Obj, EndObj, XRef, Trailer, StartXRef, Eof.
            default:
                ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
                SkipCharsUntil(stop);
                return;
        }
    }

    ParserDiagnostics.ThrowParserException("Unexpected end of file.");
}
/// <summary>
/// Positions the lexer at the location that the cross reference table records for the
/// specified object.
/// </summary>
/// <param name="objectID">The ID of the object to move to.</param>
/// <returns>The new position of the lexer.</returns>
public int MoveToObject(PdfObjectID objectID)
{
    int target = this.document.irefTable[objectID].Position;
    this.lexer.Position = target;
    return target;
}
/// <summary>
/// Reads the irefs of all compressed objects contained in the object stream with the
/// specified object ID and adds them to the document's cross reference table.
/// </summary>
/// <param name="objectID">The ID of the object stream. An iref for it must already exist
/// in the cross reference table.</param>
/// <exception cref="NotImplementedException">No iref exists for <paramref name="objectID"/>.</exception>
internal void ReadIRefsFromCompressedObject(PdfObjectID objectID)
{
    PdfReference iref;

    Debug.Assert(_document._irefTable.ObjectTable.ContainsKey(objectID));
    if (!_document._irefTable.ObjectTable.TryGetValue(objectID, out iref))
    {
        // We should never come here because the object stream must be a type 1 entry in the xref stream
        // and iref was created before.
        throw new NotImplementedException("This case is not coded or something else went wrong");
    }

    // Read in object stream object when we come here for the very first time.
    if (iref.Value == null)
    {
        try
        {
            Debug.Assert(_document._irefTable.Contains(iref.ObjectID));
            PdfDictionary pdfObject = (PdfDictionary)ReadObject(null, iref.ObjectID, false, false);

            // Wrapping the dictionary is expected to leave iref pointing at the new object
            // (see the asserts below), so no explicit assignment is made.
            PdfObjectStream objectStream = new PdfObjectStream(pdfObject);
            Debug.Assert(objectStream.Reference == iref);
            Debug.Assert(objectStream.Reference.Value != null, "Something went wrong.");
        }
        catch (Exception ex)
        {
            // Trace only; the original exception is rethrown unchanged.
            Debug.WriteLine(ex.Message);
            throw;
        }
    }
    Debug.Assert(iref.Value != null);

    // The value may still be a plain dictionary; wrap it in that case.
    PdfObjectStream objectStreamStream = iref.Value as PdfObjectStream;
    if (objectStreamStream == null)
    {
        Debug.Assert(((PdfDictionary)iref.Value).Elements.GetName("/Type") == "/ObjStm");

        objectStreamStream = new PdfObjectStream((PdfDictionary)iref.Value);
        Debug.Assert(objectStreamStream.Reference == iref);
        Debug.Assert(objectStreamStream.Reference.Value != null, "Something went wrong.");
    }

    // objectStreamStream cannot be null here: it is either the successful result of the
    // 'as' cast or a newly constructed instance.
    objectStreamStream.ReadReferences(_document._irefTable);
}
/// <summary>
/// Returns the object with the specified Identifier, or null, if no such object exists.
/// </summary>
/// <param name="objectID">The ID of the object to look up in the document's cross reference table.</param>
/// <returns>The value of the matching cross reference entry, or null if the table has no
/// entry for <paramref name="objectID"/>.</returns>
public PdfObject GetObject(PdfObjectID objectID)
{
    // The table indexer yields null for unknown IDs; dereferencing it directly would throw
    // a NullReferenceException instead of returning null as documented.
    PdfReference iref = _document._irefTable[objectID];
    return iref == null ? null : iref.Value;
}
/// <summary>
/// Determines whether the object table has an entry for the specified object identifier.
/// </summary>
/// <param name="objectID">The object identifier to look for.</param>
/// <returns>True if the object table contains the identifier as a key; otherwise false.</returns>
public bool Contains(PdfObjectID objectID)
{
    bool isRegistered = ObjectTable.ContainsKey(objectID);
    return isRegistered;
}
/// <summary>
/// Reads a PDF object either from its position in the input stream or, when it is part of
/// an object stream, from the current position of the lexer.
/// </summary>
/// <param name="pdfObject">Either the instance of a derived type or null. If it is null
/// an appropriate object is created. Only used for arrays and dictionaries.</param>
/// <param name="objectID">The address of the object. The resulting object always gets this ID.</param>
/// <param name="includeReferences">If true, specifies that all indirect objects
/// are included recursively.</param>
/// <param name="fromObjecStream">If true, the object is parsed from an object stream. In this
/// case no 'n g obj' header and no 'endobj' are read.</param>
/// <returns>The object that was read.</returns>
public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool includeReferences, bool fromObjecStream)
{
#if DEBUG_
    Debug.WriteLine("ReadObject: " + objectID);
    if (objectID.ObjectNumber == 20)
        GetType();
#endif
    int objectNumber = objectID.ObjectNumber;
    int generationNumber = objectID.GenerationNumber;
    bool isTopLevel = !fromObjecStream;
    if (isTopLevel)
    {
        // Consume the object header numbers from the file.
        MoveToObject(objectID);
        objectNumber = ReadInteger();
        generationNumber = ReadInteger();
    }
#if DEBUG
    // The following assertion sometime failed (see below)
    //Debug.Assert(objectID == new PdfObjectID(objectNumber, generationNumber));
    if (isTopLevel && objectID != new PdfObjectID(objectNumber, generationNumber))
    {
        // A special kind of bug? Or is this an undocumented PDF feature?
        // PDF4NET 2.6 provides a sample called 'Unicode', which produces a file 'unicode.pdf'
        // The iref table of this file contains the following entries:
        //    iref
        //    0 148
        //    0000000000 65535 f
        //    0000000015 00000 n
        //    0000000346 00000 n
        //    ....
        //    0000083236 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000083045 00000 n
        //    0000080334 00000 n
        //    ....
        // Object 84, 85, 86, and 87 maps to the same dictionary, but all PDF readers I tested
        // ignores this mismatch! The following assertion failed about 50 times with this file.
#if true_
        string message = String.Format("xref entry {0} {1} maps to object {2} {3}.",
            objectID.ObjectNumber, objectID.GenerationNumber, objectNumber, generationNumber);
        Debug.Assert(false, message);
#endif
    }
#endif
    // Always use object ID from iref table (see above).
    objectNumber = objectID.ObjectNumber;
    generationNumber = objectID.GenerationNumber;
#if true_
    Debug.WriteLine(String.Format("obj: {0} {1}", objectNumber, generationNumber));
#endif
    if (isTopLevel)
    {
        ReadSymbol(Symbol.Obj);
    }

    bool checkForStream = false;
    Symbol symbol = ScanNextToken();
    switch (symbol)
    {
        case Symbol.BeginArray:
            {
                PdfArray targetArray = pdfObject == null ? new PdfArray(_document) : (PdfArray)pdfObject;
                pdfObject = ReadArray(targetArray, includeReferences);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                break;
            }

        case Symbol.BeginDictionary:
            {
                PdfDictionary targetDictionary = pdfObject == null ? new PdfDictionary(_document) : (PdfDictionary)pdfObject;
                // Only a dictionary may be followed by a stream.
                checkForStream = true;
                pdfObject = ReadDictionary(targetDictionary, includeReferences);
                pdfObject.SetObjectID(objectNumber, generationNumber);
                break;
            }

        // Simple objects: create the matching object type, then finish with the shared tail.
        //
        // Acrobat 6 Professional proudly presents: The Null object!
        // Even with a one-digit object number an indirect reference 'x 0 R' to this object is
        // one character larger than the direct use of 'null'. Probable this is the reason why
        // it is true that Acrobat Web Capture 6.0 creates this object, but obviously never
        // creates a reference to it!
        case Symbol.Null:
        case Symbol.Boolean:
        case Symbol.Integer:
        case Symbol.UInteger:
        case Symbol.Real:
        case Symbol.String:
        case Symbol.Name:
            switch (symbol)
            {
                case Symbol.Null:
                    pdfObject = new PdfNullObject(_document);
                    break;
                case Symbol.Boolean:
                    pdfObject = new PdfBooleanObject(_document,
                        String.Compare(_lexer.Token, Boolean.TrueString, StringComparison.OrdinalIgnoreCase) == 0);
                    break;
                case Symbol.Integer:
                    pdfObject = new PdfIntegerObject(_document, _lexer.TokenToInteger);
                    break;
                case Symbol.UInteger:
                    pdfObject = new PdfUIntegerObject(_document, _lexer.TokenToUInteger);
                    break;
                case Symbol.Real:
                    pdfObject = new PdfRealObject(_document, _lexer.TokenToReal);
                    break;
                case Symbol.String:
                    pdfObject = new PdfStringObject(_document, _lexer.Token);
                    break;
                case Symbol.Name:
                    pdfObject = new PdfNameObject(_document, _lexer.Token);
                    break;
            }
            pdfObject.SetObjectID(objectNumber, generationNumber);
            if (isTopLevel)
            {
                ReadSymbol(Symbol.EndObj);
            }
            return pdfObject;

        // Symbol.Keyword and everything else: should not come here anymore.
        default:
            ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
            break;
    }

    symbol = ScanNextToken();
    if (symbol == Symbol.BeginStream)
    {
        PdfDictionary dict = (PdfDictionary)pdfObject;
        Debug.Assert(checkForStream, "Unexpected stream...");
#if true_
        ReadStream(dict);
#else
        int length = GetStreamLength(dict);
        byte[] bytes = _lexer.ReadStream(length);
#if true_
        if (dict.Elements.GetString("/Filter") == "/FlateDecode")
        {
            if (dict.Elements["/Subtype"] == null)
            {
                try
                {
                    byte[] decoded = Filtering.FlateDecode.Decode(bytes);
                    if (decoded.Length == 0)
                        goto End;
                    string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
                    if (pageContent.Length > 100)
                        pageContent = pageContent.Substring(pageContent.Length - 100);
                    pageContent.GetType();
                    bytes = decoded;
                    dict.Elements.Remove("/Filter");
                    dict.Elements.SetInteger("/Length", bytes.Length);
                }
                catch
                {
                }
            }
        End: ;
        }
#endif
        dict.Stream = new PdfDictionary.PdfStream(bytes, dict);
        ReadSymbol(Symbol.EndStream);
        symbol = ScanNextToken();
#endif
    }

    if (isTopLevel && symbol != Symbol.EndObj)
    {
        ParserDiagnostics.ThrowParserException(PSSR.UnexpectedToken(_lexer.Token));
    }
    return pdfObject;
}
/// <summary>
/// Reads a cross reference stream object. The lexer must have just scanned the object number.
/// The stream itself is registered in <paramref name="xrefTable"/>, its entries are decoded
/// and stored in the stream's entry list, and a reference is added to the table for every
/// type 1 (uncompressed) entry whose object ID is not yet contained in it.
/// </summary>
/// <param name="xrefTable">The cross reference table that receives the references.</param>
/// <returns>The cross reference stream that was read.</returns>
private PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
{
    // Object header: the object number is the current token, the generation number follows.
    int number = _lexer.TokenToInteger;
    int generation = ReadInteger();
    Debug.Assert(generation == 0);
    ReadSymbol(Symbol.Obj);
    ReadSymbol(Symbol.BeginDictionary);
    PdfObjectID objectID = new PdfObjectID(number, generation);

    // Dictionary and stream content.
    PdfCrossReferenceStream xrefStream = new PdfCrossReferenceStream(_document);
    ReadDictionary(xrefStream, false);
    ReadSymbol(Symbol.BeginStream);
    ReadStream(xrefStream);

    // Register the cross reference stream itself.
    PdfReference streamReference = new PdfReference(xrefStream);
    streamReference.ObjectID = objectID;
    streamReference.Value = xrefStream;
    xrefTable.Add(streamReference);

    Debug.Assert(xrefStream.Stream != null);
    byte[] bytesRaw = xrefStream.Stream.UnfilteredValue;
    byte[] bytes = bytesRaw;

    // HACK: Should be done in UnfilteredValue.
    if (xrefStream.Stream.HasDecodeParams)
    {
        int predictor = xrefStream.Stream.DecodePredictor;
        int columns = xrefStream.Stream.DecodeColumns;
        bytes = DecodeCrossReferenceStream(bytesRaw, columns, predictor);
    }
#if DEBUG_
    for (int idx = 0; idx < bytes.Length; idx++)
    {
        if (idx % 4 == 0)
            Console.WriteLine();
        Console.Write("{0:000} ", (int)bytes[idx]);
    }
    Console.WriteLine();
#endif
    // Dictionary entries that describe the layout, e.g.: W[1 2 1], Index[7 12], Size 19.
    int size = xrefStream.Elements.GetInteger(PdfCrossReferenceStream.Keys.Size);
    PdfArray index = xrefStream.Elements.GetValue(PdfCrossReferenceStream.Keys.Index) as PdfArray;
    int prev = xrefStream.Elements.GetInteger(PdfCrossReferenceStream.Keys.Prev);
    PdfArray w = (PdfArray)xrefStream.Elements.GetValue(PdfCrossReferenceStream.Keys.W);

    // Setup subsections. Each one is a pair { first value, entry count } taken from Index.
    int[][] subsections;
    int subsectionEntryCount = 0;
    if (index == null)
    {
        // Setup with default values.
        // HACK: What is size? Contradiction in PDF reference.
        subsections = new int[][] { new int[] { 0, size } };
        subsectionEntryCount = size;
    }
    else
    {
        // Read subsections from array.
        Debug.Assert(index.Elements.Count % 2 == 0);
        subsections = new int[index.Elements.Count / 2][];
        for (int idx = 0; idx < subsections.Length; idx++)
        {
            int first = index.Elements.GetInteger(2 * idx);
            int count = index.Elements.GetInteger(2 * idx + 1);
            subsections[idx] = new int[] { first, count };
            subsectionEntryCount += count;
        }
    }

    // W key: byte widths of the three fields of an entry.
    Debug.Assert(w.Elements.Count == 3);
    int[] wsize = { w.Elements.GetInteger(0), w.Elements.GetInteger(1), w.Elements.GetInteger(2) };
    int wsum = StreamHelper.WSize(wsize);
    if (wsum * subsectionEntryCount != bytes.Length)
        GetType();
    Debug.Assert(wsum * subsectionEntryCount == bytes.Length, "Check implementation here.");
    int testcount = subsections[0][1];
#if DEBUG && CORE
    if (PdfDiagnostics.TraceXrefStreams)
    {
        for (int idx = 0; idx < testcount; idx++)
        {
            uint field1 = StreamHelper.ReadBytes(bytes, idx * wsum, wsize[0]);
            uint field2 = StreamHelper.ReadBytes(bytes, idx * wsum + wsize[0], wsize[1]);
            uint field3 = StreamHelper.ReadBytes(bytes, idx * wsum + wsize[0] + wsize[1], wsize[2]);
            string res = String.Format("{0,2:00}: {1} {2,5} {3}  // ", idx, field1, field2, field3);
            switch (field1)
            {
                case 0:
                    res += "Fee list: object number, generation number";
                    break;

                case 1:
                    res += "Not compresed: offset, generation number";
                    break;

                case 2:
                    res += "Compressed: object stream object number, index in stream";
                    break;

                default:
                    res += "??? Type undefined";
                    break;
            }
            Debug.WriteLine(res);
        }
    }
#endif
    // Entries of all subsections are stored back to back; entryIndex counts across subsections.
    int entryIndex = 0;
    foreach (int[] subsection in subsections)
    {
        int entriesInSubsection = subsection[1];
        for (int idx = 0; idx < entriesInSubsection; idx++, entryIndex++)
        {
            int offset = entryIndex * wsum;

            PdfCrossReferenceStream.CrossReferenceStreamEntry item =
                new PdfCrossReferenceStream.CrossReferenceStreamEntry();
            item.Type = StreamHelper.ReadBytes(bytes, offset, wsize[0]);
            item.Field2 = StreamHelper.ReadBytes(bytes, offset + wsize[0], wsize[1]);
            item.Field3 = StreamHelper.ReadBytes(bytes, offset + wsize[0] + wsize[1], wsize[2]);
            xrefStream.Entries.Add(item);

            // Type 0 entries are not needed and type 2 entries need no handling yet;
            // only type 1 (offset / generation number) is processed here.
            if (item.Type == 1)
            {
                // Even it is restricted, an object can exists in more than one subsection.
                // (PDF Reference Implementation Notes 15).
                int position = (int)item.Field2;
                objectID = ReadObjectNumber(position);
#if DEBUG
                if (objectID.ObjectNumber == 1074)
                    GetType();
#endif
                Debug.Assert(objectID.GenerationNumber == item.Field3);

                // Ignore the latter one.
                if (!xrefTable.Contains(objectID))
                {
#if DEBUG
                    GetType();
#endif
                    // Add iref for all uncompressed objects.
                    xrefTable.Add(new PdfReference(objectID, position));
                }
            }
        }
    }
    return xrefStream;
}
/// <summary>
/// Checks whether an entry for the specified object identifier exists in the object table.
/// </summary>
/// <param name="objectID">The object identifier to check.</param>
/// <returns>True if the identifier is a key of the object table; otherwise false.</returns>
public bool Contains(PdfObjectID objectID)
{
    return ObjectTable.ContainsKey(objectID);
}
/// <summary>
/// Gets the cross reference entry that belongs to the specified object identifier.
/// Yields null if the object table has no entry for that identifier.
/// </summary>
/// <param name="objectID">The object identifier to look up.</param>
public PdfReference this[PdfObjectID objectID]
{
    get
    {
        PdfReference found;
        return ObjectTable.TryGetValue(objectID, out found) ? found : null;
    }
}