/**
  <summary>Serializes the file from scratch using a classic cross-reference table: header,
  indirect objects, xref table (one section, one subsection) and trailer.</summary>
  <remarks>Free xref entries are chained as a linked list, so each free entry can only be
  rendered once the object number of the following free entry is known; the in-use entries
  met in between are buffered and flushed right after it.</remarks>
*/
protected override void WriteStandard()
{
    // 1. Header [PDF:1.6:3.4.1].
    WriteHeader();

    // 2. Body [PDF:1.6:3.4.2].
    int entryCount = file.IndirectObjects.Count;
    StringBuilder tableBuilder = new StringBuilder(XRefChunk);
    AppendXRefSubsectionIndexer(tableBuilder, 0, entryCount);

    StringBuilder pendingInUseEntries = new StringBuilder();
    IndirectObjects objects = file.IndirectObjects;

    // The chain of free entries starts at object number 0.
    PdfReference pendingFreeReference = objects[0].Reference;
    for (int objectNumber = 1; objectNumber < entryCount; objectNumber++)
    {
        PdfIndirectObject current = objects[objectNumber];
        if (!current.IsInUse())
        {
            // The previous free entry can now be rendered, as its successor is known.
            AppendXRefEntry(tableBuilder, pendingFreeReference, objectNumber);

            // Flush the in-use entries met since the previous free entry, then restart the buffer.
            tableBuilder.Append(pendingInUseEntries);
            pendingInUseEntries.Length = 0;

            pendingFreeReference = current.Reference;
            continue;
        }

        // In-use entry: record the current stream length as its position, then write its content.
        AppendXRefEntry(pendingInUseEntries, current.Reference, stream.Length);
        current.WriteTo(stream, file);
    }

    // The last free entry closes the chain by pointing back to object number 0.
    AppendXRefEntry(tableBuilder, pendingFreeReference, 0);
    tableBuilder.Append(pendingInUseEntries);

    // 3. XRef table (unique section) [PDF:1.6:3.4.3].
    long startxref = stream.Length;
    stream.Write(tableBuilder.ToString());

    // 4. Trailer [PDF:1.6:3.4.4].
    WriteTrailer(startxref, entryCount, null);
}
/**
  <summary>Serializes the file from scratch using a cross-reference stream: header, indirect
  objects, xref stream and tail.</summary>
  <remarks>The xref stream is built on the fly and wrapped by a temporary indirect object
  which is never registered into the file's indirect objects collection, so the existing file
  structure is left untouched.</remarks>
*/
protected override void WriteStandard()
{
    // 1. Header [PDF:1.6:3.4.1].
    WriteHeader();

    // 2. Body [PDF:1.6:3.4.2,3,7].
    // 2.1. Content indirect objects.
    IndirectObjects objects = file.IndirectObjects;

    // Temporary (unregistered) indirect object hosting the xref stream; its entry is numbered
    // right after the existing objects.
    XRefStream xrefStream = new XRefStream(file);
    XRefEntry xrefStreamEntry = new XRefEntry(objects.Count, 0);
    new PdfIndirectObject(file, xrefStream, xrefStreamEntry);

    XRefEntry lastFreeEntry = null;
    foreach (PdfIndirectObject current in objects)
    {
        lastFreeEntry = AddXRefEntry(current.XrefEntry, current, xrefStream, lastFreeEntry, null);
    }
    // Links back to the first free object.
    // NOTE: The first entry in the table (object number 0) is always free.
    lastFreeEntry.Offset = 0;

    // 2.2. XRef stream.
    UpdateTrailer(xrefStream.Header, stream);
    AddXRefEntry(xrefStreamEntry, xrefStream.Container, xrefStream, null, null);

    // 3. Tail.
    WriteTail(xrefStreamEntry.Offset);
}
/**
  <summary>Rebuilds the stream body from the current object stream entries (index section
  followed by data section) and refreshes the N and First header entries.</summary>
  <param name="stream">Target output stream (not read by this method).</param>
*/
private void Flush(IOutputStream stream)
{
    // 1. Body.
    IBuffer indexSection = new bytes.Buffer();
    IBuffer dataSection = new bytes.Buffer();
    IndirectObjects fileObjects = File.IndirectObjects;
    int position = -1;
    File context = File;
    foreach (KeyValuePair<int, ObjectEntry> pair in Entries)
    {
        int number = pair.Key;
        ObjectEntry objectEntry = pair.Value;

        // The xref entry of a compressed object stores its ordinal position within this stream.
        position++;
        fileObjects[number].XrefEntry.Offset = position;

        /*
          NOTE: The entry's own offset MUST be updated only after its serialization, in order
          not to interfere with its possible data-object retrieval from the old serialization.
        */
        int relativeOffset = (int)dataSection.Length;

        // Index: object number, then byte offset (relative to the first object).
        indexSection.Append(number.ToString()).Append(Chunk.Space);
        indexSection.Append(relativeOffset.ToString()).Append(Chunk.Space);

        // Data.
        objectEntry.DataObject.WriteTo(dataSection, context);
        objectEntry.offset = relativeOffset;
    }

    // Replace the old body contents with the freshly serialized sections.
    IBuffer streamBody = Body;
    streamBody.SetLength(0);
    streamBody.Append(indexSection);
    int firstDataOffset = (int)streamBody.Length;
    streamBody.Append(dataSection);

    // 2. Header.
    PdfDictionary streamHeader = Header;
    streamHeader[PdfName.N] = PdfInteger.Get(Entries.Count);
    streamHeader[PdfName.First] = PdfInteger.Get(firstDataOffset);
}
/**
  <summary>Creates a file that is not backed by any input stream: its version is taken from
  <c>VersionEnum.PDF14</c>, its trailer is prepared from an empty dictionary, and its indirect
  objects collection is created without pre-existing xref entries.</summary>
*/
public File()
{
    Initialize();

    this.version = VersionEnum.PDF14.GetVersion();
    this.trailer = PrepareTrailer(new PdfDictionary());
    this.indirectObjects = new IndirectObjects(this, null);
    this.document = new Document(this);
}
/**
  <summary>Serializes the file from scratch using a cross-reference stream, compressing every
  compressible indirect object into object streams along the way.</summary>
  <remarks>A new object stream is registered into the file whenever none exists yet or the
  current one has reached <c>ObjectStreamMaxEntryCount</c> entries. The xref stream is wrapped
  by a temporary indirect object which is not registered into the file's indirect objects
  collection.</remarks>
*/
protected override void WriteStandard()
{
    // 1. Header [PDF:1.6:3.4.1].
    WriteHeader();

    // 2. Body [PDF:1.6:3.4.2,3,7].
    /*
      NOTE: Standard xref information structure comprises just one section; the xref stream is
      generated on-the-fly and kept volatile not to interfere with the existing file structure.
    */
    XRefStream xrefStream = new XRefStream(file);

    // 2.1. Indirect objects.
    IndirectObjects objects = file.IndirectObjects;
    XRefEntry lastFreeEntry = null;
    ObjectStream currentObjectStream = null;
    foreach (PdfIndirectObject current in objects)
    {
        if (current.IsCompressible())
        {
            bool needsNewObjectStream = currentObjectStream == null
                || currentObjectStream.Count >= ObjectStreamMaxEntryCount;
            if (needsNewObjectStream)
            {
                currentObjectStream = new ObjectStream();
                file.Register(currentObjectStream);
            }
            current.Compress(currentObjectStream);
        }

        lastFreeEntry = AddXRefEntry(current, xrefStream, lastFreeEntry, null);
    }
    // Links back to the first free object.
    // NOTE: The first entry in the table (object number 0) is always free.
    lastFreeEntry.Offset = 0;

    // 2.2. XRef stream.
    UpdateTrailer(xrefStream.Header, stream);
    XRefEntry xrefStreamEntry = new XRefEntry(
        objects.Count,
        0,
        (int)stream.Length,
        XRefEntry.UsageEnum.InUse);
    /*
      NOTE: This xref stream indirect object is purposely temporary (i.e. not registered into
      the file's indirect objects collection).
    */
    PdfIndirectObject xrefStreamObject = new PdfIndirectObject(file, xrefStream, xrefStreamEntry);
    AddXRefEntry(xrefStreamObject, xrefStream, null, null);

    // 3. Tail.
    WriteTail(xrefStreamEntry.Offset);
}
/// <summary>
/// Returns a string representation of the VerbPhrase.
/// </summary>
/// <remarks>
/// When <c>VerboseOutput</c> is false only the base representation is returned. Otherwise the
/// base representation is followed by one newline-joined line per relationship; a relationship
/// that is absent contributes an empty line.
/// </remarks>
/// <returns>A string representation of the VerbPhrase.</returns>
public override string ToString()
{
    if (!VerboseOutput)
    {
        return base.ToString();
    }

    var empty = string.Empty;
    return string.Join("\n",
        base.ToString(),
        Subjects.Any() ? $"Subjects: {Subjects.Format(s => s.Text)}" : empty,
        // The label previously lacked the ": " separator, fusing it with the complement's text.
        SubjectComplement != null ? $"\nAttached Subject Complement: {SubjectComplement.Text}" : empty,
        DirectObjects.Any() ? $"Direct Objects: {DirectObjects.Format(o => o.Text)}" : empty,
        IndirectObjects.Any() ? $"Indirect Objects: {IndirectObjects.Format(o => o.Text)}" : empty,
        ObjectOfThePreposition != null ? $"Via Preposition Object: {ObjectOfThePreposition.Text}" : empty,
        Modality != null ? $"Modality: {Modality.Text}" : empty,
        AdverbialModifiers.Any() ? $"Modifiers: {AdverbialModifiers.Format(m => m.Text)}" : empty,
        $"\nPossessive: [{(IsPossessive ? "Yes" : "No")}]",
        $"\nClassifier: [{(IsClassifier ? "Yes" : "No")}]",
        $"\nPrevailing Form: [{PrevailingForm.SpaceByCase().RemoveSubstrings("Verb").Trim()}]"
    );
}
/**
  <summary>Removes the indirect objects which cannot be reached by walking the document
  structure from the file trailer.</summary>
  <param name="file">File to optimize.</param>
*/
public static void RemoveOrphanedObjects(File file)
{
    // 1. Collect the object numbers visited while walking the structure from the trailer.
    ISet<int> reachableNumbers = new HashSet<int>();
    IVisitor collector = new AliveObjectCollector(reachableNumbers);
    file.Trailer.Accept(collector, null);

    // 2. Remove every object number that was not collected.
    IndirectObjects objects = file.IndirectObjects;
    int total = objects.Count; // Sampled once, before any removal.
    for (int number = 0; number < total; number++)
    {
        if (reachableNumbers.Contains(number))
        {
            continue;
        }

        objects.RemoveAt(number);
    }
}
/**
  <summary>Serializes the file as an incremental update: the original content is copied
  verbatim, then the modified indirect objects and a new cross-reference stream section are
  appended.</summary>
  <remarks>The new xref stream is wrapped by a temporary indirect object (not registered into
  the file's indirect objects collection) and its Prev entry is set to the xref offset
  retrieved by the parser.</remarks>
*/
protected override void WriteIncremental()
{
    // 1. Original content (header, body and previous trailer).
    FileParser parser = file.Reader.Parser;
    stream.Write(parser.Stream);

    // 2. Body update (modified indirect objects insertion).
    // 2.1. Content indirect objects.
    IndirectObjects objects = file.IndirectObjects;
    /*
      NOTE: Incremental xref information structure comprises multiple sections; this update
      adds a new section.
    */
    XRefStream xrefStream = new XRefStream(file);
    XRefEntry lastFreeEntry = null;
    /*
      NOTE: Extension object streams are necessary to update original object streams whose
      entries have been modified.
    */
    IDictionary<int, ObjectStream> extensionObjectStreams = new Dictionary<int, ObjectStream>();

    // Iterate over a snapshot of the modified objects rather than the live collection.
    List<PdfIndirectObject> modifiedObjects =
        new List<PdfIndirectObject>(objects.ModifiedObjects.Values);
    foreach (PdfIndirectObject modified in modifiedObjects)
    {
        lastFreeEntry = AddXRefEntry(
            modified.XrefEntry,
            modified,
            xrefStream,
            lastFreeEntry,
            extensionObjectStreams);
    }
    foreach (ObjectStream extension in extensionObjectStreams.Values)
    {
        PdfIndirectObject extensionContainer = extension.Container;
        lastFreeEntry = AddXRefEntry(
            extensionContainer.XrefEntry,
            extensionContainer,
            xrefStream,
            lastFreeEntry,
            null);
    }
    if (lastFreeEntry != null)
    {
        // Links back to the first free object.
        // NOTE: The first entry in the table (object number 0) is always free.
        lastFreeEntry.Offset = 0;
    }

    // 2.2. XRef stream.
    XRefEntry xrefStreamEntry = new XRefEntry(objects.Count, 0);
    new PdfIndirectObject(file, xrefStream, xrefStreamEntry);

    UpdateTrailer(xrefStream.Header, stream);
    xrefStream.Header[PdfName.Prev] = PdfInteger.Get((int)parser.RetrieveXRefOffset());
    AddXRefEntry(xrefStreamEntry, xrefStream.Container, xrefStream, null, null);

    // 3. Tail.
    WriteTail(xrefStreamEntry.Offset);
}
/// <summary>
/// Opens the document over the given stream: parses the header, then walks the chain of
/// cross-reference sections (newest first, following each trailer's Prev entry) and registers
/// the used entries.
/// </summary>
/// <param name="stream">Stream the document is parsed from.</param>
/// <param name="immediate">When true, all indirect objects are resolved before returning and
/// <c>Close()</c> is then called.</param>
/// <param name="bytes">Optional in-memory copy of the document. When provided together with
/// <paramref name="immediate"/> and the number of indirect objects exceeds
/// <c>BACKGROUND_TRIGGER</c>, resolution is split across thread-pool work items, each using its
/// own parser over these bytes.</param>
/// <exception cref="ApplicationException">The document already has a stream open.</exception>
public void Load(Stream stream, bool immediate = false, byte[] bytes = null)
{
    if (_open)
    {
        throw new ApplicationException("Document already has a stream open.");
    }

    _stream = stream;
    _parser = new Parser(_stream);
    _parser.ResolveReference += Parser_ResolveReference;

    // The header carries the version marker.
    _parser.ParseHeader(out int versionMajor, out int versionMinor);
    Version = new PdfVersion(this, versionMajor, versionMinor);

    // Start from the last cross-reference table and follow the Prev links backwards.
    long xRefPosition = _parser.ParseXRefOffset();
    bool isNewestSection = true;
    do
    {
        List<TokenXRefEntry> sectionEntries = _parser.ParseXRef(xRefPosition);

        // The trailer is parsed right after the cross-reference entries.
        PdfDictionary trailer = new PdfDictionary(this, _parser.ParseTrailer());
        PdfInteger size = trailer.MandatoryValue<PdfInteger>("Size");

        foreach (TokenXRefEntry sectionEntry in sectionEntries)
        {
            // Keep only used entries whose id is below the size declared by the trailer.
            if (!sectionEntry.Used || !(sectionEntry.Id < size.Value))
            {
                continue;
            }

            IndirectObjects.AddXRef(sectionEntry);
        }

        if (isNewestSection)
        {
            // Only the newest trailer defines the decryption handler, catalog and info dictionary.
            DecryptHandler = PdfDecrypt.CreateDecrypt(this, trailer);
            _refCatalog = trailer.MandatoryValue<PdfObjectReference>("Root");
            _refInfo = trailer.OptionalValue<PdfObjectReference>("Info");
        }

        // A missing Prev entry ends the chain.
        PdfInteger prev = trailer.OptionalValue<PdfInteger>("Prev");
        xRefPosition = (prev != null) ? prev.Value : 0;
        isNewestSection = false;
    }
    while (xRefPosition > 0);

    _open = true;

    if (!immediate)
    {
        return;
    }

    // All objects must be loaded now so that the stream can then be closed.
    bool resolveInBackground = (bytes != null) && (IndirectObjects.Count > BACKGROUND_TRIGGER);
    if (resolveInBackground)
    {
        // The event is waited on below until the background work has completed.
        _backgroundCount = NUM_BACKGROUND_ITEMS;
        _backgroundEvent = new ManualResetEvent(false);

        List<int> ids = IndirectObjects.Ids.ToList();
        int idCount = ids.Count;
        int batchSize = idCount / NUM_BACKGROUND_ITEMS;

        int startIndex = 0;
        for (int batch = 0; batch < NUM_BACKGROUND_ITEMS; batch++)
        {
            // The last batch also takes whatever remains after the integer division.
            bool isLastBatch = (batch == (NUM_BACKGROUND_ITEMS - 1));

            // One parser (over its own memory stream) per work item.
            MemoryStream batchStream = new MemoryStream(bytes);
            BackgroundArgs batchArgs = new BackgroundArgs()
            {
                Parser = new Parser(batchStream),
                Ids = ids,
                Index = startIndex,
                Count = isLastBatch ? idCount - startIndex : batchSize
            };
            ThreadPool.QueueUserWorkItem(BackgroundResolveReference, batchArgs);

            startIndex += batchSize;
        }

        _backgroundEvent.WaitOne();
        _backgroundEvent.Dispose();
        _backgroundEvent = null;
    }
    else
    {
        IndirectObjects.ResolveAllReferences(this);
    }

    Close();
}
/**
  <summary>Serializes the file as an incremental update with compression: the original
  content is copied verbatim, then the modified indirect objects (compressed into object
  streams where possible), the newly registered objects and a new cross-reference stream
  section are appended.</summary>
  <remarks>The new xref stream is wrapped by a temporary indirect object (not registered into
  the file's indirect objects collection) and its Prev entry is set to the xref offset
  retrieved by the parser.</remarks>
*/
protected override void WriteIncremental()
{
    // 1. Original content (header, body and previous trailer).
    FileParser parser = file.Reader.Parser;
    stream.Write(parser.Stream);

    // 2. Body update (modified indirect objects insertion).
    /*
      NOTE: Incremental xref information structure comprises multiple sections; this update
      adds a new section.
    */
    XRefStream xrefStream = new XRefStream(file);

    // 2.1. Indirect objects.
    IndirectObjects objects = file.IndirectObjects;

    // 2.1.1. Modified indirect objects serialization.
    XRefEntry lastFreeEntry = null;
    // NOTE: Any uncompressed indirect object will be compressed.
    ObjectStream currentObjectStream = null;
    /*
      NOTE: Any previously-compressed indirect object will have its original object stream
      updated through a new extension object stream.
    */
    IDictionary<int, ObjectStream> extensionObjectStreams = new Dictionary<int, ObjectStream>();

    // Objects found at or beyond this index after the loop are serialized in step 2.1.2.
    int countBeforeCompression = objects.Count;

    // Iterate over a snapshot of the modified objects rather than the live collection.
    List<PdfIndirectObject> modifiedObjects =
        new List<PdfIndirectObject>(objects.ModifiedObjects.Values);
    foreach (PdfIndirectObject modified in modifiedObjects)
    {
        if (modified.IsCompressible())
        {
            bool needsNewObjectStream = currentObjectStream == null
                || currentObjectStream.Count >= ObjectStreamMaxEntryCount;
            if (needsNewObjectStream)
            {
                currentObjectStream = new ObjectStream();
                file.Register(currentObjectStream);
            }
            modified.Compress(currentObjectStream);
        }

        lastFreeEntry = AddXRefEntry(modified, xrefStream, lastFreeEntry, extensionObjectStreams);
    }

    // 2.1.2. Additional object streams serialization.
    int countAfterCompression = objects.Count;
    for (int number = countBeforeCompression; number < countAfterCompression; number++)
    {
        lastFreeEntry = AddXRefEntry(objects[number], xrefStream, lastFreeEntry, null);
    }

    if (lastFreeEntry != null)
    {
        // Links back to the first free object.
        // NOTE: The first entry in the table (object number 0) is always free.
        lastFreeEntry.Offset = 0;
    }

    // 2.2. XRef stream.
    UpdateTrailer(xrefStream.Header, stream);
    xrefStream.Header[PdfName.Prev] = PdfInteger.Get((int)parser.RetrieveXRefOffset());

    XRefEntry xrefStreamEntry = new XRefEntry(
        objects.Count,
        0,
        (int)stream.Length,
        XRefEntry.UsageEnum.InUse);
    /*
      NOTE: This xref stream indirect object is purposely temporary (i.e. not registered into
      the file's indirect objects collection).
    */
    PdfIndirectObject xrefStreamObject = new PdfIndirectObject(file, xrefStream, xrefStreamEntry);
    AddXRefEntry(xrefStreamObject, xrefStream, null, null);

    // 3. Tail.
    WriteTail(xrefStreamEntry.Offset);
}
/**
  <summary>Creates a file backed by the given input stream: the version, trailer and xref
  entries are taken from the information read by the reader.</summary>
  <param name="stream">Input stream the file is read from.</param>
  <exception cref="NotImplementedException">The trailer contains an Encrypt entry.</exception>
*/
public File(IInputStream stream)
{
    Initialize();

    this.reader = new Reader(stream, this);
    Reader.FileInfo fileInfo = this.reader.ReadInfo();

    this.version = fileInfo.Version;
    this.trailer = PrepareTrailer(fileInfo.Trailer);

    // Encrypted file.
    bool isEncrypted = this.trailer.ContainsKey(PdfName.Encrypt);
    if (isEncrypted)
    {
        throw new NotImplementedException("Encrypted files are currently not supported.");
    }

    this.indirectObjects = new IndirectObjects(this, fileInfo.XrefEntries);
    this.document = new Document(this.trailer[PdfName.Root]);

    // A trailer typed as XRef selects the compressed xref mode; any other selects the plain one.
    Document.ConfigurationImpl.XRefModeEnum xrefMode;
    if (PdfName.XRef.Equals(this.trailer[PdfName.Type]))
    {
        xrefMode = Document.ConfigurationImpl.XRefModeEnum.Compressed;
    }
    else
    {
        xrefMode = Document.ConfigurationImpl.XRefModeEnum.Plain;
    }
    this.document.Configuration.XrefMode = xrefMode;
}
/**
  <summary>Serializes the file as an incremental update: the original content is copied
  verbatim, then the modified indirect objects and a new cross-reference stream section are
  appended.</summary>
  <remarks>Any exception raised along the way is wrapped into a new exception carrying the
  message "Incremental writing failed." and the original one as inner exception.</remarks>
*/
protected override void WriteIncremental()
{
    try
    {
        // 1. Original content (head, body and previous trailer).
        Parser parser = file.Reader.Parser;
        stream.Write(parser.Stream);

        // 2. Body update (modified indirect objects insertion).
        // 2.1. Content indirect objects.
        IndirectObjects objects = file.IndirectObjects;

        /*
          NOTE: Incremental xref table comprises multiple sections each one composed by
          multiple subsections; this update adds a new section.
        */
        /*
          NOTE: This xref stream indirect object is purposely temporary (i.e. not registered
          into the file's indirect objects collection).
        */
        XRefStream xrefStream = new XRefStream(file);
        XRefEntry xrefStreamEntry = new XRefEntry(
            objects.Count,
            0,
            0,
            XRefEntry.UsageEnum.InUse);
        PdfIndirectObject xrefStreamObject =
            new PdfIndirectObject(file, xrefStream, xrefStreamEntry);

        XRefEntry lastFreeEntry = null;
        foreach (PdfIndirectObject modified in objects.ModifiedObjects.Values)
        {
            lastFreeEntry = AddXRefEntry(modified.XrefEntry, modified, xrefStream, lastFreeEntry);
        }
        if (lastFreeEntry != null)
        {
            // Linking back to the first free object.
            // NOTE: The first entry in the table (object number 0) is always free.
            lastFreeEntry.Offset = 0;
        }

        // 2.2. XRef stream.
        xrefStream.Header[PdfName.Prev] = new PdfInteger((int)parser.RetrieveXRefOffset());
        AddXRefEntry(xrefStreamEntry, xrefStreamObject, xrefStream, null);

        // 3. Tail.
        WriteTail(xrefStreamEntry.Offset);
    }
    catch (Exception e)
    {
        throw new Exception("Incremental writing failed.", e);
    }
}
/**
  <summary>Serializes the file from scratch using a cross-reference stream: header, indirect
  objects, xref stream and tail.</summary>
  <remarks>
    <para>Indirect objects whose data object is itself an xref stream are serialized as free
    entries instead, and every xref entry is cloned before use so that the original one is
    preserved.</para>
    <para>Any exception raised along the way is wrapped into a new exception carrying the
    message "Standard writing failed." and the original one as inner exception.</para>
  </remarks>
*/
protected override void WriteStandard()
{
    try
    {
        // 1. Header [PDF:1.6:3.4.1].
        WriteHeader();

        // 2. Body [PDF:1.6:3.4.2,3,7].
        // 2.1. Content indirect objects.
        IndirectObjects objects = file.IndirectObjects;

        /*
          NOTE: A standard xref stream comprises just one section composed by just one
          subsection. The xref stream is generated on-the-fly and kept volatile not to
          interfere with the existing file structure.
        */
        /*
          NOTE: This xref stream indirect object is purposely temporary (i.e. not registered
          into the file's indirect objects collection).
        */
        XRefStream xrefStream = new XRefStream(file);
        XRefEntry xrefStreamEntry = new XRefEntry(
            objects.Count,
            0,
            0,
            XRefEntry.UsageEnum.InUse);
        PdfIndirectObject xrefStreamObject =
            new PdfIndirectObject(file, xrefStream, xrefStreamEntry);

        XRefEntry lastFreeEntry = null;
        foreach (PdfIndirectObject current in objects)
        {
            PdfIndirectObject effective = current;
            if (current.DataObject is XRefStream)
            {
                /*
                  NOTE: Existing xref streams MUST be suppressed, temporarily replacing them
                  with free entries.
                */
                XRefEntry suppressedEntry = new XRefEntry(
                    current.Reference.ObjectNumber,
                    XRefEntry.GenerationUnreusable,
                    0,
                    XRefEntry.UsageEnum.Free);
                effective = new PdfIndirectObject(file, null, suppressedEntry);
            }

            // NOTE: Xref entry is cloned to preserve the original one.
            XRefEntry entryCopy = (XRefEntry)effective.XrefEntry.Clone();
            lastFreeEntry = AddXRefEntry(entryCopy, effective, xrefStream, lastFreeEntry);
        }
        // Linking back to the first free object.
        // NOTE: The first entry in the table (object number 0) is always free.
        lastFreeEntry.Offset = 0;

        // 2.2. XRef stream.
        AddXRefEntry(xrefStreamEntry, xrefStreamObject, xrefStream, null);

        // 3. Tail.
        WriteTail(xrefStreamEntry.Offset);
    }
    catch (Exception e)
    {
        throw new Exception("Standard writing failed.", e);
    }
}