/// <summary> /// Flushes to the output stream modified objects that can belong only to the given page, which makes this method /// "safe" compared to the /// <see cref="UnsafeFlushDeep(int)"/> /// . Flushed object frees the memory, but it's impossible to /// modify such objects or read data from them. This method releases all other page structure objects that are not /// modified. /// <p> /// This method is mainly designed for the append mode. It is similar to the /// <see cref="PdfPage.Flush()"/> /// , but it /// additionally releases all page objects that were not flushed. This method is ideal for small amendments of pages, /// but it makes more sense to use /// <see cref="PdfPage.Flush()"/> /// for newly created or heavily modified pages. <br /> /// This method will throw an exception for documents opened in reading mode (see /// <see cref="PageFlushingHelper"/> /// for more details on modes). It is also not advised to be used in stamping mode: even though it will indeed /// release the objects and free the memory, the released objects will definitely be re-read again on document /// closing, which would affect performance. /// </p> /// <p> /// When using this method in append mode (or in stamping mode), be careful not to try to modify the object instances /// obtained before this method call! See /// <see cref="PageFlushingHelper"/> /// for details on released and flushed objects state. /// </p> /// <p> /// This method shall be used only when it's known that the page and its inner structures processing is finished. /// This includes reading data from pages, page modification and page handling via addons/utilities. /// </p> /// </summary> /// <param name="pageNum">the page number which low level objects structure is to be flushed or released from memory. /// </param> public virtual void AppendModeFlush(int pageNum) { if (pdfDoc.GetWriter() == null) { throw new ArgumentException(PdfException.FlushingHelperFLushingModeIsNotForDocReadingMode); } PdfPage page = pdfDoc.GetPage(pageNum); if (page.IsFlushed()) { return; } page.GetDocument().DispatchEvent(new PdfDocumentEvent(PdfDocumentEvent.END_PAGE, page)); bool pageWasModified = page.GetPdfObject().IsModified(); page.SetModified(); release = true; pageWasModified = FlushPage(pageNum) || pageWasModified; PdfArray annots = page.GetPdfObject().GetAsArray(PdfName.Annots); if (annots != null && !annots.IsFlushed()) { ArrayFlushIfModified(annots); } PdfObject thumb = page.GetPdfObject().Get(PdfName.Thumb, false); FlushIfModified(thumb); PdfObject contents = page.GetPdfObject().Get(PdfName.Contents, false); if (contents is PdfIndirectReference) { if (contents.CheckState(PdfObject.MODIFIED) && !contents.CheckState(PdfObject.FLUSHED)) { PdfObject contentsDirectObj = ((PdfIndirectReference)contents).GetRefersTo(); if (contentsDirectObj.IsArray()) { ArrayFlushIfModified((PdfArray)contentsDirectObj); } else { contentsDirectObj.Flush(); } } } else { // already checked that modified if (contents is PdfArray) { ArrayFlushIfModified((PdfArray)contents); } else { if (contents is PdfStream) { FlushIfModified(contents); } } } // Page tags flushing is supported only in PdfPage#flush and #unsafeFlushDeep: it makes sense to flush tags // completely for heavily modified or new pages. For the slightly modified pages it should be enough to release // the tag structure objects via tag structure releasing utility. if (!pageWasModified) { page.GetPdfObject().GetIndirectReference().ClearState(PdfObject.MODIFIED); pdfDoc.GetCatalog().GetPageTree().ReleasePage(pageNum); page.UnsetForbidRelease(); page.GetPdfObject().Release(); } else { // inherited and modified resources are handled in #flushPage call in the beginning of method page.ReleaseInstanceFields(); page.GetPdfObject().Flush(); } }
private void FlushObjectRecursively(PdfObject obj, PageFlushingHelper.DeepFlushingContext context) { if (obj == null) { return; } bool avoidReleaseForIndirectObjInstance = false; if (obj.IsIndirectReference()) { PdfIndirectReference indRef = (PdfIndirectReference)obj; if (indRef.refersTo == null || indRef.CheckState(PdfObject.FLUSHED)) { return; } obj = indRef.GetRefersTo(); } else { if (obj.IsFlushed()) { return; } else { if (release && obj.IsIndirect()) { // We should avoid the case when object is going to be released but is stored in containing object // not as indirect reference. This can happen when containing object is somehow modified. // Generally containing objects should not contain released read-only object instance. System.Diagnostics.Debug.Assert(obj.IsReleaseForbidden() || obj.GetIndirectReference() == null); avoidReleaseForIndirectObjInstance = true; } } } if (pdfDoc.IsDocumentFont(obj.GetIndirectReference()) || layersRefs.Contains(obj.GetIndirectReference())) { return; } if (obj.IsDictionary() || obj.IsStream()) { if (!currNestedObjParents.Add(obj)) { return; } FlushDictRecursively((PdfDictionary)obj, context); currNestedObjParents.Remove(obj); } else { if (obj.IsArray()) { if (!currNestedObjParents.Add(obj)) { return; } PdfArray array = (PdfArray)obj; for (int i = 0; i < array.Size(); ++i) { FlushObjectRecursively(array.Get(i, false), context); } currNestedObjParents.Remove(obj); } } if (!avoidReleaseForIndirectObjInstance) { FlushOrRelease(obj); } }
private void SerObject(PdfObject obj, ByteBuffer bb, int level, IDictionary <PdfIndirectReference, byte[]> serializedCache) { if (level <= 0) { return; } if (obj == null) { bb.Append("$Lnull"); return; } PdfIndirectReference reference = null; ByteBuffer savedBb = null; if (obj.IsIndirectReference()) { reference = (PdfIndirectReference)obj; byte[] cached = serializedCache.Get(reference); if (cached != null) { bb.Append(cached); return; } else { savedBb = bb; bb = new ByteBuffer(); obj = reference.GetRefersTo(); } } if (obj.IsStream()) { SerDic((PdfDictionary)obj, bb, level - 1, serializedCache); bb.Append("$B"); if (level > 0) { bb.Append(md5.Digest(((PdfStream)obj).GetBytes(false))); } } else { if (obj.IsDictionary()) { SerDic((PdfDictionary)obj, bb, level - 1, serializedCache); } else { if (obj.IsArray()) { SerArray((PdfArray)obj, bb, level - 1, serializedCache); } else { if (obj.IsString()) { bb.Append("$S").Append(obj.ToString()); } else { // TODO specify length for strings, streams, may be names? if (obj.IsName()) { bb.Append("$N").Append(obj.ToString()); } else { bb.Append("$L").Append(obj.ToString()); } } } } } // PdfNull case is also here if (savedBb != null) { serializedCache.Put(reference, bb.ToByteArray()); savedBb.Append(bb.GetInternalBuffer()); } }
private void LoadPage(int pageNum) { PdfIndirectReference targetPage = pageRefs[pageNum]; if (targetPage != null) { return; } //if we go here, we have to split PdfPages that contains pageNum int parentIndex = FindPageParent(pageNum); PdfPages parent = parents[parentIndex]; PdfArray kids = parent.GetKids(); if (kids == null) { throw new PdfException(PdfException.InvalidPageStructure1).SetMessageParams(pageNum + 1); } int kidsCount = parent.GetCount(); // we should handle separated pages, it means every PdfArray kids must contain either PdfPage or PdfPages, // mix of PdfPage and PdfPages not allowed. bool findPdfPages = false; // NOTE optimization? when we already found needed index for (int i = 0; i < kids.Size(); i++) { PdfDictionary page = kids.GetAsDictionary(i); // null values not allowed in pages tree. if (page == null) { throw new PdfException(PdfException.InvalidPageStructure1).SetMessageParams(pageNum + 1); } PdfObject pageKids = page.Get(PdfName.Kids); if (pageKids != null) { if (pageKids.IsArray()) { findPdfPages = true; } else { // kids must be of type array throw new PdfException(PdfException.InvalidPageStructure1).SetMessageParams(pageNum + 1); } } if (document.GetReader().IsMemorySavingMode() && !findPdfPages && parent.GetFrom() + i != pageNum) { page.Release(); } } if (findPdfPages) { // handle mix of PdfPage and PdfPages. // handle count property! IList <PdfPages> newParents = new List <PdfPages>(kids.Size()); PdfPages lastPdfPages = null; for (int i = 0; i < kids.Size() && kidsCount > 0; i++) { /* * We don't release pdfPagesObject in the end of each loop because we enter this for-cycle only when parent has PdfPages kids. * If all of the kids are PdfPages, then there's nothing to release, because we don't release PdfPages at this point. * If there are kids that are instances of PdfPage, then there's no sense in releasing them: * in this case ParentTreeStructure is being rebuilt by inserting an intermediate PdfPages between the parent and a PdfPage, * thus modifying the page object by resetting its parent, thus making it impossible to release the object. */ PdfDictionary pdfPagesObject = kids.GetAsDictionary(i); if (pdfPagesObject.GetAsArray(PdfName.Kids) == null) { // pdfPagesObject is PdfPage // possible if only first kid is PdfPage if (lastPdfPages == null) { lastPdfPages = new PdfPages(parent.GetFrom(), document, parent); kids.Set(i, lastPdfPages.GetPdfObject()); newParents.Add(lastPdfPages); } else { // Only remove from kids if we did not replace the entry with new PdfPages kids.Remove(i); i--; } // decrement count first so that page is not counted twice when moved to lastPdfPages parent.DecrementCount(); lastPdfPages.AddPage(pdfPagesObject); kidsCount--; } else { // pdfPagesObject is PdfPages int from = lastPdfPages == null?parent.GetFrom() : lastPdfPages.GetFrom() + lastPdfPages.GetCount(); lastPdfPages = new PdfPages(from, kidsCount, pdfPagesObject, parent); newParents.Add(lastPdfPages); kidsCount -= lastPdfPages.GetCount(); } } parents.JRemoveAt(parentIndex); for (int i = newParents.Count - 1; i >= 0; i--) { parents.Add(parentIndex, newParents[i]); } // recursive call, to load needed pageRef. // NOTE optimization? add to loadPage startParentIndex. LoadPage(pageNum); } else { int from = parent.GetFrom(); // Possible exception in case kids.getSize() < parent.getCount(). // In any case parent.getCount() has higher priority. // NOTE optimization? when we already found needed index for (int i = 0; i < parent.GetCount(); i++) { PdfObject kid = kids.Get(i, false); if (kid is PdfIndirectReference) { pageRefs[from + i] = (PdfIndirectReference)kid; } else { pageRefs[from + i] = kid.GetIndirectReference(); } } } }
private void LoadPage(int pageNum) { PdfDictionary targetPage = pageRefs[pageNum]; if (targetPage != null) { return; } //if we go here, we have to split PdfPages that contains pageNum int parentIndex = FindPageParent(pageNum); PdfPages parent = parents[parentIndex]; PdfArray kids = parent.GetKids(); if (kids == null) { throw new PdfException(PdfException.InvalidPageStructure1).SetMessageParams(pageNum + 1); } int kidsCount = parent.GetCount(); // we should handle separated pages, it means every PdfArray kids must contain either PdfPage or PdfPages, // mix of PdfPage and PdfPages not allowed. bool findPdfPages = false; // NOTE optimization? when we already found needed index for (int i = 0; i < kids.Size(); i++) { PdfDictionary page = kids.GetAsDictionary(i); if (page == null) { // null values not allowed in pages tree. throw new PdfException(PdfException.InvalidPageStructure1).SetMessageParams(pageNum + 1); } PdfObject pageKids = page.Get(PdfName.Kids); if (pageKids != null) { if (pageKids.IsArray()) { findPdfPages = true; } else { // kids must be of type array throw new PdfException(PdfException.InvalidPageStructure1).SetMessageParams(pageNum + 1); } } } if (findPdfPages) { // handle mix of PdfPage and PdfPages. // handle count property! IList <PdfPages> newParents = new List <PdfPages>(kids.Size()); PdfPages lastPdfPages = null; for (int i = 0; i < kids.Size() && kidsCount > 0; i++) { PdfDictionary pdfPagesObject = kids.GetAsDictionary(i); if (pdfPagesObject.GetAsArray(PdfName.Kids) == null) { // pdfPagesObject is PdfPage if (lastPdfPages == null) { // possible if only first kid is PdfPage lastPdfPages = new PdfPages(parent.GetFrom(), document, parent); kids.Set(i, lastPdfPages.GetPdfObject()); newParents.Add(lastPdfPages); } else { // Only remove from kids if we did not replace the entry with new PdfPages kids.Remove(i); i--; } // decrement count first so that page is not counted twice when moved to lastPdfPages parent.DecrementCount(); lastPdfPages.AddPage(pdfPagesObject); kidsCount--; } else { // pdfPagesObject is PdfPages int from = lastPdfPages == null?parent.GetFrom() : lastPdfPages.GetFrom() + lastPdfPages.GetCount(); lastPdfPages = new PdfPages(from, kidsCount, pdfPagesObject, parent); newParents.Add(lastPdfPages); kidsCount -= lastPdfPages.GetCount(); } } parents.JRemoveAt(parentIndex); for (int i = newParents.Count - 1; i >= 0; i--) { parents.Add(parentIndex, newParents[i]); } // recursive call, to load needed pageRef. // NOTE optimization? add to loadPage startParentIndex. LoadPage(pageNum); } else { int from = parent.GetFrom(); // Possible exception in case kids.getSize() < parent.getCount(). // In any case parent.getCount() has higher priority. // NOTE optimization? when we already found needed index for (int i = 0; i < parent.GetCount(); i++) { pageRefs[from + i] = kids.GetAsDictionary(i); } } }
// TODO 2: object is not checked if it was already serialized on start, double work could be done // TODO 3: indirect objects often stored multiple times as parts of the other objects private void SerObject(PdfObject obj, int level, ByteBufferOutputStream bb) { if (level <= 0) { return; } if (obj == null) { bb.Append("$Lnull"); return; } PdfIndirectReference reference = null; ByteBufferOutputStream savedBb = null; int indRefKey = -1; if (obj.IsIndirectReference()) { reference = (PdfIndirectReference)obj; indRefKey = CalculateIndRefKey(reference); byte[] cached = objToSerializedContent.Get(indRefKey); if (cached != null) { bb.Append(cached); return; } else { savedBb = bb; bb = new ByteBufferOutputStream(); obj = reference.GetRefersTo(); } } if (obj.IsStream()) { bb.Append("$B"); SerDic((PdfDictionary)obj, level - 1, bb); if (level > 0) { md5.Reset(); bb.Append(md5.Digest(((PdfStream)obj).GetBytes(false))); } } else { if (obj.IsDictionary()) { SerDic((PdfDictionary)obj, level - 1, bb); } else { if (obj.IsArray()) { SerArray((PdfArray)obj, level - 1, bb); } else { if (obj.IsString()) { bb.Append("$S").Append(obj.ToString()); } else { if (obj.IsName()) { bb.Append("$N").Append(obj.ToString()); } else { bb.Append("$L").Append(obj.ToString()); } } } } } // PdfNull case is also here if (savedBb != null) { objToSerializedContent[indRefKey] = bb.GetBuffer(); savedBb.Append(bb); } }
private void SerObject(PdfObject obj, int level, ByteBufferOutputStream bb, IntHashtable serialized) { if (level <= 0) { return; } if (obj == null) { bb.Append("$Lnull"); return; } PdfIndirectReference reference = null; ByteBufferOutputStream savedBb = null; if (obj.IsIndirectReference()) { reference = (PdfIndirectReference)obj; int key = GetCopyObjectKey(obj); if (serialized.ContainsKey(key)) { bb.Append((int)serialized.Get(key)); return; } else { savedBb = bb; bb = new ByteBufferOutputStream(); } } if (obj.IsStream()) { bb.Append("$B"); SerDic((PdfDictionary)obj, level - 1, bb, serialized); if (level > 0) { md5.Reset(); bb.Append(md5.Digest(((PdfStream)obj).GetBytes(false))); } } else { if (obj.IsDictionary()) { SerDic((PdfDictionary)obj, level - 1, bb, serialized); } else { if (obj.IsArray()) { SerArray((PdfArray)obj, level - 1, bb, serialized); } else { if (obj.IsString()) { bb.Append("$S").Append(obj.ToString()); } else { if (obj.IsName()) { bb.Append("$N").Append(obj.ToString()); } else { bb.Append("$L").Append(obj.ToString()); } } } } } if (savedBb != null) { int key = GetCopyObjectKey(reference); if (!serialized.ContainsKey(key)) { serialized.Put(key, CalculateHash(bb.GetBuffer())); } savedBb.Append(bb); } }