/// <summary>
/// Copies <paramref name="token"/>, read from <paramref name="source"/>, by delegating to
/// <c>WriterUtil.CopyToken</c> with this builder's context.
/// </summary>
/// <remarks>
/// One reference map is kept per source scanner in <c>existingCopies</c>; it is created on first use
/// and handed to every later copy from the same scanner.
/// NOTE(review): presumably the map lets already-copied indirect objects be reused - confirm in WriterUtil.
/// </remarks>
/// <param name="source">Scanner the token was read from.</param>
/// <param name="token">Token to copy.</param>
/// <returns>The token returned by <c>WriterUtil.CopyToken</c>.</returns>
internal IToken CopyToken(IPdfTokenScanner source, IToken token)
{
    var alreadyTracked = existingCopies.TryGetValue(source, out var referenceMap);
    if (!alreadyTracked)
    {
        // First token copied from this scanner: register an empty reference map for it.
        referenceMap = new Dictionary<IndirectReference, IndirectReferenceToken>();
        existingCopies.Add(source, referenceMap);
    }

    IToken copied = WriterUtil.CopyToken(context, token, source, referenceMap);
    return copied;
}
/// <summary>
/// Copies a page from an existing document into this builder. The copied page will be included
/// in the output when <see cref="Build"/> is called.
/// </summary>
/// <remarks>
/// Resources, MediaBox, CropBox and Rotate found on the page's parent nodes are written directly
/// onto the copied page. Link annotations are dropped; other annotations are copied.
/// </remarks>
/// <param name="document">Source document.</param>
/// <param name="pageNumber">Number of the page to copy; pages are numbered from 1 in page tree order.</param>
/// <returns>A builder for editing the page.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="document"/> is null.</exception>
/// <exception cref="KeyNotFoundException">The source document has no page with that number.</exception>
public PdfPageBuilder AddPage(PdfDocument document, int pageNumber)
{
    if (document == null)
    {
        throw new System.ArgumentNullException(nameof(document));
    }

    var scanner = document.Structure.TokenScanner;

    // One reference map per source scanner, shared by every copy made from that scanner.
    if (!existingCopies.TryGetValue(scanner, out var refs))
    {
        refs = new Dictionary<IndirectReference, IndirectReferenceToken>();
        existingCopies.Add(scanner, refs);
    }

    // Walk the source page tree once per document and cache the result.
    if (!existingTrees.TryGetValue(document, out var pagesInfos))
    {
        pagesInfos = new Dictionary<int, PageInfo>();
        int i = 1;
        foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree))
        {
            pagesInfos[i] = new PageInfo { Page = pageDict, Parents = parents };
            i++;
        }

        existingTrees.Add(document, pagesInfos);
    }

    if (!pagesInfos.TryGetValue(pageNumber, out PageInfo pageInfo))
    {
        throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document.");
    }

    // Copy content streams.
    var streams = new List<PdfPageBuilder.CopiedContentStream>();
    if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
    {
        // Adobe Acrobat errors if content streams are referenced by multiple pages, so
        // deduplication is switched off while they are copied. The previous setting is restored
        // in a finally block so a failed copy cannot leave the context permanently changed.
        var previousDeduplication = context.AttemptDeduplication;
        context.AttemptDeduplication = false;
        try
        {
            if (contentsToken is ArrayToken contentsArray)
            {
                foreach (var element in contentsArray.Data)
                {
                    if (element is IndirectReferenceToken elementReference)
                    {
                        streams.Add(new PdfPageBuilder.CopiedContentStream(
                            WriterUtil.CopyToken(context, elementReference, scanner, refs) as IndirectReferenceToken));
                    }
                }
            }
            else if (contentsToken is IndirectReferenceToken contentsReference)
            {
                streams.Add(new PdfPageBuilder.CopiedContentStream(
                    WriterUtil.CopyToken(context, contentsReference, scanner, refs) as IndirectReferenceToken));
            }
        }
        finally
        {
            context.AttemptDeduplication = previousDeduplication;
        }
    }

    // Manually copy the page dictionary and resources, as some entries need to be modified.
    var copiedPageDict = new Dictionary<NameToken, IToken>();
    var resources = new Dictionary<NameToken, IToken>();

    // Put all parent resources and inherited attributes onto the new page. An entry found on a
    // later parent replaces the one copied from an earlier parent.
    var inheritedKeys = new[] { NameToken.MediaBox, NameToken.CropBox, NameToken.Rotate };
    foreach (var parent in pageInfo.Parents)
    {
        if (parent.TryGet(NameToken.Resources, out var parentResources))
        {
            CopyResourceDict(parentResources, resources);
        }

        foreach (var inheritedKey in inheritedKeys)
        {
            if (parent.TryGet(inheritedKey, out var inheritedValue))
            {
                copiedPageDict[inheritedKey] = WriterUtil.CopyToken(context, inheritedValue, scanner, refs);
            }
        }
    }

    foreach (var kvp in pageInfo.Page.Data)
    {
        if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
        {
            // Don't copy these as they'll be handled during page tree writing.
            continue;
        }

        if (kvp.Key == NameToken.Resources)
        {
            // Merge the page's own resources on top of the parent resources.
            CopyResourceDict(kvp.Value, resources);
            continue;
        }

        if (kvp.Key == NameToken.Annots)
        {
            var val = kvp.Value;
            if (kvp.Value is IndirectReferenceToken annotsReference)
            {
                ObjectToken annotsObject = scanner.Get(annotsReference.Data);
                if (annotsObject == null)
                {
                    // Malformed: reference to a missing object.
                    continue;
                }

                val = annotsObject.Data;
            }

            if (!(val is ArrayToken annotations))
            {
                // Should be an array; ignore and drop the bad entry.
                continue;
            }

            // Links are ignored to avoid referencing pages that do not exist in the output.
            // At some point support should be added for copying links when their target pages
            // are copied as well, but for now this just avoids corruption.
            var toAdd = new List<IToken>();
            foreach (var annotation in annotations.Data)
            {
                DictionaryToken annotationDict = GetRemoteDict(annotation);
                if (annotationDict == null)
                {
                    // Malformed: not a dictionary, or a dangling reference.
                    continue;
                }

                if (annotationDict.TryGet(NameToken.Subtype, out var subtype)
                    && subtype is NameToken subtypeName
                    && subtypeName == NameToken.Link)
                {
                    continue;
                }

                toAdd.Add(WriterUtil.CopyToken(context, annotationDict, scanner, refs));
            }

            copiedPageDict[NameToken.Annots] = new ArrayToken(toAdd);
            continue;
        }

        copiedPageDict[NameToken.Create(kvp.Key)] = WriterUtil.CopyToken(context, kvp.Value, scanner, refs);
    }

    copiedPageDict[NameToken.Resources] = new DictionaryToken(resources);

    var builder = new PdfPageBuilder(pages.Count + 1, this, streams, copiedPageDict);
    pages[builder.PageNumber] = builder;
    return builder;

    // Merges the resource dictionary behind `token` (direct or indirect) into `destinationDict`.
    // Entries merged later take precedence over entries merged earlier.
    void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDict)
    {
        DictionaryToken sourceResources = GetRemoteDict(token);
        if (sourceResources == null)
        {
            return;
        }

        foreach (var item in sourceResources.Data)
        {
            var key = NameToken.Create(item.Key);

            if (destinationDict.TryGetValue(key, out var existing)
                && existing is DictionaryToken destSubDict
                && GetRemoteDict(item.Value) is DictionaryToken subDict)
            {
                // Both sides are dictionaries: merge entry by entry inside the sub-dictionary
                // rather than writing the sub-entries into the top-level resources. The entries
                // already in the destination were copied earlier, so they are carried over as-is.
                var merged = new Dictionary<NameToken, IToken>();
                foreach (var existingEntry in destSubDict.Data)
                {
                    merged[NameToken.Create(existingEntry.Key)] = existingEntry.Value;
                }

                foreach (var subItem in subDict.Data)
                {
                    // Last copied is the most important.
                    merged[NameToken.Create(subItem.Key)] = WriterUtil.CopyToken(context, subItem.Value, scanner, refs);
                }

                destinationDict[key] = new DictionaryToken(merged);
                continue;
            }

            // New key, or one side is not a dictionary: the more important entry simply replaces
            // whatever is there. Arrays are not merged.
            var copiedEntry = CopyResourceEntry(item.Value);
            if (copiedEntry != null)
            {
                destinationDict[key] = copiedEntry;
            }
        }
    }

    // Copies a single resource entry. Indirect references are resolved and copied as direct
    // objects because PdfPageBuilder needs to modify resource entries. Returns null when the
    // reference points at a missing object.
    IToken CopyResourceEntry(IToken value)
    {
        if (!(value is IndirectReferenceToken entryReference))
        {
            return WriterUtil.CopyToken(context, value, scanner, refs);
        }

        var target = scanner.Get(entryReference.Data);
        if (target == null)
        {
            return null;
        }

        if (target.Data is StreamToken)
        {
            // Rare case (a stream has been seen here): a stream cannot be made direct, so the
            // reference itself is copied.
            return WriterUtil.CopyToken(context, value, scanner, refs);
        }

        return WriterUtil.CopyToken(context, target.Data, scanner, refs);
    }

    // Returns the dictionary behind `token`, following one level of indirection. Returns null
    // when the token is neither a dictionary nor a reference to one, or the reference is dangling.
    DictionaryToken GetRemoteDict(IToken token)
    {
        switch (token)
        {
            case IndirectReferenceToken remoteReference:
                return scanner.Get(remoteReference.Data)?.Data as DictionaryToken;
            case DictionaryToken directDict:
                return directDict;
            default:
                return null;
        }
    }
}
/// <summary>
/// Copies a page from an existing document into this builder. The copied page will be included
/// in the output when <see cref="Build"/> is called.
/// </summary>
/// <remarks>
/// Resources found on the page's parent nodes are merged into the copied page's own resources.
/// </remarks>
/// <param name="document">Source document.</param>
/// <param name="pageNumber">Number of the page to copy; pages are numbered from 1 in page tree order.</param>
/// <returns>A builder for editing the page.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="document"/> is null.</exception>
/// <exception cref="KeyNotFoundException">The source document has no page with that number.</exception>
public PdfPageBuilder AddPage(PdfDocument document, int pageNumber)
{
    if (document == null)
    {
        throw new System.ArgumentNullException(nameof(document));
    }

    var scanner = document.Structure.TokenScanner;

    // One reference map per source scanner, shared by every copy made from that scanner.
    if (!existingCopies.TryGetValue(scanner, out var refs))
    {
        refs = new Dictionary<IndirectReference, IndirectReferenceToken>();
        existingCopies.Add(scanner, refs);
    }

    // Walk the source page tree once per document and cache the result.
    if (!existingTrees.TryGetValue(document, out var pagesInfos))
    {
        pagesInfos = new Dictionary<int, PageInfo>();
        int i = 1;
        foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree))
        {
            pagesInfos[i] = new PageInfo { Page = pageDict, Parents = parents };
            i++;
        }

        existingTrees.Add(document, pagesInfos);
    }

    if (!pagesInfos.TryGetValue(pageNumber, out PageInfo pageInfo))
    {
        throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document.");
    }

    // Copy content streams; only indirect references are picked up.
    var streams = new List<PdfPageBuilder.CopiedContentStream>();
    if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
    {
        if (contentsToken is ArrayToken contentsArray)
        {
            foreach (var element in contentsArray.Data)
            {
                if (element is IndirectReferenceToken elementReference)
                {
                    streams.Add(new PdfPageBuilder.CopiedContentStream(
                        WriterUtil.CopyToken(context, elementReference, scanner, refs) as IndirectReferenceToken));
                }
            }
        }
        else if (contentsToken is IndirectReferenceToken contentsReference)
        {
            streams.Add(new PdfPageBuilder.CopiedContentStream(
                WriterUtil.CopyToken(context, contentsReference, scanner, refs) as IndirectReferenceToken));
        }
    }

    // Manually copy the page dictionary and resources, as some entries need to be modified.
    var copiedPageDict = new Dictionary<NameToken, IToken>();
    var resources = new Dictionary<NameToken, IToken>();

    // Put all parent resources into the new page.
    foreach (var parent in pageInfo.Parents)
    {
        if (parent.TryGet(NameToken.Resources, out var parentResources))
        {
            CopyResourceDict(parentResources, resources);
        }
    }

    foreach (var kvp in pageInfo.Page.Data)
    {
        if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
        {
            // Not copied here: the content streams were copied above, and Parent/Type are left out.
            continue;
        }

        if (kvp.Key == NameToken.Resources)
        {
            // Merge the page's own resources on top of the parent resources.
            CopyResourceDict(kvp.Value, resources);
            continue;
        }

        copiedPageDict[NameToken.Create(kvp.Key)] = WriterUtil.CopyToken(context, kvp.Value, scanner, refs);
    }

    copiedPageDict[NameToken.Resources] = new DictionaryToken(resources);

    var builder = new PdfPageBuilder(pages.Count + 1, this, streams, copiedPageDict);
    pages[builder.PageNumber] = builder;
    return builder;

    // Merges the resource dictionary behind `token` (direct or indirect) into `destinationDict`.
    // Entries merged later take precedence over entries merged earlier.
    void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDict)
    {
        DictionaryToken sourceResources = GetRemoteDict(token);
        if (sourceResources == null)
        {
            return;
        }

        foreach (var item in sourceResources.Data)
        {
            var key = NameToken.Create(item.Key);

            if (destinationDict.TryGetValue(key, out var existing)
                && existing is DictionaryToken destSubDict
                && GetRemoteDict(item.Value) is DictionaryToken subDict)
            {
                // Both sides are dictionaries: merge entry by entry inside the sub-dictionary
                // rather than writing the sub-entries into the top-level resources. The entries
                // already in the destination were copied earlier, so they are carried over as-is.
                var merged = new Dictionary<NameToken, IToken>();
                foreach (var existingEntry in destSubDict.Data)
                {
                    merged[NameToken.Create(existingEntry.Key)] = existingEntry.Value;
                }

                foreach (var subItem in subDict.Data)
                {
                    // Last copied is the most important.
                    merged[NameToken.Create(subItem.Key)] = WriterUtil.CopyToken(context, subItem.Value, scanner, refs);
                }

                destinationDict[key] = new DictionaryToken(merged);
                continue;
            }

            // New key, or one side is not a dictionary: the more important entry simply replaces
            // whatever is there. Arrays are not merged.
            var copiedEntry = CopyResourceEntry(item.Value);
            if (copiedEntry != null)
            {
                destinationDict[key] = copiedEntry;
            }
        }
    }

    // Copies a single resource entry. Indirect references are resolved and copied as direct
    // objects because PdfPageBuilder needs to modify resource entries. Returns null when the
    // reference points at a missing object.
    // NOTE(review): the resolved object is made direct whatever its type - confirm this is
    // acceptable if a resource entry ever references a stream.
    IToken CopyResourceEntry(IToken value)
    {
        if (!(value is IndirectReferenceToken entryReference))
        {
            return WriterUtil.CopyToken(context, value, scanner, refs);
        }

        var target = scanner.Get(entryReference.Data);
        if (target == null)
        {
            return null;
        }

        return WriterUtil.CopyToken(context, target.Data, scanner, refs);
    }

    // Returns the dictionary behind `token`, following one level of indirection. Returns null
    // when the token is neither a dictionary nor a reference to one, or the reference is dangling.
    DictionaryToken GetRemoteDict(IToken token)
    {
        switch (token)
        {
            case IndirectReferenceToken remoteReference:
                return scanner.Get(remoteReference.Data)?.Data as DictionaryToken;
            case DictionaryToken directDict:
                return directDict;
            default:
                return null;
        }
    }
}