Esempio n. 1
0
        /// <summary>
        /// Copies <paramref name="token"/>, which was read through <paramref name="source"/>, by delegating to
        /// <c>WriterUtil.CopyToken</c> with this builder's context.
        /// </summary>
        /// <param name="source">Scanner the token originates from; also the key of the per-scanner reference map.</param>
        /// <param name="token">Token to copy.</param>
        /// <returns>The token returned by <c>WriterUtil.CopyToken</c>.</returns>
        internal IToken CopyToken(IPdfTokenScanner source, IToken token)
        {
            // A reference map is kept per source scanner so repeated copies from the same
            // scanner share one map (presumably source reference -> copied reference; see WriterUtil.CopyToken).
            if (existingCopies.TryGetValue(source, out var knownReferences))
            {
                return WriterUtil.CopyToken(context, token, source, knownReferences);
            }

            // First token copied from this scanner: register a fresh map before copying.
            knownReferences = new Dictionary<IndirectReference, IndirectReferenceToken>();
            existingCopies.Add(source, knownReferences);

            return WriterUtil.CopyToken(context, token, source, knownReferences);
        }
Esempio n. 2
0
        /// <summary>
        /// Copies a page from an existing document into this builder, the copied page will be included in the output when <see cref="Build"/> is called.
        /// </summary>
        /// <param name="document">Source document.</param>
        /// <param name="pageNumber">1-based number of the page to copy.</param>
        /// <returns>A builder for editing the copied page.</returns>
        /// <exception cref="KeyNotFoundException">The source document has no page with the given number.</exception>
        public PdfPageBuilder AddPage(PdfDocument document, int pageNumber)
        {
            var scanner = document.Structure.TokenScanner;

            // one reference map per source scanner so repeated copies from the same document share it
            if (!existingCopies.TryGetValue(scanner, out var refs))
            {
                refs = new Dictionary<IndirectReference, IndirectReferenceToken>();
                existingCopies.Add(scanner, refs);
            }

            // walk the source page tree only once per document and cache the result
            if (!existingTrees.TryGetValue(document, out var pagesInfos))
            {
                pagesInfos = new Dictionary<int, PageInfo>();
                int i = 1;
                foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree))
                {
                    pagesInfos[i] = new PageInfo
                    {
                        Page = pageDict,
                        Parents = parents
                    };
                    i++;
                }

                existingTrees.Add(document, pagesInfos);
            }

            if (!pagesInfos.TryGetValue(pageNumber, out PageInfo pageInfo))
            {
                throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document.");
            }

            // copy content streams
            var streams = new List<PdfPageBuilder.CopiedContentStream>();

            if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
            {
                // Adobe Acrobat errors if content streams ref'd by multiple pages, turn off
                // dedup if on to avoid issues
                var prev = context.AttemptDeduplication;
                context.AttemptDeduplication = false;
                try
                {
                    if (contentsToken is ArrayToken array)
                    {
                        foreach (var item in array.Data)
                        {
                            if (item is IndirectReferenceToken streamRef)
                            {
                                streams.Add(new PdfPageBuilder.CopiedContentStream(
                                    WriterUtil.CopyToken(context, streamRef, scanner, refs) as IndirectReferenceToken));
                            }
                        }
                    }
                    else if (contentsToken is IndirectReferenceToken singleRef)
                    {
                        streams.Add(new PdfPageBuilder.CopiedContentStream(
                            WriterUtil.CopyToken(context, singleRef, scanner, refs) as IndirectReferenceToken));
                    }
                }
                finally
                {
                    // restore the caller's setting even when copying a stream throws
                    context.AttemptDeduplication = prev;
                }
            }

            // manually copy page dict / resources as we need to modify some
            var copiedPageDict = new Dictionary<NameToken, IToken>();
            var resources = new Dictionary<NameToken, IToken>();

            // just put all parent resources (and the other inheritable attributes) into new page
            foreach (var ancestor in pageInfo.Parents)
            {
                if (ancestor.TryGet(NameToken.Resources, out var resourceToken))
                {
                    CopyResourceDict(resourceToken, resources);
                }
                if (ancestor.TryGet(NameToken.MediaBox, out var mb))
                {
                    copiedPageDict[NameToken.MediaBox] = WriterUtil.CopyToken(context, mb, scanner, refs);
                }
                if (ancestor.TryGet(NameToken.CropBox, out var cb))
                {
                    copiedPageDict[NameToken.CropBox] = WriterUtil.CopyToken(context, cb, scanner, refs);
                }
                if (ancestor.TryGet(NameToken.Rotate, out var rt))
                {
                    copiedPageDict[NameToken.Rotate] = WriterUtil.CopyToken(context, rt, scanner, refs);
                }
            }

            // entries on the page itself are copied last so they override anything inherited above
            foreach (var kvp in pageInfo.Page.Data)
            {
                if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
                {
                    // don't copy these as they'll be handled during page tree writing
                    continue;
                }

                if (kvp.Key == NameToken.Resources)
                {
                    // merge parent resources into child
                    CopyResourceDict(kvp.Value, resources);
                    continue;
                }

                if (kvp.Key == NameToken.Annots)
                {
                    var val = kvp.Value;
                    if (kvp.Value is IndirectReferenceToken annotsRef)
                    {
                        var annotsObject = scanner.Get(annotsRef.Data);
                        if (annotsObject == null)
                        {
                            // malformed
                            continue;
                        }
                        val = annotsObject.Data;
                    }

                    if (!(val is ArrayToken arr))
                    {
                        // should be array... ignore and remove bad dict
                        continue;
                    }

                    // -> ignore links to resolve issues with referencing non-existing pages
                    // at some point should add support for copying the links if the
                    // pages are copied as well but for now just fix corruption
                    var toAdd = new List<IToken>();
                    foreach (var annot in arr.Data)
                    {
                        DictionaryToken annotDict = GetRemoteDict(annot);
                        if (annotDict == null)
                        {
                            // malformed
                            continue;
                        }
                        if (annotDict.TryGet(NameToken.Subtype, out var st) && st is NameToken nm && nm == NameToken.Link)
                        {
                            // link -> ignore
                            continue;
                        }
                        toAdd.Add(WriterUtil.CopyToken(context, annotDict, scanner, refs));
                    }
                    // copy rest
                    copiedPageDict[NameToken.Annots] = new ArrayToken(toAdd);
                    continue;
                }

                copiedPageDict[NameToken.Create(kvp.Key)] =
                    WriterUtil.CopyToken(context, kvp.Value, scanner, refs);
            }

            copiedPageDict[NameToken.Resources] = new DictionaryToken(resources);

            var builder = new PdfPageBuilder(pages.Count + 1, this, streams, copiedPageDict);

            pages[builder.PageNumber] = builder;
            return builder;

            // Merges the resource dictionary behind 'token' into 'destinationDict'. Sub-dictionaries
            // present on both sides are merged entry by entry; anything else is overwritten by the
            // later (more specific) value.
            void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDict)
            {
                DictionaryToken sourceDict = GetRemoteDict(token);

                if (sourceDict == null)
                {
                    return;
                }

                foreach (var item in sourceDict.Data)
                {
                    var key = NameToken.Create(item.Key);

                    if (destinationDict.TryGetValue(key, out var existing) && existing is DictionaryToken destSubDict)
                    {
                        var subDict = GetRemoteDict(item.Value);
                        if (subDict != null)
                        {
                            // Both sides are dictionaries: merge INTO the sub-dictionary so the entries
                            // stay under their category key instead of leaking into the top level.
                            var merged = new Dictionary<NameToken, IToken>();
                            foreach (var current in destSubDict.Data)
                            {
                                // already copied tokens, reuse as is
                                merged[NameToken.Create(current.Key)] = current.Value;
                            }
                            foreach (var subItem in subDict.Data)
                            {
                                // last copied most important
                                merged[NameToken.Create(subItem.Key)] = WriterUtil.CopyToken(context, subItem.Value, scanner, refs);
                            }

                            destinationDict[key] = new DictionaryToken(merged);
                            continue;
                        }
                    }

                    // new key, or not a dict on one side.. just overwrite with more important one
                    var copied = CopyResourceValue(item.Value);
                    if (copied != null)
                    {
                        destinationDict[key] = copied;
                    }
                }
            }

            // Copies a single resource entry. Indirect values are made direct because PdfPageBuilder
            // needs to modify resource entries; returns null when the reference cannot be resolved.
            IToken CopyResourceValue(IToken value)
            {
                if (!(value is IndirectReferenceToken reference))
                {
                    return WriterUtil.CopyToken(context, value, scanner, refs);
                }

                var resolved = scanner.Get(reference.Data);
                if (resolved == null)
                {
                    // malformed: dangling reference, nothing to copy
                    return null;
                }

                // rare case, have seen /SubType as stream token, can't make direct
                return resolved.Data is StreamToken
                    ? WriterUtil.CopyToken(context, value, scanner, refs)
                    : WriterUtil.CopyToken(context, resolved.Data, scanner, refs);
            }

            // Returns the dictionary the token is or points to, or null when it is anything else
            // (including an indirect reference that cannot be resolved).
            DictionaryToken GetRemoteDict(IToken token)
            {
                if (token is IndirectReferenceToken indirect)
                {
                    return scanner.Get(indirect.Data)?.Data as DictionaryToken;
                }

                return token as DictionaryToken;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Copies a page from an existing document into this builder, the copied page will be included in the output when <see cref="Build"/> is called.
        /// </summary>
        /// <param name="document">Source document.</param>
        /// <param name="pageNumber">1-based number of the page to copy.</param>
        /// <returns>A builder for editing the copied page.</returns>
        /// <exception cref="KeyNotFoundException">The source document has no page with the given number.</exception>
        public PdfPageBuilder AddPage(PdfDocument document, int pageNumber)
        {
            var scanner = document.Structure.TokenScanner;

            // one reference map per source scanner so repeated copies from the same document share it
            if (!existingCopies.TryGetValue(scanner, out var refs))
            {
                refs = new Dictionary<IndirectReference, IndirectReferenceToken>();
                existingCopies.Add(scanner, refs);
            }

            // walk the source page tree only once per document and cache the result
            if (!existingTrees.TryGetValue(document, out var pagesInfos))
            {
                pagesInfos = new Dictionary<int, PageInfo>();
                int i = 1;
                foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree))
                {
                    pagesInfos[i] = new PageInfo
                    {
                        Page = pageDict,
                        Parents = parents
                    };
                    i++;
                }

                existingTrees.Add(document, pagesInfos);
            }

            if (!pagesInfos.TryGetValue(pageNumber, out PageInfo pageInfo))
            {
                throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document.");
            }

            // copy content streams
            var streams = new List<PdfPageBuilder.CopiedContentStream>();

            if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
            {
                if (contentsToken is ArrayToken array)
                {
                    foreach (var item in array.Data)
                    {
                        if (item is IndirectReferenceToken streamRef)
                        {
                            streams.Add(new PdfPageBuilder.CopiedContentStream(
                                WriterUtil.CopyToken(context, streamRef, scanner, refs) as IndirectReferenceToken));
                        }
                    }
                }
                else if (contentsToken is IndirectReferenceToken singleRef)
                {
                    streams.Add(new PdfPageBuilder.CopiedContentStream(
                        WriterUtil.CopyToken(context, singleRef, scanner, refs) as IndirectReferenceToken));
                }
            }

            // manually copy page dict / resources as we need to modify some
            var copiedPageDict = new Dictionary<NameToken, IToken>();
            var resources = new Dictionary<NameToken, IToken>();

            // The copied page is detached from its original ancestors, so every inheritable
            // attribute defined on them (Resources, MediaBox, CropBox, Rotate) has to be
            // materialised on the page itself or it would be lost.
            foreach (var ancestor in pageInfo.Parents)
            {
                if (ancestor.TryGet(NameToken.Resources, out var resourceToken))
                {
                    CopyResourceDict(resourceToken, resources);
                }
                if (ancestor.TryGet(NameToken.MediaBox, out var mb))
                {
                    copiedPageDict[NameToken.MediaBox] = WriterUtil.CopyToken(context, mb, scanner, refs);
                }
                if (ancestor.TryGet(NameToken.CropBox, out var cb))
                {
                    copiedPageDict[NameToken.CropBox] = WriterUtil.CopyToken(context, cb, scanner, refs);
                }
                if (ancestor.TryGet(NameToken.Rotate, out var rt))
                {
                    copiedPageDict[NameToken.Rotate] = WriterUtil.CopyToken(context, rt, scanner, refs);
                }
            }

            // entries on the page itself are copied last so they override anything inherited above
            foreach (var kvp in pageInfo.Page.Data)
            {
                if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
                {
                    // not copied here: Contents was handled above, Parent and Type are left out of the copied dict
                    continue;
                }

                if (kvp.Key == NameToken.Resources)
                {
                    // merge the page's own resources over the inherited ones
                    CopyResourceDict(kvp.Value, resources);
                    continue;
                }

                copiedPageDict[NameToken.Create(kvp.Key)] =
                    WriterUtil.CopyToken(context, kvp.Value, scanner, refs);
            }

            copiedPageDict[NameToken.Resources] = new DictionaryToken(resources);

            var builder = new PdfPageBuilder(pages.Count + 1, this, streams, copiedPageDict);

            pages[builder.PageNumber] = builder;
            return builder;

            // Merges the resource dictionary behind 'token' into 'destinationDict'. Sub-dictionaries
            // present on both sides are merged entry by entry; anything else is overwritten by the
            // later (more specific) value.
            void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDict)
            {
                DictionaryToken sourceDict = GetRemoteDict(token);

                if (sourceDict == null)
                {
                    return;
                }

                foreach (var entry in sourceDict.Data)
                {
                    var key = NameToken.Create(entry.Key);

                    if (destinationDict.TryGetValue(key, out var existing) && existing is DictionaryToken destSubDict)
                    {
                        var subDict = GetRemoteDict(entry.Value);
                        if (subDict != null)
                        {
                            // Both sides are dictionaries: merge INTO the sub-dictionary so the entries
                            // stay under their category key instead of leaking into the top level.
                            var merged = new Dictionary<NameToken, IToken>();
                            foreach (var current in destSubDict.Data)
                            {
                                // already copied tokens, reuse as is
                                merged[NameToken.Create(current.Key)] = current.Value;
                            }
                            foreach (var subItem in subDict.Data)
                            {
                                // last copied most important
                                merged[NameToken.Create(subItem.Key)] = WriterUtil.CopyToken(context, subItem.Value, scanner, refs);
                            }

                            destinationDict[key] = new DictionaryToken(merged);
                            continue;
                        }
                    }

                    // new key, or not a dict on one side.. just overwrite with more important one
                    var copied = CopyResourceValue(entry.Value);
                    if (copied != null)
                    {
                        destinationDict[key] = copied;
                    }
                }
            }

            // Copies a single resource entry. Indirect values are made direct because PdfPageBuilder
            // needs to modify resource entries; returns null when the reference cannot be resolved.
            IToken CopyResourceValue(IToken value)
            {
                if (!(value is IndirectReferenceToken reference))
                {
                    return WriterUtil.CopyToken(context, value, scanner, refs);
                }

                var resolved = scanner.Get(reference.Data);
                if (resolved == null)
                {
                    // malformed: dangling reference, nothing to copy
                    return null;
                }

                // a stream can't be made direct, keep it behind its (copied) reference
                return resolved.Data is StreamToken
                    ? WriterUtil.CopyToken(context, value, scanner, refs)
                    : WriterUtil.CopyToken(context, resolved.Data, scanner, refs);
            }

            // Returns the dictionary the token is or points to, or null when it is anything else
            // (including an indirect reference that cannot be resolved).
            DictionaryToken GetRemoteDict(IToken token)
            {
                if (token is IndirectReferenceToken indirect)
                {
                    return scanner.Get(indirect.Data)?.Data as DictionaryToken;
                }

                return token as DictionaryToken;
            }
        }