/// <summary>
/// Splits the top-level structure elements of <paramref name="document"/> into those that belong to pages
/// in the range [<paramref name="startPage"/>, <paramref name="beforePage"/>) and those that do not.
/// Top-level elements that span both parts are cloned and the clones are inserted right after the last
/// top-level element of the first part.
/// </summary>
/// <param name="document">document whose structure tree root is examined and modified.</param>
/// <param name="startPage">first page number of the first part; values below 1 are rejected.</param>
/// <param name="beforePage">
/// page number that ends the first part (exclusive); must not be less than <paramref name="startPage"/> and
/// must not exceed the number of pages plus one.
/// </param>
/// <param name="startPageStructTopIndex">
/// root kid index returned unchanged when the page range is empty; this value minus one also seeds the
/// search for the last top-level element of the first part.
/// </param>
/// <returns>
/// -1 if the document is not tagged or the page arguments fail the range check;
/// <paramref name="startPageStructTopIndex"/> if <paramref name="beforePage"/> equals <paramref name="startPage"/>;
/// the number of root kids if <paramref name="beforePage"/> is one past the last page;
/// otherwise one plus the index of the last root kid that belongs to the first part.
/// </returns>
/// <exception cref="PdfException">
/// thrown when the top element returned by <c>AddAllParentsToSet</c> for a marked content reference is flushed.
/// </exception>
private static int SeparateStructure(PdfDocument document, int startPage, int beforePage, int startPageStructTopIndex
    ) {
    // Argument validation; the checks short-circuit in the order they are written.
    if (!document.IsTagged()) {
        return -1;
    }
    if (startPage < 1 || beforePage < startPage || beforePage > document.GetNumberOfPages() + 1) {
        return -1;
    }
    // Empty page range: nothing to separate.
    if (beforePage == startPage) {
        return startPageStructTopIndex;
    }
    // The range runs to the end of the document: the split point is after all existing root kids.
    if (beforePage == document.GetNumberOfPages() + 1) {
        return document.GetStructTreeRoot().GetKidsObject().Size();
    }

    // Collect every object reachable upwards from the marked content references of the first-part pages.
    // Elements that also have kids outside this set are the ones that must be split below.
    ICollection<PdfObject> elemsOfFirstPart = new HashSet<PdfObject>();
    for (int pageNum = startPage; pageNum < beforePage; ++pageNum) {
        PdfPage firstPartPage = document.GetPage(pageNum);
        ICollection<PdfMcr> mcrsOnPage = document.GetStructTreeRoot().GetPageMarkedContentReferences(firstPartPage
            );
        if (mcrsOnPage == null) {
            continue;
        }
        foreach (PdfMcr pageMcr in mcrsOnPage) {
            elemsOfFirstPart.Add(pageMcr.GetPdfObject());
            PdfDictionary topAncestor = AddAllParentsToSet(pageMcr, elemsOfFirstPart);
            if (topAncestor != null && topAncestor.IsFlushed()) {
                throw new PdfException(PdfException.TagFromTheExistingTagStructureIsFlushedCannotAddCopiedPageTags);
            }
        }
    }

    // Walk the root kids; each one that belongs to the first part gets a clone that receives the kids
    // which do not belong to the first part. Clones that ended up with a /K entry are kept.
    IList<PdfDictionary> splitOffTops = new List<PdfDictionary>();
    PdfArray rootKids = document.GetStructTreeRoot().GetKidsObject();
    int lastFirstPartTop = startPageStructTopIndex - 1;
    for (int topIdx = 0; topIdx < rootKids.Size(); ++topIdx) {
        PdfDictionary topElem = rootKids.GetAsDictionary(topIdx);
        if (!elemsOfFirstPart.Contains(topElem)) {
            continue;
        }
        lastFirstPartTop = topIdx;
        StructureTreeCopier.LastClonedAncestor cloneTracker = new StructureTreeCopier.LastClonedAncestor();
        cloneTracker.ancestor = topElem;
        PdfDictionary topElemClone = topElem.Clone(ignoreKeysForClone);
        topElemClone.Put(PdfName.P, document.GetStructTreeRoot().GetPdfObject());
        cloneTracker.clone = topElemClone;
        SeparateKids(topElem, elemsOfFirstPart, cloneTracker, document);
        if (!topElemClone.ContainsKey(PdfName.K)) {
            // Nothing was moved into the clone, so it is discarded.
            continue;
        }
        topElemClone.MakeIndirect(document);
        splitOffTops.Add(topElemClone);
    }

    // Insert the clones, in order, directly after the last first-part top-level element.
    int insertAt = lastFirstPartTop + 1;
    for (int offset = 0; offset < splitOffTops.Count; ++offset) {
        document.GetStructTreeRoot().AddKidObject(insertAt + offset, splitOffTops[offset]);
    }
    return insertAt;
}
 /// <summary>
 /// Makes sure a clone exists for <paramref name="structElem"/> below the currently tracked cloned ancestor.
 /// If <c>lastCloned.ancestor</c> already is <paramref name="structElem"/>, nothing happens. Otherwise
 /// <paramref name="structElem"/> and each of its ancestors up to (but not including) <c>lastCloned.ancestor</c>
 /// are cloned, the clones are linked to each other through /P and /K, the topmost new clone is added to
 /// <c>lastCloned.clone</c> with index -1, and <paramref name="lastCloned"/> is updated to track
 /// <paramref name="structElem"/> and its clone.
 /// </summary>
 /// <param name="structElem">structure element for which a clone chain is required.</param>
 /// <param name="lastCloned">tracker of the most recently cloned ancestor and its clone; updated in place.</param>
 /// <param name="document">document in which the new clones are made indirect.</param>
 private static void CloneParents(PdfDictionary structElem, StructureTreeCopier.LastClonedAncestor lastCloned
     , PdfDocument document) {
     if (lastCloned.ancestor == structElem) {
         // The clone for this element is already the tracked one.
         return;
     }
     PdfDictionary elemClone = (PdfDictionary)structElem.Clone(ignoreKeysForClone).MakeIndirect(document);
     PdfDictionary topmostClone = elemClone;
     PdfDictionary cursor = structElem;
     // Climb the original tree until the parent is the tracked ancestor, cloning each level on the way.
     while (cursor.Get(PdfName.P) != lastCloned.ancestor) {
         PdfDictionary originalParent = cursor.GetAsDictionary(PdfName.P);
         PdfDictionary clonedParent = (PdfDictionary)originalParent.Clone(ignoreKeysForClone).MakeIndirect(document);
         topmostClone.Put(PdfName.P, clonedParent);
         clonedParent.Put(PdfName.K, topmostClone);
         topmostClone = clonedParent;
         cursor = originalParent;
     }
     // Hook the top of the freshly built chain under the previously tracked clone.
     PdfStructElem.AddKidObject(lastCloned.clone, -1, topmostClone);
     lastCloned.clone = elemClone;
     lastCloned.ancestor = structElem;
 }
Beispiel #3
0
        /// <summary>
        /// Recursively walks the kids of <paramref name="structElem"/> and moves those that are not contained in
        /// <paramref name="firstPartElems"/> from the original element to the clone tracked by
        /// <paramref name="lastCloned"/>. Structure element kids without a /K entry are left where they are.
        /// </summary>
        /// <param name="structElem">structure element whose /K entry is processed.</param>
        /// <param name="firstPartElems">objects that belong to the first part and therefore stay in place.</param>
        /// <param name="lastCloned">tracker of the most recently cloned ancestor and its clone; updated in place.</param>
        /// <exception cref="PdfException">
        /// thrown when a structure element kid that is not in <paramref name="firstPartElems"/> is flushed.
        /// </exception>
        private static void SeparateKids(PdfDictionary structElem, ICollection<PdfObject> firstPartElems, StructureTreeCopier.LastClonedAncestor
                                         lastCloned)
        {
            PdfObject kidsEntry = structElem.Get(PdfName.K);

            if (kidsEntry.IsArray())
            {
                PdfDocument document  = structElem.GetIndirectReference().GetDocument();
                PdfArray    kidsArray = (PdfArray)kidsEntry;
                for (int pos = 0; pos < kidsArray.Size(); ++pos)
                {
                    PdfObject     child     = kidsArray.Get(pos);
                    PdfDictionary childDict = child.IsDictionary() ? (PdfDictionary)child : null;

                    bool childIsStructElem = childDict != null && PdfStructElem.IsStructElem(childDict);
                    bool childInFirstPart  = firstPartElems.Contains(child);

                    if (childIsStructElem)
                    {
                        if (childInFirstPart)
                        {
                            // The kid itself stays, but some of its descendants may have to be moved.
                            SeparateKids(childDict, firstPartElems, lastCloned);
                            continue;
                        }
                        if (childDict.IsFlushed())
                        {
                            throw new PdfException(PdfException.TagFromTheExistingTagStructureIsFlushedCannotAddCopiedPageTags);
                        }
                        // Elements with no kids are never marked as first-part elements, yet they should not
                        // all be moved to the second part, so they are simply left untouched.
                        if (!childDict.ContainsKey(PdfName.K))
                        {
                            continue;
                        }
                        CloneParents(structElem, lastCloned, document);
                        // pos-- compensates for the loop increment because the array shrinks by one.
                        kidsArray.Remove(pos--);
                        PdfStructElem.AddKidObject(lastCloned.clone, -1, child);
                        continue;
                    }

                    if (childInFirstPart)
                    {
                        continue;
                    }

                    // A marked content reference (dictionary or number) that is not part of the first part.
                    CloneParents(structElem, lastCloned, document);
                    PdfMcr mcr;
                    if (childDict == null)
                    {
                        mcr = new PdfMcrNumber((PdfNumber)child, new PdfStructElem(lastCloned.clone));
                    }
                    else if (childDict.Get(PdfName.Type).Equals(PdfName.MCR))
                    {
                        mcr = new PdfMcrDictionary(childDict, new PdfStructElem(lastCloned.clone));
                    }
                    else
                    {
                        mcr = new PdfObjRef(childDict, new PdfStructElem(lastCloned.clone));
                    }
                    // pos-- compensates for the loop increment because the array shrinks by one.
                    kidsArray.Remove(pos--);
                    PdfStructElem.AddKidObject(lastCloned.clone, -1, child);
                    // re-register mcr
                    document.GetStructTreeRoot().GetParentTreeHandler().RegisterMcr(mcr);
                }
            }
            else if (kidsEntry.IsDictionary() && PdfStructElem.IsStructElem((PdfDictionary)kidsEntry))
            {
                // A /K entry that is not an array is a single kid. It is not cloned at this point because it
                // cannot hold kids from both parts at once; it is either cloned later as an ancestor or not
                // cloned at all. If that kid is a struct elem, it belongs to the first part, so recurse into it.
                SeparateKids((PdfDictionary)kidsEntry, firstPartElems, lastCloned);
            }

            // This element is done: if it is the tracked ancestor, move the tracker one level up
            // on both the original and the cloned side.
            if (lastCloned.ancestor == structElem)
            {
                lastCloned.ancestor = lastCloned.ancestor.GetAsDictionary(PdfName.P);
                lastCloned.clone    = lastCloned.clone.GetAsDictionary(PdfName.P);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Copies structure to a
        /// <paramref name="destDocument"/>
        /// and insert it in a specified position in the document.
        /// <br/><br/>
        /// NOTE: Works only for
        /// <c>PdfStructTreeRoot</c>
        /// that is read from the document opened in reading mode,
        /// otherwise an exception is thrown.
        /// <br/>
        /// Also, to insert a tagged page into existing tag structure, existing tag structure shouldn't be flushed, otherwise
        /// an exception may be raised.
        /// <br/>
        /// Does nothing if <paramref name="destDocument"/> is not tagged.
        /// </summary>
        /// <param name="destDocument">document to copy structure to.</param>
        /// <param name="insertBeforePage">indicates where the structure to be inserted.</param>
        /// <param name="page2page">association between original page and copied page.</param>
        /// <param name="callingDocument">passed on unchanged to the overload that performs the actual copying.</param>
        /// <exception cref="PdfException">
        /// thrown when the top structure element of a marked content reference on one of the pages preceding
        /// <paramref name="insertBeforePage"/> is flushed.
        /// </exception>
        public static void CopyTo(PdfDocument destDocument, int insertBeforePage, IDictionary<PdfPage, PdfPage> page2page
                                  , PdfDocument callingDocument)
        {
            if (!destDocument.IsTagged())
            {
                return;
            }
            // Here we separate the structure tree in two parts: struct elems that belong to the pages which indexes are
            // less than insertBeforePage and those struct elems that belong to other pages. Some elems might belong
            // to both parts and actually these are the ones that we are looking for.
            ICollection<PdfObject> firstPartElems     = new HashSet<PdfObject>();
            PdfStructTreeRoot      destStructTreeRoot = destDocument.GetStructTreeRoot();

            for (int i = 1; i < insertBeforePage; ++i)
            {
                PdfPage pageOfFirstHalf      = destDocument.GetPage(i);
                ICollection<PdfMcr> pageMcrs = destStructTreeRoot.GetPageMarkedContentReferences(pageOfFirstHalf);
                if (pageMcrs != null)
                {
                    foreach (PdfMcr mcr in pageMcrs)
                    {
                        firstPartElems.Add(mcr.GetPdfObject());
                        PdfDictionary top = AddAllParentsToSet(mcr, firstPartElems);
                        // Guard against a missing top element before asking whether it is flushed.
                        if (top != null && top.IsFlushed())
                        {
                            throw new PdfException(PdfException.TagFromTheExistingTagStructureIsFlushedCannotAddCopiedPageTags);
                        }
                    }
                }
            }
            IList<PdfDictionary> clonedTops = new List<PdfDictionary>();
            PdfArray tops = destStructTreeRoot.GetKidsObject();
            // Now we "walk" through all the elems which belong to the first part, and look for the ones that contain both
            // kids from first and second part. We clone found elements and move kids from the second part to cloned elems.
            //
            // Start at -1 so that, when no top-level element belongs to the first part (for example when inserting
            // before the first page), the insertion index computed below is 0 and not 1.
            int lastTopBefore = -1;

            for (int i = 0; i < tops.Size(); ++i)
            {
                PdfDictionary top = tops.GetAsDictionary(i);
                if (firstPartElems.Contains(top))
                {
                    lastTopBefore = i;
                    StructureTreeCopier.LastClonedAncestor lastCloned = new StructureTreeCopier.LastClonedAncestor();
                    lastCloned.ancestor = top;
                    PdfDictionary topClone = top.Clone(ignoreKeysForClone);
                    topClone.Put(PdfName.P, destStructTreeRoot.GetPdfObject());
                    lastCloned.clone = topClone;
                    SeparateKids(top, firstPartElems, lastCloned);
                    // Only keep the clone if some kids were actually moved into it.
                    if (topClone.ContainsKey(PdfName.K))
                    {
                        topClone.MakeIndirect(destDocument);
                        clonedTops.Add(topClone);
                    }
                }
            }
            // Cloned tops go right after the last top-level element of the first part, in order.
            for (int i = 0; i < clonedTops.Count; ++i)
            {
                destStructTreeRoot.AddKidObject(lastTopBefore + 1 + i, clonedTops[i]);
            }
            // The copied structure is inserted between the first part and the cloned tops.
            CopyTo(destDocument, page2page, callingDocument, false, lastTopBefore + 1);
        }