/// <summary>
/// Splits the top-level tag structure of <paramref name="document"/> at the boundary of a page range.
/// Structure elements reachable from the marked content of pages
/// <paramref name="startPage"/> (inclusive) to <paramref name="beforePage"/> (exclusive) form the
/// "first part". Top-level elements of the first part are cloned, their kids are processed by
/// <c>SeparateKids</c>, and every clone that ends up with a /K entry is inserted into the structure
/// tree root right after the last top-level element of the first part.
/// </summary>
/// <param name="document">document whose structure tree is separated.</param>
/// <param name="startPage">1-based number of the first page of the range.</param>
/// <param name="beforePage">1-based number of the page that follows the range.</param>
/// <param name="startPageStructTopIndex">
/// index among the root kids that is returned for an empty range and that seeds the search
/// for the last top-level element of the first part.
/// </param>
/// <returns>
/// <c>-1</c> if the document is not tagged or the page range is invalid;
/// <paramref name="startPageStructTopIndex"/> if the range is empty;
/// the number of root kids if the range reaches the end of the document;
/// otherwise the index following the last top-level element that belongs to the first part.
/// </returns>
/// <exception cref="PdfException">
/// if the topmost ancestor of a marked content reference on one of the pages is already flushed.
/// </exception>
private static int SeparateStructure(PdfDocument document, int startPage, int beforePage, int startPageStructTopIndex) {
    if (!document.IsTagged() || startPage < 1 || beforePage < startPage) {
        return -1;
    }
    int pageAfterLast = document.GetNumberOfPages() + 1;
    if (beforePage > pageAfterLast) {
        return -1;
    }
    if (beforePage == startPage) {
        // Empty page range: there is nothing to separate.
        return startPageStructTopIndex;
    }
    PdfStructTreeRoot structTreeRoot = document.GetStructTreeRoot();
    if (beforePage == pageAfterLast) {
        // The range runs up to the end of the document: the split point is past all root kids.
        return structTreeRoot.GetKidsObject().Size();
    }

    // Here we separate the structure tree in two parts: struct elems that belong to the pages whose indexes
    // are less than beforePage and those struct elems that belong to other pages. Some elems might belong
    // to both parts, and these are the ones that we are looking for.
    ICollection<PdfObject> firstPartElems = new HashSet<PdfObject>();
    for (int pageNum = startPage; pageNum < beforePage; ++pageNum) {
        ICollection<PdfMcr> pageMcrs = structTreeRoot.GetPageMarkedContentReferences(document.GetPage(pageNum));
        if (pageMcrs == null) {
            continue;
        }
        foreach (PdfMcr mcr in pageMcrs) {
            firstPartElems.Add(mcr.GetPdfObject());
            PdfDictionary topAncestor = AddAllParentsToSet(mcr, firstPartElems);
            if (topAncestor != null && topAncestor.IsFlushed()) {
                throw new PdfException(PdfException.TagFromTheExistingTagStructureIsFlushedCannotAddCopiedPageTags);
            }
        }
    }

    // Now we "walk" through all the elems which belong to the first part, and look for the ones that contain
    // both kids from the first and the second part. We clone found elements and move kids from the second
    // part to the cloned elems.
    PdfArray tops = structTreeRoot.GetKidsObject();
    IList<PdfDictionary> clonedTops = new List<PdfDictionary>();
    int lastTopBefore = startPageStructTopIndex - 1;
    for (int topIndex = 0; topIndex < tops.Size(); ++topIndex) {
        PdfDictionary top = tops.GetAsDictionary(topIndex);
        if (!firstPartElems.Contains(top)) {
            continue;
        }
        lastTopBefore = topIndex;

        PdfDictionary topClone = top.Clone(ignoreKeysForClone);
        topClone.Put(PdfName.P, structTreeRoot.GetPdfObject());
        StructureTreeCopier.LastClonedAncestor lastCloned = new StructureTreeCopier.LastClonedAncestor {
            ancestor = top,
            clone = topClone
        };
        SeparateKids(top, firstPartElems, lastCloned, document);

        // Only clones that actually received kids become part of the document.
        if (topClone.ContainsKey(PdfName.K)) {
            topClone.MakeIndirect(document);
            clonedTops.Add(topClone);
        }
    }

    // Insert the clones, in order, right after the last top-level element of the first part.
    int insertAt = lastTopBefore + 1;
    foreach (PdfDictionary clonedTop in clonedTops) {
        structTreeRoot.AddKidObject(insertAt++, clonedTop);
    }
    return lastTopBefore + 1;
}
/// <summary>
/// Makes sure that <paramref name="structElem"/> has a clone attached below the current
/// <c>lastCloned.clone</c>. If <paramref name="structElem"/> is already the last cloned ancestor,
/// nothing happens. Otherwise <paramref name="structElem"/> and each of its parents up to (but not
/// including) <c>lastCloned.ancestor</c> are cloned as indirect objects, the clones are chained through
/// their /P and /K entries, the topmost clone is added as a kid of <c>lastCloned.clone</c>, and
/// <paramref name="lastCloned"/> is updated to point at <paramref name="structElem"/> and its clone.
/// </summary>
/// <param name="structElem">structure element that needs a clone.</param>
/// <param name="lastCloned">the most recently cloned ancestor together with its clone; updated in place.</param>
/// <param name="document">document in which the clones are made indirect.</param>
private static void CloneParents(PdfDictionary structElem, StructureTreeCopier.LastClonedAncestor lastCloned, PdfDocument document) {
    if (lastCloned.ancestor == structElem) {
        // This element has already been cloned; its clone is lastCloned.clone.
        return;
    }

    PdfDictionary elemClone = (PdfDictionary)structElem.Clone(ignoreKeysForClone).MakeIndirect(document);

    // Climb from structElem towards lastCloned.ancestor, cloning every element in between
    // and linking each new clone with the clone built one level below it.
    PdfDictionary walker = structElem;
    PdfDictionary walkerClone = elemClone;
    while (walker.Get(PdfName.P) != lastCloned.ancestor) {
        PdfDictionary upper = walker.GetAsDictionary(PdfName.P);
        PdfDictionary upperClone = (PdfDictionary)upper.Clone(ignoreKeysForClone).MakeIndirect(document);
        walkerClone.Put(PdfName.P, upperClone);
        upperClone.Put(PdfName.K, walkerClone);
        walker = upper;
        walkerClone = upperClone;
    }

    // walkerClone is now the top of the freshly built chain; hang it under the previous clone.
    PdfStructElem.AddKidObject(lastCloned.clone, -1, walkerClone);
    lastCloned.ancestor = structElem;
    lastCloned.clone = elemClone;
}
/// <summary>
/// Recursively walks the kids of <paramref name="structElem"/> and moves those that are not contained in
/// <paramref name="firstPartElems"/> under the clone tracked by <paramref name="lastCloned"/>.
/// Missing clones of intermediate ancestors are created on demand via <c>CloneParents</c>.
/// </summary>
/// <param name="structElem">structure element whose /K entry is processed.</param>
/// <param name="firstPartElems">objects (struct elems and marked content references) of the first part.</param>
/// <param name="lastCloned">the most recently cloned ancestor together with its clone; updated in place.</param>
/// <exception cref="PdfException">
/// if a struct elem kid that is not in the first part is already flushed.
/// </exception>
private static void SeparateKids(PdfDictionary structElem, ICollection<PdfObject> firstPartElems, StructureTreeCopier.LastClonedAncestor lastCloned) {
    PdfObject kidsEntry = structElem.Get(PdfName.K);
    if (kidsEntry.IsArray()) {
        PdfDocument document = structElem.GetIndirectReference().GetDocument();
        PdfArray kids = (PdfArray)kidsEntry;
        int pos = 0;
        while (pos < kids.Size()) {
            PdfObject kid = kids.Get(pos);
            PdfDictionary dictKid = kid.IsDictionary() ? (PdfDictionary)kid : null;
            // Set when the kid is removed from the array; the next kid then sits at the same position.
            bool kidMoved = false;

            if (dictKid != null && PdfStructElem.IsStructElem(dictKid)) {
                if (firstPartElems.Contains(kid)) {
                    SeparateKids(dictKid, firstPartElems, lastCloned);
                } else {
                    if (dictKid.IsFlushed()) {
                        throw new PdfException(PdfException.TagFromTheExistingTagStructureIsFlushedCannotAddCopiedPageTags);
                    }
                    // Elems with no kids will not be marked as being from the first part,
                    // but nonetheless we don't want to move all of them to the second part;
                    // we just leave them as is.
                    if (dictKid.ContainsKey(PdfName.K)) {
                        CloneParents(structElem, lastCloned, document);
                        kids.Remove(pos);
                        PdfStructElem.AddKidObject(lastCloned.clone, -1, kid);
                        kidMoved = true;
                    }
                }
            } else if (!firstPartElems.Contains(kid)) {
                // The kid is a marked content reference that does not belong to the first part.
                CloneParents(structElem, lastCloned, document);
                PdfMcr mcr;
                if (dictKid == null) {
                    mcr = new PdfMcrNumber((PdfNumber)kid, new PdfStructElem(lastCloned.clone));
                } else if (dictKid.Get(PdfName.Type).Equals(PdfName.MCR)) {
                    mcr = new PdfMcrDictionary(dictKid, new PdfStructElem(lastCloned.clone));
                } else {
                    mcr = new PdfObjRef(dictKid, new PdfStructElem(lastCloned.clone));
                }
                kids.Remove(pos);
                PdfStructElem.AddKidObject(lastCloned.clone, -1, kid);
                // re-register mcr
                document.GetStructTreeRoot().GetParentTreeHandler().RegisterMcr(mcr);
                kidMoved = true;
            }

            if (!kidMoved) {
                ++pos;
            }
        }
    } else if (kidsEntry.IsDictionary() && PdfStructElem.IsStructElem((PdfDictionary)kidsEntry)) {
        // If the /K entry is not a PdfArray, it is a single kid which we won't clone at the moment, because
        // it can't contain kids from both parts at the same time. It will either be cloned as an ancestor
        // later, or not cloned at all. If this kid is a struct elem, it is definitely a struct elem from the
        // first part, so we simply descend into it.
        SeparateKids((PdfDictionary)kidsEntry, firstPartElems, lastCloned);
    }

    // Leaving structElem: if it is the last cloned ancestor, step both the ancestor and its clone
    // up to their respective /P entries.
    if (lastCloned.ancestor == structElem) {
        lastCloned.ancestor = lastCloned.ancestor.GetAsDictionary(PdfName.P);
        lastCloned.clone = lastCloned.clone.GetAsDictionary(PdfName.P);
    }
}
/// <summary>
/// Copies structure to a
/// <paramref name="destDocument"/>
/// and inserts it in a specified position in the document.
/// <br/><br/>
/// NOTE: Works only for
/// <c>PdfStructTreeRoot</c>
/// that is read from the document opened in reading mode,
/// otherwise an exception is thrown.
/// <br/>
/// Also, to insert a tagged page into existing tag structure, existing tag structure shouldn't be flushed, otherwise
/// an exception may be raised.
/// <br/>
/// Does nothing if <paramref name="destDocument"/> is not tagged.
/// </summary>
/// <param name="destDocument">document to copy structure to.</param>
/// <param name="insertBeforePage">indicates where the structure is to be inserted.</param>
/// <param name="page2page">association between original page and copied page.</param>
/// <param name="callingDocument">document that is forwarded unchanged to the overload performing the actual copy.</param>
/// <exception cref="PdfException">
/// if the topmost ancestor of a marked content reference on a page preceding
/// <paramref name="insertBeforePage"/> is already flushed.
/// </exception>
public static void CopyTo(PdfDocument destDocument, int insertBeforePage, IDictionary<PdfPage, PdfPage> page2page, PdfDocument callingDocument) {
    if (!destDocument.IsTagged()) {
        return;
    }

    // Here we separate the structure tree in two parts: struct elems that belong to the pages whose indexes
    // are less than insertBeforePage and those struct elems that belong to other pages. Some elems might
    // belong to both parts, and these are the ones that we are looking for.
    ICollection<PdfObject> firstPartElems = new HashSet<PdfObject>();
    PdfStructTreeRoot destStructTreeRoot = destDocument.GetStructTreeRoot();
    for (int i = 1; i < insertBeforePage; ++i) {
        PdfPage pageOfFirstHalf = destDocument.GetPage(i);
        ICollection<PdfMcr> pageMcrs = destStructTreeRoot.GetPageMarkedContentReferences(pageOfFirstHalf);
        if (pageMcrs == null) {
            continue;
        }
        foreach (PdfMcr mcr in pageMcrs) {
            firstPartElems.Add(mcr.GetPdfObject());
            PdfDictionary top = AddAllParentsToSet(mcr, firstPartElems);
            // Guard against a missing topmost ancestor, exactly as SeparateStructure does,
            // instead of failing with a NullReferenceException.
            if (top != null && top.IsFlushed()) {
                throw new PdfException(PdfException.TagFromTheExistingTagStructureIsFlushedCannotAddCopiedPageTags);
            }
        }
    }

    IList<PdfDictionary> clonedTops = new List<PdfDictionary>();
    PdfArray tops = destStructTreeRoot.GetKidsObject();

    // Now we "walk" through all the elems which belong to the first part, and look for the ones that contain
    // both kids from the first and the second part. We clone found elements and move kids from the second
    // part to the cloned elems.
    // NOTE(review): with this initial value the insert index is 1 (not 0) when no top-level element belongs
    // to the first part; SeparateStructure seeds the same variable with startPageStructTopIndex - 1.
    // Confirm that this is intended before changing it.
    int lastTopBefore = 0;
    for (int i = 0; i < tops.Size(); ++i) {
        PdfDictionary top = tops.GetAsDictionary(i);
        if (!firstPartElems.Contains(top)) {
            continue;
        }
        lastTopBefore = i;

        StructureTreeCopier.LastClonedAncestor lastCloned = new StructureTreeCopier.LastClonedAncestor();
        lastCloned.ancestor = top;
        PdfDictionary topClone = top.Clone(ignoreKeysForClone);
        topClone.Put(PdfName.P, destStructTreeRoot.GetPdfObject());
        lastCloned.clone = topClone;

        SeparateKids(top, firstPartElems, lastCloned);

        // Only clones that actually received kids become part of the document.
        if (topClone.ContainsKey(PdfName.K)) {
            topClone.MakeIndirect(destDocument);
            clonedTops.Add(topClone);
        }
    }

    // Insert the clones, in order, right after the last top-level element of the first part.
    for (int i = 0; i < clonedTops.Count; ++i) {
        destStructTreeRoot.AddKidObject(lastTopBefore + 1 + i, clonedTops[i]);
    }

    // The copied structure goes right after the last top-level element of the first part.
    CopyTo(destDocument, page2page, callingDocument, false, lastTopBefore + 1);
}