/// <summary>
/// Finds the page object for this destination by following the chain of first kids
/// of the structure element stored at index 0 of the underlying array.
/// </summary>
/// <param name="names">not consulted by this implementation</param>
/// <returns>
/// the page object of the first marked-content reference reached, or <c>null</c> if the first
/// array entry is not a dictionary or the chain ends without reaching a marked-content reference
/// </returns>
public override PdfObject GetDestinationPage(IDictionary<String, PdfObject> names) {
    PdfObject target = ((PdfArray)GetPdfObject()).Get(0);
    if (!target.IsDictionary()) {
        return null;
    }
    PdfStructElem current = new PdfStructElem((PdfDictionary)target);
    while (current != null) {
        IList<IStructureNode> children = current.GetKids();
        IStructureNode head = children.Count > 0 ? children[0] : null;
        PdfMcr contentRef = head as PdfMcr;
        if (contentRef != null) {
            return contentRef.GetPageObject();
        }
        // Descend into the first kid when it is a structure element; anything else
        // (no kids, a null entry, another node type) yields null and ends the walk.
        current = head as PdfStructElem;
    }
    return null;
}
/// <summary>Transforms root tags in a way that complies with the PDF References.</summary>
/// <remarks>
/// Transforms root tags in a way that complies with the PDF References.
/// <br/><br/>
/// PDF Reference
/// 10.7.3 Grouping Elements:
/// <br/><br/>
/// For most content extraction formats, the document must be a tree with a single top-level element;
/// the structure tree root (identified by the StructTreeRoot entry in the document catalog) must have
/// only one child in its K (kids) array. If the PDF file contains a complete document, the structure
/// type Document is recommended for this top-level element in the logical structure hierarchy. If the
/// file contains a well-formed document fragment, one of the structure types Part, Art, Sect, or Div
/// may be used instead.
/// <br/><br/>
/// While the transformation runs, <c>forbidUnknownRoles</c> is switched off; its previous value is
/// restored when the method exits, including when it exits with an exception.
/// </remarks>
public virtual void NormalizeDocumentRootTag() {
    // in this method we could deal with existing document, so we don't want to throw exceptions here
    bool forbid = forbidUnknownRoles;
    forbidUnknownRoles = false;
    try {
        IList<IPdfStructElem> rootKids = document.GetStructTreeRoot().GetKids();
        if (rootKids.Count == 1 && allowedRootTagRoles.Contains(rootKids[0].GetRole())) {
            // The tree already has a single, acceptable top-level element: adopt it as the root tag.
            rootTagElement = (PdfStructElem)rootKids[0];
        } else {
            PdfStructElem prevRootTag = rootTagElement;
            // Drop the current kids of the StructTreeRoot; they are re-attached under the root tag below.
            document.GetStructTreeRoot().GetPdfObject().Remove(PdfName.K);
            if (prevRootTag == null) {
                rootTagElement = document.GetStructTreeRoot().AddKid(new PdfStructElem(document, PdfName.Document));
            } else {
                document.GetStructTreeRoot().AddKid(rootTagElement);
                if (!PdfName.Document.Equals(rootTagElement.GetRole())) {
                    // Keep the old role as a wrapper around the existing kids and promote the root to Document.
                    WrapAllKidsInTag(rootTagElement, rootTagElement.GetRole());
                    rootTagElement.SetRole(PdfName.Document);
                }
            }
            int originalRootKidsIndex = 0;
            bool isBeforeOriginalRoot = true;
            foreach (IPdfStructElem elem in rootKids) {
                // StructTreeRoot kids are always PdfStructElem, so we are safe here to cast it
                PdfStructElem kid = (PdfStructElem)elem;
                if (kid.GetPdfObject() == rootTagElement.GetPdfObject()) {
                    isBeforeOriginalRoot = false;
                    continue;
                }
                bool kidIsDocument = PdfName.Document.Equals(kid.GetRole());
                if (isBeforeOriginalRoot) {
                    // Kids that preceded the root tag are inserted at the front, in their original order.
                    rootTagElement.AddKid(originalRootKidsIndex, kid);
                    originalRootKidsIndex += kidIsDocument ? kid.GetKids().Count : 1;
                } else {
                    rootTagElement.AddKid(kid);
                }
                if (kidIsDocument) {
                    RemoveOldRoot(kid);
                }
            }
        }
    } finally {
        // Restore the flag even on failure, so that an exception thrown above
        // does not leave unknown-role checking disabled for later operations.
        forbidUnknownRoles = forbid;
    }
}
/// <summary>
/// Inserts a new tag with the given role as the first kid of <paramref name="parent"/> and relocates
/// all kids that <paramref name="parent"/> had before the call under that new tag.
/// </summary>
/// <param name="parent">structure element whose kids are to be wrapped</param>
/// <param name="wrapTagRole">role of the wrapping tag</param>
private void WrapAllKidsInTag(PdfStructElem parent, PdfName wrapTagRole) {
    // Counted before the wrapper is added, so the wrapper itself is not relocated.
    int remaining = parent.GetKids().Count;
    TagTreePointer cursor = new TagTreePointer(document);
    cursor.SetCurrentStructElem(parent).AddTag(0, wrapTagRole);
    // Snapshot of the pointer taken right after the wrapper was added; used as the relocation target.
    TagTreePointer wrapper = new TagTreePointer(cursor);
    cursor.MoveToParent();
    while (remaining > 0) {
        // The wrapper was added at index 0, so index 1 is presumably the next not-yet-moved kid.
        cursor.RelocateKid(1, wrapper);
        remaining--;
    }
}
/// <summary>
/// Recursively flushes the given structure element after its structure element kids,
/// unless its dictionary is registered in <c>waitingTagToAssociatedObj</c>.
/// </summary>
/// <param name="elem">structure element to flush together with its descendants</param>
private void FlushStructElementAndItKids(PdfStructElem elem) {
    bool isWaiting = waitingTagToAssociatedObj.ContainsKey(elem.GetPdfObject());
    if (!isWaiting) {
        foreach (IStructureNode child in elem.GetKids()) {
            PdfStructElem childElem = child as PdfStructElem;
            if (childElem != null) {
                FlushStructElementAndItKids(childElem);
            }
        }
        // Kids first, then the element itself.
        elem.Flush();
    }
}
/// <summary>
/// Recursively flushes the given structure element after its structure element kids,
/// unless its dictionary is registered in <c>connectedStructToModel</c>.
/// </summary>
/// <param name="elem">structure element to flush together with its descendants</param>
private void FlushStructElementAndItKids(PdfStructElem elem) {
    bool isConnected = connectedStructToModel.ContainsKey(elem.GetPdfObject());
    if (!isConnected) {
        foreach (IPdfStructElem child in elem.GetKids()) {
            PdfStructElem childElem = child as PdfStructElem;
            if (childElem != null) {
                FlushStructElementAndItKids(childElem);
            }
        }
        // Kids first, then the element itself.
        elem.Flush();
    }
}
/// <summary>Removes the current tag and hands its kids over to the parent tag.</summary>
/// <remarks>
/// The kids of the removed tag are inserted into the parent one after another, starting at the
/// index returned by the parent when the tag was removed from it. After the call this
/// <c>TagTreePointer</c>
/// points at the former parent.
/// <br /><br />
/// The root tag cannot be removed, and a tag cannot be removed if its parent is already flushed;
/// in these two cases an exception is thrown.
/// </remarks>
/// <returns>
/// this
/// <see cref="TagTreePointer"/>
/// instance.
/// </returns>
public virtual iText.Kernel.Pdf.Tagutils.TagTreePointer RemoveTag() {
    PdfStructElem removed = GetCurrentStructElem();
    IStructureNode parentNode = removed.GetParent();
    if (parentNode is PdfStructTreeRoot) {
        throw new PdfException(PdfException.CannotRemoveDocumentRootTag);
    }
    IList<IStructureNode> orphans = removed.GetKids();
    PdfStructElem newOwner = (PdfStructElem)parentNode;
    if (newOwner.IsFlushed()) {
        throw new PdfException(PdfException.CannotRemoveTagBecauseItsParentIsFlushed);
    }

    // remove waiting tag state if tag is removed
    Object waitingKey = tagStructureContext.GetWaitingTagsManager().GetObjForStructDict(removed.GetPdfObject());
    tagStructureContext.GetWaitingTagsManager().RemoveWaitingState(waitingKey);

    int insertAt = newOwner.RemoveKid(removed);
    PdfIndirectReference reference = removed.GetPdfObject().GetIndirectReference();
    if (reference != null) {
        // TODO how about possible references to structure element from refs or structure destination for instance?
        reference.SetFree();
    }

    foreach (IStructureNode orphan in orphans) {
        PdfStructElem orphanElem = orphan as PdfStructElem;
        if (orphanElem != null) {
            newOwner.AddKid(insertAt, orphanElem);
        } else {
            // Everything that is not a structure element is treated as a marked-content reference.
            PdfMcr movedMcr = PrepareMcrForMovingToNewParent((PdfMcr)orphan, newOwner);
            newOwner.AddKid(insertAt, movedMcr);
        }
        insertAt++;
    }

    removed.GetPdfObject().Clear();
    SetCurrentStructElem(newOwner);
    return this;
}
/// <summary>
/// Inserts a new tag with the given role as the first kid of <paramref name="parent"/> and relocates
/// all kids that <paramref name="parent"/> had before the call under that new tag.
/// </summary>
/// <param name="parent">structure element whose kids are to be wrapped</param>
/// <param name="wrapTagRole">role of the wrapping tag</param>
/// <param name="wrapTagNs">
/// namespace assigned to the wrapping tag; applied only when the context targets tag structure version 2
/// </param>
private void WrapAllKidsInTag(PdfStructElem parent, PdfName wrapTagRole, PdfNamespace wrapTagNs) {
    // Counted before the wrapper is added, so the wrapper itself is not relocated.
    int remaining = parent.GetKids().Count;
    TagTreePointer cursor = new TagTreePointer(parent, document);
    cursor.AddTag(0, wrapTagRole.GetValue());
    bool targetsVersion2 = context.TargetTagStructureVersionIs2();
    if (targetsVersion2) {
        cursor.GetProperties().SetNamespace(wrapTagNs);
    }
    // Snapshot of the pointer taken right after the wrapper was added; used as the relocation target.
    TagTreePointer wrapper = new TagTreePointer(cursor);
    cursor.MoveToParent();
    while (remaining > 0) {
        // The wrapper was added at index 0, so index 1 is presumably the next not-yet-moved kid.
        cursor.RelocateKid(1, wrapper);
        remaining--;
    }
}
// it is StructTreeRoot
// should never happen as we always should have only one root tag and we don't remove it

/// <summary>
/// Flushes <paramref name="parent"/> when none of its kids still needs it, then repeats the check
/// for the parent of <paramref name="parent"/>.
/// </summary>
/// <remarks>
/// Nothing is done when <paramref name="parent"/> is already flushed, is registered in
/// <c>connectedStructToModel</c>, or is the root tag. Otherwise the element is flushed only if every
/// marked-content kid lies on a flushed page or on <paramref name="currentPage"/>, and there is
/// no structure element kid left.
/// </remarks>
/// <param name="parent">structure element that is a candidate for flushing</param>
/// <param name="currentPage">page whose marked-content kids are considered finished</param>
private void FlushParentIfBelongsToPage(PdfStructElem parent, PdfPage currentPage) {
    if (parent.IsFlushed()) {
        return;
    }
    if (connectedStructToModel.ContainsKey(parent.GetPdfObject())) {
        return;
    }
    if (parent.GetPdfObject() == rootTagElement.GetPdfObject()) {
        return;
    }

    IList<IPdfStructElem> children = parent.GetKids();
    foreach (IPdfStructElem child in children) {
        PdfMcr contentRef = child as PdfMcr;
        if (contentRef != null) {
            PdfDictionary ownerPage = contentRef.GetPageObject();
            if (ownerPage.IsFlushed()) {
                continue;
            }
            if (!ownerPage.Equals(currentPage.GetPdfObject())) {
                // Content on another, still unflushed page: parent has to stay.
                return;
            }
            continue;
        }
        if (child is PdfStructElem) {
            // If kid is structElem and was already flushed then in kids list there will be null for it instead of
            // PdfStructElem. And therefore if we get here it means that some StructElem wasn't flushed.
            return;
        }
    }

    // The grandparent is fetched before the flush, then checked in turn.
    IPdfStructElem grandParent = parent.GetParent();
    parent.Flush();
    PdfStructElem grandParentElem = grandParent as PdfStructElem;
    if (grandParentElem != null) {
        FlushParentIfBelongsToPage(grandParentElem, currentPage);
    }
}
/// <summary>
/// Flushes <paramref name="parent"/> when none of its kids still needs it, then repeats the check
/// for the parent of <paramref name="parent"/>.
/// </summary>
/// <remarks>
/// Nothing is done when <paramref name="parent"/> is already flushed, has an object registered for it
/// in <c>waitingTagsManager</c>, or is a direct kid of the structure tree root. Otherwise the element
/// is flushed only if every marked-content kid lies on a flushed page or on
/// <paramref name="currentPage"/>, and there is no structure element kid left.
/// </remarks>
/// <param name="parent">structure element that is a candidate for flushing</param>
/// <param name="currentPage">
/// page whose marked-content kids are considered finished; when <c>null</c>, only kids on
/// already flushed pages are considered finished
/// </param>
internal virtual void FlushParentIfBelongsToPage(PdfStructElem parent, PdfPage currentPage) {
    if (parent.IsFlushed()) {
        return;
    }
    if (waitingTagsManager.GetObjForStructDict(parent.GetPdfObject()) != null) {
        return;
    }
    if (parent.GetParent() is PdfStructTreeRoot) {
        return;
    }

    IList<IStructureNode> children = parent.GetKids();
    foreach (IStructureNode child in children) {
        PdfMcr contentRef = child as PdfMcr;
        if (contentRef != null) {
            PdfDictionary ownerPage = contentRef.GetPageObject();
            if (ownerPage.IsFlushed()) {
                continue;
            }
            if (currentPage == null || !ownerPage.Equals(currentPage.GetPdfObject())) {
                // Content on another, still unflushed page: parent has to stay.
                return;
            }
            continue;
        }
        if (child is PdfStructElem) {
            // If kid is structElem and was already flushed then in kids list there will be null for it instead of
            // PdfStructElement. And therefore if we get here it means that some StructElem wasn't flushed.
            return;
        }
    }

    // The grandparent is fetched before the flush, then checked in turn.
    IStructureNode grandParent = parent.GetParent();
    parent.Flush();
    PdfStructElem grandParentElem = grandParent as PdfStructElem;
    if (grandParentElem != null) {
        FlushParentIfBelongsToPage(grandParentElem, currentPage);
    }
}
/// <summary>Writes the markup for a single structure node to the output.</summary>
/// <remarks>
/// A structure element is written as an opening tag (name derived from its role, followed by its
/// attributes), an optional <c>alt</c> element holding the alternate description, its kids, and a
/// closing tag. A marked-content reference is passed to <c>ParseTag</c>. Any other node is written
/// as a <c>flushedKid</c> placeholder.
/// </remarks>
/// <param name="kid">structure node to write</param>
/// <exception cref="iText.IO.IOException">wraps any <see cref="System.IO.IOException"/> raised while writing</exception>
protected internal virtual void InspectKid(IStructureNode kid) {
    try {
        PdfStructElem element = kid as PdfStructElem;
        if (element == null) {
            PdfMcr contentRef = kid as PdfMcr;
            if (contentRef != null) {
                ParseTag(contentRef);
            } else {
                @out.Write(" <flushedKid/> ");
            }
            return;
        }

        PdfName role = element.GetRole();
        String rawName = role.GetValue();
        String tag = FixTagName(rawName);

        @out.Write("<");
        @out.Write(tag);
        InspectAttributes(element);
        @out.Write(">" + Environment.NewLine);

        PdfString alt = element.GetAlt();
        if (alt != null) {
            @out.Write("<alt><![CDATA[");
            // Strip characters matched by the pattern (octal escape \000) from the alternate description.
            @out.Write(iText.IO.Util.StringUtil.ReplaceAll(alt.GetValue(), "[\\000]*", ""));
            @out.Write("]]></alt>" + Environment.NewLine);
        }

        InspectKids(element.GetKids());

        @out.Write("</");
        @out.Write(tag);
        @out.Write(">" + Environment.NewLine);
    } catch (System.IO.IOException e) {
        throw new iText.IO.IOException(iText.IO.IOException.UnknownIOException, e);
    }
}
/// <summary>
/// Re-attaches the given former kids of the structure tree root under <c>rootTagElement</c>,
/// keeping their relative order around the root tag.
/// </summary>
/// <remarks>
/// Kids listed before the root tag are inserted at the front of the root tag's kids; kids listed after
/// it are appended. A kid with the Document role is additionally passed to <c>RemoveOldRoot</c>.
/// When the context targets tag structure version 2 and such a kid has a namespace, this applies only
/// if that namespace is one of the standard PDF 1.7 / PDF 2.0 namespaces.
/// </remarks>
/// <param name="rootKids">kids of the structure tree root, in their original order</param>
private void AddStructTreeRootKidsToTheRootTag(IList<IStructureNode> rootKids) {
    int insertAt = 0;
    bool originalRootSeen = false;
    foreach (IStructureNode node in rootKids) {
        // StructTreeRoot kids are always PdfStructElement, so we are safe here to cast it
        PdfStructElem kid = (PdfStructElem)node;
        if (kid.GetPdfObject() == rootTagElement.GetPdfObject()) {
            originalRootSeen = true;
            continue;
        }

        // This boolean is used to "flatten" possible deep "stacking" of the tag structure in case of the multiple pages copying operations.
        // This could happen due to the wrapping of all the kids in the createNewRootTag or ensureExistingRootTagIsDocument methods.
        // And therefore, we don't need here to resolve mappings, because we exactly know which role we set.
        bool flatten = PdfName.Document.Equals(kid.GetRole());
        if (flatten && kid.GetNamespace() != null && context.TargetTagStructureVersionIs2()) {
            // we flatten only tags of document role in standard structure namespace
            String nsName = kid.GetNamespace().GetNamespaceName();
            flatten = StandardNamespaces.PDF_1_7.Equals(nsName) || StandardNamespaces.PDF_2_0.Equals(nsName);
        }

        if (originalRootSeen) {
            rootTagElement.AddKid(kid);
        } else {
            rootTagElement.AddKid(insertAt, kid);
            // Advance the insertion point by the kid's own kid count if it is to be flattened, otherwise by one.
            if (flatten) {
                insertAt += kid.GetKids().Count;
            } else {
                insertAt += 1;
            }
        }

        if (flatten) {
            RemoveOldRoot(kid);
        }
    }
}