/// <summary>
/// Looks up the page object for this destination by starting at the element stored at index 0
/// of the underlying array and repeatedly descending into the first kid.
/// </summary>
/// <param name="names">not used by this implementation</param>
/// <returns>
/// the page object of the first marked-content reference reached through the chain of first kids,
/// or <c>null</c> if index 0 is not a dictionary or the chain ends without reaching one
/// </returns>
public override PdfObject GetDestinationPage(IDictionary <String, PdfObject> names)
        {
            PdfObject head = ((PdfArray)GetPdfObject()).Get(0);

            if (!head.IsDictionary())
            {
                return null;
            }

            PdfStructElem current = new PdfStructElem((PdfDictionary)head);
            while (current != null)
            {
                IList <IStructureNode> children = current.GetKids();
                IStructureNode leading = null;
                if (children.Count > 0)
                {
                    leading = children[0];
                }

                // A marked-content reference ends the search successfully.
                PdfMcr contentRef = leading as PdfMcr;
                if (contentRef != null)
                {
                    return contentRef.GetPageObject();
                }

                // Descend into a structure element; anything else (including no kid) yields null
                // here and therefore stops the loop.
                current = leading as PdfStructElem;
            }
            return null;
        }
Пример #2
0
        /// <summary>Transforms root tags in a way that complies with the PDF References.</summary>
        /// <remarks>
        /// Transforms root tags in a way that complies with the PDF References.
        /// <br/><br/>
        /// PDF Reference
        /// 10.7.3 Grouping Elements:
        /// <br/><br/>
        /// For most content extraction formats, the document must be a tree with a single top-level element;
        /// the structure tree root (identified by the StructTreeRoot entry in the document catalog) must have
        /// only one child in its K (kids) array. If the PDF file contains a complete document, the structure
        /// type Document is recommended for this top-level element in the logical structure hierarchy. If the
        /// file contains a well-formed document fragment, one of the structure types Part, Art, Sect, or Div
        /// may be used instead.
        /// <br/><br/>
        /// While this method runs, <c>forbidUnknownRoles</c> is temporarily set to <c>false</c>; the previous
        /// value is restored when the method exits, whether normally or through an exception.
        /// </remarks>
        public virtual void NormalizeDocumentRootTag()
        {
            // in this method we could deal with an existing document, so we don't want to throw exceptions here
            bool forbid = forbidUnknownRoles;

            forbidUnknownRoles = false;
            try
            {
                IList <IPdfStructElem> rootKids = document.GetStructTreeRoot().GetKids();

                if (rootKids.Count == 1 && allowedRootTagRoles.Contains(rootKids[0].GetRole()))
                {
                    // The single existing kid already has an allowed root role: adopt it as the root tag.
                    rootTagElement = (PdfStructElem)rootKids[0];
                }
                else
                {
                    PdfStructElem prevRootTag = rootTagElement;
                    // Drop the K entry; the root tag is (re)attached below and then receives the old kids.
                    document.GetStructTreeRoot().GetPdfObject().Remove(PdfName.K);
                    if (prevRootTag == null)
                    {
                        rootTagElement = document.GetStructTreeRoot().AddKid(new PdfStructElem(document, PdfName.Document));
                    }
                    else
                    {
                        document.GetStructTreeRoot().AddKid(rootTagElement);
                        if (!PdfName.Document.Equals(rootTagElement.GetRole()))
                        {
                            // Keep the former role on a wrapper tag around the kids, then turn the root into Document.
                            WrapAllKidsInTag(rootTagElement, rootTagElement.GetRole());
                            rootTagElement.SetRole(PdfName.Document);
                        }
                    }
                    int  originalRootKidsIndex = 0;
                    bool isBeforeOriginalRoot  = true;
                    foreach (IPdfStructElem elem in rootKids)
                    {
                        // StructTreeRoot kids are always PdfStructElem, so we are safe here to cast it
                        PdfStructElem kid = (PdfStructElem)elem;
                        if (kid.GetPdfObject() == rootTagElement.GetPdfObject())
                        {
                            // Kids listed after the root tag itself are appended rather than inserted.
                            isBeforeOriginalRoot = false;
                            continue;
                        }
                        bool kidIsDocument = PdfName.Document.Equals(kid.GetRole());
                        if (isBeforeOriginalRoot)
                        {
                            rootTagElement.AddKid(originalRootKidsIndex, kid);
                            // A Document kid is passed to RemoveOldRoot below, so the insertion index advances
                            // by the number of its kids instead of by one.
                            originalRootKidsIndex += kidIsDocument ? kid.GetKids().Count : 1;
                        }
                        else
                        {
                            rootTagElement.AddKid(kid);
                        }
                        if (kidIsDocument)
                        {
                            RemoveOldRoot(kid);
                        }
                    }
                }
            }
            finally
            {
                // Restore the caller's setting even if normalization fails part-way through;
                // otherwise unknown-role validation would stay disabled for this instance.
                forbidUnknownRoles = forbid;
            }
        }
Пример #3
0
        /// <summary>
        /// Adds a tag with the given role at index 0 of <paramref name="parent"/> and then relocates,
        /// one by one, as many kids as <paramref name="parent"/> had before that tag was added.
        /// </summary>
        /// <param name="parent">structure element whose kids are to be wrapped</param>
        /// <param name="wrapTagRole">role of the wrapping tag</param>
        private void WrapAllKidsInTag(PdfStructElem parent, PdfName wrapTagRole)
        {
            // Counted before the wrapper is added, so only the pre-existing kids are relocated.
            int originalKidCount = parent.GetKids().Count;
            TagTreePointer pointer = new TagTreePointer(document);

            pointer.SetCurrentStructElem(parent).AddTag(0, wrapTagRole);
            // Copy taken right after AddTag; it serves as the relocation target
            // (presumably positioned at the newly added tag - confirm against TagTreePointer.AddTag).
            TagTreePointer wrapperPointer = new TagTreePointer(pointer);

            pointer.MoveToParent();
            // The wrapper occupies index 0, so the kid at index 1 is the one relocated on each pass.
            int remaining = originalKidCount;
            while (remaining-- > 0)
            {
                pointer.RelocateKid(1, wrapperPointer);
            }
        }
Пример #4
0
 /// <summary>
 /// Flushes the given structure element after recursively flushing its structure-element kids.
 /// Nothing is flushed if the element's dictionary is a key of <c>waitingTagToAssociatedObj</c>.
 /// </summary>
 /// <param name="elem">structure element to flush together with its descendants</param>
 private void FlushStructElementAndItKids(PdfStructElem elem)
 {
     if (!waitingTagToAssociatedObj.ContainsKey(elem.GetPdfObject()))
     {
         foreach (IStructureNode child in elem.GetKids())
         {
             // Only structure elements are descended into; other kinds of kids are skipped.
             PdfStructElem childElem = child as PdfStructElem;
             if (childElem != null)
             {
                 FlushStructElementAndItKids(childElem);
             }
         }
         // Kids first, then the element itself.
         elem.Flush();
     }
 }
Пример #5
0
 /// <summary>
 /// Depth-first flush: every structure-element kid is flushed before <paramref name="elem"/> itself.
 /// An element whose dictionary is a key of <c>connectedStructToModel</c> is left untouched,
 /// together with everything below it.
 /// </summary>
 /// <param name="elem">root of the subtree to flush</param>
 private void FlushStructElementAndItKids(PdfStructElem elem)
 {
     bool isConnected = connectedStructToModel.ContainsKey(elem.GetPdfObject());
     if (isConnected)
     {
         return;
     }

     foreach (IPdfStructElem node in elem.GetKids())
     {
         PdfStructElem nested = node as PdfStructElem;
         if (nested == null)
         {
             // Not a structure element: nothing to recurse into.
             continue;
         }
         FlushStructElementAndItKids(nested);
     }

     elem.Flush();
 }
Пример #6
0
        /// <summary>Removes the current tag.</summary>
        /// <remarks>
        /// Removes the current tag. If it has kids, they will become kids of the current tag's parent,
        /// inserted starting at the position the removed tag occupied.
        /// This method call moves this
        /// <c>TagTreePointer</c>
        /// to the current tag's parent.
        /// <br /><br />
        /// You cannot remove the root tag, and you also cannot remove a tag if its parent is already flushed;
        /// in these two cases an exception will be thrown.
        /// </remarks>
        /// <returns>
        /// this
        /// <see cref="TagTreePointer"/>
        /// instance.
        /// </returns>
        /// <exception cref="PdfException">
        /// if the parent of the current tag is the structure tree root, or if the parent is already flushed.
        /// </exception>
        public virtual iText.Kernel.Pdf.Tagutils.TagTreePointer RemoveTag()
        {
            PdfStructElem  currentStructElem = GetCurrentStructElem();
            IStructureNode parentElem        = currentStructElem.GetParent();

            // a tag whose parent is the structure tree root is the document root tag and must stay
            if (parentElem is PdfStructTreeRoot)
            {
                throw new PdfException(PdfException.CannotRemoveDocumentRootTag);
            }
            // kids are obtained before the tag is detached from its parent
            IList <IStructureNode> kids   = currentStructElem.GetKids();
            PdfStructElem          parent = (PdfStructElem)parentElem;

            if (parent.IsFlushed())
            {
                throw new PdfException(PdfException.CannotRemoveTagBecauseItsParentIsFlushed);
            }
            // remove waiting tag state if tag is removed
            Object objForStructDict = tagStructureContext.GetWaitingTagsManager().GetObjForStructDict(currentStructElem
                                                                                                      .GetPdfObject());

            tagStructureContext.GetWaitingTagsManager().RemoveWaitingState(objForStructDict);
            // the index returned here is used below as the insertion position for the removed tag's kids
            int removedKidIndex         = parent.RemoveKid(currentStructElem);
            PdfIndirectReference indRef = currentStructElem.GetPdfObject().GetIndirectReference();

            if (indRef != null)
            {
                // TODO how about possible references to structure element from refs or structure destination for instance?
                indRef.SetFree();
            }
            // re-attach each kid to the parent, advancing the insertion index after every kid
            foreach (IStructureNode kid in kids)
            {
                if (kid is PdfStructElem)
                {
                    parent.AddKid(removedKidIndex++, (PdfStructElem)kid);
                }
                else
                {
                    // any kid that is not a PdfStructElem is cast to PdfMcr and prepared for the new parent first
                    PdfMcr mcr = PrepareMcrForMovingToNewParent((PdfMcr)kid, parent);
                    parent.AddKid(removedKidIndex++, mcr);
                }
            }
            // empty the removed element's dictionary, then point this pointer at the parent
            currentStructElem.GetPdfObject().Clear();
            SetCurrentStructElem(parent);
            return(this);
        }
Пример #7
0
        /// <summary>
        /// Adds a tag with the given role at index 0 of <paramref name="parent"/>, assigns it the given
        /// namespace when the target tag structure version is 2, and then relocates as many kids as
        /// <paramref name="parent"/> had before the tag was added.
        /// </summary>
        /// <param name="parent">structure element whose kids are to be wrapped</param>
        /// <param name="wrapTagRole">role of the wrapping tag</param>
        /// <param name="wrapTagNs">namespace set on the wrapping tag (only for tag structure version 2)</param>
        private void WrapAllKidsInTag(PdfStructElem parent, PdfName wrapTagRole, PdfNamespace wrapTagNs)
        {
            // Taken before the wrapper exists, so the wrapper itself is not counted.
            int kidsToMove = parent.GetKids().Count;
            TagTreePointer cursor = new TagTreePointer(parent, document);

            cursor.AddTag(0, wrapTagRole.GetValue());
            bool isVersion2 = context.TargetTagStructureVersionIs2();
            if (isVersion2)
            {
                cursor.GetProperties().SetNamespace(wrapTagNs);
            }
            // Copy made before the cursor moves back up; it is the relocation target.
            TagTreePointer wrapperCursor = new TagTreePointer(cursor);

            cursor.MoveToParent();
            // Index 0 holds the wrapper, hence index 1 is relocated on every pass.
            for (int left = kidsToMove; left > 0; --left)
            {
                cursor.RelocateKid(1, wrapperCursor);
            }
        }
Пример #8
0
        // it is StructTreeRoot
        // should never happen as we always should have only one root tag and we don't remove it
        /// <summary>
        /// Flushes <paramref name="parent"/> when none of its kids prevents it, then repeats the check
        /// for the element's own parent.
        /// </summary>
        /// <remarks>
        /// Nothing happens if the element is already flushed, if its dictionary is a key of
        /// <c>connectedStructToModel</c>, or if it is the root tag element. A kid prevents flushing when it
        /// is a marked-content reference whose page is neither flushed nor equal to
        /// <paramref name="currentPage"/>, or when it is a structure element.
        /// </remarks>
        /// <param name="parent">structure element that is a candidate for flushing</param>
        /// <param name="currentPage">page that marked-content kids are compared against</param>
        private void FlushParentIfBelongsToPage(PdfStructElem parent, PdfPage currentPage)
        {
            if (parent.IsFlushed())
            {
                return;
            }
            if (connectedStructToModel.ContainsKey(parent.GetPdfObject()))
            {
                return;
            }
            if (parent.GetPdfObject() == rootTagElement.GetPdfObject())
            {
                return;
            }

            foreach (IPdfStructElem child in parent.GetKids())
            {
                PdfMcr contentRef = child as PdfMcr;
                if (contentRef != null)
                {
                    PdfDictionary pageOfKid = contentRef.GetPageObject();
                    if (!pageOfKid.IsFlushed() && !pageOfKid.Equals(currentPage.GetPdfObject()))
                    {
                        // Content on another, still unflushed page: the parent has to stay.
                        return;
                    }
                    continue;
                }

                // If kid is structElem and was already flushed then in kids list there will be null for it instead of
                // PdfStructElem. So finding a PdfStructElem here means that some StructElem wasn't flushed.
                if (child is PdfStructElem)
                {
                    return;
                }
            }

            // The grandparent is fetched before flushing the parent.
            IPdfStructElem grandParent = parent.GetParent();
            parent.Flush();

            PdfStructElem grandParentElem = grandParent as PdfStructElem;
            if (grandParentElem != null)
            {
                FlushParentIfBelongsToPage(grandParentElem, currentPage);
            }
        }
Пример #9
0
        /// <summary>
        /// Flushes <paramref name="parent"/> if it is ready to be flushed and then applies the same
        /// check to the element's own parent.
        /// </summary>
        /// <remarks>
        /// The element is skipped when it is already flushed, when the waiting tags manager returns an
        /// object for its dictionary, or when its parent is the structure tree root. It is not ready while
        /// it has a structure-element kid, or a marked-content kid whose page is not flushed and is not
        /// <paramref name="currentPage"/> (a <c>null</c> <paramref name="currentPage"/> matches no page).
        /// </remarks>
        /// <param name="parent">structure element that is a candidate for flushing</param>
        /// <param name="currentPage">page that marked-content kids are compared against; may be <c>null</c></param>
        internal virtual void FlushParentIfBelongsToPage(PdfStructElem parent, PdfPage currentPage)
        {
            bool mustBeKept = parent.IsFlushed()
                              || waitingTagsManager.GetObjForStructDict(parent.GetPdfObject()) != null
                              || parent.GetParent() is PdfStructTreeRoot;
            if (mustBeKept)
            {
                return;
            }

            foreach (IStructureNode node in parent.GetKids())
            {
                PdfMcr contentRef = node as PdfMcr;
                if (contentRef != null)
                {
                    PdfDictionary pageOfKid = contentRef.GetPageObject();
                    // The kid is acceptable when its page is flushed or is exactly the current page.
                    if (pageOfKid.IsFlushed() || (currentPage != null && pageOfKid.Equals(currentPage.GetPdfObject())))
                    {
                        continue;
                    }
                    return;
                }

                // If kid is structElem and was already flushed then in kids list there will be null for it instead of
                // PdfStructElem. So reaching a PdfStructElem here means that some StructElem wasn't flushed.
                if (node is PdfStructElem)
                {
                    return;
                }
            }

            // Fetch the grandparent first, then flush the parent.
            IStructureNode upper = parent.GetParent();
            parent.Flush();

            PdfStructElem upperElem = upper as PdfStructElem;
            if (upperElem != null)
            {
                FlushParentIfBelongsToPage(upperElem, currentPage);
            }
        }
Пример #10
0
 /// <summary>Writes the representation of a single structure node to the output.</summary>
 /// <remarks>
 /// A structure element is written as an opening tag (name derived from its role via
 /// <c>FixTagName</c>, followed by its attributes), an optional <c>alt</c> CDATA section, its kids and
 /// a closing tag. A marked-content reference is passed to <c>ParseTag</c>. Any other node is written
 /// as a <c>flushedKid</c> placeholder.
 /// </remarks>
 /// <param name="kid">the node to inspect</param>
 /// <exception cref="iText.IO.IOException">wraps any <c>System.IO.IOException</c> raised while writing</exception>
 protected internal virtual void InspectKid(IStructureNode kid)
 {
     try {
         PdfStructElem element = kid as PdfStructElem;
         if (element != null)
         {
             String tagName = FixTagName(element.GetRole().GetValue());

             // Opening tag with attributes.
             @out.Write("<");
             @out.Write(tagName);
             InspectAttributes(element);
             @out.Write(">" + Environment.NewLine);

             // Optional alternate description.
             PdfString altText = element.GetAlt();
             if (altText != null)
             {
                 @out.Write("<alt><![CDATA[");
                 @out.Write(iText.IO.Util.StringUtil.ReplaceAll(altText.GetValue(), "[\\000]*", ""));
                 @out.Write("]]></alt>" + Environment.NewLine);
             }

             // Nested content, then the closing tag.
             InspectKids(element.GetKids());
             @out.Write("</");
             @out.Write(tagName);
             @out.Write(">" + Environment.NewLine);
             return;
         }

         PdfMcr contentRef = kid as PdfMcr;
         if (contentRef != null)
         {
             ParseTag(contentRef);
         }
         else
         {
             @out.Write(" <flushedKid/> ");
         }
     }
     catch (System.IO.IOException e) {
         throw new iText.IO.IOException(iText.IO.IOException.UnknownIOException, e);
     }
 }
Пример #11
0
        /// <summary>
        /// Adds every element of <paramref name="rootKids"/>, except the root tag itself, as a kid of
        /// the root tag element.
        /// </summary>
        /// <remarks>
        /// Elements listed before the root tag are inserted at a running index starting from 0; elements
        /// listed after it are appended. Each kid treated as a Document tag is additionally passed to
        /// <c>RemoveOldRoot</c>.
        /// </remarks>
        /// <param name="rootKids">kids of the structure tree root; each one is cast to <c>PdfStructElem</c></param>
        private void AddStructTreeRootKidsToTheRootTag(IList <IStructureNode> rootKids)
        {
            int insertAt = 0;
            bool rootTagSeen = false;

            foreach (IStructureNode node in rootKids)
            {
                // StructTreeRoot kids are always PdfStructElem, so it is safe to cast here
                PdfStructElem kid = (PdfStructElem)node;
                if (kid.GetPdfObject() == rootTagElement.GetPdfObject())
                {
                    rootTagSeen = true;
                    continue;
                }

                // This flag is used to "flatten" possible deep "stacking" of the tag structure in case of multiple
                // page-copying operations. That stacking could come from wrapping all the kids in the
                // createNewRootTag or ensureExistingRootTagIsDocument methods, so role mappings do not need
                // to be resolved here: the role that was set is known exactly.
                bool flattenAsDocument = PdfName.Document.Equals(kid.GetRole());
                if (flattenAsDocument && kid.GetNamespace() != null && context.TargetTagStructureVersionIs2())
                {
                    // only tags of the Document role in a standard structure namespace are flattened
                    String nsName = kid.GetNamespace().GetNamespaceName();
                    flattenAsDocument = StandardNamespaces.PDF_1_7.Equals(nsName)
                                        || StandardNamespaces.PDF_2_0.Equals(nsName);
                }

                if (rootTagSeen)
                {
                    rootTagElement.AddKid(kid);
                }
                else
                {
                    rootTagElement.AddKid(insertAt, kid);
                    // A flattened kid advances the index by its kid count, any other kid by one.
                    if (flattenAsDocument)
                    {
                        insertAt += kid.GetKids().Count;
                    }
                    else
                    {
                        insertAt += 1;
                    }
                }

                if (flattenAsDocument)
                {
                    RemoveOldRoot(kid);
                }
            }
        }