Пример #1
0
        /// <summary>Transforms root tags in a way that complies with the PDF References.</summary>
        /// <remarks>
        /// Transforms root tags in a way that complies with the PDF References.
        /// <br/><br/>
        /// PDF Reference
        /// 10.7.3 Grouping Elements:
        /// <br/><br/>
        /// For most content extraction formats, the document must be a tree with a single top-level element;
        /// the structure tree root (identified by the StructTreeRoot entry in the document catalog) must have
        /// only one child in its K (kids) array. If the PDF file contains a complete document, the structure
        /// type Document is recommended for this top-level element in the logical structure hierarchy. If the
        /// file contains a well-formed document fragment, one of the structure types Part, Art, Sect, or Div
        /// may be used instead.
        /// </remarks>
        public virtual void NormalizeDocumentRootTag()
        {
            // This method may be dealing with an already existing document, so we don't want to throw
            // exceptions here: the forbidUnknownRoles flag is cleared for the duration of the call
            // and restored in the finally block, even when normalization fails part-way.
            bool forbid = forbidUnknownRoles;

            forbidUnknownRoles = false;
            try
            {
                IList <IPdfStructElem> rootKids = document.GetStructTreeRoot().GetKids();

                if (rootKids.Count == 1 && allowedRootTagRoles.Contains(rootKids[0].GetRole()))
                {
                    // A single root kid with an allowed role is used as the root tag as is.
                    rootTagElement = (PdfStructElem)rootKids[0];
                }
                else
                {
                    PdfStructElem prevRootTag = rootTagElement;
                    // Drop the current kids entry of the structure tree root; it is rebuilt below.
                    document.GetStructTreeRoot().GetPdfObject().Remove(PdfName.K);
                    if (prevRootTag == null)
                    {
                        // No root tag known yet: create a fresh Document element as the only root kid.
                        rootTagElement = document.GetStructTreeRoot().AddKid(new PdfStructElem(document, PdfName.Document));
                    }
                    else
                    {
                        // Re-attach the known root tag and make sure its role is Document; its former
                        // role is kept by wrapping all of its kids in a tag with that role.
                        document.GetStructTreeRoot().AddKid(rootTagElement);
                        if (!PdfName.Document.Equals(rootTagElement.GetRole()))
                        {
                            WrapAllKidsInTag(rootTagElement, rootTagElement.GetRole());
                            rootTagElement.SetRole(PdfName.Document);
                        }
                    }
                    int  originalRootKidsIndex = 0;
                    bool isBeforeOriginalRoot  = true;
                    foreach (IPdfStructElem elem in rootKids)
                    {
                        // StructTreeRoot kids are always PdfStructElem, so it is safe to cast here
                        PdfStructElem kid = (PdfStructElem)elem;
                        if (kid.GetPdfObject() == rootTagElement.GetPdfObject())
                        {
                            // Reached the root tag itself: the remaining kids are appended after its own kids.
                            isBeforeOriginalRoot = false;
                            continue;
                        }
                        bool kidIsDocument = PdfName.Document.Equals(kid.GetRole());
                        if (isBeforeOriginalRoot)
                        {
                            // Kids preceding the root tag are inserted in front, keeping their relative order.
                            rootTagElement.AddKid(originalRootKidsIndex, kid);
                            originalRootKidsIndex += kidIsDocument ? kid.GetKids().Count : 1;
                        }
                        else
                        {
                            rootTagElement.AddKid(kid);
                        }
                        if (kidIsDocument)
                        {
                            RemoveOldRoot(kid);
                        }
                    }
                }
            }
            finally
            {
                // Restore the caller's setting regardless of how the normalization ended.
                forbidUnknownRoles = forbid;
            }
        }
 /// <summary>Appends the PDF object of a structure element to the array backing this destination.</summary>
 /// <param name="elem">structure element to add; its PDF object must have an indirect reference</param>
 /// <returns>this instance</returns>
 private iText.Kernel.Pdf.Navigation.PdfStructureDestination Add(PdfStructElem elem)
 {
     bool isIndirect = elem.GetPdfObject().GetIndirectReference() != null;

     if (!isIndirect)
     {
         throw new PdfException(PdfException.StructureElementInStructureDestinationShallBeAnIndirectObject);
     }

     PdfArray destinationArray = (PdfArray)GetPdfObject();
     destinationArray.Add(elem.GetPdfObject());
     return this;
 }
Пример #3
0
        /// <summary>
        /// Checks that the /Pg entry of the element matches the given kid page, writing the kid page
        /// into the element when the entry is absent.
        /// </summary>
        /// <param name="elem">structure element whose /Pg entry is inspected</param>
        /// <param name="kidPage">page dictionary of the kid</param>
        /// <returns>result of comparing <paramref name="kidPage"/> with the element's (possibly just assigned) page</returns>
        private bool EnsureElementPageEqualsKidPage(PdfStructElem elem, PdfDictionary kidPage)
        {
            PdfObject elemPage = elem.GetPdfObject().Get(PdfName.Pg);

            if (elemPage != null)
            {
                return kidPage.Equals(elemPage);
            }

            // The element has no page yet: adopt the kid's page.
            elem.GetPdfObject().Put(PdfName.Pg, kidPage);
            elemPage = kidPage;
            return kidPage.Equals(elemPage);
        }
Пример #4
0
            /// <summary>
            /// Draws this element, first trying to create its tag so that an /ID entry can be written
            /// into the corresponding structure element dictionary.
            /// </summary>
            /// <param name="drawContext">context passed through to the base implementation</param>
            public override void Draw(DrawContext drawContext)
            {
                // The goal is to reach the actual tag in the logical structure tree in order to set a custom
                // property (the /ID entry of the structure element dictionary for the table header cell), for
                // which iText offers no convenient API at the moment. The tag is therefore requested right at
                // the start of drawing:
                //  - if this instance of the header cell is a paging artifact, the layout engine marks it so
                //    and no tag is created (CreateTag returns 'false');
                //  - if this instance is the header that is to be tagged, the tag is created now and is later
                //    picked up and placed at the correct position in the structure tree by the layout engine.
                LayoutTaggingHelper helper     = GetProperty <LayoutTaggingHelper>(Property.TAGGING_HELPER);
                TagTreePointer      tagPointer = new TagTreePointer(pdfDocument);
                bool tagCreated = helper.CreateTag(this, tagPointer);

                if (tagCreated)
                {
                    // Work with the low-level structure element dictionary directly; changes made to it
                    // are reflected in the inner structure of the PDF file.
                    PdfDictionary headerTagDict = tagContext.GetPointerStructElem(tagPointer).GetPdfObject();
                    headerTagDict.Put(PdfName.ID, headerId);
                    idTree.AddEntry(headerId.GetValue(), headerTagDict);
                }

                base.Draw(drawContext);
            }
Пример #5
0
 /// <summary>
 /// Removes a page tag from its parent and, when that leaves the parent empty, removes the parent
 /// from its own parent as well (recursively) and frees the parent's indirect reference.
 /// </summary>
 /// <remarks>
 /// A parent is only cleaned up when it is not connected to a model element, has no kids left and is
 /// not the root tag. Parents that are not <c>PdfStructElem</c> instances are ignored.
 /// </remarks>
 /// <param name="pageTag">the kid to remove</param>
 /// <param name="parent">the parent the kid is removed from</param>
 /// <exception cref="PdfException">
 /// if the parent is already flushed and <paramref name="pageTag"/> is a <c>PdfMcr</c>
 /// </exception>
 private void RemovePageTagFromParent(IPdfStructElem pageTag, IPdfStructElem parent)
 {
     if (parent is PdfStructElem)
     {
         PdfStructElem structParent = (PdfStructElem)parent;
         if (!structParent.IsFlushed())
         {
             structParent.RemoveKid(pageTag);
             PdfDictionary parentObject = structParent.GetPdfObject();
             if (!connectedStructToModel.ContainsKey(parentObject) && parent.GetKids().Count == 0 && parentObject != rootTagElement
                 .GetPdfObject())
             {
                 RemovePageTagFromParent(structParent, parent.GetParent());
                 // A structure element dictionary may be a direct object, in which case there is
                 // no indirect reference to free.
                 if (parentObject.GetIndirectReference() != null)
                 {
                     parentObject.GetIndirectReference().SetFree();
                 }
             }
         }
         else
         {
             if (pageTag is PdfMcr)
             {
                 throw new PdfException(PdfException.CannotRemoveTagBecauseItsParentIsFlushed);
             }
         }
     }
 }
Пример #6
0
 /// <summary>
 /// Removes a page tag from its parent and, when that leaves the parent empty, removes the parent
 /// from its own parent as well (recursively) and frees the parent's indirect reference, if any.
 /// </summary>
 /// <remarks>
 /// A parent is only cleaned up when the waiting tags manager has no object for it, it has no kids
 /// left and its own parent is not the structure tree root. Parents that are not
 /// <c>PdfStructElem</c> instances are ignored.
 /// </remarks>
 /// <param name="pageTag">the kid to remove</param>
 /// <param name="parent">the parent the kid is removed from</param>
 /// <exception cref="PdfException">
 /// if the parent is already flushed and <paramref name="pageTag"/> is a <c>PdfMcr</c>
 /// </exception>
 private void RemovePageTagFromParent(IStructureNode pageTag, IStructureNode parent)
 {
     if (!(parent is PdfStructElem))
     {
         return;
     }

     PdfStructElem parentElem = (PdfStructElem)parent;
     if (parentElem.IsFlushed())
     {
         if (pageTag is PdfMcr)
         {
             throw new PdfException(PdfException.CannotRemoveTagBecauseItsParentIsFlushed);
         }
         return;
     }

     parentElem.RemoveKid(pageTag);
     PdfDictionary parentDict = parentElem.GetPdfObject();

     // Keep the parent when it is still needed: it is a waiting tag, it still has kids,
     // or it sits directly under the structure tree root.
     if (waitingTagsManager.GetObjForStructDict(parentDict) != null
         || parent.GetKids().Count != 0
         || parentElem.GetParent() is PdfStructTreeRoot)
     {
         return;
     }

     RemovePageTagFromParent(parentElem, parent.GetParent());
     PdfIndirectReference parentRef = parentDict.GetIndirectReference();
     if (parentRef != null)
     {
         // TODO how about possible references to structure element from refs or structure destination for instance?
         parentRef.SetFree();
     }
 }
Пример #7
0
        /// <summary>Removes the current tag.</summary>
        /// <remarks>
        /// Removes the current tag. Its kids, if any, become kids of the current tag's parent and are
        /// inserted starting at the index reported for the removed tag.
        /// After this call this
        /// <c>TagTreePointer</c>
        /// points at the former parent of the removed tag.
        /// <br /><br />
        /// The root tag cannot be removed, and neither can a tag whose parent is already flushed;
        /// in both of these cases an exception is thrown.
        /// </remarks>
        /// <returns>
        /// this
        /// <see cref="TagTreePointer"/>
        /// instance.
        /// </returns>
        public virtual iText.Kernel.Pdf.Tagutils.TagTreePointer RemoveTag()
        {
            PdfStructElem  tagToRemove = GetCurrentStructElem();
            IStructureNode parentNode  = tagToRemove.GetParent();

            // A tag sitting directly under the structure tree root is the root tag.
            if (parentNode is PdfStructTreeRoot)
            {
                throw new PdfException(PdfException.CannotRemoveDocumentRootTag);
            }

            IList <IStructureNode> orphans   = tagToRemove.GetKids();
            PdfStructElem          newParent = (PdfStructElem)parentNode;

            if (newParent.IsFlushed())
            {
                throw new PdfException(PdfException.CannotRemoveTagBecauseItsParentIsFlushed);
            }

            // remove waiting tag state if tag is removed
            Object waitingObj = tagStructureContext.GetWaitingTagsManager().GetObjForStructDict(tagToRemove.GetPdfObject());
            tagStructureContext.GetWaitingTagsManager().RemoveWaitingState(waitingObj);

            int insertAt = newParent.RemoveKid(tagToRemove);

            PdfIndirectReference removedRef = tagToRemove.GetPdfObject().GetIndirectReference();
            if (removedRef != null)
            {
                // TODO how about possible references to structure element from refs or structure destination for instance?
                removedRef.SetFree();
            }

            // Re-attach the kids of the removed tag to its former parent, preserving their order.
            foreach (IStructureNode orphan in orphans)
            {
                if (orphan is PdfStructElem)
                {
                    newParent.AddKid(insertAt, (PdfStructElem)orphan);
                }
                else
                {
                    PdfMcr movedMcr = PrepareMcrForMovingToNewParent((PdfMcr)orphan, newParent);
                    newParent.AddKid(insertAt, movedMcr);
                }
                insertAt++;
            }

            tagToRemove.GetPdfObject().Clear();
            SetCurrentStructElem(newParent);
            return this;
        }
        /// <summary>
        /// Combines the given attributes dictionary with the attributes already present on the backing
        /// structure element and stores the combined result on that element.
        /// </summary>
        /// <param name="attributes">attributes dictionary to add</param>
        /// <returns>this instance</returns>
        public override AccessibilityProperties AddAttributes(PdfDictionary attributes)
        {
            PdfObject existing = backingElem.GetAttributes(false);
            PdfObject merged   = CombineAttributesList(
                existing,
                JavaCollectionsUtil.SingletonList(attributes),
                backingElem.GetPdfObject().GetAsNumber(PdfName.R));

            backingElem.SetAttributes(merged);
            return this;
        }
Пример #9
0
        /// <summary>
        /// Returns the current structure element, making it indirect first when its PDF object
        /// has no indirect reference yet.
        /// </summary>
        /// <returns>the current structure element</returns>
        private PdfStructElem GetCurrentElemEnsureIndirect()
        {
            PdfStructElem elem = GetCurrentStructElem();
            bool isDirect = elem.GetPdfObject().GetIndirectReference() == null;

            if (isDirect)
            {
                elem.MakeIndirect(GetDocument());
            }
            return elem;
        }
Пример #10
0
        /// <summary>
        /// Checks that the /Pg entry of the element matches the given kid page, writing the kid page's
        /// indirect reference into the element when the entry is absent.
        /// </summary>
        /// <param name="elem">structure element whose /Pg entry is inspected</param>
        /// <param name="kidPage">page dictionary of the kid</param>
        /// <returns>result of comparing <paramref name="kidPage"/> with the element's (possibly just assigned) page</returns>
        private bool EnsureElementPageEqualsKidPage(PdfStructElem elem, PdfDictionary kidPage)
        {
            PdfObject elemPage = elem.GetPdfObject().Get(PdfName.Pg);

            if (elemPage != null)
            {
                return kidPage.Equals(elemPage);
            }

            // Explicitly using object indirect reference here in order to correctly process released objects.
            elem.Put(PdfName.Pg, kidPage.GetIndirectReference());
            elemPage = kidPage;
            return kidPage.Equals(elemPage);
        }
Пример #11
0
 /// <summary>
 /// Removes the waiting-tag entry of the given structure element and flushes the element together
 /// with its kids when its parent is an already flushed <c>PdfStructElem</c>.
 /// </summary>
 /// <param name="structElem">structure element to process; nothing is done when it is <c>null</c></param>
 private void RemoveWaitingStateAndFlushIfParentFlushed(PdfStructElem structElem)
 {
     if (structElem == null)
     {
         return;
     }

     waitingTagToAssociatedObj.JRemove(structElem.GetPdfObject());

     IStructureNode parentNode = structElem.GetParent();
     bool parentAlreadyFlushed = parentNode is PdfStructElem && ((PdfStructElem)parentNode).IsFlushed();
     if (parentAlreadyFlushed)
     {
         FlushStructElementAndItKids(structElem);
     }
 }
Пример #12
0
        /// <summary>
        /// Drops the waiting state of the given tag (in both lookup maps) and flushes the tag
        /// together with its kids.
        /// </summary>
        /// <param name="tagStruct">structure element to flush</param>
        /// <returns>parent of the flushed tag, obtained before flushing</returns>
        internal virtual IStructureNode FlushTag(PdfStructElem tagStruct)
        {
            Object waitingObj = waitingTagToAssociatedObj.JRemove(tagStruct.GetPdfObject());
            if (waitingObj != null)
            {
                // Keep the reverse map in sync with the forward one.
                associatedObjToWaitingTag.JRemove(waitingObj);
            }

            // The parent is fetched up front and returned after the tag has been flushed.
            IStructureNode tagParent = tagStruct.GetParent();
            FlushStructElementAndItKids(tagStruct);
            return tagParent;
        }
Пример #13
0
        /// <summary>
        /// Drops the connection between the given tag and its model element (in both lookup maps)
        /// and flushes the tag together with its kids.
        /// </summary>
        /// <param name="tagStruct">structure element to flush</param>
        /// <returns>parent of the flushed tag, obtained before flushing</returns>
        internal virtual IPdfStructElem FlushTag(PdfStructElem tagStruct)
        {
            IAccessibleElement connectedModel = connectedStructToModel.JRemove(tagStruct.GetPdfObject());
            if (connectedModel != null)
            {
                // Keep the reverse map in sync with the forward one.
                connectedModelToStruct.JRemove(connectedModel);
            }

            // The parent is fetched up front and returned after the tag has been flushed.
            IPdfStructElem tagParent = tagStruct.GetParent();
            FlushStructElementAndItKids(tagStruct);
            return tagParent;
        }
Пример #14
0
 /// <summary>Returns the structure element this pointer currently points at, after validating it.</summary>
 /// <returns>the current structure element</returns>
 /// <exception cref="PdfException">
 /// if the current element is flushed, or if its indirect reference has been freed (the element was removed)
 /// </exception>
 internal virtual PdfStructElem GetCurrentStructElem()
 {
     if (currentStructElem.IsFlushed())
     {
         throw new PdfException(PdfException.TagTreePointerIsInInvalidStateItPointsAtFlushedElementUseMoveToRoot);
     }
     // The element's dictionary may still be a direct object (no indirect reference); only an
     // existing reference can be checked for having been freed.
     if (currentStructElem.GetPdfObject().GetIndirectReference() != null
         && currentStructElem.GetPdfObject().GetIndirectReference().IsFree())
     {
         // is removed
         throw new PdfException(PdfException.TagTreePointerIsInInvalidStateItPointsAtRemovedElementUseMoveToRoot);
     }
     return currentStructElem;
 }
Пример #15
0
 /// <summary>
 /// Recursively flushes the given structure element and its <c>PdfStructElem</c> kids, kids first.
 /// An element registered as a waiting tag is skipped together with its whole subtree.
 /// </summary>
 /// <param name="elem">structure element to flush</param>
 private void FlushStructElementAndItKids(PdfStructElem elem)
 {
     bool isWaiting = waitingTagToAssociatedObj.ContainsKey(elem.GetPdfObject());

     if (!isWaiting)
     {
         foreach (IStructureNode child in elem.GetKids())
         {
             if (child is PdfStructElem)
             {
                 FlushStructElementAndItKids((PdfStructElem)child);
             }
         }
         elem.Flush();
     }
 }
Пример #16
0
 /// <summary>
 /// Recursively flushes the given structure element and its <c>PdfStructElem</c> kids, kids first.
 /// An element still connected to a model element is skipped together with its whole subtree.
 /// </summary>
 /// <param name="elem">structure element to flush</param>
 private void FlushStructElementAndItKids(PdfStructElem elem)
 {
     bool isConnected = connectedStructToModel.ContainsKey(elem.GetPdfObject());

     if (!isConnected)
     {
         foreach (IPdfStructElem child in elem.GetKids())
         {
             if (child is PdfStructElem)
             {
                 FlushStructElementAndItKids((PdfStructElem)child);
             }
         }
         elem.Flush();
     }
 }
Пример #17
0
 /// <summary>
 /// Removes the connection between a structure element and its model element, copying the model's
 /// role and accessibility properties onto the structure element before letting go of it.
 /// </summary>
 /// <remarks>
 /// When the structure element has no connected model element, nothing is copied. In either case
 /// the element and its kids are flushed if the element reports no parent.
 /// </remarks>
 /// <param name="structElem">structure element to disconnect; nothing is done when it is <c>null</c></param>
 private void RemoveStructToModelConnection(PdfStructElem structElem)
 {
     if (structElem != null)
     {
         IAccessibleElement element = connectedStructToModel.JRemove(structElem.GetPdfObject());
         // JRemove yields null when the struct element was not connected to any model element.
         if (element != null)
         {
             structElem.SetRole(element.GetRole());
             if (element.GetAccessibilityProperties() != null)
             {
                 element.GetAccessibilityProperties().SetToStructElem(structElem);
             }
         }
         if (structElem.GetParent() == null)
         {
             // a missing parent is taken to mean that the parent is flushed
             FlushStructElementAndItKids(structElem);
         }
     }
 }
Пример #18
0
        // it is StructTreeRoot
        // should never happen as we always should have only one root tag and we don't remove it
        /// <summary>
        /// Flushes the given parent element when none of its kids prevents it, then repeats the check
        /// for the parent's own parent.
        /// </summary>
        /// <remarks>
        /// Nothing is done for a parent that is already flushed, is connected to a model element or is
        /// the root tag. A kid prevents flushing when it is a <c>PdfMcr</c> whose page is neither
        /// flushed nor equal to the current page, or when it is a <c>PdfStructElem</c>.
        /// </remarks>
        /// <param name="parent">structure element that is a candidate for flushing</param>
        /// <param name="currentPage">page whose marked content references are considered done</param>
        private void FlushParentIfBelongsToPage(PdfStructElem parent, PdfPage currentPage)
        {
            if (parent.IsFlushed()
                || connectedStructToModel.ContainsKey(parent.GetPdfObject())
                || parent.GetPdfObject() == rootTagElement.GetPdfObject())
            {
                return;
            }

            foreach (IPdfStructElem child in parent.GetKids())
            {
                if (child is PdfMcr)
                {
                    PdfDictionary childPage = ((PdfMcr)child).GetPageObject();
                    if (!childPage.IsFlushed() && !childPage.Equals(currentPage.GetPdfObject()))
                    {
                        // Content on another, still unflushed page: the parent must stay.
                        return;
                    }
                }
                else if (child is PdfStructElem)
                {
                    // If a kid is a struct element that was already flushed, the kids list holds null for it
                    // instead of a PdfStructElem. Getting here therefore means some struct element kid
                    // has not been flushed yet.
                    return;
                }
            }

            // The parent of the parent has to be read before the parent itself is flushed.
            IPdfStructElem grandParent = parent.GetParent();
            parent.Flush();
            if (grandParent is PdfStructElem)
            {
                FlushParentIfBelongsToPage((PdfStructElem)grandParent, currentPage);
            }
        }
Пример #19
0
        /// <summary>
        /// Flushes the given parent element when none of its kids prevents it, then repeats the check
        /// for the parent's own parent.
        /// </summary>
        /// <remarks>
        /// Nothing is done for a parent that is already flushed, has an object registered in the waiting
        /// tags manager, or sits directly under the structure tree root. A kid prevents flushing when it
        /// is a <c>PdfMcr</c> whose page is not flushed and is not the current page (always the case
        /// when <paramref name="currentPage"/> is <c>null</c>), or when it is a <c>PdfStructElem</c>.
        /// </remarks>
        /// <param name="parent">structure element that is a candidate for flushing</param>
        /// <param name="currentPage">page whose marked content references are considered done; may be <c>null</c></param>
        internal virtual void FlushParentIfBelongsToPage(PdfStructElem parent, PdfPage currentPage)
        {
            if (parent.IsFlushed()
                || waitingTagsManager.GetObjForStructDict(parent.GetPdfObject()) != null
                || parent.GetParent() is PdfStructTreeRoot)
            {
                return;
            }

            foreach (IStructureNode child in parent.GetKids())
            {
                if (child is PdfMcr)
                {
                    PdfDictionary childPage = ((PdfMcr)child).GetPageObject();
                    if (!childPage.IsFlushed() && (currentPage == null || !childPage.Equals(currentPage.GetPdfObject())))
                    {
                        // Content on another, still unflushed page: the parent must stay.
                        return;
                    }
                }
                else if (child is PdfStructElem)
                {
                    // If a kid is a struct element that was already flushed, the kids list holds null for it
                    // instead of a PdfStructElem. Getting here therefore means some struct element kid
                    // has not been flushed yet.
                    return;
                }
            }

            // The parent of the parent has to be read before the parent itself is flushed.
            IStructureNode grandParent = parent.GetParent();
            parent.Flush();
            if (grandParent is PdfStructElem)
            {
                FlushParentIfBelongsToPage((PdfStructElem)grandParent, currentPage);
            }
        }
Пример #20
0
        /// <summary>
        /// Appends a textual dump of the contents of the page referenced by the element's /Pg entry
        /// to the given builder. Elements without a /Pg dictionary are skipped.
        /// </summary>
        /// <param name="elem">structure element whose page contents are dumped</param>
        /// <param name="builder">builder receiving the output</param>
        private static void ProcessStructElem(PdfStructElem elem, StringBuilder builder)
        {
            PdfDictionary page = elem.GetPdfObject().GetAsDictionary(PdfName.Pg);

            if (page != null)
            {
                PdfStream contents = page.GetAsStream(PdfName.Contents);
                string dump;

                if (contents == null)
                {
                    // /Contents is not a single stream: print it as an array instead.
                    dump = "Contents array: " + page.GetAsArray(PdfName.Contents) + "\n";
                }
                else
                {
                    dump = "Content: \n" + Encoding.UTF8.GetString(contents.GetBytes()) + "\n";
                }
                builder.Append(dump);
            }
        }
Пример #21
0
        /// <summary>
        /// Moves the given former kids of the structure tree root under the root tag, keeping the kids
        /// that preceded the root tag in front of its existing kids and appending the rest.
        /// </summary>
        /// <param name="rootKids">kids of the structure tree root; each one is cast to <c>PdfStructElem</c></param>
        private void AddStructTreeRootKidsToTheRootTag(IList <IStructureNode> rootKids)
        {
            int  insertIndex      = 0;
            bool originalRootSeen = false;

            foreach (IStructureNode node in rootKids)
            {
                // StructTreeRoot kids are always PdfStructElement, so it is safe to cast here
                PdfStructElem kid = (PdfStructElem)node;
                if (kid.GetPdfObject() == rootTagElement.GetPdfObject())
                {
                    originalRootSeen = true;
                    continue;
                }

                // This flag is used to "flatten" possible deep "stacking" of the tag structure in case of
                // multiple page copying operations, which could happen due to the wrapping of all the kids in
                // the createNewRootTag or ensureExistingRootTagIsDocument methods. Role mappings need not be
                // resolved here, because the role that was set is known exactly.
                bool flattenKid = PdfName.Document.Equals(kid.GetRole());
                if (flattenKid && kid.GetNamespace() != null && context.TargetTagStructureVersionIs2())
                {
                    // only tags of document role in a standard structure namespace are flattened
                    String namespaceName = kid.GetNamespace().GetNamespaceName();
                    flattenKid = StandardNamespaces.PDF_1_7.Equals(namespaceName)
                                 || StandardNamespaces.PDF_2_0.Equals(namespaceName);
                }

                if (originalRootSeen)
                {
                    rootTagElement.AddKid(kid);
                }
                else
                {
                    rootTagElement.AddKid(insertIndex, kid);
                    // A kid that is about to be flattened advances the index by the number of its own kids.
                    if (flattenKid)
                    {
                        insertIndex += kid.GetKids().Count;
                    }
                    else
                    {
                        insertIndex += 1;
                    }
                }

                if (flattenKid)
                {
                    RemoveOldRoot(kid);
                }
            }
        }
Пример #22
0
 /// <summary>Registers the association between an object and a waiting tag in both lookup maps.</summary>
 /// <param name="associatedObj">object associated with the waiting tag</param>
 /// <param name="structElem">structure element of the waiting tag</param>
 /// <returns>whatever the tag-to-object map's <c>Put</c> call returns for the structure element's dictionary</returns>
 internal virtual Object SaveAssociatedObjectForWaitingTag(Object associatedObj, PdfStructElem structElem)
 {
     associatedObjToWaitingTag.Put(associatedObj, structElem);

     Object putResult = waitingTagToAssociatedObj.Put(structElem.GetPdfObject(), associatedObj);
     return putResult;
 }
Пример #23
0
 /// <summary>Looks up the model element connected to the given structure element.</summary>
 /// <param name="struct">structure element whose PDF object is used as the lookup key</param>
 /// <returns>the result of the lookup in the struct-to-model map</returns>
 internal virtual IAccessibleElement GetModelConnectedToStruct(PdfStructElem @struct)
 {
     IAccessibleElement connectedModel = connectedStructToModel.Get(@struct.GetPdfObject());
     return connectedModel;
 }
 /// <summary>
 /// Copies the accessibility properties that are set on this instance (actual text, alternate
 /// description, expansion, language and attributes) onto the given structure element.
 /// </summary>
 /// <param name="elem">structure element receiving the properties</param>
 internal virtual void SetToStructElem(PdfStructElem elem) {
     string actualText = GetActualText();
     if (actualText != null) {
         elem.SetActualText(new PdfString(actualText));
     }

     string alternateDescription = GetAlternateDescription();
     if (alternateDescription != null) {
         elem.SetAlt(new PdfString(alternateDescription));
     }

     string expansion = GetExpansion();
     if (expansion != null) {
         elem.SetE(new PdfString(expansion));
     }

     string language = GetLanguage();
     if (language != null) {
         elem.SetLang(new PdfString(language));
     }

     IList<PdfDictionary> attributesToAdd = GetAttributesList();
     if (attributesToAdd.Count <= 0) {
         return;
     }

     // New attributes are combined with those the element already has.
     PdfObject existing = elem.GetAttributes(false);
     PdfObject merged = CombineAttributesList(existing, attributesToAdd, elem.GetPdfObject().GetAsNumber(PdfName.R));
     elem.SetAttributes(merged);
 }
Пример #25
0
        /// <summary>
        /// Copies the accessibility properties that are set (non-null, or non-empty for lists) onto
        /// the given structure element.
        /// </summary>
        /// <remarks>
        /// Text properties are written as <c>PdfString</c> values created with
        /// <c>PdfEncodings.UNICODE_BIG</c>. New attributes are combined with the attributes the
        /// element already has.
        /// </remarks>
        /// <param name="properties">source of the accessibility properties</param>
        /// <param name="elem">structure element receiving the properties</param>
        internal static void Apply(AccessibilityProperties properties, PdfStructElem elem)
        {
            string actualText = properties.GetActualText();
            if (actualText != null)
            {
                elem.SetActualText(new PdfString(actualText, PdfEncodings.UNICODE_BIG));
            }

            string alternateDescription = properties.GetAlternateDescription();
            if (alternateDescription != null)
            {
                elem.SetAlt(new PdfString(alternateDescription, PdfEncodings.UNICODE_BIG));
            }

            string expansion = properties.GetExpansion();
            if (expansion != null)
            {
                elem.SetE(new PdfString(expansion, PdfEncodings.UNICODE_BIG));
            }

            string language = properties.GetLanguage();
            if (language != null)
            {
                elem.SetLang(new PdfString(language, PdfEncodings.UNICODE_BIG));
            }

            IList <PdfStructureAttributes> attributesToAdd = properties.GetAttributesList();
            if (attributesToAdd.Count > 0)
            {
                PdfObject existing = elem.GetAttributes(false);
                PdfObject merged   = CombineAttributesList(existing, -1, attributesToAdd, elem.GetPdfObject().GetAsNumber(PdfName.R));
                elem.SetAttributes(merged);
            }

            string phoneme = properties.GetPhoneme();
            if (phoneme != null)
            {
                elem.SetPhoneme(new PdfString(phoneme, PdfEncodings.UNICODE_BIG));
            }

            string phoneticAlphabet = properties.GetPhoneticAlphabet();
            if (phoneticAlphabet != null)
            {
                elem.SetPhoneticAlphabet(new PdfName(phoneticAlphabet));
            }

            if (properties.GetNamespace() != null)
            {
                elem.SetNamespace(properties.GetNamespace());
            }

            // Each referenced pointer contributes the structure element it currently points at.
            foreach (TagTreePointer refPointer in properties.GetRefsList())
            {
                elem.AddRef(refPointer.GetCurrentStructElem());
            }
        }
Пример #26
0
 /// <summary>Records the connection between a model element and a structure element in both lookup maps.</summary>
 /// <param name="element">model element</param>
 /// <param name="structElem">structure element; its PDF object is the key of the reverse mapping</param>
 internal virtual void SaveConnectionBetweenStructAndModel(IAccessibleElement element, PdfStructElem structElem)
 {
     // model -> struct
     connectedModelToStruct[element] = structElem;

     // struct dictionary -> model
     connectedStructToModel[structElem.GetPdfObject()] = element;
 }