Пример #1
0
        /// <summary>
        /// Checks whether the current root tag is (or resolves to) a standard Document role and,
        /// if it is not, either reports the mapping issue or turns the root into a Document tag.
        /// </summary>
        /// <remarks>
        /// The root role is looked up twice: once through <c>GetRoleMappingResolver</c> (role as it is) and
        /// once through <c>ResolveMappingToStandardOrDomainSpecificRole</c> (role after resolving).
        /// <br/><br/>
        /// If the resolved role is a standard Document, nothing is changed. If the role was a standard
        /// Document before resolving but not after, the issue is logged. In every other case the root's
        /// kids are passed to <c>WrapAllKidsInTag</c> together with the current role and namespace, and the
        /// root role is set to Document; when the target tag structure version is 2 the document default
        /// namespace is also assigned to the root and registered.
        /// </remarks>
        private void EnsureExistingRootTagIsDocument()
        {
            IRoleMappingResolver directMapping = context.GetRoleMappingResolver(
                rootTagElement.GetRole().GetValue(), rootTagElement.GetNamespace());
            bool wasDocument = directMapping.CurrentRoleIsStandard()
                               && StandardRoles.DOCUMENT.Equals(directMapping.GetRole());

            IRoleMappingResolver resolvedMapping = context.ResolveMappingToStandardOrDomainSpecificRole(
                rootTagElement.GetRole().GetValue(), rootTagElement.GetNamespace());
            bool isDocument = resolvedMapping != null
                              && resolvedMapping.CurrentRoleIsStandard()
                              && StandardRoles.DOCUMENT.Equals(resolvedMapping.GetRole());

            if (isDocument)
            {
                // Root already ends up as a standard Document: nothing to fix.
                return;
            }

            if (wasDocument)
            {
                // Looked like Document before resolving, but resolves to something else: only report it.
                LogCreatedRootTagHasMappingIssue(rootTagElement.GetNamespace(), resolvedMapping);
                return;
            }

            WrapAllKidsInTag(rootTagElement, rootTagElement.GetRole(), rootTagElement.GetNamespace());
            rootTagElement.SetRole(PdfName.Document);
            if (context.TargetTagStructureVersionIs2())
            {
                rootTagElement.SetNamespace(context.GetDocumentDefaultNamespace());
                context.EnsureNamespaceRegistered(context.GetDocumentDefaultNamespace());
            }
        }
Пример #2
0
        /// <summary>Transforms root tags in a way that complies with the PDF References.</summary>
        /// <remarks>
        /// Transforms root tags in a way that complies with the PDF References.
        /// <br/><br/>
        /// PDF Reference
        /// 10.7.3 Grouping Elements:
        /// <br/><br/>
        /// For most content extraction formats, the document must be a tree with a single top-level element;
        /// the structure tree root (identified by the StructTreeRoot entry in the document catalog) must have
        /// only one child in its K (kids) array. If the PDF file contains a complete document, the structure
        /// type Document is recommended for this top-level element in the logical structure hierarchy. If the
        /// file contains a well-formed document fragment, one of the structure types Part, Art, Sect, or Div
        /// may be used instead.
        /// <br/><br/>
        /// While this method runs, <c>forbidUnknownRoles</c> is temporarily set to <c>false</c>; the previous
        /// value is restored when the method exits, including when it exits with an exception.
        /// </remarks>
        public virtual void NormalizeDocumentRootTag()
        {
            // In this method we could deal with an existing document, so we don't want to throw
            // exceptions about unknown roles here.
            bool forbid = forbidUnknownRoles;

            forbidUnknownRoles = false;
            try
            {
                IList <IPdfStructElem> rootKids = document.GetStructTreeRoot().GetKids();

                if (rootKids.Count == 1 && allowedRootTagRoles.Contains(rootKids[0].GetRole()))
                {
                    // A single kid with an allowed role is already a valid root tag.
                    rootTagElement = (PdfStructElem)rootKids[0];
                }
                else
                {
                    PdfStructElem prevRootTag = rootTagElement;
                    // Drop the current kids entry; the single root tag is (re)added below.
                    document.GetStructTreeRoot().GetPdfObject().Remove(PdfName.K);
                    if (prevRootTag == null)
                    {
                        rootTagElement = document.GetStructTreeRoot().AddKid(new PdfStructElem(document, PdfName.Document));
                    }
                    else
                    {
                        document.GetStructTreeRoot().AddKid(rootTagElement);
                        if (!PdfName.Document.Equals(rootTagElement.GetRole()))
                        {
                            WrapAllKidsInTag(rootTagElement, rootTagElement.GetRole());
                            rootTagElement.SetRole(PdfName.Document);
                        }
                    }

                    // Former root kids that preceded the root tag are inserted at the front of the root
                    // tag (in their original order); those that followed it are appended at the end.
                    int  originalRootKidsIndex = 0;
                    bool isBeforeOriginalRoot  = true;
                    foreach (IPdfStructElem elem in rootKids)
                    {
                        // StructTreeRoot kids are always PdfStructElem, so it is safe to cast here
                        PdfStructElem kid = (PdfStructElem)elem;
                        if (kid.GetPdfObject() == rootTagElement.GetPdfObject())
                        {
                            isBeforeOriginalRoot = false;
                            continue;
                        }
                        bool kidIsDocument = PdfName.Document.Equals(kid.GetRole());
                        if (isBeforeOriginalRoot)
                        {
                            rootTagElement.AddKid(originalRootKidsIndex, kid);
                            // NOTE(review): a Document kid advances the index by its own kids count,
                            // presumably because RemoveOldRoot below replaces it with its kids - confirm.
                            originalRootKidsIndex += kidIsDocument ? kid.GetKids().Count : 1;
                        }
                        else
                        {
                            rootTagElement.AddKid(kid);
                        }
                        if (kidIsDocument)
                        {
                            RemoveOldRoot(kid);
                        }
                    }
                }
            }
            finally
            {
                // Restore the caller's setting even if normalization failed part-way through,
                // otherwise the unknown-roles check would stay disabled.
                forbidUnknownRoles = forbid;
            }
        }
Пример #3
0
 /// <summary>Method for internal usages.</summary>
 /// <remarks>
 /// Method for internal usages.
 /// For every entry of the model-to-struct connection map, copies the accessible element's role to
 /// the connected structure element and, when the element has accessibility properties, applies
 /// them to that structure element as well.
 /// </remarks>
 public virtual void ActualizeTagsProperties()
 {
     foreach (KeyValuePair <IAccessibleElement, PdfStructElem> modelToStruct in connectedModelToStruct)
     {
         IAccessibleElement model = modelToStruct.Key;
         PdfStructElem      tag   = modelToStruct.Value;

         tag.SetRole(model.GetRole());
         if (model.GetAccessibilityProperties() == null)
         {
             // No properties to push to the tag for this element.
             continue;
         }
         model.GetAccessibilityProperties().SetToStructElem(tag);
     }
 }
Пример #4
0
 /// <summary>
 /// Removes the struct-to-model connection of the given structure element and, before letting the
 /// model go, copies the model's role and accessibility properties to the structure element.
 /// </summary>
 /// <remarks>
 /// Does nothing when <paramref name="structElem"/> is <c>null</c> or when no accessible element is
 /// connected to it. If the structure element has no parent after the connection is removed, it is
 /// passed to <c>FlushStructElementAndItKids</c>.
 /// </remarks>
 /// <param name="structElem">structure element whose connection is removed; may be <c>null</c></param>
 private void RemoveStructToModelConnection(PdfStructElem structElem)
 {
     if (structElem == null)
     {
         return;
     }

     IAccessibleElement element = connectedStructToModel.JRemove(structElem.GetPdfObject());
     if (element == null)
     {
         // No model was connected to this tag, so there is nothing to synchronize or release.
         // Previously this case failed with a NullReferenceException on element.GetRole().
         return;
     }

     structElem.SetRole(element.GetRole());
     if (element.GetAccessibilityProperties() != null)
     {
         element.GetAccessibilityProperties().SetToStructElem(structElem);
     }
     if (structElem.GetParent() == null)
     {
         // is flushed
         FlushStructElementAndItKids(structElem);
     }
 }