Exemplo n.º 1
0
        /// <summary>Reads the bounding box stored in the <c>title</c> attribute of a node.</summary>
        /// <remarks>
        /// When no coordinates can be extracted, a list of four zeros is returned and the node is
        /// recorded in <paramref name="unparsedBBoxes"/> under its <c>id</c> attribute, unless
        /// that id is null or already registered.
        /// </remarks>
        /// <param name="node">element containing bbox</param>
        /// <param name="unparsedBBoxes">nodes whose bbox could not be parsed, keyed by element id</param>
        /// <returns>the parsed coordinates, or four zeros when nothing could be parsed</returns>
        internal static IList <float> ParseBBox(iText.StyledXmlParser.Jsoup.Nodes.Node node, IDictionary <String, iText.StyledXmlParser.Jsoup.Nodes.Node
                                                                                                          > unparsedBBoxes)
        {
            Match         titleMatch  = iText.IO.Util.StringUtil.Match(BBOX_PATTERN, node.Attr("title"));
            IList <float> coordinates = new List <float>();

            if (titleMatch.Success)
            {
                Match coordinatesMatch = iText.IO.Util.StringUtil.Match(
                    BBOX_COORDINATE_PATTERN,
                    iText.IO.Util.StringUtil.Group(titleMatch));
                if (coordinatesMatch.Success)
                {
                    // Capture groups holding the coordinates are numbered from 1.
                    for (int group = 1; group <= BBOX_ARRAY_SIZE; group++)
                    {
                        float value = float.Parse(iText.IO.Util.StringUtil.Group(coordinatesMatch, group),
                                                  System.Globalization.CultureInfo.InvariantCulture);
                        coordinates.Add(value);
                    }
                }
            }

            if (coordinates.Count != 0)
            {
                return(coordinates);
            }

            // Nothing parsed: remember the node (once per id) and fall back to an all-zero box.
            String id = node.Attr("id");
            if (id != null && !unparsedBBoxes.ContainsKey(id))
            {
                unparsedBBoxes.Put(id, node);
            }
            return(JavaUtil.ArraysAsList(0f, 0f, 0f, 0f));
        }
Exemplo n.º 2
0
 // doesn't use insertNode, because we don't foster these; and will always have a stack.
 /// <summary>
 /// Attaches a node to the tree being built and, for form-listed elements, registers it
 /// with the current form element when there is one.
 /// </summary>
 /// <param name="node">the node to insert</param>
 private void InsertNode(iText.StyledXmlParser.Jsoup.Nodes.Node node)
 {
     if (stack.Count == 0)
     {
         // if the stack hasn't been set up yet, elements (doctype, comments) go into the doc
         doc.AppendChild(node);
     }
     else if (IsFosterInserts())
     {
         InsertInFosterParent(node);
     }
     else
     {
         CurrentElement().AppendChild(node);
     }

     // connect form controls to their form element
     iText.StyledXmlParser.Jsoup.Nodes.Element element = node as iText.StyledXmlParser.Jsoup.Nodes.Element;
     if (element != null && element.Tag().IsFormListed() && formElement != null)
     {
         formElement.AddElement(element);
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Inserts a node at the foster-parent position derived from the last <c>table</c>
        /// element on the stack.
        /// </summary>
        /// <remarks>
        /// If that table has a parent, the node is placed immediately before the table.
        /// If the table has no parent, the node is appended to the element above the table on
        /// the stack. If there is no table on the stack, the node is appended to the first stack entry.
        /// </remarks>
        /// <param name="in">the node to insert</param>
        internal virtual void InsertInFosterParent(iText.StyledXmlParser.Jsoup.Nodes.Node @in)
        {
            iText.StyledXmlParser.Jsoup.Nodes.Element lastTable = GetFromStack("table");
            bool insertBeforeTable = lastTable != null && lastTable.Parent() != null;

            iText.StyledXmlParser.Jsoup.Nodes.Element fosterParent;
            if (lastTable == null)
            {
                // no table == frag
                fosterParent = stack[0];
            }
            else if (insertBeforeTable)
            {
                fosterParent = (iText.StyledXmlParser.Jsoup.Nodes.Element)lastTable.Parent();
            }
            else
            {
                fosterParent = AboveOnStack(lastTable);
            }

            if (!insertBeforeTable)
            {
                fosterParent.AppendChild(@in);
                return;
            }

            // last table cannot be null by this point.
            Validate.NotNull(lastTable);
            lastTable.Before(@in);
        }
Exemplo n.º 4
0
 /// <summary>
 /// Called when leaving a source node: after an element whose node name is a safe tag,
 /// moves the destination cursor up to its parent.
 /// </summary>
 /// <param name="source">the source node being left</param>
 /// <param name="depth">depth of the node (not used here)</param>
 public void Tail(iText.StyledXmlParser.Jsoup.Nodes.Node source, int depth)
 {
     if (!(source is iText.StyledXmlParser.Jsoup.Nodes.Element))
     {
         return;
     }
     if (!this._enclosing.whitelist.IsSafeTag(source.NodeName()))
     {
         return;
     }
     this.destination = (iText.StyledXmlParser.Jsoup.Nodes.Element) this.destination.Parent();
 }
Exemplo n.º 5
0
        /// <summary>
        /// Wraps JSoup nodes into pdfHTML
        /// <see cref="iText.StyledXmlParser.Node.INode"/>
        /// classes.
        /// </summary>
        /// <remarks>
        /// Children are wrapped recursively and attached to the wrapper of their parent.
        /// Comments and XML declarations are skipped silently; any other node type without a
        /// wrapper is logged as an error. In both cases no wrapper is created for the node, so
        /// wrapped children of such a node are dropped rather than attached.
        /// </remarks>
        /// <param name="jsoupNode">the JSoup node instance</param>
        /// <returns>
        /// the
        /// <see cref="iText.StyledXmlParser.Node.INode"/>
        /// instance, or <c>null</c> when the node has no wrapper
        /// </returns>
        private INode WrapJsoupHierarchy(iText.StyledXmlParser.Jsoup.Nodes.Node jsoupNode)
        {
            INode resultNode = null;

            // Document is tested before Element, preserving the original check order.
            if (jsoupNode is Document)
            {
                resultNode = new JsoupDocumentNode((Document)jsoupNode);
            }
            else if (jsoupNode is TextNode)
            {
                resultNode = new JsoupTextNode((TextNode)jsoupNode);
            }
            else if (jsoupNode is iText.StyledXmlParser.Jsoup.Nodes.Element)
            {
                resultNode = new JsoupElementNode((iText.StyledXmlParser.Jsoup.Nodes.Element)jsoupNode);
            }
            else if (jsoupNode is DataNode)
            {
                resultNode = new JsoupDataNode((DataNode)jsoupNode);
            }
            else if (jsoupNode is DocumentType)
            {
                resultNode = new JsoupDocumentTypeNode((DocumentType)jsoupNode);
            }
            else if (jsoupNode is Comment || jsoupNode is XmlDeclaration)
            {
                // Ignore. We should do this to avoid redundant log message
            }
            else
            {
                logger.Error(MessageFormatUtil.Format(iText.StyledXmlParser.LogMessageConstant.ERROR_PARSING_COULD_NOT_MAP_NODE
                                                      , jsoupNode.GetType()));
            }

            foreach (iText.StyledXmlParser.Jsoup.Nodes.Node node in jsoupNode.ChildNodes())
            {
                INode childNode = WrapJsoupHierarchy(node);
                // resultNode is null for skipped/unmapped nodes; without this guard a mappable
                // child of such a node would cause a NullReferenceException.
                if (childNode != null && resultNode != null)
                {
                    resultNode.AddChild(childNode);
                }
            }
            return(resultNode);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Wraps JSoup nodes into pdfHTML
        /// <see cref="iText.StyledXmlParser.Node.INode"/>
        /// classes.
        /// </summary>
        /// <remarks>
        /// Children are wrapped recursively and attached to the wrapper of their parent.
        /// Comments are skipped silently; any other node type without a wrapper is logged as
        /// an error. In both cases no wrapper is created for the node, so wrapped children of
        /// such a node are dropped rather than attached.
        /// </remarks>
        /// <param name="jsoupNode">the JSoup node instance</param>
        /// <returns>
        /// the
        /// <see cref="iText.StyledXmlParser.Node.INode"/>
        /// instance, or <c>null</c> when the node has no wrapper
        /// </returns>
        private INode WrapJsoupHierarchy(iText.StyledXmlParser.Jsoup.Nodes.Node jsoupNode)
        {
            INode resultNode = null;

            // Document is tested before Element, preserving the original check order.
            if (jsoupNode is Document)
            {
                resultNode = new JsoupDocumentNode((Document)jsoupNode);
            }
            else if (jsoupNode is TextNode)
            {
                resultNode = new JsoupTextNode((TextNode)jsoupNode);
            }
            else if (jsoupNode is iText.StyledXmlParser.Jsoup.Nodes.Element)
            {
                resultNode = new JsoupElementNode((iText.StyledXmlParser.Jsoup.Nodes.Element)jsoupNode);
            }
            else if (jsoupNode is DataNode)
            {
                resultNode = new JsoupDataNode((DataNode)jsoupNode);
            }
            else if (jsoupNode is DocumentType)
            {
                resultNode = new JsoupDocumentTypeNode((DocumentType)jsoupNode);
            }
            else if (jsoupNode is Comment)
            {
                // Comments are intentionally not wrapped and not reported.
            }
            else
            {
                logger.Error(MessageFormatUtil.Format("Could not map node type: {0}", jsoupNode.GetType()));
            }

            foreach (iText.StyledXmlParser.Jsoup.Nodes.Node node in jsoupNode.ChildNodes())
            {
                INode childNode = WrapJsoupHierarchy(node);
                // resultNode is null for skipped/unmapped nodes; without this guard a mappable
                // child of such a node would cause a NullReferenceException.
                if (childNode != null && resultNode != null)
                {
                    resultNode.AddChild(childNode);
                }
            }
            return(resultNode);
        }
Exemplo n.º 7
0
 /// <summary>
 /// Called when entering a source node: copies safe content into the destination tree and
 /// counts what is discarded.
 /// </summary>
 /// <remarks>
 /// Elements with a safe tag are re-created through the enclosing cleaner, appended to the
 /// current destination and become the new destination. Text nodes are always copied.
 /// Data nodes are copied only when their parent's node name is a safe tag. Everything
 /// else increments the discard counter, except an unsafe element that is the root.
 /// </remarks>
 /// <param name="source">the source node being entered</param>
 /// <param name="depth">depth of the node (not used here)</param>
 public void Head(iText.StyledXmlParser.Jsoup.Nodes.Node source, int depth)
 {
     if (source is iText.StyledXmlParser.Jsoup.Nodes.Element)
     {
         iText.StyledXmlParser.Jsoup.Nodes.Element element = (iText.StyledXmlParser.Jsoup.Nodes.Element)source;
         if (!this._enclosing.whitelist.IsSafeTag(element.TagName()))
         {
             // not a safe tag, so don't add. don't count root against discarded.
             if (source != this.root)
             {
                 this.numDiscarded++;
             }
             return;
         }

         // safe, clone and copy safe attrs
         Cleaner.ElementMeta safeMeta = this._enclosing.CreateSafeElement(element);
         iText.StyledXmlParser.Jsoup.Nodes.Element safeCopy = safeMeta.el;
         this.destination.AppendChild(safeCopy);
         this.numDiscarded += safeMeta.numAttribsDiscarded;
         this.destination   = safeCopy;
         return;
     }

     if (source is TextNode)
     {
         TextNode text = (TextNode)source;
         this.destination.AppendChild(new TextNode(text.GetWholeText(), source.BaseUri()));
         return;
     }

     if (source is DataNode && this._enclosing.whitelist.IsSafeTag(source.Parent().NodeName()))
     {
         DataNode data = (DataNode)source;
         this.destination.AppendChild(new DataNode(data.GetWholeData(), source.BaseUri()));
         return;
     }

     // else, we don't care about comments, xml proc instructions, etc
     this.numDiscarded++;
 }
Exemplo n.º 8
0
        /// <summary>Get and align (if needed) bbox of the element.</summary>
        /// <remarks>
        /// For <c>BY_WORDS</c> and <c>BY_WORDS_AND_LINES</c> the bbox of the element's parent is
        /// parsed as well and both boxes are passed to <c>DetectAndFixBrokenBBoxes</c>. For
        /// <c>BY_WORDS_AND_LINES</c> the element's bottom and top entries are first overwritten
        /// with the parent's. For any other positioning the element's own bbox is returned as parsed.
        /// </remarks>
        /// <param name="object">element whose bbox is requested</param>
        /// <param name="textPositioning">positioning mode that decides whether the parent's bbox is used</param>
        /// <param name="unparsedBBoxes">nodes whose bbox could not be parsed, keyed by element id</param>
        /// <returns>the element's bbox coordinates, possibly adjusted</returns>
        internal static IList <float> GetAlignedBBox(iText.StyledXmlParser.Jsoup.Nodes.Element @object, TextPositioning
                                                     textPositioning, IDictionary <String, iText.StyledXmlParser.Jsoup.Nodes.Node> unparsedBBoxes)
        {
            IList <float> elementBox = ParseBBox(@object, unparsedBBoxes);

            bool alignToLine = TextPositioning.BY_WORDS_AND_LINES == textPositioning;
            if (!alignToLine && !(TextPositioning.BY_WORDS == textPositioning))
            {
                // This positioning mode does not need the parent's box.
                return(elementBox);
            }

            IList <float> lineBox = ParseBBox(@object.Parent(), unparsedBBoxes);
            if (alignToLine)
            {
                elementBox[BOTTOM_IDX] = lineBox[BOTTOM_IDX];
                elementBox[TOP_IDX]    = lineBox[TOP_IDX];
            }
            DetectAndFixBrokenBBoxes(@object, elementBox, lineBox, unparsedBBoxes);
            return(elementBox);
        }
Exemplo n.º 9
0
        /// <summary>Inserts a comment token into the tree.</summary>
        /// <remarks>
        /// Bogus comments whose data is longer than one character and starts with <c>!</c> or
        /// <c>?</c> are re-parsed as an element so that they can be inserted as an
        /// <c>XmlDeclaration</c> carrying the parsed attributes. If that re-parse yields no
        /// element, the token is inserted as an ordinary comment.
        /// </remarks>
        /// <param name="commentToken">the comment token to insert</param>
        internal virtual void Insert(Token.Comment commentToken)
        {
            Comment comment = new Comment(commentToken.GetData(), baseUri);

            iText.StyledXmlParser.Jsoup.Nodes.Node insert = comment;
            if (commentToken.bogus)
            {
                // xml declarations are emitted as bogus comments (which is right for html, but not xml)
                // so we do a bit of a hack and parse the data as an element to pull the attributes out
                String data = comment.GetData();
                if (data.Length > 1 && (data.StartsWith("!") || data.StartsWith("?")))
                {
                    Document doc = iText.StyledXmlParser.Jsoup.Jsoup.Parse("<" + data.JSubstring(1, data.Length - 1) + ">", baseUri
                                                                           , iText.StyledXmlParser.Jsoup.Parser.Parser.XmlParser());
                    // Child(0) requires at least one child element; when the data could not be
                    // parsed as an element, keep the plain comment instead of throwing.
                    if (doc.Children().Count > 0)
                    {
                        iText.StyledXmlParser.Jsoup.Nodes.Element el = doc.Child(0);
                        insert = new XmlDeclaration(el.TagName(), comment.BaseUri(), data.StartsWith("!"));
                        insert.Attributes().AddAll(el.Attributes());
                    }
                }
            }
            InsertNode(insert);
        }
Exemplo n.º 10
0
 // Called when leaving a node: appends the closing-tag text for the node's name to the accumulator.
 public void Tail(iText.StyledXmlParser.Jsoup.Nodes.Node node, int depth)
 {
     String closingTag = "</" + node.NodeName() + ">";
     accum.Append(closingTag);
 }
Exemplo n.º 11
0
 /// <summary>Appends the node as a child of the current element.</summary>
 /// <param name="node">the node to append</param>
 private void InsertNode(iText.StyledXmlParser.Jsoup.Nodes.Node node)
 {
     var parent = CurrentElement();
     parent.AppendChild(node);
 }