/// <summary>Reads the bounding box of an element from its "title" attribute.</summary>
/// <remarks>
/// If the attribute does not match the bbox patterns, a box of four zeros is returned and
/// the node is stored in <paramref name="unparsedBBoxes"/> under its "id" attribute,
/// unless an entry with that id is already present.
/// </remarks>
/// <param name="node">element containing bbox</param>
/// <param name="unparsedBBoxes">elements, keyed by id, whose bboxes could not be parsed</param>
/// <returns>parsed bbox, or four zeros when nothing could be parsed</returns>
internal static IList<float> ParseBBox(iText.StyledXmlParser.Jsoup.Nodes.Node node, IDictionary<String, iText.StyledXmlParser.Jsoup.Nodes.Node> unparsedBBoxes) {
    IList<float> parsed = new List<float>();
    Match titleMatch = iText.IO.Util.StringUtil.Match(BBOX_PATTERN, node.Attr("title"));
    if (titleMatch.Success) {
        Match coordsMatch = iText.IO.Util.StringUtil.Match(BBOX_COORDINATE_PATTERN,
            iText.IO.Util.StringUtil.Group(titleMatch));
        if (coordsMatch.Success) {
            // Regex groups are 1-based: group k holds the k-th coordinate.
            for (int groupIdx = 1; groupIdx <= BBOX_ARRAY_SIZE; groupIdx++) {
                String rawCoordinate = iText.IO.Util.StringUtil.Group(coordsMatch, groupIdx);
                parsed.Add(float.Parse(rawCoordinate, System.Globalization.CultureInfo.InvariantCulture));
            }
        }
    }
    if (parsed.Count != 0) {
        return parsed;
    }
    // Nothing parsed: fall back to a zero box and remember the offending node.
    IList<float> zeroBox = JavaUtil.ArraysAsList(0f, 0f, 0f, 0f);
    String id = node.Attr("id");
    if (id != null && !unparsedBBoxes.ContainsKey(id)) {
        unparsedBBoxes.Put(id, node);
    }
    return zeroBox;
}
/// <summary>
/// Appends a node to the document, the foster parent or the current element, and then
/// registers form-listed elements with the current form element (if any).
/// </summary>
/// <param name="node">the node to insert</param>
private void InsertNode(iText.StyledXmlParser.Jsoup.Nodes.Node node) {
    if (stack.Count == 0) {
        // if the stack hasn't been set up yet, elements (doctype, comments) go into the doc
        doc.AppendChild(node);
    } else if (IsFosterInserts()) {
        InsertInFosterParent(node);
    } else {
        CurrentElement().AppendChild(node);
    }

    // connect form controls to their form element
    iText.StyledXmlParser.Jsoup.Nodes.Element element = node as iText.StyledXmlParser.Jsoup.Nodes.Element;
    if (element != null && element.Tag().IsFormListed() && formElement != null) {
        formElement.AddElement(element);
    }
}
/// <summary>Inserts a node relative to the last "table" element found on the stack.</summary>
/// <remarks>
/// If that table has a parent, the node is placed immediately before the table. If the
/// table has no parent, the node is appended to the element above the table on the stack.
/// If there is no table on the stack, the node is appended to the first stack element.
/// </remarks>
/// <param name="in">the node to insert</param>
internal virtual void InsertInFosterParent(iText.StyledXmlParser.Jsoup.Nodes.Node @in) {
    iText.StyledXmlParser.Jsoup.Nodes.Element lastTable = GetFromStack("table");
    iText.StyledXmlParser.Jsoup.Nodes.Element fosterParent;
    bool insertBeforeTable = false;
    if (lastTable == null) {
        // no table == frag
        fosterParent = stack[0];
    } else if (lastTable.Parent() == null) {
        fosterParent = AboveOnStack(lastTable);
    } else {
        fosterParent = (iText.StyledXmlParser.Jsoup.Nodes.Element)lastTable.Parent();
        insertBeforeTable = true;
    }

    if (!insertBeforeTable) {
        fosterParent.AppendChild(@in);
        return;
    }
    // last table cannot be null by this point.
    Validate.NotNull(lastTable);
    lastTable.Before(@in);
}
/// <summary>
/// Moves <c>destination</c> up to its parent when <paramref name="source"/> is an element
/// whose node name is a safe tag according to the enclosing whitelist.
/// </summary>
/// <param name="source">the source node being visited</param>
/// <param name="depth">depth of the node; not used by this method</param>
public void Tail(iText.StyledXmlParser.Jsoup.Nodes.Node source, int depth) {
    if (!(source is iText.StyledXmlParser.Jsoup.Nodes.Element)) {
        return;
    }
    if (!this._enclosing.whitelist.IsSafeTag(source.NodeName())) {
        return;
    }
    this.destination = (iText.StyledXmlParser.Jsoup.Nodes.Element)this.destination.Parent();
}
/// <summary>
/// Wraps JSoup nodes into pdfHTML
/// <see cref="iText.StyledXmlParser.Node.INode"/>
/// classes.
/// </summary>
/// <remarks>
/// Child nodes are wrapped recursively. Comments and XML declarations are skipped silently;
/// any other unmapped node type is logged as an error. In both cases the result is
/// <c>null</c> and wrapped children of such a node are not attached to anything.
/// </remarks>
/// <param name="jsoupNode">the JSoup node instance</param>
/// <returns>
/// the
/// <see cref="iText.StyledXmlParser.Node.INode"/>
/// instance, or <c>null</c> if the node type is ignored or could not be mapped
/// </returns>
private INode WrapJsoupHierarchy(iText.StyledXmlParser.Jsoup.Nodes.Node jsoupNode) {
    INode resultNode = null;
    if (jsoupNode is Document) {
        resultNode = new JsoupDocumentNode((Document)jsoupNode);
    } else if (jsoupNode is TextNode) {
        resultNode = new JsoupTextNode((TextNode)jsoupNode);
    } else if (jsoupNode is iText.StyledXmlParser.Jsoup.Nodes.Element) {
        resultNode = new JsoupElementNode((iText.StyledXmlParser.Jsoup.Nodes.Element)jsoupNode);
    } else if (jsoupNode is DataNode) {
        resultNode = new JsoupDataNode((DataNode)jsoupNode);
    } else if (jsoupNode is DocumentType) {
        resultNode = new JsoupDocumentTypeNode((DocumentType)jsoupNode);
    } else if (jsoupNode is Comment || jsoupNode is XmlDeclaration) {
        // Ignore. We should do this to avoid redundant log message
    } else {
        logger.Error(MessageFormatUtil.Format(iText.StyledXmlParser.LogMessageConstant.ERROR_PARSING_COULD_NOT_MAP_NODE
            , jsoupNode.GetType()));
    }

    foreach (iText.StyledXmlParser.Jsoup.Nodes.Node node in jsoupNode.ChildNodes()) {
        INode childNode = WrapJsoupHierarchy(node);
        // resultNode is null for ignored/unmapped nodes: there is no wrapper to attach the
        // child to, so skip it instead of dereferencing null.
        if (childNode != null && resultNode != null) {
            resultNode.AddChild(childNode);
        }
    }
    return resultNode;
}
/// <summary>
/// Wraps JSoup nodes into pdfHTML
/// <see cref="iText.StyledXmlParser.Node.INode"/>
/// classes.
/// </summary>
/// <remarks>
/// Child nodes are wrapped recursively. Comments are skipped silently; any other unmapped
/// node type is logged as an error. In both cases the result is <c>null</c> and wrapped
/// children of such a node are not attached to anything.
/// </remarks>
/// <param name="jsoupNode">the JSoup node instance</param>
/// <returns>
/// the
/// <see cref="iText.StyledXmlParser.Node.INode"/>
/// instance, or <c>null</c> if the node type is ignored or could not be mapped
/// </returns>
private INode WrapJsoupHierarchy(iText.StyledXmlParser.Jsoup.Nodes.Node jsoupNode) {
    INode resultNode = null;
    if (jsoupNode is Document) {
        resultNode = new JsoupDocumentNode((Document)jsoupNode);
    } else if (jsoupNode is TextNode) {
        resultNode = new JsoupTextNode((TextNode)jsoupNode);
    } else if (jsoupNode is iText.StyledXmlParser.Jsoup.Nodes.Element) {
        resultNode = new JsoupElementNode((iText.StyledXmlParser.Jsoup.Nodes.Element)jsoupNode);
    } else if (jsoupNode is DataNode) {
        resultNode = new JsoupDataNode((DataNode)jsoupNode);
    } else if (jsoupNode is DocumentType) {
        resultNode = new JsoupDocumentTypeNode((DocumentType)jsoupNode);
    } else if (jsoupNode is Comment) {
        // Comments are intentionally skipped without logging.
    } else {
        logger.Error(MessageFormatUtil.Format("Could not map node type: {0}", jsoupNode.GetType()));
    }

    foreach (iText.StyledXmlParser.Jsoup.Nodes.Node node in jsoupNode.ChildNodes()) {
        INode childNode = WrapJsoupHierarchy(node);
        // resultNode is null for ignored/unmapped nodes: there is no wrapper to attach the
        // child to, so skip it instead of dereferencing null.
        if (childNode != null && resultNode != null) {
            resultNode.AddChild(childNode);
        }
    }
    return resultNode;
}
/// <summary>
/// Copies a source node into <c>destination</c> when it is allowed, and counts it as
/// discarded otherwise.
/// </summary>
/// <remarks>
/// Elements with a safe tag are re-created through the enclosing cleaner and become the new
/// <c>destination</c>. Text nodes are always copied. Data nodes are copied only when their
/// parent's node name is a safe tag. Everything else increments <c>numDiscarded</c>, except
/// an unsafe element that is the root.
/// </remarks>
/// <param name="source">the source node being visited</param>
/// <param name="depth">depth of the node; not used by this method</param>
public void Head(iText.StyledXmlParser.Jsoup.Nodes.Node source, int depth) {
    iText.StyledXmlParser.Jsoup.Nodes.Element sourceEl = source as iText.StyledXmlParser.Jsoup.Nodes.Element;
    if (sourceEl != null) {
        if (!this._enclosing.whitelist.IsSafeTag(sourceEl.TagName())) {
            // not a safe tag, so don't add. don't count root against discarded.
            if (source != this.root) {
                this.numDiscarded++;
            }
            return;
        }
        // safe, clone and copy safe attrs
        Cleaner.ElementMeta meta = this._enclosing.CreateSafeElement(sourceEl);
        iText.StyledXmlParser.Jsoup.Nodes.Element destChild = meta.el;
        this.destination.AppendChild(destChild);
        this.numDiscarded += meta.numAttribsDiscarded;
        this.destination = destChild;
        return;
    }

    TextNode sourceText = source as TextNode;
    if (sourceText != null) {
        this.destination.AppendChild(new TextNode(sourceText.GetWholeText(), source.BaseUri()));
        return;
    }

    DataNode sourceData = source as DataNode;
    if (sourceData != null && this._enclosing.whitelist.IsSafeTag(source.Parent().NodeName())) {
        this.destination.AppendChild(new DataNode(sourceData.GetWholeData(), source.BaseUri()));
        return;
    }

    // else, we don't care about comments, xml proc instructions, etc
    this.numDiscarded++;
}
/// <summary>Parses the bbox of an element and, for word-based positioning, aligns it with its parent's bbox.</summary>
/// <remarks>
/// For <c>BY_WORDS</c> and <c>BY_WORDS_AND_LINES</c> the parent's bbox is parsed as well and
/// passed to <c>DetectAndFixBrokenBBoxes</c>. For <c>BY_WORDS_AND_LINES</c> the bottom and top
/// coordinates of the element are first replaced with those of the parent.
/// </remarks>
/// <param name="object">element whose bbox is requested</param>
/// <param name="textPositioning">positioning mode that decides whether alignment is applied</param>
/// <param name="unparsedBBoxes">elements, keyed by id, whose bboxes could not be parsed</param>
/// <returns>the (possibly adjusted) bbox coordinates of the element</returns>
internal static IList<float> GetAlignedBBox(iText.StyledXmlParser.Jsoup.Nodes.Element @object, TextPositioning textPositioning, IDictionary<String, iText.StyledXmlParser.Jsoup.Nodes.Node> unparsedBBoxes) {
    IList<float> elementBox = ParseBBox(@object, unparsedBBoxes);
    bool alignWithLine = TextPositioning.BY_WORDS_AND_LINES == textPositioning;
    if (!alignWithLine && TextPositioning.BY_WORDS != textPositioning) {
        return elementBox;
    }

    iText.StyledXmlParser.Jsoup.Nodes.Node parentLine = @object.Parent();
    IList<float> lineBox = ParseBBox(parentLine, unparsedBBoxes);
    if (alignWithLine) {
        elementBox[BOTTOM_IDX] = lineBox[BOTTOM_IDX];
        elementBox[TOP_IDX] = lineBox[TOP_IDX];
    }
    DetectAndFixBrokenBBoxes(@object, elementBox, lineBox, unparsedBBoxes);
    return elementBox;
}
/// <summary>Inserts a comment token into the tree, converting bogus comments that look like XML declarations.</summary>
/// <remarks>
/// A bogus comment whose data starts with "!" or "?" is re-parsed as an element to extract
/// a name and attributes for an <c>XmlDeclaration</c>. If the re-parse produces no element,
/// the token is inserted as an ordinary comment.
/// </remarks>
/// <param name="commentToken">the comment token to insert</param>
internal virtual void Insert(Token.Comment commentToken) {
    Comment comment = new Comment(commentToken.GetData(), baseUri);
    iText.StyledXmlParser.Jsoup.Nodes.Node insert = comment;
    if (commentToken.bogus) {
        // xml declarations are emitted as bogus comments (which is right for html, but not xml)
        // so we do a bit of a hack and parse the data as an element to pull the attributes out
        String data = comment.GetData();
        // Ordinal comparison: these are markup characters, not linguistic text.
        bool startsWithBang = data.StartsWith("!", System.StringComparison.Ordinal);
        bool startsWithQuestion = data.StartsWith("?", System.StringComparison.Ordinal);
        if (data.Length > 1 && (startsWithBang || startsWithQuestion)) {
            Document doc = iText.StyledXmlParser.Jsoup.Jsoup.Parse("<" + data.JSubstring(1, data.Length - 1) + ">", baseUri
                , iText.StyledXmlParser.Jsoup.Parser.Parser.XmlParser());
            // Data such as "?x" re-parses to "<>" with no child element; Child(0) would
            // throw, so in that case the token stays a plain comment.
            if (doc.Children().Count > 0) {
                iText.StyledXmlParser.Jsoup.Nodes.Element el = doc.Child(0);
                insert = new XmlDeclaration(el.TagName(), comment.BaseUri(), startsWithBang);
                insert.Attributes().AddAll(el.Attributes());
            }
        }
    }
    InsertNode(insert);
}
/// <summary>Appends a closing tag built from the node's name to <c>accum</c>.</summary>
/// <param name="node">the node whose name is used for the closing tag</param>
/// <param name="depth">depth of the node; not used by this method</param>
public void Tail(iText.StyledXmlParser.Jsoup.Nodes.Node node, int depth) {
    string closingTag = "</" + node.NodeName() + ">";
    accum.Append(closingTag);
}
/// <summary>Appends the node as a child of the current element.</summary>
/// <param name="node">the node to insert</param>
private void InsertNode(iText.StyledXmlParser.Jsoup.Nodes.Node node) {
    var parent = CurrentElement();
    parent.AppendChild(node);
}