private void ProcessHtmlChunks(HtmlEnumerator en, String endTag)
{
    // Advance chunk by chunk for as long as MoveUntilMatch reports success for endTag.
    while (en.MoveUntilMatch(endTag))
    {
        if (!en.IsCurrentHtmlTag)
        {
            // Text chunk: decode the HTML entities and ask Word to preserve its whitespace.
            Text content = new Text(HttpUtility.HtmlDecode(en.Current))
            {
                Space = SpaceProcessingModeValues.Preserve
            };
            Run textRun = new Run(content);

            // apply the previously discovered style
            htmlStyles.Runs.ApplyTags(textRun);
            elements.Add(textRun);
            continue;
        }

        Action<HtmlEnumerator> tagHandler;
        if (!knownTags.TryGetValue(en.CurrentTag, out tagHandler))
        {
            // unknown or not yet implemented tag - we ignore it
            continue;
        }

        if (Logging.On)
        {
            Logging.PrintVerbose(en.Current);
        }

        tagHandler(en);
    }
}
/// <summary>
/// Completes the current paragraph when some elements are still pending, so that the
/// following content starts from a fresh list, then keeps processing until
/// <paramref name="endTag"/> is found.
/// </summary>
/// <param name="en">The enumerator handed over to <c>ProcessHtmlChunks</c>.</param>
/// <param name="endTag">The tag at which the processing stops.</param>
private void AlternateProcessHtmlChunks(HtmlEnumerator en, string endTag)
{
    bool hasPendingElements = elements.Count > 0;
    if (hasPendingElements)
    {
        CompleteCurrentParagraph();
    }

    ProcessHtmlChunks(en, endTag);
}
/// <summary>
/// A few attributes are shared by a large number of tags. This method checks them for a limited
/// number of tags (&lt;p&gt;, &lt;pre&gt;, &lt;div&gt;, &lt;span&gt; and &lt;body&gt;).
/// </summary>
/// <param name="en">The enumerator positioned on the tag whose style attributes are read.</param>
/// <param name="styleAttributes">The list forwarded to the paragraph style's common attribute processing.</param>
/// <returns>Returns true if the processing of this tag should generate a new paragraph.</returns>
private bool ProcessContainerAttributes(HtmlEnumerator en, IList<OpenXmlElement> styleAttributes)
{
    // Page breaks are not applicable inside a table, unless the current tag is <pre>.
    bool pageBreakApplies = !tables.HasContext || en.CurrentTag == "<pre>";
    if (pageBreakApplies)
    {
        String breakAfter = en.StyleAttributes["page-break-after"];
        if (breakAfter == "always")
        {
            // The break goes in a paragraph of its own, appended to the paragraph list.
            Break pageBreak = new Break() { Type = BreakValues.Page };
            paragraphs.Add(new Paragraph(new Run(pageBreak)));
        }

        String breakBefore = en.StyleAttributes["page-break-before"];
        if (breakBefore == "always")
        {
            // The break is queued with the pending elements, followed by a rendered-page-break marker.
            Break pageBreak = new Break() { Type = BreakValues.Page };
            elements.Add(new Run(pageBreak));
            elements.Add(new Run(new LastRenderedPageBreak()));
        }
    }

    // support left and right padding
    var padding = en.StyleAttributes.GetAsMargin("padding");
    if (!padding.IsEmpty)
    {
        if (padding.Left.IsFixed || padding.Right.IsFixed)
        {
            Indentation indent = new Indentation();

            if (padding.Left.Value > 0)
            {
                indent.Left = padding.Left.ValueInDxa.ToString(CultureInfo.InvariantCulture);
            }

            if (padding.Right.Value > 0)
            {
                indent.Right = padding.Right.ValueInDxa.ToString(CultureInfo.InvariantCulture);
            }

            currentParagraph.InsertInProperties(props => props.Indentation = indent);
        }
    }

    // Only the common attributes decide whether a new paragraph is required.
    return htmlStyles.Paragraph.ProcessCommonAttributes(en, styleAttributes);
}
/// <summary>
/// Start the parse processing.
/// </summary>
/// <param name="html">The HTML markup to convert. A null or empty value yields an empty result.</param>
/// <returns>Returns a list of parsed paragraphs.</returns>
public IList<OpenXmlCompositeElement> Parse(String html)
{
    if (String.IsNullOrEmpty(html))
    {
        return new Paragraph[0];
    }

    // ensure a body exists to avoid any errors when trying to access it
    if (mainPart.Document != null)
    {
        if (mainPart.Document.Body == null)
        {
            mainPart.Document.Body = new Body();
        }
    }
    else
    {
        new Document(new Body()).Save(mainPart);
    }

    // Reset the state left over from any previous run
    elements = new List<OpenXmlElement>();
    paragraphs = new List<OpenXmlCompositeElement>();
    tables = new TableContext();
    htmlStyles.Runs.Reset();
    currentParagraph = null;

    // Start a new processing with a fresh paragraph registered as the first result
    currentParagraph = htmlStyles.Paragraph.NewParagraph();
    paragraphs.Add(currentParagraph);

    if (htmlStyles.DefaultParagraphStyle != null)
    {
        ParagraphProperties defaultProperties = new ParagraphProperties();
        ParagraphStyleId defaultStyleId = new ParagraphStyleId();
        defaultStyleId.Val = htmlStyles.DefaultParagraphStyle;
        defaultProperties.ParagraphStyleId = defaultStyleId;
        currentParagraph.ParagraphProperties = defaultProperties;
    }

    // A null end tag is passed so that no specific tag is requested as the stop condition
    ProcessHtmlChunks(new HtmlEnumerator(html), null);

    // Attach whatever is still pending to the paragraph being built
    if (elements.Count > 0)
    {
        this.currentParagraph.Append(elements);
    }

    // As the Parse method is public, its return type is kept unchanged; this parameterless call
    // acts as a proxy to the recursive clean-up so that clients are not impacted.
    RemoveEmptyParagraphs();

    return paragraphs;
}