/// <summary>
/// Advances the enumerator until <paramref name="endTag"/> is matched. Known Html tags are
/// dispatched to their registered handler, unknown tags are ignored and plain text is
/// converted into a run carrying the currently active run styles.
/// </summary>
/// <param name="en">The Html enumerator to advance.</param>
/// <param name="endTag">The value forwarded to <c>MoveUntilMatch</c> on every iteration.</param>
private void ProcessHtmlChunks(HtmlEnumerator en, String endTag)
{
    while (en.MoveUntilMatch(endTag))
    {
        if (en.IsCurrentHtmlTag)
        {
            Action<HtmlEnumerator> action;

            // A null tag name cannot be used as a lookup key (the lookup would throw),
            // so it is treated exactly like any other unknown tag.
            if (en.CurrentTag != null && knownTags.TryGetValue(en.CurrentTag, out action))
            {
                if (Logging.On)
                {
                    Logging.PrintVerbose(en.Current);
                }

                action(en);
            }
            // else unknown or not yet implemented - we ignore
        }
        else
        {
            // apply the previously discovered style
            Run run = new Run(
                new Text(HttpUtility.HtmlDecode(en.Current)) { Space = SpaceProcessingModeValues.Preserve }
            );
            htmlStyles.Runs.ApplyTags(run);
            elements.Add(run);
        }
    }
}
/// <summary>
/// Calls <c>CompleteCurrentParagraph</c> when some elements are still pending, then
/// continues to process the Html chunks until <paramref name="endTag"/> is found.
/// </summary>
/// <param name="en">The Html enumerator to advance.</param>
/// <param name="endTag">The tag on which the processing stops.</param>
protected void AlternateProcessHtmlChunks(HtmlEnumerator en, string endTag)
{
    bool hasPendingElements = elements.Count > 0;
    if (hasPendingElements)
    {
        CompleteCurrentParagraph();
    }

    ProcessHtmlChunks(en, endTag);
}
/// <summary>
/// Opens a new list level for the list tag the enumerator is positioned on, selects its
/// abstract numbering definition and pushes the numbering instance id to use for its items.
/// </summary>
/// <param name="en">The Html enumerator positioned on the list tag.</param>
public void BeginList(HtmlEnumerator en)
{
    int parentAbsNumId = absNumId;

    // lookup for a predefined list style in the template collection
    String requestedType = en.StyleAttributes["list-style-type"];
    bool orderedList = en.CurrentTag.Equals("<ol>", StringComparison.OrdinalIgnoreCase);

    bool isPredefined = requestedType != null
        && knonwAbsNumIds.TryGetValue(requestedType.ToLowerInvariant(), out absNumId);
    if (!isPredefined)
    {
        // fall back on the default template of the list kind
        absNumId = knonwAbsNumIds[orderedList ? "decimal" : "disc"];
    }

    firstItem = true;
    levelDepth++;

    // save a NumberingInstance if the nested list style is the same as its ancestor.
    // this allows us to nest <ol> and restart the indentation to 1.
    int currentInstanceId = this.InstanceID;
    bool sharesAncestorStyle = levelDepth > 1 && absNumId == parentAbsNumId && orderedList;
    if (sharesAncestorStyle)
    {
        EnsureMultilevel(absNumId);
    }
    else
    {
        currentInstanceId = ++nextInstanceID;

        Numbering numbering = mainPart.NumberingDefinitionsPart.Numbering;
        var restartAtOne = new LevelOverride(
            new StartOverrideNumberingValue() { Val = 1 }
        ) { LevelIndex = 0, };
        var instance = new NumberingInstance(
            new AbstractNumId() { Val = absNumId },
            restartAtOne
        ) { NumberID = currentInstanceId };
        numbering.Append(instance);
    }

    numInstances.Push(currentInstanceId);
}
/// <summary>
/// Handles the attributes shared by the container tags (&lt;p&gt;, &lt;pre&gt;, &lt;div&gt;,
/// &lt;span&gt; and &lt;body&gt;): page breaks, left/right padding and the common paragraph attributes.
/// </summary>
/// <param name="en">The Html enumerator positioned on the container tag.</param>
/// <param name="styleAttributes">The collection handed over to the paragraph style processing.</param>
/// <returns>Returns true if the processing of this tag should generate a new paragraph.</returns>
protected bool ProcessContainerAttributes(HtmlEnumerator en, IList<OpenXmlElement> styleAttributes)
{
    // Not applicable to a table : page break
    bool pageBreakAllowed = !tables.HasContext || en.CurrentTag == "<pre>";
    if (pageBreakAllowed)
    {
        string breakAfter = en.StyleAttributes["page-break-after"];
        if (breakAfter == "always")
        {
            var pageBreakParagraph = new Paragraph(
                new Run(
                    new Break() { Type = BreakValues.Page }));
            paragraphs.Add(pageBreakParagraph);
        }

        string breakBefore = en.StyleAttributes["page-break-before"];
        if (breakBefore == "always")
        {
            elements.Add(new Run(new Break() { Type = BreakValues.Page }));
            elements.Add(new Run(new LastRenderedPageBreak()));
        }
    }

    // support left and right padding
    var padding = en.StyleAttributes.GetAsMargin("padding");
    bool hasHorizontalPadding = !padding.IsEmpty && (padding.Left.IsFixed || padding.Right.IsFixed);
    if (hasHorizontalPadding)
    {
        Indentation indentation = new Indentation();

        if (padding.Left.Value > 0)
        {
            indentation.Left = padding.Left.ValueInDxa.ToString(CultureInfo.InvariantCulture);
        }

        if (padding.Right.Value > 0)
        {
            indentation.Right = padding.Right.ValueInDxa.ToString(CultureInfo.InvariantCulture);
        }

        currentParagraph.InsertInProperties(prop => prop.Indentation = indentation);
    }

    // the paragraph style processing alone decides whether a new paragraph is required
    return htmlStyles.Paragraph.ProcessCommonAttributes(en, styleAttributes);
}
/// <summary>
/// Start the parse processing.
/// </summary>
/// <param name="html">The Html content to convert.</param>
/// <returns>Returns a list of parsed paragraph; an empty array when <paramref name="html"/> is null or empty.</returns>
public IList<OpenXmlCompositeElement> Parse(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return new Paragraph[0];
    }

    // ensure a body exists to avoid any errors when trying to access it
    var document = mainPart.Document;
    if (document == null)
    {
        new Document(new Body()).Save(mainPart);
    }
    else if (document.Body == null)
    {
        document.Body = new Body();
    }

    // Reset the state left by a previous call
    elements = new List<OpenXmlElement>();
    this.paragraphs = new List<OpenXmlCompositeElement>();
    tables = new TableContext();
    htmlStyles.Runs.Reset();
    currentParagraph = null;

    // Start a new processing
    currentParagraph = htmlStyles.Paragraph.NewParagraph();
    this.paragraphs.Add(currentParagraph);

    var defaultStyle = htmlStyles.DefaultParagraphStyle;
    if (defaultStyle != null)
    {
        currentParagraph.ParagraphProperties = new ParagraphProperties
        {
            ParagraphStyleId = new ParagraphStyleId { Val = defaultStyle }
        };
    }

    var en = new HtmlEnumerator(html);
    this.ProcessHtmlChunks(en, null);

    // flush whatever has not been attached to a paragraph yet
    if (elements.Count > 0)
    {
        this.currentParagraph.Append(elements);
    }

    // As the Parse method is public, to avoid changing the type of the return value, this proxy
    // calls the recursive method RemoveEmptyParagraphs with no major changes impacting the client.
    this.RemoveEmptyParagraphs();

    return this.paragraphs;
}
/// <summary>
/// Iterate over the html tags and process them: a known tag is handed to its registered
/// handler, anything else (unknown tag or plain text) is printed as text.
/// </summary>
/// <param name="en">The Html enumerator to advance.</param>
/// <param name="endTag">The value forwarded to <c>MoveUntilMatch</c> on every iteration.</param>
protected void ProcessHtmlChunks(HtmlEnumerator en, string endTag)
{
    while (en.MoveUntilMatch(endTag))
    {
        Action<HtmlEnumerator> handler = null;
        bool isKnownTag = en.IsCurrentHtmlTag
            && en.CurrentTag != null
            && knownTags.TryGetValue(en.CurrentTag, out handler);

        if (isKnownTag)
        {
            // Process known tag
            if (Logging.On)
            {
                Logging.PrintVerbose(en.Current);
            }

            handler(en);
            continue;
        }

        // Unknown tag or plain text: print it and apply the previously discovered style
        var text = new Text(HttpUtility.HtmlDecode(en.Current)) { Space = SpaceProcessingModeValues.Preserve };
        Run run = new Run(text);
        htmlStyles.Runs.ApplyTags(run);
        elements.Add(run);
    }
}
/// <summary>
/// Reads the common attributes of the current table related tag (td, thead, tr, ...) and
/// converts them to their OpenXml equivalence: background shading, vertical and horizontal
/// alignment and the style matching one of its classes.
/// </summary>
/// <param name="en">The Html enumerator positioned on a <i>table (or related)</i> tag.</param>
/// <param name="runStyleAttributes">The collection of attributes where to store new discovered attributes.</param>
public void ProcessCommonAttributes(HtmlEnumerator en, IList<OpenXmlElement> runStyleAttributes)
{
    var cellAttributes = new List<OpenXmlElement>();

    var fillColor = en.StyleAttributes.GetAsColor("background-color");

    // "background-color" is also handled by RunStyleCollection which duplicate this attribute (bug #13212). Let's ignore it
    if (!fillColor.IsEmpty && en.CurrentTag.Equals("<td>", StringComparison.InvariantCultureIgnoreCase))
    {
        fillColor = System.Drawing.Color.Empty;
    }

    if (fillColor.IsEmpty)
    {
        fillColor = en.Attributes.GetAsColor("bgcolor");
    }

    if (!fillColor.IsEmpty)
    {
        var shading = new Shading()
        {
            Val = ShadingPatternValues.Clear,
            Color = "auto",
            Fill = fillColor.ToHexString()
        };
        cellAttributes.Add(shading);
    }

    // vertical alignment: the css style wins over the legacy attribute
    var verticalAlign = en.StyleAttributes["vertical-align"];
    if (verticalAlign == null)
    {
        verticalAlign = en.Attributes["valign"];
    }

    if (verticalAlign != null)
    {
        TableVerticalAlignmentValues? valign = ConverterUtility.FormatVAlign(verticalAlign);
        if (valign.HasValue)
        {
            cellAttributes.Add(new TableCellVerticalAlignment() { Val = valign });
        }
    }

    // horizontal alignment: the css style wins over the legacy attribute
    var horizontalAlign = en.StyleAttributes["text-align"];
    if (horizontalAlign == null)
    {
        horizontalAlign = en.Attributes["align"];
    }

    if (horizontalAlign != null)
    {
        JustificationValues? halign = ConverterUtility.FormatParagraphAlign(horizontalAlign);
        if (halign.HasValue)
        {
            this.BeginTagForParagraph(en.CurrentTag, new KeepNext(), new Justification { Val = halign });
        }
    }

    // implemented by ddforge
    String[] classes = en.Attributes.GetAsClass();
    if (classes != null)
    {
        foreach (String candidate in classes)
        {
            string className = documentStyle.GetStyle(candidate, StyleValues.Table, ignoreCase: true);
            if (className == null)
            {
                continue;
            }

            // only one Style can be applied in OpenXml and dealing with inheritance is out of scope
            cellAttributes.Add(new RunStyle() { Val = className });
            break;
        }
    }

    this.BeginTag(en.CurrentTag, cellAttributes);

    // Process general run styles
    documentStyle.Runs.ProcessCommonAttributes(en, runStyleAttributes);
}
/// <summary>
/// Processes a list item. Only the first item of a list is inspected: when it carries a
/// left margin expressed in pixels, a dedicated single-level list template and its
/// numbering instance are appended to the numbering part.
/// </summary>
/// <param name="en">The Html enumerator positioned on the list item tag.</param>
/// <returns>The value of <c>InstanceID</c> once the item has been processed.</returns>
public int ProcessItem(HtmlEnumerator en)
{
    if (firstItem)
    {
        firstItem = false;

        // in case a margin has been specifically specified, we need to create a new list template
        // on the fly with a different AbsNumId, so that Word doesn't merge the style with its predecessor.
        Margin margin = en.StyleAttributes.GetAsMargin("margin");
        bool hasPixelLeftMargin = margin.Left.Value > 0 && margin.Left.Type == UnitMetric.Pixel;
        if (hasPixelLeftMargin)
        {
            Numbering numbering = mainPart.NumberingDefinitionsPart.Numbering;
            foreach (AbstractNum template in numbering.Elements<AbstractNum>())
            {
                if (template.AbstractNumberId == absNumId)
                {
                    Level sourceLevel = template.GetFirstChild<Level>();
                    Int32 currentNumId = ++nextInstanceID;

                    // clone the first level of the matching template under a fresh id
                    var clonedTemplate = new AbstractNum(
                        new MultiLevelType() { Val = MultiLevelValues.SingleLevel },
                        new Level
                        {
                            StartNumberingValue = new StartNumberingValue() { Val = 1 },
                            NumberingFormat = new NumberingFormat() { Val = sourceLevel.NumberingFormat.Val },
                            LevelIndex = 0,
                            LevelText = new LevelText() { Val = sourceLevel.LevelText.Val }
                        }
                    ) { AbstractNumberId = currentNumId };
                    numbering.Append(clonedTemplate);
                    numbering.Save(mainPart.NumberingDefinitionsPart);

                    var instance = new NumberingInstance(
                        new AbstractNumId() { Val = currentNumId }
                    ) { NumberID = currentNumId };
                    numbering.Append(instance);
                    numbering.Save(mainPart.NumberingDefinitionsPart);

                    mainPart.NumberingDefinitionsPart.Numbering.Reload();

                    // the numbering tree has been modified: stop enumerating it
                    break;
                }
            }
        }
    }

    return this.InstanceID;
}
/// <summary>
/// There is a few attributes shared by a large number of tags. This method will check them for a limited
/// number of tags (&lt;p&gt;, &lt;pre&gt;, &lt;div&gt;, &lt;span&gt; and &lt;body&gt;): language, text alignment,
/// direction, border, paragraph style class, margin and text indentation.
/// </summary>
/// <param name="en">The Html enumerator positioned on the tag to inspect.</param>
/// <param name="styleAttributes">The collection where the discovered run-level attributes are stored.</param>
/// <returns>
/// Returns true if the processing of this tag should generate a new paragraph, which is the case when
/// a border was found on a &lt;p&gt;, &lt;div&gt; or &lt;pre&gt; tag or when one of the classes matches a paragraph style.
/// Returns false straight away when the tag has no attribute.
/// </returns>
public bool ProcessCommonAttributes(HtmlEnumerator en, IList<OpenXmlElement> styleAttributes)
{
    if (en.Attributes.Count == 0)
    {
        return false;
    }

    bool newParagraph = false;
    List<OpenXmlElement> containerStyleAttributes = new List<OpenXmlElement>();

    string attrValue = en.Attributes["lang"];
    if (attrValue != null && attrValue.Length > 0)
    {
        try
        {
            var ci = System.Globalization.CultureInfo.GetCultureInfo(attrValue);
            bool rtl = ci.TextInfo.IsRightToLeft;

            Languages lang = new Languages() { Val = ci.TwoLetterISOLanguageName };
            if (rtl)
            {
                lang.Bidi = ci.Name;
                styleAttributes.Add(new Languages() { Bidi = ci.Name });

                // notify table
                documentStyle.Tables.BeginTag(en.CurrentTag, new TableJustification() { Val = TableRowAlignmentValues.Right });
            }

            containerStyleAttributes.Add(new ParagraphMarkRunProperties(lang));
            containerStyleAttributes.Add(new BiDi() { Val = OnOffValue.FromBoolean(rtl) });
        }
        catch (ArgumentException)
        {
            // lang not valid, ignore it
        }
    }

    attrValue = en.StyleAttributes["text-align"];
    if (attrValue != null && en.CurrentTag != "<font>")
    {
        JustificationValues? align = ConverterUtility.FormatParagraphAlign(attrValue);
        if (align.HasValue)
        {
            containerStyleAttributes.Add(new Justification { Val = align });
        }
    }

    // according to w3c, dir should be used in conjonction with lang. But whatever happens, we'll apply the RTL layout
    attrValue = en.Attributes["dir"];
    if (attrValue != null)
    {
        if (attrValue.Equals("rtl", StringComparison.OrdinalIgnoreCase))
        {
            styleAttributes.Add(new RightToLeftText());
            containerStyleAttributes.Add(new Justification() { Val = JustificationValues.Right });
        }
        else if (attrValue.Equals("ltr", StringComparison.OrdinalIgnoreCase))
        {
            containerStyleAttributes.Add(new Justification() { Val = JustificationValues.Left });
        }
    }

    // <span> and <font> are considered as semi-container attribute. When converted to OpenXml, there are Runs but not Paragraphs
    if (en.CurrentTag == "<p>" || en.CurrentTag == "<div>" || en.CurrentTag == "<pre>")
    {
        var border = en.StyleAttributes.GetAsBorder("border");
        if (!border.IsEmpty)
        {
            ParagraphBorders borders = new ParagraphBorders();

            if (border.Top.IsValid)
            {
                borders.Append(
                    new TopBorder() { Val = border.Top.Style, Color = border.Top.Color.ToHexString(), Size = (uint)border.Top.Width.ValueInPx * 4, Space = 1U });
            }

            if (border.Left.IsValid)
            {
                borders.Append(
                    new LeftBorder() { Val = border.Left.Style, Color = border.Left.Color.ToHexString(), Size = (uint)border.Left.Width.ValueInPx * 4, Space = 1U });
            }

            if (border.Bottom.IsValid)
            {
                borders.Append(
                    new BottomBorder() { Val = border.Bottom.Style, Color = border.Bottom.Color.ToHexString(), Size = (uint)border.Bottom.Width.ValueInPx * 4, Space = 1U });
            }

            if (border.Right.IsValid)
            {
                borders.Append(
                    new RightBorder() { Val = border.Right.Style, Color = border.Right.Color.ToHexString(), Size = (uint)border.Right.Width.ValueInPx * 4, Space = 1U });
            }

            containerStyleAttributes.Add(borders);
            newParagraph = true;
        }
    }
    else if (en.CurrentTag == "<span>" || en.CurrentTag == "<font>")
    {
        // OpenXml limits the border to 4-side of the same color and style.
        SideBorder border = en.StyleAttributes.GetAsSideBorder("border");
        if (border.IsValid)
        {
            styleAttributes.Add(new DocumentFormat.OpenXml.Wordprocessing.Border()
            {
                Val = border.Style,
                Color = border.Color.ToHexString(),
                Size = (uint)border.Width.ValueInPx * 4,
                Space = 1U
            });
        }
    }

    String[] classes = en.Attributes.GetAsClass();
    if (classes != null)
    {
        for (int i = 0; i < classes.Length; i++)
        {
            string className = documentStyle.GetStyle(classes[i], StyleValues.Paragraph, ignoreCase: true);
            if (className != null)
            {
                // only the first matching paragraph style is applied
                containerStyleAttributes.Add(new ParagraphStyleId() { Val = className });
                newParagraph = true;
                break;
            }
        }
    }

    Margin margin = en.StyleAttributes.GetAsMargin("margin");
    Indentation indentation = null;
    if (!margin.IsEmpty)
    {
        if (margin.Top.IsFixed || margin.Bottom.IsFixed)
        {
            SpacingBetweenLines spacing = new SpacingBetweenLines();
            if (margin.Top.IsFixed)
            {
                spacing.Before = margin.Top.ValueInDxa.ToString(CultureInfo.InvariantCulture);
            }

            if (margin.Bottom.IsFixed)
            {
                spacing.After = margin.Bottom.ValueInDxa.ToString(CultureInfo.InvariantCulture);
            }

            containerStyleAttributes.Add(spacing);
        }

        if (margin.Left.IsFixed || margin.Right.IsFixed)
        {
            indentation = new Indentation();
            if (margin.Left.IsFixed)
            {
                indentation.Left = margin.Left.ValueInDxa.ToString(CultureInfo.InvariantCulture);
            }

            if (margin.Right.IsFixed)
            {
                indentation.Right = margin.Right.ValueInDxa.ToString(CultureInfo.InvariantCulture);
            }

            containerStyleAttributes.Add(indentation);
        }
    }

    // implemented by giorand (feature #13787)
    Unit textIndent = en.StyleAttributes.GetAsUnit("text-indent");
    if (textIndent.IsValid && (en.CurrentTag == "<p>" || en.CurrentTag == "<div>"))
    {
        // When the margin already produced an Indentation, it is part of the collection:
        // enrich that instance instead of registering the very same element a second time.
        if (indentation == null)
        {
            indentation = new Indentation();
            containerStyleAttributes.Add(indentation);
        }

        indentation.FirstLine = textIndent.ValueInDxa.ToString(CultureInfo.InvariantCulture);
    }

    this.BeginTag(en.CurrentTag, containerStyleAttributes);

    // Process general run styles
    documentStyle.Runs.ProcessCommonAttributes(en, styleAttributes);

    return newParagraph;
}
/// <summary>
/// Converts some common styling attributes (color, background color, text decoration,
/// character style class and font) to their OpenXml equivalence.
/// Nothing is done when the current tag has no attribute.
/// </summary>
/// <param name="en">The Html enumerator positioned on the tag to inspect.</param>
/// <param name="styleAttributes">The collection of attributes where to store new discovered attributes.</param>
public void ProcessCommonAttributes(HtmlEnumerator en, IList<OpenXmlElement> styleAttributes)
{
    if (en.Attributes.Count == 0)
    {
        return;
    }

    // text color: the css style wins over the legacy attribute
    var foreground = en.StyleAttributes.GetAsColor("color");
    if (foreground.IsEmpty)
    {
        foreground = en.Attributes.GetAsColor("color");
    }

    if (!foreground.IsEmpty)
    {
        styleAttributes.Add(new Color { Val = foreground.ToHexString() });
    }

    var background = en.StyleAttributes.GetAsColor("background-color");
    if (!background.IsEmpty)
    {
        // change the way the background-color renders. It now uses Shading instead of Highlight.
        // Changes brought by Wude on http://notesforhtml2openxml.codeplex.com/discussions/277570
        styleAttributes.Add(new Shading { Val = ShadingPatternValues.Clear, Fill = background.ToHexString() });
    }

    string decoration = en.StyleAttributes["text-decoration"];
    switch (decoration)
    {
        case "underline":
            styleAttributes.Add(new Underline { Val = UnderlineValues.Single });
            break;

        case "line-through":
            styleAttributes.Add(new Strike());
            break;
    }

    String[] classes = en.Attributes.GetAsClass();
    if (classes != null)
    {
        foreach (String candidate in classes)
        {
            string className = documentStyle.GetStyle(candidate, StyleValues.Character, ignoreCase: true);
            if (className == null)
            {
                continue;
            }

            // only one Style can be applied in OpenXml and dealing with inheritance is out of scope
            styleAttributes.Add(new RunStyle() { Val = className });
            break;
        }
    }

    HtmlFont font = en.StyleAttributes.GetAsFont("font");
    if (font.IsEmpty)
    {
        return;
    }

    if (font.Style == FontStyle.Italic)
    {
        styleAttributes.Add(new Italic());
    }

    bool isBold = font.Weight == FontWeight.Bold || font.Weight == FontWeight.Bolder;
    if (isBold)
    {
        styleAttributes.Add(new Bold());
    }

    if (font.Variant == FontVariant.SmallCaps)
    {
        styleAttributes.Add(new SmallCaps());
    }

    if (font.Family != null)
    {
        styleAttributes.Add(new RunFonts() { Ascii = font.Family.Name, HighAnsi = font.Family.Name });
    }

    // size are half-point font size
    if (font.Size.IsFixed)
    {
        string halfPoints = (font.Size.ValueInPoint * 2).ToString(CultureInfo.InvariantCulture);
        styleAttributes.Add(new FontSize() { Val = halfPoints });
    }
}