示例#1
0
        /// <summary>
        /// Iterates over the enumerator for as long as <c>MoveUntilMatch(endTag)</c> succeeds.
        /// Html tags registered in <c>knownTags</c> are handed to their handler; any other tag is skipped.
        /// Non-tag chunks are html-decoded and queued in <c>elements</c> as a styled run.
        /// </summary>
        /// <param name="en">The Html enumerator to consume.</param>
        /// <param name="endTag">The value forwarded to <c>MoveUntilMatch</c> on every iteration.</param>
        private void ProcessHtmlChunks(HtmlEnumerator en, String endTag)
        {
            while (en.MoveUntilMatch(endTag))
            {
                if (!en.IsCurrentHtmlTag)
                {
                    // plain text chunk: decode the entities and keep the whitespaces as they are
                    Text decodedText = new Text(HttpUtility.HtmlDecode(en.Current));
                    decodedText.Space = SpaceProcessingModeValues.Preserve;
                    Run textRun = new Run(decodedText);

                    // apply the styles discovered so far
                    htmlStyles.Runs.ApplyTags(textRun);
                    elements.Add(textRun);
                    continue;
                }

                Action<HtmlEnumerator> handler;
                if (!knownTags.TryGetValue(en.CurrentTag, out handler))
                {
                    // unknown or not yet implemented tag - we ignore it
                    continue;
                }

                if (Logging.On)
                {
                    Logging.PrintVerbose(en.Current);
                }

                handler(en);
            }
        }
示例#2
0
        /// <summary>
        /// Starts a new list level for the tag the enumerator is positioned on. The abstract numbering id
        /// is looked up from the <c>list-style-type</c> style attribute and falls back to "decimal" for
        /// &lt;ol&gt; or "disc" otherwise. The resulting (instance id, abstract id) pair is pushed on
        /// <c>numInstances</c>.
        /// </summary>
        /// <param name="en">The Html enumerator positioned on the list tag.</param>
        public void BeginList(HtmlEnumerator en)
        {
            // abstract numbering id of the enclosing level, read before anything is modified
            int parentAbsNumId = numInstances.Peek().Value;

            String styleType = en.StyleAttributes["list-style-type"];
            bool isOrdered = en.CurrentTag.Equals("<ol>", StringComparison.OrdinalIgnoreCase);

            // look for a predefined list style in the template collection
            int absNumId = -1;
            bool isKnownStyle = styleType != null
                && knonwAbsNumIds.TryGetValue(styleType.ToLowerInvariant(), out absNumId);
            if (!isKnownStyle)
            {
                absNumId = knonwAbsNumIds[isOrdered ? "decimal" : "disc"];
            }

            firstItem = true;
            levelDepth++;

            int currentInstanceId = this.InstanceID;

            // A nested <ol> sharing the style of its ancestor keeps the current instance id and only calls
            // EnsureMultilevel; every other case registers a new numbering instance whose level 0 starts at 1.
            bool sameStyleAsParent = levelDepth > 1 && absNumId == parentAbsNumId && isOrdered;
            if (sameStyleAsParent)
            {
                EnsureMultilevel(absNumId);
            }
            else
            {
                currentInstanceId = ++nextInstanceID;
                Numbering numbering = mainPart.NumberingDefinitionsPart.Numbering;

                AbstractNumId abstractRef = new AbstractNumId();
                abstractRef.Val = absNumId;

                StartOverrideNumberingValue startValue = new StartOverrideNumberingValue();
                startValue.Val = 1;

                LevelOverride firstLevelOverride = new LevelOverride(startValue);
                firstLevelOverride.LevelIndex = 0;

                NumberingInstance instance = new NumberingInstance(abstractRef, firstLevelOverride);
                instance.NumberID = currentInstanceId;

                numbering.Append(instance);
            }

            numInstances.Push(new KeyValuePair<int, int>(currentInstanceId, absNumId));
        }
示例#3
0
 /// <summary>
 /// Calls <c>CompleteCurrentParagraph</c> when some elements are still pending, then
 /// keeps processing the Html chunks until <paramref name="endTag"/> is found.
 /// </summary>
 private void AlternateProcessHtmlChunks(HtmlEnumerator en, string endTag)
 {
     bool hasPendingElements = elements.Count > 0;
     if (hasPendingElements)
     {
         CompleteCurrentParagraph();
     }

     ProcessHtmlChunks(en, endTag);
 }
示例#4
0
        /// <summary>
        /// Reads the <c>list-style-type</c> style attribute and whether the current tag is &lt;ol&gt;
        /// (case-insensitive), then delegates to <c>CreateList</c>.
        /// </summary>
        /// <param name="en">The Html enumerator positioned on the list tag.</param>
        public void BeginList(HtmlEnumerator en)
        {
            // the style type is read first, then the ordered flag, as arguments are evaluated left to right
            CreateList(
                en.StyleAttributes["list-style-type"],
                en.CurrentTag.Equals("<ol>", StringComparison.OrdinalIgnoreCase));
        }
示例#5
0
        /// <summary>
        /// Handles the attributes shared by the container tags (&lt;p&gt;, &lt;pre&gt;, &lt;div&gt;, &lt;span&gt;
        /// and &lt;body&gt;): page breaks, left/right padding, and then the common paragraph attributes.
        /// </summary>
        /// <param name="en">The Html enumerator positioned on the container tag.</param>
        /// <param name="styleAttributes">The collection forwarded to the paragraph style processing.</param>
        /// <returns>The value returned by the paragraph style collection: true if the processing of this tag
        /// should generate a new paragraph.</returns>
        private bool ProcessContainerAttributes(HtmlEnumerator en, IList<OpenXmlElement> styleAttributes)
        {
            // Page breaks are skipped while a table context exists, unless the tag is <pre>
            if (!tables.HasContext || en.CurrentTag == "<pre>")
            {
                String breakAfter = en.StyleAttributes["page-break-after"];
                if (breakAfter == "always")
                {
                    Break pageBreak = new Break();
                    pageBreak.Type = BreakValues.Page;
                    paragraphs.Add(new Paragraph(new Run(pageBreak)));
                }

                String breakBefore = en.StyleAttributes["page-break-before"];
                if (breakBefore == "always")
                {
                    Break pageBreak = new Break();
                    pageBreak.Type = BreakValues.Page;
                    elements.Add(new Run(pageBreak));
                    elements.Add(new Run(new LastRenderedPageBreak()));
                }
            }

            // left and right padding become a paragraph indentation
            var padding = en.StyleAttributes.GetAsMargin("padding");
            bool hasFixedSidePadding = !padding.IsEmpty && (padding.Left.IsFixed || padding.Right.IsFixed);
            if (hasFixedSidePadding)
            {
                Indentation paddingIndent = new Indentation();
                if (padding.Left.Value > 0)
                {
                    paddingIndent.Left = padding.Left.ValueInDxa.ToString(CultureInfo.InvariantCulture);
                }
                if (padding.Right.Value > 0)
                {
                    paddingIndent.Right = padding.Right.ValueInDxa.ToString(CultureInfo.InvariantCulture);
                }

                currentParagraph.InsertInProperties(prop => prop.Indentation = paddingIndent);
            }

            return htmlStyles.Paragraph.ProcessCommonAttributes(en, styleAttributes);
        }
示例#6
0
        /// <summary>
        /// Runs the conversion of an Html fragment.
        /// </summary>
        /// <param name="html">The Html to convert. A null or empty value yields an empty array.</param>
        /// <returns>Returns the list of parsed paragraphs.</returns>
        public IList<OpenXmlCompositeElement> Parse(String html)
        {
            if (String.IsNullOrEmpty(html))
            {
                return new Paragraph[0];
            }

            // a body must exist before anything tries to access it
            Document document = mainPart.Document;
            if (document == null)
            {
                new Document(new Body()).Save(mainPart);
            }
            else if (document.Body == null)
            {
                document.Body = new Body();
            }

            // Drop the state left by a previous run
            elements = new List<OpenXmlElement>();
            paragraphs = new List<OpenXmlCompositeElement>();
            tables = new TableContext();
            htmlStyles.Runs.Reset();
            currentParagraph = null;

            // Open the first paragraph and give it the default paragraph style, if one is configured
            currentParagraph = htmlStyles.Paragraph.NewParagraph();
            paragraphs.Add(currentParagraph);

            var defaultParagraphStyle = htmlStyles.DefaultStyles.ParagraphStyle;
            if (defaultParagraphStyle != null)
            {
                ParagraphStyleId styleId = new ParagraphStyleId();
                styleId.Val = defaultParagraphStyle;

                ParagraphProperties properties = new ParagraphProperties();
                properties.ParagraphStyleId = styleId;
                currentParagraph.ParagraphProperties = properties;
            }

            // a null end tag is passed to process the whole input
            ProcessHtmlChunks(new HtmlEnumerator(html), null);

            // elements still pending go to the current paragraph
            if (elements.Count > 0)
            {
                this.currentParagraph.Append(elements);
            }

            // the cleanup is delegated so the public return type does not have to change
            RemoveEmptyParagraphs();

            return paragraphs;
        }
        /// <summary>
        /// Translates the run-level styling of the current tag (color, background-color, text-decoration,
        /// class and font) to OpenXml elements.
        /// </summary>
        /// <param name="en">The Html parser.</param>
        /// <param name="styleAttributes">The collection receiving the discovered attributes.</param>
        public void ProcessCommonAttributes(HtmlEnumerator en, IList<OpenXmlElement> styleAttributes)
        {
            // a tag without any attribute has nothing to convert
            if (en.Attributes.Count == 0)
            {
                return;
            }

            // foreground: the style attribute wins over the html attribute
            var foreColor = en.StyleAttributes.GetAsColor("color");
            if (foreColor.IsEmpty)
            {
                foreColor = en.Attributes.GetAsColor("color");
            }
            if (!foreColor.IsEmpty)
            {
                Color color = new Color();
                color.Val = foreColor.ToHexString();
                styleAttributes.Add(color);
            }

            // background: rendered with Shading rather than Highlight
            // (change brought by Wude on http://html2openxml.codeplex.com/discussions/277570)
            var backColor = en.StyleAttributes.GetAsColor("background-color");
            if (!backColor.IsEmpty)
            {
                Shading shading = new Shading();
                shading.Val = ShadingPatternValues.Clear;
                shading.Fill = backColor.ToHexString();
                styleAttributes.Add(shading);
            }

            var decorations = Converter.ToTextDecoration(en.StyleAttributes["text-decoration"]);
            bool underlined = (decorations & TextDecoration.Underline) != 0;
            bool struck = (decorations & TextDecoration.LineThrough) != 0;
            if (underlined)
            {
                Underline underline = new Underline();
                underline.Val = UnderlineValues.Single;
                styleAttributes.Add(underline);
            }
            if (struck)
            {
                styleAttributes.Add(new Strike());
            }

            // only one Style can be applied in OpenXml and dealing with inheritance is out of scope:
            // the first class resolved to a character style wins
            String[] classes = en.Attributes.GetAsClass();
            if (classes != null)
            {
                foreach (String cssClass in classes)
                {
                    string styleName = documentStyle.GetStyle(cssClass, StyleValues.Character, ignoreCase: true);
                    if (styleName == null)
                    {
                        continue;
                    }

                    RunStyle runStyle = new RunStyle();
                    runStyle.Val = styleName;
                    styleAttributes.Add(runStyle);
                    break;
                }
            }

            HtmlFont font = en.StyleAttributes.GetAsFont("font");
            if (font.IsEmpty)
            {
                return;
            }

            if (font.Style == FontStyle.Italic)
            {
                styleAttributes.Add(new Italic());
            }

            if (font.Weight == FontWeight.Bold || font.Weight == FontWeight.Bolder)
            {
                styleAttributes.Add(new Bold());
            }

            if (font.Variant == FontVariant.SmallCaps)
            {
                styleAttributes.Add(new SmallCaps());
            }

            if (font.Family != null)
            {
                RunFonts fonts = new RunFonts();
                fonts.Ascii = font.Family;
                fonts.HighAnsi = font.Family;
                styleAttributes.Add(fonts);
            }

            // OpenXml font sizes are expressed in half-points
            if (font.Size.IsFixed)
            {
                FontSize fontSize = new FontSize();
                fontSize.Val = (font.Size.ValueInPoint * 2).ToString(CultureInfo.InvariantCulture);
                styleAttributes.Add(fontSize);
            }
        }
        /// <summary>
        /// Processes a list item. Only the first item after a list was begun is inspected: when it carries a
        /// positive left margin expressed in pixels, a single-level copy of the current abstract numbering
        /// (format and level text of its first level) is appended along with a numbering instance.
        /// </summary>
        /// <param name="en">The Html enumerator positioned on the list item.</param>
        /// <returns>Returns the value of <c>InstanceID</c>.</returns>
        public int ProcessItem(HtmlEnumerator en)
        {
            if (!firstItem)
            {
                return this.InstanceID;
            }

            firstItem = false;

            // in case a margin has been specifically specified, we need to create a new list template
            // on the fly with a different AbsNumId, so that Word does not merge the style with its predecessor.
            Margin margin = en.StyleAttributes.GetAsMargin("margin");
            bool hasPixelLeftMargin = margin.Left.Value > 0 && margin.Left.Type == UnitMetric.Pixel;
            if (!hasPixelLeftMargin)
            {
                return this.InstanceID;
            }

            Numbering numbering = mainPart.NumberingDefinitionsPart.Numbering;
            foreach (AbstractNum candidate in numbering.Elements<AbstractNum>())
            {
                if (!(candidate.AbstractNumberId == numInstances.Peek().Value))
                {
                    continue;
                }

                Level sourceLevel = candidate.GetFirstChild<Level>();
                Int32 newId = ++nextInstanceID;

                MultiLevelType levelType = new MultiLevelType();
                levelType.Val = MultiLevelValues.SingleLevel;

                Level clonedLevel = new Level
                {
                    StartNumberingValue = new StartNumberingValue { Val = 1 },
                    NumberingFormat = new NumberingFormat { Val = sourceLevel.NumberingFormat.Val },
                    LevelIndex = 0,
                    LevelText = new LevelText { Val = sourceLevel.LevelText.Val }
                };

                AbstractNum template = new AbstractNum(levelType, clonedLevel);
                template.AbstractNumberId = newId;
                numbering.Append(template);
                numbering.Save(mainPart.NumberingDefinitionsPart);

                AbstractNumId templateRef = new AbstractNumId();
                templateRef.Val = newId;
                NumberingInstance instance = new NumberingInstance(templateRef);
                instance.NumberID = newId;
                numbering.Append(instance);
                numbering.Save(mainPart.NumberingDefinitionsPart);
                mainPart.NumberingDefinitionsPart.Numbering.Reload();

                // the numbering tree was modified: stop enumerating it
                break;
            }

            return this.InstanceID;
        }
        /// <summary>
        /// Converts the attributes of a container tag (lang, text-align, dir, border, class, margin and
        /// text-indent) to their OpenXml equivalence. Paragraph-level elements are registered through
        /// <c>BeginTag</c> for the current tag; run-level elements are added to
        /// <paramref name="styleAttributes"/>, and the general run styles are then processed by
        /// <c>documentStyle.Runs</c>.
        /// </summary>
        /// <param name="en">The Html parser positioned on the tag to process.</param>
        /// <param name="styleAttributes">The collection of run attributes where to store new discovered attributes.</param>
        /// <returns>Returns true if a paragraph border or a paragraph style class was found, meaning that the
        /// processing of this tag should generate a new paragraph. Returns false when the tag has no attribute.</returns>
        public bool ProcessCommonAttributes(HtmlEnumerator en, IList<OpenXmlElement> styleAttributes)
        {
            if (en.Attributes.Count == 0)
            {
                return false;
            }

            bool newParagraph = false;
            List<OpenXmlElement> containerStyleAttributes = new List<OpenXmlElement>();

            string attrValue = en.Attributes["lang"];

            if (attrValue != null && attrValue.Length > 0)
            {
                try
                {
#if !NET_CORE
                    var ci = System.Globalization.CultureInfo.GetCultureInfo(attrValue);
#else
                    var ci = new System.Globalization.CultureInfo(attrValue);
#endif
                    bool rtl = ci.TextInfo.IsRightToLeft;

                    Languages lang = new Languages()
                    {
                        Val = ci.TwoLetterISOLanguageName
                    };
                    if (rtl)
                    {
                        lang.Bidi = ci.Name;
                        styleAttributes.Add(new Languages()
                        {
                            Bidi = ci.Name
                        });

                        // notify table
                        documentStyle.Tables.BeginTag(en.CurrentTag, new TableJustification()
                        {
                            Val = TableRowAlignmentValues.Right
                        });
                    }

                    containerStyleAttributes.Add(new ParagraphMarkRunProperties(lang));
                    containerStyleAttributes.Add(new BiDi()
                    {
                        Val = OnOffValue.FromBoolean(rtl)
                    });
                }
                catch (ArgumentException exc)
                {
                    // lang not valid, ignore it
                    if (Logging.On)
                    {
                        Logging.PrintError($"lang attribute {attrValue} not recognized: " + exc.Message, exc);
                    }
                }
            }

            // text-align is not handled for <font>
            attrValue = en.StyleAttributes["text-align"];
            if (attrValue != null && en.CurrentTag != "<font>")
            {
                JustificationValues? align = Converter.ToParagraphAlign(attrValue);
                if (align.HasValue)
                {
                    containerStyleAttributes.Add(new Justification
                    {
                        Val = align
                    });
                }
            }

            // according to w3c, dir should be used in conjunction with lang. But whatever happens, we'll apply the RTL layout
            attrValue = en.Attributes["dir"];
            if (attrValue != null)
            {
                if (attrValue.Equals("rtl", StringComparison.OrdinalIgnoreCase))
                {
                    styleAttributes.Add(new RightToLeftText());
                    containerStyleAttributes.Add(new Justification()
                    {
                        Val = JustificationValues.Right
                    });
                }
                else if (attrValue.Equals("ltr", StringComparison.OrdinalIgnoreCase))
                {
                    containerStyleAttributes.Add(new Justification()
                    {
                        Val = JustificationValues.Left
                    });
                }
            }

            // <span> and <font> are considered as semi-container attribute. When converted to OpenXml, there are Runs but not Paragraphs
            if (en.CurrentTag == "<p>" || en.CurrentTag == "<div>" || en.CurrentTag == "<pre>")
            {
                var border = en.StyleAttributes.GetAsBorder("border");
                if (!border.IsEmpty)
                {
                    // each valid side gets its own element; the size is the pixel width multiplied by 4
                    ParagraphBorders borders = new ParagraphBorders();
                    if (border.Top.IsValid)
                    {
                        borders.Append(
                            new TopBorder()
                            {
                                Val = border.Top.Style, Color = border.Top.Color.ToHexString(), Size = (uint)border.Top.Width.ValueInPx * 4, Space = 1U
                            });
                    }
                    if (border.Left.IsValid)
                    {
                        borders.Append(
                            new LeftBorder()
                            {
                                Val = border.Left.Style, Color = border.Left.Color.ToHexString(), Size = (uint)border.Left.Width.ValueInPx * 4, Space = 1U
                            });
                    }
                    if (border.Bottom.IsValid)
                    {
                        borders.Append(
                            new BottomBorder()
                            {
                                Val = border.Bottom.Style, Color = border.Bottom.Color.ToHexString(), Size = (uint)border.Bottom.Width.ValueInPx * 4, Space = 1U
                            });
                    }
                    if (border.Right.IsValid)
                    {
                        borders.Append(
                            new RightBorder()
                            {
                                Val = border.Right.Style, Color = border.Right.Color.ToHexString(), Size = (uint)border.Right.Width.ValueInPx * 4, Space = 1U
                            });
                    }

                    containerStyleAttributes.Add(borders);
                    newParagraph = true;
                }
            }
            else if (en.CurrentTag == "<span>" || en.CurrentTag == "<font>")
            {
                // OpenXml limits the border to 4-side of the same color and style.
                SideBorder border = en.StyleAttributes.GetAsSideBorder("border");
                if (border.IsValid)
                {
                    styleAttributes.Add(new DocumentFormat.OpenXml.Wordprocessing.Border()
                    {
                        Val   = border.Style,
                        Color = border.Color.ToHexString(),
                        Size  = (uint)border.Width.ValueInPx * 4,
                        Space = 1U
                    });
                }
            }

            // the first class that resolves to a paragraph style is applied; the remaining ones are ignored
            String[] classes = en.Attributes.GetAsClass();
            if (classes != null)
            {
                for (int i = 0; i < classes.Length; i++)
                {
                    string className = documentStyle.GetStyle(classes[i], StyleValues.Paragraph, ignoreCase: true);
                    if (className != null)
                    {
                        containerStyleAttributes.Add(new ParagraphStyleId()
                        {
                            Val = className
                        });
                        newParagraph = true;
                        break;
                    }
                }
            }

            // top/bottom margins become spacing before/after, left/right margins become an indentation
            Margin      margin      = en.StyleAttributes.GetAsMargin("margin");
            Indentation indentation = null;
            if (!margin.IsEmpty)
            {
                if (margin.Top.IsFixed || margin.Bottom.IsFixed)
                {
                    SpacingBetweenLines spacing = new SpacingBetweenLines();
                    if (margin.Top.IsFixed)
                    {
                        spacing.Before = margin.Top.ValueInDxa.ToString(CultureInfo.InvariantCulture);
                    }
                    if (margin.Bottom.IsFixed)
                    {
                        spacing.After = margin.Bottom.ValueInDxa.ToString(CultureInfo.InvariantCulture);
                    }
                    containerStyleAttributes.Add(spacing);
                }
                if (margin.Left.IsFixed || margin.Right.IsFixed)
                {
                    indentation = new Indentation();
                    if (margin.Left.IsFixed)
                    {
                        indentation.Left = margin.Left.ValueInDxa.ToString(CultureInfo.InvariantCulture);
                    }
                    if (margin.Right.IsFixed)
                    {
                        indentation.Right = margin.Right.ValueInDxa.ToString(CultureInfo.InvariantCulture);
                    }
                    containerStyleAttributes.Add(indentation);
                }
            }

            // implemented by giorand (feature #13787)
            Unit textIndent = en.StyleAttributes.GetAsUnit("text-indent");
            if (textIndent.IsValid && (en.CurrentTag == "<p>" || en.CurrentTag == "<div>"))
            {
                if (indentation == null)
                {
                    // no margin-based indentation was registered above: create and register one now
                    indentation = new Indentation();
                    containerStyleAttributes.Add(indentation);
                }

                // When the indentation comes from the margin handling, that instance is already in
                // containerStyleAttributes; it must not be added a second time. Setting FirstLine on it
                // is enough since the list holds a reference to the same object.
                indentation.FirstLine = textIndent.ValueInDxa.ToString(CultureInfo.InvariantCulture);
            }

            this.BeginTag(en.CurrentTag, containerStyleAttributes);

            // Process general run styles
            documentStyle.Runs.ProcessCommonAttributes(en, styleAttributes);

            return newParagraph;
        }
示例#10
0
        /// <summary>
        /// Reads the attributes of the current table-related tag (td, thead, tr, ...) and converts the
        /// background color, the vertical and horizontal alignments and the class to their OpenXml equivalence.
        /// </summary>
        /// <param name="en">The Html enumerator positioned on a <i>table (or related)</i> tag.</param>
        /// <param name="runStyleAttributes">The collection of attributes where to store new discovered attributes.</param>
        public void ProcessCommonAttributes(HtmlEnumerator en, IList<OpenXmlElement> runStyleAttributes)
        {
            var containerStyleAttributes = new List<OpenXmlElement>();

            // Background lookup order: style "background-color", style "background", attribute "bgcolor".
            // "background-color" is also handled by RunStyleCollection (bug #13212); also applies on <th> (issue #20).
            var fillColor = en.StyleAttributes.GetAsColor("background-color");
            if (fillColor.IsEmpty)
            {
                fillColor = en.StyleAttributes.GetAsColor("background");
            }
            if (fillColor.IsEmpty)
            {
                fillColor = en.Attributes.GetAsColor("bgcolor");
            }
            if (!fillColor.IsEmpty)
            {
                Shading shading = new Shading();
                shading.Val = ShadingPatternValues.Clear;
                shading.Color = "auto";
                shading.Fill = fillColor.ToHexString();
                containerStyleAttributes.Add(shading);
            }

            // vertical alignment: the style attribute first, then the legacy "valign" attribute
            var verticalAlign = en.StyleAttributes["vertical-align"] ?? en.Attributes["valign"];
            if (verticalAlign != null)
            {
                TableVerticalAlignmentValues? cellAlign = Converter.ToVAlign(verticalAlign);
                if (cellAlign.HasValue)
                {
                    containerStyleAttributes.Add(new TableCellVerticalAlignment
                    {
                        Val = cellAlign
                    });
                }
            }

            // horizontal alignment: the style attribute first, then the legacy "align" attribute
            var horizontalAlign = en.StyleAttributes["text-align"] ?? en.Attributes["align"];
            if (horizontalAlign != null)
            {
                JustificationValues? justify = Converter.ToParagraphAlign(horizontalAlign);
                if (justify.HasValue)
                {
                    this.BeginTagForParagraph(en.CurrentTag, new KeepNext(), new Justification
                    {
                        Val = justify
                    });
                }
            }

            // implemented by ddforge
            // only one Style can be applied in OpenXml and dealing with inheritance is out of scope
            // NOTE(review): the style is looked up as a Table style but emitted as a RunStyle - confirm this is intended
            String[] classes = en.Attributes.GetAsClass();
            if (classes != null)
            {
                foreach (String cssClass in classes)
                {
                    string styleName = documentStyle.GetStyle(cssClass, StyleValues.Table, ignoreCase: true);
                    if (styleName == null)
                    {
                        continue;
                    }

                    containerStyleAttributes.Add(new RunStyle
                    {
                        Val = styleName
                    });
                    break;
                }
            }

            this.BeginTag(en.CurrentTag, containerStyleAttributes);

            // Process general run styles
            documentStyle.Runs.ProcessCommonAttributes(en, runStyleAttributes);
        }