Пример #1
0
        /// <summary>
        /// Consumes chunks from the enumerator for as long as <c>MoveUntilMatch(endTag)</c> returns true.
        /// A chunk flagged as an HTML tag is dispatched to its handler registered in <c>knownTags</c>
        /// (tags without a handler are silently ignored); any other chunk is HTML-decoded and queued
        /// in <c>elements</c> as a text run.
        /// </summary>
        /// <param name="en">The enumerator positioned over the HTML being parsed.</param>
        /// <param name="endTag">Value forwarded to <c>MoveUntilMatch</c>; presumably the tag at which processing stops.</param>
        private void ProcessHtmlChunks(HtmlEnumerator en, String endTag)
        {
            while (en.MoveUntilMatch(endTag))
            {
                if (!en.IsCurrentHtmlTag)
                {
                    // Plain text: decode the entities and keep the whitespace as written.
                    Text decodedText = new Text(HttpUtility.HtmlDecode(en.Current));
                    decodedText.Space = SpaceProcessingModeValues.Preserve;

                    Run textRun = new Run(decodedText);

                    // The run receives the styles gathered from the tags seen so far.
                    htmlStyles.Runs.ApplyTags(textRun);
                    elements.Add(textRun);
                    continue;
                }

                Action<HtmlEnumerator> tagHandler;
                if (!knownTags.TryGetValue(en.CurrentTag, out tagHandler))
                {
                    // Unknown or not yet implemented tag: nothing to do.
                    continue;
                }

                if (Logging.On)
                {
                    Logging.PrintVerbose(en.Current);
                }

                tagHandler(en);
            }
        }
Пример #2
0
 /// <summary>
 /// Completes the current paragraph when elements are still pending, then
 /// processes the following chunks through <c>ProcessHtmlChunks</c> using the given end tag.
 /// </summary>
 /// <param name="en">The enumerator positioned over the HTML being parsed.</param>
 /// <param name="endTag">The end tag forwarded to <c>ProcessHtmlChunks</c>.</param>
 private void AlternateProcessHtmlChunks(HtmlEnumerator en, string endTag)
 {
     // Anything already collected is closed off in its own paragraph first.
     bool hasPendingElements = elements.Count > 0;
     if (hasPendingElements)
     {
         CompleteCurrentParagraph();
     }

     ProcessHtmlChunks(en, endTag);
 }
Пример #3
0
        /// <summary>
        /// A few attributes are shared by a large number of tags. This method checks them for a limited
        /// set of tags (&lt;p&gt;, &lt;pre&gt;, &lt;div&gt;, &lt;span&gt; and &lt;body&gt;): page breaks,
        /// left/right padding, and the common paragraph attributes.
        /// </summary>
        /// <param name="en">The enumerator positioned on the tag whose style attributes are read.</param>
        /// <param name="styleAttributes">The list forwarded to the common attribute processing of the paragraph styles.</param>
        /// <returns>Returns true if the processing of this tag should generate a new paragraph.</returns>
        private bool ProcessContainerAttributes(HtmlEnumerator en, IList <OpenXmlElement> styleAttributes)
        {
            // Page breaks are only handled outside of a table context, or on a <pre> tag.
            if (!tables.HasContext || en.CurrentTag == "<pre>")
            {
                String breakAfter = en.StyleAttributes["page-break-after"];
                if (breakAfter == "always")
                {
                    // Added as a paragraph of its own, holding only the page break.
                    Break pageBreakAfter = new Break();
                    pageBreakAfter.Type = BreakValues.Page;
                    paragraphs.Add(new Paragraph(new Run(pageBreakAfter)));
                }

                String breakBefore = en.StyleAttributes["page-break-before"];
                if (breakBefore == "always")
                {
                    // Queued with the pending elements: the break itself, then a rendered-page-break marker.
                    Break pageBreakBefore = new Break();
                    pageBreakBefore.Type = BreakValues.Page;
                    elements.Add(new Run(pageBreakBefore));
                    elements.Add(new Run(new LastRenderedPageBreak()));
                }
            }

            // Left and right padding are translated into paragraph indentation.
            var padding = en.StyleAttributes.GetAsMargin("padding");
            if (!padding.IsEmpty)
            {
                if (padding.Left.IsFixed || padding.Right.IsFixed)
                {
                    Indentation paddingIndent = new Indentation();

                    if (padding.Left.Value > 0)
                    {
                        paddingIndent.Left = padding.Left.ValueInDxa.ToString(CultureInfo.InvariantCulture);
                    }

                    if (padding.Right.Value > 0)
                    {
                        paddingIndent.Right = padding.Right.ValueInDxa.ToString(CultureInfo.InvariantCulture);
                    }

                    currentParagraph.InsertInProperties(properties => properties.Indentation = paddingIndent);
                }
            }

            // Only the common attribute processing decides whether a new paragraph is needed.
            return htmlStyles.Paragraph.ProcessCommonAttributes(en, styleAttributes);
        }
Пример #4
0
        /// <summary>
        /// Starts the parse processing: resets the parser state, walks every chunk of the
        /// supplied HTML and collects the resulting elements.
        /// </summary>
        /// <param name="html">The HTML to parse. A null or empty string yields an empty array.</param>
        /// <returns>Returns the list of parsed paragraphs.</returns>
        public IList <OpenXmlCompositeElement> Parse(String html)
        {
            if (String.IsNullOrEmpty(html))
            {
                return new Paragraph[0];
            }

            // Guarantee that the main part has a document with a body before anything accesses it.
            Document document = mainPart.Document;
            if (document == null)
            {
                new Document(new Body()).Save(mainPart);
            }
            else if (document.Body == null)
            {
                document.Body = new Body();
            }

            // Start from a clean state.
            elements = new List <OpenXmlElement>();
            paragraphs = new List <OpenXmlCompositeElement>();
            tables = new TableContext();
            htmlStyles.Runs.Reset();
            currentParagraph = null;

            // Open the first paragraph and register it in the result list.
            currentParagraph = htmlStyles.Paragraph.NewParagraph();
            paragraphs.Add(currentParagraph);

            if (htmlStyles.DefaultParagraphStyle != null)
            {
                ParagraphStyleId defaultStyleId = new ParagraphStyleId();
                defaultStyleId.Val = htmlStyles.DefaultParagraphStyle;

                ParagraphProperties defaultProperties = new ParagraphProperties();
                defaultProperties.ParagraphStyleId = defaultStyleId;

                currentParagraph.ParagraphProperties = defaultProperties;
            }

            // A null end tag is passed here, so no specific closing tag is requested.
            HtmlEnumerator en = new HtmlEnumerator(html);
            ProcessHtmlChunks(en, null);

            // Whatever is still pending goes into the paragraph that is currently open.
            if (elements.Count > 0)
            {
                this.currentParagraph.Append(elements);
            }

            // Parameterless entry point of the empty-paragraph clean-up; it keeps the public
            // return type of Parse untouched.
            RemoveEmptyParagraphs();

            return paragraphs;
        }