IsInlineElement() static private method

Returns true if xmlElementName represents an inline formatting element.
static private IsInlineElement ( string xmlElementName ) : bool
xmlElementName string
return bool
            /// <summary>
            /// Opens a structuring element (such as Div or Table): attaches it to the
            /// element currently on top of the opened-elements stack, when there is one,
            /// and then pushes it so that it becomes the innermost opened element.
            /// </summary>
            /// <param name="htmlElement">The element that is being opened.</param>
            private void OpenStructuringElement(XmlElement htmlElement)
            {
                string openedName = htmlElement.LocalName;

                // A block element acts as a delimiter for inline elements: every inline that is
                // still open gets popped and a copy of it is parked on the pending stack, so it
                // can be re-opened inside the following structural element (if any).
                // This guarantees that inline elements only ever appear within the most nested blocks.
                if (HtmlSchema.IsBlockElement(openedName))
                {
                    while (_openedElements.Count != 0)
                    {
                        if (!HtmlSchema.IsInlineElement(_openedElements.Peek().LocalName))
                        {
                            break;
                        }

                        XmlElement suspendedInline = _openedElements.Pop();
                        InvariantAssert(_openedElements.Count > 0, "OpenStructuringElement: stack of opened elements cannot become empty here");

                        _pendingInlineElements.Push(CreateElementCopy(suspendedInline));
                    }
                }

                // Attach the new element to its parent.
                if (_openedElements.Count > 0)
                {
                    XmlElement parent = _openedElements.Peek();

                    // Some known block elements (LI and P) are closed automatically
                    // when the next element starts.
                    if (HtmlSchema.ClosesOnNextElementStart(parent.LocalName, openedName))
                    {
                        _openedElements.Pop();

                        parent = null;
                        if (_openedElements.Count > 0)
                        {
                            parent = _openedElements.Peek();
                        }
                    }

                    // NOTE:
                    // A null parent is not really expected - it would mean two top-level P or LI
                    // elements (without a parent). In that odd case every paragraph except the
                    // first one is lost.
                    if (parent != null)
                    {
                        parent.AppendChild(htmlElement);
                    }
                }

                // The new element is now the innermost opened one.
                _openedElements.Push(htmlElement);
            }
            /// <summary>
            /// Handles a closing tag. If the tag matches the inline element on top of the
            /// pending stack, that (empty) inline is appended to the current parent.
            /// Otherwise, if IsElementOpened reports the name as opened, opened elements are
            /// popped until the matching one has been removed. A closing tag that matches
            /// nothing is ignored.
            /// </summary>
            /// <param name="htmlElementName">Name of the element whose closing tag was read.</param>
            private void CloseElement(string htmlElementName)
            {
                InvariantAssert(_openedElements.Count > 0, "CloseElement: Stack of opened elements cannot be empty, as we have at least one artificial root element");

                // Case 1: the element was opened but is still waiting to be added to a parent.
                bool closesPendingInline =
                    _pendingInlineElements.Count > 0 &&
                    _pendingInlineElements.Peek().LocalName == htmlElementName;

                if (closesPendingInline)
                {
                    // An empty inline element is being closed.
                    // HtmlConverter will skip empty inlines, but for completeness they are kept on parser level.
                    XmlElement emptyInline = _pendingInlineElements.Pop();
                    InvariantAssert(_openedElements.Count > 0, "CloseElement: Stack of opened elements cannot be empty, as we have at least one artificial root element");
                    _openedElements.Peek().AppendChild(emptyInline);
                    return;
                }

                // Case 2: the element was never opened - ignore the unbalanced closing tag.
                if (!IsElementOpened(htmlElementName))
                {
                    return;
                }

                // Case 3: unwind the stack down to the matching element.
                // The last entry (the artificial root) is never popped.
                while (_openedElements.Count > 1)
                {
                    XmlElement popped = _openedElements.Pop();
                    string poppedName = popped.LocalName;

                    if (poppedName == htmlElementName)
                    {
                        return;
                    }

                    // Unbalanced inlines are carried over to the content of the next element.
                    if (HtmlSchema.IsInlineElement(poppedName))
                    {
                        _pendingInlineElements.Push(CreateElementCopy(popped));
                    }
                }
            }
            /// <summary>
            /// Parses the stream of html tokens supplied by the lexical analyzer
            /// until the end-of-file token is reached, building an element tree
            /// under an artificial "html" root.
            /// </summary>
            /// <returns>
            /// The artificial root element, or - when the root ended up with exactly one
            /// child and that child is itself an "html" element - that child.
            /// </returns>
            private XmlElement ParseHtmlContent()
            {
                // Create an artificial root element to be able to group multiple top-level elements.
                // It is an "html" element which may duplicate the real HTML element; that is ok,
                // as HtmlConverter will swallow it painlessly.
                XmlElement htmlRootElement = _document.CreateElement("html", XhtmlNamespace);

                OpenStructuringElement(htmlRootElement);

                while (_htmlLexicalAnalyzer.NextTokenType != HtmlTokenType.EOF)
                {
                    if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.OpeningTagStart)
                    {
                        _htmlLexicalAnalyzer.GetNextTagToken();
                        if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Name)
                        {
                            // Tag names are identifiers, not linguistic text: lower-case them with the
                            // invariant culture. The culture-sensitive ToLower() would map 'I' to a
                            // dotless 'ı' under e.g. a Turkish culture, so "DIV", "LI" or "IMG" would
                            // no longer match the schema tables.
                            string htmlElementName = _htmlLexicalAnalyzer.NextToken.ToLowerInvariant();
                            _htmlLexicalAnalyzer.GetNextTagToken();

                            // Create an element
                            XmlElement htmlElement = _document.CreateElement(htmlElementName, XhtmlNamespace);

                            // Parse element attributes
                            ParseAttributes(htmlElement);

                            if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.EmptyTagEnd || HtmlSchema.IsEmptyElement(htmlElementName))
                            {
                                // It is an element without content (because of an explicit slash
                                // or based on implicit knowledge about html).
                                AddEmptyElement(htmlElement);
                            }
                            else if (HtmlSchema.IsInlineElement(htmlElementName))
                            {
                                // Elements known as formatting are pushed to a special
                                // pending stack, which allows them to be transferred
                                // over block tags - by doing this we convert
                                // overlapping tags into a normal hierarchical element structure.
                                OpenInlineElement(htmlElement);
                            }
                            else if (HtmlSchema.IsBlockElement(htmlElementName) || HtmlSchema.IsKnownOpenableElement(htmlElementName))
                            {
                                // This includes no-scope elements
                                OpenStructuringElement(htmlElement);
                            }
                            else
                            {
                                // Do nothing. Skip the whole opening tag.
                                // All unknown elements are ignored on their start tags,
                                // and thus they are ignored on their closing tags as well.
                                // We would not know what to do with them on conversion to Xaml anyway.
                            }
                        }
                        else
                        {
                            // The token following an opening angle bracket must be a name - the lexical
                            // analyzer must guarantee that. Otherwise the angle bracket is skipped and
                            // parsing continues as if the content were just text.
                            // Open question: assert here, or output "<" as a text run instead?
                            // InvariantAssert(false, "Angle bracket without a following name is not expected");
                        }
                    }
                    else if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.ClosingTagStart)
                    {
                        _htmlLexicalAnalyzer.GetNextTagToken();
                        if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Name)
                        {
                            // Same culture-invariant normalization as for opening tags, so that
                            // opening and closing names always compare equal.
                            string htmlElementName = _htmlLexicalAnalyzer.NextToken.ToLowerInvariant();

                            // Skip the name token. Assume that the following token is end of tag,
                            // but do not check this. If it is not true, we simply ignore one token
                            // - this is our recovery from bad xml in this case.
                            _htmlLexicalAnalyzer.GetNextTagToken();

                            CloseElement(htmlElementName);
                        }
                    }
                    else if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Text)
                    {
                        AddTextContent(_htmlLexicalAnalyzer.NextToken);
                    }
                    else if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Comment)
                    {
                        AddComment(_htmlLexicalAnalyzer.NextToken);
                    }

                    _htmlLexicalAnalyzer.GetNextContentToken();
                }

                // Get rid of the artificial root element when it merely wraps a single real "html" element.
                if (htmlRootElement.FirstChild is XmlElement &&
                    htmlRootElement.FirstChild == htmlRootElement.LastChild &&
                    htmlRootElement.FirstChild.LocalName.ToLowerInvariant() == "html")
                {
                    htmlRootElement = (XmlElement)htmlRootElement.FirstChild;
                }

                return htmlRootElement;
            }
Exemplo n.º 4
0
        /// <summary>
        ///     Parses the stream of html tokens supplied by the lexical analyzer
        ///     until the end-of-file token is reached, building an element tree
        ///     under an artificial "html" root.
        /// </summary>
        /// <returns>
        ///     The artificial root element, or - when the root ended up with exactly one
        ///     child and that child is itself an "html" element - that child.
        /// </returns>
        private XmlElement ParseHtmlContent()
        {
            // Create an artificial root element to be able to group multiple top-level elements.
            // It is an "html" element which may duplicate the real HTML element; that is ok,
            // as HtmlConverter will swallow it painlessly.
            var htmlRootElement = _document.CreateElement("html", XhtmlNamespace);

            OpenStructuringElement(htmlRootElement);

            while (_htmlLexicalAnalyzer.NextTokenType != HtmlTokenType.Eof)
            {
                if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.OpeningTagStart)
                {
                    _htmlLexicalAnalyzer.GetNextTagToken();
                    if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Name)
                    {
                        // Tag names are identifiers, not linguistic text: lower-case them with the
                        // invariant culture. The culture-sensitive ToLower() would map 'I' to a
                        // dotless 'ı' under e.g. a Turkish culture, so "DIV", "LI" or "IMG" would
                        // no longer match the schema tables.
                        var htmlElementName = _htmlLexicalAnalyzer.NextToken.ToLowerInvariant();
                        _htmlLexicalAnalyzer.GetNextTagToken();

                        // Create an element
                        var htmlElement = _document.CreateElement(htmlElementName, XhtmlNamespace);

                        // Parse element attributes
                        ParseAttributes(htmlElement);

                        if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.EmptyTagEnd ||
                            HtmlSchema.IsEmptyElement(htmlElementName))
                        {
                            // It is an element without content (because of an explicit slash
                            // or based on implicit knowledge about html).
                            AddEmptyElement(htmlElement);
                        }
                        else if (HtmlSchema.IsInlineElement(htmlElementName))
                        {
                            // Elements known as formatting are pushed to a special
                            // pending stack, which allows them to be transferred
                            // over block tags - by doing this we convert
                            // overlapping tags into a normal hierarchical element structure.
                            OpenInlineElement(htmlElement);
                        }
                        else if (HtmlSchema.IsBlockElement(htmlElementName) ||
                                 HtmlSchema.IsKnownOpenableElement(htmlElementName))
                        {
                            // This includes no-scope elements
                            OpenStructuringElement(htmlElement);
                        }

                        // Any other (unknown) element is skipped on its opening tag.
                    }
                }
                else if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.ClosingTagStart)
                {
                    _htmlLexicalAnalyzer.GetNextTagToken();
                    if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Name)
                    {
                        // Same culture-invariant normalization as for opening tags, so that
                        // opening and closing names always compare equal.
                        var htmlElementName = _htmlLexicalAnalyzer.NextToken.ToLowerInvariant();

                        // Skip the name token. Assume that the following token is end of tag,
                        // but do not check this. If it is not true, we simply ignore one token
                        // - this is our recovery from bad xml in this case.
                        _htmlLexicalAnalyzer.GetNextTagToken();

                        CloseElement(htmlElementName);
                    }
                }
                else if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Text)
                {
                    AddTextContent(_htmlLexicalAnalyzer.NextToken);
                }
                else if (_htmlLexicalAnalyzer.NextTokenType == HtmlTokenType.Comment)
                {
                    AddComment(_htmlLexicalAnalyzer.NextToken);
                }

                _htmlLexicalAnalyzer.GetNextContentToken();
            }

            // Get rid of the artificial root element when it merely wraps a single real "html" element.
            if (htmlRootElement.FirstChild is XmlElement &&
                htmlRootElement.FirstChild == htmlRootElement.LastChild &&
                htmlRootElement.FirstChild.LocalName.ToLowerInvariant() == "html")
            {
                htmlRootElement = (XmlElement)htmlRootElement.FirstChild;
            }

            return htmlRootElement;
        }