GetXmlDomFromHtml() public static method

Throws if there are parsing errors
public static GetXmlDomFromHtml ( string content, bool includeXmlDeclaration = false ) : XmlDocument
content string
includeXmlDeclaration bool
return System.Xml.XmlDocument
Example #1
0
        /// <summary>
        /// The browser is only editing/displaying a copy of one page of the document,
        /// so any changes made there need to be copied back into the real DOM (_pageDom).
        /// </summary>
        /// <remarks>
        /// Returns without doing anything when there is no page DOM, when the browser document has
        /// no body, or when no 'bloom-page' div can be found on either side. Exceptions raised while
        /// converting the browser html and copying it over are reported to the user through
        /// ErrorReport.NotifyUserOfProblem instead of propagating to the caller.
        /// </remarks>
        private void LoadPageDomFromBrowser()
        {
            if (_pageDom == null)
            {
                return;
            }

#if DEBUG
            if (_pageDom.SelectNodes("//textarea").Count > 0)
            {
                Debug.Fail("Oh, a chance to test bluring textarea's!");
            }
#endif
            // As of august 2012 textareas only occur in the Calendar, but the blur below is done
            // unconditionally (the "only if there are textareas" test was disabled).
            //
            // An earlier approach forced an onblur so that we could get at the actual user-edited value:
            //     _browser.WebBrowserFocus.Deactivate();
            //     _browser.WebBrowserFocus.Activate();
            // That caused problems, with Bloom itself (the Shell) not knowing that it is active.
            // Now, we just do the blur directly.
            var activeElement = _browser.Window.Document.ActiveElement;
            if (activeElement != null)
            {
                activeElement.Blur();
            }

            var body = _browser.Document.GetElementsByTagName("body");
            if (body.Count == 0)                //review: this does happen... onValidating comes along, but there is no body. Assuming it is a timing issue.
            {
                return;
            }

            var content = body[0].InnerHtml;

            try
            {
                var dom = XmlHtmlConverter.GetXmlDomFromHtml(content, false);
                var bodyDom = dom.SelectSingleNode("//body");

                if (_pageDom == null)
                {
                    return;
                }

                var destinationDomPage = _pageDom.SelectSingleNode("//body/div[contains(@class,'bloom-page')]");
                if (destinationDomPage == null)
                {
                    return;
                }

                // NB: XmlNode's string indexer (node["id"]) returns the first *child element* named "id",
                // not the id attribute, so it must not be used here. The XPath above only selects div
                // elements, so the cast is safe; GetAttribute returns "" when the attribute is missing.
                var expectedPageId = ((XmlElement)destinationDomPage).GetAttribute("id");

                var browserDomPage = bodyDom.SelectSingleNode("//body/div[contains(@class,'bloom-page')]");
                if (browserDomPage == null)
                {
                    return;                    //why? but I've seen it happen
                }

                var thisPageId = ((XmlElement)browserDomPage).GetAttribute("id");
                if (expectedPageId != thisPageId)
                {
                    // The browser is showing a different page than the one we are about to overwrite.
                    Palaso.Reporting.ErrorReport.NotifyUserOfProblem("Bloom encountered an error saving that page (unexpected page id)");
                    return;
                }
                _pageDom.GetElementsByTagName("body")[0].InnerXml = bodyDom.InnerXml;

                // Find the stylesheet whose owning node has id="customBookStyles", if any.
                var customStyleSheet = _browser.Document.StyleSheets.FirstOrDefault(s =>
                {
                    var idNode = s.OwnerNode.Attributes["id"];
                    return idNode != null && idNode.NodeValue == "customBookStyles";
                });

                if (customStyleSheet != null)
                {
                    /* why are we bothering to walk through the rules instead of just copying the html of the style tag? Because that doesn't
                     * actually get updated when the javascript edits the stylesheets of the page. Well, the <style> tag gets created, but
                     * rules don't show up inside of it. So
                     * this won't work: _pageDom.GetElementsByTagName("head")[0].InnerText = customStyleSheet.OwnerNode.OuterHtml;
                     */
                    var styles = new StringBuilder();
                    styles.AppendLine("<style id='customStyles' type='text/css'>");
                    foreach (var cssRule in customStyleSheet.CssRules)
                    {
                        styles.AppendLine(cssRule.CssText);
                    }
                    styles.AppendLine("</style>");
                    Debug.WriteLine("*CustomStylesheet in browser:" + styles);
                    // Note: this replaces the entire content of <head> with the rebuilt style element.
                    _pageDom.GetElementsByTagName("head")[0].InnerXml = styles.ToString();
                }

                //enhance: we have jscript for this: cleanup()... but running jscript in this method was leading the browser to show blank screen
//				foreach (XmlElement j in _pageDom.SafeSelectNodes("//div[contains(@class, 'ui-tooltip')]"))
//				{
//					j.ParentNode.RemoveChild(j);
//				}
//				foreach (XmlAttribute j in _pageDom.SafeSelectNodes("//@ariasecondary-describedby | //@aria-describedby"))
//				{
//					j.OwnerElement.RemoveAttributeNode(j);
//				}
            }
            catch (Exception e)
            {
                Palaso.Reporting.ErrorReport.NotifyUserOfProblem(e, "Sorry, Bloom choked on something on this page (invalid incoming html).\r\n\r\n+{0}", e);
                return;
            }

            // Finally, check that what we ended up with is still acceptable html.
            try
            {
                XmlHtmlConverter.ThrowIfHtmlHasErrors(_pageDom.OuterXml);
            }
            catch (Exception e)
            {
                Palaso.Reporting.ErrorReport.NotifyUserOfProblem(e, "Sorry, Bloom choked on something on this page (validating page).\r\n\r\n+{0}", e.Message);
            }
        }
Example #2
0
        /// <summary>
        /// The browser is only editing/displaying a copy of one page of the document,
        /// so any changes made there need to be copied back into the real DOM (_pageEditDom).
        /// </summary>
        /// <remarks>
        /// Asserts (in debug builds) that it is called without needing Invoke.
        /// Returns without doing anything when there is no page DOM, when the content document or
        /// its body cannot be found, or when no 'bloom-page' div can be found on either side.
        /// Exceptions raised while copying are rethrown in DEBUG builds and swallowed otherwise;
        /// validation problems are reported to the user through ErrorReport.NotifyUserOfProblem.
        /// </remarks>
        private void LoadPageDomFromBrowser()
        {
            Debug.Assert(!InvokeRequired);
            if (_pageEditDom == null)
            {
                return;
            }

            var contentDocument = _browser.Document;

            if (_pageEditDom != _rootDom)
            {
                // Assume _editDom corresponds to a frame called 'page' in the root. This may eventually need to be more configurable.
                if (_browser.Window == null || _browser.Window.Document == null)
                {
                    return;
                }
                var frameElement = _browser.Window.Document.GetElementById("page") as GeckoIFrameElement;
                if (frameElement == null)
                {
                    return;
                }
                contentDocument = frameElement.ContentDocument;
            }
            if (contentDocument == null)
            {
                return;                 // can this happen?
            }

            // As of august 2012 textareas only occur in the Calendar
            if (_pageEditDom.SelectNodes("//textarea").Count > 0)
            {
                //This approach was to force an onblur so that we can get at the actual user-edited value.
                //This caused problems, with Bloom itself (the Shell) not knowing that it is active.
                //_browser.WebBrowserFocus.Deactivate();
                //_browser.WebBrowserFocus.Activate();

                // Now, we just do the blur directly.
                var activeElement = contentDocument.ActiveElement;
                if (activeElement != null)
                {
                    activeElement.Blur();
                }
            }

            var body = contentDocument.GetElementsByTagName("body");

            if (body.Length == 0)               //review: this does happen... onValidating comes along, but there is no body. Assuming it is a timing issue.
            {
                return;
            }

            var content = body[0].InnerHtml;

            try
            {
                var dom = XmlHtmlConverter.GetXmlDomFromHtml(content, false);
                var bodyDom = dom.SelectSingleNode("//body");

                if (_pageEditDom == null)
                {
                    return;
                }

                var destinationDomPage = _pageEditDom.SelectSingleNode("//body//div[contains(@class,'bloom-page')]");
                if (destinationDomPage == null)
                {
                    return;
                }

                // NB: XmlNode's string indexer (node["id"]) returns the first *child element* named "id",
                // not the id attribute, so it must not be used here. The XPath above only selects div
                // elements, so the cast is safe; GetAttribute returns "" when the attribute is missing.
                var expectedPageId = ((XmlElement)destinationDomPage).GetAttribute("id");

                var browserDomPage = bodyDom.SelectSingleNode("//body//div[contains(@class,'bloom-page')]");
                if (browserDomPage == null)
                {
                    return;                    //why? but I've seen it happen
                }

                var thisPageId = ((XmlElement)browserDomPage).GetAttribute("id");
                if (expectedPageId != thisPageId)
                {
                    // The browser is showing a different page than the one we are about to overwrite.
                    Palaso.Reporting.ErrorReport.NotifyUserOfProblem("Bloom encountered an error saving that page (unexpected page id)");
                    return;
                }
                _pageEditDom.GetElementsByTagName("body")[0].InnerXml = bodyDom.InnerXml;

                // Find the stylesheet whose owning node has title="userModifiedStyles", if any.
                var userModifiedStyleSheet = contentDocument.StyleSheets.FirstOrDefault(s =>
                {
                    // workaround for bug #40 (https://bitbucket.org/geckofx/geckofx-29.0/issue/40/xpath-error-hresult-0x805b0034)
                    // var titleNode = s.OwnerNode.EvaluateXPath("@title").GetSingleNodeValue();
                    var titleNode = s.OwnerNode.EvaluateXPath("@title").GetNodes().FirstOrDefault();
                    return titleNode != null && titleNode.NodeValue == "userModifiedStyles";
                });

                if (userModifiedStyleSheet != null)
                {
                    SaveCustomizedCssRules(userModifiedStyleSheet);
                }

                //enhance: we have jscript for this: cleanup()... but running jscript in this method was leading the browser to show blank screen
//				foreach (XmlElement j in _editDom.SafeSelectNodes("//div[contains(@class, 'ui-tooltip')]"))
//				{
//					j.ParentNode.RemoveChild(j);
//				}
//				foreach (XmlAttribute j in _editDom.SafeSelectNodes("//@ariasecondary-describedby | //@aria-describedby"))
//				{
//					j.OwnerElement.RemoveAttributeNode(j);
//				}
            }
            catch (Exception e)
            {
                // Pass the exception along as the detail message so the failure dialog/log shows what went wrong.
                Debug.Fail("Debug Mode Only: Error while trying to read changes to CSSRules. In Release, this just gets swallowed. Will now re-throw the exception.", e.ToString());
#if DEBUG
                throw;
#endif
            }

            // Finally, check that what we ended up with is still acceptable html.
            try
            {
                XmlHtmlConverter.ThrowIfHtmlHasErrors(_pageEditDom.OuterXml);
            }
            catch (Exception e)
            {
                ErrorReport.NotifyUserOfProblem(e,
                                                "Sorry, Bloom choked on something on this page (validating page).{1}{1}+{0}",
                                                e.Message, Environment.NewLine);
            }
        }