Provides more robust versions of various IO methods. The original intent of this class is to mitigate failures that occur when we attempt IO on a file that is locked by another application. Our theory is that some anti-virus software locks files while it scans them. There is a similar class in SIL.IO, but that one handles the more generic calls, which do not require additional dependencies.
        /// <summary>
        /// Converts HTML text into an <see cref="XmlDocument"/>. The content is written to a temporary file,
        /// run through HTML Tidy with XML output enabled, and the result is loaded with whitespace preserved.
        /// Several workarounds are applied before and after the tidy pass (marker text around inline elements,
        /// SVG removal/restoration, a CDATA-protected section, filler for empty elements) so that those parts
        /// of the document survive the round trip.
        /// </summary>
        /// <param name="content">The HTML text to convert.</param>
        /// <param name="includeXmlDeclaration">Passed to tidy's AddXmlDeclaration option; defaults to false.</param>
        /// <exception cref="ApplicationException">Thrown when tidy's CleanAndRepair reports errors.</exception>
        /// <exception cref="Exception">
        /// Thrown when post-processing or loading the tidied output as XML fails. The message contains the
        /// original error message followed by the tidied contents; the original exception is the inner exception.
        /// </exception>
        /// <returns>The resulting document, after RemoveAllContentTypesMetas has been applied to it.</returns>
        public static XmlDocument GetXmlDomFromHtml(string content, bool includeXmlDeclaration = false)
        {
            var dom = new XmlDocument();

            content = AddFillerToKeepTidyFromRemovingEmptyElements(content);

            // In BL-2250, we found that in previous versions, this method would turn, for example,
            // "<u> </u>" into REMOVEWHITESPACE. That is fixed now, but this is needed to clean up existing books.
            content = content.Replace(@"REMOVEWHITESPACE", "");

            // tidy likes to insert newlines before <b>, <u>, <i>, and these other elements and convert any existing
            // whitespace there to a space.  (span was found by pursuing BL-7558)
            // The marker inserted here is stripped again (together with tidy's whitespace) after the tidy pass.
            content = Regex.Replace(content, @"<([ubi]|em|strong|sup|sub|span[^>]*)>", "REMOVEWHITESPACE<$1>");

            // fix for <br></br> tag doubling
            content = content.Replace("<br></br>", "<br />");

            // fix for > and similar in <style> element protected by CDATA.
            // At present we only need to account for this occurring once.
            // See Browser.SaveCustomizedCssRules.
            const string restoreCdataHere = "/****RestoreCDATAHere*****/";
            var savedCdata = "";
            var startOfCdata = content.IndexOf(Browser.CdataPrefix, StringComparison.InvariantCulture);
            // Look for the suffix only from the prefix onwards, so that a stray suffix occurring earlier in
            // the content cannot prevent the real CDATA section from being protected.
            var endOfCdata = startOfCdata >= 0
                ? content.IndexOf(Browser.CdataSuffix, startOfCdata, StringComparison.InvariantCulture)
                : -1;
            if (endOfCdata >= 0)
            {
                endOfCdata += Browser.CdataSuffix.Length;
                savedCdata = content.Substring(startOfCdata, endOfCdata - startOfCdata);
                content = content.Substring(0, startOfCdata) + restoreCdataHere + content.Substring(endOfCdata);
            }

            var removedSvgs = new List<string>();
            content = RemoveSvgs(content, removedSvgs);

            // Not a using block: the temp file is disposed explicitly in the finally below so that a failure
            // to delete it can be caught and ignored (see BL-46).
            var temp = new TempFile();
            try
            {
                RobustFile.WriteAllText(temp.Path, content, Encoding.UTF8);
                using (var tidy = RobustIO.DocumentFromFile(temp.Path))
                {
                    tidy.ShowWarnings = false;
                    tidy.Quiet = true;
                    tidy.WrapAt = 0; // prevents textarea wrapping.
                    tidy.AddTidyMetaElement = false;
                    tidy.OutputXml = true;
                    tidy.CharacterEncoding = EncodingType.Utf8;
                    tidy.InputCharacterEncoding = EncodingType.Utf8;
                    tidy.OutputCharacterEncoding = EncodingType.Utf8;
                    tidy.DocType = DocTypeMode.Omit; // when it supports html5, then we will let it output it
                    // maybe try this? tidy.Markup = true;

                    tidy.AddXmlDeclaration = includeXmlDeclaration;

                    // NB: this does not prevent tidy from deleting <span data-libray='somethingImportant'></span>
                    tidy.MergeSpans = AutoBool.No;
                    tidy.DropEmptyParagraphs = false;
                    tidy.MergeDivs = AutoBool.No;

                    var errors = tidy.CleanAndRepair();
                    if (!string.IsNullOrEmpty(errors))
                    {
                        throw new ApplicationException(errors);
                    }

                    var newContents = tidy.Save();
                    try
                    {
                        newContents = RestoreSvgs(newContents, removedSvgs);
                        newContents = RemoveFillerInEmptyElements(newContents);

                        newContents = newContents.Replace("&nbsp;", "&#160;");
                        // REVIEW: 1) are there others? &amp; and such are fine.
                        //         2) should we convert back to &nbsp; on save?

                        // The regex here is mainly for the \s* as a convenient way to remove whatever whitespace TIDY
                        // has inserted. It's a fringe benefit that we can use the [biu]|... to deal with all these
                        // elements in one replace.
                        newContents = Regex.Replace(newContents, @"REMOVEWHITESPACE\s*<([biu]|em|strong|sup|sub|span[^>]*)>", "<$1>");

                        // In BL-2250, we still had REMOVEWHITESPACE sticking around sometimes. The way we reproduced
                        // it was with <u> </u>. That is, we started with "REMOVEWHITESPACE <u> </u>", then libtidy
                        // (properly) removed the <u></u>, leaving us with only "REMOVEWHITESPACE".
                        newContents = Regex.Replace(newContents, @"REMOVEWHITESPACE", "");

                        // remove blank lines at the end of style blocks
                        newContents = Regex.Replace(newContents, @"\s+<\/style>", "</style>");

                        // remove <br> elements immediately preceding </p> close tag (BL-2557)
                        // These are apparently inserted by ckeditor as far as we can tell.  They don't show up on
                        // fields that have never had a ckeditor activated, and always show up on fields that have
                        // received focus and activated an inline ckeditor.  The ideal ckeditor use case appears
                        // to be for data entry as part of a web page that get stored separately, with the data
                        // obtained something like the following in javascript:
                        //        ckedit.on('blur', function(evt) {
                        //            var editor = evt['editor'];
                        //            var data = editor.getData();
                        //            <at this point, the data looks okay, with any <br> element before the </p> tag.>
                        //            <store the data somewhere: the following lines have no effect, and may be silly.>
                        //            var div = mapCkeditDiv[editor.id];
                        //            div.innerHTML = data;
                        //        });
                        // Examining the initial value of div.innerHTML shows the unwanted <br> element, but it is
                        // not in the data returned by editor.getData().  Since assigning to div.innerHTML doesn't
                        // affect what gets written to the file, this hack was implemented instead.
                        newContents = Regex.Replace(newContents, @"(<br></br>|<br ?/>)[\r\n]*</p>", "</p>");

                        // Put back the CDATA section that was replaced by a placeholder before the tidy pass.
                        newContents = newContents.Replace(restoreCdataHere, savedCdata);

                        // Don't let spaces between <strong>, <em>, or <u> elements be removed. (BL-2484)
                        dom.PreserveWhitespace = true;
                        dom.LoadXml(newContents);
                    }
                    catch (Exception e)
                    {
                        // Include the tidied contents in the message to help diagnose the failure, and keep the
                        // original exception as the inner exception so its type and stack trace are not lost.
                        throw new Exception(string.Format("{0}{2}{2}{1}",
                            e.Message, newContents, Environment.NewLine), e);
                    }
                }
            }
            finally
            {
                try
                {
                    // It's a mystery but http://jira.palaso.org/issues/browse/BL-46 was reported by several people
                    // on Win XP, even though a look at html tidy dispose indicates that it does dispose (and thus
                    // close) the stream. Therefore the dispose is an explicit call so that the error can be caught
                    // and ignored, leaving an extra file in Temp. Doing this in a finally also cleans up the temp
                    // file when tidy or the XML load fails.
                    temp.Dispose();
                    // enhance... could make a version of this which collects up any failed deletes and re-attempts
                    // them with each call to this
                }
                catch (Exception error)
                {
                    // Deliberately swallowed (best effort); only flagged in debug builds.
                    Debug.Fail("Repro of http://jira.palaso.org/issues/browse/BL-46 ", error.ToString());
                }
            }

            // this is a hack... each time we write the content, we add a new
            // <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
            // so for now, we remove it when we read it in. It'll get added again when we write it out
            RemoveAllContentTypesMetas(dom);

            return dom;
        }