Пример #1
0
        /// <summary>
        /// Runs the markup through Tidy (XHTML output, doctype omitted) and returns the
        /// result re-serialized through <see cref="XElement"/>.
        /// </summary>
        /// <param name="html">The markup to tidy; it is encoded as UTF-8 before parsing.</param>
        /// <returns>The string produced by <c>XElement.Parse(...).ToString()</c> on the Tidy output.</returns>
        public string DoTidy(string html)
        {
            Tidy.Core.Tidy        document          = new Tidy.Core.Tidy();
            TidyMessageCollection messageCollection = new TidyMessageCollection();

            document.Options.DocType         = DocType.Omit;
            document.Options.Xhtml           = true;
            document.Options.CharEncoding    = CharEncoding.Utf8;
            document.Options.LogicalEmphasis = true;

            document.Options.MakeClean     = false;
            document.Options.QuoteNbsp     = false;
            document.Options.SmartIndent   = false;
            document.Options.IndentContent = false;
            document.Options.TidyMark      = false;

            document.Options.DropFontTags   = false;
            document.Options.QuoteAmpersand = true;
            document.Options.DropEmptyParas = true;

            string tidyXhtml;

            using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(html)))
            using (MemoryStream output = new MemoryStream())
            {
                document.Parse(input, output, messageCollection);

                // ToArray() copies the whole buffer on every call, so take one snapshot.
                byte[] tidied = output.ToArray();
                tidyXhtml = Encoding.UTF8.GetString(tidied, 0, tidied.Length);
            }

            // Round-trip through XElement; this throws if the Tidy output is not well-formed XML.
            return(XElement.Parse(tidyXhtml).ToString());
        }
Пример #2
0
        /// <summary>
        /// Feeds <paramref name="markup"/> (as UTF-8 bytes) through the supplied Tidy instance
        /// and returns the text Tidy wrote to its output stream.
        /// </summary>
        /// <param name="markup">The markup to parse.</param>
        /// <param name="tidy">An already configured Tidy instance.</param>
        /// <param name="tidyMessages">Receives every message Tidy reported during the parse.</param>
        /// <returns>The Tidy output, read back through a <c>C1StreamReader</c>.</returns>
        /// <exception cref="InvalidOperationException">Tidy reported at least one error.</exception>
        private static string ParseMarkup(string markup, Tidy tidy, out TidyMessageCollection tidyMessages)
        {
            tidyMessages = new TidyMessageCollection();
            byte[] markupBytes = Encoding.UTF8.GetBytes(markup);

            string tidied;
            using (var source = new MemoryStream(markupBytes))
            using (var sink = new MemoryStream())
            {
                tidy.Parse(source, sink, tidyMessages);

                // Rewind so the reader sees what Tidy just wrote.
                sink.Seek(0, SeekOrigin.Begin);
                using (var reader = new C1StreamReader(sink))
                {
                    tidied = reader.ReadToEnd();
                }
            }

            if (tidyMessages.Errors <= 0)
            {
                return(tidied);
            }

            // Collect only the error-level messages for the exception text.
            var errors = new StringBuilder();
            foreach (TidyMessage entry in tidyMessages)
            {
                if (entry.Level != MessageLevel.Error)
                {
                    continue;
                }
                errors.AppendLine(entry.ToString());
            }
            throw new InvalidOperationException($"Failed to parse html:\n\n{errors}");
        }
Пример #3
0
        /// <summary>
        /// Parses the markup into an <see cref="XDocument"/>, repairing it with Tidy when it
        /// cannot be parsed directly.
        /// </summary>
        /// <remarks>
        /// The markup is first handed to <c>XhtmlDocument.Parse</c>. Only if that throws is the
        /// slower Tidy path taken, after which duplicate attributes are removed and the result
        /// is parsed with <see cref="XDocument.Parse(string)"/>.
        /// </remarks>
        /// <param name="xmlMarkup">The markup to clean</param>
        /// <returns>The parsed document.</returns>
        /// <exception cref="InvalidOperationException">Tidy reported at least one error.</exception>
        public static XDocument TidyXml(string xmlMarkup)
        {
            try
            {
                return(XhtmlDocument.Parse(xmlMarkup));
            }
            catch (Exception)
            {
                // Deliberate best-effort: any failure here just means we take the slow road below.
            }

            byte[] xmlByteArray = Encoding.UTF8.GetBytes(xmlMarkup);

            Tidy tidy = GetXmlConfiguredTidy();

            List <string> namespacePrefixedElementNames = LocateNamespacePrefixedElementNames(xmlMarkup);

            AllowNamespacePrefixedElementNames(tidy, namespacePrefixedElementNames);
            AllowHtml5ElementNames(tidy);

            TidyMessageCollection tidyMessages = new TidyMessageCollection();
            string xml = "";

            using (MemoryStream inputStream = new MemoryStream(xmlByteArray))
            {
                using (MemoryStream outputStream = new MemoryStream())
                {
                    tidy.Parse(inputStream, outputStream, tidyMessages);
                    outputStream.Position = 0;

                    // Dispose the reader as well, not just the stream it wraps.
                    using (C1StreamReader sr = new C1StreamReader(outputStream))
                    {
                        xml = sr.ReadToEnd();
                    }
                }
            }

            if (tidyMessages.Errors > 0)
            {
                // Only error-level messages go into the exception text.
                StringBuilder errorMessageBuilder = new StringBuilder();
                foreach (TidyMessage message in tidyMessages)
                {
                    if (message.Level == MessageLevel.Error)
                    {
                        errorMessageBuilder.AppendLine(message.ToString());
                    }
                }
                throw new InvalidOperationException(string.Format("Failed to parse html:\n\n{0}", errorMessageBuilder.ToString()));
            }

            xml = RemoveDuplicateAttributes(xml);

            return(XDocument.Parse(xml));
        }
Пример #4
0
        /// <summary>
        /// Tidies the markup as strict XHTML and returns only what Tidy placed between
        /// <c>&lt;body&gt;</c> and <c>&lt;/body&gt;</c>.
        /// </summary>
        /// <param name="str">The markup to tidy; may be null.</param>
        /// <returns>
        /// The body content with surrounding line breaks removed, or an empty string when
        /// <paramref name="str"/> is null or the output has no plain <c>&lt;body&gt;</c> /
        /// <c>&lt;/body&gt;</c> pair.
        /// </returns>
        public static string tidy(string str)
        {
            if (str == null)
            {
                return("");
            }

            Tidy tidy = new Tidy();
            TidyMessageCollection msg = new TidyMessageCollection();

            tidy.Options.CharEncoding    = CharEncoding.UTF8;
            tidy.Options.DocType         = DocType.Strict;
            tidy.Options.DropFontTags    = true;
            tidy.Options.LogicalEmphasis = true;
            tidy.Options.Xhtml           = true;
            tidy.Options.MakeClean       = true;
            tidy.Options.TidyMark        = true;
            tidy.Options.TabSize         = 0;

            string outputString;

            using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(str)))
            using (MemoryStream output = new MemoryStream())
            {
                // Known issue noted by the original author: markup pasted from Word
                // documents has been seen to lose content at this step.
                tidy.Parse(input, output, msg);
                outputString = Encoding.UTF8.GetString(output.ToArray());
            }

            const string bodyOpenTag = "<body>";

            int bodybegin = outputString.IndexOf(bodyOpenTag);
            int bodyend   = outputString.IndexOf("</body>");
            if (bodybegin > 0 && bodyend > 0)
            {
                // Slice exactly between the tags, then strip the line breaks around the
                // content. This avoids fixed offsets that only work for one particular
                // newline length.
                int contentStart = bodybegin + bodyOpenTag.Length;
                int length       = bodyend - contentStart;
                if (length < 0)
                {
                    length = 0;
                }
                return(outputString.Substring(contentStart, length).Trim('\r', '\n'));
            }

            return("");
        }
Пример #5
0
        /// <summary>
        /// Cleans bad HTML by passing it through Tidy with its default options
        /// (http://sourceforge.net/projects/tidynet/).
        /// </summary>
        /// <param name="badHtmlString">The markup to clean; it is encoded as UTF-8 before parsing.</param>
        /// <returns>The bytes Tidy wrote to its output, decoded as UTF-8.</returns>
        static string CleanHtml(string badHtmlString)
        {
            Tidy tidy = new Tidy();
            TidyMessageCollection tidyMsg = new TidyMessageCollection();

            // Both streams are disposed deterministically once the result has been decoded.
            using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(badHtmlString)))
            using (MemoryStream output = new MemoryStream())
            {
                tidy.Parse(input, output, tidyMsg);
                return(Encoding.UTF8.GetString(output.ToArray()));
            }
        }
        /// <summary>
        /// Serializes the field value as indented HTML: the value is run through Tidy and the
        /// text between <c>&lt;body&gt;</c> and <c>&lt;/body&gt;</c> is stored, trimmed and
        /// wrapped in newlines, in <c>args.ValueSerialized</c>.
        /// </summary>
        /// <remarks>
        /// Nothing happens when a serialized value already exists, when the normal value is
        /// blank, or when the field type key is not one of <c>supportedFieldTypeKeys</c>
        /// (compared case-insensitively with the invariant culture).
        /// </remarks>
        /// <param name="args">The pipeline arguments; must not be null.</param>
        protected override void DoProcess(FieldSerializationPipelineArgs args)
        {
            Assert.ArgumentNotNull(args, "args");

            if (args.ValueSerialized != null)
            {
                return;
            }

            if (string.IsNullOrWhiteSpace(args.ValueNormal))
            {
                return;
            }

            bool isSupportedType = supportedFieldTypeKeys.Any(
                key => key.Equals(args.FieldTypeKey, StringComparison.InvariantCultureIgnoreCase));
            if (!isSupportedType)
            {
                return;
            }

            Tidy tidy = new Tidy();

            tidy.Options.DocType       = DocType.Omit;
            tidy.Options.TidyMark      = true;
            tidy.Options.IndentContent = true;

            TidyMessageCollection tmc = new TidyMessageCollection();

            using (MemoryStream source = new MemoryStream())
            {
                using (MemoryStream sink = new MemoryStream())
                {
                    byte[] raw = Encoding.UTF8.GetBytes(args.ValueNormal);
                    source.Write(raw, 0, raw.Length);
                    source.Position = 0;
                    tidy.Parse(source, sink, tmc);

                    string markup = Encoding.UTF8.GetString(sink.ToArray());

                    const string openTag  = "<body>";
                    const string closeTag = "</body>";

                    // Drop everything up to and including the opening body tag.
                    int openAt = markup.IndexOf(openTag);
                    if (openAt > 0)
                    {
                        markup = markup.Substring(openAt + openTag.Length);
                    }

                    // Drop the closing body tag and everything after it.
                    int closeAt = markup.IndexOf(closeTag);
                    if (closeAt > 0)
                    {
                        markup = markup.Substring(0, closeAt);
                    }

                    markup = markup.Trim();

                    args.ValueSerialized        = Environment.NewLine + markup + Environment.NewLine;
                    args.FieldSerializationType = FieldSerializationType.Html;
                }
            }
        }
Пример #7
0
        /// <summary>
        /// Indents the given html source.
        /// </summary>
        /// <remarks>
        /// The source is passed to Tidy as UTF-8 bytes and Tidy is configured for UTF-8, so
        /// the encoding of the bytes and the encoding the parser is told about agree.
        /// </remarks>
        /// <param name="htmlSource">The html source.</param>
        /// <returns>A string with the new source.</returns>
        public String IndentContent(String htmlSource)
        {
            Tidy tidy = new Tidy();

            tidy.Options.IndentContent = true;
            // Declare the encoding explicitly; the bytes below are produced with the same one.
            tidy.Options.CharEncoding  = CharEncoding.UTF8;
            TidyMessageCollection tmc = new TidyMessageCollection();

            using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(htmlSource)))
            using (MemoryStream output = new MemoryStream())
            {
                tidy.Parse(input, output, tmc);

                htmlSource = Encoding.UTF8.GetString(output.ToArray());
            }

            return(htmlSource);
        }
Пример #8
0
        /// <summary>
        /// Tidies markup (with the Word 2000 clean-up and text enclosing options enabled) and
        /// passes the result through <c>RemoveTidyAdditions</c>.
        /// </summary>
        /// <param name="text">The markup to tidy; it is encoded as UTF-8 before parsing.</param>
        /// <returns>The value returned by <c>RemoveTidyAdditions</c> for the Tidy output.</returns>
        public static string TidyHtml(string text)
        {
            var doc      = new Tidy();
            var messages = new TidyMessageCollection();

            // An earlier, fuller option set (Xhtml, DocType.Strict, CharEncoding.UTF8,
            // LogicalEmphasis, SmartIndent, IndentContent, QuoteAmpersand, DropEmptyParas,
            // DropFontTags = false) is no longer applied. The original note said it caused
            // problems handling "font" tags, occasionally mangling one into "fontface=...".
            doc.Options.TidyMark    = false;
            doc.Options.MakeClean   = true;
            doc.Options.Word2000    = true;
            doc.Options.EncloseText = true;

            // Required to stop spaces being removed, and tabs added etc...
            doc.Options.Spaces  = 0;
            doc.Options.WrapLen = 32000;

            string tidied;

            using (var input = new MemoryStream(Encoding.UTF8.GetBytes(text)))
            using (var output = new MemoryStream())
            {
                doc.Parse(input, output, messages);
                tidied = Encoding.UTF8.GetString(output.ToArray());
            }

            return(RemoveTidyAdditions(tidied));
        }
        /// <summary>
        /// Converts HTML to XHTML with Tidy and returns the content of the resulting body,
        /// with an <c>xmlns</c> attribute (value <c>XhtmlNamespace</c>) added to every element
        /// that is a direct child of the body.
        /// </summary>
        /// <param name="source">The HTML to convert; it is encoded as UTF-8 before parsing.</param>
        /// <returns>The inner XML of a fresh <c>body</c> element holding the converted nodes.</returns>
        public static String ConvertHtmlToXhtml(String source)
        {
            TidyMessageCollection tmc = new TidyMessageCollection();
            Tidy tidy = new Tidy();

            tidy.Options.DocType         = DocType.Omit;
            tidy.Options.DropFontTags    = true;
            tidy.Options.LogicalEmphasis = true;
            tidy.Options.Xhtml           = true;
            tidy.Options.XmlOut          = true;
            tidy.Options.MakeClean       = true;
            tidy.Options.TidyMark        = false;
            tidy.Options.NumEntities     = true;

            string tidied;

            using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(source)))
            using (MemoryStream output = new MemoryStream())
            {
                tidy.Parse(input, output, tmc);
                tidied = Encoding.UTF8.GetString(output.ToArray());
            }

            XmlDocument x     = new XmlDocument();
            XmlDocument xhtml = new XmlDocument();

            // Target document: an empty body that collects the converted nodes.
            xhtml.LoadXml("<body />");
            XmlNode xhtmlBody = xhtml.SelectSingleNode("/body");

            x.LoadXml(tidied);
            XmlNode body = x.SelectSingleNode("/html/body");

            foreach (XmlNode node in body.ChildNodes)
            {
                if (node.NodeType == XmlNodeType.Element)
                {
                    // One attribute instance per element. Appending a single shared
                    // instance would move it off the previously processed element each time.
                    XmlAttribute ns = x.CreateAttribute("xmlns");
                    ns.Value = XhtmlNamespace;
                    node.Attributes.Append(ns);
                }

                // Deep-copy into the target document; nodes cannot be shared across documents.
                xhtmlBody.AppendChild(xhtml.ImportNode(node, true));
            }
            return(xhtmlBody.InnerXml);
        }
Пример #10
0
        /// <summary>
        /// Re-formats markup with Tidy using SmartIndent, IndentAttributes off, WrapLen 0 and
        /// Spaces 4.
        /// </summary>
        /// <param name="dirtyHtml">The markup to clean.</param>
        /// <returns>The Tidy output decoded with <c>Encoding.Default</c>.</returns>
        private static string CleanHtml(string dirtyHtml)
        {
            // Two alternatives exist only as disabled code in the history of this method:
            // parsing the input directly with System.Xml.Linq.XElement.Parse, and a
            // Document.FromString(...).CleanAndRepair() pipeline. Neither is used here.

            var formatter = new TidyNet.Tidy();

            formatter.Options.SmartIndent      = true;
            formatter.Options.IndentAttributes = false;
            formatter.Options.WrapLen          = 0;
            formatter.Options.Spaces           = 4;

            var report = new TidyMessageCollection();

            // NOTE(review): Encoding.Default is platform dependent; confirm it is intended
            // here rather than an explicit encoding.
            byte[] rawInput = Encoding.Default.GetBytes(dirtyHtml);

            using (var source = new MemoryStream(rawInput))
            {
                using (var sink = new MemoryStream())
                {
                    formatter.Parse(source, sink, report);

                    byte[] rawOutput = sink.ToArray();
                    return(Encoding.Default.GetString(rawOutput));
                }
            }
        }
Пример #11
0
        /// <summary>
        ///     Re-formats markup with Tidy (SmartIndent, eight-space indents) and then
        ///     replaces every run of eight spaces in the output with a tab character.
        /// </summary>
        /// <param name="dirtyHtml">The markup to format.</param>
        /// <param name="messages">Receives the messages Tidy reported during the parse.</param>
        /// <returns>The formatted markup, decoded with <c>Encoding.Default</c>.</returns>
        public static string PrettyPrint(string dirtyHtml, out TidyMessageCollection messages)
        {
            const int spaces = 8;

            var formatter = new Tidy.Core.Tidy();

            formatter.Options.SmartIndent      = true;
            formatter.Options.IndentAttributes = false;
            formatter.Options.WrapLen          = 0;
            formatter.Options.Spaces           = spaces;

            messages = new TidyMessageCollection();

            // NOTE(review): Encoding.Default is platform dependent; confirm it is intended
            // here rather than an explicit encoding.
            byte[] rawInput = Encoding.Default.GetBytes(dirtyHtml);

            using (var source = new MemoryStream(rawInput))
            {
                using (var sink = new MemoryStream())
                {
                    formatter.Parse(source, sink, messages);

                    string formatted   = Encoding.Default.GetString(sink.ToArray());
                    string indentBlock = new string(' ', spaces);

                    // The replacement applies to the whole text, not only leading indentation.
                    return(formatted.Replace(indentBlock, "\t"));
                }
            }
        }
Пример #12
0
        /// <summary>
        /// Uses Tidy.Net to clean a html source.
        /// </summary>
        /// <remarks>
        /// <c>isWordHtml</c> is not a parameter of this method; it is read from the enclosing
        /// scope (presumably a member of the containing class - confirm) and controls the
        /// <c>Word2000</c> option.
        /// </remarks>
        /// <param name="htmlSource">The original html source.</param>
        /// <returns>
        /// The cleaned Html, or <paramref name="htmlSource"/> unchanged when Tidy throws a
        /// <see cref="FormatException"/> (which is logged).
        /// </returns>
        public string Clean(string htmlSource)
        {
            Tidy tidy = new Tidy();

            //Options required for xhtml conversion.
            tidy.Options.DocType          = DocType.Strict;
            tidy.Options.DropFontTags     = true;
            tidy.Options.LogicalEmphasis  = true;
            tidy.Options.Xhtml            = true;
            tidy.Options.XmlOut           = true;
            tidy.Options.MakeClean        = true;
            tidy.Options.TidyMark         = false;
            tidy.Options.DropEmptyParas   = true;
            tidy.Options.IndentContent    = true;
            tidy.Options.SmartIndent      = true;
            tidy.Options.Word2000         = isWordHtml;
            tidy.Options.EncloseBlockText = true;

            tidy.Options.XmlTags     = true;
            tidy.Options.FixComments = true;
            TidyMessageCollection tmc = new TidyMessageCollection();

            // The using blocks release both streams on the normal path and on the early
            // return from the catch block.
            using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(htmlSource)))
            using (MemoryStream output = new MemoryStream())
            {
                try
                {
                    tidy.Parse(input, output, tmc);
                }
                catch (FormatException ex)
                {
                    // Best effort: log and hand back the source untouched.
                    Log.Exception(ex);
                    return(htmlSource);
                }

                return(Encoding.UTF8.GetString(output.ToArray()));
            }
        }
Пример #13
0
        /// <summary>
        /// Tidies <paramref name="initialContent"/> (Word 2000 clean-up, XHTML output) and
        /// stores in <paramref name="cleanContent"/> the part of the output that follows the
        /// opening body tag and precedes the closing body tag.
        /// </summary>
        /// <param name="initialContent">The markup to clean; it is read but never reassigned.</param>
        /// <param name="cleanContent">Receives the cleaned body content.</param>
        private void cleanContent(ref String initialContent, ref String cleanContent)
        {
            Tidy tidy = new Tidy();

            // DocType.Strict, DropFontTags, LogicalEmphasis, XmlOut, TidyMark = false and
            // MakeClean are currently not applied; only the two options below are set.
            tidy.Options.Word2000 = true;
            tidy.Options.Xhtml    = true;

            TidyMessageCollection tmc = new TidyMessageCollection();

            using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(initialContent)))
            using (MemoryStream output = new MemoryStream())
            {
                tidy.Parse(input, output, tmc);

                cleanContent = Encoding.UTF8.GetString(output.ToArray());
            }

            //Delete header: everything up to the end of the opening body tag.
            int startIndex = cleanContent.IndexOf("<body", StringComparison.Ordinal);
            if (startIndex >= 0)
            {
                // Only search for the closing '>' when the tag exists; a start index of -1
                // would make IndexOf throw.
                int endIndex = cleanContent.IndexOf(">", startIndex, StringComparison.Ordinal);
                if (endIndex >= 0)
                {
                    cleanContent = cleanContent.Remove(0, endIndex + 1);
                }
            }

            //Delete footer: the closing body tag and everything after it.
            startIndex = cleanContent.IndexOf("</body", StringComparison.Ordinal);
            if (startIndex >= 0)
            {
                cleanContent = cleanContent.Remove(startIndex);
            }
        }
        /// <summary>
        /// Shortens a HTML formatted string while keeping HTML formatting and complete words
        /// (also removes line breaks at the end of the shortened string).
        /// </summary>
        /// <remarks>
        /// The text is cut at the first space at or after <paramref name="length"/>, trailing
        /// <c>&lt;br/&gt;</c> tags are stripped, and the fragment is repaired by Tidy; the
        /// content of the resulting <c>body</c> element is returned.
        /// </remarks>
        /// <param name="input">The HTML formatted string</param>
        /// <param name="inputIsShortened">Output boolean telling if the input string has been shortened</param>
        /// <param name="length">The approximate length of the output string (default: 300)</param>
        /// <param name="elipsis">Ellipsis text to append to the output string (use string.Empty when no ellipsis should be added, default: ...)</param>
        /// <returns>The shortened input string with HTML formatting</returns>
        public static string ShortenHtml(this string input, out bool inputIsShortened, int length = 300, string elipsis = "...")
        {
            inputIsShortened = false;

            if (input.Length <= length)
            {
                return(input);
            }

            input = input.Replace("<br />", "<br/>");

            // Cut at the first space at or after `length` so the last word stays whole.
            // Normalizing "<br />" above can shrink the text to `length` or less, in which
            // case there is nothing to cut and the whole text is kept.
            int    cutAt     = length < input.Length ? input.IndexOf(' ', length) : -1;
            string substring = cutAt < 0 ? input : input.Substring(0, cutAt);

            substring = substring.Trim();
            while (substring.EndsWith("<br/>"))
            {
                substring = substring.Substring(0, substring.Length - 5).Trim();
            }

            if (input.Length > substring.Length)
            {
                inputIsShortened = true;
            }

            substring = substring.Replace("<br/>", "<br />");

            Tidy tidy = new Tidy();

            tidy.Options.DocType      = DocType.Omit;
            tidy.Options.CharEncoding = CharEncoding.UTF8;
            tidy.Options.Xhtml        = true;
            tidy.Options.NumEntities  = true;

            TidyMessageCollection tmc = new TidyMessageCollection();
            string tidyResult;

            using (MemoryStream inputStream = new MemoryStream(Encoding.UTF8.GetBytes(substring)))
            using (MemoryStream outputStream = new MemoryStream())
            {
                // Tidy closes any tags the cut left open.
                tidy.Parse(inputStream, outputStream, tmc);
                tidyResult = Encoding.UTF8.GetString(outputStream.ToArray());
            }

            XmlDocument xmlDoc = new XmlDocument();

            xmlDoc.LoadXml(tidyResult);
            tidyResult = xmlDoc.SelectSingleNode("//body").InnerXml;

            if (!string.IsNullOrEmpty(elipsis))
            {
                // Keep the ellipsis inside a trailing paragraph instead of after it.
                if (tidyResult.EndsWith("</p>"))
                {
                    return(string.Concat(tidyResult.Substring(0, tidyResult.Length - 4), elipsis, "</p>"));
                }
                return(string.Concat(tidyResult, elipsis));
            }
            return(tidyResult);
        }
Пример #15
0
 /// <summary>
 ///     Parses <paramref name="input"/> and writes the result to <paramref name="output"/>
 ///     by delegating to <c>ParseInternal</c>.
 /// </summary>
 /// <param name="input">The input stream</param>
 /// <param name="output">The output stream</param>
 /// <param name="messages">
 ///     Collection that receives the parser messages; a new collection is created when
 ///     null is passed.
 /// </param>
 public void Parse(Stream input, Stream output, TidyMessageCollection messages=null)
 {
     if (messages == null)
     {
         messages = new TidyMessageCollection();
     }

     ParseInternal(input, output, messages);
 }
Пример #16
0
        /// <summary>
        /// Cleans HTML documents or fragments into XHTML conformant markup
        /// </summary>
        /// <remarks>
        /// Warning-level Tidy messages are collected into the result's <c>ErrorSummary</c>;
        /// error-level messages abort the operation with an exception.
        /// </remarks>
        /// <param name="htmlMarkup">The html to clean</param>
        /// <returns>A fully structured XHTML document, incl. html, head and body elements.</returns>
        /// <exception cref="InvalidOperationException">Tidy reported at least one error.</exception>
        public static TidyHtmlResult TidyHtml(string htmlMarkup)
        {
            byte[] htmlByteArray = Encoding.UTF8.GetBytes(htmlMarkup);

            Tidy tidy = GetXhtmlConfiguredTidy();

            List <string> namespacePrefixedElementNames      = LocateNamespacePrefixedElementNames(htmlMarkup);
            Dictionary <string, string> namespacePrefixToUri = LocateNamespacePrefixToUriDeclarations(htmlMarkup);
            // Element names that do not start with any key of namespacePrefixToUri.
            List <string> badNamespacePrefixedElementNames   = namespacePrefixedElementNames.Where(s => !namespacePrefixToUri.Any(d => s.StartsWith(d.Key))).ToList();

            AllowNamespacePrefixedElementNames(tidy, namespacePrefixedElementNames);
            AllowHtml5ElementNames(tidy);

            TidyMessageCollection tidyMessages = new TidyMessageCollection();
            string xhtml = "";

            using (MemoryStream inputStream = new MemoryStream(htmlByteArray))
            {
                using (MemoryStream outputStream = new MemoryStream())
                {
                    tidy.Parse(inputStream, outputStream, tidyMessages);
                    outputStream.Position = 0;

                    // Dispose the reader as well, not just the stream it wraps.
                    using (C1StreamReader sr = new C1StreamReader(outputStream))
                    {
                        xhtml = sr.ReadToEnd();
                    }
                }
            }

            if (tidyMessages.Errors > 0)
            {
                // Only error-level messages go into the exception text.
                StringBuilder errorMessageBuilder = new StringBuilder();
                foreach (TidyMessage message in tidyMessages)
                {
                    if (message.Level == MessageLevel.Error)
                    {
                        errorMessageBuilder.AppendLine(message.ToString());
                    }
                }
                throw new InvalidOperationException(string.Format("Failed to parse html:\n\n{0}", errorMessageBuilder.ToString()));
            }

            // Make sure the root element declares the XHTML namespace.
            if (xhtml.IndexOf("<html>") > -1)
            {
                xhtml = xhtml.Replace("<html>", "<html xmlns=\"http://www.w3.org/1999/xhtml\">");
            }

            if (xhtml.IndexOf("xmlns=\"http://www.w3.org/1999/xhtml\"") == -1)
            {
                xhtml = xhtml.Replace("<html", "<html xmlns=\"http://www.w3.org/1999/xhtml\"");
            }

            xhtml = RemoveDuplicateAttributes(xhtml);
            xhtml = RemoveXmlDeclarations(xhtml);
            xhtml = UndoLowerCasingOfElementNames(xhtml, namespacePrefixedElementNames);
            xhtml = UndoLowerCasingOfNamespacePrefixes(xhtml, namespacePrefixToUri);
            StringBuilder messageBuilder = new StringBuilder();

            foreach (TidyMessage message in tidyMessages)
            {
                if (message.Level == MessageLevel.Warning)
                {
                    messageBuilder.AppendLine(message.ToString());
                }
            }

            List <string> badNamespacePrefixes = badNamespacePrefixedElementNames.Select(n => n.Substring(0, n.IndexOf(':'))).Union(LocateAttributeNamespacePrefixes(xhtml)).Distinct().Where(f => IsValidXmlName(f)).ToList();

            XDocument outputResult;

            if (badNamespacePrefixedElementNames.Any())
            {
                // Bind every bad prefix to a throw-away "#bad" namespace on a wrapper element
                // so the markup parses, then remove everything that ended up in that namespace.
                string    badDeclared = string.Join(" ", badNamespacePrefixes.Select(p => string.Format("xmlns:{0}='#bad'", p)).ToArray());
                XDocument badDoc      = XDocument.Parse(string.Format("<root {0}>{1}</root>", badDeclared, xhtml));
                badDoc.Descendants().Attributes().Where(e => e.Name.Namespace == "#bad").Remove();
                badDoc.Descendants().Where(e => e.Name.Namespace == "#bad").Remove();
                outputResult = new XDocument(badDoc.Root.Descendants().First());
            }
            else
            {
                outputResult = XDocument.Parse(xhtml, LoadOptions.PreserveWhitespace);
            }

            return(new TidyHtmlResult {
                Output = outputResult, ErrorSummary = messageBuilder.ToString()
            });
        }
Пример #17
0
        /// <summary>
        /// Builds the page from <c>Strings.BasicHtmlPage</c>: fills in title, charset and the
        /// script/stylesheet references for every library whose version property is not the
        /// empty string, and optionally runs the result through Tidy.
        /// </summary>
        /// <remarks>
        /// Side effect: when <c>Charset</c> is the empty string it is set to "UTF-8".
        /// </remarks>
        /// <returns>The generated page markup; the Tidy output when <c>HtmlTidy</c> is set.</returns>
        public string Generate()
        {
            string result      = Strings.BasicHtmlPage;
            StringBuilder head = new StringBuilder();

            if (this.Charset == string.Empty)
            {
                this.Charset = "UTF-8";
            }

            #region HeadInit
            if (this.Jquery != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.JqueryUrl, this.Jquery));
            }
            if (this.JqueryMobile != string.Empty)
            {
                head.AppendFormat(Strings.CssFrame, string.Format(Strings.JqueryMobileCssUrl, this.JqueryMobile));
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.JqueryMobileJsUrl, this.JqueryMobile));
            }
            if (this.JqueryUI != string.Empty)
            {
                head.AppendFormat(Strings.CssFrame, string.Format(Strings.JqueryUICssUrl, this.JqueryUI));
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.JqueryUIJsUrl, this.JqueryUI));
            }
            if (this.Angular != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.AngularUrl, this.Angular));
            }
            if (this.Dojo != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.DojoUrl, this.Dojo));
            }
            if (this.ExtJS != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.ExtJsUrl, this.ExtJS));
            }
            if (this.MooTools != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.MooToolsUrl, this.MooTools));
            }
            if (this.Protoptype != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.PrototypeUrl, this.Protoptype));
            }
            if (this.Scriptaculous != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.ScriptaculousUrl, this.Scriptaculous));
            }
            if (this.SWFObject != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.SWFObjectUrl, this.SWFObject));
            }
            if (this.ThreeJS != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.ThreeJsUrl, this.ThreeJS));
            }
            if (this.WebFontLoader != string.Empty)
            {
                head.AppendFormat(Strings.ScriptFrame, string.Format(Strings.WebFontLoaderUrl, this.WebFontLoader));
            }
            #endregion

            result = result.Replace("{TITLE}", this.Title);
            result = result.Replace("{CHARSET}", this.Charset);
            result = result.Replace("{OTHER_HEAD}", head.ToString());
            result = result.Replace("{BODY}", "");

            #region HtmlTidy
            if (this.HtmlTidy)
            {
                Tidy tidy = new Tidy();
                tidy.Options.DocType         = DocType.Strict;
                tidy.Options.DropFontTags    = true;
                tidy.Options.LogicalEmphasis = true;
                tidy.Options.Xhtml           = true;
                tidy.Options.XmlOut          = true;
                tidy.Options.MakeClean       = true;
                tidy.Options.TidyMark        = false;
                TidyMessageCollection tmc = new TidyMessageCollection();

                // Both streams are released as soon as the tidied text has been decoded.
                using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(result)))
                using (MemoryStream output = new MemoryStream())
                {
                    tidy.Parse(input, output, tmc);
                    result = Encoding.UTF8.GetString(output.ToArray());
                }
            }
            #endregion

            return(result);
        }
Пример #18
0
 /// <summary>
 ///     Parses the markup in <paramref name="input"/> and writes the resulting text to
 ///     <paramref name="output"/>. The output stream is left open for the caller.
 /// </summary>
 /// <param name="input">The markup to parse</param>
 /// <param name="output">The stream that receives the result; owned by the caller</param>
 /// <param name="messages">The messages</param>
 public void Parse(string input, Stream output, TidyMessageCollection messages=null)
 {
     var html = Parse(input, messages);

     // Do not dispose the writer: disposing a StreamWriter also closes the underlying
     // stream, which belongs to the caller. Flushing is enough to push the text through.
     var writer = new StreamWriter(output);
     writer.Write(html);
     writer.Flush();
 }
Пример #19
0
 /// <summary>
 ///     Parses <paramref name="input"/> into an in-memory buffer and returns the buffered
 ///     output as text.
 /// </summary>
 /// <param name="input">The input stream</param>
 /// <param name="messages">The messages</param>
 /// <returns>The text read back from the buffer with a default <see cref="StreamReader"/>.</returns>
 public string Parse(Stream input, TidyMessageCollection messages=null)
 {
     var buffer = new MemoryStream();
     Parse(input, buffer, messages);

     // Rewind so the reader starts at the beginning of what was written.
     buffer.Seek(0, SeekOrigin.Begin);

     string html;
     using (var reader = new StreamReader(buffer))
     {
         html = reader.ReadToEnd();
     }
     return html;
 }
Пример #20
0
 /// <summary>
 ///     Parses the markup in <paramref name="input"/> and returns the result as text.
 /// </summary>
 /// <remarks>
 ///     The text is written to an in-memory stream with a default <see cref="StreamWriter"/>
 ///     and handed to the stream-based overload.
 ///     NOTE(review): the writer's default encoding must match the encoding the parser is
 ///     configured to read - confirm against the options in use.
 /// </remarks>
 /// <param name="input">The markup to parse</param>
 /// <param name="messages">The messages</param>
 /// <returns>The parsed markup</returns>
 public string Parse(string input, TidyMessageCollection messages=null)
 {
     using (var memoryStream = new MemoryStream())
     using (var writer = new StreamWriter(memoryStream))
     {
         writer.Write(input);

         // The writer buffers internally: flush so the text actually reaches the stream,
         // then rewind so the parser reads from the start instead of from the end.
         writer.Flush();
         memoryStream.Position = 0;

         return Parse(memoryStream, messages);
     }
 }
Пример #21
0
        /// <summary>
        ///     Tidies the markup read from <paramref name="input"/> and loads the
        ///     result into an <see cref="XElement"/>.
        /// </summary>
        /// <remarks>
        ///     Sets DocType to Strict, turns QuoteNbsp off and turns XmlOut and Xhtml on
        ///     before parsing. These values are written to <c>Options</c> and are not
        ///     restored afterwards.
        /// </remarks>
        /// <param name="input">Stream holding the markup to parse.</param>
        /// <param name="messages">Optional collection forwarded to the stream-based Parse overload.</param>
        /// <returns>The element obtained by passing the tidied text to <c>XElement.Parse</c>.</returns>
        public XElement ParseXml(Stream input, TidyMessageCollection messages=null)
        {
            Options.DocType   = DocType.Strict;
            Options.QuoteNbsp = false;
            Options.XmlOut    = true;
            Options.Xhtml     = true;

            string tidied = Parse(input, messages);
            XElement root = XElement.Parse(tidied);
            return root;
        }
Пример #22
0
 /// <summary>
 ///     Runs <c>ParseInternal</c> with the given streams and exposes the resulting
 ///     tree through its adapter, cast to <see cref="IDocument"/>.
 /// </summary>
 /// <param name="input">Stream passed through to <c>ParseInternal</c>.</param>
 /// <param name="output">Stream passed through to <c>ParseInternal</c>.</param>
 /// <param name="messages">Message collection passed through to <c>ParseInternal</c>.</param>
 /// <returns>
 ///     The root node's <c>Adapter</c> as an <see cref="IDocument"/>, or null when
 ///     <c>ParseInternal</c> returns null.
 /// </returns>
 internal virtual IDocument ParseDom(Stream input, Stream output, TidyMessageCollection messages)
 {
     Node root = ParseInternal(input, output, messages);
     return root != null ? (IDocument) root.Adapter : null;
 }
Пример #23
0
        /// <summary>
        ///     Internal routine that actually does the parsing: lexes
        ///     <paramref name="input"/>, builds the document tree, applies the cleanup
        ///     passes enabled in the options and, when no errors were recorded,
        ///     pretty prints the tree to <paramref name="output"/>.
        /// </summary>
        /// <param name="input">
        ///     Stream holding the markup. When null nothing is parsed and null is returned.
        /// </param>
        /// <param name="output">
        ///     Stream that receives the pretty-printed document. Nothing is printed to it
        ///     when it is null, when errors were recorded, or when the BurstSlides option
        ///     is set (slides are created through PPrint.CreateSlides instead).
        /// </param>
        /// <param name="messages">
        ///     Collection attached to the lexer for reporting. May be null, in which case
        ///     a throw-away collection is used so the error counters can still be read.
        /// </param>
        /// <returns>
        ///     The root node of the parsed document, or null when <paramref name="input"/>
        ///     is null or the tree fails an integrity check.
        /// </returns>
        internal Node ParseInternal(Stream input, Stream output, TidyMessageCollection messages)
        {
            Node document = null;
            Out o = new OutImpl(); /* normal output stream */

            /* ensure config is self-consistent */
            _options.Adjust();

            if (input != null)
            {
                var lexer = new Lexer(new ClsStreamInImpl(input, _options.CharEncoding, _options.TabSize), _options)
                    {
                        /*
                        the error counters are read from lexer.Messages below, so
                        never leave it null even if the caller did not supply one
                        */
                        Messages = messages ?? new TidyMessageCollection()
                    };

                /*
                store pointer to lexer in input stream
                to allow character encoding errors to be
                reported
                */
                lexer.Input.Lexer = lexer;

                /* Tidy doesn't alter the doctype for generic XML docs */
                Node doctype;
                if (_options.XmlTags)
                {
                    document = ParserImpl.ParseXmlDocument(lexer);
                }
                else
                {
                    document = ParserImpl.ParseDocument(lexer);

                    if (!document.CheckNodeIntegrity())
                    {
                        Report.BadTree(lexer);
                        return null;
                    }

                    var cleaner = new Clean(_options.TagTable);

                    /* simplifies <b><b> ... </b> ...</b> etc. */
                    cleaner.NestedEmphasis(document);

                    /* cleans up <dir>indented text</dir> etc. */
                    cleaner.List2Bq(document);
                    cleaner.Bq2Div(document);

                    /* replaces i by em and b by strong */
                    if (_options.LogicalEmphasis)
                    {
                        cleaner.EmFromI(document);
                    }

                    if (_options.Word2000 && cleaner.IsWord2000(document, _options.TagTable))
                    {
                        /* prune Word2000's <![if ...]> ... <![endif]> */
                        cleaner.DropSections(lexer, document);

                        /* drop style & class attributes and empty p, span elements */
                        cleaner.CleanWord2000(lexer, document);
                    }

                    /* replaces presentational markup by style rules */
                    if (_options.MakeClean || _options.DropFontTags)
                    {
                        cleaner.CleanTree(lexer, document);
                    }

                    /* re-check the tree after the cleanup passes have modified it */
                    if (!document.CheckNodeIntegrity())
                    {
                        Report.BadTree(lexer);
                        return null;
                    }
                    doctype = document.FindDocType();
                    if (document.Content != null)
                    {
                        if (_options.Xhtml)
                        {
                            lexer.SetXhtmlDocType(document);
                        }
                        else
                        {
                            lexer.FixDocType(document);
                        }

                        if (_options.TidyMark)
                        {
                            lexer.AddGenerator(document);
                        }
                    }

                    /* ensure presence of initial <?XML version="1.0"?> */
                    if (_options.XmlOut && _options.XmlPi)
                    {
                        lexer.FixXmlPi(document);
                    }

                    if (document.Content != null)
                    {
                        Report.ReportVersion(lexer, doctype);
                        Report.ReportNumWarnings(lexer);
                    }
                }

                if (lexer.Messages.Errors > 0)
                {
                    Report.NeedsAuthorIntervention(lexer);
                }

                o.State = StreamIn.FSM_ASCII;
                o.Encoding = _options.CharEncoding;

                /* output is only produced for documents without errors */
                if (lexer.Messages.Errors == 0)
                {
                    PPrint pprint;
                    if (_options.BurstSlides)
                    {
                        /*
                        remove doctype to avoid potential clash with
                        markup introduced when bursting into slides
                        */
                        /* discard the document type */
                        doctype = document.FindDocType();

                        if (doctype != null)
                        {
                            Node.DiscardElement(doctype);
                        }

                        /* slides use transitional features */
                        lexer.Versions |= HtmlVersion.Html40Loose;

                        /* and patch up doctype to match */
                        if (_options.Xhtml)
                        {
                            lexer.SetXhtmlDocType(document);
                        }
                        else
                        {
                            lexer.FixDocType(document);
                        }

                        /* find the body element which may be implicit */
                        Node body = document.FindBody(_options.TagTable);

                        if (body != null)
                        {
                            pprint = new PPrint(_options);
                            Report.ReportNumberOfSlides(lexer, pprint.CountSlides(body));
                            pprint.CreateSlides(lexer, document);
                        }
                        else
                        {
                            Report.MissingBody(lexer);
                        }
                    }
                    else if (output != null)
                    {
                        pprint = new PPrint(_options);
                        o.Output = output;

                        if (_options.XmlTags)
                        {
                            pprint.PrintXmlTree(o, 0, 0, lexer, document);
                        }
                        else
                        {
                            pprint.PrintTree(o, 0, 0, lexer, document);
                        }

                        pprint.FlushLine(o, 0);
                    }
                }

                Report.ErrorSummary(lexer);
            }

            return document;
        }