Exemple #1
0
        /// <summary>
        /// Reduces an HTML page to a small, attribute-free fragment and wraps the result in an
        /// XML declaration followed by an <c>&lt;html&gt;&lt;body&gt;</c> envelope.
        /// </summary>
        /// <param name="html">The raw HTML markup to clean up.</param>
        /// <returns>
        /// The cleaned markup, enclosed in <c>&lt;html&gt;&lt;body&gt;</c> and preceded by an
        /// XML declaration on its own line.
        /// </returns>
        static string sanitize(string html)
        {
            // Elements dropped up front. The patterns are handed to removeElement, which is
            // defined outside this method.
            // NOTE(review): presumably removeElement treats these as regular expressions and
            // deletes every match; if so, "<a.*?>" would also match tags such as <abbr> and
            // "<head.*?>" would also match <header> -- confirm whether that is intended.
            string[] elementPatterns =
            {
                "<script.*?>.*?</script>",
                "<head.*?>.*?</head>",
                "<img.*?>",
                "<br.*?>",
                "<hr.*?>",
                "<input.*?>",
                "<center.*?>.*?</center>",
                "<a.*?>.*?</a>",
            };
            foreach (string elementPattern in elementPatterns)
            {
                html = removeElement(elementPattern, html);
            }

            // Literal fix-ups for specific markup: close a row that was opened instead of
            // closed right before </table>, drop known opening <span> variants, and collapse
            // the cell that is then left holding only an orphaned </span>.
            html = html.Replace("<tr></table>", "</tr></table>");
            html = html.Replace("<span class=\"br_italiano\">", string.Empty);
            html = html.Replace("<span class=\"br_italiano\" style=\"color:#FF0000; font-weight:bold;\">", string.Empty);
            html = html.Replace("<td align=\"left\" width=\"50%\"></span></td></tr>", "<td></td></tr>");
            html = html.Replace("<span lang=\"it\">", string.Empty);
            html = html.Replace("&nbsp;", string.Empty);

            // Strip attributes from opening tags, e.g. <td align="left"> becomes <td>.
            // The tag name may not contain whitespace or angle brackets and the attribute run
            // may not contain '<', so a single match can never begin in one tag and end in a
            // later one (which would swallow the text and tags in between). Tags whose
            // attribute text contains '/' are deliberately left untouched by this step.
            html = Regex.Replace(html, @"<([^/\s<>]+)\s+[^/<>]+>", "<$1>", RegexOptions.Singleline);

            // Remove "<!...>" constructs (doctype, comments) up to the first '>'.
            html = Regex.Replace(html, @"<!.*?>", string.Empty, RegexOptions.Singleline);

            // Sanitizer is declared outside this method; only its MarkupText result is used here.
            SanitizedMarkup markup = Sanitizer.SanitizeMarkup(html);
            html = markup.MarkupText;

            // Drop encoded '>' characters; the "--&gt;" form must go first so its dashes are
            // removed as well.
            // NOTE(review): presumably these are left-overs of comment terminators that the
            // "<!...>" removal above cut short -- confirm.
            html = html.Replace("--&gt;", string.Empty);
            html = html.Replace("&gt;", string.Empty);

            // Remove decimal numeric character references such as "&#160;".
            html = Regex.Replace(html, @"\&\#[0-9]+\;", string.Empty, RegexOptions.Singleline);

            // Remove elements that contain nothing but whitespace. Order matters: cells go
            // before rows and rows before tables (likewise items before lists), so a structure
            // that becomes empty after the inner removal is removed in the same pass.
            string[] emptyElementTags = { "td", "tr", "table", "li", "ul" };
            foreach (string tag in emptyElementTags)
            {
                string emptyElementPattern = "<" + tag + @">\s*</" + tag + ">";
                html = Regex.Replace(html, emptyElementPattern, string.Empty, RegexOptions.Singleline);
            }

            // Unwrap purely presentational tags, keeping their content.
            string[] unwrappedTags = { "span", "strong", "h3", "p" };
            foreach (string tag in unwrappedTags)
            {
                html = html.Replace("<" + tag + ">", string.Empty)
                           .Replace("</" + tag + ">", string.Empty);
            }

            html = "<html><body>" + html + "</body></html>";
            html = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>" + Environment.NewLine + html;
            return html;
        }