Ejemplo n.º 1
0
        /// <summary>
        /// Reduces the HTML file at <paramref name="filePath"/> in place: removes every
        /// element matched by a fixed list of CSS selectors, strips inline <c>style</c>
        /// attributes, all attributes of <c>body</c> and all comment nodes, tidies the
        /// whitespace of the serialized markup and overwrites the original file.
        /// </summary>
        /// <param name="filePath">Path of the HTML file that is read and then overwritten.</param>
        static void MinifyHtml(string filePath)
        {
            IHtmlDocument htmlDoc = new HtmlParser().ParseDocument(File.ReadAllText(filePath));

            // Every element matching one of these selectors is dropped from the document.
            string[] selectors =
            {
                "table#myTable",                                            // Contains photographs only
                "script", "noscript", "style",
                "link[rel='stylesheet']",
                "head > link[rel='apple-touch-icon']",
                "head > link[rel='shortcut icon']",
                "head > link[rel='manifest']",
                "head > link[rel='publisher']",
                "head > link[rel='alternate'][type='application/rss+xml']",
                "head > meta[property^='og:']",
                "head > meta[property^='fb:']",
                "head > meta[property^='dc:']",
                "head > meta[property='article:modified_time']",
                "head > meta[name='robots']",
                "head > meta[name='copyright']",
                "head > meta[name='viewport']",
                "head > meta[name='language']",
                "head > meta[name='google-translate-customization']",
                "head > meta[http-equiv='expires']",
                "head > meta[http-equiv='Content-Security-Policy']",
                "ins.adsbygoogle",
                "span[id^='ezoic-pub-ad-placeholder-']",
                "span.ezoic-ad",
                "form#form",
                "section#header-bg",
                "div#topbar",
                "div#breadcrumb", "section#breadcrumb",
                "div#refbox",
                "div#fb-root",
                "div.widget",
                "h3#comment",
                "div.mobile-hide",
                "div.comments",
                "div.push.push-5", "div.push.push-10", "div.push.push-20", "div.push.push-30", "div.push.push-40",
                "div#crt-1.ezflad-47", "div#crt-4.ezflad-47",
                "a.back-to-top-button",
                "div.fb-like", "div.fb-quote",
                "div#dmo1",
                "header", "footer"
            };
            foreach (var selector in selectors)
            {
                foreach (var el in htmlDoc.QuerySelectorAll(selector))
                {
                    el.Remove();
                }
            }

            // Drop inline styling from whatever elements remain.
            foreach (var el in htmlDoc.QuerySelectorAll("*[style]"))
            {
                el.RemoveAttribute("style");
            }
            foreach (var el in htmlDoc.QuerySelectorAll("tr#infinite_scroll"))
            {
                el.RemoveAttribute("id");
            }

            // QuerySelector returns null when nothing matches (e.g. a frameset document),
            // so guard the call instead of dereferencing unconditionally.
            var body = htmlDoc.QuerySelector("body");
            if (body != null)
            {
                body.ClearAttr();
            }

            // Snapshot the comment nodes before removing any of them, so the tree is not
            // mutated while the descendant sequence is still being enumerated.
            foreach (IComment comment in System.Linq.Enumerable.ToList(htmlDoc.Descendents<IComment>()))
            {
                comment.Remove();
            }

            // InnerHtml serializes only the children of the root element, so the doctype
            // and the root tag itself are not part of the text written back.
            string html = htmlDoc.DocumentElement.InnerHtml;

            html = html.Replace("&nbsp;", " ");
            // Collapse whitespace-only lines, then strip indentation in front of tags.
            html = Regex.Replace(html, @"^\s+$", "", RegexOptions.Multiline);
            html = Regex.Replace(html, @"^\s+<", "<", RegexOptions.Multiline);

            File.WriteAllText(filePath, html);
        }