/// <summary>
/// Checks that asking <c>HtmlSanitiser</c> to remove "script" nodes leaves no script
/// element in the HTML it exposes through <c>Html</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the sanitiser is constructed from a live URL, so this test presumably
/// needs network access and depends on the remote page — confirm.
/// </remarks>
public static void TestRemoveNodeFromUrl()
{
    String url = "http://www.bbc.co.uk/news/";
    HtmlSanitiser sn = new HtmlSanitiser(url);
    RemoveNode rn1 = new RemoveNode("script", null, null);
    sn.RemoveNodes(new RemoveNode[] { rn1 });
    string htmlout = sn.Html;

    // Re-parse the sanitised markup so the result can be inspected structurally.
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(htmlout);

    bool foundNode = false;
    if (doc.DocumentNode.Name == "script")
    {
        foundNode = true;
    }

    if (doc.DocumentNode.HasChildNodes)
    {
        // Only the direct children of the document node are examined here; nested
        // script elements are covered by the raw-markup check below.
        foreach (HtmlNode cNode in doc.DocumentNode.ChildNodes)
        {
            if (cNode.Name == "script")
            {
                foundNode = true;
            }
        }
    }

    // HTML tag names are case-insensitive, so match "<SCRIPT" / "<Script" as well.
    if (htmlout.IndexOf("<script", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        foundNode = true;
    }

    // Expected value goes first so a failure message reports the values the right way round.
    Assert.AreEqual(false, foundNode);
}
/// <summary>
/// Sanitises the page identified by <c>link</c>, runs the result through
/// <c>DownloadImages</c>, and saves the resulting document to <c>path + FileName</c>.
/// </summary>
/// <remarks>
/// The image downloader is given <c>path + @"images\"</c> as its second argument.
/// NOTE(review): <c>path</c> is concatenated directly, so it presumably must already end
/// with a directory separator — confirm against callers.
/// </remarks>
public void Download()
{
    HtmlSanitiser sanity = new HtmlSanity.HtmlSanitiser(link);
    sanity.RemoveNodes(RemoveNodes.ToArray());

    string pathFile = path + FileName;

    DownloadImages dn = new DownloadImages(link, path + @"images\");
    dn.Download(sanity.Html);

    // Dispose the writer even when Save throws; disposing also flushes and closes the
    // underlying file, so no explicit Flush/Close is required.
    using (StreamWriter sw = System.IO.File.CreateText(pathFile))
    {
        dn.Document.Save(sw);
    }
}