Example #1
0
        /// <summary>
        /// "Best" method: loads the page at the URL typed into <c>txtURL</c> with
        /// HtmlAgilityPack's <c>HtmlWeb</c>, removes non-content elements from the parsed
        /// document, strips the remaining markup and hands the text to <c>CleanText</c>.
        /// </summary>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button4_Click(object sender, EventArgs e)
        {
            // Nothing to download when the URL box is empty or only whitespace.
            if (string.IsNullOrWhiteSpace(txtURL.Text))
            {
                return;
            }

            var url = txtURL.Text;
            var web = new HtmlWeb(); // HtmlAgilityPack 1.6.12
            var doc = web.Load(url);

            // Elements removed before text extraction:
            //   //script|//style|//meta|//header|//title|//footer|//Template|//core
            // NOTE(review): the sibling "Better" handler removes //head rather than //header — confirm which is intended.
            var nodes = doc.DocumentNode.SelectNodes("//script|//style|//meta|//header|//title|//footer|//Template|//core");

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // so guard the loop to avoid a NullReferenceException on such pages.
            if (nodes != null)
            {
                foreach (var node in nodes)
                {
                    node.ParentNode.RemoveChild(node);
                }
            }

            // Strip the remaining tags with the project's HtmlRemoval helper.
            string text = HtmlRemoval.StripTagsRegex(doc.DocumentNode.OuterHtml);

            // Replace any <script> block still present in the stripped text with a space,
            // then trim leading/trailing whitespace.
            Regex rRemScript = new Regex(@"<script[^>]*>[\s\S]*?</script>");
            text = rRemScript.Replace(text, " ").Trim();

            CleanText(text);
        }
Example #2
0
 /// <summary>
 /// "Better" method: downloads the page at the URL typed into <c>txtURL</c> with
 /// <c>WebClient</c>, parses it with HtmlAgilityPack, removes non-content elements,
 /// strips the remaining markup and hands the text to <c>CleanText</c>.
 /// </summary>
 /// <param name="sender">Control that raised the click event (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void button2_Click(object sender, EventArgs e)
 {
     // Check the input first so no WebClient is created when there is nothing to fetch.
     if (string.IsNullOrWhiteSpace(txtURL.Text))
     {
         return;
     }

     string htmlCode;
     using (WebClient client = new WebClient())
     {
         htmlCode = client.DownloadString(txtURL.Text);
     }

     // HtmlAgilityPack 1.6.12
     HtmlAgilityPack.HtmlDocument doc1 = new HtmlAgilityPack.HtmlDocument();
     doc1.LoadHtml(htmlCode);

     // Elements removed before text extraction:
     //   //script|//style|//meta|//head|//title|//footer
     var nodes = doc1.DocumentNode.SelectNodes("//script|//style|//meta|//head|//title|//footer");

     // SelectNodes returns null (not an empty collection) when nothing matches,
     // so guard the loop to avoid a NullReferenceException on such pages.
     if (nodes != null)
     {
         foreach (var node in nodes)
         {
             node.ParentNode.RemoveChild(node);
         }
     }

     // Drop any <script> block left in the serialized HTML, then strip the remaining
     // tags with the project's HtmlRemoval helper.
     Regex rRemScript = new Regex(@"<script[^>]*>[\s\S]*?</script>");
     string text = rRemScript.Replace(doc1.DocumentNode.OuterHtml, "");
     text = HtmlRemoval.StripTagsRegex(text);

     CleanText(text);
 }
Example #3
0
        /// <summary>
        /// "Bad" method: downloads the page at the URL typed into <c>txtURL</c> with
        /// <c>HttpWebRequest</c>, removes <c>&lt;script&gt;</c> blocks and remaining tags
        /// using regular expressions only, and hands the text to <c>CleanText</c>.
        /// </summary>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        public void button1_Click(object sender, EventArgs e)
        {
            // Nothing to download when the URL box is empty or only whitespace.
            if (string.IsNullOrWhiteSpace(txtURL.Text))
            {
                return;
            }

            string urlAddress = txtURL.Text;
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(urlAddress);

            // The using block releases the response (and its connection) on every path,
            // including a non-OK status or an exception while reading.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return;
                }

                string data;
                using (Stream receiveStream = response.GetResponseStream())
                using (StreamReader readStream = CreateReader(receiveStream, response.CharacterSet))
                {
                    data = readStream.ReadToEnd();
                }

                // Remove <script> blocks while their tags are still present; running the
                // tag stripper first would leave the regex nothing to match and keep the
                // script source in the output.
                Regex rRemScript = new Regex(@"<script[^>]*>[\s\S]*?</script>");
                data = rRemScript.Replace(data, "");
                data = HtmlRemoval.StripTagsRegex(data);

                CleanText(data);
            }
        }

        /// <summary>
        /// Creates a reader over <paramref name="stream"/> using the named character set,
        /// or StreamReader's default encoding when no character set is given.
        /// </summary>
        private static StreamReader CreateReader(Stream stream, string characterSet)
        {
            // Encoding.GetEncoding throws on an empty name, so treat "" like null.
            if (string.IsNullOrEmpty(characterSet))
            {
                return new StreamReader(stream);
            }

            return new StreamReader(stream, Encoding.GetEncoding(characterSet));
        }