示例#1
0
        /// <summary>
        /// Scrapes proxies from the site whose favicon link appears in <paramref name="data"/>.
        /// </summary>
        /// <remarks>
        /// The host is taken from the first <c>http(s)://HOST/favicon.ico</c> reference found in
        /// <paramref name="data"/>. The host's <c>/search?max-results=10</c> page is downloaded
        /// (always over <c>http://</c>, whatever scheme the favicon link used), <c>GetPages</c>
        /// turns it into a list of page URLs, and every page is fetched — at most 10 at a time —
        /// and handed to <c>Generic.Scrape</c>. Pages that fail to download or parse are skipped.
        /// </remarks>
        /// <param name="data">Text expected to contain a favicon URL of the target site.</param>
        /// <returns>
        /// The proxies collected from all pages; an empty list when no host can be extracted
        /// or <c>GetPages</c> returns <c>null</c>.
        /// </returns>
        public List <Proxy> Scrape(string data)
        {
            List <Proxy> scraped = new List <Proxy>();

            if (string.IsNullOrEmpty(data))
            {
                return(scraped);
            }

            // Group 2 is the part between the scheme and "/favicon.ico", i.e. the host.
            var host = data.RegexMatch(@"(http:\/\/|https:\/\/)(.*?)\/favicon.ico").Groups[2].Value;
            if (string.IsNullOrEmpty(host))
            {
                // No favicon link found: without a host the search URL would be
                // "http:///search?...", so there is nothing useful to request.
                return(scraped);
            }

            var searchPageURL = "http://" + host + "/search?max-results=10";
            var searchPage    = HTTP.DoWebRequest(searchPageURL);

            string[] pages = GetPages(searchPage, host);
            if (pages == null)
            {
                return(scraped);
            }

            Generic parser  = new Generic();
            var     options = new ParallelOptions {
                MaxDegreeOfParallelism = 10
            };

            // Parallel.ForEach blocks until every page has been processed, so wrapping it
            // in a Task that is started and immediately waited on adds nothing.
            Parallel.ForEach(pages, options, (pageUrl) =>
            {
                try
                {
                    string html = HTTP.DoWebRequest(pageUrl);
                    if (string.IsNullOrEmpty(html))
                    {
                        return;
                    }

                    // The parser instance is shared between workers and its thread-safety is
                    // not visible from here, so parsing stays inside the lock together with
                    // the insertion into the shared result list.
                    lock (scraped)
                    {
                        scraped.AddRange(parser.Scrape(html));
                    }
                }
                catch (System.Exception ex)
                {
                    // Best effort: one bad page must not abort the whole scrape, but leave
                    // a trace instead of swallowing the failure silently.
                    System.Diagnostics.Trace.TraceWarning("Skipping page {0}: {1}", pageUrl, ex.Message);
                }
            });

            return(scraped);
        }
示例#2
0
        /// <summary>
        /// Scrapes proxies from the proxyrox.com listing pages.
        /// </summary>
        /// <remarks>
        /// Requests the listing pages <c>?p=0</c> through <c>?p=22</c> (sorted by reliability,
        /// descending), at most 10 at a time, and passes each downloaded page to
        /// <c>Generic.Scrape</c>. Pages that fail to download or parse are skipped.
        /// </remarks>
        /// <param name="data">Not used by this implementation; the page URLs are fixed.</param>
        /// <returns>The proxies collected from all pages that could be processed.</returns>
        public List <Proxy> Scrape(string data)
        {
            // Number of listing pages requested (p = 0 .. PageCount - 1).
            const int PageCount = 23;

            List <string> pages   = new List <string>(PageCount);
            List <Proxy>  scraped = new List <Proxy>();

            for (int pageIndex = 0; pageIndex < PageCount; pageIndex++)
            {
                pages.Add("https://proxyrox.com/?p=" + pageIndex + "&sortdir=desc&sort=reliability");
            }

            Generic parser  = new Generic();
            var     options = new ParallelOptions {
                MaxDegreeOfParallelism = 10
            };

            // Parallel.ForEach blocks until every page has been processed, so wrapping it
            // in a Task that is started and immediately waited on adds nothing.
            Parallel.ForEach(pages, options, (pageUrl) =>
            {
                try
                {
                    string html = HTTP.DoWebRequest(pageUrl);
                    if (string.IsNullOrEmpty(html))
                    {
                        return;
                    }

                    // The parser instance is shared between workers and its thread-safety is
                    // not visible from here, so parsing stays inside the lock together with
                    // the insertion into the shared result list.
                    lock (scraped)
                    {
                        scraped.AddRange(parser.Scrape(html));
                    }
                }
                catch (System.Exception ex)
                {
                    // Best effort: one bad page must not abort the whole scrape, but leave
                    // a trace instead of swallowing the failure silently.
                    System.Diagnostics.Trace.TraceWarning("Skipping page {0}: {1}", pageUrl, ex.Message);
                }
            });

            return(scraped);
        }
示例#3
0
        /// <summary>
        /// Handles the Scrape button: downloads every configured source, collects the unique
        /// proxies found in them and appends one row per proxy to <c>lvProxies</c>.
        /// </summary>
        /// <remarks>
        /// <c>async void</c> is acceptable here because this is a UI event handler. The
        /// downloads run on the thread pool through <c>Task.Run</c>; the list view is filled
        /// after the await. The button is re-enabled and the list view update is closed in
        /// <c>finally</c> blocks so a failure cannot leave the form disabled or frozen.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private async void btnScrape_Click(object sender, EventArgs e)
        {
            var hosts = new List <string>();

            if (rbCustom.Checked)
            {
                if (CustomSources.Count == 0)
                {
                    // Validation happens before the button is disabled, so bailing out here
                    // does not leave the button stuck in the disabled state.
                    MessageBox.Show("You have selected custom source list. Please load some before scraping.", "Form Validation Failed", MessageBoxButtons.OK, MessageBoxIcon.Information);
                    return;
                }
                hosts.AddRange(CustomSources.ToArray());
            }

            // Built-in sources, currently disabled:
            //   https://orca.tech/?action=real-time-proxy-list
            //   http://free-proxy-list.net/anonymous-proxy.html
            //   http://www.us-proxy.org/
            //   www.sslproxies.org
            //   http://irc-proxies24.blogspot.com/2016/08/26-08-16-irc-proxy-servers-900_26.html
            //   http://www.samair.ru/proxy/
            //   https://www.hide-my-ip.com/proxylist.shtml
            //   http://fineproxy.org/eng/?p=6
            //   http://www.blackhatworld.com/seo/new-fresh-big-proxy-lists-worldwide-usa-and-elite-proxies-updated-daily.753956/page-21
            //   https://us-proxy-server.blogspot.com/
            //   http://txt.proxyspy.net/proxy.txt
            //   http://proxyrox.com
            //   https://nordvpn.com/wp-admin/admin-ajax.php?searchParameters[0][name]=proxy-country&searchParameters[0][value]=&searchParameters[1][name]=proxy-ports&searchParameters[1][value]=&offset=25&limit=10000&action=getProxies
            //   http://proxyserverlist-24.blogspot.com/   (blogspot)
            //   http://sslproxies24.blogspot.ro           (blogspot)

            // Snapshot the UI settings up front: the worker threads must not touch controls.
            bool checkLimit = cbLimit.Checked;
            int  limit      = (int)this.numLimit.Value;
            var  options    = new ParallelOptions()
            {
                MaxDegreeOfParallelism = 10
            };
            var scraper = new Scraper.Scraper();

            // Keyed by proxy.Proxy_ so each proxy is kept only once. Every access from the
            // worker threads happens while holding a lock on this table.
            Hashtable unique = new Hashtable();

            btnScrape.Enabled = false;
            try
            {
                Stopwatch timer = Stopwatch.StartNew();

                await Task.Run(() =>
                {
                    Parallel.ForEach(hosts, options, (item) =>
                    {
                        try
                        {
                            // Skip the download entirely once enough proxies were collected.
                            lock (unique)
                            {
                                if (checkLimit && unique.Count >= limit)
                                {
                                    return;
                                }
                            }

                            string url = item;
                            if (!url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
                                !url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
                            {
                                url = "http://" + url;
                            }

                            string html = HTTP.DoWebRequest(url);
                            if (string.IsNullOrEmpty(html))
                            {
                                return;
                            }

                            List <Proxy> proxies = scraper.Scrape(url, html);
                            if (proxies == null)
                            {
                                return;
                            }

                            // Every insertion needs the lock anyway, so a plain loop under a
                            // single lock replaces the nested parallel loop. Checking the
                            // limit inside the lock keeps the table from overshooting it.
                            lock (unique)
                            {
                                foreach (Proxy proxy in proxies)
                                {
                                    if (checkLimit && unique.Count >= limit)
                                    {
                                        break;
                                    }
                                    if (proxy == null)
                                    {
                                        continue;
                                    }
                                    if (!unique.Contains(proxy.Proxy_))
                                    {
                                        unique.Add(proxy.Proxy_, proxy);
                                    }
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            // Best effort: a failing source must not stop the others.
                            Trace.TraceWarning("Skipping source {0}: {1}", item, ex.Message);
                        }
                    });
                });

                // Execution resumes on the UI thread after the await, so the controls can
                // be used directly without Invoke.
                lvProxies.BeginUpdate();
                try
                {
                    foreach (DictionaryEntry element in unique)
                    {
                        if (checkLimit && lvProxies.Items.Count >= limit)
                        {
                            break;
                        }
                        Proxy proxy = (Proxy)(element.Value);

                        ListViewItem row = new ListViewItem((lvProxies.Items.Count + 1).ToString());
                        var countryCode  = CountryInfo.GetCode(proxy.Country);
                        if (!string.IsNullOrEmpty(countryCode))
                        {
                            if (!imageList.Images.Keys.Contains(countryCode))
                            {
                                // Load the flag on first use; a missing flag file only
                                // means the row is shown without an icon.
                                string flagPath = @"Flags\" + countryCode + ".png";
                                if (System.IO.File.Exists(flagPath))
                                {
                                    imageList.Images.Add(countryCode, Image.FromFile(flagPath));
                                }
                            }
                            row.ImageKey = countryCode;
                        }

                        row.SubItems.Add(proxy.Proxy_);
                        row.SubItems.Add(proxy.Anonymity);
                        row.SubItems.Add(proxy.Country);
                        // Three empty cells for the remaining columns.
                        row.SubItems.Add("");
                        row.SubItems.Add("");
                        row.SubItems.Add("");
                        lvProxies.Items.Add(row);
                    }
                }
                finally
                {
                    lvProxies.EndUpdate();
                }

                timer.Stop();
                MessageBox.Show("Done!\r\nTime Elapsed: " + timer.Elapsed);
            }
            finally
            {
                btnScrape.Enabled = true;
            }
        }
示例#4
0
        /// <summary>
        /// Scrapes proxies from the samair.ru proxy listing.
        /// </summary>
        /// <remarks>
        /// Requests the listing pages <c>proxy-1.htm</c>, <c>proxy-02.htm</c> …
        /// <c>proxy-29.htm</c>, at most 10 at a time. In each listing page the first
        /// <c>/proxy/ip-port/ID.html</c> link is located, that page is downloaded and its
        /// content is passed to <c>Generic.Scrape</c>. Pages that fail to download or parse
        /// are skipped.
        /// </remarks>
        /// <param name="data">Not used by this implementation; the page URLs are fixed.</param>
        /// <returns>The proxies collected from all pages that could be processed.</returns>
        public List <Proxy> Scrape(string data)
        {
            const string ListingRoot = "http://www.samair.ru/proxy/";

            List <Proxy>  scraped = new List <Proxy>();
            List <string> pages   = new List <string>();

            // The first page is requested without zero padding; pages 2-29 use two digits.
            pages.Add(ListingRoot + "proxy-1.htm");
            for (int pageNumber = 2; pageNumber < 30; pageNumber++)
            {
                pages.Add(ListingRoot + "proxy-" + pageNumber.ToString("D2") + ".htm");
            }

            Generic parser  = new Generic();
            var     options = new ParallelOptions {
                MaxDegreeOfParallelism = 10
            };

            // Parallel.ForEach blocks until every page has been processed, so wrapping it
            // in a Task that is started and immediately waited on adds nothing.
            Parallel.ForEach(pages, options, (listingUrl) =>
            {
                try
                {
                    string listingHtml = HTTP.DoWebRequest(listingUrl);
                    if (string.IsNullOrEmpty(listingHtml))
                    {
                        return;
                    }

                    // Listing pages link to a plain "ip:port" page, e.g.
                    // <a href="/proxy/ip-port/977482367.html">You can do it there</a>
                    var pageId = listingHtml.GetBetween("<a href=\"/proxy/ip-port/", ".html");
                    if (string.IsNullOrEmpty(pageId))
                    {
                        // No link on this page, so there is nothing to follow.
                        return;
                    }

                    var ipPortHtml = HTTP.DoWebRequest(ListingRoot + "ip-port/" + pageId + ".html");
                    if (string.IsNullOrEmpty(ipPortHtml))
                    {
                        return;
                    }

                    // The parser instance is shared between workers and its thread-safety is
                    // not visible from here, so parsing stays inside the lock together with
                    // the insertion into the shared result list.
                    lock (scraped)
                    {
                        scraped.AddRange(parser.Scrape(ipPortHtml));
                    }
                }
                catch (System.Exception ex)
                {
                    // Best effort: one bad page must not abort the whole scrape, but leave
                    // a trace instead of swallowing the failure silently.
                    System.Diagnostics.Trace.TraceWarning("Skipping page {0}: {1}", listingUrl, ex.Message);
                }
            });

            return(scraped);
        }