示例#1
0
        /// <summary>
        /// Loads all stored websites and ad networks, reports their counts on the UI grid,
        /// and queues one <c>ScrapeAdds</c> work item on <c>CrawlerPool</c> for every website
        /// that has at least one ad network configured.
        /// </summary>
        /// <remarks>
        /// The loaded networks are stored in the <c>ANlist</c> field, which <c>ScrapeAdds</c>
        /// reads. This method does not call WaitForIdle/Shutdown on the pool itself.
        /// </remarks>
        private void startCrawler()
        {
            updateUI(new System.Action(() => grid("Title","crawler started............", Color.Green)));

            websites dt = new websites();
            var list1 = dt.SelectAll();
            updateUI(new System.Action(() => grid("Websites", "We have " + list1.Count+" websites.", Color.Green)));

            ad_network an = new ad_network();
            ANlist = an.SelectAll();
            updateUI(new System.Action(() => grid("Networks", "We have  " + ANlist.Count + " Networks.", Color.Green)));

            foreach (var ad in list1)
            {
                // A website without any configured network has nothing to scrape.
                if (ad.website_networks == null || ad.website_networks.Count == 0)
                {
                    continue;
                }

                // Copy to a loop-local so every queued lambda captures its own website, even on
                // compilers older than C# 5 where the foreach variable is shared across iterations.
                var site = ad;

                // NOTE(review): proxyCondtition is always false here. This code used to branch on
                // checkBox1.Checked, yet both branches queued the scrape with false; confirm
                // whether that checkbox is meant to enable the proxy before wiring it in.
                CrawlerPool.QueueWorkItem(dummy => ScrapeAdds(site, false, country, device, resltn, agent), new object());
            }
        }
示例#2
0
        /// <summary>
        /// Opens one website in a PhantomJS browser, runs the matching scraper for every ad
        /// network configured on that website, and stores the elapsed scraping time on the
        /// website record(s) with the same URL.
        /// </summary>
        /// <param name="web">Website to scrape; its <c>website_url</c> and <c>website_networks</c> are read.</param>
        /// <param name="proxyCondtition">When true, a random entry returned by <c>Proxy()</c> is used as the HTTP proxy.</param>
        /// <param name="country">Not used by this method.</param>
        /// <param name="device">Not used by this method.</param>
        /// <param name="resolution">Browser window size formatted as <c>WIDTHxHEIGHT</c> (two integers separated by 'x').</param>
        /// <param name="agent">Passed through unchanged to the network scraper.</param>
        public void ScrapeAdds(websites web, bool proxyCondtition, string country, string device, string resolution,string agent)
        {
            Stopwatch watch = new Stopwatch();
            watch.Start();
            updateUI(new System.Action(() => grid("Crawler","Starting crawler............",Color.Green)));

            var service = PhantomJSDriverService.CreateDefaultService();
            service.HideCommandPromptWindow = true;

            // The proxy must be configured BEFORE the driver is constructed: the driver launches
            // the PhantomJS process from the service's settings, so later changes are not picked up.
            if (proxyCondtition)
            {
                List<string> proxy = Proxy();

                Random rand = new Random();
                string selectProxy = proxy[rand.Next(proxy.Count)];

                // Only the part before '&' is applied as the proxy address.
                // NOTE(review): the part after '&' (presumably credentials) is not handed to
                // PhantomJS; confirm its format before wiring it to the service.
                service.ProxyType = "http";
                service.Proxy = selectProxy.Split('&')[0];
            }

            var driver = new PhantomJSDriver(service);
            try
            {
                updateUI(new System.Action(() =>  grid("Loading","loading website...............", Color.Green)));

                driver.Navigate().GoToUrl(web.website_url);

                // Resizing is best effort: a bad resolution must not abort the scrape, but it is
                // reported on the grid instead of being silently ignored.
                string[] dimensions = (resolution ?? string.Empty).Split('x');
                int width;
                int height;
                if (dimensions.Length >= 2
                    && int.TryParse(dimensions[0], out width)
                    && int.TryParse(dimensions[1], out height))
                {
                    try
                    {
                        driver.Manage().Window.Size = new Size(width, height);
                    }
                    catch (Exception ex)
                    {
                        string reason = ex.Message;
                        updateUI(new System.Action(() => grid("Resolution", "Could not resize window: " + reason, Color.Red)));
                    }
                }
                else
                {
                    updateUI(new System.Action(() => grid("Resolution", "Ignoring invalid resolution: " + resolution, Color.Red)));
                }

                foreach (var networkName in web.website_networks)
                {
                    // Static string.Equals tolerates a network whose name is null.
                    ad_network adN = ANlist.Find(a => string.Equals(a.name, networkName));
                    if (adN == null)
                    {
                        continue;
                    }

                    updateUI(new System.Action(() => grid("Network","Network Found:"+ adN.name, Color.Green)));

                    // xpath holds two or three '|'-separated expressions; any other count leaves
                    // all three paths null, and a two-part value gets an empty third path.
                    string path1 = null;
                    string path2 = null;
                    string path3 = null;
                    string[] path = (adN.xpath ?? string.Empty).Split('|');
                    if (path.Length == 2 || path.Length == 3)
                    {
                        path1 = path[0];
                        path2 = path[1];
                        path3 = path.Length == 3 ? path[2] : "";
                    }

                    NetworkFunctions nf = new NetworkFunctions(this);

                    // Dispatch to the scraper that belongs to this network; unknown names are skipped.
                    switch (adN.name)
                    {
                        case "Mgid":
                            updateUI(new System.Action(() => grid("Scraping:", "Migid Ads", Color.Green)));
                            nf.Mgid(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                        case "Adblade":
                            updateUI(new System.Action(() => grid("Scraping:", "Adblade Ads", Color.Green)));
                            nf.Adblade(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                        case "Gravity":
                            updateUI(new System.Action(() => grid("Scraping:", "Gravity Ads", Color.Green)));
                            nf.Gravity(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                        case "Taboola":
                            updateUI(new System.Action(() => grid("Scraping:", "Taboola Ads", Color.Green)));
                            nf.Taboola(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                        case "Outbrain":
                            updateUI(new System.Action(() => grid("Scraping:", "Outbrain Ads", Color.Green)));
                            nf.Outbrain(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                        case "Zergnet":
                            updateUI(new System.Action(() => grid("Scraping:", "Zergnet Ads", Color.Green)));
                            nf.ZergNetDotcom(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                        case "YahooGemini":
                            updateUI(new System.Action(() => grid("Scraping:", "YahooGemini Ads", Color.Green)));
                            nf.YahooGemini(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                        case "Hexagram":
                            updateUI(new System.Action(() => grid("Scraping:", "Hexagram Ads", Color.Green)));
                            nf.Hexagram(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                        case "Contentad":
                        case "Content.ad":
                            updateUI(new System.Action(() => grid("Scraping:", "Content.ad Ads", Color.Green)));
                            nf.ContentDotAd(web.website_url, driver, path1, path2, path3, adN, agent);
                            break;
                    }
                }

                watch.Stop();
                string ScrapingTime = watch.Elapsed.ToString();

                // Record the elapsed time on every stored website that shares this URL.
                foreach (var v in web.SelectAll())
                {
                    if (v.website_url == web.website_url)
                    {
                        web.Update(v._id, ScrapingTime);
                    }
                }
                updateUI(new System.Action(() => grid("Time:", web.website_url+":" + ScrapingTime, Color.Green)));
            }
            finally
            {
                // Always shut the browser down, even when navigation or a scraper throws;
                // otherwise the PhantomJS process would be left running.
                driver.Quit();
                driver.Dispose();
            }
        }