/// <summary>
/// Loads every website and ad network, reports both counts in the UI grid, queues one
/// <c>ScrapeAdds</c> work item on <c>CrawlerPool</c> for each website that has at least one
/// network configured, then blocks until the pool is idle and shuts it down.
/// </summary>
private void startCrawler()
{
    updateUI(new System.Action(() => grid("Title", "crawler started............", Color.Green)));

    var allSites = new websites().SelectAll();
    updateUI(new System.Action(() => grid("Websites", "We have " + allSites.Count + " websites.", Color.Green)));

    // ANlist is read by ScrapeAdds to resolve network names, so it has to be
    // populated before the first work item is queued.
    ANlist = new ad_network().SelectAll();
    updateUI(new System.Action(() => grid("Networks", "We have " + ANlist.Count + " Networks.", Color.Green)));

    // NOTE(review): the previous code branched on checkBox1.Checked, but both branches queued
    // the scrape with proxyCondtition = false (a commented-out call passed true in the checked
    // branch). The effective behavior is kept here; confirm whether the checkbox was meant to
    // turn the proxy on before wiring it back in.
    const bool useProxy = false;

    foreach (var ad in allSites)
    {
        // Skip sites without networks; a null list is treated like an empty one so a single
        // incomplete record cannot abort the whole run.
        if (ad.website_networks == null || ad.website_networks.Count == 0)
        {
            continue;
        }

        // Copy the iteration variable before capturing it: compilers older than C# 5 share one
        // foreach variable across iterations, which would make every queued item see the last site.
        var site = ad;
        CrawlerPool.QueueWorkItem(dummy => ScrapeAdds(site, useProxy, country, device, resltn, agent), new object());
    }

    CrawlerPool.WaitForIdle();
    CrawlerPool.Shutdown();
}
/// <summary>
/// Opens one website in a PhantomJS browser, runs the matching <c>NetworkFunctions</c> scraper
/// for every ad network listed on the website that is also present in <c>ANlist</c>, and then
/// stores the elapsed scraping time on the matching website record(s).
/// </summary>
/// <param name="web">Website to scrape; its <c>website_url</c> is loaded and its <c>website_networks</c> are iterated.</param>
/// <param name="proxyCondtition">When true, a random entry returned by <c>Proxy()</c> is configured as the browser's HTTP proxy.</param>
/// <param name="country">Not used inside this method.</param>
/// <param name="device">Not used inside this method.</param>
/// <param name="resolution">Window size in the form "WIDTHxHEIGHT"; if it cannot be applied the failure is reported and scraping continues.</param>
/// <param name="agent">Passed through unchanged to the network scraper.</param>
/// <exception cref="InvalidOperationException">A proxy was requested but <c>Proxy()</c> returned no entries.</exception>
public void ScrapeAdds(websites web, bool proxyCondtition, string country, string device, string resolution, string agent)
{
    Stopwatch watch = new Stopwatch();
    watch.Start();
    updateUI(new System.Action(() => grid("Crawler", "Starting crawler............", Color.Green)));

    var service = PhantomJSDriverService.CreateDefaultService();
    service.HideCommandPromptWindow = true;

    if (proxyCondtition)
    {
        // The proxy settings are applied to the service BEFORE the driver is constructed.
        // Previously they were assigned after construction, which appears to be too late for
        // them to reach the PhantomJS process (see the PhantomJSDriverService documentation).
        List<string> proxies = Proxy();
        if (proxies == null || proxies.Count == 0)
        {
            throw new InvalidOperationException("A proxy was requested but no proxies are available.");
        }

        Random rand = new Random();
        string selectedProxy = proxies[rand.Next(proxies.Count)];

        // Entries are split on '&' and only the first part is used as the proxy address.
        // NOTE(review): the part after '&' was read as "credentials" by the previous code but
        // never applied; if authentication is required it presumably belongs in
        // service.ProxyAuthentication - confirm the expected format before enabling it.
        service.ProxyType = "http";
        service.Proxy = selectedProxy.Split('&')[0];
    }

    var driver = new PhantomJSDriver(service);
    try
    {
        updateUI(new System.Action(() => grid("Loading", "loading website...............", Color.Green)));
        driver.Navigate().GoToUrl(web.website_url);

        try
        {
            string[] dimensions = resolution.Split('x');
            int width = int.Parse(dimensions[0]);
            int height = int.Parse(dimensions[1]);
            driver.Manage().Window.Size = new Size(width, height);
        }
        catch (Exception ex)
        {
            // Resizing is best-effort: keep the browser's current window size, but surface
            // the problem instead of silently ignoring it.
            updateUI(new System.Action(() => grid("Resolution", "Could not apply resolution '" + resolution + "': " + ex.Message, Color.Red)));
        }

        foreach (var networkName in web.website_networks)
        {
            ad_network adN = ANlist.Find(a => a.name.Equals(networkName));
            if (adN == null)
            {
                // The website references a network that is not in ANlist; nothing to scrape.
                continue;
            }

            updateUI(new System.Action(() => grid("Network", "Network Found:" + adN.name, Color.Green)));

            // The xpath field holds two or three '|'-separated paths. Any other count leaves
            // all three paths null, exactly as before.
            string path1 = null;
            string path2 = null;
            string path3 = null;
            string[] path = adN.xpath.Split('|');
            if (path.Length == 3)
            {
                path1 = path[0];
                path2 = path[1];
                path3 = path[2];
            }
            else if (path.Length == 2)
            {
                path1 = path[0];
                path2 = path[1];
                path3 = "";
            }

            NetworkFunctions nf = new NetworkFunctions(this);
            switch (adN.name)
            {
                case "Mgid":
                    updateUI(new System.Action(() => grid("Scraping:", "Migid Ads", Color.Green)));
                    nf.Mgid(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
                case "Adblade":
                    updateUI(new System.Action(() => grid("Scraping:", "Adblade Ads", Color.Green)));
                    nf.Adblade(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
                case "Gravity":
                    updateUI(new System.Action(() => grid("Scraping:", "Gravity Ads", Color.Green)));
                    nf.Gravity(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
                case "Taboola":
                    updateUI(new System.Action(() => grid("Scraping:", "Taboola Ads", Color.Green)));
                    nf.Taboola(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
                case "Outbrain":
                    updateUI(new System.Action(() => grid("Scraping:", "Outbrain Ads", Color.Green)));
                    nf.Outbrain(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
                case "Zergnet":
                    updateUI(new System.Action(() => grid("Scraping:", "Zergnet Ads", Color.Green)));
                    nf.ZergNetDotcom(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
                case "YahooGemini":
                    updateUI(new System.Action(() => grid("Scraping:", "YahooGemini Ads", Color.Green)));
                    nf.YahooGemini(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
                case "Hexagram":
                    updateUI(new System.Action(() => grid("Scraping:", "Hexagram Ads", Color.Green)));
                    nf.Hexagram(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
                case "Contentad":
                case "Content.ad":
                    updateUI(new System.Action(() => grid("Scraping:", "Content.ad Ads", Color.Green)));
                    nf.ContentDotAd(web.website_url, driver, path1, path2, path3, adN, agent);
                    break;
            }
        }

        watch.Stop();
        string ScrapingTime = watch.Elapsed.ToString();

        // Record the elapsed time on every stored website whose URL matches the one just scraped.
        foreach (var v in web.SelectAll())
        {
            if (v.website_url == web.website_url)
            {
                web.Update(v._id, ScrapingTime);
            }
        }

        updateUI(new System.Action(() => grid("Time:", web.website_url + ":" + ScrapingTime, Color.Green)));
    }
    finally
    {
        // Always shut the browser down, even when navigation or a scraper throws; otherwise
        // the driver and its PhantomJS process are never released.
        driver.Quit();
        driver.Dispose();
    }
}