/// <summary>
/// Creates a new scraper, queues the local Sample.html file on it and runs the download.
/// </summary>
public void Setup()
{
    this.scraper = new Scraper.Scraper();

    // Sample.html is addressed relative to the current working directory, two levels up.
    string samplePath = Directory.GetCurrentDirectory() + @"\..\..\Sample.html";
    var sampleUri = new Uri(samplePath);
    this.scraper.Links.Add(sampleUri);

    this.scraper.Download();
}
/// <summary>
/// Logs in with the given credentials and gathers the month activities of every
/// period produced by <c>TimeHelper.GetAllDates</c> for the supplied start date.
/// </summary>
/// <param name="username">User name; URL-encoded before it is handed to <c>Login</c>.</param>
/// <param name="password">Password; URL-encoded before it is handed to <c>Login</c>.</param>
/// <param name="startDate">Start date as text, converted with <c>Convert.ToDateTime</c>.</param>
/// <returns>
/// The concatenated month activities of all periods whose deserialized data has a non-null year.
/// </returns>
public static List<MonthActivities> GetAllActivities(string username, string password, string startDate)
{
    string encodedUser = HttpUtility.UrlEncode(username);
    string encodedPass = HttpUtility.UrlEncode(password);
    DateTime firstDay = Convert.ToDateTime(startDate);

    // The same scraper instance is passed by reference through login, home page and period requests.
    var scraper = new Scraper.Scraper();
    Login(ref scraper, encodedUser, encodedPass);
    var userId = GetUserIdFromHome(EnterHome(ref scraper));

    var periods = TimeHelper.GetAllDates(firstDay);
    var collected = new List<MonthActivities>();

    foreach (var period in periods)
    {
        var rawJson = GetPeriodData(ref scraper, userId, period);
        var adjustedJson = AdjustRunkeeperJson(rawJson, period);
        var parsed = new JavaScriptSerializer().Deserialize<RunkeeperActivities>(adjustedJson);

        // Periods without a year entry contribute nothing.
        var year = parsed.Activities.Year;
        if (year != null)
        {
            collected.AddRange(year.MonthActivities);
        }
    }

    return collected;
}
/// <summary>
/// Entry point: queues a single Kickstarter project page on a scraper and downloads it.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    var pageScraper = new Scraper.Scraper();

    // Step 1 - find pages: register the page to fetch.
    var targetPage = new Uri("http://www.kickstarter.com/projects/obsidian/project-eternity");
    pageScraper.Links.Add(targetPage);

    // Step 2 - download pages.
    pageScraper.Download();

    // Step 3 (scrape pages) and step 4 (analyse) are placeholders; no code exists for them here.
}
/// <summary>
/// Scrapes one fixed vote page, issues a DELETE against the DEPUTIES table and then
/// stores one <c>Deputy</c> row per scraped deputy.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button1_Click(object sender, EventArgs e)
{
    var voteScraper = new Scraper.Scraper();
    voteScraper.ScrapeDataOfVote("https://www.sejm.gov.pl/sejm9.nsf/agent.xsp?symbol=klubglos&IdGlosowania=54381&KodKlubu=PiS");

    using (var dbContext = new SqliteDbContext())
    {
        // Remove the existing rows before adding the freshly scraped ones.
        dbContext.Database.ExecuteSqlCommand("DELETE FROM DEPUTIES");

        foreach (var scraped in voteScraper.Deputies)
        {
            var entity = new Deputy
            {
                Name = scraped.Name,
                PoliticalParty = scraped.PoliticalParty
            };

            dbContext.Deputies.Add(entity);
        }

        dbContext.SaveChanges();
    }
}
/// <summary>
/// Scrapes the sittings list, expands every sitting into its individual votes, scrapes the
/// per-club results for the first five collected meetings, and saves meetings, deputies and
/// votes through <c>SqliteDbContext</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void scraperMeetingsBtn_Click(object sender, EventArgs e)
{
    // Phase 1: sittings -> one Meeting per vote held on each sitting day.
    var sittingScraper = new Scraper.Scraper();
    sittingScraper.ScrapeDataSitting("https://www.sejm.gov.pl/sejm9.nsf/agent.xsp?symbol=posglos&NrKadencji=9");

    foreach (var sitting in sittingScraper.Meetings)
    {
        var dayScraper = new Scraper.Scraper();
        dayScraper.ScrapeDataOfDay(sitting.DetailsLink);

        foreach (var dayVote in dayScraper.Meetings)
        {
            // Vote-level fields come from the day page; number and date come from the sitting.
            var collectedMeeting = new Meeting
            {
                TimeOfVote = dayVote.TimeOfVote,
                VotingTopic = dayVote.VotingTopic,
                VotingLink = dayVote.DetailsLink,
                NrMeetings = sitting.NrMeetings,
                DateOfVote = sitting.DateOfVote
            };

            SrapedMeetings.Add(collectedMeeting);
        }
    }

    // Phase 2: only the first five collected meetings are expanded into individual votes.
    foreach (var meeting in SrapedMeetings.Take(5))
    {
        var clubScraper = new Scraper.Scraper();
        clubScraper.ScrapeDataClubLink(meeting.VotingLink);

        foreach (var link in clubScraper.Links)
        {
            var voteScraper = new Scraper.Scraper();
            voteScraper.ScrapeDataOfVote(link.Link);

            foreach (var votingItem in voteScraper.VotingList)
            {
                // Reuse an already known deputy (same name and party); otherwise register a new one.
                Deputy deputy = Deputies.FirstOrDefault(
                    known => known.Name == votingItem.Name && known.PoliticalParty == link.Party);

                if (deputy == null)
                {
                    deputy = new Deputy { Name = votingItem.Name, PoliticalParty = link.Party };
                    Deputies.Add(deputy);
                }

                Votes.Add(new Vote { Meeting = meeting, Deputy = deputy, VoteType = votingItem.Vote });
            }
        }
    }

    // Phase 3: persist everything in a single SaveChanges call.
    using (var context = new SqliteDbContext())
    {
        context.Meetings.AddRange(SrapedMeetings);
        context.Deputies.AddRange(Deputies);
        context.Votes.AddRange(Votes);
        context.SaveChanges();
    }
}
/// <summary>
/// Creates a <c>Travel</c> instance that uses the supplied scraper and a site rooted at
/// https://secure.rezserver.com/.
/// </summary>
/// <param name="scraper">Scraper stored on this instance.</param>
public Travel(Scraper.Scraper scraper)
{
    this.Scraper = scraper;

    var baseAddress = new Uri("https://secure.rezserver.com/");
    this.Site = new Site(baseAddress);
}
/// <summary>
/// Scrapes proxies from the selected source hosts in parallel, de-duplicates them by
/// proxy address and appends them to <c>lvProxies</c>, then reports the elapsed time.
/// </summary>
/// <remarks>
/// The scrape button is disabled for the duration of the handler and is re-enabled on every
/// exit path, including the validation early-return and exceptions. A failure while
/// processing one host is logged and does not stop the other hosts.
/// </remarks>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private async void btnScrape_Click(object sender, EventArgs e)
{
    btnScrape.Enabled = false;
    try
    {
        var hosts = new List<string>();
        if (rbCustom.Checked)
        {
            if (CustomSources.Count == 0)
            {
                MessageBox.Show("You have selected custom source list. Please load some before scraping.", "Form Validation Failed", MessageBoxButtons.OK, MessageBoxIcon.Information);
                return; // the outer finally re-enables the button
            }

            hosts.AddRange(CustomSources.ToArray());
        }

        // Built-in sources, all currently disabled:
        // hosts.Add("https://orca.tech/?action=real-time-proxy-list");
        // hosts.Add("http://free-proxy-list.net/anonymous-proxy.html");
        // hosts.Add("http://www.us-proxy.org/");
        // hosts.Add("www.sslproxies.org");
        // hosts.Add("http://irc-proxies24.blogspot.com/2016/08/26-08-16-irc-proxy-servers-900_26.html");
        // hosts.Add("http://www.samair.ru/proxy/");
        // hosts.Add("https://www.hide-my-ip.com/proxylist.shtml");
        // hosts.Add("http://fineproxy.org/eng/?p=6");
        // hosts.Add("http://www.blackhatworld.com/seo/new-fresh-big-proxy-lists-worldwide-usa-and-elite-proxies-updated-daily.753956/page-21");
        // hosts.Add("https://us-proxy-server.blogspot.com/");
        // hosts.Add("http://txt.proxyspy.net/proxy.txt");
        // hosts.Add("http://proxyrox.com");
        // hosts.Add("https://nordvpn.com/wp-admin/admin-ajax.php?searchParameters[0][name]=proxy-country&searchParameters[0][value]=&searchParameters[1][name]=proxy-ports&searchParameters[1][value]=&offset=25&limit=10000&action=getProxies");
        // BLOGSPOT
        // hosts.Add("http://proxyserverlist-24.blogspot.com/");
        // hosts.Add("http://sslproxies24.blogspot.ro");

        bool checkLimit = cbLimit.Checked;
        int limit = (int)this.numLimit.Value;
        var options = new ParallelOptions() { MaxDegreeOfParallelism = 10 };
        var _Scraper = new Scraper.Scraper();

        // Keyed by proxy address so duplicates across hosts collapse into one entry.
        // All writes happen under lock (hash).
        Hashtable hash = new Hashtable();

        Stopwatch s = new Stopwatch();

        lvProxies.BeginUpdate();
        try
        {
            s.Start();

            // Download and parse off the UI thread.
            await Task.Run(() =>
            {
                Parallel.ForEach(hosts, options, (item) =>
                {
                    try
                    {
                        if (checkLimit && hash.Count >= limit)
                        {
                            return;
                        }

                        // Default to http:// when the source was given without a scheme.
                        string url = item;
                        if (!url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
                            !url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
                        {
                            url = "http://" + url;
                        }

                        string html = HTTP.DoWebRequest(url);
                        if (string.IsNullOrEmpty(html))
                        {
                            return;
                        }

                        List<Proxy> proxies = _Scraper.Scrape(url, html);
                        if (proxies == null)
                        {
                            return;
                        }

                        Parallel.ForEach(proxies, options, (proxy) =>
                        {
                            if (proxy == null)
                            {
                                return;
                            }

                            lock (hash)
                            {
                                // Checked under the lock so concurrent workers cannot overshoot the limit.
                                if (checkLimit && hash.Count >= limit)
                                {
                                    return;
                                }

                                if (!hash.Contains(proxy.Proxy_))
                                {
                                    hash.Add(proxy.Proxy_, proxy);
                                }
                            }
                        });
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one failing source must not abort the others, but leave a trace.
                        Debug.WriteLine("Scraping '" + item + "' failed: " + ex);
                    }
                });
            });

            // Execution resumes on the UI thread after the await, so the list view is updated directly.
            foreach (DictionaryEntry element in hash)
            {
                if (checkLimit && lvProxies.Items.Count >= limit)
                {
                    break;
                }

                Proxy proxy = (Proxy)(element.Value);

                ListViewItem i = new ListViewItem((lvProxies.Items.Count + 1).ToString());
                var countryCode = CountryInfo.GetCode(proxy.Country);

                // Flag images are loaded once per country code and reused afterwards.
                if (!imageList.Images.Keys.Contains(countryCode))
                {
                    imageList.Images.Add(countryCode, Image.FromFile(@"Flags\" + countryCode + ".png"));
                }

                i.ImageKey = countryCode;
                i.SubItems.Add(proxy.Proxy_);
                i.SubItems.Add(proxy.Anonymity);
                i.SubItems.Add(proxy.Country);
                i.SubItems.Add("");
                i.SubItems.Add("");
                i.SubItems.Add("");
                lvProxies.Items.Add(i);
            }

            s.Stop();
        }
        finally
        {
            // Always pair BeginUpdate with EndUpdate so the list view resumes painting.
            lvProxies.EndUpdate();
        }

        MessageBox.Show("Done!\r\nTime Elapsed: " + s.Elapsed);
    }
    finally
    {
        btnScrape.Enabled = true;
    }
}