private void DokuzEylulHtmlReader(HtmlDocument loadedDocument, Okul okul)
{
    _generalList = new List<GenelYemekListeObject>();

    // Select the lunch-menu widget container; SelectNodes returns null when nothing matches.
    string sayfaDivSelector = "//div[@id='tm_lunch_menu_widget-3']";
    HtmlNodeCollection sayfaDivList = loadedDocument.DocumentNode.SelectNodes(sayfaDivSelector);
    if (sayfaDivList == null)
    {
        return;
    }

    var sayfaStrongList = sayfaDivList.Descendants("strong").ToList();

    foreach (var strongNode in sayfaStrongList)
    {
        GenelYemekListeObject generalYemek = new GenelYemekListeObject();
        string strongDateTime = strongNode.InnerHtml;
        // The menu text sits two siblings past the <strong> date (after the <br>);
        // guard the sibling walk so a trailing date without a menu doesn't throw.
        string brInnerHtml = strongNode.NextSibling?.NextSibling?.InnerHtml;

        if (!string.IsNullOrEmpty(strongDateTime))
        {
            generalYemek.TarihString = strongDateTime;
            generalYemek.TarihDateTime = DatetimeSet(strongDateTime, okul);
        }
        if (!string.IsNullOrEmpty(brInnerHtml))
        {
            generalYemek.YemekIcerik = StringClean(brInnerHtml, okul);
        }
        _generalList.Add(generalYemek);
    }
}
/// <summary>
/// Scrapes the winning results for LottoMax with help from the base class.
/// </summary>
/// <returns></returns>
public async Task ScrapeLotteryAsync()
{
    // The credentials in this URL are redacted upstream, and the redaction also swallowed
    // the code between the URL literal and the XPath string. The load/select step below is
    // a hedged reconstruction assuming an HtmlWeb fetch; only the XPath class name survived.
    var lotteryUrl = "http://*****:*****";
    var doc = await new HtmlWeb().LoadFromWebAsync(lotteryUrl);
    var lottoMax = doc.DocumentNode.SelectSingleNode("//div[@class='panel-group category-accordion-LottoMax']");

    var newLottoNumList = new List<string>();
    // Create a temporary list to store the lottery numbers.
    var tempList = lottoMax.Descendants("li").Select(x => x.InnerText).ToList();

    // Strip any leading 0s the website may have used; JSON doesn't allow leading zeros on numbers.
    foreach (var itm in tempList)
    {
        newLottoNumList.Add(itm.TrimStart('0'));
    }

    var bonusNum = newLottoNumList.Last();
    // Drop the bonus entry from the end; Remove(bonusNum) would delete the first
    // occurrence if the bonus value duplicated one of the draw numbers.
    newLottoNumList.RemoveAt(newLottoNumList.Count - 1);
    var lottoMaxDrawNums = string.Join(", ", newLottoNumList);
    var newResults = await _formatNewLotteryResult.FormatResult(lottoMaxDrawNums, bonusNum);

    _writeNewResult.NewLotteryResultsWritten += _afterLottoWritten.OnResultsWritten;
    var writeTask = Task.Run(() => _writeNewResult.WriteNewResults("LottoMax", newResults));
    await writeTask;
}
private List<Category> GetArchiveProgramsSubCategories(Category category)
{
    List<Category> subCategories = new List<Category>();
    HtmlDocument doc = GetWebData<HtmlDocument>(string.Format(cUrlArchive, BaseUrl));
    HtmlNodeCollection uls = doc.DocumentNode.SelectNodes("//ul[@class='index']");
    foreach (HtmlNode li in uls.Descendants("li"))
    {
        // Letter headers open a new letter category; programs attach to the latest one.
        string dataLetterAttribute = li.GetAttributeValue("data-letter", "");
        if (!string.IsNullOrEmpty(dataLetterAttribute))
        {
            subCategories.Add(new Category()
            {
                Name = dataLetterAttribute,
                ParentCategory = category,
                HasSubCategories = true,
                SubCategories = new List<Category>(),
                SubCategoriesDiscovered = true
            });
        }
        HtmlNode a = li.SelectNodes("a").First(n => !n.GetAttributeValue("href", "").StartsWith("#"));
        RssLink program = new RssLink()
        {
            Name = a.InnerText,
            Url = a.GetAttributeValue("href", "").Replace("/", ""),
            ParentCategory = subCategories.Last(),
            HasSubCategories = false,
            Other = cArchiveCategory
        };
        subCategories.Last().SubCategories.Add(program);
    }
    return subCategories;
}
public List<ScrappedAdress> getScrappedAdresses(string siteAdress, string tagPath)
{
    string[] splitedTags = splitTags(tagPath);
    HtmlWeb web = new HtmlWeb();
    var htmlDoc = web.Load(siteAdress);
    List<ScrappedAdress> aList = new List<ScrappedAdress>();

    // The last three path segments name the container, row, and field tags.
    HtmlNodeCollection node = htmlDoc.DocumentNode.SelectNodes("//" + splitedTags[splitedTags.Length - 3]);

    foreach (var tagNode in node.Descendants(splitedTags[splitedTags.Length - 2]))
    {
        ScrappedAdressBuilder sab = new ScrappedAdressBuilder();
        foreach (var tag in tagNode.Descendants(splitedTags[splitedTags.Length - 1]))
        {
            if (tag.NodeType == HtmlNodeType.Element)
            {
                sab.FilterPart(tag);
            }
        }
        ScrappedAdress sa = (ScrappedAdress)sab.buildScrappedObject();
        if (!string.IsNullOrEmpty(sa.Country))
        {
            aList.Add(sa);
        }
    }

    // Print after the list is populated; in the original this loop ran before any
    // addresses were added, so it never printed anything.
    for (int i = 0; i < aList.Count; i++)
    {
        Console.WriteLine($"Country: {aList[i].Country} \nAdress: {aList[i].ConnectionAdress} \nPort: {aList[i].Port}\n");
    }

    FilesIO.serializeList("serList", aList);
    return aList;
}
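A hypothetical call for getScrappedAdresses, assuming it runs inside the scraper class against a proxy-list page whose entries sit in a table/tr/td layout; the URL and tag path are illustrative, not taken from the original project.

// Illustrative only: the site URL and tag path are assumptions.
List<ScrappedAdress> adresses = getScrappedAdresses("https://example.com/proxy-list", "table/tr/td");
foreach (ScrappedAdress a in adresses)
{
    Console.WriteLine($"{a.Country}: {a.ConnectionAdress}:{a.Port}");
}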
void ParsePools(string html)
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);
    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(@"/html/body/section/div/table/tbody");
    foreach (var node in nodes.Descendants("tr"))
    {
        //TODO: Set up the combo box to use these other elements?
        var childNodes = node.Descendants("td");
        var poolName = childNodes.ElementAt(0).InnerHtml;
        var poolOwner = childNodes.ElementAt(1).InnerHtml;
        var poolSite = childNodes.ElementAt(2).InnerHtml;
        var poolFee = childNodes.ElementAt(3).InnerHtml;
        var poolAddress = childNodes.ElementAt(4).InnerHtml;
        var poolVerified = childNodes.ElementAt(5).InnerHtml == "Yes";

        //TODO: If I do this, I should make the drop down non-editable. Then, display the
        //      name, and keep the address hidden. Also, if verified, note that somehow.
        //      (See the sketch after this method.)
        //PoolList.Add(poolName, poolAddress);
        comboBox_Pools.Items.Add(poolAddress);
    }
    comboBox_Pools.SelectedIndex = 0;
}
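A minimal sketch of the TODO in ParsePools, assuming a WinForms ComboBox: wrap each pool in a small item type so the drop-down displays the name (flagging verified pools) while the address stays retrievable from the selected item. The type and member names here are illustrative assumptions, not from the original code.

// Sketch for the TODO above; names are assumptions.
class PoolItem
{
    public string Name;
    public string Address;
    public bool Verified;
    // The combo box renders ToString(); the address stays hidden on the item.
    public override string ToString() => Verified ? Name + " (verified)" : Name;
}

// Inside the row loop, instead of adding the raw address:
//   comboBox_Pools.Items.Add(new PoolItem { Name = poolName, Address = poolAddress, Verified = poolVerified });
// Make the drop-down non-editable:
//   comboBox_Pools.DropDownStyle = ComboBoxStyle.DropDownList;
// Read the hidden address back:
//   var selected = ((PoolItem)comboBox_Pools.SelectedItem).Address;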
public void loadDetails(string TeamURL, string playername)
{
    string urlmain = "http://www.pro-football-reference.com/teams/" + TeamURL + "/2016.htm";
    string urlshort = "http://www.pro-football-reference.com/play-index/play_finder.cgi?request=1&super_bowl=0&match=summary_all&year_min=2016&year_max=2016&team_id=" + TeamURL + "&opp_id=&game_type=R&game_day_of_week=&game_num_min=0&game_num_max=99&week_num_min=0&week_num_max=99&quarter=1&quarter=2&quarter=3&quarter=4&quarter=5&tr_gtlt=lt&minutes=15&seconds=00&down=0&down=1&down=2&down=3&down=4&yds_to_go_min=&yds_to_go_max=&yg_gtlt=gt&yards=&is_first_down=-1&field_pos_min_field=team&field_pos_min=&field_pos_max_field=team&field_pos_max=&end_field_pos_min_field=team&end_field_pos_min=&end_field_pos_max_field=team&end_field_pos_max=&type=PASS&is_complete=-1&is_turnover=-1&turnover_type=interception&turnover_type=fumble&is_scoring=-1&score_type=touchdown&score_type=field_goal&score_type=safety&is_sack=-1&include_kneels=-1&no_play=0&margin_min=&margin_max=&order_by=yards&more_options=0&rush_direction=LE&rush_direction=LT&rush_direction=LG&rush_direction=M&rush_direction=RG&rush_direction=RT&rush_direction=RE&pass_location=SL&pass_location=SM&pass_location=SR";
    string urldeep = "http://www.pro-football-reference.com/play-index/play_finder.cgi?request=1&super_bowl=0&match=summary_all&year_min=2016&year_max=2016&team_id=" + TeamURL + "&opp_id=&game_type=R&playoff_round=&game_location=&game_result=&game_day_of_week=&game_num_min=0&game_num_max=99&week_num_min=0&week_num_max=99&quarter=1&quarter=2&quarter=3&quarter=4&quarter=5&tr_gtlt=lt&minutes=15&seconds=00&down=0&down=1&down=2&down=3&down=4&yds_to_go_min=&yds_to_go_max=&yg_gtlt=gt&yards=&is_first_down=-1&field_pos_min_field=team&field_pos_min=&field_pos_max_field=team&field_pos_max=&end_field_pos_min_field=team&end_field_pos_min=&end_field_pos_max_field=team&end_field_pos_max=&type=PASS&is_complete=-1&is_turnover=-1&turnover_type=interception&turnover_type=fumble&is_scoring=-1&score_type=touchdown&score_type=field_goal&score_type=safety&is_sack=-1&include_kneels=-1&no_play=0&margin_min=&margin_max=&order_by=yards&more_options=0&rush_direction=LE&rush_direction=LT&rush_direction=LG&rush_direction=M&rush_direction=RG&rush_direction=RT&rush_direction=RE&pass_location=DL&pass_location=DM&pass_location=DR";

    HtmlWeb main = new HtmlWeb();
    HtmlDocument maindoc = main.Load(urlmain);
    HtmlWeb shrt = new HtmlWeb();
    HtmlDocument shortdoc = shrt.Load(urlshort); // TODO: shortdoc is loaded but not consumed yet
    HtmlWeb deep = new HtmlWeb();
    HtmlDocument deepdoc = deep.Load(urldeep);   // TODO: deepdoc is loaded but not consumed yet

    HtmlNodeCollection maintexts = maindoc.DocumentNode.SelectNodes("//table[@class='sortable stats_table now_sortable']");
    var mainrows = maintexts.Descendants("tr").ToList();
    List<List<String>> mainvalues = new List<List<string>>();
    foreach (var row in mainrows)
    {
        List<String> values = new List<string>();
        foreach (var columns in row.ChildNodes)
        {
            values.Add(columns.InnerText);
        }
        mainvalues.Add(values);
    }
    foreach (var v in mainvalues)
    {
        // TODO: consume the scraped row values (this loop was empty in the original).
    }
}
public List<string> scrap(string pathToTheTag, string targetedTag)
{
    HtmlWeb web = new HtmlWeb();
    var htmlDoc = web.Load(Adress);
    HtmlNodeCollection node = htmlDoc.DocumentNode.SelectNodes(pathToTheTag);
    List<string> aList = new List<string>();
    foreach (var nNode in node.Descendants(targetedTag))
    {
        if (nNode.NodeType == HtmlNodeType.Element)
        {
            aList.Add(nNode.InnerHtml);
        }
    }
    return aList;
}
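A hypothetical usage of scrap, assuming the Adress field already points at the target page; the XPath and tag name are illustrative.

// Illustrative only: the XPath and tag are assumptions.
List<string> linkMarkup = scrap("//div[@id='content']", "a");
foreach (string markup in linkMarkup)
{
    Console.WriteLine(markup); // inner HTML of each <a> under the content div
}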
private void button1_Click(object sender, EventArgs e)
{
    try
    {
        Uri uri = new Uri(textBox2.Text);
        WebClient client = new WebClient();
        client.Encoding = Encoding.UTF8;
        string html = client.DownloadString(uri);
        textBox3.Text = html;
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(html);
        HtmlNodeCollection hveriler = doc.DocumentNode.SelectNodes("//*[@id='mw-content-text']/div/p"); //*[@id="mw-content-text"]/div/p[2]/a[1]
        HtmlNode baslik = doc.DocumentNode.SelectSingleNode("//*[@id='firstHeading']");

        // THIS IS THE PART YASIN WILL NEED
        HtmlNodeCollection yeni = doc.DocumentNode.SelectNodes("//*[@id='Soru']/div[2]/div/div/div[2]");
        var hrefs = yeni.Descendants("a")
                        .Select(node => node.GetAttributeValue("href", ""))
                        .ToList();
        //////////////////////////////////////////

        kalinbaslik = baslik.InnerText;

        //int paragraf_sayisi = 0;
        //foreach (var veri in hveriler)
        //{
        //    // Strip Turkish Wikipedia's "[citation needed]" marker from each paragraph.
        //    textBox1.Text += veri.InnerText.Replace("[kaynak belirtilmeli]", "");
        //    textBox1.AppendText(Environment.NewLine);
        //    textBox1.AppendText(Environment.NewLine);
        //    paragraf_sayisi++;
        //}
        //MessageBox.Show("Yazılar Düzeltiliyor Biraz Bekleyin"); // "Fixing the text, please wait a moment"
        //duzenle();
    }
    catch
    {
        textBox2.Text = "olmadı moruk"; // "didn't work, man"
    }
}
private void GetThreadData()
{
    posts = doc.DocumentNode.SelectNodes(@"//div[@class='postContainer opContainer' or @class='postContainer replyContainer']");
    // GetAttributeValue avoids the NullReferenceException that Attributes["class"].Value
    // throws for descendants without a class attribute.
    postids = posts.Descendants("div")
                   .Where(d => d.GetAttributeValue("class", "").Contains("post op") ||
                               d.GetAttributeValue("class", "").Contains("post reply"))
                   .ToArray();
    messages = posts.Descendants("blockquote")
                    .Where(d => d.GetAttributeValue("class", "").Contains("postMessage") ||
                                d.GetAttributeValue("class", "").Contains("postMessage postwImage"))
                    .ToArray();
    ids = posts.Descendants("span").Where(d => d.GetClasses().Contains("posteruid")).ToArray();
    dates = posts.Descendants("span").Where(d => d.GetClasses().Contains("dateTime")).ToArray();
    picturesData = posts.Descendants("span").Where(d => d.GetClasses().Contains("fileText")).ToArray();
    picturesUri = posts.Descendants("a").Where(d => d.GetClasses().Contains("fileThumb")).ToArray();
}
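A short note on why GetThreadData mixes two class-check styles: GetClasses() splits the class attribute into individual tokens, so it can only match single class names, while checks for multi-class strings like "post op" need the raw attribute value. A sketch of the difference:

// Given: <div class="post op highlight">
// node.GetClasses()                            -> "post", "op", "highlight"
// node.GetClasses().Contains("post op")        -> false (tokens are split on spaces)
// node.GetAttributeValue("class", "")
//     .Contains("post op")                     -> true  (raw attribute string)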
public Tuple<string, string, string, string> scrapGameData(string url, string console) // Gather all available information about a game
{
    string image = "";
    string releasedate = "";
    string developers = "";
    string publishers = "";
    if (console.Equals("NES") || console.Equals("GBC") || console.Equals("GBA") || console.Equals("GB"))
    {
        HtmlDocument basepage = new HtmlAgilityPack.HtmlDocument();
        WebClient cli = new WebClient();
        cli.Encoding = System.Text.Encoding.UTF8;
        basepage.LoadHtml(cli.DownloadString("http://nintendo.wikia.com" + url));
        HtmlNodeCollection rows = basepage.DocumentNode.SelectNodes("//table[@class='infobox']/tr");
        foreach (HtmlNode row in rows) // Detect the Developer and Publisher fields
        {
            HtmlNodeCollection subnodes = row.ChildNodes;
            if (subnodes[1].InnerText.Contains("Developer(s)"))
            {
                foreach (HtmlNode subsubnode in subnodes[2].ChildNodes)
                {
                    if (subsubnode.Name == "a")
                    {
                        developers += (subsubnode.InnerText.Trim() + ",");
                    }
                }
            }
            if (subnodes[1].InnerText.Contains("Publisher(s)"))
            {
                foreach (HtmlNode subsubnode in subnodes[2].ChildNodes)
                {
                    if (subsubnode.Name == "a")
                    {
                        publishers += (subsubnode.InnerText.Trim() + ",");
                    }
                }
            }
        }
        foreach (HtmlNode row in rows)
        {
            Boolean found = false;
            HtmlNodeCollection subnodes = row.ChildNodes;
            foreach (HtmlNode descendant in subnodes.Descendants()) // Detect a cover-image field
            {
                if (descendant.Name == "img")
                {
                    foreach (HtmlAttribute attr in descendant.Attributes)
                    {
                        // Only the src attribute holds the image URL; assigning whatever
                        // attribute came first (as the original did) grabbed the wrong value.
                        if (attr.Name == "src")
                        {
                            System.Diagnostics.Debug.WriteLine(attr.Value);
                            image = attr.Value;
                            found = true;
                            break;
                        }
                    }
                    if (found) { break; }
                }
            }
            if (found) { break; }
        }

        string[] months = { "JANUARY", "FEBRUARY", "MARCH", "APRIL", "MAY", "JUNE", "JULY", "AUGUST", "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER" };
        string[] regions = { "NA:", "JP:", "EU:", "AU:" };
        HtmlNodeCollection release_elements = basepage.DocumentNode.SelectNodes("//table[@class='infobox']/tr");
        foreach (HtmlNode row in release_elements)
        {
            Boolean found = false;
            HtmlNodeCollection subnodes = row.ChildNodes;
            foreach (HtmlNode descendant in subnodes.Descendants()) // Detect a date field
            {
                if (descendant.InnerText.Contains("Release Date(s)"))
                {
                    foreach (HtmlNode subdescendant in descendant.Descendants())
                    {
                        // Take a cell that names a month but no regional prefix.
                        string text = subdescendant.InnerText.ToUpper();
                        if (subdescendant.Name == "td" &&
                            !regions.Any(r => text.Contains(r)) &&
                            months.Any(m => text.Contains(m)))
                        {
                            releasedate = subdescendant.InnerText;
                            found = true;
                            break;
                        }
                    }
                    if (found) { break; }
                }
            }
            if (found) { break; }
        }
    }
    else
    {
        // SEGA wiki went down! :(
    }
    return Tuple.Create(image, releasedate, developers, publishers);
}
private static IEnumerable<HtmlNode> GetBody(HtmlDocument html)
{
    HtmlNodeCollection nc = html.DocumentNode.SelectNodes("//body");
    return nc.Descendants();
}
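A minimal usage sketch for GetBody, pulling the non-empty text nodes out of the body; the HTML string is illustrative.

// Illustrative only.
var doc = new HtmlDocument();
doc.LoadHtml("<html><body><p>Hello</p><p>World</p></body></html>");
foreach (HtmlNode node in GetBody(doc))
{
    if (node.NodeType == HtmlNodeType.Text && node.InnerText.Trim().Length > 0)
    {
        Console.WriteLine(node.InnerText.Trim()); // Hello, World
    }
}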
//Query website peopleperhour
public async Task<List<Job>> pph()
{
    //clear arrays/lists before use
    jobsToReturn.Clear();
    timeList.Clear();
    priceList.Clear();
    isFixedSalaryList.Clear(); // this list also accumulates across calls; assumed intent of the duplicated priceList.Clear() in the source
    System.Console.WriteLine("Class SiteSearch: Start");

    //phase 1: connect to site
    site = await httpClient.GetAsync("https://www.peopleperhour.com/freelance-jobs");

    //phase 2: load the site
    siteString = await site.Content.ReadAsStringAsync();
    System.Console.WriteLine("HAP: Start");
    doc.LoadHtml(siteString);

    //phase 3: look for specific nodes
    //Select nodes from different locations; the needed pieces of information live in separate subtrees.
    preLinks = doc.DocumentNode.SelectNodes("//div[contains(@class, 'main-content full-width')]//h6[contains(@class, 'title')]");
    preTime = doc.DocumentNode.SelectNodes("//div[contains(@class, 'main-content full-width')]//ul[contains(@class, 'clearfix member-info horizontal crop hidden-xs')]");
    preProposalCount = doc.DocumentNode.SelectNodes("//div[contains(@class, 'main-content full-width')]//span[contains(@class, 'value proposal-count')]");
    prePriceTag = doc.DocumentNode.SelectNodes("//div[contains(@class, 'main-content full-width')]//div[contains(@class, 'price-tag')]");
    System.Console.WriteLine("HAP: precount {0}", preLinks.Count);

    //phase 4: select all or specific elements in the nodes on which the queries below will run
    links = preLinks.Descendants("a");
    time = preTime.Descendants("time");
    proposals = preProposalCount.Nodes();
    price = prePriceTag.Descendants("span");
    isFixedSalary = prePriceTag.Descendants("small");

    //phase 5: add the selected elements to lists, one loop per source subtree
    foreach (var node in isFixedSalary)
    {
        isFixedSalaryList.Add(node.InnerText);
        System.Console.WriteLine("isFixedSalaryList added this: {0}", node.InnerText);
    }
    foreach (var node in price)
    {
        priceList.Add(node.InnerText);
        System.Console.WriteLine("priceList added this: {0}", node.InnerText);
    }
    foreach (var node in proposals)
    {
        proposalList[foreachInteration] = node.InnerText;
        System.Console.WriteLine("proposallist added this: {0}", node.InnerText);
        foreachInteration++;
    }
    // reset foreachInteration for later use
    foreachInteration = 0;
    foreach (var node in time)
    {
        DateTime timePosted = Convert.ToDateTime(node.GetAttributeValue("datetime", string.Empty));
        timeList.Add(timePosted);
    }

    //phase 6: unify the collected elements in one object
    //"Main" foreach where all the data are collected into a Job object, then written to a List<Job>
    foreach (var node in links)
    {
        Job job = new Job();
        System.Console.WriteLine("foreach: {0}", foreachInteration);
        job.Title = node.GetAttributeValue("title", string.Empty);
        job.URL = node.GetAttributeValue("href", string.Empty);
        job.Time = timeList[foreachInteration];
        job.ProposalNum = proposalList[foreachInteration];
        job.Salary = priceList[foreachInteration];
        job.isFixedSalary = isFixedSalaryList[foreachInteration];

        //Keep only jobs posted within the filterTime window before now.
        if (job.Time > DateTime.Now.Add(filterTime))
        {
            jobsToReturn.Add(job);
        }
        foreachInteration++;
    }
    foreachInteration = 0;
    System.Console.WriteLine("HAP: Finish");
    System.Console.WriteLine("Class SiteSearch: return: site");
    return jobsToReturn;
}
//------------------------------------------------------ Workana query ------------------
public async Task<List<Job>> workana()
{
    jobsToReturn.Clear();
    timeList.Clear();
    System.Console.WriteLine("Class SiteSearch: Start");

    //phase 1: connect to site
    site = await httpClient.GetAsync("https://www.workana.com/en/jobs?category=it-programming");

    //phase 2: load the site
    siteString = await site.Content.ReadAsStringAsync();
    System.Console.WriteLine("HAP: Start");
    doc.LoadHtml(siteString);

    //phase 3: look for specific nodes
    //Select nodes from different locations; the needed pieces of information live in separate subtrees.
    preLinks = doc.DocumentNode.SelectNodes("//div[contains(@class, 'col-sm-12 col-md-8 search-results')]//h2[contains(@class, 'h2 project-title')]");
    preTime = doc.DocumentNode.SelectNodes("//div[contains(@class, 'col-sm-12 col-md-8 search-results')]//div[contains(@class, 'project-header')]");
    preProposalCount = doc.DocumentNode.SelectNodes("//div[contains(@class, 'col-sm-12 col-md-8 search-results')]//span[contains(@class, 'bids')]");
    System.Console.WriteLine("HAP: precount {0}", preLinks.Count);

    //phase 4: select all or specific elements in the nodes on which the queries below will run
    links = preLinks.Descendants("a");
    time = preTime.Descendants("h5");
    proposals = preProposalCount.Nodes();

    //phase 5: add the selected elements to lists, one loop per source subtree
    foreach (var node in proposals)
    {
        if (int.TryParse(node.InnerText, out int n))
        {
            proposalList[foreachInteration] = node.InnerText;
        }
        // System.Console.WriteLine("proposallist passed on this: {0}", node.InnerText);
        foreachInteration++; // advance per proposal so the indices align with the jobs loop below
    }
    // reset foreachInteration for later use
    foreachInteration = 0;
    foreach (var node in time)
    {
        DateTime timePosted = Convert.ToDateTime(node.GetAttributeValue("title", "01/01/2000 01.01.01"));
        System.Console.WriteLine("time added: " + timePosted);
        timeList.Add(timePosted);
        foreachInteration++;
    }
    // reset foreachInteration for later use
    foreachInteration = 0;

    //phase 6: unify the collected elements in one object
    //"Main" foreach where all the data are collected into a Job object, then written to a List<Job>
    foreach (var node in links)
    {
        Job job = new Job();
        job.Title = defaultValue;
        job.URL = defaultValue;
        job.ProposalNum = defaultValue;
        job.Salary = defaultValue;
        job.isFixedSalary = defaultValue;

        job.Title = node.InnerText;
        job.URL = "https://www.workana.com" + node.GetAttributeValue("href", string.Empty);
        job.Time = timeList[foreachInteration].Add(workanaTimezoneCorrection);
        job.ProposalNum = proposalList[foreachInteration];
        System.Console.WriteLine("time added: " + job.Time + " with title: " + job.Title);

        //Keep only jobs posted within the filterTime window before now.
        if (job.Time > DateTime.Now.Add(filterTime))
        {
            jobsToReturn.Add(job);
        }
        foreachInteration++;
    }
    foreachInteration = 0;
    System.Console.WriteLine("HAP: Finish");
    System.Console.WriteLine("Class SiteSearch: return: site");
    return jobsToReturn;
}
public async Task<ReleaseNotesHistory> ParseAsync(string html)
{
    // Note: despite its name, the "html" parameter carries the release-notes URL to load.
    HtmlWeb web = new HtmlWeb();
    web.UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36";
    web.PreRequest = OnPreRequest;

    HtmlDocument html_doc = await web.LoadFromWebAsync(html);
    HtmlNodeCollection nodes = null;

    HtmlNode node_title = html_doc.DocumentNode.SelectSingleNode("//head/title");
    if (node_title.InnerText == "Moved Temporarily")
    {
        // Redirect interstitial: load the page again.
        html_doc = await web.LoadFromWebAsync(html);
    }

    if (html == ReleaseNotesUrls.AndroidX.Stable
        || html == ReleaseNotesUrls.AndroidX.RC
        || html == ReleaseNotesUrls.AndroidX.Beta
        || html == ReleaseNotesUrls.AndroidX.Alpha)
    {
        nodes = html_doc.DocumentNode.SelectNodes
        (
            //"//head/title"
            //"//h3[@data-text]"
            //"//div[contains(@class, 'devsite-article-body')]"
            ".//*[contains(@class, 'devsite-article-body')]"
        );
    }
    if (html == ReleaseNotesUrls.AndroidX.All)
    {
        nodes = html_doc.DocumentNode.SelectNodes
        (
            //"//head/title"
            //"//h3[@data-text]"
            ".//*[contains(@class, 'devsite-article-body')]"
        );
    }

    List<ReleaseNote> release_notes = new List<ReleaseNote>();
    ReleaseNote rn = null;

    foreach (HtmlNode node in nodes.Descendants())
    {
        if (node.Name == "h3")
        {
            // Each <h3> holds a release date and starts a new note.
            string inner_html = node.InnerHtml;
            DateTime date_time_release = DateTime.Parse(inner_html);
            rn = new ReleaseNote() { Date = date_time_release };
            release_notes.Add(rn);
        }
        if (node.Name == "ul" && rn != null) // a <ul> before the first <h3> would otherwise throw
        {
            foreach (HtmlNode node_ul in node.ChildNodes)
            {
                if (node_ul.Name == "li")
                {
                    string inner_text = node_ul.InnerText;
                    rn.Artifacts.Add(inner_text);
                }
            }
        }
    }

    ReleaseNotesHistory release_notes_history = new ReleaseNotesHistory()
    {
        Count = release_notes.Count,
        Date = DateTime.Today,
        ReleaseNotes = release_notes
    };
    return release_notes_history;
}
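A hypothetical call site for ParseAsync, assuming an instance named parser of the containing class; the URL constant comes from the ReleaseNotesUrls type the method already checks against.

// Illustrative only: "parser" is an assumed instance name.
ReleaseNotesHistory history = await parser.ParseAsync(ReleaseNotesUrls.AndroidX.All);
Console.WriteLine($"{history.Count} release notes as of {history.Date:d}");
foreach (ReleaseNote note in history.ReleaseNotes)
{
    Console.WriteLine($"{note.Date:yyyy-MM-dd}: {note.Artifacts.Count} artifacts");
}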