/// <summary>
/// Grabs the inner HTML of the nodes matched by an XPath expression
/// (translated from the original Chinese summary).
/// </summary>
/// <param name="html">Raw HTML text to search.</param>
/// <param name="xpath">XPath expression selecting the target nodes.</param>
/// <returns>
/// The inner HTML of each matched node's children joined with CRLF, or the
/// literal string "null" when the expression matches nothing.
/// </returns>
public static string SelectNodesHtml(this string html, string xpath)
{
    HtmlNodeCollection matches = SelectNodes(html, xpath);
    if (matches == null)
    {
        // Preserved quirk: callers receive the literal text "null", not null.
        return "null";
    }

    var fragments = matches.Nodes().Select(child => child.InnerHtml);
    return string.Join("\r\n", fragments);
}
/// <summary>
/// Searches the candidate nodes for a table header cell containing the SHA256
/// marker and, when one is found, reads the cell that intersects that header
/// column with the given download row.
/// </summary>
/// <param name="fileDownloadNode">Row node identifying the file being downloaded.</param>
/// <param name="nodeCollectionWithSha256Substring">Nodes whose text contains the SHA256 substring.</param>
/// <returns>The extracted hash text, or an empty string when no table yields one.</returns>
private string GetSha256FromHtmlTable(HtmlNode fileDownloadNode, HtmlNodeCollection nodeCollectionWithSha256Substring)
{
    foreach (var candidate in nodeCollectionWithSha256Substring.Nodes())
    {
        var headerCell = _htmlParser.GetInnerNodeWithSubstring(candidate, SHA256);

        // Only cells that are part of a table header identify a hash column.
        if (!_htmlParser.IsNodePartOfTableHeader(headerCell))
        {
            continue;
        }

        var hash = _htmlParser.GetDataFromTableByHeaderAndRow(headerCell, fileDownloadNode);
        if (!hash.Equals(string.Empty))
        {
            return hash;
        }
    }

    return string.Empty;
}
/// <summary>
/// Downloads every page of the ssgdfm.com wish list, parses each product row
/// into a DataTable, and binds the table to dataGridView2, logging progress
/// timestamps to richTextBox1.
/// </summary>
private void GetMyWishList() {
    richTextBox1.AppendText(Environment.NewLine + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " Hello");
    // Result table: one row per wish-list product.
    DataTable wishListDt = new DataTable();
    wishListDt.Columns.Add("Brand", typeof(string));
    wishListDt.Columns.Add("ProductName", typeof(string));
    wishListDt.Columns.Add("ProductNum", typeof(string));
    wishListDt.Columns.Add("PriceKr", typeof(string));
    wishListDt.Columns.Add("PriceUS", typeof(string));
    wishListDt.Columns.Add("BuyStatusName", typeof(string));
    wishListDt.Columns.Add("ProductCode", typeof(string));
    // Initial guess; refined from the pager after each page is fetched.
    int maxPage = 10;
    for (int i = 1; i <= maxPage; i++) {
        string wishListHtml = _loggin.GetResponseString("http://www.ssgdfm.com/shop/mypage/wish/listWish?page=" + i.ToString() + "&hash=pageHash", container);
        HtmlAgilityPack.HtmlDocument mydoc = new HtmlAgilityPack.HtmlDocument();
        mydoc.LoadHtml(wishListHtml);
        // NOTE(review): SelectNodes returns null when nothing matches; both
        // collections below are dereferenced without a null check.
        HtmlNodeCollection nodeCollection = mydoc.DocumentNode.SelectNodes("//div[@class='list wish-product']//tbody//tr");
        HtmlNodeCollection pageNode = mydoc.DocumentNode.SelectNodes("//div[@class='pager']//a");
        // Drop the trailing pager link, then read the last page number shown.
        pageNode.Nodes().Last().Remove();
        string temp = pageNode.Nodes().Last().InnerText;
        // Temporary handling (translated from the original Korean comment).
        if (!string.IsNullOrEmpty(temp)) {
            int tempPage = Convert.ToInt32(temp);
            // NOTE(review): maxPage is mutated while the for-loop condition is
            // still reading it, and the "% 10" bump looks suspicious (tempPage
            // already appears to be a page number, not an item count) — confirm.
            maxPage = tempPage;
            if (tempPage % 10 > 0) {
                maxPage++;
            }
        }
        HtmlNode tempNode = null;
        foreach (HtmlNode node in nodeCollection) {
            // Re-parse the row as a standalone fragment so the "//" XPath
            // queries below are scoped to this row, not the whole document.
            tempNode = HtmlNode.CreateNode(node.OuterHtml);
            DataRow row = wishListDt.NewRow();
            row["Brand"] = tempNode.SelectSingleNode("//div[@class='product-info']//p[@class='brand']//a").InnerText;
            row["ProductName"] = tempNode.SelectSingleNode("//p[@class='product']//a").InnerText;
            // Product code is the element id minus its 2-character prefix.
            row["ProductCode"] = tempNode.SelectSingleNode("//td[@class='btn']//div[@class='labox']").Attributes["id"].Value.Substring(2);
            row["ProductNum"] = tempNode.SelectSingleNode("//p[@class='product-num']").InnerText.Replace("<span>REF. 
NO :</span>", "");
            row["PriceKr"] = tempNode.SelectSingleNode("//p[@class='price']//span[@class='nation-currency']").InnerText;
            row["PriceUS"] = tempNode.SelectSingleNode("//p[@class='price']//span[@class='us-currency']").InnerText;
            row["BuyStatusName"] = tempNode.SelectSingleNode("//td[@class='buy']//span[@class='check-on']").InnerText;
            wishListDt.Rows.Add(row);
        }
    }
    richTextBox1.AppendText(Environment.NewLine + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " Handle");
    dataGridView2.DataSource = wishListDt;
    richTextBox1.AppendText(Environment.NewLine + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " END");
}
// Query website peopleperhour
/// <summary>
/// Scrapes https://www.peopleperhour.com/freelance-jobs and assembles Job
/// objects (title, URL, post time, proposal count, salary, salary type) from
/// parallel, index-aligned lists, keeping only jobs posted inside the
/// filterTime window.
/// </summary>
/// <returns>The jobs that passed the time filter.</returns>
public async Task <List <Job> > pph() {
    // Clear shared lists before reuse.
    // FIX: the original cleared priceList twice and never cleared
    // isFixedSalaryList, so entries left over from a previous call would
    // shift the index alignment between the parallel per-field lists.
    jobsToReturn.Clear();
    timeList.Clear();
    priceList.Clear();
    isFixedSalaryList.Clear();
    System.Console.WriteLine("Class SiteSearch: Start");

    // phase 1: connect to the site to check on
    site = await httpClient.GetAsync("https://www.peopleperhour.com/freelance-jobs");

    // phase 2: load the site
    siteString = await site.Content.ReadAsStringAsync();
    System.Console.WriteLine("HAP: Start");
    doc.LoadHtml(siteString);

    // phase 3: look for specific nodes. The needed pieces of information are
    // located in separate parts of the page.
    preLinks = doc.DocumentNode.SelectNodes("//div[contains(@class, 'main-content full-width')]//h6[contains(@class, 'title')]");
    preTime = doc.DocumentNode.SelectNodes("//div[contains(@class, 'main-content full-width')]//ul[contains(@class, 'clearfix member-info horizontal crop hidden-xs')]");
    preProposalCount = doc.DocumentNode.SelectNodes("//div[contains(@class, 'main-content full-width')]//span[contains(@class, 'value proposal-count')]");
    prePriceTag = doc.DocumentNode.SelectNodes("//div[contains(@class, 'main-content full-width')]//div[contains(@class, 'price-tag')]");
    System.Console.WriteLine("HAP: precount {0}", preLinks.Count);

    // phase 4: select the concrete tags the per-field queries will run on.
    links = preLinks.Descendants("a");
    time = preTime.Descendants("time");
    proposals = preProposalCount.Nodes();
    price = prePriceTag.Descendants("span");
    isFixedSalary = prePriceTag.Descendants("small");

    // phase 5: collect the selected elements into the parallel lists.
    foreach (var node in isFixedSalary) {
        isFixedSalaryList.Add(node.InnerText);
        System.Console.WriteLine("isFixedSalaryList added this: {0}", node.InnerText);
    }
    foreach (var node in price) {
        priceList.Add(node.InnerText);
        System.Console.WriteLine("priceList added this: {0}", node.InnerText);
    }
    foreach (var node in proposals) {
        proposalList[foreachInteration] = node.InnerText;
        System.Console.WriteLine("proposallist added this: {0}", node.InnerText);
        foreachInteration++;
    }
    // reset foreachIteration for later use
    foreachInteration = 0;
    foreach (var node in time) {
        DateTime timePosted = Convert.ToDateTime(node.GetAttributeValue("datetime", string.Empty));
        timeList.Add(timePosted);
    }

    // phase 6: unify the collected elements, index by index, into Job objects.
    foreach (var node in links) {
        Job job = new Job();
        System.Console.WriteLine("foreach: {0}", foreachInteration);
        job.Title = node.GetAttributeValue("title", string.Empty);
        job.URL = node.GetAttributeValue("href", string.Empty);
        job.Time = timeList[foreachInteration];
        job.ProposalNum = proposalList[foreachInteration];
        job.Salary = priceList[foreachInteration];
        job.isFixedSalary = isFixedSalaryList[foreachInteration];
        // Keep only jobs posted within the configured timeframe from now
        // (filterTime is presumably a negative TimeSpan — TODO confirm).
        if (job.Time > DateTime.Now.Add(filterTime)) {
            jobsToReturn.Add(job);
        }
        foreachInteration++;
    }
    foreachInteration = 0;
    System.Console.WriteLine("HAP: Finish");
    System.Console.WriteLine("Class SiteSearch: return: site");
    return jobsToReturn;
}
//------------------------------------------------------ Workana query ------------------
/// <summary>
/// Scrapes https://www.workana.com/en/jobs?category=it-programming and builds
/// Job objects from the title links, project-header timestamps and bid
/// counts, keeping only jobs posted inside the filterTime window.
/// </summary>
/// <returns>The jobs that passed the time filter.</returns>
public async Task <List <Job> > workana() {
    jobsToReturn.Clear();
    timeList.Clear();
    System.Console.WriteLine("Class SiteSearch: Start");
    //phase 1: connect to site
    // the site to check on
    site = await httpClient.GetAsync("https://www.workana.com/en/jobs?category=it-programming");
    //phase 2: load the site
    siteString = await site.Content.ReadAsStringAsync();
    System.Console.WriteLine("HAP: Start");
    doc.LoadHtml(siteString);
    //phase 3: look for specific nodes
    //Select nodes of different locations. Needed information are separated
    preLinks = doc.DocumentNode.SelectNodes("//div[contains(@class, 'col-sm-12 col-md-8 search-results')]//h2[contains(@class, 'h2 project-title')]");
    preTime = doc.DocumentNode.SelectNodes("//div[contains(@class, 'col-sm-12 col-md-8 search-results')]//div[contains(@class, 'project-header')]");
    preProposalCount = doc.DocumentNode.SelectNodes("//div[contains(@class, 'col-sm-12 col-md-8 search-results')]//span[contains(@class, 'bids')]");
    System.Console.WriteLine("HAP: precount {0}", preLinks.Count);
    //phase 4: select all or specific elements in nodes
    //select tags on which specific queries will be run
    links = preLinks.Descendants("a");
    time = preTime.Descendants("h5");
    proposals = preProposalCount.Nodes();
    //phase 5: add selected elements to a list
    //querying elements that are located in different nodes
    // NOTE(review): foreachInteration is never incremented inside this loop
    // (compare pph(), which increments per element), so every parseable bid
    // count overwrites proposalList[0], and phase 6 reads stale data for
    // indexes > 0. Looks like a bug — verify before trusting ProposalNum.
    foreach (var node in proposals) {
        if (int.TryParse(node.InnerText, out int n)) {
            proposalList[foreachInteration] = node.InnerText;
        }
        // System.Console.WriteLine("proposallist passed on this: {0}", node.InnerText);
    }
    foreachInteration++;
    // reset foreachIteration for later use
    foreachInteration = 0;
    //querying elements that are located in different nodes
    foreach (var node in time) {
        //System.Console.WriteLine(foreachInteration);
        // Falls back to a sentinel date when the title attribute is missing.
        DateTime timePosted = Convert.ToDateTime(node.GetAttributeValue("title", "01/01/2000 01.01.01"));
        System.Console.WriteLine("time added: 
" + timePosted);
        timeList.Add(timePosted);
        foreachInteration++;
    }
    // reset foreachIteration for later use
    foreachInteration = 0;
    //phase 6: unify the collected elements in one object
    // "Main" foreach where all the data are collected into a job object, then written to a List<Job>
    foreach (var node in links) {
        Job job = new Job();
        // Pre-seed every field with the class-level default value.
        job.Title = defaultValue;
        job.URL = defaultValue;
        job.ProposalNum = defaultValue;
        job.Salary = defaultValue;
        job.isFixedSalary = defaultValue;
        //System.Console.WriteLine("foreach: {0}", foreachInteration);
        job.Title = node.InnerText;
        // Listing hrefs are site-relative; prefix the host.
        job.URL = "https://www.workana.com" + node.GetAttributeValue("href", string.Empty);
        job.Time = timeList[foreachInteration].Add(workanaTimezoneCorrection);
        job.ProposalNum = proposalList[foreachInteration];
        System.Console.WriteLine("time added: " + job.Time + " with title: " + job.Title);
        //Check that the jobs were posted within a specified timeframe from now.
        if (job.Time > DateTime.Now.Add(filterTime)) {
            jobsToReturn.Add(job);
        }
        foreachInteration++;
    }
    foreachInteration = 0;
    System.Console.WriteLine("HAP: Finish");
    System.Console.WriteLine("Class SiteSearch: return: site");
    return(jobsToReturn);
}
/// <summary>
/// Fetches a character page and populates the player-info labels from the
/// rows of the first BoxContent table (first cell = field name, last cell =
/// field value).
/// </summary>
/// <param name="character">Character identifier appended to the lookup URL.</param>
public void ShowPlayerInfo(object character)
{
    try
    {
        var res = _webService.SendRequest(_urlGetCharacter + character);
        HtmlNodeCollection temp = res.DocumentNode.SelectNodes("//div[contains(@class, 'BoxContent')]/table[1]");

        // Defaults for fields the page may omit entirely.
        lblFormerNamesVar.Text = "None";
        lblFormerWorldVar.Text = "None";
        lblGuildVar.Text = "None";

        foreach (var node in temp.Nodes())
        {
            switch (node.FirstChild.InnerText)
            {
                case "Name:":
                    lblNameVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "Former Names:":
                    lblFormerNamesVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "Sex:":
                    lblSexVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "Vocation:":
                    lblVocationVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "Level:":
                    lblLevelVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "Achievement Points:":
                    lblAchPointsVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "World:":
                    lblWorldVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "Former World:":
                    lblFormerWorldVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "Residence:":
                    lblResidenceVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
                case "Guild Membership:":
                    lblGuildVar.Text = node.LastChild.InnerText.Replace(" ", " ").TrimEnd();
                    break;
                case "Last Login:":
                    // FIX: this branch was corrupted in the original source (a
                    // secret-scrubber artifact "******" replaced the assignment).
                    // Reconstructed by analogy with the Guild Membership branch,
                    // whose Replace/TrimEnd tail survived — TODO confirm the
                    // label name lblLastLoginVar against the designer file.
                    lblLastLoginVar.Text = node.LastChild.InnerText.Replace(" ", " ").TrimEnd();
                    break;
                case "Account Status:":
                    lblAccountStatusVar.Text = node.LastChild.InnerText.TrimEnd();
                    break;
            }
        }
    }
    catch (NullReferenceException e)
    {
        // SelectNodes returns null when the table is missing; the foreach then
        // throws. Log this in logger instead of the console eventually.
        Console.Out.WriteLine("NullReferenceException caught while setting labels:\n" + e.Message);
    }
}