/// <summary>
/// Parses a forum lobby page and appends every thread row that converts successfully
/// to <paramref name="threadList"/>.
/// </summary>
/// <param name="url">Page URL; the id returned by <c>VBulletinForum.ThreadIdFromUrl</c> is handed to <c>HtmlToThread</c>.</param>
/// <param name="doc">Raw HTML of the page.</param>
/// <param name="serverTime">
/// Time parsed from the last <c>div#footer_time</c> element, or the local clock when no such element exists.
/// </param>
/// <param name="threadList">List that receives the parsed threads.</param>
protected override void ParseLobbyPage(string url, string doc, out DateTimeOffset serverTime, ref List<ForumThread> threadList)
{
    Int32 threadId = VBulletinForum.ThreadIdFromUrl(url);
    var html = new HtmlAgilityPack.HtmlDocument();
    html.LoadHtml(doc);
    HtmlAgilityPack.HtmlNode root = html.DocumentNode;

    serverTime = DateTime.Now;

    // SelectNodes returns null (not an empty collection) when nothing matches, so calling
    // Last() directly would throw before the null check below could ever run.
    HtmlAgilityPack.HtmlNode timeNode = root.SelectNodes("//div[@id='footer_time']")?.LastOrDefault();
    if (timeNode != null)
    {
        String timeText = timeNode.InnerText;
        serverTime = Utils.Misc.ParsePageTime(timeText, DateTime.UtcNow);
    }

    HtmlAgilityPack.HtmlNodeCollection threads = root.SelectNodes("//tbody[contains(@id, 'threadbits_forum_')]/tr");
    if (threads == null)
    {
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode thread in threads)
    {
        ForumThread t = HtmlToThread(threadId, thread, serverTime);
        if (t != null)
        {
            threadList.Add(t);
        }
    }
}
/// <summary>
/// Requests a dianping.com search page with a fixed set of request headers, extracts the
/// name, review-count link text and price link text of every <c>div.txt</c> entry, and
/// writes them to the response, one entry per line.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    string heads = @"Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Accept-Encoding:gzip, deflate Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 Cache-Control:max-age=0 Connection:keep-alive Cookie:showNav=#nav-tab|0|0; navCtgScroll=0; cy=1; cye=shanghai; _lxsdk_cuid=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _lxsdk_s=1693813fded-ea2-2e7-d89%7C%7C51; _lxsdk=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _hc.v=7216e9e3-be12-eff4-1836-49d9b0c4b0ce.1551424029; s_ViewType=10 Host:www.dianping.com Upgrade-Insecure-Requests:1 User-Agent:Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0";
    string url = "http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%E8%AF%AD%E5%9F%B9%E8%AE%AD/r842";
    ClassHttpRequestClient s = new ClassHttpRequestClient(true);
    string content = "";
    string response = s.httpPost(url, heads, content, Encoding.UTF8);

    // Step 1: create the HtmlAgilityPack document instance.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    // Step 2: load the HTML text.
    doc.LoadHtml(response);

    StringBuilder sb = new StringBuilder();

    // SelectNodes returns null when nothing matches (e.g. an error or captcha page),
    // so every lookup is checked before use instead of being indexed blindly.
    HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
    if (collection != null)
    {
        foreach (HtmlAgilityPack.HtmlNode item in collection)
        {
            HtmlAgilityPack.HtmlNode divtit = item.SelectSingleNode("div[@class=\"tit\"]");
            HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
            if (divtit == null || divcomment == null)
            {
                continue;
            }

            HtmlAgilityPack.HtmlNode aname = divtit.SelectSingleNode("a[1]");      // first hyperlink under divtit
            HtmlAgilityPack.HtmlNode anum = divcomment.SelectSingleNode("a[1]");   // first hyperlink under divcomment
            HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]"); // second hyperlink under divcomment
            if (aname == null || anum == null || aprice == null)
            {
                // Entry does not have the expected three links; skip it rather than abort the page.
                continue;
            }

            sb.Append(string.Format("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText));
        }
    }

    Response.Write(sb);
}
/// <summary>
/// Fills the localized From/To/Name of <paramref name="line"/> from the title cell of a route
/// table and records the localized name of each stop row. Stops already present in
/// <c>line.Stops</c> (keyed by row position) only get their name for <paramref name="langName"/>
/// set; missing ones are created with the place id taken from the row's link.
/// </summary>
/// <param name="line">Line whose names and stops are updated.</param>
/// <param name="routeNode">Node containing the route table.</param>
/// <param name="langName">Key under which the localized texts are stored.</param>
/// <exception cref="ApplicationException">
/// A new stop's link cannot be resolved against <c>BusCoIlParser.baseUri</c>, or it carries
/// neither a <c>PlaceID</c> nor a <c>PlaceID1</c> query parameter.
/// </exception>
private static void ParseStopsTable(OneWayLine line, HtmlAgilityPack.HtmlNode routeNode, string langName)
{
    int stopCounter = 0;

    HtmlAgilityPack.HtmlNode titleNode = routeNode.SelectNodes("tr[1]/td[1]/table[1]/tr[1]/td[1]")[0];
    line.From[langName] = titleNode.ChildNodes[0].InnerText.Trim();
    line.To[langName] = titleNode.ChildNodes[4].InnerText.Trim();
    line.Name[langName] = String.Format("{0} - {1}", line.From[langName], line.To[langName]);

    HtmlAgilityPack.HtmlNodeCollection rows = routeNode.SelectNodes("tr[1]/td[1]/table[1]/tr[@class='SmallTableRow ']");
    if (rows == null)
    {
        Console.WriteLine(routeNode.InnerHtml);
        Console.WriteLine("null collection");
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode stopRowNode in rows)
    {
        string stopName = stopRowNode.ChildNodes[1].InnerHtml.Trim();

        if (line.Stops.ContainsKey(stopCounter))
        {
            // Known stop: only the localized name is needed, so the link is not parsed at all.
            line.Stops[stopCounter].Name[langName] = stopName;
            stopCounter++;
            continue;
        }

        HtmlAgilityPack.HtmlNode linkNode = stopRowNode.ChildNodes[7].ChildNodes[1];
        string link = linkNode.Attributes["href"].Value;

        // TryCreate reports failure through its return value; ignoring it left linkUri null
        // and produced a NullReferenceException on the next line.
        Uri linkUri;
        if (!Uri.TryCreate(BusCoIlParser.baseUri, link, out linkUri))
        {
            throw new ApplicationException("Cannot resolve stop link '" + link + "'.");
        }

        NameValueCollection col = System.Web.HttpUtility.ParseQueryString(linkUri.Query);
        Stop s = new Stop() { PlaceID = col["PlaceID"] ?? col["PlaceID1"] };
        s.Name[langName] = stopName;
        if (s.PlaceID == null)
        {
            throw new ApplicationException();
        }

        line.Stops.Add(stopCounter++, s);
    }
}
} // End Function GetProxyArray

/// <summary>
/// Converts the proxy table of free-proxy-list.net into an indented JSON file.
/// The HTML is read from <paramref name="htmlFile"/> when that file exists; otherwise it is
/// downloaded and written to <paramref name="htmlFile"/> first.
/// </summary>
/// <param name="htmlFile">Path of the cached HTML page.</param>
/// <param name="jsonFile">Path the serialized table is written to (UTF-8).</param>
/// <exception cref="System.InvalidOperationException">
/// The page contains no <c>table#proxylisttable</c> or the table has no header cells.
/// </exception>
public static void GetProxyList(string htmlFile, string jsonFile)
{
    System.Data.DataTable dt = new System.Data.DataTable();

    string html = null;
    if (System.IO.File.Exists(htmlFile))
    {
        html = System.IO.File.ReadAllText(htmlFile);
    }

    if (html == null)
    {
        using (System.Net.WebClient wc = new System.Net.WebClient())
        {
            html = wc.DownloadString("https://free-proxy-list.net/");
            System.IO.File.WriteAllText(htmlFile, html, System.Text.Encoding.UTF8);
        } // End Using wc
    } // End if (html == null)

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    string selector = "//table[@id='proxylisttable']";
    HtmlAgilityPack.HtmlNode tableNode = doc.DocumentNode.SelectSingleNode(selector);
    System.Console.WriteLine(tableNode);

    // SelectSingleNode/SelectNodes return null when nothing matches; fail with a clear
    // message instead of a NullReferenceException further down.
    if (tableNode == null)
    {
        throw new System.InvalidOperationException("Proxy table not found (" + selector + ").");
    }

    HtmlAgilityPack.HtmlNodeCollection ths = tableNode.SelectNodes("./thead/tr/th");
    if (ths == null)
    {
        throw new System.InvalidOperationException("Proxy table has no header cells.");
    }

    foreach (HtmlAgilityPack.HtmlNode th in ths)
    {
        dt.Columns.Add(th.InnerText, typeof(string));
    } // Next th

    HtmlAgilityPack.HtmlNodeCollection trs = tableNode.SelectNodes("./tbody/tr");
    if (trs != null)
    {
        foreach (HtmlAgilityPack.HtmlNode tr in trs)
        {
            HtmlAgilityPack.HtmlNodeCollection tds = tr.SelectNodes("./td");
            if (tds == null)
            {
                continue; // row without data cells
            }

            System.Data.DataRow dr = dt.NewRow();
            // Cells beyond the declared header columns are ignored instead of overflowing the row.
            for (int i = 0; i < tds.Count && i < dt.Columns.Count; ++i)
            {
                dr[i] = tds[i].InnerText;
            } // Next i

            dt.Rows.Add(dr);
        } // Next tr
    }

    string json = Newtonsoft.Json.JsonConvert.SerializeObject(dt, Newtonsoft.Json.Formatting.Indented);
    System.IO.File.WriteAllText(jsonFile, json, System.Text.Encoding.UTF8);
} // End Function GetProxyList
/// <summary>
/// Collects the bold runs of a post. Works on a deep clone of <paramref name="original"/> from
/// which quotes, fonts whose color is not <c>_voteColor</c>, and line breaks are removed first.
/// With an empty <c>_voteColor</c> only direct <c>b</c> children are collected; otherwise
/// <c>font/b</c>, <c>b/font</c> and red bold <c>span</c> combinations are collected, in that order.
/// </summary>
/// <param name="original">Post node; it is cloned, not modified directly by this method.</param>
/// <returns>The entries produced by <c>BoldsFromSet</c> for each matching node set.</returns>
protected List<Bold> ParseBolded(HtmlAgilityPack.HtmlNode original)
{
    var results = new List<Bold>();
    HtmlAgilityPack.HtmlNode working = original.CloneNode("Votes", true);

    // Clean-up passes: quotes, then non-vote colors, then <br> elements.
    RemoveQuotes(working);
    var allowedColors = new List<string> { _voteColor };
    RemoveColors(working, allowedColors);
    RemoveNewlines(working);

    if (_voteColor == "")
    {
        // No vote color configured: plain bold directly under the post.
        HtmlAgilityPack.HtmlNodeCollection plainBolds = working.SelectNodes("child::b");
        if (plainBolds != null)
        {
            BoldsFromSet(plainBolds, results);
        }

        return results;
    }

    // <font><b>…</b></font>
    HtmlAgilityPack.HtmlNodeCollection fonts = working.SelectNodes("descendant::font");
    if (fonts != null)
    {
        foreach (var font in fonts)
        {
            HtmlAgilityPack.HtmlNodeCollection inner = font.SelectNodes("child::b");
            if (inner != null)
            {
                BoldsFromSet(inner, results);
            }
        }
    }

    // <b><font>…</font></b>
    HtmlAgilityPack.HtmlNodeCollection boldNodes = working.SelectNodes("descendant::b");
    if (boldNodes != null)
    {
        foreach (var bold in boldNodes)
        {
            HtmlAgilityPack.HtmlNodeCollection inner = bold.SelectNodes("child::font");
            if (inner != null)
            {
                BoldsFromSet(inner, results);
            }
        }
    }

    // <span style="color:red;font-weight:bold;…">
    HtmlAgilityPack.HtmlNodeCollection styledSpans = working.SelectNodes("descendant::span[starts-with(@style,\"color:red;font-weight:bold;\")]");
    if (styledSpans != null)
    {
        BoldsFromSet(styledSpans, results);
    }

    return results;
}
/// <summary>
/// Parses one page of a thread: the server time from the page footer, the last page number
/// from the "Page X of Y" navigation cell, and the posts.
/// </summary>
/// <param name="url">Page URL; the id returned by <c>VBulletinForum.ThreadIdFromUrl</c> is handed to <c>HtmlToPost</c>.</param>
/// <param name="doc">Raw HTML of the page.</param>
/// <param name="lastPageNumber">Y of "Page X of Y", or 0 when the text is absent or not a valid number.</param>
/// <param name="serverTime">Time parsed from the footer, or the local clock when the footer is absent.</param>
/// <param name="postList">Replaced with a new <c>Posts</c> collection when post nodes are found; left untouched otherwise.</param>
protected virtual void ParseThreadPage(String url, String doc, out Int32 lastPageNumber, out DateTimeOffset serverTime, ref Posts postList)
{
    Int32 threadId = VBulletinForum.ThreadIdFromUrl(url);
    lastPageNumber = 0;

    var html = new HtmlAgilityPack.HtmlDocument();
    html.LoadHtml(doc);
    HtmlAgilityPack.HtmlNode root = html.DocumentNode;

    serverTime = DateTime.Now;

    // Footer: "All times are GMT ... The time is now <span class="time">time</span>."
    // SelectNodes returns null when nothing matches, so Last() would throw before the
    // null check; use the null-safe form.
    HtmlAgilityPack.HtmlNode timeNode = root.SelectNodes("//div[@class='smallfont'][@align='center']/span[@class='time']/..")?.LastOrDefault();
    if (timeNode != null)
    {
        String timeText = timeNode.InnerText;
        serverTime = Utils.Misc.ParsePageTime(timeText, DateTime.UtcNow);
    }

    // Page navigation cell, e.g. "Page 106 of 106".
    HtmlAgilityPack.HtmlNode pageNode = root.SelectSingleNode("//div[@class='pagenav']/table/tr/td");
    if (pageNode != null)
    {
        Match m = Regex.Match(pageNode.InnerText, @"Page (\d+) of (\d+)");
        if (m.Success)
        {
            // TryParse keeps the default of 0 for values that do not fit an Int32
            // instead of throwing like Convert.ToInt32.
            Int32 parsed;
            if (Int32.TryParse(m.Groups[2].Value, out parsed))
            {
                lastPageNumber = parsed;
            }
        }
    }

    // Each post body is the div whose id contains 'post_message_' inside the second row
    // of the post's table.
    HtmlAgilityPack.HtmlNodeCollection posts = root.SelectNodes("//div[@id='posts']//div[contains(@id, 'edit')]/table/tr[2]/td[2]/div[contains(@id, 'post_message_')]");
    if (posts == null)
    {
        return;
    }

    postList = new Posts();
    foreach (HtmlAgilityPack.HtmlNode post in posts)
    {
        Post p = HtmlToPost(threadId, post, serverTime);
        if (p != null)
        {
            postList.Add(p);
        }
    }
}
/// <summary>
/// Locates header cells of <paramref name="table"/> whose text contains one of
/// <paramref name="vals"/>. The header row is the first direct <c>tr</c>, or the first
/// <c>tr</c> of the first <c>thead</c> when the table has no direct rows.
/// </summary>
/// <param name="table">Table node to inspect.</param>
/// <param name="vals">Texts to look for; for each cell the first matching entry wins.</param>
/// <returns>
/// Pairs of (absolute column index, index into <paramref name="vals"/>). The column index
/// accounts for <c>colspan</c> of preceding cells. Empty when no header cells are found.
/// </returns>
public List<KeyValuePair<int, int>> GetTableHeaderIndexes(HtmlAgilityPack.HtmlNode table, params String[] vals)
{
    List<KeyValuePair<int, int>> ret = new List<KeyValuePair<int, int>>();

    HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
    if (rows == null)
    {
        HtmlAgilityPack.HtmlNodeCollection thead = table.SelectNodes("thead");
        if (thead != null)
        {
            rows = thead[0].SelectNodes("tr");
        }
    }

    // SelectNodes yields null for "no match": a table without rows, or a header row without
    // cells, has no header indexes rather than being an error.
    HtmlAgilityPack.HtmlNodeCollection cells = rows == null ? null : rows[0].SelectNodes("th|td");
    if (cells == null)
    {
        return ret;
    }

    int colAbsIdx = 0;
    for (int i = 0; i < cells.Count; ++i)
    {
        HtmlAgilityPack.HtmlNode cell = cells[i];
        String txt = cell.InnerText.Trim();

        for (int j = 0; j < vals.Length; ++j)
        {
            if (txt.IndexOf(vals[j]) > -1)
            {
                ret.Add(new KeyValuePair<int, int>(colAbsIdx, j));
                break;
            }
        }

        // A missing or unparsable colspan counts as one column.
        int colSpan = 1;
        HtmlAgilityPack.HtmlAttribute colspan_attr = cell.Attributes["colspan"];
        if (colspan_attr != null && !int.TryParse(colspan_attr.Value, out colSpan))
        {
            colSpan = 1;
        }

        colAbsIdx += colSpan;
    }

    return ret;
}
/// <summary>
/// Posts the named <c>input</c> fields found inside <paramref name="form"/> to the form's
/// <c>action</c> URL and returns the response decoded with <paramref name="encoding"/>.
/// </summary>
/// <param name="form">Form node whose inputs are submitted.</param>
/// <param name="encoding">Encoding used for the request body and for decoding the response.</param>
/// <returns>The response body as text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="form"/> or <paramref name="encoding"/> is null.</exception>
public static string Submit(this HtmlAgilityPack.HtmlNode form, Encoding encoding)
{
    if (form == null)
    {
        throw new ArgumentNullException(nameof(form));
    }

    if (encoding == null)
    {
        throw new ArgumentNullException(nameof(encoding));
    }

    var fields = new System.Collections.Generic.Dictionary<string, string>();

    // ".//input" restricts the search to this form; a leading "//" is evaluated from the
    // document root and would pick up the inputs of every form on the page.
    // SelectNodes returns null when the form has no inputs.
    HtmlAgilityPack.HtmlNodeCollection inputs = form.SelectNodes(".//input");
    if (inputs != null)
    {
        foreach (HtmlAgilityPack.HtmlNode input in inputs)
        {
            string name = input.GetAttributeValue("name", string.Empty);
            if (name.Length == 0)
            {
                // Inputs without a name are not part of a form submission.
                continue;
            }

            // Indexer assignment: a repeated name keeps the last value instead of throwing.
            fields[name] = HttpUtility.UrlEncode(input.GetAttributeValue("value", string.Empty));
        }
    }

    string postDataStr = fields.SerializeData<string>();

    byte[] responseData = HttpRequestUtil.Post(
        form.GetAttributeValue("action", null),
        encoding.GetBytes(postDataStr));

    return encoding.GetString(responseData);
}
/// <summary>
/// Removes every <c>br</c> element below <paramref name="node"/>.
/// </summary>
/// <param name="node">Root of the subtree to clean.</param>
public static void RemoveNewlines(HtmlAgilityPack.HtmlNode node)
{
    HtmlAgilityPack.HtmlNodeCollection lineBreaks = node.SelectNodes("descendant::br");
    if (lineBreaks == null)
    {
        // Nothing matched.
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode lineBreak in lineBreaks)
    {
        lineBreak.Remove();
    }
}
/// <summary>
/// Builds a reward atom from a node by scanning its <c>tr</c> descendants: each row's image
/// source is mapped to a reward type through <c>RewardTypeFromImageSrc</c> and the number
/// found in the row's text is taken as the amount. Rows lacking either are ignored; a later
/// row of the same type overwrites an earlier one.
/// </summary>
/// <param name="htmlNode">Node to read; may be null.</param>
/// <returns>The reward atom (ISK and LP amounts plus the node's outer HTML), or null for a null node.</returns>
public static IDialogueMissionRewardAtom RewardAtomFromHtmlNode(this HtmlAgilityPack.HtmlNode htmlNode)
{
    if (null == htmlNode)
    {
        return null;
    }

    var amountByType = new Dictionary<RewardTypeEnum, int>();

    foreach (var row in htmlNode.SelectNodes(".//tr").EmptyIfNull())
    {
        var imageSrc = row?.SelectSingleNode(".//img")?.GetAttributeValue("src", "");
        var rewardType = RewardTypeFromImageSrc.TryGetValueNullable(imageSrc);
        var amount = (int?)row?.InnerText?.RegexMatchIfSuccess(Number.DefaultNumberFormatRegexAllowLeadingAndTrailingChars)?.Value?.NumberParseDecimal();

        if (rewardType.HasValue && amount.HasValue)
        {
            amountByType[rewardType.Value] = amount.Value;
        }
    }

    return new DialogueMissionRewardAtom()
    {
        Html = htmlNode.OuterHtml,
        ISK = amountByType?.TryGetValueNullable(RewardTypeEnum.ISK),
        LP = amountByType?.TryGetValueNullable(RewardTypeEnum.LP),
    };
}
/// <summary>
/// Reads one mission objective from the <c>td</c> descendants of a node: the first cell
/// carries the completion marker ("38_193" sets complete, "38_195" sets not complete, the
/// latter winning when both occur), the third cell the objective type, and the last cell
/// the location or item, depending on that type.
/// </summary>
/// <param name="htmlNode">Node to read; may be null.</param>
/// <returns>The parsed objective, or null for a null node or when any exception occurs while parsing.</returns>
public static DialogueMissionObjective ParseObjectiveAtom(this HtmlAgilityPack.HtmlNode htmlNode)
{
    if (null == htmlNode)
    {
        return null;
    }

    try
    {
        var cells = htmlNode.SelectNodes(".//td");
        var completionHtml = cells?.FirstOrDefault()?.InnerHtml;

        bool? completeSelf = null;
        if (completionHtml?.RegexMatchSuccessIgnoreCase(Regex.Escape("38_193")) ?? false)
        {
            completeSelf = true;
        }

        if (completionHtml?.RegexMatchSuccessIgnoreCase(Regex.Escape("38_195")) ?? false)
        {
            completeSelf = false;
        }

        var lastCell = cells?.LastOrDefault();
        var typeEnum = cells?.ElementAtOrDefault(2)?.InnerText?.Trim()?.ObjectiveAtomTypeEnumFromTableDialogueText();

        var locationTypes = new[]
        {
            DialogueMissionObjectiveAtomTypeEnum.Location,
            DialogueMissionObjectiveAtomTypeEnum.LocationPickUp,
            DialogueMissionObjectiveAtomTypeEnum.LocationDropOff
        }.CastToNullable();

        DialogueMissionLocation location = null;
        if (locationTypes.Contains(typeEnum))
        {
            location = MissionLocationFromDialogue(lastCell);
        }

        var itemTypes = new[]
        {
            DialogueMissionObjectiveAtomTypeEnum.Item,
            DialogueMissionObjectiveAtomTypeEnum.Cargo
        }.CastToNullable();

        DialogueMissionObjectiveItem item = null;
        if (itemTypes.Contains(typeEnum))
        {
            item = ObjectiveItemFromDialogueText(lastCell?.InnerText);
        }

        return new DialogueMissionObjective()
        {
            Html = htmlNode.OuterHtml,
            TypeEnum = typeEnum,
            Location = location,
            Item = item,
            CompleteSelf = completeSelf,
        };
    }
    catch
    {
        // Best effort: any failure while parsing yields "no objective".
        return null;
    }
}
/// <summary>
/// Evaluates <c>_xpath</c> against <paramref name="element"/> and returns one string per match:
/// the trimmed value of attribute <c>_attribute</c> when <c>HasAttribute()</c> is true (matches
/// lacking that attribute are skipped), otherwise the trimmed outer HTML of the match.
/// </summary>
/// <param name="element">Context node for the XPath query.</param>
/// <returns>The collected strings; empty when nothing matches.</returns>
public override IList<string> SelectList(HtmlAgilityPack.HtmlNode element)
{
    var values = new List<string>();

    var matches = element.SelectNodes(_xpath);
    if (matches == null)
    {
        return values;
    }

    foreach (var match in matches)
    {
        if (HasAttribute())
        {
            var attribute = match.Attributes[_attribute];
            if (attribute != null)
            {
                values.Add(attribute.Value?.Trim());
            }

            continue;
        }

        values.Add(match.OuterHtml?.Trim());
    }

    return values;
}
/// <summary>
/// Stores the value of one crawl field on <paramref name="data"/>. A <c>Single</c> field gets the
/// result of <c>GetHtmlNodeValue</c>, evaluated on <paramref name="parentNode"/> when the field
/// inherits and on <paramref name="root"/> otherwise. A <c>Group</c> field gets a list with one
/// expando object per node matching the field's XPath under <paramref name="root"/>, each filled
/// by recursing over the field's children with that node as parent.
/// </summary>
/// <param name="field">Field definition.</param>
/// <param name="data">Object the value is set on through <c>SetDynamicValue</c>.</param>
/// <param name="root">Root node used for non-inherited lookups and for group queries.</param>
/// <param name="parentNode">Node of the enclosing group item, if any.</param>
private void ExpandFieldData(ThinkCrawlField field, dynamic data, HtmlAgilityPack.HtmlNode root, HtmlAgilityPack.HtmlNode parentNode = null)
{
    string name = field.Name;
    string fieldXPath = field.XPath;

    // NOTE(review): this only pre-sets an empty value and then falls through to the
    // type-specific handling below; there is no early return. Confirm that is intended.
    if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(fieldXPath))
    {
        SetDynamicValue(data, name, "");
    }

    if (field.Type == ThinkCrawlFieldType.Single)
    {
        HtmlAgilityPack.HtmlNode context = field.Inherit ? parentNode : root;
        SetDynamicValue(data, name, GetHtmlNodeValue(context, field));
    }
    else if (field.Type == ThinkCrawlFieldType.Group)
    {
        List<dynamic> items = new List<dynamic>();

        var groupNodes = root.SelectNodes(fieldXPath);
        if (groupNodes != null)
        {
            foreach (var groupNode in groupNodes)
            {
                dynamic item = new ExpandoObject();
                foreach (var child in field.Children)
                {
                    ExpandFieldData(child, item, root, groupNode);
                }

                items.Add(item);
            }
        }

        SetDynamicValue(data, name, items);
    }
}
/// <summary>
/// Extracts definition text from a dictionary page. For every
/// <c>section.def-pbk.ce-spot</c> inside <c>div.def-list</c> it emits the text of the
/// <c>span.dbox-pg</c> headers, followed by the text of each <c>div.def-set</c> with inline
/// example blocks, markup and colons removed. The result is also written to the console.
/// </summary>
/// <param name="doc">Parsed page.</param>
/// <returns>The assembled text; empty when the page has no definition list.</returns>
public static string extractDef(HtmlAgilityPack.HtmlDocument doc)
{
    // The patterns do not depend on the node being processed, so build them once.
    Regex tagRun = new Regex("(<.*?>\\s*)+", RegexOptions.Singleline);
    Regex inlineExample = new Regex("(.?<div class=\"def-block def-inline-example\">?.*?</div>)+", RegexOptions.Singleline);
    Regex colon = new Regex(":?", RegexOptions.Singleline);

    string resultText = "";

    // SelectSingleNode/SelectNodes return null when nothing matches; every level is checked
    // so a page without definitions produces an empty result instead of an exception.
    HtmlAgilityPack.HtmlNode list = doc.DocumentNode.SelectSingleNode("//div[@class='def-list']");
    HtmlAgilityPack.HtmlNodeCollection sections = list?.SelectNodes(".//section[@class='def-pbk ce-spot']");

    if (sections != null)
    {
        foreach (HtmlAgilityPack.HtmlNode section in sections)
        {
            HtmlAgilityPack.HtmlNodeCollection headers = section.SelectNodes(".//header[@class='luna-data-header']");
            if (headers != null)
            {
                foreach (HtmlAgilityPack.HtmlNode header in headers)
                {
                    HtmlAgilityPack.HtmlNodeCollection labels = header.SelectNodes(".//span[@class='dbox-pg']");
                    if (labels == null)
                    {
                        continue;
                    }

                    foreach (HtmlAgilityPack.HtmlNode label in labels)
                    {
                        string labelText = tagRun.Replace(label.OuterHtml, " ").Trim();
                        resultText += "\n" + labelText.Trim() + "\n";
                    }
                }
            }

            HtmlAgilityPack.HtmlNodeCollection definitionSets = section.SelectNodes(".//div[@class='def-set']");
            if (definitionSets != null)
            {
                foreach (HtmlAgilityPack.HtmlNode definitionSet in definitionSets)
                {
                    string text = definitionSet.OuterHtml;
                    text = inlineExample.Replace(text, "").Trim(); // drop inline example blocks first
                    text = tagRun.Replace(text, " ").Trim();       // then collapse remaining markup
                    text = colon.Replace(text, "").Trim();         // then remove colons
                    resultText += text.Trim() + "\n ";
                }
            }
        }
    }

    Console.WriteLine(resultText + "\n");
    return resultText;
}
/// <summary>
/// Removes every comment node below <paramref name="node"/>.
/// </summary>
/// <param name="node">Root of the subtree to clean.</param>
public static void RemoveComments(HtmlAgilityPack.HtmlNode node)
{
    // "descendant::" keeps the query inside the given node, matching RemoveNewlines and
    // RemoveColors. The former "//comment()" is an absolute path and is evaluated from the
    // document root, so it also touched comments outside the node that was passed in.
    HtmlAgilityPack.HtmlNodeCollection comments = node.SelectNodes("descendant::comment()");
    if (comments == null)
    {
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode comment in comments)
    {
        comment.Remove();
    }
}
/// <summary>
/// Builds the parameter list for submitting the login form (<c>id="gaia_loginform"</c>) found in
/// <paramref name="body"/>. Every input of the form contributes its name/value pair; the value
/// of <c>Email</c> is replaced by <paramref name="username"/> and that of <c>Passwd</c> by
/// <paramref name="password"/>. A <c>Passwd</c> entry is appended when the form has none.
/// </summary>
/// <param name="body">HTML of the login page.</param>
/// <param name="username">Value for the <c>Email</c> field.</param>
/// <param name="password">Value for the <c>Passwd</c> field.</param>
/// <returns>The form parameters in document order.</returns>
/// <exception cref="InvalidOperationException">The page contains no element with id <c>gaia_loginform</c>.</exception>
private List<KeyValuePair<string, string>> getLoginFormParams(string body, string username, string password)
{
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(body);

    HtmlAgilityPack.HtmlNode loginform = doc.GetElementbyId("gaia_loginform");
    if (loginform == null)
    {
        throw new InvalidOperationException("Login form 'gaia_loginform' not found in page.");
    }

    List<KeyValuePair<string, string>> paramList = new List<KeyValuePair<string, string>>();
    bool hasPassword = false;

    // ".//input" limits the query to the form; "//input" is evaluated from the document root
    // and would include inputs that do not belong to the login form.
    // SelectNodes returns null when the form has no inputs.
    HtmlAgilityPack.HtmlNodeCollection inputElements = loginform.SelectNodes(".//input");
    if (inputElements != null)
    {
        foreach (HtmlAgilityPack.HtmlNode input in inputElements)
        {
            string name = input.GetAttributeValue("name", string.Empty);
            string value = input.GetAttributeValue("value", string.Empty);

            if (name == "Email")
            {
                value = username;
            }
            else if (name == "Passwd")
            {
                hasPassword = true;
                value = password;
            }

            paramList.Add(new KeyValuePair<string, string>(name, value));
        }
    }

    if (!hasPassword)
    {
        paramList.Add(new KeyValuePair<string, string>("Passwd", password));
    }

    return paramList;
}
/// <summary>
/// Removes quoted blocks below <paramref name="node"/>: for every <c>td</c> with class
/// <c>alt2</c>, the element three levels above it is removed.
/// </summary>
/// <param name="node">Root of the subtree to clean.</param>
public static void RemoveQuotes(HtmlAgilityPack.HtmlNode node)
{
    HtmlAgilityPack.HtmlNodeCollection quoteCells = node.SelectNodes("descendant::td[@class='alt2']");
    if (quoteCells == null)
    {
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode cell in quoteCells)
    {
        // A cell nested less than three levels deep has no such ancestor; skip it instead
        // of failing with a NullReferenceException.
        HtmlAgilityPack.HtmlNode div = cell.ParentNode?.ParentNode?.ParentNode;
        div?.Remove();
    }
}
/// <summary>
/// Requests a dianping.com category page with a fixed set of request headers, extracts the
/// name, review-count link text and price link text of every <c>div.txt</c> entry, and
/// writes them to the response, one entry per line.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    string heads = @"Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8 Accept-Encoding:gzip, deflate Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 Cache-Control:max-age=0 Connection:keep-alive Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20 Host:www.dianping.com Upgrade-Insecure-Requests:1 Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8 Accept-Encoding:gzip, deflate Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 Cache-Control:max-age=0 Connection:keep-alive Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20 Host:www.dianping.com Upgrade-Insecure-Requests:1 User-Agent:Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/60.0";
    string url = "http://www.dianping.com/shanghai/ch75/g3032";
    ClassHttpRequestClient s = new ClassHttpRequestClient(true);
    string content = "";
    string response = s.httpPost(url, heads, content, Encoding.UTF8);

    // Step 1: create the HtmlAgilityPack document instance.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    // Step 2: load the HTML text.
    doc.LoadHtml(response);

    StringBuilder sb = new StringBuilder();

    // SelectNodes returns null when nothing matches (e.g. an error or captcha page),
    // so every lookup is checked before use instead of being indexed blindly.
    HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
    if (collection != null)
    {
        foreach (HtmlAgilityPack.HtmlNode item in collection)
        {
            HtmlAgilityPack.HtmlNode divtit = item.SelectSingleNode("div[@class=\"tit\"]");
            HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
            if (divtit == null || divcomment == null)
            {
                continue;
            }

            HtmlAgilityPack.HtmlNode aname = divtit.SelectSingleNode("a[1]");      // first hyperlink under divtit
            HtmlAgilityPack.HtmlNode anum = divcomment.SelectSingleNode("a[1]");   // first hyperlink under divcomment
            HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]"); // second hyperlink under divcomment
            if (aname == null || anum == null || aprice == null)
            {
                // Entry does not have the expected three links; skip it rather than abort the page.
                continue;
            }

            sb.Append(string.Format("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText));
        }
    }

    Response.Write(sb);
}
/// <summary>
/// Copies the <c>psid</c> query-string value from the link in column
/// <paramref name="editColIndex"/> of each HTML table row into the corresponding row of
/// <paramref name="dt"/>. Rows excluded by <c>dt.prp_skipRowTop</c>,
/// <c>dt.prp_skipRowBottom</c> and <c>dt.prp_skipRowIndecies</c> are not processed, and rows
/// whose edit cell carries no link with a query string are left unchanged.
/// </summary>
/// <param name="html">HTML containing the table.</param>
/// <param name="tableId">Id of the table element; nothing happens when it is not found.</param>
/// <param name="editColIndex">Zero-based index of the cell whose first child holds the link.</param>
/// <param name="dt">Target table; HTML row <c>i</c> maps to <c>dt.Rows[i - dt.prp_skipRowTop]</c>.</param>
/// <exception cref="ArgumentException">An argument is null or empty.</exception>
private void sb_fillDatatablePSID(string html, string tableId, int editColIndex, Model.htmlTable dt)
{
    // ArgumentException derives from Exception, so existing catch blocks keep working.
    if (string.IsNullOrEmpty(html))
    {
        throw new ArgumentException("html is null or empty");
    }

    if (string.IsNullOrEmpty(tableId))
    {
        throw new ArgumentException("tableId is null or empty");
    }

    if (dt == null)
    {
        throw new ArgumentException("dt is null");
    }

    HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(html);

    var table = document.GetElementbyId(tableId);
    if (table == null)
    {
        return;
    }

    HtmlAgilityPack.HtmlNode tbody = table.SelectSingleNode("tbody");
    HtmlAgilityPack.HtmlNodeCollection rows = tbody != null ? tbody.SelectNodes("tr") : table.SelectNodes("tr");
    if (rows == null)
    {
        return; // SelectNodes returns null for a table without rows
    }

    int skipRowCountAtBegining = dt.prp_skipRowTop;
    int skipRowCountAtEnd = dt.prp_skipRowBottom;

    for (int i = skipRowCountAtBegining; i <= rows.Count - 1 - skipRowCountAtEnd; i++)
    {
        if (dt.prp_skipRowIndecies != null && dt.prp_skipRowIndecies.Any(o => o == i))
        {
            continue;
        }

        var cells = rows[i].SelectNodes("td");
        if (cells == null || editColIndex < 0 || editColIndex >= cells.Count)
        {
            continue;
        }

        HtmlAgilityPack.HtmlNode editCell = cells[editColIndex];
        if (editCell.ChildNodes.Count == 0)
        {
            continue;
        }

        HtmlAgilityPack.HtmlAttribute href = editCell.ChildNodes[0].Attributes["href"];
        if (href == null)
        {
            continue;
        }

        // Only the part after the first '?' is a query string; links without one carry no psid.
        string[] urlParts = href.Value.Split('?');
        if (urlParts.Length < 2)
        {
            continue;
        }

        var queryStringCollection = HttpUtility.ParseQueryString(urlParts[1]);
        if (!Functions.IsNull(queryStringCollection["psid"]))
        {
            DataRow dr = dt.Rows[i - skipRowCountAtBegining];
            dr[wagonPartsGroupsDataTable.fld_psid] = queryStringCollection["psid"];
        }
    }
}
/// <summary>
/// Loads the orders of one year. The first page is fetched from <paramref name="url"/>
/// formatted with <paramref name="year"/>; when it contains a pagination list
/// (<c>ul.a-pagination</c>) every linked page except the last link is fetched and scanned,
/// otherwise the first page itself is scanned.
/// </summary>
/// <param name="year">Year substituted into <paramref name="url"/>.</param>
/// <param name="url">Format string of the order-history URL with one placeholder for the year.</param>
/// <returns>All orders found by <c>ScanOrders</c> on the scanned pages.</returns>
public List<Order> LoadYear(int year, string url)
{
    List<Order> result = new List<Order>();
    List<string> orderPages = new List<string>();

    string firstPageUrl = string.Format(url, year);
    string prefix = new Uri(firstPageUrl).GetComponents(UriComponents.SchemeAndServer, UriFormat.SafeUnescaped);

    HtmlAgilityPack.HtmlDocument firstPage = FetchOrderDocument(firstPageUrl);
    HtmlAgilityPack.HtmlNode pagination = firstPage.DocumentNode.SelectSingleNode("//ul[@class='a-pagination']");
    if (pagination != null)
    {
        // SelectNodes returns null when the pagination list contains no links.
        HtmlAgilityPack.HtmlNodeCollection links = pagination.SelectNodes(".//a[@href]");
        if (links != null)
        {
            foreach (var link in links)
            {
                orderPages.Add(link.Attributes["href"].Value.Trim());
            }
        }

        if (orderPages.Count > 1)
        {
            orderPages.RemoveAt(orderPages.Count - 1); // last link in list is next button
        }
    }
    else
    {
        AppendOrders(firstPage, prefix, result);
    }

    for (int i = 0; i < orderPages.Count; i++)
    {
        Console.WriteLine("\tpage {0}...", i + 1);
        string page_url = orderPages[i].StartsWith("http", StringComparison.Ordinal) ? orderPages[i] : prefix + orderPages[i];
        AppendOrders(FetchOrderDocument(page_url), prefix, result);
    }

    return result;
}

/// <summary>
/// Downloads <paramref name="pageUrl"/> using the shared cookie container, stores the cookies
/// of the response for later requests, and parses the body.
/// </summary>
private HtmlAgilityPack.HtmlDocument FetchOrderDocument(string pageUrl)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUrl);
    request.CookieContainer = cookies;

    // The response holds the connection until it is disposed.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        cookies.Add(response.Cookies); // for further requests
        using (StreamReader sr = new StreamReader(response.GetResponseStream()))
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(sr.ReadToEnd());
            return doc;
        }
    }
}

/// <summary>
/// Scans the <c>div#ordersContainer</c> element of <paramref name="doc"/>, if present, and
/// appends the orders found to <paramref name="target"/>.
/// </summary>
private void AppendOrders(HtmlAgilityPack.HtmlDocument doc, string prefix, List<Order> target)
{
    HtmlAgilityPack.HtmlNode container = doc.DocumentNode.SelectSingleNode("//div[@id='ordersContainer']");
    if (container != null)
    {
        target.AddRange(ScanOrders(container, prefix));
    }
}
/// <summary>
/// Returns the first table whose header row contains <paramref name="vals"/> in order.
/// The header row is the first direct <c>tr</c>, or the first <c>tr</c> of the first
/// <c>thead</c> when the table has no direct rows. Cells are compared one by one with the
/// next expected value (substring match); a mismatch restarts the pattern at its first value.
/// </summary>
/// <param name="vals">Header texts to look for, in column order.</param>
/// <returns>The matching table node, or null when no table matches.</returns>
public HtmlAgilityPack.HtmlNode findTableByHeaderPattern(params String[] vals)
{
    HtmlAgilityPack.HtmlNodeCollection tables = GetElementsByTagName("table");
    if (tables == null)
    {
        return null;
    }

    foreach (HtmlAgilityPack.HtmlNode table in tables)
    {
        HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
        if (rows == null)
        {
            HtmlAgilityPack.HtmlNodeCollection thead = table.SelectNodes("thead");
            if (thead != null)
            {
                rows = thead[0].SelectNodes("tr");
            }
        }

        // SelectNodes returns null for "no match": a table without rows or without header
        // cells cannot match, so move on to the next table instead of dereferencing null.
        HtmlAgilityPack.HtmlNodeCollection cells = rows == null ? null : rows[0].SelectNodes("th|td");
        if (cells == null)
        {
            continue;
        }

        int valIdx = 0;
        int matchCnt = 0;
        for (int i = 0; i < cells.Count && valIdx < vals.Length; ++i)
        {
            if (cells[i].InnerText.IndexOf(vals[valIdx]) != -1)
            {
                matchCnt += 1;
                ++valIdx;
            }
            else
            {
                valIdx = 0;
                matchCnt = 0;
            }
        }

        if (matchCnt == vals.Length)
        {
            return table;
        }
    }

    return null;
}
/// <summary>
/// Lists the videos of a category page. Without a <paramref name="pageUrl"/> the node stored in
/// <c>category.Other</c> is scanned; with one, the page is downloaded and scanned, and
/// <c>nextPageUrl</c> is set from the <c>li.pager-next</c> link (empty when there is none).
/// </summary>
/// <param name="category">Category whose <c>Other</c> may hold an already-parsed node.</param>
/// <param name="pageUrl">URL of the page to download, or null/empty to use <c>category.Other</c>.</param>
/// <returns>One entry per article block that has a title link; empty when none is found.</returns>
private List<VideoInfo> GetPageVideos(RssLink category, String pageUrl)
{
    List<VideoInfo> pageVideos = new List<VideoInfo>();

    if (String.IsNullOrEmpty(pageUrl) && (category.Other != null))
    {
        AppendArticleVideos((HtmlAgilityPack.HtmlNode)category.Other, pageVideos);
    }
    else if (!String.IsNullOrEmpty(pageUrl))
    {
        this.nextPageUrl = String.Empty;

        String baseWebData = GetWebData(pageUrl, forceUTF8: true);
        HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(baseWebData);

        AppendArticleVideos(document.DocumentNode, pageVideos);

        HtmlAgilityPack.HtmlNode nextPageLink = document.DocumentNode.SelectSingleNode(".//li[@class='pager-next']/a");
        HtmlAgilityPack.HtmlAttribute nextHref = nextPageLink == null ? null : nextPageLink.Attributes["href"];
        if (nextHref != null)
        {
            this.nextPageUrl = Utils.FormatAbsoluteUrl(System.Web.HttpUtility.HtmlDecode(nextHref.Value), pageUrl);
        }
    }

    return pageVideos;
}

/// <summary>
/// Appends one <c>VideoInfo</c> per <c>div</c> whose class contains 'article-default' below
/// <paramref name="root"/>. Blocks without an <c>h3/a</c> link that has an href are skipped;
/// a missing image only leaves the thumbnail unset.
/// </summary>
private static void AppendArticleVideos(HtmlAgilityPack.HtmlNode root, List<VideoInfo> target)
{
    // SelectNodes returns null when the page has no article blocks.
    HtmlAgilityPack.HtmlNodeCollection shows = root.SelectNodes(".//div[contains(@class, 'article-default')]");
    if (shows == null)
    {
        return;
    }

    foreach (var show in shows)
    {
        HtmlAgilityPack.HtmlNode linkNode = show.SelectSingleNode(".//h3/a");
        HtmlAgilityPack.HtmlAttribute href = linkNode == null ? null : linkNode.Attributes["href"];
        if (href == null)
        {
            continue;
        }

        VideoInfo videoInfo = new VideoInfo()
        {
            Title = linkNode.InnerText,
            VideoUrl = Utils.FormatAbsoluteUrl(href.Value, ApetitTvUtil.baseUrl)
        };

        HtmlAgilityPack.HtmlNode thumbNode = show.SelectSingleNode(".//img");
        HtmlAgilityPack.HtmlAttribute src = thumbNode == null ? null : thumbNode.Attributes["src"];
        if (src != null)
        {
            videoInfo.Thumb = Utils.FormatAbsoluteUrl(src.Value, ApetitTvUtil.baseUrl);
        }

        target.Add(videoInfo);
    }
}
/// <summary>
/// Removes every <c>font</c> element below <paramref name="node"/> whose lower-cased
/// <c>color</c> attribute is not contained in <paramref name="exemptList"/>. Elements without
/// a color attribute are treated as having the color "nocolor".
/// </summary>
/// <param name="node">Root of the subtree to clean.</param>
/// <param name="exemptList">Colors to keep; compared against the lower-cased attribute value.</param>
public static void RemoveColors(HtmlAgilityPack.HtmlNode node, IEnumerable<String> exemptList)
{
    HtmlAgilityPack.HtmlNodeCollection fonts = node.SelectNodes("descendant::font");
    if (fonts == null)
    {
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode font in fonts)
    {
        // Invariant lower-casing: color names are identifiers, and culture-sensitive ToLower()
        // maps 'I' to a dotless 'ı' under Turkish cultures ("LIME" would not equal "lime").
        String color = font.GetAttributeValue("color", "nocolor").ToLowerInvariant();
        if (!exemptList.Contains(color))
        {
            font.Remove();
        }
    }
}
/// <summary>
/// Extracts orders from an orders container. For each <c>div</c> whose class contains 'order',
/// sum, id and date are read from its order-info block; orders for which
/// <c>IsInitialized()</c> is true additionally get their products (name, price, URL) and are
/// returned.
/// </summary>
/// <param name="node">Container node; null yields an empty list.</param>
/// <param name="prefix">Scheme and host prepended to product links that do not start with "http".</param>
/// <returns>The initialized orders; empty when none is found.</returns>
private List<Order> ScanOrders(HtmlAgilityPack.HtmlNode node, string prefix)
{
    List<Order> orders = new List<Order>();

    // Callers pass the result of SelectSingleNode, which is null when the container is
    // missing; SelectNodes likewise returns null instead of an empty collection.
    HtmlAgilityPack.HtmlNodeCollection orderNodes = node == null ? null : node.SelectNodes(".//div[contains(@class, 'order')]");
    if (orderNodes == null)
    {
        return orders;
    }

    foreach (HtmlAgilityPack.HtmlNode order in orderNodes)
    {
        HtmlAgilityPack.HtmlNode info = order.SelectSingleNode(".//div[contains(@class, 'order-info')]");
        Order o = new Order();

        if (info != null)
        {
            HtmlAgilityPack.HtmlNode price = info.SelectSingleNode(".//div[contains(@class, 'a-span2')]//span[contains(@class, 'value')]");
            if (price != null)
            {
                o.Sum = ScanPrice(price.InnerText.Trim());
            }

            HtmlAgilityPack.HtmlNode id = info.SelectSingleNode(".//div[contains(@class, 'a-col-right')]//span[contains(@class, 'value')]");
            if (id != null)
            {
                o.Id = id.InnerText.Trim();
            }

            HtmlAgilityPack.HtmlNode date = info.SelectSingleNode(".//div[contains(@class, 'a-span4')]//span[contains(@class, 'value')]");
            if (date != null)
            {
                o.Date = ScanDate(date.InnerText.Trim());
            }
        }

        if (!o.IsInitialized())
        {
            continue;
        }

        HtmlAgilityPack.HtmlNodeCollection productNodes = order.SelectNodes(".//div[contains(@class, 'a-spacing')]//div[contains(@class, 'a-col-right')]");
        if (productNodes != null)
        {
            foreach (HtmlAgilityPack.HtmlNode product in productNodes)
            {
                HtmlAgilityPack.HtmlNode name = product.SelectSingleNode(".//a[contains(@class, 'a-link-normal')]");
                HtmlAgilityPack.HtmlNode price = product.SelectSingleNode(".//span[contains(@class, 'a-color-price')]");
                HtmlAgilityPack.HtmlAttribute href = name == null ? null : name.Attributes["href"];
                if (href == null || price == null)
                {
                    continue;
                }

                Product p = new Product();
                p.Price = ScanPrice(price.InnerText.Trim());
                p.Url = href.Value.StartsWith("http", StringComparison.Ordinal) ? href.Value : prefix + href.Value;
                p.Name = WebUtility.HtmlDecode(name.InnerText.Trim());
                o.Products.Add(p);
            }
        }

        orders.Add(o);
    }

    return orders;
}
/// <summary>
/// Extracts the articles of a title-type board page: inside <c>#main-area</c>, the first
/// article-board <c>div</c> other than <c>upperArticleList</c> is located and every table row
/// with an article cell yields one item (id taken from the <c>articleid</c> URL parameter).
/// </summary>
/// <param name="doc">Parsed board page.</param>
/// <returns>The items found; empty when the expected areas are missing (a message is logged).</returns>
private List<MonitoringItem> ParsingTitleType(HtmlAgilityPack.HtmlDocument doc)
{
    List<MonitoringItem> lstItem = new List<MonitoringItem>();

    HtmlAgilityPack.HtmlNode mainArea = doc.GetElementbyId("main-area");
    if (mainArea == null)
    {
        FuncLog($"could not find main-area");
        return lstItem;
    }

    // SelectNodes returns null when nothing matches, so each result is checked before the
    // LINQ calls (which throw on a null source).
    var divArticleBoard = mainArea.SelectNodes("div")?.FirstOrDefault(x => CommonHelper.HasClass(x, "article-board", "m-tcol-c") && x.Id != "upperArticleList");
    if (divArticleBoard == null)
    {
        FuncLog($"could not find article area");
        return lstItem;
    }

    var articleRows = CommonHelper.GetSingleNode(divArticleBoard, "table", "tbody")?.SelectNodes("tr");
    if (articleRows == null)
    {
        return lstItem;
    }

    foreach (var trArticle in articleRows)
    {
        var tdArticleWrap = trArticle.SelectNodes("td")?.FirstOrDefault(x => CommonHelper.HasClass(x, "td_article"));
        var divArticle = tdArticleWrap?.SelectNodes("div")?.FirstOrDefault(x => CommonHelper.HasClass(x, "board-list", "inner_list"));
        var anchor = divArticle?.SelectSingleNode("a");
        if (anchor == null)
        {
            // Row without the expected article cell/link; nothing to extract.
            continue;
        }

        string articleUrl = anchor.GetAttributeValue("href", "");
        string articleTitle = anchor.InnerText.Trim();
        string articleId = "";

        // Look only at the query string so that "articleid" is also recognized when it is
        // the first parameter (otherwise its key would be prefixed with the path and '?').
        int queryStart = articleUrl.IndexOf('?');
        string query = queryStart >= 0 ? articleUrl.Substring(queryStart + 1) : articleUrl;
        foreach (var param in query.Split('&'))
        {
            string[] arrParam = param.Split('=');
            if (arrParam[0] == "articleid" && arrParam.Length == 2)
            {
                articleId = arrParam[1];
                break;
            }
        }

        lstItem.Add(new MonitoringItem()
        {
            ItemId = articleId,
            ItemTitle = articleTitle,
            ItemUrlPc = string.Format(ItemDetailBaseUrlDesktop, articleId),
            ItemUrlMobile = string.Format(ItemDetailBaseUrlMobile, articleId),
            ItemDate = DateTime.Now
        });
    }

    return lstItem;
}
/// <summary>
/// Builds a <c>Product</c> from a catalogue item node: price, image, manufacturer, name,
/// product link and availability are each taken from the first node matching a fixed
/// relative XPath. Availability "Со склада" becomes an empty string and "По запросу"
/// becomes null; any other text is passed through unchanged.
/// </summary>
/// <param name="node">Item node.</param>
/// <returns>The product described by the node.</returns>
private static Product processProduct(HtmlAgilityPack.HtmlNode node)
{
    // All six lookups are mandatory: First() throws when a query has no match.
    var price = node.SelectNodes("*/*/*/span[@class='price__value']").First();
    var image = node.SelectNodes("*/*/img[@class='item__image']").First();
    var manufacturer = node.SelectNodes("*/div[@class='item__mnf']").First();
    var title = node.SelectNodes("*/div[@class='item__name']/*").First();
    var anchor = node.SelectNodes("a[@class='link link_no-underline']").First();
    var availability = node.SelectNodes("div[@class='item__avail']/div").First();

    string avail = availability.InnerText;
    switch (avail)
    {
        case "Со склада":
            avail = "";
            break;
        case "По запросу":
            avail = null;
            break;
    }

    var cost = Convert.ToDecimal(price.InnerText.Replace(" ", ""), format);
    var pictureLink = image.Attributes["src"].Value;
    var productLink = site + anchor.Attributes["href"].Value;
    var name = title.InnerText;

    var properties = new Dictionary<string, string>
    {
        { "Производитель", manufacturer.InnerText }
    };

    return new Product(name, cost, avail, properties, productLink, pictureLink);
}
/// <summary>
/// Collects the hidden inputs below <paramref name="node"/> that have both a <c>name</c> and
/// a <c>value</c> attribute.
/// </summary>
/// <param name="node">Node (typically a form) to search.</param>
/// <returns>Name/value pairs of the hidden inputs; empty when there are none.</returns>
/// <exception cref="ArgumentException">Two hidden inputs share the same name.</exception>
public static Dictionary<string, string> FormParams(HtmlAgilityPack.HtmlNode node)
{
    Dictionary<string, string> dicpara = new Dictionary<string, string>();

    // SelectNodes returns null, not an empty collection, when nothing matches.
    HtmlAgilityPack.HtmlNodeCollection InputTypeNodeList = node.SelectNodes(".//input[@type='hidden' and @name and @value]");
    if (InputTypeNodeList == null)
    {
        return dicpara;
    }

    foreach (HtmlAgilityPack.HtmlNode hidenode in InputTypeNodeList)
    {
        string key = hidenode.Attributes["name"].Value;
        string value = hidenode.Attributes["value"].Value;
        dicpara.Add(key, value);
    }

    return dicpara;
}
/// <summary>
/// Parses a forum lobby page and appends every thread row that converts successfully to
/// <paramref name="threadList"/>. Thread URLs that do not start with "http" are prefixed
/// with the part of <paramref name="url"/> up to and including its last '/'.
/// </summary>
/// <param name="url">Page URL; also the base for relative thread URLs.</param>
/// <param name="doc">Raw HTML of the page.</param>
/// <param name="serverTime">
/// Time parsed from the last centered <c>div.smallfont</c>, or the local clock when no such element exists.
/// </param>
/// <param name="threadList">List that receives the parsed threads.</param>
protected virtual void ParseLobbyPage(string url, string doc, out DateTimeOffset serverTime, ref List<ForumThread> threadList)
{
    Int32 threadId = VBulletinForum.ThreadIdFromUrl(url);
    var html = new HtmlAgilityPack.HtmlDocument();
    html.LoadHtml(doc);
    HtmlAgilityPack.HtmlNode root = html.DocumentNode;

    serverTime = DateTime.Now;

    // Footer: "All times are GMT ... The time is now <span class="time">time</span>."
    // SelectNodes returns null when nothing matches, so Last() would throw before the
    // null check; use the null-safe form.
    HtmlAgilityPack.HtmlNode timeNode = root.SelectNodes("//div[@class='smallfont'][@align='center']")?.LastOrDefault();
    if (timeNode != null)
    {
        String timeText = timeNode.InnerText;
        serverTime = Utils.Misc.ParsePageTime(timeText, DateTime.UtcNow);
    }

    HtmlAgilityPack.HtmlNodeCollection threads = root.SelectNodes("//tbody[contains(@id, 'threadbits_forum_')]/tr");
    if (threads == null)
    {
        return;
    }

    String urlBase = url.Substring(0, url.LastIndexOf('/') + 1);
    foreach (HtmlAgilityPack.HtmlNode thread in threads)
    {
        ForumThread t = HtmlToThread(threadId, thread, serverTime);
        if (t == null)
        {
            continue;
        }

        if (!t.URL.StartsWith("http", StringComparison.Ordinal))
        {
            t.URL = urlBase + t.URL;
        }

        threadList.Add(t);
    }
}
/// <summary>
/// Downloads the ynet news-flash page and converts the rows of the flash table into events.
/// Rows whose text is blank or contains "מבזקים" are skipped.
/// </summary>
/// <returns>
/// The events, an empty list when the table has no rows, or null when the table itself
/// cannot be located.
/// </returns>
public static List<Event> GetDataScreenScrap()
{
    string body = Utils.GetBody("http://www.ynet.co.il/home/0,7340,L-184,00.html");
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(body);
    HtmlAgilityPack.HtmlNode root = doc.DocumentNode;

    HtmlAgilityPack.HtmlNodeCollection nodes = root.SelectNodes("/html[1]/body/div[4]/table[3]/tr/td[2]/table[2]/tr/td/table/tr/td/table");
    if (nodes == null || nodes.Count == 0)
    {
        return null;
    }

    HtmlAgilityPack.HtmlNode table = nodes[0];

    // SelectNodes returns null for a table without rows; LINQ would throw on a null source.
    HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
    if (rows == null)
    {
        return new List<Event>();
    }

    // The rows are converted in a single pass; the former intermediate list of row texts
    // was never used.
    return (from t in rows
            where !string.IsNullOrEmpty(t.InnerText.Trim()) && !t.InnerText.Contains("מבזקים")
            select Event.FromHtmlNode(t)).ToList();
}
/// <summary>
/// Collects the page links of a chapter from the second button group's dropdown menu into
/// <paramref name="pages"/> (page number to URL). URLs already present are skipped. When at
/// least one page was added, the document of the entry with the second-highest key is loaded
/// and processed the same way.
/// </summary>
/// <param name="document">Document node to scan.</param>
/// <param name="pages">Accumulated pages; modified in place.</param>
/// <exception cref="ParserException">The expected dropdown links are not present.</exception>
private void GetPagesRecursive(HtmlAgilityPack.HtmlNode document, IDictionary<int, string> pages)
{
    var links = document.SelectNodes(@"//div[@class=""main-body""]//div[@class=""btn-group""][2]/ul[@class=""dropdown-menu""]/li/a");
    if (links == null)
    {
        throw new ParserException("Could not find expected elements on website.", document.InnerHtml);
    }

    int newlyAdded = 0;
    foreach (var link in links)
    {
        var target = GetFullUrl(link.Attributes["href"].Value);
        if (pages.Any(entry => entry.Value == target))
        {
            // URL already known.
            continue;
        }

        // The page number is the first run of digits in the link text; it stays 0 (and an
        // error is logged) when there is none.
        int number;
        string digits = Regex.Match(link.InnerText, @"\d+").Value;
        if (!Int32.TryParse(digits, out number))
        {
            _log.Error("Unable to parse page number '" + link.InnerText + "'");
        }

        if (pages.ContainsKey(number))
        {
            // Key already taken: fall back to the value of the Random member
            // (presumably a random number, defined elsewhere — confirm).
            number = Random;
        }

        pages.Add(number, target);
        newlyAdded++;
    }

    if (newlyAdded <= 0)
    {
        return;
    }

    // Second-highest key; a default pair means there is no such entry.
    var followUp = pages.OrderByDescending(entry => entry.Key).Skip(1).FirstOrDefault();
    if (followUp.Equals(default(KeyValuePair<int, string>)))
    {
        return;
    }

    var nextDocument = WebHelper.GetHtmlDocument(followUp.Value);
    GetPagesRecursive(nextDocument, pages);
}