/// <summary>
/// Posts a request to a fixed dianping.com search URL, pulls the title link and the
/// first two comment links out of every <c>div.txt</c> element and writes one
/// "name---first---second" line per element to the response.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // NOTE(review): the header block below embeds a captured session cookie and the
    // individual headers appear separated by spaces here — confirm against the
    // format ClassHttpRequestClient.httpPost expects.
    string heads = @"Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Accept-Encoding:gzip, deflate Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 Cache-Control:max-age=0 Connection:keep-alive Cookie:showNav=#nav-tab|0|0; navCtgScroll=0; cy=1; cye=shanghai; _lxsdk_cuid=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _lxsdk_s=1693813fded-ea2-2e7-d89%7C%7C51; _lxsdk=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _hc.v=7216e9e3-be12-eff4-1836-49d9b0c4b0ce.1551424029; s_ViewType=10 Host:www.dianping.com Upgrade-Insecure-Requests:1 User-Agent:Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0";
    string url = "http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%E8%AF%AD%E5%9F%B9%E8%AE%AD/r842";
    ClassHttpRequestClient s = new ClassHttpRequestClient(true);
    string content = "";
    string response = s.httpPost(url, heads, content, Encoding.UTF8);

    // Step 1: create the HtmlDocument instance. Step 2: load the HTML into it.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(response ?? string.Empty);

    StringBuilder sb = new StringBuilder();

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
    if (collection != null)
    {
        foreach (HtmlAgilityPack.HtmlNode item in collection)
        {
            HtmlAgilityPack.HtmlNode divtit = item.SelectSingleNode("div[@class=\"tit\"]");
            HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
            if (divtit == null || divcomment == null)
            {
                continue;
            }

            HtmlAgilityPack.HtmlNode aname = divtit.SelectSingleNode("a[1]");      // first link under div.tit
            HtmlAgilityPack.HtmlNode anum = divcomment.SelectSingleNode("a[1]");   // first link under div.comment
            HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]"); // second link under div.comment
            if (aname == null || anum == null || aprice == null)
            {
                // Skip entries that do not have the expected three links instead of failing the page.
                continue;
            }

            sb.AppendFormat("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText);
        }
    }

    Response.Write(sb.ToString());
}
/// <summary>
/// Parses <paramref name="html"/>, registers the URL of every <c>img</c> element that has a
/// <c>src</c> attribute via <c>AddToCollection</c>, then reports the count and shows the images.
/// </summary>
/// <param name="html">Object whose <c>ToString()</c> yields the HTML to scan.</param>
private void ExtractImageWithHtmlAgilityPack(object html)
{
    try
    {
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(html.ToString());

        int captured = 0;

        // SelectNodes yields null when the page contains no <img> at all.
        HtmlAgilityPack.HtmlNodeCollection imgNodeCollection = doc.DocumentNode.SelectNodes("//img");
        if (imgNodeCollection != null)
        {
            foreach (HtmlAgilityPack.HtmlNode imgNode in imgNodeCollection)
            {
                HtmlAgilityPack.HtmlAttribute srcAttribute = imgNode.Attributes["src"];
                if (srcAttribute == null)
                {
                    // An <img> without src used to abort the whole extraction; skip it instead.
                    continue;
                }

                string value = srcAttribute.Value;
                if (value.StartsWith("//", StringComparison.Ordinal))
                {
                    // Protocol-relative URL.
                    value = "http:" + value;
                }
                if (value.Contains(":") == false)
                {
                    // No scheme at all: resolve against the site root.
                    value = baseUrl + value;
                }

                AddToCollection(new UrlStruct() { Id = globalIndex, Status = "", Title = "", Url = value });
                captured++;
            }
        }

        ShowStatusText($"已抓取到{captured}个图像");
        ShowImage(imageCollection);
    }
    catch (Exception ex)
    {
        ShowStatusText(ex.Message);
    }
}
/// <summary>
/// Recursively mirrors the non-empty children of <paramref name="htmlNode"/> into
/// <paramref name="nodeStruct"/>.Children.
/// </summary>
/// <param name="nodeStruct">Structure that receives the child list (left untouched when there are no non-empty children).</param>
/// <param name="htmlNode">HTML node whose children are mirrored.</param>
private void NodeRecursion(ref NodeStruct nodeStruct, HtmlAgilityPack.HtmlNode htmlNode)
{
    List<NodeStruct> children = new List<NodeStruct>();

    foreach (HtmlAgilityPack.HtmlNode child in htmlNode.ChildNodes)
    {
        if (string.IsNullOrEmpty(child.InnerHtml.Trim()))
        {
            continue;
        }

        NodeStruct childStruct = new NodeStruct();
        childStruct.DisplayName = child.Name;
        childStruct.OuterHtml = child.OuterHtml;
        childStruct.InnerHtml = child.InnerHtml;
        childStruct.InnerText = child.InnerText;

        // Populate the subtree BEFORE storing the child: if NodeStruct is a value type,
        // adding first would store a copy that never receives its Children.
        if (child.HasChildNodes)
        {
            NodeRecursion(ref childStruct, child);
        }

        children.Add(childStruct);
    }

    if (children.Count > 0)
    {
        nodeStruct.Children = children;
    }
}
/// <summary>
/// Downloads the storm tracker page and converts each table row that has at least four
/// cells into a <c>Storm</c> (cells 0..3 map to Region, System, Name, Type).
/// </summary>
/// <returns>The storms parsed so far; empty when the page or table cannot be read.</returns>
public static List<Storm> GetStorms()
{
    List<Storm> storms = new List<Storm>();
    try
    {
        string sourceHTML = "https://evescoutrescue.com/home/stormtrack.php";
        string tableXPath = "/html/body/div/div[4]/div/div/div[2]/table/tbody";

        HtmlAgilityPack.HtmlWeb hw = new HtmlAgilityPack.HtmlWeb();
        HtmlAgilityPack.HtmlDocument doc = hw.Load(sourceHTML);

        // SelectNodes yields null when the XPath matches nothing (e.g. the layout changed).
        HtmlAgilityPack.HtmlNodeCollection hnc = doc.DocumentNode.SelectNodes(tableXPath);
        if (hnc == null)
        {
            return storms;
        }

        foreach (HtmlAgilityPack.HtmlNode tr in hnc.Descendants("tr"))
        {
            List<string> cells = tr.Elements("td").Select(td => td.InnerText.Trim()).ToList();
            if (cells.Count < 4)
            {
                // Header/spacer rows: index 3 is read below, so four cells are required.
                continue;
            }

            Storm s = new Storm();
            s.Region = cells[0];
            s.System = cells[1];
            s.Type = cells[3];
            s.Name = cells[2];
            storms.Add(s);
        }
    }
    catch (System.Exception ex)
    {
        // Best effort by design: callers get whatever was parsed, but the failure is no longer invisible.
        System.Diagnostics.Trace.TraceWarning("GetStorms failed: " + ex.Message);
    }
    return storms;
}
/// <summary>
/// Loads the historical-prices page for <paramref name="symbol"/> covering
/// <paramref name="from"/> .. now and extracts every "Stock Split" row.
/// </summary>
/// <param name="symbol">Ticker symbol placed in the query string.</param>
/// <param name="from">Start date of the requested range.</param>
/// <returns>
/// The parsed splits plus a terminating 1:1 entry dated <see cref="DateTime.MaxValue"/>, ordered by date.
/// </returns>
/// <exception cref="ApplicationException">
/// Navigation failed on the final attempt, or a split row could not be parsed.
/// </exception>
public static IEnumerable<SplitEvent> GetSplitEvents(string symbol, DateTime from)
{
    const int maxAttempts = 3;

    DateTime now = DateTime.Now;

    // a/b/c = start month(0-based)/day/year, d/e/f = end month(0-based)/day/year.
    // The previous format string sent now.Day as 'a' and from.Month-1 as 'e' (swapped).
    string url = string.Format(
        "http://finance.yahoo.com/q/hp?s={0}&a={1}&b={2}&c={3}&d={4}&e={5}&f={6}&g=v",
        symbol, from.Month - 1, from.Day, from.Year, now.Month - 1, now.Day, now.Year);

    List<SplitEvent> splits = new List<SplitEvent>();
    using (WebBrowserAdv webBrowser = new WebBrowserAdv())
    {
        HtmlAgilityPack.HtmlNodeCollection splitNodes = null;
        bool found = false;

        // Every attempt counts, including those that load a page without the price table;
        // previously only exceptions advanced the counter, so such a page looped forever.
        for (int attempt = 1; attempt <= maxAttempts && !found; attempt++)
        {
            try
            {
                if (!webBrowser.Navigate(5000, url))
                {
                    throw new ApplicationException("Failed navigating to " + url);
                }

                HtmlAgilityPack.HtmlNode root = webBrowser.GetHtmlAgilityPackDocument().DocumentNode;
                splitNodes = root.SelectNodes(".//td[contains(text(),'Stock Split')]");
                found = root.SelectNodes(".//th[text()='Prices']") != null;
            }
            catch (Exception)
            {
                if (attempt >= maxAttempts)
                {
                    // 'throw;' keeps the original stack trace ('throw ex;' would reset it).
                    throw;
                }
            }

            if (!found && attempt < maxAttempts)
            {
                System.Threading.Thread.Sleep(1000);
            }
        }

        if (splitNodes != null)
        {
            foreach (var td in splitNodes)
            {
                try
                {
                    var dateStr = td.ParentNode.ChildNodes[0].InnerText;

                    // Cell text looks like "3 : 2 Stock Split": strip whitespace, cut at "Stock", split on ':'.
                    var ratio = td.InnerText.Replace("\n", "").Replace(" ", "")
                        .Split(new[] { "Stock" }, StringSplitOptions.RemoveEmptyEntries)[0]
                        .Split(':');
                    var sharesBefore = ratio[0];
                    var sharesAfter = ratio[1];
                    splits.Add(new SplitEvent(DateTime.Parse(dateStr), int.Parse(sharesBefore), int.Parse(sharesAfter)));
                }
                catch (Exception ex)
                {
                    throw new ApplicationException("Failed reading split", ex);
                }
            }
        }
    }

    // Sentinel entry appended before the cumulative adjustment is computed.
    splits.Add(new SplitEvent(DateTime.MaxValue, 1, 1));
    splits.CalculateCumalitveAdjustment();
    return splits.OrderBy(d => d.date);
}
/// <summary>
/// Builds a <c>RicInfo</c> for every newly launched warrant row between
/// <c>parent.WarrantStartPos</c> and <c>parent.WarrantEndPos</c> (1-based, inclusive),
/// enriching each with its Chinese name and the PDF-derived fields.
/// </summary>
/// <returns>The collected warrants; empty when the warrant page has no rows.</returns>
public List<RicInfo> GetRicList()
{
    List<RicInfo> warrantRicList = new List<RicInfo>();
    RicChineseList = GetRicChineseInfo();

    HtmlAgilityPack.HtmlNodeCollection ricNodeList = GetNewlyWarrantRicNode(warrantMainPageUri);
    if (ricNodeList == null)
    {
        // GetNewlyWarrantRicNode returns null when no rows are found; nothing to process.
        return warrantRicList;
    }

    int endPos = Math.Min(ricNodeList.Count, parent.WarrantEndPos);
    for (int i = parent.WarrantStartPos - 1; i < endPos; i++)
    {
        HtmlAgilityPack.HtmlNode ricNode = ricNodeList[i];

        // Data cells sit at odd child indexes (2 * column + 1).
        Func<int, string> cell = column => MiscUtil.GetCleanTextFromHtml(ricNode.ChildNodes[2 * column + 1].InnerText);

        RicInfo ricInfo = new RicInfo();
        ricInfo.Code = cell(1);
        ricInfo.Underlying = cell(4);

        // The id is the text between the first '=' and the following apostrophe of the code link's href.
        string id = ricNode.ChildNodes[2 * 1 + 1].ChildNodes[0].Attributes["href"].Value;
        id = id.Substring(id.IndexOf('=') + 1);
        int quotePos = id.IndexOf("'");
        if (quotePos >= 0)
        {
            id = id.Substring(0, quotePos);
        }

        string[] arr = GetUnderlyingForStockAndIssuePrice(id);

        // Char.IsDigit(string, 0) throws on an empty string, so check for content first.
        if (!string.IsNullOrEmpty(ricInfo.Underlying) && Char.IsDigit(ricInfo.Underlying, 0))
        {
            ricInfo.UnderlyingNameForStock = arr[0];
        }
        ricInfo.IssuerPrice = arr[1];

        ricInfo.Name = cell(2);
        ricInfo.Issuer = cell(3);
        ricInfo.BullBear = cell(5);
        ricInfo.BoardLot = cell(6);
        ricInfo.StrikeLevel = cell(7);
        ricInfo.EntitlementRatio = cell(8);
        ricInfo.TotalIssueSize = cell(9);
        ricInfo.LauntchDate = cell(10);
        ricInfo.ClearingCommencementDate = cell(11);
        ricInfo.ListingDate = cell(12);
        ricInfo.MaturityDate = cell(13);

        // Get Chinese name information (first entry with the same code wins).
        foreach (RicInfo ric in RicChineseList)
        {
            if (ric.Code == ricInfo.Code)
            {
                ricInfo.ChineseName = ric.ChineseName;
                break;
            }
        }

        // Get Gear and Premium from PDF.
        parent.PDFAnalysis(ricInfo, FMType.Warrant);
        warrantRicList.Add(ricInfo);
    }

    return warrantRicList;
}
/// <summary>
/// Passes <paramref name="collection"/> through unchanged so calls can be chained,
/// failing when it is null.
/// </summary>
/// <param name="collection">Node collection to check.</param>
/// <param name="message">Message for the exception raised on a null collection.</param>
/// <returns>The same <paramref name="collection"/> instance.</returns>
/// <exception cref="InvalidDOMStructureException"><paramref name="collection"/> is null.</exception>
public static HtmlAgilityPack.HtmlNodeCollection ThrowExceptionIfNotExists(this HtmlAgilityPack.HtmlNodeCollection collection, string message)
{
    if (collection != null)
    {
        return collection;
    }

    throw new InvalidDOMStructureException(message);
}
/// <summary>
/// Pages through the GetExhibitors web service (20 entries per page, event 7) and parses
/// the "Connect" and "Basic" list items of each page until a page contains neither.
/// </summary>
/// <param name="doPartial">When true, stops after the first non-blank page.</param>
/// <returns>The parsed exhibitors after being passed through <c>GetExtendedData</c>.</returns>
public static List<Exhibitor> GetExhibitorList(bool doPartial)
{
    List<Exhibitor> exhibitors = new List<Exhibitor>();

    for (int page = 0; ; page++)
    {
        // Request body mirrors the page's own AJAX call: eventId, pageSize, page, sortOrder,
        // filterChar, searchName, searchCompany, searchCountry, searchInterest.
        string url = baseUrl + "/API/WebService.asmx/GetExhibitors";
        string data = String.Concat(
            "{\"eventId\":7,\"pageSize\":20,\"page\":",
            page,
            ",\"sortOrder\":0,\"filterChar\":\"\",\"searchName\":\"\",\"searchCompany\":\"\",\"searchCountry\":\"Any\",\"searchInterest\":-1}");

        string json = WebHelper.HttpPost(url, data);
        var payload = JObject.Parse(json);

        // The service wraps an HTML fragment in d.ReturnText.
        HtmlAgilityPack.HtmlDocument fragment = new HtmlAgilityPack.HtmlDocument();
        fragment.LoadHtml(payload["d"]["ReturnText"].ToString());

        bool foundAny = false;

        HtmlAgilityPack.HtmlNodeCollection connectItems = fragment.DocumentNode.SelectNodes("//li[@class='exhibitorListItem Connect']");
        if (connectItems != null)
        {
            foundAny = true;
            exhibitors.AddRange(ParseExhibitors(connectItems, true));
        }

        HtmlAgilityPack.HtmlNodeCollection basicItems = fragment.DocumentNode.SelectNodes("//li[@class='exhibitorListItem Basic']");
        if (basicItems != null)
        {
            foundAny = true;
            exhibitors.AddRange(ParseExhibitors(basicItems, false));
        }

        if (!foundAny || doPartial)
        {
            break;
        }
    }

    return GetExtendedData(exhibitors);
}
/// <summary>
/// Picks a company URL from the search-result links and stores it in column 7 of the
/// row registered under the file's base name.
/// </summary>
/// <remarks>
/// Candidates are links whose inner HTML contains the base name, whose href starts with
/// "http", has fewer than five '/'-separated parts and no '?'. Preference order: first
/// candidate containing "profile", then first containing "company", then the first candidate.
/// </remarks>
/// <param name="nodes">Link nodes from the search result page.</param>
/// <param name="fileName">File whose name (without extension) is the lookup key.</param>
private void getCompanyInfor2(HtmlAgilityPack.HtmlNodeCollection nodes, string fileName)
{
    string name = Path.GetFileNameWithoutExtension(fileName);

    List<string> candidates = new List<string>();
    foreach (HtmlAgilityPack.HtmlNode link in nodes)
    {
        if (link.InnerHtml.IndexOf(name) <= -1)
        {
            continue;
        }

        string href = link.GetAttributeValue("href", "");
        if (!href.StartsWith("http"))
        {
            continue;
        }

        bool isShallow = href.Split('/').Length < 5;
        bool hasNoQuery = href.IndexOf("?") < 0;
        if (isShallow && hasNoQuery)
        {
            candidates.Add(href);
        }
    }

    if (candidates.Count == 0)
    {
        return;
    }

    // One pass: stop at the first "profile" hit, remember the first "company" hit as fallback.
    string chosen = null;
    string firstCompany = null;
    foreach (string candidate in candidates)
    {
        if (candidate.IndexOf("profile") > -1)
        {
            chosen = candidate;
            break;
        }
        if (firstCompany == null && candidate.IndexOf("company") > -1)
        {
            firstCompany = candidate;
        }
    }
    if (chosen == null)
    {
        chosen = firstCompany ?? candidates[0];
    }

    string[] data = (string[])ht[name];
    if (data == null)
    {
        return;
    }

    data[7] = chosen;

    // data[0] holds the grid row index recorded when the row was created.
    int rowIndex = int.Parse(data[0]);
    if (rowIndex < dgvData.Rows.Count)
    {
        dgvData.Rows[rowIndex].Cells[7].Value = chosen;
    }
    Application.DoEvents();
}
/// <summary>
/// Reads the route title (from/to) and the stop rows of a route table and stores the
/// names for <paramref name="langName"/> on <paramref name="line"/>, creating stops that
/// do not exist yet.
/// </summary>
/// <param name="line">Line that receives the names and stops.</param>
/// <param name="routeNode">Table node of the route.</param>
/// <param name="langName">Language key under which the names are stored.</param>
/// <exception cref="ApplicationException">
/// The title cell is missing, a stop link is missing or invalid, or a new stop has no PlaceID.
/// </exception>
private static void ParseStopsTable(OneWayLine line, HtmlAgilityPack.HtmlNode routeNode, string langName)
{
    int stopCounter = 0;

    HtmlAgilityPack.HtmlNode titleNode = routeNode.SelectSingleNode("tr[1]/td[1]/table[1]/tr[1]/td[1]");
    if (titleNode == null)
    {
        throw new ApplicationException("Route title cell not found.");
    }

    line.From[langName] = titleNode.ChildNodes[0].InnerText.Trim();
    line.To[langName] = titleNode.ChildNodes[4].InnerText.Trim();
    line.Name[langName] = String.Format("{0} - {1}", line.From[langName], line.To[langName]);

    HtmlAgilityPack.HtmlNodeCollection rows = routeNode.SelectNodes("tr[1]/td[1]/table[1]/tr[@class='SmallTableRow ']");
    if (rows == null)
    {
        Console.WriteLine(routeNode.InnerHtml);
        Console.WriteLine("null collection");
        return;
    }

    foreach (HtmlAgilityPack.HtmlNode stopRowNode in rows)
    {
        string stopName = stopRowNode.ChildNodes[1].InnerHtml.Trim();

        HtmlAgilityPack.HtmlNode linkNode = stopRowNode.ChildNodes[7].ChildNodes[1];
        HtmlAgilityPack.HtmlAttribute hrefAttribute = linkNode.Attributes["href"];
        if (hrefAttribute == null)
        {
            throw new ApplicationException("Stop '" + stopName + "' has no link.");
        }
        string link = hrefAttribute.Value;

        // The TryCreate result used to be ignored, turning a bad link into a NullReferenceException below.
        Uri linkUri;
        if (!Uri.TryCreate(BusCoIlParser.baseUri, link, out linkUri))
        {
            throw new ApplicationException("Invalid stop link: " + link);
        }
        NameValueCollection col = System.Web.HttpUtility.ParseQueryString(linkUri.Query);

        if (line.Stops.ContainsKey(stopCounter))
        {
            // Stop already known (e.g. from another language pass): only add the name.
            line.Stops[stopCounter].Name[langName] = stopName;
            stopCounter++;
        }
        else
        {
            string placeId = col["PlaceID"] ?? col["PlaceID1"];
            if (placeId == null)
            {
                throw new ApplicationException("Stop link has no PlaceID: " + link);
            }

            Stop s = new Stop() { PlaceID = placeId };
            s.Name[langName] = stopName;
            line.Stops.Add(stopCounter++, s);
        }
    }
}
/// <summary>
/// Returns the filter definition as the XPath text and the group name separated by a newline.
/// </summary>
override internal string GetUpdatedFilterDefinition()
{
    // Validate: evaluate the expression against an empty document and discard the result —
    // presumably so that an invalid XPath raises here rather than later (TODO confirm).
    HtmlAgilityPack.HtmlDocument probe = new HtmlAgilityPack.HtmlDocument();
    probe.LoadHtml("<html></html>");
    probe.DocumentNode.SelectNodes(XpathBox.Text);

    return string.Concat(XpathBox.Text, "\n", GroupName.Text);
}
} // End Function GetProxyArray

/// <summary>
/// Converts the proxy table of free-proxy-list.net into an indented JSON file.
/// The HTML is read from <paramref name="htmlFile"/> when that file exists and is not empty;
/// otherwise it is downloaded and cached there.
/// </summary>
/// <param name="htmlFile">Path of the cached HTML page.</param>
/// <param name="jsonFile">Path of the JSON file to write.</param>
/// <exception cref="System.InvalidOperationException">The proxy table or its header row is missing.</exception>
public static void GetProxyList(string htmlFile, string jsonFile)
{
    System.Data.DataTable dt = new System.Data.DataTable();

    string html = null;
    if (System.IO.File.Exists(htmlFile))
    {
        html = System.IO.File.ReadAllText(htmlFile);
    }

    // An empty cache file is as useless as a missing one.
    if (string.IsNullOrEmpty(html))
    {
        using (System.Net.WebClient wc = new System.Net.WebClient())
        {
            html = wc.DownloadString("https://free-proxy-list.net/");
            System.IO.File.WriteAllText(htmlFile, html, System.Text.Encoding.UTF8);
        } // End Using wc
    } // End if (string.IsNullOrEmpty(html))

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    string selector = "//table[@id='proxylisttable']";
    HtmlAgilityPack.HtmlNode tableNode = doc.DocumentNode.SelectSingleNode(selector);
    if (tableNode == null)
    {
        throw new System.InvalidOperationException("Table 'proxylisttable' was not found in the HTML.");
    }

    HtmlAgilityPack.HtmlNodeCollection ths = tableNode.SelectNodes("./thead/tr/th");
    if (ths == null)
    {
        throw new System.InvalidOperationException("Table 'proxylisttable' has no header cells.");
    }

    foreach (HtmlAgilityPack.HtmlNode th in ths)
    {
        // Column names must be unique; disambiguate repeats with the column position.
        string columnName = th.InnerText;
        if (dt.Columns.Contains(columnName))
        {
            columnName = columnName + "_" + dt.Columns.Count;
        }
        dt.Columns.Add(columnName, typeof(string));
    } // Next th

    // SelectNodes yields null when there are no body rows.
    HtmlAgilityPack.HtmlNodeCollection trs = tableNode.SelectNodes("./tbody/tr");
    if (trs != null)
    {
        foreach (HtmlAgilityPack.HtmlNode tr in trs)
        {
            HtmlAgilityPack.HtmlNodeCollection tds = tr.SelectNodes("./td");
            if (tds == null)
            {
                continue;
            }

            System.Data.DataRow dr = dt.NewRow();

            // Ignore surplus cells rather than indexing past the declared columns.
            int cellCount = System.Math.Min(tds.Count, dt.Columns.Count);
            for (int i = 0; i < cellCount; ++i)
            {
                dr[i] = tds[i].InnerText;
            } // Next i

            dt.Rows.Add(dr);
        } // Next tr
    }

    string json = Newtonsoft.Json.JsonConvert.SerializeObject(dt, Newtonsoft.Json.Formatting.Indented);
    System.IO.File.WriteAllText(jsonFile, json, System.Text.Encoding.UTF8);
} // End Function GetProxyList
/// <summary>
/// Finds the first table whose header row contains a run of consecutive cells in which
/// the i-th cell's text contains <c>vals[i]</c>.
/// </summary>
/// <remarks>
/// The header row is the table's first direct <c>tr</c>, or the first <c>tr</c> of its
/// <c>thead</c> when it has no direct rows. Tables without header cells are skipped.
/// An empty pattern matches the first table that has header cells.
/// </remarks>
/// <param name="vals">Texts expected in consecutive header cells.</param>
/// <returns>The matching table node, or null when none matches.</returns>
public HtmlAgilityPack.HtmlNode findTableByHeaderPattern(params String[] vals)
{
    HtmlAgilityPack.HtmlNodeCollection tables = GetElementsByTagName("table");
    if (tables == null)
    {
        return null;
    }
    if (vals == null)
    {
        vals = new String[0];
    }

    foreach (HtmlAgilityPack.HtmlNode table in tables)
    {
        // Locate the header row.
        HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
        if (rows == null)
        {
            HtmlAgilityPack.HtmlNodeCollection thead = table.SelectNodes("thead");
            if (thead != null)
            {
                rows = thead[0].SelectNodes("tr");
            }
        }

        HtmlAgilityPack.HtmlNodeCollection cells = rows != null ? rows[0].SelectNodes("th|td") : null;
        if (cells == null)
        {
            // No rows or no cells: this used to raise a NullReferenceException.
            continue;
        }

        // Try every start position. The former single-pass scan dropped the current cell
        // after a mismatch, so e.g. headers "A A B" never matched the pattern "A B".
        for (int start = 0; start + vals.Length <= cells.Count; ++start)
        {
            bool allMatch = true;
            for (int offset = 0; offset < vals.Length; ++offset)
            {
                if (cells[start + offset].InnerText.IndexOf(vals[offset]) == -1)
                {
                    allMatch = false;
                    break;
                }
            }

            if (allMatch)
            {
                return table;
            }
        }
    }

    return null;
}
/// <summary>
/// Downloads the statistics page for <paramref name="domain"/> and extracts the rank,
/// visit metrics, organic-search share and per-country shares. Sections that are not
/// present in the page are left at their defaults.
/// </summary>
/// <param name="domain">Domain to look up.</param>
/// <param name="proxy">Optional proxy passed to <c>getHTML</c>.</param>
/// <returns>The populated result; with Url = domain and Rank = "-" when no HTML could be fetched.</returns>
public override ParsedResult Parse(string domain, string proxy = null)
{
    ParsedResult result = new ParsedResult();

    string html = getHTML(domain, proxy);
    if (html == null)
    {
        result.Url = domain;
        result.Rank = "-";
        return result;
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    // Fall back to the requested domain when the heading is missing (used to throw).
    HtmlAgilityPack.HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//h1[@class=\"domain text-center-xs text-left-not-xs\"]");
    result.Url = titleNode != null ? titleNode.InnerText.Trim().ToLower() : domain;

    HtmlAgilityPack.HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id=\"worldRanking-item\"]//div[@class=\"rankValue\"]");
    if (node != null)
    {
        // Rank is the text between '#' and the next tag.
        var rg = new Regex(@"#(.*?)<");
        result.Rank = rg.Match(node.InnerHtml.TrimEnd()).Groups[1].Value;
    }

    // Four cells are read by position, so require all four.
    HtmlAgilityPack.HtmlNodeCollection nc = doc.DocumentNode.SelectNodes("//td[@class=\"text-right\"]");
    if (nc != null && nc.Count >= 4)
    {
        result.Overall_Visits = nc[0].InnerText;
        result.Time_On_Site = nc[1].InnerText;
        result.Pages_per_Visit = nc[2].InnerText.Replace('.', ',');
        result.Bounce_Rate = nc[3].InnerText;
    }

    node = doc.DocumentNode.SelectSingleNode("//div[@id=\"review\"]");
    if (node != null)
    {
        var rg = new Regex("\"Organic Search\" [(](.*?)%");
        result.Organic_Search = rg.Match(node.InnerText).Groups[1].Value.TrimEnd();
    }

    HtmlAgilityPack.HtmlNodeCollection htmlNodes = doc.DocumentNode.SelectNodes("//table[@id=\"countriesBreakdownTable\"]//tr");
    if (htmlNodes != null)
    {
        foreach (HtmlAgilityPack.HtmlNode row in htmlNodes)
        {
            HtmlAgilityPack.HtmlNode countryCell = row.SelectSingleNode("td");
            HtmlAgilityPack.HtmlNode shareCell = row.SelectSingleNode("td/div[@class=\"shareValue\"]");
            if (countryCell == null || shareCell == null)
            {
                // Header or spacer rows have no data cells.
                continue;
            }

            result.CountresAdd(countryCell.InnerText, shareCell.InnerText);
        }
    }

    result.CountraseRelease();
    return result;
}
/// <summary>
/// Adds a <c>title</c> attribute of the form "Name [EXTENSION, size]" to every
/// <c>img</c> whose <c>src</c> matches <c>this.regex</c> and resolves to an item in the
/// context database.
/// </summary>
/// <param name="value">HTML field value.</param>
/// <returns>
/// The re-serialized HTML when at least one image was updated; otherwise
/// <paramref name="value"/> unchanged.
/// </returns>
private string SetTitles(string value)
{
    if (String.IsNullOrEmpty(value))
    {
        return value;
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(value);

    HtmlAgilityPack.HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//img[@src]");
    if (nodes == null || nodes.Count == 0)
    {
        return value;
    }

    // Set only once an attribute has really been written. The matched id used to double as
    // this flag, so a match whose item was missing still caused the markup to be re-serialized.
    bool modified = false;

    foreach (HtmlAgilityPack.HtmlNode node in nodes)
    {
        string src = node.GetAttributeValue("src", String.Empty);
        if (src == null)
        {
            continue;
        }

        Match match = this.regex.Match(src);
        if (!match.Success)
        {
            continue;
        }

        Sitecore.Data.ID guid = Sitecore.Data.ID.Parse(match.Groups[1].Value);
        Sitecore.Data.Items.Item item = Sitecore.Context.Database.GetItem(guid);
        if (item == null)
        {
            continue;
        }

        string title = String.Format(
            "{0} [{1}, {2}]",
            item.Name,
            item["extension"].ToUpper(),
            this.FormatBytes(Int32.Parse(item["size"])));
        node.SetAttributeValue("title", title);
        modified = true;
    }

    if (!modified)
    {
        return value;
    }

    using (StringWriter sw = new StringWriter())
    {
        doc.Save(sw);
        sw.Flush();
        return sw.ToString();
    }
}
/// <summary>
/// Loads the page behind <paramref name="parentCategory"/> and adds one sub-category per
/// programme item, followed by a next-page category when the pager has a next link.
/// </summary>
/// <remarks>
/// When called for a next-page category, items are appended to that category's parent
/// and the trailing next-page entry is removed from the parent first.
/// </remarks>
/// <param name="parentCategory">Category (or next-page category) to expand.</param>
/// <returns>Number of programme items added.</returns>
public override int DiscoverSubCategories(Category parentCategory)
{
    int showsCount = 0;
    String url = ((RssLink)parentCategory).Url;

    if (parentCategory.ParentCategory != null)
    {
        parentCategory = parentCategory.ParentCategory;

        // Last category is the next-page category: remove it.
        if (parentCategory.SubCategories != null && parentCategory.SubCategories.Count > 0)
        {
            parentCategory.SubCategories.RemoveAt(parentCategory.SubCategories.Count - 1);
        }
    }
    if (parentCategory.SubCategories == null)
    {
        parentCategory.SubCategories = new List<Category>();
    }

    RssLink category = (RssLink)parentCategory;

    String baseWebData = GetWebData(url, forceUTF8: true);
    if (String.IsNullOrEmpty(baseWebData))
    {
        return showsCount;
    }

    HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(baseWebData);

    // SelectNodes yields null when the page lists no programmes.
    HtmlAgilityPack.HtmlNodeCollection series = document.DocumentNode.SelectNodes(".//div[@class='programs-wrapper']/*/div[@class='item']");
    if (series != null)
    {
        foreach (var serie in series)
        {
            HtmlAgilityPack.HtmlNode serieNode = serie.SelectSingleNode(".//div[@class='field-title']/./a");
            if (serieNode == null || serieNode.Attributes["href"] == null)
            {
                // Without a link there is nothing to navigate to; skip the item.
                continue;
            }

            HtmlAgilityPack.HtmlNode imgNode = serie.SelectSingleNode(".//div[@class='field-image-primary']/*/*/img");
            HtmlAgilityPack.HtmlAttribute imgSrc = imgNode != null ? imgNode.Attributes["src"] : null;

            category.SubCategories.Add(
                new RssLink()
                {
                    Name = serieNode.InnerText,
                    HasSubCategories = false,
                    Url = Utils.FormatAbsoluteUrl(serieNode.Attributes["href"].Value, url),
                    // A missing thumbnail no longer fails the whole listing.
                    Thumb = imgSrc != null ? imgSrc.Value : null
                });
            showsCount++;
        }
    }

    HtmlAgilityPack.HtmlNode nextPageNode = document.DocumentNode.SelectSingleNode(".//li[@class='pager-next last']/./a");
    if (nextPageNode != null && nextPageNode.Attributes["href"] != null)
    {
        parentCategory.SubCategories.Add(new NextPageCategory()
        {
            Url = Utils.FormatAbsoluteUrl(nextPageNode.Attributes["href"].Value, url),
            ParentCategory = parentCategory
        });
    }

    if (showsCount > 0)
    {
        parentCategory.SubCategoriesDiscovered = true;
    }

    return showsCount;
}
/// <summary>
/// Collects the bold runs of a post after quotes, colours other than <c>_voteColor</c>
/// and newlines have been stripped from a copy of <paramref name="original"/>.
/// </summary>
/// <remarks>
/// With an empty <c>_voteColor</c> only direct <c>b</c> children are collected. Otherwise
/// three shapes are collected, in this order: <c>b</c> inside <c>font</c>, <c>font</c>
/// inside <c>b</c>, and <c>span</c> elements whose style starts with
/// "color:red;font-weight:bold;".
/// </remarks>
/// <param name="original">Post node; it is cloned, not modified.</param>
/// <returns>The bold entries produced by <c>BoldsFromSet</c>.</returns>
protected List<Bold> ParseBolded(HtmlAgilityPack.HtmlNode original)
{
    List<Bold> bolded = new List<Bold>();

    HtmlAgilityPack.HtmlNode content = original.CloneNode("Votes", true);
    RemoveQuotes(content);                                  // strip out quotes
    List<String> goodColors = new List<string>() { _voteColor };
    RemoveColors(content, goodColors);                      // strip out colors
    RemoveNewlines(content);                                // strip out newlines

    if (_voteColor == "")
    {
        // Plain bold.
        HtmlAgilityPack.HtmlNodeCollection plainBolds = content.SelectNodes("child::b");
        if (plainBolds != null)
        {
            BoldsFromSet(plainBolds, bolded);
        }
        return bolded;
    }

    // Colour wrapping bold.
    HtmlAgilityPack.HtmlNodeCollection fonts = content.SelectNodes("descendant::font");
    if (fonts != null)
    {
        foreach (var font in fonts)
        {
            HtmlAgilityPack.HtmlNodeCollection boldsInFont = font.SelectNodes("child::b");
            if (boldsInFont != null)
            {
                BoldsFromSet(boldsInFont, bolded);
            }
        }
    }

    // Bold wrapping colour.
    HtmlAgilityPack.HtmlNodeCollection allBolds = content.SelectNodes("descendant::b");
    if (allBolds != null)
    {
        foreach (var bold in allBolds)
        {
            HtmlAgilityPack.HtmlNodeCollection fontsInBold = bold.SelectNodes("child::font");
            if (fontsInBold != null)
            {
                BoldsFromSet(fontsInBold, bolded);
            }
        }
    }

    // Span carrying colour and weight in its style.
    HtmlAgilityPack.HtmlNodeCollection styledSpans = content.SelectNodes("descendant::span[starts-with(@style,\"color:red;font-weight:bold;\")]");
    if (styledSpans != null)
    {
        BoldsFromSet(styledSpans, bolded);
    }

    return bolded;
}
/// <summary>
/// Extracts the displayed URL of each search result from Google result-page HTML.
/// </summary>
/// <param name="strHTMLCode">HTML source of the result page.</param>
/// <returns>
/// One entry per <c>li.g</c> element that has a non-empty <c>cite</c>, passed through
/// <c>OpearURL</c>; empty when the page has no such elements.
/// </returns>
public List<string> GetUrlList(string strHTMLCode)
{
    HtmlAgilityPack.HtmlDocument htmldoc = new HtmlAgilityPack.HtmlDocument();
    htmldoc.LoadHtml(strHTMLCode);

    List<string> alistURL = new List<string>();

    // Each li.g is a single result entry.
    HtmlAgilityPack.HtmlNodeCollection resultItems = htmldoc.DocumentNode.SelectNodes(".//li[@class=\"g\"]");
    if (resultItems == null)
    {
        return alistURL;
    }

    foreach (HtmlAgilityPack.HtmlNode resultItem in resultItems)
    {
        HtmlAgilityPack.HtmlNode citeNode = resultItem.SelectSingleNode(".//cite");
        if (citeNode != null && citeNode.InnerText != "")
        {
            alistURL.Add(OpearURL(citeNode.InnerText));
        }
    }

    return alistURL;
}
//*********************************************************************************************
//*********************************************************************************************
//*********************************************************************************************
/// <summary>
/// Returns the <c>href</c> values of the anchors selected by the XPath "/./a",
/// optionally restricted to those matching <paramref name="filter"/>.
/// </summary>
/// <param name="filter">Pattern a link must match to be returned; null returns every link.</param>
/// <returns>The link targets; empty when no anchor is selected. Anchors without href are skipped.</returns>
public List<string> ExtractLinks(Regex filter = null)
{
    // NOTE(review): "/./a" only selects anchors that are direct children of the document
    // root; "//a" may have been intended — confirm before changing.
    HtmlAgilityPack.HtmlNodeCollection ns = HtmlDocument.DocumentNode.SelectNodes("/./a");
    if (ns == null)
    {
        // SelectNodes yields null when nothing matches.
        return new List<string>();
    }

    IEnumerable<string> hrefs = ns
        .Select(n => n.Attributes["href"])
        .Where(attribute => attribute != null)
        .Select(attribute => attribute.Value);

    if (filter != null)
    {
        hrefs = hrefs.Where(href => filter.IsMatch(href));
    }

    return hrefs.ToList();
}
/// <summary>
/// Returns the number of comic links on the archive page, which this code uses as the
/// id of the latest comic.
/// </summary>
/// <returns>The link count, or 0 when the archive page contains no such links.</returns>
private uint GetLastestComicID()
{
    HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
    HtmlAgilityPack.HtmlDocument archivePage = web.Load(XKCD_URL + "archive/");

    HtmlAgilityPack.HtmlNodeCollection items = archivePage.DocumentNode.SelectNodes("//*[@id='middleContainer']/a");
    if (items == null)
    {
        // SelectNodes yields null when nothing matches.
        return 0;
    }

    // NOTE(review): assumes "last comic id == number of archive links"; that breaks if any
    // id is absent from the archive — verify, or derive the id from the link hrefs instead.
    return (uint)items.Count;
}
/// <summary>
/// Returns the videos of a category page. With an empty <paramref name="pageUrl"/> the
/// node cached in <c>category.Other</c> is parsed; otherwise the page is downloaded and
/// <c>nextPageUrl</c> is updated from its pager.
/// </summary>
/// <param name="category">Category whose <c>Other</c> may hold an already parsed root node.</param>
/// <param name="pageUrl">URL of the page to download, or null/empty to use the cached node.</param>
/// <returns>The videos found; empty when there is nothing to parse.</returns>
private List<VideoInfo> GetPageVideos(RssLink category, String pageUrl)
{
    List<VideoInfo> pageVideos = new List<VideoInfo>();

    if (String.IsNullOrEmpty(pageUrl) && (category.Other != null))
    {
        HtmlAgilityPack.HtmlNode root = (HtmlAgilityPack.HtmlNode)category.Other;
        AppendArticleVideos(root, pageVideos);
    }
    else if (!String.IsNullOrEmpty(pageUrl))
    {
        this.nextPageUrl = String.Empty;

        String baseWebData = GetWebData(pageUrl, forceUTF8: true);
        if (String.IsNullOrEmpty(baseWebData))
        {
            return pageVideos;
        }

        HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(baseWebData);

        AppendArticleVideos(document.DocumentNode, pageVideos);

        HtmlAgilityPack.HtmlNode nextPageLink = document.DocumentNode.SelectSingleNode(".//li[@class='pager-next']/a");
        if (nextPageLink != null && nextPageLink.Attributes["href"] != null)
        {
            this.nextPageUrl = Utils.FormatAbsoluteUrl(System.Web.HttpUtility.HtmlDecode(nextPageLink.Attributes["href"].Value), pageUrl);
        }
    }

    return pageVideos;
}

/// <summary>
/// Appends one <c>VideoInfo</c> per "article-default" element below <paramref name="root"/>;
/// elements without a title link are skipped and a missing thumbnail is left unset.
/// </summary>
private static void AppendArticleVideos(HtmlAgilityPack.HtmlNode root, List<VideoInfo> target)
{
    // SelectNodes yields null when the page has no articles.
    HtmlAgilityPack.HtmlNodeCollection shows = root.SelectNodes(".//div[contains(@class, 'article-default')]");
    if (shows == null)
    {
        return;
    }

    foreach (var show in shows)
    {
        HtmlAgilityPack.HtmlNode linkNode = show.SelectSingleNode(".//h3/a");
        if (linkNode == null || linkNode.Attributes["href"] == null)
        {
            continue;
        }

        VideoInfo videoInfo = new VideoInfo()
        {
            Title = linkNode.InnerText,
            VideoUrl = Utils.FormatAbsoluteUrl(linkNode.Attributes["href"].Value, ApetitTvUtil.baseUrl)
        };

        HtmlAgilityPack.HtmlNode thumbNode = show.SelectSingleNode(".//img");
        if (thumbNode != null && thumbNode.Attributes["src"] != null)
        {
            videoInfo.Thumb = Utils.FormatAbsoluteUrl(thumbNode.Attributes["src"].Value, ApetitTvUtil.baseUrl);
        }

        target.Add(videoInfo);
    }
}
/// <summary>
/// Adds one grid row per non-admin <c>mailto:</c> link in <paramref name="nodes"/>, filled
/// from the anchor preceding the link and from the table row that links to that anchor.
/// </summary>
/// <remarks>
/// Row layout written here: [0] row number, [1] anchor name, [2]/[3]/[4] cells 5/3/7 of
/// the job row, [6] e-mail text, [8] cells 9 and 11 joined by a space. [5] and [7] stay null.
/// </remarks>
/// <param name="nodes">All link nodes of the page, in document order.</param>
/// <param name="fileName">Page file; its base name is the page suffix ("1" maps to none).</param>
private void getPersonalInfor(HtmlAgilityPack.HtmlNodeCollection nodes, string fileName)
{
    string Idx = Path.GetFileNameWithoutExtension(fileName);
    if (Idx == "1")
    {
        Idx = "";
    }

    foreach (HtmlAgilityPack.HtmlNode node in nodes)
    {
        string url = node.GetAttributeValue("href", "");
        bool isPersonalMail = url.StartsWith("mailto")
            && node.InnerHtml.IndexOf("@") > -1
            && node.InnerHtml.IndexOf("admin") < 0;

        // nodes[node] is the link's position; the named anchor is expected right before it.
        // A mailto link in first position has no predecessor (this used to index -1 and throw).
        int position = isPersonalMail ? nodes[node] : -1;
        if (position > 0)
        {
            string[] data = new string[9];
            data[0] = dgvData.Rows.Count.ToString();
            data[6] = node.InnerHtml;

            HtmlAgilityPack.HtmlNode anchor = nodes[position - 1];
            data[1] = anchor.GetAttributeValue("name", "");

            string link = string.Format("http://www.minami-osaka.net/jobs{1}.htm#{0}", data[1], Idx);
            foreach (HtmlAgilityPack.HtmlNode subnode in nodes)
            {
                if (subnode.GetAttributeValue("href", "") != link)
                {
                    continue;
                }

                // Climb from the link to its table row: parent (or grandparent) is the cell,
                // unless that node already is the row.
                HtmlAgilityPack.HtmlNode cell = subnode.ParentNode;
                if (cell != null && !string.Equals(cell.Name, "TD", StringComparison.OrdinalIgnoreCase))
                {
                    cell = cell.ParentNode;
                }

                HtmlAgilityPack.HtmlNode row = null;
                if (cell != null)
                {
                    row = string.Equals(cell.Name, "TR", StringComparison.OrdinalIgnoreCase) ? cell : cell.ParentNode;
                }

                // Child index 11 is read below; leave the fields empty on unexpected markup.
                if (row != null && row.ChildNodes.Count > 11)
                {
                    data[2] = row.ChildNodes[5].InnerText;
                    data[3] = row.ChildNodes[3].InnerText;
                    data[4] = row.ChildNodes[7].InnerText;
                    data[8] = row.ChildNodes[9].InnerText.Replace(" ", "").Replace("\n", "")
                        + " "
                        + row.ChildNodes[11].InnerText.Replace(" ", "").Replace("\n", "");
                }
                break;
            }

            dgvData.Rows.Add(data);
        }

        Application.DoEvents();
    }
}
/// <summary>
/// Parses one vBulletin thread page: server time, total page count and the posts.
/// </summary>
/// <param name="url">Page URL; the thread id is derived from it.</param>
/// <param name="doc">HTML of the page.</param>
/// <param name="lastPageNumber">Receives N from "Page x of N", or 0 when there is no pager.</param>
/// <param name="serverTime">Receives the time parsed from the page footer, or the local current time when absent.</param>
/// <param name="postList">Replaced by a new list of the parsed posts; left untouched when the page has no posts.</param>
protected virtual void ParseThreadPage(String url, String doc, out Int32 lastPageNumber, out DateTimeOffset serverTime, ref Posts postList)
{
    Int32 threadId = VBulletinForum.ThreadIdFromUrl(url);
    lastPageNumber = 0;

    var html = new HtmlAgilityPack.HtmlDocument();
    html.LoadHtml(doc);
    HtmlAgilityPack.HtmlNode root = html.DocumentNode;

    serverTime = DateTime.Now;

    // Footer: "All times are GMT ... The time is now <span class="time">time</span>." — take the last such div.
    // SelectNodes yields null when absent, so it must be checked before picking the last node
    // (calling Last() directly threw on pages without the footer).
    HtmlAgilityPack.HtmlNodeCollection timeNodes = root.SelectNodes("//div[@class='smallfont'][@align='center']/span[@class='time']/..");
    if (timeNodes != null && timeNodes.Count > 0)
    {
        String timeText = timeNodes[timeNodes.Count - 1].InnerText;
        serverTime = Utils.Misc.ParsePageTime(timeText, DateTime.UtcNow);
    }

    // Pager cell text: "Page 106 of 106".
    HtmlAgilityPack.HtmlNode pageNode = root.SelectSingleNode("//div[@class='pagenav']/table/tr/td");
    if (pageNode != null)
    {
        Match m = Regex.Match(pageNode.InnerText, @"Page (\d+) of (\d+)");
        if (m.Success)
        {
            lastPageNumber = Convert.ToInt32(m.Groups[2].Value);
        }
    }

    // Post bodies: div#posts // div[id contains 'edit'] / table / tr[2] / td[2] / div[id contains 'post_message_'].
    HtmlAgilityPack.HtmlNodeCollection posts = root.SelectNodes("//div[@id='posts']//div[contains(@id, 'edit')]/table/tr[2]/td[2]/div[contains(@id, 'post_message_')]");
    if (posts == null)
    {
        return;
    }

    postList = new Posts();
    foreach (HtmlAgilityPack.HtmlNode post in posts)
    {
        Post p = HtmlToPost(threadId, post, serverTime);
        if (p != null)
        {
            postList.Add(p);
        }
    }
}
/// <summary>
/// Runs a search on google.com.vn and returns the inner text of every node selected by
/// <paramref name="xpath"/> on the result page.
/// </summary>
/// <param name="keywordsearch">Search keyword.</param>
/// <param name="xpath">XPath selecting the nodes whose text is returned.</param>
/// <param name="page">Zero-based result page (start offset = page * 10).</param>
/// <returns>
/// The selected texts (empty when nothing matches or the response is not HTML);
/// null when the request fails.
/// </returns>
public static List<string> GetListStringFromGoogle(string keywordsearch, string xpath, int page)
{
    string encodedKeyword = System.Web.HttpUtility.UrlEncode(keywordsearch);

    string url;
    if (page == 0)
    {
        // The first page used to be requested with an empty "q=" (keyword never appended).
        url = "https://www.google.com.vn/search?q=" + encodedKeyword;
    }
    else
    {
        url = string.Format("https://www.google.com.vn/search?q={0}&btnG=T%C3%ACm+v%E1%BB%9Bi+Google&start={1}", encodedKeyword, (page * 10));
    }

    List<string> listkeywords = new List<string>();
    try
    {
        Uri urlRoot = new Uri(url, UriKind.RelativeOrAbsolute);
        HttpWebRequest oReq = (HttpWebRequest)WebRequest.Create(urlRoot);
        oReq.AllowAutoRedirect = true; // follow redirect responses via their Location header
        oReq.UserAgent = @"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36";
        oReq.Timeout = 3000;

        // 'using' releases the connection even when parsing throws.
        using (HttpWebResponse resp = (HttpWebResponse)oReq.GetResponse())
        {
            if (resp.ContentType.StartsWith("text/html", StringComparison.InvariantCultureIgnoreCase))
            {
                // A missing or unknown charset falls back to UTF-8 instead of failing the request.
                Encoding encoding;
                try
                {
                    encoding = string.IsNullOrEmpty(resp.CharacterSet) ? Encoding.UTF8 : Encoding.GetEncoding(resp.CharacterSet);
                }
                catch (ArgumentException)
                {
                    encoding = Encoding.UTF8;
                }

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                using (var resultStream = resp.GetResponseStream())
                {
                    doc.Load(resultStream, encoding);
                }

                HtmlAgilityPack.HtmlNodeCollection node = doc.DocumentNode.SelectNodes(xpath);
                if (node != null)
                {
                    foreach (HtmlAgilityPack.HtmlNode item in node)
                    {
                        listkeywords.Add(item.InnerText);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log.Error("Error: ", ex);

        // NOTE(review): blocks the calling thread for 20 minutes after any failure —
        // presumably a back-off against being rate-limited; confirm this is still wanted.
        Thread.Sleep(1200000);
        return null;
    }

    return listkeywords;
}
/// <summary>
/// Downloads the warrant page and returns its <c>tr.tr_normal</c> rows.
/// </summary>
/// <param name="uri">Address of the warrant main page.</param>
/// <returns>The matching rows, or null when none are found (a message is logged in that case).</returns>
private HtmlAgilityPack.HtmlNodeCollection GetNewlyWarrantRicNode(string uri)
{
    HtmlAgilityPack.HtmlDocument htmlDoc = WebClientUtil.GetHtmlDocument(uri, 180000);

    HtmlAgilityPack.HtmlNodeCollection ricNodeList = null;

    // SelectNodes yields null when the expected layout is absent; guard before indexing.
    HtmlAgilityPack.HtmlNodeCollection outerTables = htmlDoc.DocumentNode.SelectNodes("html/body/font/table/tbody/tr/td/table/tbody");
    if (outerTables != null && outerTables[0].ChildNodes.Count > 1)
    {
        var node = outerTables[0].ChildNodes[1];

        // NOTE(review): an XPath starting with "//" searches the whole document, not just
        // 'node'; ".//" may have been intended — confirm before changing.
        ricNodeList = node.SelectNodes("//td/table/tr/td/table/tbody/tr[@class='tr_normal']");
    }

    if (ricNodeList == null || ricNodeList.Count == 0)
    {
        // Space added before "to": the message used to run the URL and "to" together.
        logger.Log("There's no newly launched Warrant. Please go to " + uri + " to have a check.");
    }

    return ricNodeList;
}
/// <summary>
/// Collects the value of the <c>src</c> attribute from every node in the given collection.
/// </summary>
/// <param name="htmlNodes">
/// Nodes to read. May be <c>null</c>, which is what HtmlAgilityPack's SelectNodes yields
/// when a query matches nothing; an empty list is returned in that case.
/// </param>
/// <returns>The <c>src</c> values in collection order; nodes without a <c>src</c> attribute are skipped.</returns>
public List<string> GetImageUrls(HtmlAgilityPack.HtmlNodeCollection htmlNodes)
{
    List<string> imageUrls = new List<string>();
    if (htmlNodes == null)
    {
        return imageUrls;
    }

    foreach (var node in htmlNodes)
    {
        // The attribute indexer returns null for a missing attribute; dereferencing it would throw.
        HtmlAgilityPack.HtmlAttribute source = node.Attributes["src"];
        if (source != null)
        {
            imageUrls.Add(source.Value);
        }
    }

    return imageUrls;
}
/// <summary>
/// Gets the laser wavenumber (LWN) by downloading <c>/config/report.htm</c> from the device
/// at <c>IpAddress</c> and parsing the number out of the line that contains "ITC".
/// </summary>
/// <returns>
/// The parsed value; <c>0</c> when the page contains no matching table or "ITC" line;
/// <c>null</c> when the page cannot be downloaded, or (with <c>ErrorString</c> set) when the
/// number cannot be parsed or any exception is thrown.
/// </returns>
public override double? GetLaserWavelength()
{
    lock (thisLock)
    {
        try
        {
            double lwn = 0;

            string htmlstr = DownloadWebPage("http://" + IpAddress + "/config/report.htm");
            if (htmlstr == null)
            {
                return null;
            }

            HtmlAgilityPack.HtmlDocument dom = new HtmlAgilityPack.HtmlDocument();
            dom.LoadHtml(htmlstr);

            HtmlAgilityPack.HtmlNodeCollection nodeCollection = dom.DocumentNode.SelectNodes(@"//body//fieldset//table");
            if (nodeCollection == null)
            {
                return lwn;
            }

            var item = nodeCollection.FirstOrDefault(p => p.InnerText.Contains("Additional parameters for Cmds"));
            if (item == null)
            {
                return lwn;
            }

            // The value is looked up under the last child of the matching table's parent.
            var ip = item.ParentNode.ChildNodes.Last().ChildNodes.FirstOrDefault(p => p.InnerText.Contains("ITC*"));
            if (ip == null)
            {
                return lwn;
            }

            var ite = ip.InnerText.Trim().Split('\n').FirstOrDefault(p => p.Contains("ITC"));
            if (ite == null)
            {
                return lwn;
            }

            var regex = new System.Text.RegularExpressions.Regex(@"\d*\.\d*|0\.\d*[1-9]\d*$");

            // The regex only yields digits and '.', so parse with the invariant culture;
            // the current culture may treat '.' as a group separator or reject it.
            bool parsed = double.TryParse(
                regex.Match(ite).Value,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture,
                out lwn);
            if (!parsed)
            {
                ErrorString = "Read LWN Error";
                return null;
            }

            return lwn;
        }
        catch
        {
            // Best effort: any failure is reported through ErrorString and a null result.
            ErrorString = "Read LWN Error";
            return null;
        }
    }
}
/// <summary>
/// Requests every page from <c>first_page</c> through <c>last_page</c> and appends the trimmed
/// text of each matching link, followed by a newline, to <c>rtb_output</c>.
/// </summary>
private void start()
{
    my_delegate = new add_text(add_text_method);

    int pageIndex = this.first_page - 1;
    while (pageIndex < this.last_page)
    {
        // Index 0 is requested at the bare URL; later pages add PAGEN_5=<1-based page number>.
        string content = pageIndex == 0
            ? getRequest(UrlTxtBox.Text)
            : getRequest(UrlTxtBox.Text + this.param_separator + "PAGEN_5=" + (pageIndex + 1));

        HtmlAgilityPack.HtmlDocument page = new HtmlAgilityPack.HtmlDocument();
        page.LoadHtml(content);

        HtmlAgilityPack.HtmlNodeCollection links = page.DocumentNode.SelectNodes("//div[@class='mid pt']/h3[@class='h3 mt']/a");
        if (links != null)
        {
            foreach (HtmlAgilityPack.HtmlNode link in links)
            {
                string line = link.InnerText.Trim() + "\n";
                rtb_output.Text += line;
            }
        }

        pageIndex++;
    }
}
/// <summary>
/// Collects the hidden input fields found beneath <paramref name="node"/> as name/value pairs.
/// </summary>
/// <param name="node">Node whose descendants are searched for <c>input</c> elements of type <c>hidden</c> that carry both <c>name</c> and <c>value</c>.</param>
/// <returns>A dictionary of field name to field value; empty when no such input exists.</returns>
/// <exception cref="System.ArgumentException">Two hidden inputs share the same name.</exception>
public static Dictionary<string, string> FormParams(HtmlAgilityPack.HtmlNode node)
{
    Dictionary<string, string> dicpara = new Dictionary<string, string>();

    HtmlAgilityPack.HtmlNodeCollection hiddenInputs = node.SelectNodes(".//input[@type='hidden' and @name and @value]");
    // SelectNodes returns null rather than an empty collection when nothing matches.
    if (hiddenInputs == null)
    {
        return dicpara;
    }

    foreach (HtmlAgilityPack.HtmlNode hidenode in hiddenInputs)
    {
        // The XPath predicate guarantees both attributes are present.
        string key = hidenode.Attributes["name"].Value;
        string value = hidenode.Attributes["value"].Value;
        dicpara.Add(key, value);
    }

    return dicpara;
}
/// <summary>
/// Parses <paramref name="body"/> as HTML and returns the inner HTML of every node matched by
/// <paramref name="xpath"/>.
/// </summary>
/// <param name="body">HTML markup to parse.</param>
/// <param name="xpath">XPath expression evaluated from the document node.</param>
/// <returns>The inner HTML of each match; empty when nothing matches.</returns>
private List<string> getElements(string body, string xpath)
{
    List<string> list = new List<string>();

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(body);

    HtmlAgilityPack.HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(xpath);
    // SelectNodes returns null rather than an empty collection when nothing matches.
    if (nodes == null)
    {
        return list;
    }

    foreach (var node in nodes)
    {
        list.Add(node.InnerHtml);
    }

    return list;
}
/// <summary>
/// Stores every <c>td</c> descendant of <c>row</c> in <c>columns</c>.
/// </summary>
private void getColumnList() => columns = row.SelectNodes(".//td");
/// <summary>
/// Fills <c>targets</c> from the children of the element with id
/// <c>internal_transfer_target_account</c>, reading them two at a time.
/// </summary>
/// <remarks>
/// For each pair, the key is the first space-separated word of the second node's text and
/// the value is the first node's <c>value</c> attribute (empty string when absent).
/// </remarks>
private void GetTargets()
{
    nodes = htmlDom.GetElementbyId("internal_transfer_target_account").ChildNodes;

    // Each step reads nodes[i] and nodes[i + 1], so the bound must cover i + 1;
    // a trailing unpaired node is ignored instead of indexing out of range.
    for (int i = 0; i + 1 < nodes.Count; i += 2)
    {
        accountInfo = nodes[i + 1].InnerText.Split(' ');
        targets.Add(accountInfo[0], nodes[i].GetAttributeValue("value", ""));
    }
}
/// <summary>
/// Fills <c>accounts</c>, <c>accountTypes</c> and <c>accountBox1</c> from the children of the
/// <c>optgroup</c> inside the element with id <c>sel_bills2</c>, reading them three at a time.
/// </summary>
/// <remarks>
/// In each group the first node supplies the <c>value</c> attribute and the second node's text
/// supplies the account name (first word) and a second word that is tested for "cent".
/// The third node of each group is not read.
/// </remarks>
private void GetAccounts()
{
    nodes = htmlDom.GetElementbyId("sel_bills2").Element("optgroup").ChildNodes;

    // Each step reads nodes[i] and nodes[i + 1], so the bound must cover i + 1;
    // an incomplete trailing group is ignored instead of indexing out of range.
    for (int i = 0; i + 1 < nodes.Count; i += 3)
    {
        accountInfo = nodes[i + 1].InnerText.Split(' ');

        accounts.Add(accountInfo[0], nodes[i].GetAttributeValue("value", ""));
        // NOTE(review): accountInfo[1] still throws when the text has no second word;
        // left as is because silently guessing the account type would be unsafe.
        accountTypes.Add(accountInfo[0], accountInfo[1].Contains("cent"));
        accountBox1.Items.Add(accountInfo[0]);
    }
}
/// <summary>
/// Posts an internal transfer of <c>balance</c> minus the scaled <c>baseConst</c> from
/// <c>account1</c> to <c>account2</c>, then reports the result through <c>worker</c>.
/// </summary>
/// <exception cref="System.Exception">
/// The response page contains one or more form-error elements; the message is their text,
/// one per line.
/// </exception>
private void Transfer()
{
    // presumably cent accounts hold amounts scaled by 100 - confirm against the caller
    int scale = centAcc ? 100 : 1;
    amount = balance - (baseConst * scale);

    // The token is read from the currently loaded page before the POST replaces it.
    NameValueCollection transferData = new NameValueCollection
    {
        ["internal_transfer[password]"] = passwords[account1],
        ["internal_transfer[amount]"] = amount.ToString(),
        ["internal_transfer[target_account]"] = targets[account2],
        ["internal_transfer[_token]"] = htmlDom.GetElementbyId("internal_transfer__token").GetAttributeValue("value", "")
    };

    html = Http.Post(url, transferData);
    htmlDom.LoadHtml(html);

    nodes = htmlDom.DocumentNode.SelectNodes("//div[@class='form-error help error']");
    if (nodes != null)
    {
        System.Text.StringBuilder errors = new System.Text.StringBuilder();
        foreach (var errorNode in nodes)
        {
            errors.Append(errorNode.InnerText).Append("\n");
        }

        throw new Exception(errors.ToString());
    }

    string transfered = Math.Round(amount / scale, 2).ToString();
    message = string.Format("Переведено {0} USD с счёта {1} на счёт {2}", transfered, account1, account2);
    worker.ReportProgress(0, false);
}
/// <summary>
/// Runs <c>extractTimetableNode</c> and then stores every <c>tr</c> descendant of
/// <c>timetableNode</c> in <c>timetableRows</c>.
/// </summary>
private void extractTimetableRows()
{
    // Must run first: the row query below reads timetableNode.
    extractTimetableNode();

    timetableRows = timetableNode.SelectNodes(".//tr");
}