/// <summary>
/// Downloads the document behind <paramref name="link"/> and builds a <see cref="Page"/> from it.
/// </summary>
/// <remarks>
/// The article text is the optional intro (<c>div.perex</c>) followed by every paragraph directly
/// under <c>div.clanek</c>, one per line. The paragraph with class <c>clanek-autor</c> is taken out
/// of the text; whatever follows its first ':' is split on commas to obtain the author names.
/// </remarks>
/// <param name="link">Address of the article to download.</param>
/// <returns>The page with text, title, keywords, categories, authors and publish date filled in.</returns>
/// <exception cref="NullReferenceException">
/// Thrown when the document has no paragraphs under <c>div.clanek</c>. The exception type is kept
/// as-is because callers may already catch it.
/// </exception>
public override Page DownloadPage(Uri link)
{
    HtmlDocument doc = this.GetHtmlDocumentFromLink(link);
    KeyValuePair<string, List<string>> baseInfo = GetTitleAndKeywords(doc);
    List<string> authors = new List<string>();

    HtmlNode pretexNode = doc.DocumentNode.SelectSingleNode("//div[@class='clanek']/div[@class='perex']");
    HtmlNodeCollection paragraphs = doc.DocumentNode.SelectNodes("//div[@class='clanek']/p");
    if (paragraphs == null)
    {
        throw new NullReferenceException(link + " doesn't contain paragraphs!!!");
    }

    HtmlNode authorNode = doc.DocumentNode.SelectSingleNode("//div[@class='clanek']/p[@class='clanek-autor']");
    if (authorNode != null)
    {
        // The author line matches the paragraph query above, so drop it from the body text.
        paragraphs.Remove(authorNode);

        string authorsString = authorNode.InnerText;
        authorsString = authorsString.Substring(authorsString.IndexOf(':') + 1).Trim();

        // RemoveEmptyEntries runs before trimming, so entries such as " " must be filtered afterwards.
        authors.AddRange(authorsString
            .Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries)
            .Select(p => p.Trim())
            .Where(p => p.Length > 0));
    }

    StringBuilder textBuilder = new StringBuilder();

    // The intro is optional; test for it instead of catching NullReferenceException.
    if (pretexNode != null)
    {
        textBuilder.AppendLine(pretexNode.InnerText);
    }

    foreach (HtmlNode node in paragraphs)
    {
        textBuilder.AppendLine(node.InnerText);
    }

    Page page = new Page(link.AbsoluteUri, textBuilder.ToString(), baseInfo.Key);
    page.Keywords = baseInfo.Value;
    page.Categories = GetCategory(doc);
    page.Author = authors;
    page.PublishDate = GetPublishDate(doc);
    return page;
}
/// <summary>
/// Downloads the Yahoo Taiwan quote page for <paramref name="stockId"/> and renders the quote
/// table as text, one "header:value" pair per line.
/// </summary>
/// <remarks>
/// The page is decoded as big5. The quote table is located by a fixed absolute path; header cells
/// come from its first row and values from the text of its second row, split on '\n'. The first
/// value is emitted without a header prefix.
/// </remarks>
/// <param name="stockId">Stock number appended to the query string.</param>
/// <returns>The HTML-decoded text built from the header/value pairs.</returns>
private string GetStock(int stockId)
{
    // Source page. WebClient and MemoryStream are disposable, so release them even when loading throws.
    HtmlDocument doc = new HtmlDocument();
    using (WebClient client = new WebClient())
    using (MemoryStream ms = new MemoryStream(client.DownloadData("http://tw.stock.yahoo.com/q/q?s=" + stockId)))
    {
        doc.Load(ms, Encoding.GetEncoding("big5"));
    }

    // Re-parse just the quote table so the relative "./tr" queries below start at its rows.
    HtmlDocument hdc = new HtmlDocument();
    hdc.LoadHtml(
        doc.DocumentNode.SelectSingleNode("/html[1]/body[1]/center[1]/table[2]/tr[1]/td[1]/table[1]")
        .InnerHtml);

    // Header cells of the quote table.
    HtmlNodeCollection htnode = hdc.DocumentNode.SelectNodes("./tr[1]/th");

    // Drop the caption column only when it is present; the original passed a possibly-null
    // FirstOrDefault result straight to Remove.
    HtmlNode caption = htnode.FirstOrDefault(item => item.InnerText == "個股資料");
    if (caption != null)
    {
        htnode.Remove(caption);
    }

    // Values of the quote row.
    string[] txt = hdc.DocumentNode.SelectSingleNode("./tr[2]").InnerText
        .Replace("加到投資組合", string.Empty)
        .Trim()
        .Split('\n');

    var result = new StringBuilder();
    int i = 0;
    foreach (HtmlNode nodeHeader in htnode)
    {
        // The first column is printed bare, without its header.
        var title = i == 0 ? string.Empty : nodeHeader.InnerText + ":";
        result.Append($"{title}{txt[i]} \n");
        i++;
    }

    return HttpUtility.HtmlDecode(result.ToString());
}
/// <summary>
/// Loads the HTML document and binds its top-level element nodes to the tree view, and adds one
/// list line per non-null entry of <c>WebPage.BlackWhiteList</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>LoadHtmlDocumentAsync</c> reports failure. When <c>htmlDoc</c> is null the
/// tree is bound to null.
/// </remarks>
private async Task LoadHtmlAsync()
{
    bool loaded = await LoadHtmlDocumentAsync();
    if (!loaded)
    {
        return;
    }

    HtmlNodeCollection topLevel = null;
    if (htmlDoc != null)
    {
        topLevel = htmlDoc.DocumentNode.ChildNodes;

        // Work on a snapshot so nodes can be removed from the live collection while looping.
        HtmlNode[] snapshot = topLevel.ToArray();
        for (int index = 0; index < snapshot.Length; index++)
        {
            HtmlNode candidate = snapshot[index];
            if (candidate.NodeType == HtmlNodeType.Element)
            {
                continue;
            }

            topLevel.Remove(candidate);
        }
    }

    if (WebPage.BlackWhiteList != null)
    {
        foreach (var entry in WebPage.BlackWhiteList)
        {
            if (entry != null)
            {
                // Each line gets its own clone of the entry.
                var entryLine = new HtmlBlackWhiteListItemLine(entry.Clone());
                entryLine.Deleted += Line_Deleted;
                stkIdentifies.Children.Add(entryLine);
            }
        }
    }

    tree.ItemTemplate = tree.Resources["htmlTemplate"] as HierarchicalDataTemplate;
    HtmlNodes = topLevel;
    tree.ItemsSource = HtmlNodes;
}
/// <summary>
/// Scrapes biznesradar.pl for the quarterly reports of company <paramref name="name"/> and
/// enriches them with the share price and number of shares.
/// </summary>
/// <remarks>
/// One <see cref="Raport"/> is created per quarter header of the first <c>report-table</c>. Every
/// following table row supplies one value per report through <c>Raport.Set(rowIndex, value)</c>.
/// Prices and share counts come from rows 1 and 2 of the market-value indicators table and are
/// assigned from the newest report backwards.
/// </remarks>
/// <param name="name">Company identifier as used in the biznesradar.pl URLs.</param>
/// <returns>
/// The reports with the first five skipped, or <c>null</c> when the row index reaches
/// <c>NbElement</c> of the first report.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown by <c>GetRange</c> when fewer than five reports were found (unchanged from the original).
/// </exception>
private static List<Raport> GetCompanyRaports(string name)
{
    List<Raport> companyRaports = new List<Raport>();

    HtmlDocument doc = LoadRaportPage("https://www.biznesradar.pl/raporty-finansowe-rachunek-zyskow-i-strat/" + name + ",Q");
    HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table[@class='report-table']");
    HtmlNode raport = tables[0];

    // Quarter names. The query is relative (".//") so that only headers of this table are read;
    // an XPath starting with "//" searches the whole document regardless of the context node.
    HtmlNodeCollection headers = raport.SelectNodes(".//th[@class='thq h'] | .//th[@class='thq h newest']");
    foreach (HtmlNode header in headers)
    {
        string quarter = Regex.Replace(header.InnerHtml, @"\s", "");
        companyRaports.Add(new Raport(quarter));
    }

    HtmlNodeCollection rows = raport.SelectNodes(".//tr ");
    rows.Remove(0); // the first row is skipped

    int i = 0;
    foreach (HtmlNode row in rows)
    {
        if (i == companyRaports[0].NbElement)
        {
            return null;
        }

        // SelectNodes returns null for a row without value cells; such a row only advances the index.
        HtmlNodeCollection values = row.SelectNodes(".//span[@class='value']/span/span");
        if (values != null)
        {
            int j = 0;
            foreach (HtmlNode value in values)
            {
                string v = Regex.Replace(value.InnerHtml, @"\s", "");
                companyRaports[j].Set(i, Convert.ToInt64(v));
                j++;
            }
        }

        i++;
    }

    doc = LoadRaportPage("https://www.biznesradar.pl/wskazniki-wartosci-rynkowej/" + name + ",0");
    HtmlNodeCollection indicatorRows = doc.DocumentNode.SelectNodes("//table[@class='report-table']//tr");

    // Share price.
    // NOTE(review): parsing uses the current culture, exactly like the original Convert.ToDouble.
    List<double> prices = new List<double>();
    foreach (string cell in ReadRaportCells(indicatorRows[1]))
    {
        double price;
        if (double.TryParse(cell, out price))
        {
            prices.Add(price);
        }
    }

    // Align from the newest report backwards; never walk past the first report.
    int pricedCount = Math.Min(prices.Count, companyRaports.Count);
    for (int j = 0; j < pricedCount; j++)
    {
        companyRaports[companyRaports.Count - j - 1].Price = prices[prices.Count - j - 1];
    }

    // Number of shares.
    List<long> numbers = new List<long>();
    foreach (string cell in ReadRaportCells(indicatorRows[2]))
    {
        long number;
        if (long.TryParse(cell.Replace(" ", ""), out number))
        {
            numbers.Add(number);
        }
    }

    int countedCount = Math.Min(numbers.Count, companyRaports.Count);
    for (int j = 0; j < countedCount; j++)
    {
        companyRaports[companyRaports.Count - j - 1].NumberShares = numbers[numbers.Count - j - 1];
    }

    return companyRaports.GetRange(5, companyRaports.Count - 5);
}

/// <summary>Downloads <paramref name="url"/> and parses it, disposing the response and its stream.</summary>
private static HtmlDocument LoadRaportPage(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    using (WebResponse response = request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        HtmlDocument doc = new HtmlDocument();
        doc.Load(stream);
        return doc;
    }
}

/// <summary>Returns the trimmed, non-empty text of every <c>td</c> under <paramref name="row"/>.</summary>
private static List<string> ReadRaportCells(HtmlNode row)
{
    List<string> cells = new List<string>();
    HtmlNodeCollection tds = row.SelectNodes(".//td");
    if (tds == null)
    {
        return cells;
    }

    foreach (HtmlNode td in tds)
    {
        string text = td.InnerText.Trim();
        if (text != "")
        {
            cells.Add(text);
        }
    }

    return cells;
}
//------------------------------------------------------------------------
/// <summary>
/// Requests one page of giveaways and returns the rows that pass the filters, moving on to the
/// next page when the current one has no usable rows.
/// </summary>
/// <remarks>
/// A row is dropped when its grandparent sits at one fixed XPath position, when <c>CheckNode</c>
/// rejects its cost node, or when the first number in the cost text lies outside
/// <c>Program.settings.From</c>..<c>Program.settings.To</c>.
/// </remarks>
/// <param name="request">Relative request; an empty string means the default giveaway search.</param>
/// <param name="page">1-based page number; values above 1 are appended to the request.</param>
/// <returns>
/// The remaining rows (possibly empty), or <c>null</c> when the HTTP request failed and the error
/// message was shown.
/// </returns>
private async Task<HtmlNodeCollection> GetNodes(string request, int page = 1)
{
    CookieContainer cookies = new CookieContainer();
    cookies.Add(Program.BaseAddress, new Cookie("PHPSESSID", Program.settings.PHPSESSID));

    string pagedRequest = request;
    if (page > 1)
    {
        pagedRequest += pagedRequest == "" ? "giveaways/search?page=" + page : "&page=" + page;
    }

    string responseString;
    using (var handler = new HttpClientHandler() { CookieContainer = cookies })
    using (var client = new HttpClient(handler) { BaseAddress = Program.BaseAddress })
    {
        HttpResponseMessage result;
        try
        {
            result = await client.GetAsync(pagedRequest);
            result.EnsureSuccessStatusCode();
        }
        catch
        {
            // Best effort: any request failure is reported to the user and yields null.
            MainForm.ShowLoadErrorMessage("Filed to get giveaways");
            return null;
        }

        responseString = await result.Content.ReadAsStringAsync();
    }

    HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(responseString);

    // SelectNodes returns null when nothing matches. Treat that as an empty page so the
    // pagination logic below still runs instead of throwing in the loop.
    HtmlNodeCollection rows =
        document.DocumentNode.SelectNodes("//*[@class=\"giveaway__row-inner-wrap\"]")
        ?? new HtmlNodeCollection(null);

    List<HtmlNode> rejected = new List<HtmlNode>();
    foreach (var row in rows)
    {
        if (row.ParentNode.ParentNode.XPath == "/html[1]/body[1]/div[4]/div[1]/div[1]/div[2]/div[1]/div[1]")
        {
            rejected.Add(row);
            continue;
        }

        HtmlNode costNode = row.SelectSingleNode(".//*[@class=\"giveaway__heading__thin\"]");
        if (CheckNode(costNode) == false)
        {
            rejected.Add(row);
            continue;
        }

        Match m = Regex.Match(costNode.InnerText, @"\d+");
        if (m.Success)
        {
            decimal gameCost = Convert.ToDecimal(m.Value);
            if (gameCost < Program.settings.From || gameCost > Program.settings.To)
            {
                rejected.Add(row);
            }
        }
    }

    foreach (var row in rejected)
    {
        rows.Remove(row);
    }

    if (rows.Count != 0)
    {
        return rows;
    }

    // Nothing usable on this page. Re-parse before querying again, as the original did -
    // presumably because removing nodes from the collection above alters the parsed tree.
    document.LoadHtml(responseString);

    HtmlNode resultNode = document.DocumentNode.SelectSingleNode("//*[@class=\"pagination__results\"]");
    if (CheckNode(resultNode) == false || resultNode.InnerText == "No results were found.")
    {
        return rows;
    }

    HtmlNode pagesNode = document.DocumentNode.SelectSingleNode("//*[@class=\"pagination__navigation\"]");
    if (CheckNode(pagesNode) == false)
    {
        return rows;
    }

    return await GetNodes(request, page + 1);
}
/// <summary>
/// Walks the gdebar.ru catalogue page by page and collects one <see cref="BarInfo"/> per listed
/// bar; menu positions of bars that publish a menu are appended to <c>Menuitems_list</c>.
/// </summary>
/// <remarks>
/// Paging stops when the catalogue section shows the "nothing found" message, or when the section
/// or the bar cards are missing altogether. Bars whose menu page shows the error alert are skipped.
/// Progress is written to the console and the loop sleeps two seconds between catalogue pages.
/// A bar page without phone or address nodes, or an empty geocoder answer, still throws, as in
/// the original.
/// </remarks>
/// <param name="barinfo_list_list">Receives the collected bars.</param>
public static void Do(out List<BarInfo> barinfo_list_list)
{
    barinfo_list_list = new List<BarInfo>();

    bool endOfPages = false;
    int pageNumber = 1;
    while (!endOfPages)
    {
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(Program.getRequest(@"http://gdebar.ru/bars?mainType[0]=3&withFilter=1&p=" + pageNumber.ToString() + "&fromUrl=/bars"));

        HtmlNode catalog = doc.DocumentNode.SelectSingleNode("//section[@class = 'catalog__list']");
        HtmlNodeCollection cardLinks = doc.DocumentNode.SelectNodes("//div[@class = 'place-card__specif']/a");

        // A missing catalogue or a page without any card is treated like the explicit
        // "nothing found" message instead of dereferencing null.
        if (catalog == null
            || catalog.InnerText.Contains("По данному запросу заведений не найдено :(")
            || cardLinks == null)
        {
            endOfPages = true;
            Console.WriteLine("stop");
            continue;
        }

        Console.WriteLine("continue");

        // Keep only links whose text contains a line break. Filtering into a new list avoids
        // the original remove-while-indexing loop, which skipped the element after each removal.
        List<HtmlNode> bars = new List<HtmlNode>();
        foreach (HtmlNode link in cardLinks)
        {
            if (link.InnerText.Contains("\r\n"))
            {
                bars.Add(link);
            }
        }

        foreach (HtmlNode bar in bars) // test output
        {
            Console.WriteLine(bar.InnerText);
        }

        Console.WriteLine(bars.Count);

        foreach (HtmlNode bar in bars)
        {
            HtmlDocument barDoc = new HtmlDocument();
            barDoc.LoadHtml(Program.getRequest("http://gdebar.ru" + bar.Attributes["href"].Value + "/menu"));
            HtmlNode root = barDoc.DocumentNode;

            if (root.SelectNodes("//div[@class = 'text-center alert alert-danger h1']") != null)
            {
                continue;
            }

            HtmlNodeCollection menu = root.SelectNodes("//div[@class = 'menu__dish d-flex align-items-center justify-content-between p-2']");
            bool hasMenu = menu != null;

            BarInfo info = new BarInfo();
            if (hasMenu)
            {
                Console.WriteLine(bar.InnerText.Trim());
            }

            info.NearSubway = ReadNearSubway(root);

            if (hasMenu)
            {
                string menuBarName = bar.InnerText.Replace("\r", "").Replace("\n", "").Trim().Replace("ё", "е");
                foreach (HtmlNode dish in menu)
                {
                    Menuitems_list.Add(ReadMenuItem(dish, menuBarName));
                }

                Console.WriteLine("-");
            }

            // The two branches of the original used different placeholder texts for an unknown schedule.
            FillBarDetails(info, root, hasMenu ? "отсутсвует" : "время работы неизвестно");
            if (hasMenu)
            {
                Console.WriteLine("dddddddddddddddd - " + info.Lat);
            }

            // string.Replace returns a new string; the original discarded it.
            info.BarName = bar.InnerText.Trim().Replace("ё", "е");
            info.HasMenu = hasMenu;
            barinfo_list_list.Add(info);
        }

        Console.WriteLine(pageNumber);
        pageNumber += 1;
        Thread.Sleep(2000);
    }

    Console.WriteLine("--------------------------------------------------------------------");
}

/// <summary>
/// Reads the subway stations of a bar page: the first word of every dropdown link, else the first
/// word of the metro block, else a single "отсутствует" placeholder.
/// </summary>
private static string[] ReadNearSubway(HtmlNode root)
{
    HtmlNodeCollection stations = root.SelectNodes("//div[@class = 'dropdown-menu dropdown-menu--tooltip']/a");
    if (stations != null)
    {
        List<string> names = new List<string>();
        foreach (HtmlNode station in stations)
        {
            names.Add(station.InnerText.Replace("\n", "").Trim().Split(" ")[0]);
        }

        return names.ToArray();
    }

    HtmlNode metro = root.SelectSingleNode("//div[@class = 'metro d-flex align-items-start pl-4 mt-2 flex-wrap']");
    if (metro != null)
    {
        return new string[] { metro.InnerText.Replace("\r\n", "").Trim().Split(" ")[0] };
    }

    return new string[] { "отсутствует" };
}

/// <summary>
/// Builds one menu position from a dish node, taking its category captions from fixed ancestor
/// levels of the menu markup, and echoes what was read to the console.
/// </summary>
private static MenuItems ReadMenuItem(HtmlNode dish, string barName)
{
    MenuItems menuItem = new MenuItems();

    HtmlNode thirdLevel = dish.ParentNode.ParentNode.ParentNode;    // optional third-level category
    HtmlNode secondLevel = thirdLevel.ParentNode.ParentNode;        // second-level category
    HtmlNode topLevel = secondLevel.ParentNode.ParentNode;          // top-level category

    menuItem.BarName = barName;

    if (topLevel.GetAttributeValue("class", "") == "parent")
    {
        string titleText = topLevel.ChildNodes[0].InnerText;
        menuItem.Title = BeforeParenthesis(titleText);
        Console.WriteLine(titleText);
    }

    string subtitleText = secondLevel.ChildNodes[0].InnerText;
    string subtitle2Text = thirdLevel.ChildNodes[0].ChildNodes[0].InnerText;
    menuItem.Subtitle = BeforeParenthesis(subtitleText);
    menuItem.Subtitle_2 = BeforeParenthesis(subtitle2Text);
    Console.WriteLine(subtitleText);
    Console.WriteLine(subtitle2Text);

    HtmlNodeCollection parts = dish.ChildNodes;
    menuItem.Dish = parts[0].ChildNodes[0].InnerText;
    menuItem.Price = Convert.ToInt32(parts[1].InnerText.Split(" ")[0]);
    Console.WriteLine("блюдо - " + parts[0].ChildNodes[0].InnerText);
    Console.WriteLine("цена - " + parts[1].InnerText);

    return menuItem;
}

/// <summary>Returns the part of <paramref name="caption"/> before the first " (", or all of it.</summary>
private static string BeforeParenthesis(string caption)
{
    return caption.Split(" (")[0];
}

/// <summary>
/// Fills picture links, working hours, phone and coordinates of <paramref name="info"/> from a bar page.
/// </summary>
/// <param name="info">Bar record to fill.</param>
/// <param name="root">Document node of the bar page.</param>
/// <param name="unknownWorkTime">Text stored when the working hours cannot be read.</param>
private static void FillBarDetails(BarInfo info, HtmlNode root, string unknownWorkTime)
{
    // Picture links; a single null entry marks a bar without a gallery.
    HtmlNodeCollection gallery = root.SelectNodes("//div[@id = 'bar-gallery-main']/div/a");
    if (gallery != null)
    {
        foreach (HtmlNode picture in gallery)
        {
            info.PictureLinks.Add(picture.Attributes["href"].Value);
        }
    }
    else
    {
        info.PictureLinks.Add(null);
    }

    info.WorkTime = unknownWorkTime;
    HtmlNode schedule = root.SelectSingleNode("//a[@class = 'fancybox3']");
    if (schedule != null && schedule.InnerText.ToLower().Contains("работает"))
    {
        // The check above is case-insensitive but the split is not, so the marker may be absent here.
        string[] pieces = schedule.InnerText.Replace("\r\n", "").Split("работает ");
        if (pieces.Length > 1)
        {
            info.WorkTime = pieces[1].Replace(" ", "");
        }
    }

    HtmlNode phone = root.SelectSingleNode("//a[@class = 'roistat-phone']")
        ?? root.SelectSingleNode("//div[@class = 'phone bar__main--info__line d-flex align-items-center justify-content-start mb-4 w-100 flex-nowrap']");
    info.Phone = phone.InnerText.Trim();

    string address = root.SelectNodes("//span[@class = 'font-weight-light mr-0']")[0].InnerText.Trim();
    List<string> position = Yandex.Yandex.GetPos(Apikey, address);
    string[] lngLat = position[0].Split(" ");

    Console.WriteLine("------------------------------------------------------------------------------------------------------------------------------------------------------");
    Console.WriteLine(lngLat[1]);
    Console.WriteLine("---------------------------------------------------");

    // Parse the dot-separated coordinates with the invariant culture instead of rewriting
    // '.' to ',' and relying on the machine's regional settings.
    info.Lat = double.Parse(lngLat[1], System.Globalization.CultureInfo.InvariantCulture); // latitude
    info.Lng = double.Parse(lngLat[0], System.Globalization.CultureInfo.InvariantCulture); // longitude
}
/// <summary>
/// Downloads the document behind <paramref name="link"/> and builds a <see cref="Page"/> from it.
/// </summary>
/// <remarks>
/// The text is the optional intro (<c>section.detailViewIntro</c>) followed by every non-blank
/// paragraph of <c>section.detailViewContent</c>, one per line. Author names are read from
/// <c>span[itemprop='name']</c> elements when a <c>p.contentAuthor</c> node exists.
/// </remarks>
/// <param name="link">Address of the article to download.</param>
/// <returns>The page with text, title, keywords, categories, authors and publish date filled in.</returns>
/// <exception cref="NullReferenceException">
/// Thrown when the document has neither an article container nor content paragraphs. The exception
/// type matches what the original code raised in both situations.
/// </exception>
public override Page DownloadPage(Uri link)
{
    HtmlDocument doc = this.GetHtmlDocumentFromLink(link);
    KeyValuePair<string, List<string>> baseInfo = GetTitleAndKeywords(doc);
    List<string> authors = new List<string>();

    HtmlNode mainnode = doc.DocumentNode.SelectSingleNode("//article")
        ?? doc.DocumentNode.SelectSingleNode("//div[@class='article ']");
    if (mainnode == null)
    {
        // Previously an implicit null dereference; now the message says what is missing.
        throw new NullReferenceException(link + " doesn't contain an article!!!");
    }

    // NOTE(review): every query below starts with "//", which searches the whole document rather
    // than the subtree of the node it is called on. Left unchanged; confirm whether ".//" was meant.
    HtmlNode pretextNode = mainnode.SelectSingleNode("//section[@class='detailViewIntro']");
    HtmlNodeCollection paragraphs = mainnode.SelectNodes("//section[@class='detailViewContent']/p");
    if (paragraphs == null)
    {
        throw new NullReferenceException(link + " doesn't contain paragraph!!!");
    }

    HtmlNode authorNode = mainnode.SelectSingleNode("//p[@class='contentAuthor']");
    if (authorNode != null)
    {
        // The author line is not necessarily one of the content paragraphs. Removing an absent
        // node used to raise ArgumentOutOfRangeException, which was swallowed and skipped the
        // author extraction below.
        if (paragraphs.Contains(authorNode))
        {
            paragraphs.Remove(authorNode);
        }

        HtmlNodeCollection nameNodes = authorNode.SelectNodes("//span[@itemprop='name']");
        if (nameNodes != null)
        {
            foreach (HtmlNode n in nameNodes)
            {
                authors.Add(n.InnerText.Trim());
            }
        }
    }

    StringBuilder textBuilder = new StringBuilder();
    if (pretextNode != null)
    {
        textBuilder.AppendLine(pretextNode.InnerText.Trim());
    }

    foreach (HtmlNode p in paragraphs)
    {
        string text = p.InnerText.Trim();
        if (text != "")
        {
            textBuilder.AppendLine(text);
        }
    }

    Page page = new Page(link.AbsoluteUri, textBuilder.ToString(), baseInfo.Key);
    page.Keywords = baseInfo.Value;
    page.Categories = GetCategory(doc);
    page.Author = authors;
    page.PublishDate = GetPublishDate(doc);
    return page;
}
/// <summary>
/// Collects Baidu results for <paramref name="key"/> from <c>bdpage</c> result pages and appends
/// the matching ones to <c>dt1</c>.
/// </summary>
/// <remarks>
/// Left-column results (<c>content_left</c>) are read when <c>leftrank</c> is set, right-column
/// results (<c>ec_im_container</c>) when <c>rightrank</c> is set. While Baidu answers with its
/// "request cannot be served" page the user is prompted and the same page is fetched again after
/// 30 seconds. Results lacking the expected nodes are skipped individually, and a failure on one
/// side no longer prevents the other side from being collected.
/// </remarks>
/// <param name="key">Search keyword; it is concatenated into the query string as-is.</param>
private void getBaiduCreative(string key)
{
    // NOTE(review): key is not URL-encoded here; confirm whether GetWebPageSource handles that.
    List<string> BDurl = new List<string>();
    for (int i = 0; i < bdpage; i++)
    {
        BDurl.Add("http://www.baidu.com/s?wd=" + key + "&pn=" + (i * 10).ToString() + "&ie=utf-8&usm=4");
    }

    for (int j = 0; j < BDurl.Count; j++)
    {
        string bd_source = GetWebPageSource(BDurl[j]);

        // Blocked by Baidu: ask the user to lift the block, wait, then retry the same page.
        while (bd_source.Contains("很抱歉,您的请求暂时无法响应!"))
        {
            MessageBox.Show("对不起!在点击确定之前解除百度屏蔽!");
            Thread.Sleep(30000);
            bd_source = GetWebPageSource(BDurl[j]);
        }

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(bd_source);

        // Collect the left-column ranking.
        if (leftrank)
        {
            try
            {
                CollectBaiduLeftRank(doc, key);
            }
            catch (System.Exception ex)
            {
                // Still best effort, but the failure is traced instead of silently dropped.
                System.Diagnostics.Debug.WriteLine(ex);
            }
        }

        // Collect the right-column ranking.
        if (rightrank)
        {
            try
            {
                CollectBaiduRightRank(doc, key);
            }
            catch (System.Exception ex)
            {
                System.Diagnostics.Debug.WriteLine(ex);
            }
        }
    }

    Thread.Sleep(2000);
    y++;
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "Baidu" });
}

/// <summary>Wraps an HTML fragment in a minimal page and returns the parsed document node.</summary>
private static HtmlNode ParseBaiduFragment(string innerHtml)
{
    HtmlDocument page = new HtmlDocument();
    page.LoadHtml("<!doctype html><html><head><title>baidu</title></head><body>" + innerHtml + "</body></html>");
    return page.DocumentNode;
}

/// <summary>
/// Reads the left-column results: entries made of three blocks (title, description, domain) or
/// four blocks (title, two descriptions, domain). Entries of any other shape are ignored.
/// </summary>
private void CollectBaiduLeftRank(HtmlDocument doc, string key)
{
    HtmlNode container = doc.GetElementbyId("content_left");
    if (container == null)
    {
        return;
    }

    HtmlNodeCollection results = ParseBaiduFragment(container.InnerHtml).SelectNodes("/html/body/div");
    if (results == null)
    {
        return;
    }

    int rank = 1;
    foreach (HtmlNode result in results)
    {
        if (isContains(result.OuterHtml))
        {
            continue;
        }

        HtmlNode body = ParseBaiduFragment(result.InnerHtml);
        HtmlNodeCollection parts = body.SelectNodes("/html/body/div");
        if (parts == null || (parts.Count != 3 && parts.Count != 4))
        {
            continue;
        }

        bool hasSecondDescription = parts.Count == 4;

        // The domain sits in a span of the last block.
        HtmlNode domainNode = body.SelectSingleNode("/html/body/div[" + parts.Count + "]/span");
        if (domainNode == null)
        {
            continue;
        }

        // NOTE(review): both arguments of Replace below look identical in the source text; the
        // first may originally have been a non-breaking space. Reproduced as found.
        string title = parts[0].InnerText;
        string desc1 = parts[1].InnerText.Replace(" ", " ");
        string desc2 = hasSecondDescription ? format(parts[2].InnerText.Replace(" ", " ")) : "";
        string biddomain = domainNode.InnerText;
        string hospital = Common.MatchURL(result.InnerHtml, "data-renzheng=\"{title:'", ":'");

        if (ShouldRecordBaiduRow(hospital, biddomain))
        {
            dt1.Rows.Add(new object[] { id++, key, rule, "Baidu", "左" + (rank++).ToString(), format(title), format(desc1), desc2, biddomain, hospital });
        }
    }
}

/// <summary>
/// Reads the right-column results; the first block of the container is not a result and is skipped.
/// </summary>
private void CollectBaiduRightRank(HtmlDocument doc, string key)
{
    HtmlNode container = doc.GetElementbyId("ec_im_container");
    if (container == null)
    {
        return;
    }

    HtmlNodeCollection results = ParseBaiduFragment(container.InnerHtml).SelectNodes("/html/body/div");
    if (results == null)
    {
        return;
    }

    int rank = 1;
    for (int n = 1; n < results.Count; n++)
    {
        HtmlNode result = results[n];
        HtmlNode body = ParseBaiduFragment(result.InnerHtml);

        HtmlNode titleNode = body.SelectSingleNode("/html/body/a");
        HtmlNode descNode = body.SelectSingleNode("/html/body/a[2]");
        HtmlNode domainNode = body.SelectSingleNode("/html/body/a[2]/font[2]");
        if (titleNode == null || descNode == null || domainNode == null)
        {
            continue;
        }

        string title = titleNode.InnerText;
        string biddomain = domainNode.InnerText;

        // The description link also contains the domain; strip it. string.Replace rejects an
        // empty search value, so leave the text alone in that case.
        string desc1 = biddomain.Length == 0 ? descNode.InnerText : descNode.InnerText.Replace(biddomain, "");
        string desc2 = "";
        string hospital = Common.MatchURL(result.InnerHtml, "data-renzheng=\"{title:'", ":'");

        if (ShouldRecordBaiduRow(hospital, biddomain))
        {
            dt1.Rows.Add(new object[] { id++, key, rule, "Baidu", "右" + (rank++).ToString(), format(title), format(desc1), desc2, biddomain, hospital });
        }
    }
}

/// <summary>
/// Decides whether a result is recorded: always when no rule is set, otherwise when the rule
/// matches the hospital name ("NameMode") or the domain ("DomainMode").
/// </summary>
private bool ShouldRecordBaiduRow(string hospital, string biddomain)
{
    if (rule.Trim() == "")
    {
        return true;
    }

    if (mode == "NameMode" && MatchRule(hospital))
    {
        return true;
    }

    return mode == "DomainMode" && MatchRule(biddomain);
}