/// <summary>
/// Downloads Baidu's suggestion response for <paramref name="key"/> into
/// "downword\su.txt", then adds one row per suggestion to <c>dt1</c>
/// (id, keyword, suggestion, "Baidu").
/// </summary>
/// <param name="key">The keyword to look up; it is URL-encoded before being sent.</param>
private void getBaiduDownword(string key)
{
    string url = "http://suggestion.baidu.com/su?wd=" + System.Net.WebUtility.UrlEncode(key) + "&p=3&t=1273278850500";
    string filename = "downword\\su.txt";

    // Remove any previous download so a failed request cannot be mistaken for fresh data.
    if (File.Exists(filename))
    {
        File.Delete(filename);
    }
    SaveFileFromUrl(filename, url);

    if (File.Exists(filename))
    {
        string sufile = File.ReadAllText(filename, System.Text.Encoding.Default);

        // The suggestions are the comma-separated content of the first [...] pair.
        // A response without '[' (error page, empty body) is skipped instead of
        // throwing, so the counter and the progress callback below still run.
        string[] parts = sufile.Split('[');
        if (parts.Length > 1)
        {
            string[] su = parts[1].Split(']')[0].Split(',');
            foreach (string item in su)
            {
                dt1.Rows.Add(new object[4] { id++, key, item.Replace("\"", ""), "Baidu" });
            }
        }
    }

    y++;
    // Queue OutTextNew on the dispatcher with (keyword, source).
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "Baidu" });
}
/// <summary>
/// Fetches 360 (so.com) suggestions for <paramref name="key"/> and adds every
/// suggestion that passes MatchInclude / MatchNoInclude / MatchDataTable to
/// <c>dt1</c> (id, keyword, suggestion, "So"), queuing OutTextAddO for each added row.
/// </summary>
/// <param name="key">The keyword to look up; it is URL-encoded before being sent.</param>
private void getSoDownword(string key)
{
    // Encode the keyword like the Baidu/Taobao lookups do, so reserved characters
    // ('&', '#', '+', spaces) cannot corrupt the query string.
    string url = "http://sug.so.360.cn/suggest?callback=suggest_so&encodein=utf-8&encodeout=utf-8&format=json&fields=word,obdata&word=" + System.Net.WebUtility.UrlEncode(key);
    string source = getHtmlInfo(url);

    // The suggestions are the comma-separated content of the first [...] pair.
    // A missing or malformed response yields no suggestions instead of an exception.
    string[] su = new string[0];
    if (source != null)
    {
        string[] parts = source.Split('[');
        if (parts.Length > 1)
        {
            su = parts[1].Split(']')[0].Split(',');
        }
    }

    foreach (string item in su)
    {
        // Strip the {"word":"..."} wrapper around each suggestion.
        string str = item.Trim().Replace("{\"word\":\"", "").Replace("\"}", "");
        if (MatchInclude(str) && MatchNoInclude(str) && MatchDataTable(str))
        {
            dt1.Rows.Add(new object[4] { id++, key, str, "So" });
            OutDelegateAddO adddelegate = new OutDelegateAddO(OutTextAddO);
            this.Dispatcher.BeginInvoke(adddelegate, new object[4] { id, str, key, "So" });
        }
    }

    y++;
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "So" });
    // Pause between requests.
    Thread.Sleep(500);
}
/// <summary>
/// Pings the URL in column 1 of every <c>dt1</c> row, then resolves each URL that
/// ended up in <c>forbid_url</c> through getIP and marks its rows "no-ping".
/// Rows whose column 2 is still empty afterwards are marked "error".
/// </summary>
public void Go()
{
    // Pass 1: ping each row.
    int rowIndex = 0;
    while (rowIndex < dt1.Rows.Count)
    {
        Person target = new Person();
        target.Id = rowIndex;
        target.Url = dt1.Rows[rowIndex][1].ToString();
        GetPingResult_new(target);
        rowIndex++;
    }

    // Pass 2: fall back to getIP for every URL collected in forbid_url.
    for (int k = 0; k < forbid_url.Count; k++)
    {
        string resolved = getIP(forbid_url[k]);
        foreach (DataRow row in dt1.Rows)
        {
            if (row[1].ToString() != forbid_url[k])
            {
                continue;
            }
            row[2] = resolved;
            row[3] = "no-ping";
        }

        y++;
        OutDelegateNew notifier = new OutDelegateNew(OutTextNew);
        this.Dispatcher.BeginInvoke(notifier, new object[] { forbid_url[k], "模式no-ping" });
    }

    // Pass 3: anything still without a value in column 2 is flagged as an error.
    foreach (DataRow row in dt1.Rows)
    {
        bool unresolved = row[2].ToString() == "";
        if (unresolved)
        {
            row[3] = "error";
        }
    }
}
/// <summary>
/// Synchronously pings the URL carried by <paramref name="son"/>. On success the
/// reply address, "OK" and the round-trip time are written to columns 2-4 of the
/// <c>dt1</c> row at the person's Id and OutTextNew is queued; on any failure the
/// URL is appended to <c>forbid_url</c>.
/// </summary>
/// <param name="son">A <c>Person</c> whose Id is the row index and whose Url is the ping target.</param>
/// <exception cref="ArgumentException"><paramref name="son"/> is null or not a <c>Person</c>.</exception>
private void GetPingResult_new(object son)
{
    Person person = son as Person;
    if (person == null)
    {
        // Previously this surfaced as a NullReferenceException on the next line.
        throw new ArgumentException("Expected a non-null Person instance.", "son");
    }

    int i = person.Id;
    string url = person.Url;
    try
    {
        // Ping implements IDisposable; release it deterministically.
        using (System.Net.NetworkInformation.Ping p1 = new System.Net.NetworkInformation.Ping())
        {
            PingReply reply = p1.Send(url); // blocking call
            if (reply.Status == IPStatus.Success)
            {
                dt1.Rows[i][2] = reply.Address.ToString();
                dt1.Rows[i][3] = "OK";
                dt1.Rows[i][4] = reply.RoundtripTime;
                y++;
                OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
                this.Dispatcher.BeginInvoke(outdelegate, new object[] { url, "模式ping" });
                return;
            }
        }

        // Non-success status: record the URL in forbid_url.
        forbid_url.Add(url);
    }
    catch
    {
        // Deliberately broad: any failure while pinging or recording the result
        // means the URL is recorded in forbid_url instead.
        forbid_url.Add(url);
    }
}
/// <summary>
/// Fetches Taobao suggestions for <paramref name="key"/> and adds every suggestion
/// that passes MatchInclude / MatchNoInclude / MatchDataTable to <c>dt1</c>
/// (id, keyword, suggestion, "Taobao"), queuing OutTextAddO for each added row.
/// </summary>
/// <param name="key">The keyword to look up; it is URL-encoded before being sent.</param>
private void getTaobaoDownword(string key)
{
    string url = "http://suggest.taobao.com/sug?code=utf-8&q=" + System.Net.WebUtility.UrlEncode(key) + "&_ksTS=1412814635814_1642&callback=jsonp1643&k=1&area=c2c&bucketid=7";
    string source = getHtmlInfo(url);
    int startindex = source.IndexOf(":[");
    int lastindex = source.LastIndexOf("],\"magic\"");

    // Parse only when both markers were found in the expected order. The earlier
    // code relied on Substring throwing, which let a response with a missing
    // start marker slip through as a garbage substring.
    string[] su = null;
    if (startindex >= 0 && lastindex >= startindex + 2)
    {
        // Flatten the nested ["word","count"] pairs into one comma-separated list.
        string taobao = source.Substring(startindex + 2, lastindex - startindex - 2)
            .Replace("\",\"", ",")
            .Replace("\"],[\"", ",")
            .Replace("[\"", "")
            .Replace("\"]", "");
        su = taobao.Split(',');
    }

    if (su != null)
    {
        // Only every other element (1st, 3rd, ...) is used as a suggestion;
        // the elements in between are skipped.
        for (int n = 0; n < su.Length; n += 2)
        {
            string str = Common.StripHT(su[n].Trim());
            if (MatchInclude(str) && MatchNoInclude(str) && MatchDataTable(str))
            {
                dt1.Rows.Add(new object[4] { id++, key, str, "Taobao" });
                OutDelegateAddO adddelegate = new OutDelegateAddO(OutTextAddO);
                this.Dispatcher.BeginInvoke(adddelegate, new object[4] { id, str, key, "Taobao" });
            }
        }
    }

    y++;
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "Taobao" });
    // Pause between requests.
    Thread.Sleep(500);
}
/// <summary>
/// Fetches Sogou suggestions for <paramref name="key"/> and adds every suggestion
/// that passes MatchInclude / MatchNoInclude / MatchDataTable to <c>dt1</c>
/// (id, keyword, suggestion, "Sogou"), queuing OutTextAddO for each added row.
/// </summary>
/// <param name="key">The keyword to look up; it is URL-encoded before being sent.</param>
private void getSogouDownword(string key)
{
    // Encode the keyword like the Baidu/Taobao lookups do, so reserved characters
    // ('&', '#', '+', spaces) cannot corrupt the query string.
    string url = "http://w.sugg.sogou.com/sugg/ajaj_json.jsp?key=" + System.Net.WebUtility.UrlEncode(key) + "&type=web&ori=yes&pr=web&abtestid=8&ipn=";
    string source = getHtmlInfo(url);

    // The suggestions are the content of the second [...] pair. Check the shape
    // explicitly instead of swallowing whatever exception a bad response causes.
    string[] su = null;
    if (source != null)
    {
        string[] parts = source.Split('[');
        if (parts.Length > 2)
        {
            su = parts[2].Split(']')[0].Split(',');
        }
    }

    if (su != null)
    {
        foreach (string item in su)
        {
            string str = item.Trim().Replace("\"", "");
            if (MatchInclude(str) && MatchNoInclude(str) && MatchDataTable(str))
            {
                dt1.Rows.Add(new object[4] { id++, key, str, "Sogou" });
                OutDelegateAddO adddelegate = new OutDelegateAddO(OutTextAddO);
                this.Dispatcher.BeginInvoke(adddelegate, new object[4] { id, str, key, "Sogou" });
            }
        }
    }

    y++;
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "Sogou" });
    // Pause between requests.
    Thread.Sleep(500);
}
/// <summary>
/// Downloads the 360 (so.com) result page for <paramref name="sourcekey"/>, reads
/// the anchor texts inside the element with id "rs", and adds every text that
/// passes MatchInclude / MatchNoInclude / MatchDataTable to <c>dt1</c>
/// (id, related word, source keyword, "So").
/// </summary>
/// <param name="sourcekey">The keyword to search for; it is URL-encoded before being sent.</param>
private void getSoRelatedword(string sourcekey)
{
    string pageUrl = "http://www.so.com/s?ie=utf-8&shb=1&src=360sou_newhome&q=" + System.Net.WebUtility.UrlEncode(sourcekey);

    byte[] pageSourceBytes;
    // WebClient is IDisposable; release it as soon as the download finishes.
    using (WebClient wc = new WebClient())
    {
        pageSourceBytes = wc.DownloadData(new Uri(pageUrl));
    }
    string pageSource = Encoding.GetEncoding("utf-8").GetString(pageSourceBytes);

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageSource);
    HtmlNodeCollection keyNodes = doc.DocumentNode.SelectNodes("//*[@id=\"rs\"]");

    // SelectNodes yields null when nothing matches; a page without the "rs"
    // element simply produces no rows.
    if (keyNodes != null && keyNodes.Count > 0)
    {
        Regex href = new Regex(@"<a\s*[^>]*>([\s\S]+?)</a>", RegexOptions.IgnoreCase);
        MatchCollection like_key = href.Matches(keyNodes[0].InnerHtml);
        foreach (Match ma in like_key)
        {
            string str = ma.Groups[1].Value;
            if (MatchInclude(str) && MatchNoInclude(str) && MatchDataTable(str))
            {
                dt1.Rows.Add(new object[4] { id++, str, sourcekey, "So" });

                // The four-value payload (id, word, keyword, source) is the one the
                // other lookups hand to OutTextAddO; OutDelegateNew is invoked with
                // two arguments everywhere else, so queuing it with four would fail
                // when the dispatcher invokes it.
                OutDelegateAddO adddelegate = new OutDelegateAddO(OutTextAddO);
                this.Dispatcher.BeginInvoke(adddelegate, new object[4] { id, str, sourcekey, "So" });
            }
        }
    }

    // Pause between requests.
    Thread.Sleep(500);
}
/// <summary>
/// Fetches Baidu suggestions (JSON mode) for <paramref name="key"/> and adds every
/// suggestion that passes MatchInclude / MatchNoInclude / MatchDataTable to
/// <c>dt1</c> (id, suggestion, keyword, "Baidu"), queuing OutTextAddO for each added row.
/// </summary>
/// <param name="key">The keyword to look up; it is URL-encoded before being sent.</param>
private void getBaiduDownword2(string key)
{
    string url = "http://suggestion.baidu.com/su?wd=" + System.Net.WebUtility.UrlEncode(key) + "&sugmode=2&zxmode=1&json=1&p=3";
    string source = getHtmlInfo(url);

    // The suggestions are the comma-separated content of the first [...] pair.
    // A missing or malformed response yields no suggestions instead of an
    // exception, so the counter and the progress callback below still run.
    string[] su = new string[0];
    if (source != null)
    {
        string[] parts = source.Split('[');
        if (parts.Length > 1)
        {
            su = parts[1].Split(']')[0].Split(',');
        }
    }

    foreach (string item in su)
    {
        string str = item.Trim().Replace("\"", "");
        if (MatchInclude(str) && MatchNoInclude(str) && MatchDataTable(str))
        {
            dt1.Rows.Add(new object[4] { id++, str, key, "Baidu" });
            OutDelegateAddO adddelegate = new OutDelegateAddO(OutTextAddO);
            this.Dispatcher.BeginInvoke(adddelegate, new object[4] { id, str, key, "Baidu" });
        }
    }

    y++;
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "Baidu" });
    // Pause between requests.
    Thread.Sleep(500);
}
//public void SuppressScriptErrors(WebBrowser wb, bool Hide)
//{
//    FieldInfo fiComWebBrowser = typeof(WebBrowser).GetField("_axIWebBrowser2", BindingFlags.Instance | BindingFlags.NonPublic);
//    if (fiComWebBrowser == null) return;
//    object objComWebBrowser = fiComWebBrowser.GetValue(wb);
//    if (objComWebBrowser == null) return;
//    objComWebBrowser.GetType().InvokeMember("Silent", BindingFlags.SetProperty, null, objComWebBrowser, new object[] { Hide });
//}
//void wbSimulation_Navigated(object sender, System.Windows.Navigation.NavigationEventArgs e)
//{
//    SuppressScriptErrors(wbSimulation, true);
//}
//private void wbSimulation_LoadCompleted(object sender, System.Windows.Navigation.NavigationEventArgs e)
//{
//    mshtml.IHTMLDocument2 doc2 = (mshtml.IHTMLDocument2)wbSimulation.Document;
//    htmlSim = doc2.body.innerHTML;
//    //MessageBox.Show(htmlSim);
//}
//public delegate void OutDelegateSim(string sbdurl);
//public void OutTextSim(string sbdurl)
//{
//    wbSimulation.Navigate(sbdurl);
//}
#endregion
#region Sogou排名
/// <summary>
/// Walks <c>bdpage</c> Sogou result pages for <paramref name="key"/> and records
/// every result accepted by the current <c>mode</c> in <c>dt1</c>
/// (id, keyword, rule, "Sogou", rank, title, url).
/// </summary>
/// <param name="key">The keyword to search for; it is URL-encoded before being sent.</param>
private void getSogouRank(string key)
{
    // Build the list of result-page URLs (pages are numbered from 1).
    string escapedKey = System.Net.WebUtility.UrlEncode(key);
    List<string> pageUrls = new List<string>();
    for (int pageNo = 1; pageNo <= bdpage; pageNo++)
    {
        pageUrls.Add("http://www.sogou.com/web?query=" + escapedKey + "&ie=utf8&cid=null&page=" + pageNo.ToString() + "&p=40040100&dp=1&w=01029901&dr=1");
    }

    for (int pageIndex = 0; pageIndex < pageUrls.Count; pageIndex++)
    {
        string pageSource = GetWebPageSource(pageUrls[pageIndex]);

        // When blocked (the page contains a script redirect): prompt the
        // operator, wait, and fetch the same page again until it goes through.
        while (pageSource.Contains("document.location.href = "))
        {
            MessageBox.Show("对不起!Sogou已被屏蔽,先切换IP,后确定!");
            Thread.Sleep(30000);
            pageSource = GetWebPageSource(pageUrls[pageIndex]);
        }

        MatchCollection segments = Common.MatchURLs(pageSource, "<!-- a -->", "<!-- z -->");

        // Start at 1: the first match is discarded (the original note cites the
        // fragment `<!-- a -->"),d=b.indexOf("<!-- z -->` as the thing to drop).
        for (int pos = 1; pos < segments.Count; pos++)
        {
            string html = segments[pos].Value;
            string titlehtml = Common.MatchURL(html, "<h3", "</h3>");
            string title = Common.StripHT("<h3" + titlehtml + "</h3>");
            string url = Common.MatchURL(titlehtml, "href=\"", "\"");
            int rank = pageIndex * 10 + pos;

            bool byTitle = mode == "TitleMode";
            bool byDetail = !byTitle && mode == "DetailMode";

            if (byTitle || byDetail)
            {
                // Title mode matches the rule against the title, detail mode
                // against the whole result segment.
                string subject = byTitle ? title : html;
                if (MatchRule(subject) && !MatchDelDomain(url))
                {
                    dt1.Rows.Add(id++, key, rule, "Sogou", rank, title, url);
                }
                Thread.Sleep(Common.RandomInt(500, 700));
            }
            else if (mode == "SnapshotMode")
            {
                if (url != "")
                {
                    // Snapshot mode matches the rule against the target page itself.
                    string pshtml = GetWebPageSource(url);
                    if (MatchRule(pshtml) && !MatchDelDomain(url))
                    {
                        dt1.Rows.Add(id++, key, rule, "Sogou", rank, title, url);
                    }
                }
            }
        }
    }

    y++;
    OutDelegateNew notifier = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(notifier, new object[] { key, "Sogou" });
}
/// <summary>
/// Walks <c>bdpage</c> Baidu result pages for <paramref name="key"/> and records the
/// entries found under the "content_left" element (when <c>leftrank</c> is set) and
/// the "ec_im_container" element (when <c>rightrank</c> is set) in <c>dt1</c>
/// (id, keyword, rule, "Baidu", position, title, desc1, desc2, domain, hospital).
/// </summary>
/// <param name="key">The keyword to search for; it is URL-encoded before being sent.</param>
private void getBaiduCreative(string key)
{
    // Encode the keyword (as getSogouRank does) so reserved characters
    // ('&', '#', '+', spaces) cannot corrupt the query string.
    string encodedKey = System.Net.WebUtility.UrlEncode(key);
    List<string> BDurl = new List<string>();
    for (int i = 0; i < bdpage; i++)
    {
        BDurl.Add("http://www.baidu.com/s?wd=" + encodedKey + "&pn=" + (i * 10).ToString() + "&ie=utf-8&usm=4");
    }

    for (int j = 0; j < BDurl.Count; j++)
    {
        string bd_source = GetWebPageSource(BDurl[j]);

        // When blocked: prompt the operator, wait, and fetch the same page
        // again until the block notice is gone.
        while (bd_source.Contains("很抱歉,您的请求暂时无法响应!"))
        {
            MessageBox.Show("对不起!在点击确定之前解除百度屏蔽!");
            Thread.Sleep(30000);
            bd_source = GetWebPageSource(BDurl[j]);
        }

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(bd_source);
        try
        {
            // Collect the left-hand entries.
            if (leftrank)
            {
                collectBaiduLeftCreatives(doc, key);
            }
            // Collect the right-hand entries.
            if (rightrank)
            {
                collectBaiduRightCreatives(doc, key);
            }
        }
        catch
        {
            // Best effort, as before: the scraping dereferences nodes that may be
            // absent from a given page, and any such failure abandons the rest of
            // this page (rows added so far are kept).
            // NOTE(review): a failure on the left side also skips the right side.
        }
    }

    Thread.Sleep(2000);
    y++;
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "Baidu" });
}

// Wraps an HTML fragment in a minimal document so its top-level elements are reachable as /html/body/*.
private static string wrapBaiduFragment(string innerHtml)
{
    return "<!doctype html><html><head><title>baidu</title></head><body>" + innerHtml + "</body></html>";
}

// Decides whether an entry is recorded: always when rule is blank, otherwise when the field selected by mode matches the rule.
private bool passesCreativeRule(string hospital, string biddomain)
{
    if (rule.Trim() == "")
    {
        return true;
    }
    if (mode == "NameMode" && MatchRule(hospital))
    {
        return true;
    }
    return mode == "DomainMode" && MatchRule(biddomain);
}

// Records the entries under "content_left" that consist of exactly 3 or 4 top-level div elements, labelled "左" + running number.
private void collectBaiduLeftCreatives(HtmlDocument doc, string key)
{
    int bdbig = 1;
    HtmlNode BDLeftHN = doc.GetElementbyId("content_left");
    HtmlDocument leftDoc = new HtmlDocument();
    leftDoc.LoadHtml(wrapBaiduFragment(BDLeftHN.InnerHtml));
    HtmlNodeCollection LeftNodes = leftDoc.DocumentNode.SelectNodes("/html/body/div");

    foreach (HtmlNode Node in LeftNodes)
    {
        if (isContains(Node.OuterHtml))
        {
            continue;
        }

        HtmlDocument nodeDoc = new HtmlDocument();
        nodeDoc.LoadHtml(wrapBaiduFragment(Node.InnerHtml));
        HtmlNodeCollection hncNode = nodeDoc.DocumentNode.SelectNodes("/html/body/div");

        // 3 divs: title, description, domain. 4 divs: title, two descriptions, domain.
        int divCount = hncNode.Count;
        if (divCount != 3 && divCount != 4)
        {
            continue;
        }
        bool hasSecondDescription = divCount == 4;

        // NOTE(review): the Replace(" ", " ") calls are reproduced as found and
        // look like a no-op; possibly the first argument was meant to be a
        // non-breaking space or "&nbsp;" — confirm against the original file.
        string title = nodeDoc.DocumentNode.SelectSingleNode("/html/body/div").InnerText;
        string desc1 = nodeDoc.DocumentNode.SelectSingleNode("/html/body/div[2]").InnerText.Replace(" ", " ");
        string desc2 = "";
        if (hasSecondDescription)
        {
            desc2 = nodeDoc.DocumentNode.SelectSingleNode("/html/body/div[3]").InnerText.Replace(" ", " ");
        }
        string domainPath = hasSecondDescription ? "/html/body/div[4]/span" : "/html/body/div[3]/span";
        string biddomain = nodeDoc.DocumentNode.SelectSingleNode(domainPath).InnerText;
        string hospital = Common.MatchURL(Node.InnerHtml, "data-renzheng=\"{title:'", ":'");

        if (passesCreativeRule(hospital, biddomain))
        {
            // The empty second description of a 3-div entry is stored as-is, not passed through format().
            dt1.Rows.Add(new object[10] { id++, key, rule, "Baidu", "左" + (bdbig++).ToString(), format(title), format(desc1), hasSecondDescription ? format(desc2) : desc2, biddomain, hospital });
        }
    }
}

// Records the entries under "ec_im_container" (skipping its first div), labelled "右" + running number.
private void collectBaiduRightCreatives(HtmlDocument doc, string key)
{
    int bdbig = 1;
    HtmlNode BDRightHN = doc.GetElementbyId("ec_im_container");
    HtmlDocument rightDoc = new HtmlDocument();
    rightDoc.LoadHtml(wrapBaiduFragment(BDRightHN.InnerHtml));
    HtmlNodeCollection RightNodes = rightDoc.DocumentNode.SelectNodes("/html/body/div");

    // The first div is not treated as an entry (presumably a header — TODO confirm).
    RightNodes.Remove(0);

    foreach (HtmlNode Node in RightNodes)
    {
        HtmlDocument nodeDoc = new HtmlDocument();
        nodeDoc.LoadHtml(wrapBaiduFragment(Node.InnerHtml));

        string title = nodeDoc.DocumentNode.SelectSingleNode("/html/body/a").InnerText;
        string desc1 = nodeDoc.DocumentNode.SelectSingleNode("/html/body/a[2]").InnerText;
        string desc2 = "";
        string biddomain = nodeDoc.DocumentNode.SelectSingleNode("/html/body/a[2]/font[2]").InnerText;
        string hospital = Common.MatchURL(Node.InnerHtml, "data-renzheng=\"{title:'", ":'");

        if (passesCreativeRule(hospital, biddomain))
        {
            // The domain text is part of the second anchor, so it is cut out of the description.
            dt1.Rows.Add(new object[10] { id++, key, rule, "Baidu", "右" + (bdbig++).ToString(), format(title), format(desc1.Replace(biddomain, "")), desc2, biddomain, hospital });
        }
    }
}
/// <summary>
/// Collects 360 (so.com) rankings: downloads the first result page for
/// <paramref name="key"/>, records every result accepted by the current
/// <c>mode</c> in <c>dt1</c> (id, keyword, rule, source, rank, title, url), and,
/// when <c>bdpage</c> is not 1, hands the following pages to getSoRank_25.
/// </summary>
/// <param name="key">The keyword to search for; it is URL-encoded before being sent.</param>
private void getSoRank(string key)
{
    // Encode the keyword (as getSogouRank does) so reserved characters
    // ('&', '#', '+', spaces) cannot corrupt the query string.
    string pageUrl = "http://www.so.com/s?q=" + System.Net.WebUtility.UrlEncode(key);

    byte[] pageSourceBytes = null;
    try
    {
        // WebClient is IDisposable; release it as soon as the download finishes.
        using (WebClient wc = new WebClient())
        {
            pageSourceBytes = wc.DownloadData(new Uri(pageUrl));
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show("538行--" + ex.ToString());
        return;
    }
    string pageSource = Encoding.GetEncoding("utf-8").GetString(pageSourceBytes);

    // When blocked: prompt the operator and wait before carrying on.
    // NOTE(review): unlike getSogouRank, the page is not fetched again afterwards.
    if (pageSource.Contains("很抱歉,您的请求暂时无法响应!"))
    {
        MessageBox.Show("对不起!在点击确定之前解除360屏蔽!");
        Thread.Sleep(30000);
    }

    MatchCollection htmlSeg = Common.MatchURLs(pageSource, "class=\"res-list\">", "</li>");
    for (int n = 0; n < htmlSeg.Count; n++)
    {
        string html = htmlSeg[n].Value;
        string titlehtml = Common.MatchURL(html, "<h3", "</h3>");
        string title = Common.StripHT("<h3" + titlehtml + "</h3>");
        string url = Common.MatchURL(titlehtml, "<a href=\"", "\"");

        int rank = n + 1;
        // Results beyond the tenth on this page are stored under a separate source label.
        string engine = rank > 10 ? "So__1" : "So";

        if (mode == "TitleMode")
        {
            if (MatchRule(title))
            {
                dt1.Rows.Add(new object[7] { id++, key, rule, engine, rank, title, url });
            }
        }
        else if (mode == "DetailMode")
        {
            if (MatchRule(html))
            {
                dt1.Rows.Add(new object[7] { id++, key, rule, engine, rank, title, url });
            }
        }
        else if (mode == "SnapshotMode")
        {
            if (url != "")
            {
                // Snapshot mode matches the rule against the target page itself.
                // The two modal dialogs that dumped the URL and the full page HTML
                // for every result were debugging leftovers and have been removed.
                string pshtml = GetWebPageSource(url);
                if (MatchRule(pshtml))
                {
                    dt1.Rows.Add(new object[7] { id++, key, rule, engine, rank, title, url });
                }
                Thread.Sleep(Common.RandomInt(400, 700));
            }
        }
    }

    // Follow the pager links for the remaining result pages (page 2 onwards).
    if (bdpage != 1)
    {
        string baiduurls = Common.MatchURL(pageSource, "<div id=\"page\"", "</div>");
        MatchCollection mcbaiduurls = Common.MatchURLs(baiduurls, "<a href=\"", "\">");
        List<string> BDurl = new List<string>();
        for (int i = 0; i < mcbaiduurls.Count && i < bdpage - 1; i++)
        {
            BDurl.Add("http://www.so.com/" + mcbaiduurls[i].Value);
        }
        for (int p = 0; p < BDurl.Count; p++)
        {
            getSoRank_25(key, p + 2, BDurl[p]);
            Thread.Sleep(500);
        }
    }

    y++;
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "So" });
    Thread.Sleep(500);
}
//public void Go()
//{
//    for (int i = 0; i < key.Count; i++)
//    {
//        if (baidu)
//        {
//            Gather((object)key[i]);
//        }
//        if (so)
//        {
//            getSoRank(key[i]);
//        }
//        if (sogou)
//        {
//            //getSogouRank(key[i]);
//        }
//    }
//}
#endregion
#region 百度排名
/// <summary>
/// Collects Baidu rankings: downloads the first result page for the keyword,
/// records each of the ten result segments accepted by the current <c>mode</c>
/// in <c>dt1</c> (id, keyword, rule, "Baidu", rank, title, url), and, when
/// <c>bdpage</c> is not 1, hands the following pages to GetTitleUrl.
/// </summary>
/// <param name="obj">The keyword; its ToString() value is URL-encoded and searched for.</param>
private void Gather(object obj)
{
    string key = obj.ToString();

    string totalLines;
    try
    {
        // Encode the keyword (as getSogouRank does) so reserved characters
        // ('&', '#', '+', spaces) cannot corrupt the query string.
        HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create("http://www.baidu.com/s?wd=" + System.Net.WebUtility.UrlEncode(key));

        // Response, stream and reader all hold the connection; dispose them
        // as soon as the body has been read.
        using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
        using (Stream srtStream = webResponse.GetResponseStream())
        using (StreamReader sr = new StreamReader(srtStream, Encoding.GetEncoding("utf-8")))
        {
            totalLines = sr.ReadToEnd();
        }
    }
    catch (Exception ee)
    {
        MessageBox.Show("对不起!网络没有连接。" + "230行--" + ee.ToString());
        return;
    }

    // When blocked: prompt the operator and wait before carrying on.
    if (totalLines.Contains("很抱歉,您的请求暂时无法响应!"))
    {
        MessageBox.Show("对不起!在点击确定之前解除百度屏蔽!");
        Thread.Sleep(30000);
    }
    Thread.Sleep(Common.RandomInt(500, 700));

    // Host-name pattern used to pull the displayed domain out of a result; built once for all ten results.
    Regex reg = new Regex(@"([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}");

    for (int n = 1; n <= 10; n++)
    {
        string html = Common.BDSegment(totalLines, n);
        string title = (Common.StripHT(Common.MatchURL(html, "<h3.*>", "</h3>"))).Trim();
        string url = reg.Match(Common.StripHT(html)).Value;

        if (mode == "TitleMode")
        {
            if (MatchRule(title))
            {
                dt1.Rows.Add(new object[7] { id, key, rule, "Baidu", n, title, url });
                id++;
            }
        }
        else if (mode == "DetailMode")
        {
            if (MatchRule(html))
            {
                dt1.Rows.Add(new object[7] { id, key, rule, "Baidu", n, title, url });
                id++;
            }
        }
        else if (mode == "SnapshotMode")
        {
            string snapshot_url = Common.MatchURL(html, "<a data-nolog href=\"", "\"");
            if (snapshot_url != "")
            {
                byte[] pageSourceBytes = null;
                try
                {
                    // WebClient is IDisposable; release it as soon as the download finishes.
                    using (WebClient wc = new WebClient())
                    {
                        pageSourceBytes = wc.DownloadData(new Uri(snapshot_url));
                    }
                }
                catch (Exception ee)
                {
                    MessageBox.Show("318行--" + ee.ToString());
                    url = "404";
                }

                if (pageSourceBytes != null)
                {
                    string pageSource = Encoding.GetEncoding("gb2312").GetString(pageSourceBytes);

                    // The original address sits outside the snapshot's body, so it is
                    // extracted by string matching rather than with HtmlAgilityPack.
                    string urlstr1 = Common.MatchURL(pageSource, "<div id=\"bd_snap_note\">", "</div>");
                    string urlstr2 = Common.MatchURL(urlstr1, "<a href=\"", "\">");
                    if (urlstr2 != "")
                    {
                        url = urlstr2;
                    }
                    if (MatchRule(pageSource))
                    {
                        dt1.Rows.Add(new object[7] { id, key, rule, "Baidu", n, title, url });
                        id++;
                    }
                }
                Thread.Sleep(Common.RandomInt(500, 700));
            }
        }
    }
    Thread.Sleep(Common.RandomInt(500, 700));

    // Follow the pager links for the remaining result pages.
    if (bdpage != 1)
    {
        string baiduurls = Common.MatchURL(totalLines, "<p id=\"page\"", "</p>");
        MatchCollection mcbaiduurls = Common.MatchURLs(baiduurls, "<a href=\"", "\">");
        List<string> BDurl = new List<string>();

        // j is not declared in this method; it is assigned here exactly as before.
        j = 1;
        foreach (Match ma in mcbaiduurls)
        {
            if (j < bdpage)
            {
                BDurl.Add("http://www.baidu.com" + ma.Value);
                j++;
            }
        }
        for (int p = 0; p < BDurl.Count; p++)
        {
            GetTitleUrl(BDurl[p], key, p + 1);
        }
    }

    y++;
    // Queue OutTextNew on the dispatcher with (keyword, source).
    OutDelegateNew outdelegate = new OutDelegateNew(OutTextNew);
    this.Dispatcher.BeginInvoke(outdelegate, new object[] { key, "Baidu" });
    Thread.Sleep(Common.RandomInt(500, 700));
}