Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the first Baidu result page for a keyword and renders the hits that could be
        /// parsed as an HTML fragment.
        /// </summary>
        /// <param name="searchword">
        /// Search keyword. It is appended to the query URL unchanged and URL-decoded (GB2312) only
        /// for highlighting, so it is presumably expected to arrive already URL-encoded.
        /// </param>
        /// <returns>
        /// One HTML block per parsed result. If downloading or parsing throws, the exception message
        /// and a link to the search page are appended; if nothing at all was produced, a notice that
        /// the XPath expressions need updating is returned instead.
        /// </returns>
        public string GetSearchResultFromBaidu(string searchword)
        {
            StringBuilder sb = new StringBuilder();
            string url = "http://www.baidu.com/s?wd=" + searchword;
            try
            {
                string html;
                // WebClient is IDisposable: release it as soon as the page has been read.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.GetEncoding("gb2312");
                    html = wc.DownloadString(url.Trim());
                }

                var doc = new HtmlDocument();
                doc.LoadHtml(html);

                // Loop-invariant highlight data. string.Replace throws on a null or empty search
                // string, so highlighting is skipped when there is no keyword.
                string keyword = System.Web.HttpUtility.UrlDecode(searchword, Encoding.GetEncoding("gb2312"));
                bool canHighlight = !string.IsNullOrEmpty(keyword);
                string keywordMarkup = "<font color=red>" + keyword + "</font>";
                string today = DateTime.Now.ToString("yyyy-MM-dd");

                // NOTE(review): scraped values are written into the markup without HTML encoding.

                // Results are looked up in the 4th..13th <table> directly under <body>.
                for (int i = 4; i <= 13; i++)
                {
                    string table = "/html[1]/body[1]/table[" + i.ToString() + "]";
                    string cell = table + "/tr[1]/td[1]";

                    // Title: a link directly in the cell, otherwise a link wrapped in a <font>.
                    // A missing node surfaces as an exception, which selects the next candidate.
                    string nameValue = "";
                    try
                    {
                        nameValue = doc.DocumentNode.SelectSingleNode(cell + "/a[1]").InnerHtml;
                        nameValue = new HtmlToText().ConvertHtml(nameValue).Trim();
                    }
                    catch
                    {
                        try
                        {
                            nameValue = doc.DocumentNode.SelectSingleNode(cell + "/font[1]/a[1]").InnerHtml;
                            nameValue = new HtmlToText().ConvertHtml(nameValue).Trim();
                        }
                        catch
                        {
                            // No title at either location: this table is not a result.
                            continue;
                        }
                    }

                    // Target address, read from the same two link locations.
                    string urlValue = "";
                    try
                    {
                        urlValue = doc.DocumentNode.SelectSingleNode(cell + "/a[1]/@href[1]").Attributes["href"].Value;
                    }
                    catch
                    {
                        try
                        {
                            urlValue = doc.DocumentNode.SelectSingleNode(cell + "/font[1]/a[1]/@href[1]").Attributes["href"].Value;
                        }
                        catch
                        {
                            continue;
                        }
                    }

                    // Snippet: text of the first <font>; when that is missing or shorter than
                    // 20 characters, fall back to the raw HTML of the paragraph in the second row.
                    string bodyValue = "";
                    bool bodyFound = false;
                    try
                    {
                        bodyValue = doc.DocumentNode.SelectSingleNode(cell + "/font[1]").InnerHtml;
                        bodyValue = new HtmlToText().ConvertHtml(bodyValue).Trim();
                        bodyFound = bodyValue.Length >= 20;
                    }
                    catch
                    {
                        bodyFound = false;
                    }
                    if (!bodyFound)
                    {
                        try
                        {
                            bodyValue = doc.DocumentNode.SelectSingleNode(table + "/tr[2]/td[1]/p[1]").InnerHtml;
                        }
                        catch
                        {
                            bodyValue = "";
                        }
                    }

                    // Date: everything from the last "20" in the second (or else first) nested
                    // <font>; today's date when neither yields a value.
                    string timeValue = "";
                    try
                    {
                        timeValue = doc.DocumentNode.SelectSingleNode(cell + "/font[1]/font[2]").InnerHtml;
                        timeValue = timeValue.Substring(timeValue.LastIndexOf("20"));
                    }
                    catch
                    {
                        try
                        {
                            timeValue = doc.DocumentNode.SelectSingleNode(cell + "/font[1]/font[1]").InnerHtml;
                            timeValue = timeValue.Substring(timeValue.LastIndexOf("20"));
                        }
                        catch
                        {
                            timeValue = today;
                        }
                    }

                    // Text that does not parse as a date is replaced by today's date.
                    DateTime parsedDate;
                    if (!DateTime.TryParse(timeValue, out parsedDate))
                    {
                        timeValue = today;
                    }

                    string shownName = canHighlight ? nameValue.Replace(keyword, keywordMarkup) : nameValue;
                    string shownBody = canHighlight ? bodyValue.Replace(keyword, keywordMarkup) : bodyValue;
                    // Long addresses are cut to 66 characters for display only; the link keeps the full value.
                    string shownUrl = urlValue.Length > 66 ? urlValue.Substring(0, 66) + "..." : urlValue;

                    sb.Append(" <div style=\"margin-top: 20px; margin-bottom: 20px;\">");
                    sb.Append(" <div>");
                    sb.Append(" <a target=\"_blank\"  href=\"" + urlValue + "\"><font color=blue>" + shownName + "</font></a>");
                    sb.Append(" </div>");
                    sb.Append(" <div style=\"font-size: 13px;\">");
                    sb.Append(" <font size=2 >" + shownBody + "</font>");
                    sb.Append(" </div>");
                    sb.Append(" <div>");
                    sb.Append(" <font color=#006600>" + shownUrl + "</font>&nbsp;&nbsp;<font size=2 color=#006600>" + timeValue + "</font>&nbsp;&nbsp;<a target=\"_blank\" href=\"" + url + "\"><font size=2  color=#666699>百度搜索</font></a>");
                    sb.Append("  </div>");
                    sb.Append(" </div>");
                }
            }
            catch (Exception ex)
            {
                // Download/parse failure: report the message and link to the live search page.
                sb.Append(ex.Message + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }

            // Nothing parsed and nothing failed: the page layout no longer matches the XPaths.
            if (sb.ToString().Trim().Equals(""))
            {
                sb.Append("该网站相关Xpath出现改动,请根据改动及时修正匹配Xpath。" + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            return sb.ToString();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads the first Youdao result page for a keyword and renders the hits that could be
        /// parsed as an HTML fragment.
        /// </summary>
        /// <param name="searchword">
        /// Search keyword. It is appended to the query URL unchanged and URL-decoded (UTF-8) only
        /// for highlighting, so it is presumably expected to arrive already URL-encoded.
        /// </param>
        /// <returns>
        /// One HTML block per parsed result. If downloading or parsing throws, the exception message
        /// and a link to the search page are appended; if nothing at all was produced, a notice that
        /// the XPath expressions need updating is returned instead.
        /// </returns>
        public string GetSearchResultFromYoudao(string searchword)
        {
            StringBuilder sb = new StringBuilder();
            string url = "http://www.youdao.com/search?q=" + searchword + "&ue=utf8&keyfrom=web.index";
            try
            {
                string html;
                // WebClient is IDisposable: release it as soon as the page has been read.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.GetEncoding("utf-8");
                    html = wc.DownloadString(url.Trim());
                }

                var doc = new HtmlDocument();
                doc.LoadHtml(html);

                // Loop-invariant highlight data. string.Replace throws on a null or empty search
                // string, which previously aborted every result after its opening tags had been
                // written; highlighting is therefore skipped when there is no keyword.
                string keyword = System.Web.HttpUtility.UrlDecode(searchword, Encoding.UTF8);
                bool canHighlight = !string.IsNullOrEmpty(keyword);
                string keywordMarkup = "<font color=red>" + keyword + "</font>";

                // NOTE(review): scraped values are written into the markup without HTML encoding.

                // Results are looked up in the first ten <li> items of the result list.
                for (int i = 1; i <= 10; i++)
                {
                    try
                    {
                        string item = "/body[1]/div[6]/div[2]/ul[1]/li[" + i.ToString() + "]";

                        // Title and address are mandatory: a missing node throws and the item is skipped.
                        string nameValue = doc.DocumentNode.SelectSingleNode(item + "/h3[1]/a[1]").InnerText;
                        nameValue = new HtmlToText().ConvertHtml(nameValue).Trim();
                        string urlValue = doc.DocumentNode.SelectSingleNode(item + "/h3[1]/a[1]/@href[1]").Attributes["href"].Value;

                        // Snippet and date are optional and default to empty text.
                        string bodyValue;
                        try
                        {
                            bodyValue = doc.DocumentNode.SelectSingleNode(item + "/div[1]").InnerHtml;
                            bodyValue = new HtmlToText().ConvertHtml(bodyValue).Trim();
                        }
                        catch
                        {
                            bodyValue = "";
                        }

                        string timeValue;
                        try
                        {
                            // Keep everything from the last "20" onwards and drop a trailing "-->".
                            timeValue = doc.DocumentNode.SelectSingleNode(item + "/p[1]/span[1]").InnerText;
                            timeValue = timeValue.Substring(timeValue.LastIndexOf("20")).Replace("-->", "");
                        }
                        catch
                        {
                            timeValue = "";
                        }

                        string shownName = canHighlight ? nameValue.Replace(keyword, keywordMarkup) : nameValue;
                        string shownBody = canHighlight ? bodyValue.Replace(keyword, keywordMarkup) : bodyValue;
                        // Long addresses are cut to 66 characters for display only; the link keeps the full value.
                        string shownUrl = urlValue.Length > 66 ? urlValue.Substring(0, 66) + "..." : urlValue;

                        sb.Append(" <div style=\"margin-top: 20px; margin-bottom: 20px;\">");
                        sb.Append(" <div>");
                        sb.Append(" <a target=\"_blank\"  href=\"" + urlValue + "\"><font color=blue>" + shownName + "</font></a>");
                        sb.Append(" </div>");
                        sb.Append(" <div style=\"font-size: 13px;\">");
                        sb.Append(" <font size=2 >" + shownBody + "</font>");
                        sb.Append(" </div>");
                        sb.Append(" <div>");
                        sb.Append(" <font color=#006600>" + shownUrl + "</font>&nbsp;&nbsp;<font size=2 color=#006600>" + timeValue + "</font>&nbsp;&nbsp;<a target=\"_blank\" href=\"" + url + "\"><font size=2  color=#666699>有道搜索</font></a>");
                        sb.Append("  </div>");
                        sb.Append(" </div>");
                    }
                    catch
                    {
                        // Best effort: an item without title or address is not a result.
                        continue;
                    }
                }
            }
            catch (Exception ex)
            {
                // Download/parse failure: report the message and link to the live search page.
                sb.Append(ex.Message + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }

            // Nothing parsed and nothing failed: the page layout no longer matches the XPaths.
            if (sb.ToString().Trim().Equals(""))
            {
                sb.Append("该网站相关Xpath出现改动,请根据改动及时修正匹配Xpath。" + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            return sb.ToString();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Downloads the first Google (google.com.hk) result page for a keyword and renders the hits
        /// that could be parsed as an HTML fragment.
        /// </summary>
        /// <param name="searchword">
        /// Search keyword. It is appended to the query URL unchanged and URL-decoded (UTF-8) only
        /// for highlighting, so it is presumably expected to arrive already URL-encoded.
        /// </param>
        /// <returns>
        /// One HTML block per parsed result, each stamped with today's date. If downloading or
        /// parsing throws, the exception message and a link to the search page are appended; if
        /// nothing at all was produced, a notice that the XPath expressions need updating is
        /// returned instead.
        /// </returns>
        public string GetSearchResultFromGoogle(string searchword)
        {
            StringBuilder sb = new StringBuilder();
            string url = "http://www.google.com.hk/search?hl=zh-CN&source=hp&q=" + searchword + "&meta=&aq=f&aqi=g10&aql=&oq=&gs_rfai=";
            try
            {
                string html;
                // WebClient is IDisposable: release it as soon as the page has been read.
                // NOTE(review): the page is decoded as GB2312 while the keyword below is decoded
                // as UTF-8 — confirm which encoding the service actually returns.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.GetEncoding("gb2312");
                    html = wc.DownloadString(url.Trim());
                }

                var doc = new HtmlDocument();
                doc.LoadHtml(html);

                // Loop-invariant highlight data. string.Replace throws on a null or empty search
                // string, which previously aborted every result after its opening tags had been
                // written; highlighting is therefore skipped when there is no keyword.
                string keyword = System.Web.HttpUtility.UrlDecode(searchword, Encoding.UTF8);
                bool canHighlight = !string.IsNullOrEmpty(keyword);
                string keywordMarkup = "<font color=red>" + keyword + "</font>";

                // No date is read from the page; every result shows today's date.
                string timeValue = DateTime.Now.ToString("yyyy-MM-dd");

                // NOTE(review): scraped values are written into the markup without HTML encoding.

                // Eleven passes; each pass looks one "/li[1]" level deeper than the previous one.
                // NOTE(review): presumably this mirrors how the parser nests unclosed <li>
                // elements — confirm against a current page before changing it.
                string listr = "";
                for (int i = 0; i <= 10; i++)
                {
                    listr += "/li[1]";
                    // The result list sits under either the 3rd or the 4th child <div>.
                    string primary = "/body[1]/div[5]/div[3]/div[1]/ol[1]" + listr;
                    string alternate = "/body[1]/div[5]/div[4]/div[1]/ol[1]" + listr;

                    try
                    {
                        // Title and address are mandatory: if both locations fail, the exception
                        // reaches the outer catch and the item is skipped.
                        string nameValue;
                        try
                        {
                            nameValue = doc.DocumentNode.SelectSingleNode(primary + "/h3[1]/a[1]").InnerText;
                        }
                        catch
                        {
                            nameValue = doc.DocumentNode.SelectSingleNode(alternate + "/h3[1]/a[1]").InnerText;
                        }

                        string urlValue;
                        try
                        {
                            urlValue = doc.DocumentNode.SelectSingleNode(primary + "/h3[1]/a[1]/@href[1]").Attributes["href"].Value;
                        }
                        catch
                        {
                            urlValue = doc.DocumentNode.SelectSingleNode(alternate + "/h3[1]/a[1]/@href[1]").Attributes["href"].Value;
                        }

                        // Snippet is optional: converted text of the first <div>, else the raw HTML
                        // of a <table>, else the raw HTML of the <div> at the alternate location.
                        string bodyValue;
                        try
                        {
                            bodyValue = doc.DocumentNode.SelectSingleNode(primary + "/div[1]").InnerHtml;
                            bodyValue = new HtmlToText().ConvertHtml(bodyValue).Trim();
                        }
                        catch
                        {
                            try
                            {
                                bodyValue = doc.DocumentNode.SelectSingleNode(primary + "/table[1]").InnerHtml;
                            }
                            catch
                            {
                                try
                                {
                                    bodyValue = doc.DocumentNode.SelectSingleNode(alternate + "/div[1]").InnerHtml;
                                }
                                catch
                                {
                                    bodyValue = "";
                                }
                            }
                        }

                        string shownName = canHighlight ? nameValue.Replace(keyword, keywordMarkup) : nameValue;
                        string shownBody = canHighlight ? bodyValue.Replace(keyword, keywordMarkup) : bodyValue;
                        // Long addresses are cut to 66 characters for display only; the link keeps the full value.
                        string shownUrl = urlValue.Length > 66 ? urlValue.Substring(0, 66) + "..." : urlValue;

                        sb.Append(" <div style=\"margin-top: 20px; margin-bottom: 20px;\">");
                        sb.Append(" <div>");
                        sb.Append(" <a target=\"_blank\"  href=\"" + urlValue + "\"><font color=blue>" + shownName + "</font></a>");
                        sb.Append(" </div>");
                        sb.Append(" <div style=\"font-size: 13px;\">");
                        sb.Append(" <font size=2 >" + shownBody + "</font>");
                        sb.Append(" </div>");
                        sb.Append(" <div>");
                        sb.Append(" <font color=#006600>" + shownUrl + "</font>&nbsp;&nbsp;<font size=2 color=#006600>" + timeValue + "</font>&nbsp;&nbsp;<a target=\"_blank\" href=\"" + url + "\"><font size=2  color=#666699>Google搜索</font></a>");
                        sb.Append("  </div>");
                        sb.Append(" </div>");
                    }
                    catch
                    {
                        // Best effort: an item without title or address is not a result.
                        continue;
                    }
                }
            }
            catch (Exception ex)
            {
                // Download/parse failure: report the message and link to the live search page.
                sb.Append(ex.Message + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }

            // Nothing parsed and nothing failed: the page layout no longer matches the XPaths.
            if (sb.ToString().Trim().Equals(""))
            {
                sb.Append("该网站相关Xpath出现改动,请根据改动及时修正匹配Xpath。" + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            return sb.ToString();
        }