Example #1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts an interface entity from it.
        /// </summary>
        /// <remarks>
        /// Name, publish time and body are each taken from the InnerHtml of the node selected by the
        /// corresponding XPath. When the matching regular expression is non-empty, the value is replaced
        /// by capture group 1 of its first match. The body is finally passed through
        /// HtmlToText.ConvertHtml and trimmed.
        /// </remarks>
        /// <param name="wc">WebClient used to download the page.</param>
        /// <param name="url">Interface URL.</param>
        /// <param name="cnnamexp">Interface name (XPath).</param>
        /// <param name="cnnameRgx">Interface name regular expression (Regex); null or empty to skip.</param>
        /// <param name="cnbodyXp">Interface body (XPath).</param>
        /// <param name="cnbodyRgx">Interface body regular expression (Regex); null or empty to skip.</param>
        /// <param name="cntimeXp">Interface publish time (XPath).</param>
        /// <param name="cntimeRgx">Interface publish time regular expression (Regex); null or empty to skip.</param>
        /// <param name="cntimeF">Interface publish time format. Not used here: the time is parsed with Convert.ToDateTime.</param>
        /// <param name="cnsource">Interface network source.</param>
        /// <param name="cnsourceurl">Interface network source address.</param>
        /// <param name="cntypeid">Interface type ID stored on the returned entity.</param>
        /// <returns>The interface entity.</returns>
        public static CnInterfaceInfo GetCnInterfaceInfoEntity(WebClient wc,
                                                               string url,
                                                               string cnnamexp,
                                                               string cnnameRgx,
                                                               string cnbodyXp,
                                                               string cnbodyRgx,
                                                               string cntimeXp,
                                                               string cntimeRgx,
                                                               string cntimeF,
                                                               string cnsource,
                                                               string cnsourceurl,
                                                               int cntypeid)
        {
            var html = wc.DownloadString(url);
            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            // Interface name.
            var title = doc.DocumentNode.SelectSingleNode(cnnamexp).InnerHtml;
            if (!string.IsNullOrEmpty(cnnameRgx))
            {
                title = new Regex(cnnameRgx).Match(title).Groups[1].Value;
            }

            // Interface publish time (kept as text; converted when the entity is built).
            var ptStr = doc.DocumentNode.SelectSingleNode(cntimeXp).InnerHtml;
            if (!string.IsNullOrEmpty(cntimeRgx))
            {
                ptStr = new Regex(cntimeRgx).Match(ptStr).Groups[1].Value;
            }

            // Interface body.
            var body = doc.DocumentNode.SelectSingleNode(cnbodyXp).InnerHtml;
            if (!string.IsNullOrEmpty(cnbodyRgx))
            {
                // The body regex narrows the body itself; the name was already resolved above
                // and must not be overwritten here.
                body = new Regex(cnbodyRgx).Match(body).Groups[1].Value;
            }

            // Strip HTML markup from the body.
            body = new HtmlToText().ConvertHtml(body).Trim();

            return new CnInterfaceInfo
            {
                CnInterfaceTypeID = cntypeid,
                CnInterfaceName = title,
                CnInterfacebody = body,
                CnInterfaceUrl = url,
                CnInterfaceSource = cnsource,
                CnInterfaceSourceUrl = cnsourceurl,
                CnInterfaceAppearTime = Convert.ToDateTime(ptStr)
            };
        }
Example #2
0
        /// <summary>
        /// Downloads the first Youdao result page for a keyword and renders the results as an HTML fragment.
        /// </summary>
        /// <param name="searchword">
        /// Search keyword. It is appended to the query URL as-is and URL-decoded (UTF-8) to highlight
        /// occurrences in the result title and summary.
        /// </param>
        /// <returns>
        /// One HTML block per result that could be parsed. If the download or parsing fails as a whole,
        /// the exception message followed by a link to the search page. If nothing could be parsed,
        /// a hint that the XPaths need to be updated.
        /// </returns>
        public string GetSearchResultFromYoudao(string searchword)
        {
            StringBuilder sb = new StringBuilder();
            string url = "http://www.youdao.com/search?q=" + searchword + "&ue=utf8&keyfrom=web.index";
            try
            {
                string html;
                // WebClient is IDisposable: release it as soon as the page has been downloaded.
                using (WebClient wc = new WebClient { Encoding = Encoding.GetEncoding("utf-8") })
                {
                    html = wc.DownloadString(url.Trim());
                }

                var doc = new HtmlDocument();
                doc.LoadHtml(html);

                // The decoded keyword is loop-invariant, so decode it once.
                string keyword = System.Web.HttpUtility.UrlDecode(searchword, Encoding.UTF8);
                // string.Replace throws for a null or empty search string; skip highlighting in that case
                // instead of failing every result.
                bool canHighlight = !string.IsNullOrEmpty(keyword);
                string keywordMarkup = "<font color=red>" + keyword + "</font>";

                // NOTE(review): scraped text, scraped links and the raw search word are written into the
                // markup unescaped - confirm the inputs are trusted or encode them at the call site.
                for (int i = 1; i <= 10; i++)
                {
                    try
                    {
                        string itemXpath = "/body[1]/div[6]/div[2]/ul[1]/li[" + i.ToString() + "]";
                        string nameXpath = itemXpath + "/h3[1]/a[1]";
                        string urlXpath = itemXpath + "/h3[1]/a[1]/@href[1]";
                        string bodyXpath = itemXpath + "/div[1]";
                        string timeXpath = itemXpath + "/p[1]/span[1]";

                        // Title and link are mandatory: a missing node throws and skips this result.
                        string nameValue = doc.DocumentNode.SelectSingleNode(nameXpath).InnerText;
                        nameValue = new HtmlToText().ConvertHtml(nameValue).Trim();
                        string urlValue = doc.DocumentNode.SelectSingleNode(urlXpath).Attributes["href"].Value;

                        // Summary is optional (best effort).
                        string bodyValue = "";
                        try
                        {
                            bodyValue = doc.DocumentNode.SelectSingleNode(bodyXpath).InnerHtml;
                            bodyValue = new HtmlToText().ConvertHtml(bodyValue).Trim();
                        }
                        catch
                        {
                            bodyValue = "";
                        }

                        // Date is optional (best effort): keep the text from the last "20" onwards.
                        string timeValue = "";
                        try
                        {
                            timeValue = doc.DocumentNode.SelectSingleNode(timeXpath).InnerText;
                            timeValue = timeValue.Substring(timeValue.LastIndexOf("20")).Replace("-->", "");
                        }
                        catch
                        {
                            timeValue = "";
                        }

                        // Build every dynamic piece before appending so that a failure can never leave
                        // a half-written result block in the output.
                        string nameMarkup = canHighlight ? nameValue.Replace(keyword, keywordMarkup) : nameValue;
                        string bodyMarkup = canHighlight ? bodyValue.Replace(keyword, keywordMarkup) : bodyValue;
                        string shortUrl = urlValue.Length > 66 ? urlValue.Substring(0, 66) + "..." : urlValue;

                        sb.Append(" <div style=\"margin-top: 20px; margin-bottom: 20px;\">");
                        sb.Append(" <div>");
                        sb.Append(" <a target=\"_blank\"  href=\"" + urlValue + "\"><font color=blue>" + nameMarkup + "</font></a>");
                        sb.Append(" </div>");
                        sb.Append(" <div style=\"font-size: 13px;\">");
                        sb.Append(" <font size=2 >" + bodyMarkup + "</font>");
                        sb.Append(" </div>");
                        sb.Append(" <div>");
                        sb.Append(" <font color=#006600>" + shortUrl + "</font>&nbsp;&nbsp;<font size=2 color=#006600>" + timeValue + "</font>&nbsp;&nbsp;<a target=\"_blank\" href=\"" + url + "\"><font size=2  color=#666699>有道搜索</font></a>");
                        sb.Append("  </div>");
                        sb.Append(" </div>");
                    }
                    catch
                    {
                        // This result does not match the expected page layout; try the next one.
                        continue;
                    }

                    // NOTE(review): no request is issued inside the loop, so this pause only delays
                    // the response - confirm whether it is still needed.
                    System.Threading.Thread.Sleep(200);
                }
            }
            catch (Exception ex)
            {
                sb.Append(ex.Message + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            if (sb.ToString().Trim().Equals(""))
            {
                sb.Append("该网站相关Xpath出现改动,请根据改动及时修正匹配Xpath。" + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            return sb.ToString();
        }
Example #3
0
        /// <summary>
        /// Downloads the first Baidu result page for a keyword and renders the results as an HTML fragment.
        /// </summary>
        /// <param name="searchword">
        /// Search keyword. It is appended to the query URL as-is and URL-decoded (gb2312) to highlight
        /// occurrences in the result title and summary.
        /// </param>
        /// <returns>
        /// One HTML block per result that could be parsed. If the download or parsing fails as a whole,
        /// the exception message followed by a link to the search page. If nothing could be parsed,
        /// a hint that the XPaths need to be updated.
        /// </returns>
        public  string GetSearchResultFromBaidu(string searchword)
        {
            StringBuilder sb = new StringBuilder();
            string url = "http://www.baidu.com/s?wd=" + searchword;
            try
            {
                Encoding gb2312 = Encoding.GetEncoding("gb2312");
                string html;
                // WebClient is IDisposable: release it as soon as the page has been downloaded.
                using (WebClient wc = new WebClient { Encoding = gb2312 })
                {
                    html = wc.DownloadString(url.Trim());
                }

                var doc = new HtmlDocument();
                doc.LoadHtml(html);

                // The decoded keyword is loop-invariant, so decode it once.
                string keyword = System.Web.HttpUtility.UrlDecode(searchword, gb2312);
                // string.Replace throws for a null or empty search string; skip highlighting in that case
                // instead of aborting with half-written markup.
                bool canHighlight = !string.IsNullOrEmpty(keyword);
                string keywordMarkup = "<font color=red>" + keyword + "</font>";

                // NOTE(review): scraped text, scraped links and the raw search word are written into the
                // markup unescaped - confirm the inputs are trusted or encode them at the call site.
                // Result tables are expected at positions 4..13 of the page body.
                for (int i = 4; i <= 13; i++)
                {
                    string tableXpath = "/html[1]/body[1]/table[" + i.ToString() + "]";
                    string cellXpath = tableXpath + "/tr[1]/td[1]";

                    // Title: primary location first, then the variant nested in <font>; skip the result if neither exists.
                    string nameValue = "";
                    try
                    {
                        nameValue = doc.DocumentNode.SelectSingleNode(cellXpath + "/a[1]").InnerHtml;
                        nameValue = new HtmlToText().ConvertHtml(nameValue).Trim();
                    }
                    catch
                    {
                        try
                        {
                            nameValue = doc.DocumentNode.SelectSingleNode(cellXpath + "/font[1]/a[1]").InnerHtml;
                            nameValue = new HtmlToText().ConvertHtml(nameValue).Trim();
                        }
                        catch
                        {
                            continue;
                        }
                    }

                    // Link: same two candidate locations as the title.
                    string urlValue = "";
                    try
                    {
                        urlValue = doc.DocumentNode.SelectSingleNode(cellXpath + "/a[1]/@href[1]").Attributes["href"].Value;
                    }
                    catch
                    {
                        try
                        {
                            urlValue = doc.DocumentNode.SelectSingleNode(cellXpath + "/font[1]/a[1]/@href[1]").Attributes["href"].Value;
                        }
                        catch
                        {
                            continue;
                        }
                    }

                    // Summary: the primary text is only accepted when it has at least 20 characters;
                    // otherwise the raw InnerHtml of the second row is used, or an empty string.
                    string bodyValue = null;
                    try
                    {
                        string primaryBody = doc.DocumentNode.SelectSingleNode(cellXpath + "/font[1]").InnerHtml;
                        primaryBody = new HtmlToText().ConvertHtml(primaryBody).Trim();
                        if (primaryBody.Length >= 20)
                        {
                            bodyValue = primaryBody;
                        }
                    }
                    catch
                    {
                        // Primary summary unavailable; fall through to the alternative location.
                        bodyValue = null;
                    }
                    if (bodyValue == null)
                    {
                        try
                        {
                            bodyValue = doc.DocumentNode.SelectSingleNode(tableXpath + "/tr[2]/td[1]/p[1]").InnerHtml;
                        }
                        catch
                        {
                            bodyValue = "";
                        }
                    }

                    // Date: text from the last "20" onwards of either candidate node, else today's date.
                    string timeValue = "";
                    try
                    {
                        timeValue = doc.DocumentNode.SelectSingleNode(cellXpath + "/font[1]/font[2]").InnerHtml;
                        timeValue = timeValue.Substring(timeValue.LastIndexOf("20"));
                    }
                    catch
                    {
                        try
                        {
                            timeValue = doc.DocumentNode.SelectSingleNode(cellXpath + "/font[1]/font[1]").InnerHtml;
                            timeValue = timeValue.Substring(timeValue.LastIndexOf("20"));
                        }
                        catch
                        {
                            timeValue = DateTime.Now.ToString("yyyy-MM-dd");
                        }
                    }

                    // Anything that does not parse as a date is replaced by today's date.
                    DateTime parsedTime;
                    if (!DateTime.TryParse(timeValue, out parsedTime))
                    {
                        timeValue = DateTime.Now.ToString("yyyy-MM-dd");
                    }

                    // Build every dynamic piece before appending so that a failure can never leave
                    // a half-written result block in the output.
                    string nameMarkup = canHighlight ? nameValue.Replace(keyword, keywordMarkup) : nameValue;
                    string bodyMarkup = canHighlight ? bodyValue.Replace(keyword, keywordMarkup) : bodyValue;
                    string shortUrl = urlValue.Length > 66 ? urlValue.Substring(0, 66) + "..." : urlValue;

                    sb.Append(" <div style=\"margin-top: 20px; margin-bottom: 20px;\">");
                    sb.Append(" <div>");
                    sb.Append(" <a target=\"_blank\"  href=\"" + urlValue + "\"><font color=blue>" + nameMarkup + "</font></a>");
                    sb.Append(" </div>");
                    sb.Append(" <div style=\"font-size: 13px;\">");
                    sb.Append(" <font size=2 >" + bodyMarkup + "</font>");
                    sb.Append(" </div>");
                    sb.Append(" <div>");
                    sb.Append(" <font color=#006600>" + shortUrl + "</font>&nbsp;&nbsp;<font size=2 color=#006600>" + timeValue + "</font>&nbsp;&nbsp;<a target=\"_blank\" href=\"" + url + "\"><font size=2  color=#666699>百度搜索</font></a>");
                    sb.Append("  </div>");
                    sb.Append(" </div>");

                    // NOTE(review): no request is issued inside the loop, so this pause only delays
                    // the response - confirm whether it is still needed.
                    System.Threading.Thread.Sleep(200);
                }
            }
            catch (Exception ex)
            {
                sb.Append(ex.Message+"&nbsp;<a href=\""+url+"\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            if (sb.ToString().Trim().Equals(""))
            {
                sb.Append("该网站相关Xpath出现改动,请根据改动及时修正匹配Xpath。" + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            return sb.ToString();
        }
Example #4
0
        /// <summary>
        /// Downloads the first Google (google.com.hk) result page for a keyword and renders the results
        /// as an HTML fragment.
        /// </summary>
        /// <param name="searchword">
        /// Search keyword. It is appended to the query URL as-is and URL-decoded (UTF-8) to highlight
        /// occurrences in the result title and summary.
        /// </param>
        /// <returns>
        /// One HTML block per result that could be parsed. If the download or parsing fails as a whole,
        /// the exception message followed by a link to the search page. If nothing could be parsed,
        /// a hint that the XPaths need to be updated.
        /// </returns>
        public string GetSearchResultFromGoogle(string searchword)
        {
            StringBuilder sb = new StringBuilder();
            string url = "http://www.google.com.hk/search?hl=zh-CN&source=hp&q=" + searchword + "&meta=&aq=f&aqi=g10&aql=&oq=&gs_rfai=";
            try
            {
                string html;
                // WebClient is IDisposable: release it as soon as the page has been downloaded.
                // NOTE(review): the page is read as gb2312 while the keyword is decoded as UTF-8 below - confirm.
                using (WebClient wc = new WebClient { Encoding = Encoding.GetEncoding("gb2312") })
                {
                    html = wc.DownloadString(url.Trim());
                }

                var doc = new HtmlDocument();
                doc.LoadHtml(html);

                // No date is scraped for Google results; today's date is shown for every entry.
                string timeValue = DateTime.Now.ToString("yyyy-MM-dd");

                // The decoded keyword is loop-invariant, so decode it once.
                string keyword = System.Web.HttpUtility.UrlDecode(searchword, Encoding.UTF8);
                // string.Replace throws for a null or empty search string; skip highlighting in that case
                // instead of failing every result.
                bool canHighlight = !string.IsNullOrEmpty(keyword);
                string keywordMarkup = "<font color=red>" + keyword + "</font>";

                // NOTE(review): scraped text, scraped links and the raw search word are written into the
                // markup unescaped - confirm the inputs are trusted or encode them at the call site.
                // Each iteration descends one more "/li[1]" level: result k is addressed as k nested <li> steps.
                string listr = "/li[1]";
                for (int i = 0; i <= 10; i++)
                {
                    try
                    {
                        // Two page layouts are tried: results under div[3] first, then under div[4].
                        string primaryItem = "/body[1]/div[5]/div[3]/div[1]/ol[1]" + listr;
                        string alternateItem = "/body[1]/div[5]/div[4]/div[1]/ol[1]" + listr;

                        // Title is mandatory: if both layouts fail the exception skips this result.
                        string nameValue;
                        try
                        {
                            nameValue = doc.DocumentNode.SelectSingleNode(primaryItem + "/h3[1]/a[1]").InnerText;
                        }
                        catch
                        {
                            nameValue = doc.DocumentNode.SelectSingleNode(alternateItem + "/h3[1]/a[1]").InnerText;
                        }

                        // Link is mandatory as well.
                        string urlValue;
                        try
                        {
                            urlValue = doc.DocumentNode.SelectSingleNode(primaryItem + "/h3[1]/a[1]/@href[1]").Attributes["href"].Value;
                        }
                        catch
                        {
                            urlValue = doc.DocumentNode.SelectSingleNode(alternateItem + "/h3[1]/a[1]/@href[1]").Attributes["href"].Value;
                        }

                        // Summary is optional: converted text of the primary <div>, else the raw InnerHtml
                        // of the primary <table>, else the raw InnerHtml of the alternate <div>, else empty.
                        string bodyValue = "";
                        try
                        {
                            bodyValue = doc.DocumentNode.SelectSingleNode(primaryItem + "/div[1]").InnerHtml;
                            bodyValue = new HtmlToText().ConvertHtml(bodyValue).Trim();
                        }
                        catch
                        {
                            try
                            {
                                bodyValue = doc.DocumentNode.SelectSingleNode(primaryItem + "/table[1]").InnerHtml;
                            }
                            catch
                            {
                                try
                                {
                                    bodyValue = doc.DocumentNode.SelectSingleNode(alternateItem + "/div[1]").InnerHtml;
                                }
                                catch
                                {
                                    bodyValue = "";
                                }
                            }
                        }

                        // Build every dynamic piece before appending so that a failure can never leave
                        // a half-written result block in the output.
                        string nameMarkup = canHighlight ? nameValue.Replace(keyword, keywordMarkup) : nameValue;
                        string bodyMarkup = canHighlight ? bodyValue.Replace(keyword, keywordMarkup) : bodyValue;
                        string shortUrl = urlValue.Length > 66 ? urlValue.Substring(0, 66) + "..." : urlValue;

                        sb.Append(" <div style=\"margin-top: 20px; margin-bottom: 20px;\">");
                        sb.Append(" <div>");
                        sb.Append(" <a target=\"_blank\"  href=\"" + urlValue + "\"><font color=blue>" + nameMarkup + "</font></a>");
                        sb.Append(" </div>");
                        sb.Append(" <div style=\"font-size: 13px;\">");
                        sb.Append(" <font size=2 >" + bodyMarkup + "</font>");
                        sb.Append(" </div>");
                        sb.Append(" <div>");
                        sb.Append(" <font color=#006600>" + shortUrl + "</font>&nbsp;&nbsp;<font size=2 color=#006600>" + timeValue + "</font>&nbsp;&nbsp;<a target=\"_blank\" href=\"" + url + "\"><font size=2  color=#666699>Google搜索</font></a>");
                        sb.Append("  </div>");
                        sb.Append(" </div>");
                    }
                    catch
                    {
                        // This result does not match either layout; move on to the next nesting level.
                        listr += "/li[1]";
                        continue;
                    }

                    listr += "/li[1]";

                    // NOTE(review): no request is issued inside the loop, so this pause only delays
                    // the response - confirm whether it is still needed.
                    System.Threading.Thread.Sleep(200);
                }
            }
            catch (Exception ex)
            {
                sb.Append(ex.Message + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            if (sb.ToString().Trim().Equals(""))
            {
                sb.Append("该网站相关Xpath出现改动,请根据改动及时修正匹配Xpath。" + "&nbsp;<a href=\"" + url + "\" target=\"_blank\"><font color=blue>此处查看</font></a>");
            }
            return sb.ToString();
        }
        /// <summary>
        /// Downloads the page configured in <paramref name="cnXpath"/> and renders five or six
        /// &lt;img&gt; elements (web, title, url, src, weburl) from the configured XPaths.
        /// </summary>
        /// <param name="cnXpath">Page URL, page encoding and the title/href/src XPaths 001-006.</param>
        /// <param name="title">When non-empty and <paramref name="alt"/> is empty, titles are read from the "title" attribute.</param>
        /// <param name="alt">When non-empty and <paramref name="title"/> is empty, titles are read from the "alt" attribute. In every other combination the node's InnerText is used.</param>
        /// <param name="src">Name of the attribute that holds the image source.</param>
        /// <param name="hrefpath">Prefix prepended to every link.</param>
        /// <param name="srcpath">Prefix prepended to every image source.</param>
        /// <param name="htmlname">Name from which "Videos" is removed to obtain the code passed to GetWebInfoByCode.</param>
        /// <param name="bid">ID of the last entry emitted so far; entries are numbered starting at bid + 1.</param>
        /// <param name="id">Receives the ID of the last entry emitted by this call.</param>
        /// <returns>The concatenated &lt;img&gt; elements.</returns>
        public string GetWebImagesXml(CnDataAcess.Entity.CnInterfaceVideoXpath cnXpath, string title, string alt, string src, string hrefpath, string srcpath, string htmlname,int bid,out int id)
        {
            string html;
            // WebClient is IDisposable: release it as soon as the page has been downloaded.
            using (WebClient wc = new WebClient { Encoding = Encoding.GetEncoding(cnXpath.CnWebEncode) })
            {
                html = wc.DownloadString(cnXpath.CnWebUrl);
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            // Decide where titles come from: "title" attribute, "alt" attribute, or (null) InnerText.
            string titleAttribute;
            if (!title.Equals("") && alt.Equals(""))
            {
                titleAttribute = "title";
            }
            else if (title.Equals("") && !alt.Equals(""))
            {
                titleAttribute = "alt";
            }
            else
            {
                titleAttribute = null;
            }

            string[] titleXpaths =
            {
                cnXpath.CnVideoTitle001, cnXpath.CnVideoTitle002, cnXpath.CnVideoTitle003,
                cnXpath.CnVideoTitle004, cnXpath.CnVideoTitle005, cnXpath.CnVideoTitle006
            };
            string[] hrefXpaths =
            {
                cnXpath.CnVideoHref001, cnXpath.CnVideoHref002, cnXpath.CnVideoHref003,
                cnXpath.CnVideoHref004, cnXpath.CnVideoHref005, cnXpath.CnVideoHref006
            };
            string[] srcXpaths =
            {
                cnXpath.CnVideoSrc001, cnXpath.CnVideoSrc002, cnXpath.CnVideoSrc003,
                cnXpath.CnVideoSrc004, cnXpath.CnVideoSrc005, cnXpath.CnVideoSrc006
            };

            // Entries 1-5 are mandatory; the sixth is optional.
            int requiredCount = 5;

            // Read all titles first (the sixth only when its XPath is configured).
            string[] titles = new string[titleXpaths.Length];
            for (int i = 0; i < titles.Length; i++)
            {
                if (i >= requiredCount && titleXpaths[i].Equals(""))
                {
                    titles[i] = titleXpaths[i];
                }
                else
                {
                    titles[i] = ReadVideoTitle(doc, titleXpaths[i], titleAttribute);
                }
            }

            HtmlToText ht = new HtmlToText();
            for (int i = 0; i < titles.Length; i++)
            {
                titles[i] = ht.ConvertHtml(titles[i]).Trim();
            }

            string code = htmlname.Replace("Videos", "");
            // NOTE(review): presumably index 0 is the site display name and index 1 the site URL - confirm in GetWebInfoByCode.
            htmlname = GetWebInfoByCode(code, 0);
            string weburl = GetWebInfoByCode(code, 1);

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < titles.Length; i++)
            {
                // The optional sixth entry is emitted only when its link XPath is configured.
                if (i >= requiredCount && hrefXpaths[i].Equals(""))
                {
                    continue;
                }

                bid++;
                AppendVideoImageEntry(sb, doc, bid, htmlname, titles[i], hrefpath, hrefXpaths[i], srcpath, srcXpaths[i], src, weburl);
            }
            id = bid;

            return sb.ToString();
        }

        /// <summary>
        /// Reads a title from the node selected by <paramref name="xpath"/>: the value of
        /// <paramref name="attributeName"/>, or the node's InnerText when it is null.
        /// </summary>
        private static string ReadVideoTitle(HtmlDocument doc, string xpath, string attributeName)
        {
            var node = doc.DocumentNode.SelectSingleNode(xpath);
            return attributeName == null ? node.InnerText : node.Attributes[attributeName].Value;
        }

        /// <summary>
        /// Returns the named attribute of the node selected by <paramref name="xpath"/>,
        /// or the empty XPath itself when no XPath is configured.
        /// </summary>
        private static string ReadVideoAttribute(HtmlDocument doc, string xpath, string attributeName)
        {
            return xpath.Equals("") ? xpath : doc.DocumentNode.SelectSingleNode(xpath).Attributes[attributeName].Value;
        }

        /// <summary>
        /// Appends one &lt;img&gt; element. In the link and the image source every "&amp;" is replaced by "$".
        /// </summary>
        private static void AppendVideoImageEntry(StringBuilder sb, HtmlDocument doc, int entryId, string webName, string entryTitle, string hrefPrefix, string hrefXpath, string srcPrefix, string srcXpath, string srcAttribute, string webUrl)
        {
            // NOTE(review): title, web name and web URL are written without XML escaping - confirm consumers tolerate that.
            sb.Append("<img id=\"" + entryId + "\">");
            sb.Append("<web>" + webName + "</web>");
            sb.Append("<title>" + entryTitle + "</title>");
            sb.Append("<url>" + hrefPrefix + ReadVideoAttribute(doc, hrefXpath, "href").Replace("&", "$") + "</url>");
            sb.Append("<src>" + srcPrefix + ReadVideoAttribute(doc, srcXpath, srcAttribute).Replace("&", "$") + "</src>");
            sb.Append("<weburl>" + webUrl + "</weburl>");
            sb.Append("</img>");
        }
Example #6
0
        /// <summary>
        /// Downloads a Youdao search-result page, extracts up to ten result entries from it
        /// and stores each one through <c>InterfaceInfo.InsertCnInterfaceInfo</c>.
        /// </summary>
        /// <param name="url">
        /// Address of the result page. It is trimmed before the download and is also
        /// recorded as the source URL of every stored entry.
        /// </param>
        /// <returns>Number of entries for which the insert call returned a value greater than zero.</returns>
        public static int GetCnInterfaceInfoFromYoudao(string url)
        {
            int insertcount = 0;

            string html;
            // WebClient is IDisposable: release it as soon as the page has been fetched.
            using (WebClient wc = new WebClient { Encoding = Encoding.GetEncoding("gb2312") })
            {
                html = wc.DownloadString(url.Trim());
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            for (int i = 1; i <= 10; i++)
            {
                // Every field of the i-th result lives under the same <li> element.
                string itemXpath = "/body[1]/div[6]/div[2]/ul[1]/li[" + i.ToString() + "]";
                string nameXpath = itemXpath + "/h3[1]/a[1]";
                string urlXpath = itemXpath + "/h3[1]/a[1]/@href[1]";
                string bodyXpath = itemXpath + "/div[1]";
                string timeXpath = itemXpath + "/p[1]/span[1]";

                string nameValue = "";
                string urlValue = "";
                string bodyValue = "";
                string timeValue = "";

                try
                {
                    // Name and link are mandatory: any failure here skips the entry.
                    nameValue = doc.DocumentNode.SelectSingleNode(nameXpath).InnerText;
                    nameValue = new HtmlToText().ConvertHtml(nameValue).Trim();
                    urlValue = doc.DocumentNode.SelectSingleNode(urlXpath).Attributes["href"].Value;
                }
                catch
                {
                    // Deliberate best effort: a missing or malformed entry must not abort the page.
                    continue;
                }

                try
                {
                    // The summary is optional; fall back to an empty body.
                    bodyValue = doc.DocumentNode.SelectSingleNode(bodyXpath).InnerHtml;
                    bodyValue = new HtmlToText().ConvertHtml(bodyValue).Trim();
                }
                catch
                {
                    bodyValue = "";
                }

                try
                {
                    // Keep the text starting at the last "20" and drop a trailing "-->" marker.
                    timeValue = doc.DocumentNode.SelectSingleNode(timeXpath).InnerText;
                    timeValue = timeValue.Substring(timeValue.LastIndexOf("20")).Replace("-->", "");
                }
                catch
                {
                    timeValue = "";
                }

                // Parse once; when the text is not a valid date use today's date at midnight.
                // Kind is set to Unspecified to match what parsing a "yyyy-MM-dd" string yields.
                DateTime appearTime;
                if (!DateTime.TryParse(timeValue, out appearTime))
                {
                    appearTime = DateTime.SpecifyKind(DateTime.Today, DateTimeKind.Unspecified);
                }

                CnInterfaceInfo cnf = new CnInterfaceInfo
                {
                    CnInterfaceTypeID = 2,
                    CnInterfaceName = nameValue,
                    CnInterfacebody = bodyValue,
                    CnInterfaceUrl = urlValue,
                    CnInterfaceSource = "有道搜索",
                    CnInterfaceSourceUrl = url,
                    CnInterfaceAppearTime = appearTime
                };
                InterfaceInfo IFI = new InterfaceInfo();
                int cont = IFI.InsertCnInterfaceInfo(cnf);
                if (cont > 0)
                {
                    insertcount++;
                }

                // Short pause after each processed entry.
                System.Threading.Thread.Sleep(200);
            }

            return insertcount;
        }
Example #7
0
        /// <summary>
        /// Downloads a list page, extracts up to <paramref name="pagesize"/> entries from it
        /// and stores each one through <c>InterfaceInfo.InsertCnInterfaceInfo</c>.
        /// </summary>
        /// <remarks>
        /// The XPath arguments are templates: the token <c>cnface</c> is replaced with an index
        /// derived from the 1-based item number, depending on <paramref name="sourceno"/>.
        /// An entry whose extraction, date conversion or insert throws is skipped.
        /// </remarks>
        /// <param name="wc">WebClient used to download the list page.</param>
        /// <param name="url">URL of the list page.</param>
        /// <param name="cnurlxp">XPath template of the element whose <c>href</c> attribute holds the entry address.</param>
        /// <param name="cnurlRgx">Optional regex applied to the address; capture group 1 is kept. Ignored when null or empty.</param>
        /// <param name="cnnamexp">XPath template of the entry name.</param>
        /// <param name="cnnameRgx">Optional regex applied to the name; capture group 1 is kept. Ignored when null or empty.</param>
        /// <param name="cnbodyXp">XPath template of the entry body.</param>
        /// <param name="cnbodyRgx">Optional regex applied to the body; capture group 1 is kept. Ignored when null or empty.</param>
        /// <param name="cntimeXp">XPath template of the publication time.</param>
        /// <param name="cntimeRgx">Optional regex applied to the time text; capture group 1 is kept. Ignored when null or empty.</param>
        /// <param name="cntimeF">Publication time format. Not used by this method.</param>
        /// <param name="cnsource">Source name stored with every entry.</param>
        /// <param name="cnsourceurl">Source URL stored with every entry.</param>
        /// <param name="cntypeid">Type id stored with every entry.</param>
        /// <param name="sourceno">
        /// Selects the placeholder scheme. 1: url/name use <c>i * 2</c>, body uses <c>i</c>, time uses <c>i * 3</c>.
        /// 2: every template uses <c>i</c> and the address is prefixed with <c>http://www.skycn.com/</c>.
        /// Any other value leaves the templates unchanged.
        /// </param>
        /// <param name="pagesize">Number of items to try on the page.</param>
        /// <returns>Sum of the positive values returned by the insert calls.</returns>
        public static int GetCnInterfaceInfoEntity(WebClient wc,
                                                   string url,
                                                   string cnurlxp,
                                                   string cnurlRgx,
                                                   string cnnamexp,
                                                   string cnnameRgx,
                                                   string cnbodyXp,
                                                   string cnbodyRgx,
                                                   string cntimeXp,
                                                   string cntimeRgx,
                                                   string cntimeF,
                                                   string cnsource,
                                                   string cnsourceurl,
                                                   int cntypeid,
                                                   int sourceno,
                                                   int pagesize)
        {
            var html = wc.DownloadString(url);
            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            int insertcount = 0;
            for (int i = 1; i <= pagesize; i++)
            {
                try
                {
                    // Build the XPaths for this item from the untouched templates. Overwriting the
                    // templates themselves would consume the "cnface" placeholder on the first pass
                    // and make every later iteration re-read item 1.
                    string urlPrefix = "";
                    string itemUrlXp = cnurlxp;
                    string itemNameXp = cnnamexp;
                    string itemBodyXp = cnbodyXp;
                    string itemTimeXp = cntimeXp;
                    if (sourceno == 1)
                    {
                        itemUrlXp = cnurlxp.Replace("cnface", (i * 2).ToString());
                        itemNameXp = cnnamexp.Replace("cnface", (i * 2).ToString());
                        itemBodyXp = cnbodyXp.Replace("cnface", i.ToString());
                        itemTimeXp = cntimeXp.Replace("cnface", (i * 3).ToString());
                    }
                    else if (sourceno == 2)
                    {
                        itemUrlXp = cnurlxp.Replace("cnface", i.ToString());
                        itemNameXp = cnnamexp.Replace("cnface", i.ToString());
                        itemBodyXp = cnbodyXp.Replace("cnface", i.ToString());
                        itemTimeXp = cntimeXp.Replace("cnface", i.ToString());
                        urlPrefix = "http://www.skycn.com/";
                    }

                    // Entry address.
                    var cnurl = urlPrefix + doc.DocumentNode.SelectSingleNode(itemUrlXp).Attributes["href"].Value;
                    if (!string.IsNullOrEmpty(cnurlRgx))
                    {
                        cnurl = Regex.Match(cnurl, cnurlRgx).Groups[1].Value;
                    }

                    // Entry name.
                    var title = doc.DocumentNode.SelectSingleNode(itemNameXp).InnerHtml;
                    if (!string.IsNullOrEmpty(cnnameRgx))
                    {
                        title = Regex.Match(title, cnnameRgx).Groups[1].Value;
                    }
                    title = new HtmlToText().ConvertHtml(title).Trim();

                    // Entry body. The regex result belongs in body, not in title.
                    var body = doc.DocumentNode.SelectSingleNode(itemBodyXp).InnerHtml;
                    if (!string.IsNullOrEmpty(cnbodyRgx))
                    {
                        body = Regex.Match(body, cnbodyRgx).Groups[1].Value;
                    }
                    body = new HtmlToText().ConvertHtml(body).Trim();

                    // Publication time.
                    var ptStr = doc.DocumentNode.SelectSingleNode(itemTimeXp).InnerHtml;
                    if (!string.IsNullOrEmpty(cntimeRgx))
                    {
                        ptStr = Regex.Match(ptStr, cntimeRgx).Groups[1].Value;
                    }

                    CnInterfaceInfo cnf = new CnInterfaceInfo
                    {
                        CnInterfaceTypeID = cntypeid,
                        CnInterfaceName = title,
                        CnInterfacebody = body,
                        CnInterfaceUrl = cnurl,
                        CnInterfaceSource = cnsource,
                        CnInterfaceSourceUrl = cnsourceurl,
                        CnInterfaceAppearTime = Convert.ToDateTime(ptStr)
                    };
                    InterfaceInfo IFI = new InterfaceInfo();
                    int cont = IFI.InsertCnInterfaceInfo(cnf);
                    if (cont > 0)
                    {
                        insertcount += cont;
                    }
                }
                catch
                {
                    // Deliberate best effort: a missing node, unparsable date or failed insert
                    // skips this item (without the pause below) and moves on to the next one.
                    continue;
                }

                // Short pause after each successfully processed item.
                System.Threading.Thread.Sleep(200);
            }
            return insertcount;
        }