コード例 #1
0
        /// <summary>
        /// Downloads the page at <paramref name="pageUrl"/> and saves it as a UTF-8 file
        /// under the detail source file directory, unless that local file already exists.
        /// </summary>
        /// <param name="pageUrl">Address of the page to fetch; also used to derive the local file path.</param>
        /// <param name="keyWords">Text used as the value of the second completion check.</param>
        /// <remarks>
        /// Exceptions raised by the request or by saving the file propagate to the caller
        /// unchanged, with their original stack trace.
        /// </remarks>
        private void GetPageFromBaidu(string pageUrl, string keyWords)
        {
            // Two "TextExist" checks are handed to the request: one for the fixed site name
            // and one for the caller's key words.
            // NOTE(review): presumably the response only counts as complete when both texts
            // are present - confirm against RunPage.GetTextByRequest.
            Proj_CompleteCheckList completeCheckList = new Proj_CompleteCheckList();

            Proj_CompleteCheck checkSiteName = new Proj_CompleteCheck();
            checkSiteName.CheckType  = DocumentCompleteCheckType.TextExist;
            checkSiteName.CheckValue = "cn.linkedin.com";
            completeCheckList.Add(checkSiteName);

            Proj_CompleteCheck checkKeyWord = new Proj_CompleteCheck();
            checkKeyWord.CheckType  = DocumentCompleteCheckType.TextExist;
            checkKeyWord.CheckValue = keyWords;
            completeCheckList.Add(checkKeyWord);

            string localPageFilePath = this.RunPage.GetFilePath(pageUrl, this.RunPage.GetDetailSourceFileDir());
            if (File.Exists(localPageFilePath))
            {
                // A local copy already exists; nothing to download.
                return;
            }

            // No try/catch here on purpose: the previous "catch { throw ex; }" wrappers only
            // reset the stack trace without handling anything.
            string responseString = this.RunPage.GetTextByRequest(pageUrl, null, false, 3000, SysConfig.WebPageRequestTimeout, Encoding.UTF8, null, null, true, Proj_DataAccessType.OtherAccessType, completeCheckList, 1000);
            this.RunPage.SaveFile(responseString, localPageFilePath, Encoding.UTF8);
        }
コード例 #2
0
        /// <summary>
        /// Requests <paramref name="pageUrl"/> with an <c>NDAWebClient</c> via <c>OpenReadAsync</c>,
        /// polls until a response has been stored for the client id, and returns it as text.
        /// </summary>
        /// <param name="pageUrl">Address to request; also used as the client id under which the response is looked up.</param>
        /// <param name="listRow">Passed, together with the response text, to <c>CheckRequestCompleteFile</c>.</param>
        /// <param name="needProxy">When true, a proxy server is taken from <c>RunPage.CurrentProxyServers</c> and released again when the method exits.</param>
        /// <param name="intervalAfterLoaded">Extra time, in milliseconds, to sleep after the response was received (truncated to <c>int</c>).</param>
        /// <param name="timeout">Assigned to the client's <c>Timeout</c> and used as the upper bound, in milliseconds, of the polling loop.</param>
        /// <param name="encoding">Set as the client's response encoding and used to decode a <c>byte[]</c> response.</param>
        /// <param name="cookie">Value of the "cookie" header; the header is omitted when this is null or blank.</param>
        /// <param name="xRequestedWith">Not read by this method; the "x-requested-with" header is always sent as "XMLHttpRequest".</param>
        /// <param name="autoAbandonDisableProxy">When true and a proxy was in use, a failed request is reported through <c>CurrentProxyServers.Error</c>.</param>
        /// <param name="dataAccessType">Not read by this method.</param>
        /// <param name="completeChecks">Not read by this method.</param>
        /// <param name="intervalProxyRequest">Passed to <c>CurrentProxyServers.BeginUse</c> when a proxy is requested.</param>
        /// <returns>The response text; null when the stored response is neither a string nor a byte array.</returns>
        /// <exception cref="NoneProxyException">Rethrown unchanged.</exception>
        /// <exception cref="GrabRequestException">Wraps every other failure, including the polling timeout.</exception>
        public string GetTextByRequest(string pageUrl, Dictionary <string, string> listRow, bool needProxy, decimal intervalAfterLoaded, int timeout, Encoding encoding, string cookie, string xRequestedWith, bool autoAbandonDisableProxy, Proj_DataAccessType dataAccessType, Proj_CompleteCheckList completeChecks, int intervalProxyRequest)
        {
            // Pause between two look-ups of the response, in milliseconds.
            const int pollInterval = 3000;

            NDAWebClient client = null;

            try
            {
                client    = new NDAWebClient();
                client.Id = pageUrl;
                client.ResponseEncoding = encoding;
                System.Net.ServicePointManager.DefaultConnectionLimit = 512;
                client.Timeout = timeout;
                if (needProxy)
                {
                    client.ProxyServer = this.RunPage.CurrentProxyServers.BeginUse(intervalProxyRequest);
                }

                string userAgent = this.RunPage.CurrentUserAgents.GetOneUserAgent();
                client.Headers.Add("user-agent", userAgent);
                if (!CommonUtil.IsNullOrBlank(cookie))
                {
                    client.Headers.Add("cookie", cookie);
                }
                client.Headers.Add("x-requested-with", "XMLHttpRequest");

                client.OpenReadCompleted += client_OpenReadCompleted;
                client.OpenReadAsync(new Uri(pageUrl));

                // Poll for the response stored under the client id until it shows up
                // or the accumulated waiting time reaches the timeout.
                int    waitingTime = 0;
                object data        = null;
                while (data == null && waitingTime < timeout)
                {
                    data = GetResponseString(client.Id);
                    if (data == null)
                    {
                        waitingTime = waitingTime + pollInterval;
                        Thread.Sleep(pollInterval);
                    }
                }

                if (data == null)
                {
                    throw new Exception("访问超时.");
                }

                RemoveResponseData(client.Id);

                // A stored exception means the request itself failed; surface it so that it
                // gets wrapped by the general handler below.
                Exception failure = data as Exception;
                if (failure != null)
                {
                    throw failure;
                }

                string s     = data as string;
                byte[] bytes = data as byte[];
                if (bytes != null)
                {
                    s = encoding.GetString(bytes);
                }

                this.CheckRequestCompleteFile(s, listRow);

                if (needProxy)
                {
                    this.RunPage.CurrentProxyServers.Success(client.ProxyServer);
                }

                // Wait a little longer to give asynchronously loaded data time to arrive.
                Thread.Sleep((int)intervalAfterLoaded);

                return(s);
            }
            catch (NoneProxyException)
            {
                // Rethrow as-is; "throw;" keeps the original stack trace.
                throw;
            }
            catch (Exception ex)
            {
                string errorInfo = "";

                // Only touch the proxy when one was actually acquired: the client creation or
                // BeginUse may have failed, and dereferencing null here would hide the real error.
                if (needProxy && client != null && client.ProxyServer != null)
                {
                    bool abandoned = false;
                    if (autoAbandonDisableProxy)
                    {
                        this.RunPage.CurrentProxyServers.Error(client.ProxyServer);
                        abandoned = client.ProxyServer.IsAbandon;
                    }

                    string prefix = abandoned ? "放弃代理服务器:" : "代理服务器:";
                    errorInfo = prefix + client.ProxyServer.IP + ":" + client.ProxyServer.Port.ToString() + ". ";
                }

                errorInfo = "获取网页失败.\r\n" + errorInfo + " " + pageUrl;
                throw new GrabRequestException(errorInfo, ex);
            }
            finally
            {
                // Release the proxy only if one was acquired (see the guard in the catch block).
                if (needProxy && client != null && client.ProxyServer != null)
                {
                    this.RunPage.CurrentProxyServers.EndUse(client.ProxyServer);
                }
            }
        }