Пример #1
0
        /// <summary>
        /// Chooses the text encoding used to decode a response body, based on the request configuration.
        /// </summary>
        /// <param name="requestConfig">Request settings whose <c>Encoding</c> value selects the encoding.</param>
        /// <returns>
        /// <see cref="Encoding.UTF8"/> for <c>EEncoding.UTF8</c>; the encoding looked up by the enum member's
        /// name for <c>ASCII</c>, <c>Unicode</c> and <c>GBK</c>; <see cref="Encoding.Unicode"/> for
        /// <c>EEncoding.Auto</c> and every other value.
        /// </returns>
        private static Encoding GetEncoding(ConfigOfRequest requestConfig)
        {
            var configured = requestConfig.Encoding;

            if (configured == EEncoding.UTF8)
            {
                return Encoding.UTF8;
            }

            bool isResolvedByName = configured == EEncoding.ASCII
                                    || configured == EEncoding.Unicode
                                    || configured == EEncoding.GBK;
            if (isResolvedByName)
            {
                // The enum member name doubles as the encoding name passed to the lookup.
                return Encoding.GetEncoding(configured.ToString());
            }

            // TODO: for EEncoding.Auto, detect the encoding from the page itself.
            // Until then Auto (and any unrecognised value) falls back to UTF-16.
            return Encoding.Unicode;
        }
Пример #2
0
        /// <summary>
        /// Creates a web request for <paramref name="url"/> and applies the HTTP method and
        /// user agent taken from <paramref name="requestConfig"/>.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="requestConfig">Request settings supplying <c>Method</c> and <c>UserAgent</c>.</param>
        /// <returns>The configured request; it has not been sent yet.</returns>
        private static HttpWebRequest GetHttpWebRequest(string url, ConfigOfRequest requestConfig)
        {
            WebRequest created = WebRequest.Create(url);

            // The direct cast is deliberate: a non-HTTP scheme fails here with InvalidCastException.
            var httpRequest = (HttpWebRequest)created;
            httpRequest.Method = requestConfig.Method;
            httpRequest.UserAgent = requestConfig.UserAgent;

            return httpRequest;
        }
Пример #3
0
 /// <summary>
 /// Populates this instance from <paramref name="element"/>: reads the spider name attribute,
 /// builds the request configuration from the first matching descendant element, and creates one
 /// grab configuration per descendant element named after <c>IGrabConfig</c>.
 /// </summary>
 /// <param name="element">XML element holding the serialized configuration.</param>
 public override void LoadXElement(XElement element)
 {
     var nameAttribute = element.Attribute(nameof(SpiderName));
     SpiderName = nameAttribute.Value;

     var requestElement = element.Descendants(nameof(ConfigOfRequest)).First();
     RequestConfig = new ConfigOfRequest(requestElement);

     // The list is assigned before it is filled because each grab config is created with a
     // reference to this instance.
     GrabConfigs = new List<IGrabConfig>();
     var grabElements = element.Descendants(nameof(IGrabConfig));
     foreach (var grabElement in grabElements)
     {
         var grabConfig = IGrabConfig.GetGrabConfig(grabElement, this);
         GrabConfigs.Add(grabConfig);
     }
 }
Пример #4
0
        /// <summary>
        /// Downloads the page (or sequence of pages) described by <paramref name="requestConfig"/>,
        /// passes each page's text to <c>GrabContentByGrabType</c>, and reports every outcome through
        /// <c>TriggerOnGrabFinish</c>.
        /// </summary>
        /// <param name="requestConfig">
        /// Request settings: the URL (a format string with one placeholder for
        /// <c>URLStrategy.IncreaseByValue</c>), the URL strategy, and the paging values
        /// <c>StartAt</c>, <c>IncreaseBy</c> and <c>StopWhenLT</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> when grabbing is switched off (<c>IsOn</c> is false), when the URL strategy is
        /// not handled, or when the single page of the default strategy was grabbed successfully;
        /// otherwise <c>false</c>. The paged strategy ends with <c>false</c> because both its stop
        /// condition and any error clear the flag.
        /// </returns>
        /// <remarks>
        /// Failures while creating the request, fetching the response or reading the body are
        /// reported through <c>TriggerOnGrabFinish</c> and turn the result into <c>false</c>
        /// instead of escaping as exceptions.
        /// </remarks>
        public bool StartGrabbing(ConfigOfRequest requestConfig)
        {
            bool isSuccess = true;

            if (!IsOn)
            {
                return isSuccess;
            }

            #region GrabContentByRequestConfig
            // Grab the content.
            switch (requestConfig.URLStrategy)
            {
                case URLStrategy.Default:
                {
                    var orientURL = requestConfig.URL;
                    try
                    {
                        // The download is inside the try so network errors are reported, not thrown.
                        string pageString = DownloadPageString(orientURL, requestConfig);
                        Result result = GrabContentByGrabType(pageString);
                        TriggerOnGrabFinish(orientURL, result.ResultCode == EResultCode.Success, result.Message);
                        isSuccess = result.ResultCode == EResultCode.Success;
                    }
                    catch (Exception ex)
                    {
                        TriggerOnGrabFinish(orientURL, false, "抓取出现异常:" + ex.ToString());
                        isSuccess = false;
                    }
                    break;
                }

                case URLStrategy.IncreaseByValue:
                {
                    int stopBy = requestConfig.StopWhenLT;
                    int increaseValue = requestConfig.StartAt;
                    bool keepGoing = true;

                    while (keepGoing)
                    {
                        string increaseURL = string.Format(requestConfig.URL, increaseValue);
                        try
                        {
                            string pageString = DownloadPageString(increaseURL, requestConfig);
                            if (pageString.Length <= stopBy)
                            {
                                // A page this short is treated as the end of the sequence.
                                TriggerOnGrabFinish(increaseURL, false, "抓取达成终止条件而终止:页面数据长度(" + pageString.Length + ")未达到设定标准(" + stopBy + ")");
                                isSuccess = false;
                                keepGoing = false;
                            }
                            else
                            {
                                Result result = GrabContentByGrabType(pageString, SpiderConfig.SpiderName + increaseValue);
                                TriggerOnGrabFinish(increaseURL, result.ResultCode == EResultCode.Success, result.Message);
                                isSuccess = isSuccess && result.ResultCode == EResultCode.Success;
                            }
                        }
                        catch (Exception ex)
                        {
                            // Any failure ends the paging loop; the remaining pages are not attempted.
                            TriggerOnGrabFinish(increaseURL, false, "抓取出现异常:" + ex.ToString());
                            isSuccess = false;
                            keepGoing = false;
                        }

                        if (keepGoing)
                        {
                            increaseValue += requestConfig.IncreaseBy;
                        }
                    }
                    break;
                }

                default:
                    break;
            }
            #endregion

            return isSuccess;
        }

        /// <summary>
        /// Requests <paramref name="url"/> and returns the whole response body as text, decoded with
        /// the encoding selected by <paramref name="requestConfig"/>.
        /// </summary>
        /// <param name="url">Address to download.</param>
        /// <param name="requestConfig">Request settings used to build the request and pick the encoding.</param>
        /// <returns>The response body as a string.</returns>
        private static string DownloadPageString(string url, ConfigOfRequest requestConfig)
        {
            HttpWebRequest request = GetHttpWebRequest(url, requestConfig);
            using (WebResponse response = request.GetResponse())
            {
                Encoding encoding = GetEncoding(requestConfig);
                using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
                {
                    return reader.ReadToEnd();
                }
            }
        }