/// <summary>
/// Converts every <c>//Crawlers/Crawler</c> element of <paramref name="xmlDoc"/> into a
/// <c>CrawlerRequestConfig</c> and appends it to <paramref name="lstReqCfg"/>.
/// Elements whose <c>key</c> attribute is missing or empty are skipped.
/// </summary>
/// <param name="xmlDoc">Configuration document that is queried for crawler definitions.</param>
/// <param name="lstReqCfg">List that receives one entry per crawler element with a usable key.</param>
/// <remarks>
/// Values read per crawler element: the <c>key</c> and <c>description</c> attributes, the
/// <c>source</c> attribute of <c>Request</c>, and the child elements <c>Request/Url/Address</c>,
/// <c>Request/Url/Pattern</c>, <c>Request/Method</c> and <c>Request/ContentType</c>.
/// A value that is absent from the XML is simply not assigned, so one incomplete element
/// no longer aborts the conversion of the remaining ones.
/// </remarks>
private static void ConvertCrawlerConfiguration(XmlDocument xmlDoc, List<RequestConfig> lstReqCfg)
{
    XmlNodeList crawlerNodes = xmlDoc.SelectNodes("//Crawlers/Crawler");
    if (crawlerNodes == null)
    {
        return;
    }

    foreach (XmlNode configNode in crawlerNodes)
    {
        CrawlerRequestConfig curCrawlerReqConfig = new CrawlerRequestConfig();

        // The attribute indexer returns null for an attribute that is not present,
        // so each lookup is checked before its Value is read.
        XmlAttributeCollection attributes = configNode.Attributes;
        XmlAttribute keyAttribute = attributes != null ? attributes["key"] : null;
        XmlAttribute descriptionAttribute = attributes != null ? attributes["description"] : null;

        if (keyAttribute != null)
        {
            curCrawlerReqConfig.CrawlerKey = keyAttribute.Value;
        }

        if (descriptionAttribute != null)
        {
            curCrawlerReqConfig.CrawlerDescription = descriptionAttribute.Value;
        }

        // A crawler without a key is not added to the list.
        if (string.IsNullOrEmpty(curCrawlerReqConfig.CrawlerKey))
        {
            continue;
        }

        // Evaluate each XPath once and reuse the resulting node.
        XmlNode sourceNode = configNode.SelectSingleNode("Request/@source");
        XmlNode addressNode = configNode.SelectSingleNode("Request/Url/Address");
        XmlNode patternNode = configNode.SelectSingleNode("Request/Url/Pattern");
        XmlNode methodNode = configNode.SelectSingleNode("Request/Method");
        XmlNode contentTypeNode = configNode.SelectSingleNode("Request/ContentType");

        if (sourceNode != null)
        {
            curCrawlerReqConfig.Source = sourceNode.Value;
        }

        if (addressNode != null)
        {
            curCrawlerReqConfig.RequestUrl = addressNode.InnerText;
        }

        if (patternNode != null)
        {
            curCrawlerReqConfig.RequestUrlPattern = patternNode.InnerText;
        }

        // HTTP verbs are protocol tokens, so they are upper-cased with the invariant
        // culture; a culture-sensitive ToUpper() can alter letters such as 'i'.
        string configuredMethod = methodNode != null ? methodNode.InnerText.Trim() : null;
        if (!string.IsNullOrEmpty(configuredMethod))
        {
            curCrawlerReqConfig.RequestMethod = configuredMethod.ToUpperInvariant();
        }
        else
        {
            // No method configured: default to GET.
            curCrawlerReqConfig.RequestMethod = HttpMethod.Get.ToString().ToUpperInvariant();
        }

        if (contentTypeNode != null)
        {
            curCrawlerReqConfig.ContentType = contentTypeNode.InnerText;
        }

        //TODO other configuration item to be done
        lstReqCfg.Add(curCrawlerReqConfig);
    }
}
/// <summary>
/// Creates a crawler from the request configuration that <c>CrawlerConfigHelper</c> returns for
/// <paramref name="crawlerKey"/>, then creates and initializes the underlying HTTP request.
/// </summary>
/// <param name="crawlerKey">Key passed to <c>CrawlerConfigHelper.GetCrawlerRequestConfig</c>.</param>
/// <param name="dicParameters">Optional parameters stored on the configuration as <c>UrlParas</c>; may be null.</param>
/// <exception cref="ArgumentException">
/// No configuration of type <c>CrawlerRequestConfig</c> was returned for <paramref name="crawlerKey"/>.
/// </exception>
/// <exception cref="NotSupportedException">
/// The configured request URL did not produce an <see cref="HttpWebRequest"/>.
/// </exception>
public CommonCrawler(string crawlerKey, Dictionary<String, String> dicParameters = null)
{
    CrawlerRequestConfig reqConfig = CrawlerConfigHelper.GetCrawlerRequestConfig(crawlerKey) as CrawlerRequestConfig;
    if (reqConfig == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException on the next line.
        throw new ArgumentException(
            string.Format("No CrawlerRequestConfig was found for crawler key '{0}'.", crawlerKey),
            "crawlerKey");
    }

    // NOTE(review): if the helper hands out a shared/cached instance, this assignment is
    // visible to every other user of that configuration - confirm against CrawlerConfigHelper.
    reqConfig.UrlParas = dicParameters;

    // Keep the resolved request configuration on the crawler instance.
    CrawlerRequestConfig = reqConfig;

    // Lift the per-host connection limit BEFORE the request is created: per the
    // ServicePointManager documentation the setting only affects ServicePoint objects
    // that are initialized after the change.
    ServicePointManager.DefaultConnectionLimit = Int32.MaxValue;

    // WebRequest.Create returns a scheme-specific subclass; only HTTP(S) requests are usable here.
    HttpWebRequest httpRequest = WebRequest.Create(reqConfig.RequestUrl) as HttpWebRequest;
    if (httpRequest == null)
    {
        throw new NotSupportedException(
            string.Format("The request URL '{0}' of crawler '{1}' is not an HTTP(S) address.", reqConfig.RequestUrl, crawlerKey));
    }

    this.webRequest = httpRequest;

    // Apply the remaining request configuration items to the created request.
    InitWebRequest(reqConfig);
}