/// <summary>
/// Fetches the search-result web page for the platform named in <paramref name="args"/>.
/// </summary>
/// <param name="args">
/// Fetch arguments. Its <c>Platform</c> selects the web page service, and the whole
/// object is passed on to that service's <c>QuerySearchContent</c>.
/// </param>
/// <returns>
/// The container produced by the service, or a null data container when no service
/// could be created, the query threw, or the query returned <c>null</c>.
/// </returns>
protected virtual IDataContainer FetchPlatformSearchWebPage(IFetchWebPageArgument args)
{
    // Start from the placeholder so a missing service or a failed query still yields a container.
    IDataContainer container = DataContainer.CreateNullDataContainer();

    // Resolve the web spider service that matches the requested platform.
    BaseWebPageService service = BaseWebPageService.CreateWebPageService(args.Platform);
    if (service != null)
    {
        try
        {
            container = service.QuerySearchContent(args);
        }
        catch (Exception error)
        {
            // Query failures are logged here and not propagated to the caller.
            Logger.Error(error);
        }
    }

    // Never hand back null: substitute the placeholder container instead.
    return container ?? DataContainer.CreateNullDataContainer();
}
/// <summary>
/// Requests the Guomei search page for the keyword carried by <paramref name="queryParas"/>
/// and returns the raw response text.
/// </summary>
/// <param name="queryParas">
/// Query arguments. <c>KeyWord</c> is required; <c>ResolvedUrl</c>, when set, supplies the
/// request address and takes precedence over the keyword-based template address.
/// </param>
/// <returns>
/// The response content returned by the request proxy, or <see cref="string.Empty"/> when
/// the keyword is null or empty.
/// </returns>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    string keyWord = queryParas.KeyWord;
    if (string.IsNullOrEmpty(keyWord))
    {
        return string.Empty;
    }

    // Load the cookies stored for this site.
    var ckVisitor = new LazyCookieVistor();
    var cks = ckVisitor.LoadCookies(GuomeiSiteUrl);

    // Prefer the already-formatted query address; otherwise build one from the template.
    string searchUrl = null != queryParas.ResolvedUrl
        ? queryParas.ResolvedUrl.Url
        : string.Format(templateOfSearchUrl, keyWord);

    var client = GuomeiHttpClient;

    // Host header follows the host of the address actually being requested.
    client.Client.DefaultRequestHeaders.Host = new Uri(searchUrl).Host;

    // Referrer is always the keyword-based template address, even when a resolved URL is used.
    // NOTE(review): the keyword is inserted into the template without URL encoding. The
    // previous code computed an encoded copy but never used it (removed as dead code).
    // Confirm whether the template expects an encoded keyword.
    client.Client.DefaultRequestHeaders.Referrer = new Uri(string.Format(templateOfSearchUrl, keyWord));

    // Hand the site's cookies to the client before sending.
    client.ChangeGlobleCookies(cks, GuomeiSiteUrl);

    // Send the request.
    var clientProxy = new HttpServerProxy() { Client = client.Client, KeepAlive = true };

    // Debugging note: do not inspect the response with the built-in text viewer; it is buggy
    // and shows incomplete text. Open it as JSON to see the complete string.
    return clientProxy.GetRequestTransfer(searchUrl, null);
}
/// <summary>
/// Requests the Tmall search page for the keyword carried by <paramref name="queryParas"/>
/// and returns the raw response text.
/// </summary>
/// <param name="queryParas">
/// Query arguments. <c>KeyWord</c> is required. <c>ResolvedUrl</c>, when set, is used as the
/// request address; otherwise the address is built from the template, the keyword and the
/// sort value taken from <c>OrderFiled</c> ("s" when no order field is set).
/// </param>
/// <returns>
/// The response content returned by the request proxy, or <see cref="string.Empty"/> when
/// the keyword is null or empty.
/// </returns>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    string keyWord = queryParas.KeyWord;
    if (string.IsNullOrEmpty(keyWord))
    {
        return string.Empty;
    }

    // Cookies stored for the Tmall site.
    var cookieVisitor = new LazyCookieVistor();
    var siteCookies = cookieVisitor.LoadCookies(tmallSiteUrl);

    // Tmall normally arrives with a resolved search URL; without one, fall back to the
    // default keyword-based address assembled from the template.
    string requestUrl;
    if (queryParas.ResolvedUrl == null)
    {
        // "s" is the comprehensive (default) ordering.
        string sortValue = queryParas.OrderFiled == null ? "s" : queryParas.OrderFiled.FieldValue;
        requestUrl = string.Format(templateOfSearchUrl, keyWord, sortValue);
    }
    else
    {
        requestUrl = queryParas.ResolvedUrl.Url;
    }

    // Hand the site's cookies to the client before sending.
    var httpClient = TmallHttpClient;
    httpClient.ChangeGlobleCookies(siteCookies, tmallSiteUrl);

    // Send the request.
    // Debugging note: do not inspect the response with the built-in text viewer; it is buggy
    // and shows incomplete text. Open it as JSON to see the complete string.
    var proxy = new HttpServerProxy() { Client = httpClient.Client, KeepAlive = true };
    string responseText = proxy.GetRequestTransfer(requestUrl, null);
    return responseText;
}
/// <summary>
/// Requests the Pdd search page for the keyword carried by <paramref name="queryParas"/>
/// and returns the raw response text.
/// </summary>
/// <param name="queryParas">
/// Query arguments. <c>KeyWord</c> is required. <c>ResolvedUrl</c>, when set, is used as the
/// request address; otherwise the address is built from the template, the keyword, the
/// one-based page number (<c>PageIndex + 1</c>) and the sort value of <c>OrderFiled</c>.
/// </param>
/// <returns>
/// The response content returned by the request proxy, or <see cref="string.Empty"/> when
/// the keyword is null or empty.
/// </returns>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    string keyWord = queryParas.KeyWord;
    if (string.IsNullOrEmpty(keyWord))
    {
        return string.Empty;
    }

    // Load the cookies stored for this site.
    var ckVisitor = new LazyCookieVistor();
    var cks = ckVisitor.LoadCookies(PddSiteUrl);

    // Prefer the already-formatted query address.
    string searchUrl;
    if (null != queryParas.ResolvedUrl)
    {
        searchUrl = queryParas.ResolvedUrl.Url;
    }
    else
    {
        // OrderFiled may be unset. Dereferencing it directly used to throw a
        // NullReferenceException, so fall back to the first order field the platform
        // supports. The argument itself is left unmodified.
        var orderField = queryParas.OrderFiled;
        if (null == orderField)
        {
            var supportedFields = queryParas.GetCurrentPlatformSupportOrderFields();
            if (null != supportedFields)
            {
                orderField = System.Linq.Enumerable.FirstOrDefault(supportedFields);
            }
        }

        // NOTE(review): when no order field is available at all an empty sort value is sent;
        // presumably the server then applies its default ordering. Confirm.
        string sortValue = null != orderField ? orderField.FieldValue : string.Empty;

        searchUrl = string.Format(templateOfSearchUrl, keyWord, queryParas.PageIndex + 1, sortValue);
    }

    var client = PddHttpClient;

    // Referrer header: the mobile search-result page for this keyword.
    client.Client.DefaultRequestHeaders.Referrer = new Uri(string.Format("http://mobile.yangkeduo.com/search_result.html?search_key={0}&search_src=new&search_met_track=manual&refer_page_name=search&refer_page_id=search_1515546776782_Zeq7a7qvHN&refer_page_sn=10031&page_id=search_result_1515546785426_9C9j8sIOep&sort_type=2", keyWord));

    // Hand the site's cookies to the client before sending.
    client.ChangeGlobleCookies(cks, PddSiteUrl);

    // Send the request.
    var clientProxy = new HttpServerProxy() { Client = client.Client, KeepAlive = true };

    // Debugging note: do not inspect the response with the built-in text viewer; it is buggy
    // and shows incomplete text. Open it as JSON to see the complete string.
    return clientProxy.GetRequestTransfer(searchUrl, null);
}
/// <summary>
/// Requests the Mogujie search page for the keyword carried by <paramref name="queryParas"/>
/// and returns the raw response text.
/// </summary>
/// <param name="queryParas">
/// Query arguments. <c>KeyWord</c> is required. <c>ResolvedUrl</c>, when set, is used as the
/// request address; otherwise the address is built from the template, a Unix timestamp
/// token and the keyword.
/// </param>
/// <returns>
/// The response content returned by the request proxy, or <see cref="string.Empty"/> when
/// the keyword is null or empty.
/// </returns>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    string keyWord = queryParas.KeyWord;
    if (string.IsNullOrEmpty(keyWord))
    {
        return string.Empty;
    }

    // Cookies stored for the Mogujie site.
    var cookieVisitor = new LazyCookieVistor();
    var siteCookies = cookieVisitor.LoadCookies(MogujieSiteUrl);

    // The timestamp is generated up front, whether or not the template branch needs it.
    string unixTimestamp = JavascriptContext.getUnixTimestamp();

    // Prefer the already-formatted query address; otherwise build one from the template.
    string requestUrl;
    if (queryParas.ResolvedUrl == null)
    {
        requestUrl = string.Format(templateOfSearchUrl, unixTimestamp, keyWord);
    }
    else
    {
        requestUrl = queryParas.ResolvedUrl.Url;
    }

    var httpClient = MogujieHttpClient;

    // Referrer header: the list page for this keyword.
    string refererUrl = string.Format("http://list.mogujie.com/s?q={0}&ptp=1.eW5XD.0.0.qJUTT", keyWord);
    httpClient.Client.DefaultRequestHeaders.Referrer = new Uri(refererUrl);

    // Hand the site's cookies to the client before sending.
    httpClient.ChangeGlobleCookies(siteCookies, MogujieSiteUrl);

    // Send the request.
    // Debugging note: do not inspect the response with the built-in text viewer; it is buggy
    // and shows incomplete text. Open it as JSON to see the complete string.
    var proxy = new HttpServerProxy() { Client = httpClient.Client, KeepAlive = true };
    string responseText = proxy.GetRequestTransfer(requestUrl, null);
    return responseText;
}
/// <summary>
/// Fetches the web page described by the given query arguments and returns the fetched
/// content wrapped in a container.
/// </summary>
/// <param name="queryParas">Query arguments forwarded to the request loader.</param>
/// <returns>
/// A new <c>DataContainer</c> whose <c>Result</c> holds the response text exactly as the
/// loader returned it, or <c>null</c> when <paramref name="queryParas"/> is <c>null</c>.
/// </returns>
public DataContainer QuerySearchContent(IFetchWebPageArgument queryParas)
{
    if (queryParas == null)
    {
        return null;
    }

    var box = new DataContainer();

    // The response text is stored as-is; no compression is applied.
    box.Result = RequestLoader.LoadUrlGetSearchApiContent(queryParas);
    return box;
}
/// <summary>
/// Requests the QingTaoke search page for the keyword carried by
/// <paramref name="queryParas"/> and returns the raw response text.
/// </summary>
/// <param name="queryParas">
/// Query arguments. Only <c>KeyWord</c> is read; the request address is always built from
/// the template and the keyword.
/// </param>
/// <returns>
/// The response content returned by the request proxy, or <see cref="string.Empty"/> when
/// the keyword is null or empty.
/// </returns>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    string keyWord = queryParas.KeyWord;
    if (string.IsNullOrEmpty(keyWord))
    {
        return string.Empty;
    }

    // Cookies stored for the QingTaoke site.
    var cookieVisitor = new LazyCookieVistor();
    var siteCookies = cookieVisitor.LoadCookies(QingTaokeSiteUrl);

    string requestUrl = string.Format(templateOfSearchUrl, keyWord);

    // No Referrer header is set for this site; only the cookies are handed to the client.
    var httpClient = QingTaokeHttpClient;
    httpClient.ChangeGlobleCookies(siteCookies, QingTaokeSiteUrl);

    // Send the request.
    // Debugging note: do not inspect the response with the built-in text viewer; it is buggy
    // and shows incomplete text. Open it as JSON to see the complete string.
    var proxy = new HttpServerProxy() { Client = httpClient.Client, KeepAlive = true };
    string responseText = proxy.GetRequestTransfer(requestUrl, null);
    return responseText;
}
/// <summary>
/// Requests the Suning search page for the keyword carried by <paramref name="queryParas"/>
/// and returns the raw response text.
/// </summary>
/// <param name="queryParas">
/// Query arguments. <c>KeyWord</c> is required; <c>ResolvedUrl</c>, when set, supplies the
/// request address and takes precedence over the keyword-based template address.
/// </param>
/// <returns>
/// The response content returned by the request proxy, or <see cref="string.Empty"/> when
/// the keyword is null or empty.
/// </returns>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    string keyWord = queryParas.KeyWord;
    if (string.IsNullOrEmpty(keyWord))
    {
        return string.Empty;
    }

    // Load the cookies stored for this site.
    var ckVisitor = new LazyCookieVistor();
    var cks = ckVisitor.LoadCookies(SuningSiteUrl);

    // Prefer the already-formatted query address; otherwise build one from the template.
    string searchUrl = null != queryParas.ResolvedUrl
        ? queryParas.ResolvedUrl.Url
        : string.Format(templateOfSearchUrl, keyWord);

    var client = SuningHttpClient;
    var headers = client.Client.DefaultRequestHeaders;

    // Host header follows the host of the address actually being requested.
    headers.Host = new Uri(searchUrl).Host;

    // Referrer is always the keyword-based template address, even when a resolved URL is used.
    // NOTE(review): the keyword is inserted into the template without URL encoding. The
    // previous code computed an encoded copy but never used it (removed as dead code).
    // Confirm whether the template expects an encoded keyword.
    headers.Referrer = new Uri(string.Format(templateOfSearchUrl, keyWord));

    // Default headers stay on the client after this call returns. Adding the ajax headers
    // unconditionally would append a duplicate value on every ajax call and keep sending
    // them on later non-ajax requests (assuming the client instance is reused; confirm).
    // Clear any earlier values first so each call starts clean.
    headers.Remove("X-Requested-With");
    headers.Remove("X-Tingyun-Id");

    // Ajax requests carry these two extra headers.
    if (searchUrl.Contains("searchProductList.do"))
    {
        headers.Add("X-Requested-With", "XMLHttpRequest");
        headers.Add("X-Tingyun-Id", "p35OnrDoP8k;r=57681924");
    }

    // Hand the site's cookies to the client before sending.
    client.ChangeGlobleCookies(cks, SuningSiteUrl);

    // Send the request.
    var clientProxy = new HttpServerProxy() { Client = client.Client, KeepAlive = true };

    // Debugging note: do not inspect the response with the built-in text viewer; it is buggy
    // and shows incomplete text. Open it as JSON to see the complete string.
    return clientProxy.GetRequestTransfer(searchUrl, null);
}
/// <summary>
/// Issues a request built from the given arguments and returns the content of the HTTP response.
/// Abstract method; each derived loader supplies its own implementation.
/// </summary>
/// <param name="queryParas">Arguments describing the search request to perform.</param>
/// <returns>The HTTP response content as a string.</returns>
public abstract string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas);
/// <summary>
/// Queries the Vip search endpoints for the keyword carried by <paramref name="queryParas"/>
/// and returns the collected responses serialized as JSON.
/// </summary>
/// <remarks>
/// Up to three POST requests are started and awaited together: brand list and category tree
/// (only when <c>IsNeedResolveHeaderTags</c> is true) and the search list itself. Each
/// response body is stored in a <c>VipSearchResultBag</c>.
/// Exceptions raised while building or sending the requests propagate to the caller
/// unchanged. The previous <c>catch (Exception ex) { throw ex; }</c> wrapper only reset the
/// stack trace and has been removed.
/// </remarks>
/// <param name="queryParas">
/// Query arguments. <c>KeyWord</c> is required. When <c>ResolvedUrl.ParasPost</c> is present
/// the three request payloads are read from it; otherwise they are built from the keyword
/// and <c>PageIndex</c>.
/// </param>
/// <returns>
/// The JSON form of the result bag, or <see cref="string.Empty"/> when the keyword is null
/// or empty.
/// </returns>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    VipSearchResultBag resultBag = new VipSearchResultBag();

    string keyWord = queryParas.KeyWord;
    if (string.IsNullOrEmpty(keyWord))
    {
        return string.Empty;
    }

    // Load the cookies stored for this site.
    var ckVisitor = new LazyCookieVistor();
    var cks = ckVisitor.LoadCookies(VipSiteUrl);

    var client = VipHttpClient;

    // Referrer header: the mobile search-list page for this keyword.
    client.Client.DefaultRequestHeaders.Referrer = new Uri(string.Format("https://m.vip.com/searchlist.html?q={0}&channel_id=", keyWord));

    // Hand the site's cookies to the client before sending.
    client.ChangeGlobleCookies(cks, VipSiteUrl);

    var clientProxy = new HttpServerProxy() { Client = client.Client, KeepAlive = true };

    // Debugging note: do not inspect the responses with the built-in text viewer; it is buggy
    // and shows incomplete text. Open them as JSON to see the complete string.

    string para_brandJson;
    string para_categoryTreeJson;
    string para_searchListJson;
    if (null != queryParas.ResolvedUrl && null != queryParas.ResolvedUrl.ParasPost)
    {
        // Payloads were already resolved upstream: use them verbatim.
        para_brandJson = queryParas.ResolvedUrl.ParasPost["para_brand"].ToString();
        para_categoryTreeJson = queryParas.ResolvedUrl.ParasPost["para_categoryTree"].ToString();
        para_searchListJson = queryParas.ResolvedUrl.ParasPost["para_searchList"].ToString();
    }
    else
    {
        para_brandJson = new VipSearchParaBrand(keyWord).ToJson();
        para_categoryTreeJson = new VipSearchParaCategoryTree(keyWord).ToJson();

        // Without plugin-resolved payloads only plain keyword filtering with paging is done;
        // complex filtering is expected to be implemented by the plugin.
        var paraDetais = new VipSearchParaSearchList(keyWord);
        paraDetais.paramsDetails.np = queryParas.PageIndex + 1; // paging: one-based page number
        para_searchListJson = paraDetais.ToJson();
    }

    Task<HttpResponseMessage> brandTask;
    Task<HttpResponseMessage> categoryTreeTask;
    if (queryParas.IsNeedResolveHeaderTags == true)
    {
        // 1. Query brands.
        var brandPara = new Dictionary<string, string>();
        brandPara.Add("para_brand", para_brandJson);
        brandTask = clientProxy.PostRequestTransferAsync(queryBrandUrl, PostDataContentType.Json, brandPara, null);

        // 2. Query categories.
        var categoryTreePara = new Dictionary<string, string>();
        categoryTreePara.Add("para_categoryTree", para_categoryTreeJson);
        categoryTreeTask = clientProxy.PostRequestTransferAsync(queryCategoryUrl, PostDataContentType.Json, categoryTreePara, null);
    }
    else
    {
        // Header tags not requested: completed tasks with a null response keep the wait and
        // the result handling below uniform.
        brandTask = Task.FromResult<HttpResponseMessage>(null);
        categoryTreeTask = Task.FromResult<HttpResponseMessage>(null);
    }

    // 3. Query the search list.
    var searchListPara = new Dictionary<string, string>();
    searchListPara.Add("para_searchList", para_searchListJson);
    Task<HttpResponseMessage> searchListTask = clientProxy.PostRequestTransferAsync(templateOfSearchUrl, PostDataContentType.Json, searchListPara, null);

    // Block until every request has finished.
    Task.WaitAll(new Task[] { brandTask, categoryTreeTask, searchListTask });

    if (brandTask.Result != null)
    {
        resultBag.BrandStoreList = brandTask.Result.Content.ReadAsStringAsync().Result;
    }
    if (categoryTreeTask.Result != null)
    {
        resultBag.CategoryTree = categoryTreeTask.Result.Content.ReadAsStringAsync().Result;
    }
    if (searchListTask.Result != null)
    {
        resultBag.SearchList = searchListTask.Result.Content.ReadAsStringAsync().Result;
    }

    return resultBag.ToJson();
}
/// <summary>
/// Not used by this loader; performs no request.
/// </summary>
/// <param name="queryParas">Ignored.</param>
/// <returns>Always <see cref="string.Empty"/>.</returns>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    return string.Empty;
}
/// <summary>
/// Calls the eTao mobile search API for the keyword carried by <paramref name="queryParas"/>
/// and returns the raw response text.
/// </summary>
/// <remarks>
/// The request is signed with the token taken from the <c>_m_h5_tk</c> cookie. When that
/// cookie is missing, the cookie refresh page is loaded once and the cookies are re-read.
/// When <c>OrderFiled</c> is unset it is assigned the first order field supported by the
/// current platform, so the argument object is modified.
/// </remarks>
/// <param name="queryParas">
/// Query arguments. <c>KeyWord</c> is required; <c>PageIndex</c> and <c>OrderFiled</c> feed
/// the request payload.
/// </param>
/// <returns>
/// The response content returned by the request proxy, or <see cref="string.Empty"/> when
/// the keyword is null or empty.
/// </returns>
/// <exception cref="Exception">
/// The <c>_m_h5_tk</c> cookie is still missing or empty after the refresh attempt.
/// </exception>
public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
{
    string keyWord = queryParas.KeyWord;
    if (string.IsNullOrEmpty(keyWord))
    {
        return string.Empty;
    }

    // Timestamp used both in the signature and in the request URL.
    string timestamp = JavascriptContext.getUnixTimestamp();

    // Cookies stored for the eTao site; the token cookie is needed for signing.
    var cookieVisitor = new LazyCookieVistor();
    var siteCookies = cookieVisitor.LoadCookies(eTaoSiteUrl);
    var tokenCookie = siteCookies.FirstOrDefault(x => x.Name == "_m_h5_tk");
    if (tokenCookie == null)
    {
        // Reload the page so the server issues the token, then read the cookies again.
        this.AutoRefeshCookie(this.RefreshCookieUrl);
        siteCookies = cookieVisitor.LoadCookies(eTaoSiteUrl);
        tokenCookie = siteCookies.FirstOrDefault(x => x.Name == "_m_h5_tk");
    }

    bool tokenMissing = tokenCookie == null || string.IsNullOrEmpty(tokenCookie.Value);
    if (tokenMissing)
    {
        throw new Exception("加载授权私钥失败!无法获取对应的cookie:_m_h5_tk ");
    }

    // Only the part of the cookie value before the first underscore is used for signing.
    string token = tokenCookie.Value.Split('_')[0];
    string etao_appkey = "12574478";

    // Default the ordering to the platform's first supported order field.
    if (queryParas.OrderFiled == null)
    {
        queryParas.OrderFiled = queryParas.GetCurrentPlatformSupportOrderFields().First();
    }

    // Offset of the first item on the requested page (40 items per page).
    int startOffset = queryParas.PageIndex * 40;
    string payload = string.Concat(
        "{\"s\":",
        startOffset,
        ",\"n\":40,\"q\":\"",
        keyWord,
        "\",\"needEncode\":false,\"sort\":\"",
        queryParas.OrderFiled.FieldValue,
        "\",\"maxPrice\":10000000,\"minPrice\":0,\"serviceList\":\"\",\"navigator\":\"all\",\"urlType\":2}");

    // Signature over token, timestamp, app key and payload.
    string sign = JavascriptContext.getEtaoJSSDKSign(token, timestamp, etao_appkey, payload);

    string apiUrl = string.Format(
        "https://apie.m.etao.com/h5/mtop.etao.fe.search/1.0/?type=jsonp&api=mtop.etao.fe.search&v=1.0&appKey=12574478&data={0}&t={1}&sign={2}&callback=jsonp28861232595120323",
        payload,
        timestamp,
        sign);

    // The keyword-based template address serves only as the Referrer header.
    string refererUrl = string.Format(templateOfSearchUrl, keyWord);

    // Hand the site's cookies to the client, then set the Referrer header.
    var httpClient = etaoHttpClient;
    httpClient.ChangeGlobleCookies(siteCookies, eTaoSiteUrl);
    httpClient.Client.DefaultRequestHeaders.Referrer = new Uri(refererUrl);

    // Send the request.
    var proxy = new HttpServerProxy() { Client = httpClient.Client, KeepAlive = true };
    string responseText = proxy.GetRequestTransfer(apiUrl, null);
    return responseText;
}