/// <summary>
        /// Fetches the search-result web page for the platform specified in <paramref name="args"/>.
        /// </summary>
        /// <param name="args">
        /// Fetch argument. Its Platform selects the web page service, and the argument itself
        /// is forwarded to that service's QuerySearchContent.
        /// </param>
        /// <returns>
        /// The container returned by the service. A container from
        /// DataContainer.CreateNullDataContainer() is returned instead when no service could be
        /// created, when the query throws (the exception is logged), or when the query yields null.
        /// </returns>
        protected virtual IDataContainer FetchPlatformSearchWebPage(IFetchWebPageArgument args)
        {
            // Start from the "null" container so every failure path has something to return.
            IDataContainer container = DataContainer.CreateNullDataContainer();

            // Resolve the spider service responsible for the requested platform.
            BaseWebPageService service = BaseWebPageService.CreateWebPageService(args.Platform);
            if (service == null)
            {
                return container;
            }

            try
            {
                container = service.QuerySearchContent(args);
            }
            catch (Exception error)
            {
                // Best effort: log the failure and keep the "null" container.
                Logger.Error(error);
            }

            // The service may itself hand back null; never pass that on to the caller.
            return container ?? DataContainer.CreateNullDataContainer();
        }
Пример #2
0
            /// <summary>
            /// Requests the Guomei search URL for the keyword carried by <paramref name="queryParas"/>
            /// and returns the content produced by the request.
            /// </summary>
            /// <param name="queryParas">
            /// Query argument. KeyWord is required; when ResolvedUrl is set its Url is requested
            /// instead of the URL built from the keyword template.
            /// </param>
            /// <returns>
            /// The string returned by HttpServerProxy.GetRequestTransfer, or an empty string when
            /// the keyword is null or empty.
            /// </returns>
            public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
            {
                string keyWord = queryParas.KeyWord;

                if (string.IsNullOrEmpty(keyWord))
                {
                    return(string.Empty);
                }

                // Load the cookies stored for this site.
                var ckVisitor = new LazyCookieVistor();
                var cks       = ckVisitor.LoadCookies(GuomeiSiteUrl);

                // Percent-encode the keyword once and reuse it, so reserved characters
                // (&, #, +, ? ...) in the keyword cannot change the structure of the URL.
                string keywordSearchUrl = string.Format(templateOfSearchUrl, Uri.EscapeDataString(keyWord));

                // Prefer the already-resolved query address; otherwise use the keyword URL.
                string searchUrl = null != queryParas.ResolvedUrl
                    ? queryParas.ResolvedUrl.Url
                    : keywordSearchUrl;

                var client = GuomeiHttpClient;

                // The Host header follows whichever URL is actually requested.
                client.Client.DefaultRequestHeaders.Host = new Uri(searchUrl).Host;

                // The Referrer header always points at the keyword search URL.
                client.Client.DefaultRequestHeaders.Referrer = new Uri(keywordSearchUrl);

                // Hand the cookies loaded for this site to the client.
                client.ChangeGlobleCookies(cks, GuomeiSiteUrl);

                // Send the request.
                var clientProxy = new HttpServerProxy()
                {
                    Client = client.Client, KeepAlive = true
                };

                // Debugging note: do not inspect the response with the built-in text viewer; it is
                // buggy and shows incomplete text. Open it as JSON to see the whole string.
                string content = clientProxy.GetRequestTransfer(searchUrl, null);

                return(content);
            }
Пример #3
0
            /// <summary>
            /// Requests the Tmall search URL for the keyword carried by <paramref name="queryParas"/>
            /// and returns the content produced by the request.
            /// </summary>
            /// <param name="queryParas">
            /// Query argument. KeyWord is required. ResolvedUrl, when set, supplies the URL to
            /// request; otherwise the URL is built from the keyword and the sort value.
            /// </param>
            /// <returns>
            /// The string returned by HttpServerProxy.GetRequestTransfer, or an empty string when
            /// the keyword is null or empty.
            /// </returns>
            public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
            {
                string keyWord = queryParas.KeyWord;
                if (string.IsNullOrEmpty(keyWord))
                {
                    return string.Empty;
                }

                // Cookies stored for this site.
                var siteCookies = new LazyCookieVistor().LoadCookies(tmallSiteUrl);

                // A resolved search URL is expected for Tmall; without one, fall back to the
                // default keyword-based search address.
                string requestUrl;
                if (queryParas.ResolvedUrl != null)
                {
                    requestUrl = queryParas.ResolvedUrl.Url;
                }
                else
                {
                    // "s" is the comprehensive (default) ordering.
                    string sortValue = queryParas.OrderFiled != null
                        ? queryParas.OrderFiled.FieldValue
                        : "s";
                    requestUrl = string.Format(templateOfSearchUrl, keyWord, sortValue);
                }

                var siteClient = TmallHttpClient;

                // Hand the cookies loaded for this site to the client.
                siteClient.ChangeGlobleCookies(siteCookies, tmallSiteUrl);

                // Send the request.
                var proxy = new HttpServerProxy();
                proxy.Client    = siteClient.Client;
                proxy.KeepAlive = true;

                // Debugging note: do not inspect the response with the built-in text viewer; it is
                // buggy and shows incomplete text. Open it as JSON to see the whole string.
                return proxy.GetRequestTransfer(requestUrl, null);
            }
Пример #4
0
            /// <summary>
            /// Requests the Pdd search URL for the keyword carried by <paramref name="queryParas"/>
            /// and returns the content produced by the request.
            /// </summary>
            /// <param name="queryParas">
            /// Query argument. KeyWord is required. ResolvedUrl, when set, supplies the URL to
            /// request; otherwise the URL is built from the keyword, PageIndex + 1 and the sort value.
            /// OrderFiled may be null.
            /// </param>
            /// <returns>
            /// The string returned by HttpServerProxy.GetRequestTransfer, or an empty string when
            /// the keyword is null or empty.
            /// </returns>
            public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
            {
                string keyWord = queryParas.KeyWord;

                if (string.IsNullOrEmpty(keyWord))
                {
                    return(string.Empty);
                }

                // Load the cookies stored for this site.
                var ckVisitor = new LazyCookieVistor();
                var cks       = ckVisitor.LoadCookies(PddSiteUrl);

                // Prefer the already-resolved query address.
                string searchUrl = "";

                if (null != queryParas.ResolvedUrl)
                {
                    searchUrl = queryParas.ResolvedUrl.Url;
                }
                else
                {
                    // OrderFiled is null when the caller did not choose a sort order; dereferencing
                    // it unconditionally would throw NullReferenceException.
                    // NOTE(review): falls back to the first supported order field, assuming that
                    // is the platform's default ordering - confirm.
                    string sortValue = string.Empty;
                    if (null != queryParas.OrderFiled)
                    {
                        sortValue = queryParas.OrderFiled.FieldValue;
                    }
                    else
                    {
                        foreach (var supportedField in queryParas.GetCurrentPlatformSupportOrderFields())
                        {
                            sortValue = supportedField.FieldValue;
                            break;
                        }
                    }

                    // PageIndex + 1 is sent as the page number (presumably PageIndex is zero-based - confirm).
                    searchUrl = string.Format(templateOfSearchUrl
                                              , keyWord, queryParas.PageIndex + 1
                                              , sortValue);
                }

                var client = PddHttpClient;

                // Set the Referrer header to the mobile search-result page for this keyword.
                client.Client.DefaultRequestHeaders.Referrer = new Uri(string.Format("http://mobile.yangkeduo.com/search_result.html?search_key={0}&search_src=new&search_met_track=manual&refer_page_name=search&refer_page_id=search_1515546776782_Zeq7a7qvHN&refer_page_sn=10031&page_id=search_result_1515546785426_9C9j8sIOep&sort_type=2", keyWord));

                // Hand the cookies loaded for this site to the client.
                client.ChangeGlobleCookies(cks, PddSiteUrl);

                // Send the request.
                var clientProxy = new HttpServerProxy()
                {
                    Client = client.Client, KeepAlive = true
                };

                // Debugging note: do not inspect the response with the built-in text viewer; it is
                // buggy and shows incomplete text. Open it as JSON to see the whole string.
                string content = clientProxy.GetRequestTransfer(searchUrl, null);

                return(content);
            }
Пример #5
0
            /// <summary>
            /// Requests the Mogujie search URL for the keyword carried by <paramref name="queryParas"/>
            /// and returns the content produced by the request.
            /// </summary>
            /// <param name="queryParas">
            /// Query argument. KeyWord is required. ResolvedUrl, when set, supplies the URL to
            /// request; otherwise the URL is built from a Unix timestamp and the keyword.
            /// </param>
            /// <returns>
            /// The string returned by HttpServerProxy.GetRequestTransfer, or an empty string when
            /// the keyword is null or empty.
            /// </returns>
            public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
            {
                string keyWord = queryParas.KeyWord;
                if (string.IsNullOrEmpty(keyWord))
                {
                    return string.Empty;
                }

                // Cookies stored for this site.
                var siteCookies = new LazyCookieVistor().LoadCookies(MogujieSiteUrl);

                // Timestamp used as the first placeholder of the search template.
                string unixTime = JavascriptContext.getUnixTimestamp();

                // Prefer the already-resolved query address.
                string requestUrl = queryParas.ResolvedUrl != null
                    ? queryParas.ResolvedUrl.Url
                    : string.Format(templateOfSearchUrl, unixTime, keyWord);

                var siteClient = MogujieHttpClient;

                // Referrer header: the list page for this keyword.
                var referrerText = string.Format("http://list.mogujie.com/s?q={0}&ptp=1.eW5XD.0.0.qJUTT", keyWord);
                siteClient.Client.DefaultRequestHeaders.Referrer = new Uri(referrerText);

                // Hand the cookies loaded for this site to the client.
                siteClient.ChangeGlobleCookies(siteCookies, MogujieSiteUrl);

                // Send the request.
                var proxy = new HttpServerProxy();
                proxy.Client    = siteClient.Client;
                proxy.KeepAlive = true;

                // Debugging note: do not inspect the response with the built-in text viewer; it is
                // buggy and shows incomplete text. Open it as JSON to see the whole string.
                return proxy.GetRequestTransfer(requestUrl, null);
            }
Пример #6
0
        /// <summary>
        /// Fetches the web content described by the query argument and returns it
        /// wrapped in a new <c>DataContainer</c>.
        /// </summary>
        /// <param name="queryParas">Query argument forwarded to the request loader.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="queryParas"/> is null; otherwise a container whose
        /// Result holds the text returned by RequestLoader.LoadUrlGetSearchApiContent.
        /// </returns>
        public DataContainer QuerySearchContent(IFetchWebPageArgument queryParas)
        {
            if (queryParas == null)
            {
                return null;
            }

            // The response text is stored as-is; no compression is applied to it.
            return new DataContainer
            {
                Result = RequestLoader.LoadUrlGetSearchApiContent(queryParas)
            };
        }
            /// <summary>
            /// Requests the QingTaoke search URL built from the keyword carried by
            /// <paramref name="queryParas"/> and returns the content produced by the request.
            /// </summary>
            /// <param name="queryParas">Query argument; only KeyWord is read.</param>
            /// <returns>
            /// The string returned by HttpServerProxy.GetRequestTransfer, or an empty string when
            /// the keyword is null or empty.
            /// </returns>
            public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
            {
                string keyWord = queryParas.KeyWord;
                if (string.IsNullOrEmpty(keyWord))
                {
                    return string.Empty;
                }

                // Cookies stored for this site.
                var siteCookies = new LazyCookieVistor().LoadCookies(QingTaokeSiteUrl);

                // The URL always comes from the keyword template; no Referrer header is set.
                string requestUrl = string.Format(templateOfSearchUrl, keyWord);

                var siteClient = QingTaokeHttpClient;

                // Hand the cookies loaded for this site to the client.
                siteClient.ChangeGlobleCookies(siteCookies, QingTaokeSiteUrl);

                // Send the request.
                var proxy = new HttpServerProxy();
                proxy.Client    = siteClient.Client;
                proxy.KeepAlive = true;

                // Debugging note: do not inspect the response with the built-in text viewer; it is
                // buggy and shows incomplete text. Open it as JSON to see the whole string.
                return proxy.GetRequestTransfer(requestUrl, null);
            }
Пример #8
0
            /// <summary>
            /// Requests the Suning search URL for the keyword carried by <paramref name="queryParas"/>
            /// and returns the content produced by the request.
            /// </summary>
            /// <param name="queryParas">
            /// Query argument. KeyWord is required; when ResolvedUrl is set its Url is requested
            /// instead of the URL built from the keyword template.
            /// </param>
            /// <returns>
            /// The string returned by HttpServerProxy.GetRequestTransfer, or an empty string when
            /// the keyword is null or empty.
            /// </returns>
            public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
            {
                string keyWord = queryParas.KeyWord;

                if (string.IsNullOrEmpty(keyWord))
                {
                    return(string.Empty);
                }

                // Load the cookies stored for this site.
                var ckVisitor = new LazyCookieVistor();
                var cks       = ckVisitor.LoadCookies(SuningSiteUrl);

                // Percent-encode the keyword once and reuse it, so reserved characters
                // (&, #, +, ? ...) in the keyword cannot change the structure of the URL.
                string keywordSearchUrl = string.Format(templateOfSearchUrl, Uri.EscapeDataString(keyWord));

                // Prefer the already-resolved query address; otherwise use the keyword URL.
                string searchUrl = null != queryParas.ResolvedUrl
                    ? queryParas.ResolvedUrl.Url
                    : keywordSearchUrl;

                var client  = SuningHttpClient;
                var headers = client.Client.DefaultRequestHeaders;

                // The Host header follows whichever URL is actually requested.
                headers.Host = new Uri(searchUrl).Host;

                // The Referrer header always points at the keyword search URL.
                headers.Referrer = new Uri(keywordSearchUrl);

                // Default request headers stay on the client between calls and Add() appends
                // instead of replacing. Clear the AJAX-only headers first so they neither pile up
                // across calls nor leak into a later non-AJAX request on a reused client.
                headers.Remove("X-Requested-With");
                headers.Remove("X-Tingyun-Id");

                // AJAX requests carry these two extra headers.
                if (searchUrl.Contains("searchProductList.do"))
                {
                    headers.Add("X-Requested-With", "XMLHttpRequest");
                    headers.Add("X-Tingyun-Id", "p35OnrDoP8k;r=57681924");
                }

                // Hand the cookies loaded for this site to the client.
                client.ChangeGlobleCookies(cks, SuningSiteUrl);

                // Send the request.
                var clientProxy = new HttpServerProxy()
                {
                    Client = client.Client, KeepAlive = true
                };

                // Debugging note: do not inspect the response with the built-in text viewer; it is
                // buggy and shows incomplete text. Open it as JSON to see the whole string.
                string content = clientProxy.GetRequestTransfer(searchUrl, null);

                return(content);
            }
 /// <summary>
 /// Issues a request built from the given arguments and returns the HTTP response content.
 /// Abstract method; each platform-specific loader supplies its own implementation.
 /// </summary>
 /// <param name="queryParas">Arguments describing the request to make.</param>
 /// <returns>The HTTP response content as a string.</returns>
 public abstract string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas);
            /// <summary>
            /// Posts the Vip search requests for the keyword carried by <paramref name="queryParas"/>
            /// and returns the collected response bodies serialized by VipSearchResultBag.ToJson().
            /// </summary>
            /// <remarks>
            /// The search-list request is always sent. The brand and category-tree requests are
            /// sent only when IsNeedResolveHeaderTags is true. All requests are started before the
            /// method blocks waiting for them. Exceptions are not caught here, so they reach the
            /// caller with their original stack trace.
            /// </remarks>
            /// <param name="queryParas">
            /// Query argument. KeyWord is required. When ResolvedUrl.ParasPost is present, the
            /// request payloads are taken from its "para_brand", "para_categoryTree" and
            /// "para_searchList" entries; otherwise they are built from the keyword and PageIndex.
            /// </param>
            /// <returns>
            /// The JSON of the result bag, or an empty string when the keyword is null or empty.
            /// </returns>
            public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
            {
                VipSearchResultBag resultBag = new VipSearchResultBag();

                string keyWord = queryParas.KeyWord;

                if (string.IsNullOrEmpty(keyWord))
                {
                    return(string.Empty);
                }

                // Load the cookies stored for this site.
                var ckVisitor = new LazyCookieVistor();
                var cks       = ckVisitor.LoadCookies(VipSiteUrl);

                var client = VipHttpClient;
                client.Client.DefaultRequestHeaders.Referrer = new Uri(string.Format("https://m.vip.com/searchlist.html?q={0}&channel_id=", keyWord));

                // Hand the cookies loaded for this site to the client.
                client.ChangeGlobleCookies(cks, VipSiteUrl);

                // Send the requests through the proxy.
                var clientProxy = new HttpServerProxy()
                {
                    Client = client.Client, KeepAlive = true
                };

                // Debugging note: do not inspect the responses with the built-in text viewer; it is
                // buggy and shows incomplete text. Open them as JSON to see the whole string.
                Task <HttpResponseMessage> brandTask;
                Task <HttpResponseMessage> categoryTreeTask;
                Task <HttpResponseMessage> searchListTask;
                string para_brandJson;
                string para_categoryTreeJson;
                string para_searchListJson;
                if (null != queryParas.ResolvedUrl && null != queryParas.ResolvedUrl.ParasPost)
                {
                    // Payloads were already resolved upstream; use them verbatim.
                    para_brandJson        = queryParas.ResolvedUrl.ParasPost["para_brand"].ToString();
                    para_categoryTreeJson = queryParas.ResolvedUrl.ParasPost["para_categoryTree"].ToString();
                    para_searchListJson   = queryParas.ResolvedUrl.ParasPost["para_searchList"].ToString();
                }
                else
                {
                    para_brandJson        = new VipSearchParaBrand(keyWord).ToJson();
                    para_categoryTreeJson = new VipSearchParaCategoryTree(keyWord).ToJson();

                    // Without resolved payloads only basic keyword filtering plus paging is done;
                    // complex filtering is expected to be implemented by the plugin.
                    var paraDetais = new VipSearchParaSearchList(keyWord);
                    // Paging: PageIndex + 1 is sent as "np".
                    paraDetais.paramsDetails.np = queryParas.PageIndex + 1;

                    para_searchListJson = paraDetais.ToJson();
                }

                if (queryParas.IsNeedResolveHeaderTags == true)
                {
                    // 1. Query the brands.
                    var brandPara = new Dictionary <string, string>();
                    brandPara.Add("para_brand", para_brandJson);
                    brandTask = clientProxy.PostRequestTransferAsync(queryBrandUrl, PostDataContentType.Json, brandPara, null);
                    // 2. Query the category tree.
                    var categoryTreePara = new Dictionary <string, string>();
                    categoryTreePara.Add("para_categoryTree", para_categoryTreeJson);
                    categoryTreeTask = clientProxy.PostRequestTransferAsync(queryCategoryUrl, PostDataContentType.Json, categoryTreePara, null);
                }
                else
                {
                    // Completed placeholder tasks keep the wait/read logic below uniform.
                    brandTask        = Task.FromResult <HttpResponseMessage>(null);
                    categoryTreeTask = Task.FromResult <HttpResponseMessage>(null);
                }

                // 3. Query the search list.
                var searchListPara = new Dictionary <string, string>();
                searchListPara.Add("para_searchList", para_searchListJson);
                searchListTask = clientProxy.PostRequestTransferAsync(templateOfSearchUrl, PostDataContentType.Json, searchListPara, null);

                // Block until every request has finished (this method is synchronous).
                Task.WaitAll(new Task[] { brandTask, categoryTreeTask, searchListTask });

                // Read each body, then dispose the response. A using statement tolerates a
                // null resource, which is what the placeholder tasks produce.
                using (HttpResponseMessage brandResponse = brandTask.Result)
                {
                    if (brandResponse != null)
                    {
                        resultBag.BrandStoreList = brandResponse.Content.ReadAsStringAsync().Result;
                    }
                }
                using (HttpResponseMessage categoryTreeResponse = categoryTreeTask.Result)
                {
                    if (categoryTreeResponse != null)
                    {
                        resultBag.CategoryTree = categoryTreeResponse.Content.ReadAsStringAsync().Result;
                    }
                }
                using (HttpResponseMessage searchListResponse = searchListTask.Result)
                {
                    if (searchListResponse != null)
                    {
                        resultBag.SearchList = searchListResponse.Content.ReadAsStringAsync().Result;
                    }
                }

                return(resultBag.ToJson());
            }
 /// <summary>
 /// Not used: this implementation performs no request.
 /// </summary>
 /// <param name="queryParas">Ignored.</param>
 /// <returns>Always an empty string.</returns>
 public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
 {
     return string.Empty;
 }
Пример #12
0
            /// <summary>
            /// Calls the eTao "mtop.etao.fe.search" endpoint for the keyword carried by
            /// <paramref name="queryParas"/> and returns the content produced by the request.
            /// </summary>
            /// <param name="queryParas">
            /// Query argument. KeyWord and PageIndex are read. When OrderFiled is null it is
            /// assigned the first entry of GetCurrentPlatformSupportOrderFields().
            /// </param>
            /// <returns>
            /// The string returned by HttpServerProxy.GetRequestTransfer, or an empty string when
            /// the keyword is null or empty.
            /// </returns>
            /// <exception cref="Exception">
            /// The "_m_h5_tk" cookie is still missing or empty after one refresh attempt.
            /// </exception>
            public override string LoadUrlGetSearchApiContent(IFetchWebPageArgument queryParas)
            {
                string keyWord = queryParas.KeyWord;

                if (string.IsNullOrEmpty(keyWord))
                {
                    return(string.Empty);
                }
                // Timestamp sent with the request and fed into the signature.
                string timestamp = JavascriptContext.getUnixTimestamp();

                // Load the cookies stored for this site.
                var ckVisitor = new LazyCookieVistor();
                var cks       = ckVisitor.LoadCookies(eTaoSiteUrl);

                var _m_h5_tk_cookie = cks.FirstOrDefault(x => x.Name == "_m_h5_tk");

                if (null == _m_h5_tk_cookie)
                {
                    // Refresh the page so the server issues the key cookie, then reload the cookies.
                    this.AutoRefeshCookie(this.RefreshCookieUrl);
                    cks             = ckVisitor.LoadCookies(eTaoSiteUrl);
                    _m_h5_tk_cookie = cks.FirstOrDefault(x => x.Name == "_m_h5_tk");
                }
                if (null == _m_h5_tk_cookie || string.IsNullOrEmpty(_m_h5_tk_cookie.Value))
                {
                    throw new Exception("加载授权私钥失败!无法获取对应的cookie:_m_h5_tk ");
                }
                // Only the part of the cookie value before the first '_' is used for signing.
                string _m_h5_tk_valueString = _m_h5_tk_cookie.Value.Split('_')[0];

                string etao_appkey = "12574478";

                if (null == queryParas.OrderFiled)
                {
                    queryParas.OrderFiled = queryParas.GetCurrentPlatformSupportOrderFields().First();
                }
                // "s" in the payload is PageIndex * 40 and "n" is 40
                // (presumably the start offset and the page size - confirm).
                int pageNum = queryParas.PageIndex * 40;

                string paras = string.Concat("{\"s\":", pageNum, ",\"n\":40,\"q\":\"", keyWord, "\",\"needEncode\":false,\"sort\":\"", queryParas.OrderFiled.FieldValue, "\",\"maxPrice\":10000000,\"minPrice\":0,\"serviceList\":\"\",\"navigator\":\"all\",\"urlType\":2}");

                // The signature is computed over the raw (unescaped) payload.
                string sign = JavascriptContext.getEtaoJSSDKSign(_m_h5_tk_valueString, timestamp, etao_appkey, paras);

                // The payload contains the user's keyword, so it is percent-encoded before being
                // placed in the query string; otherwise '&', '#' or '+' in the keyword would
                // truncate or corrupt the "data" parameter. The app key reuses the value that
                // was signed so the two cannot drift apart.
                string url = string.Format("https://apie.m.etao.com/h5/mtop.etao.fe.search/1.0/?type=jsonp&api=mtop.etao.fe.search&v=1.0&appKey={0}&data={1}&t={2}&sign={3}&callback=jsonp28861232595120323", etao_appkey, Uri.EscapeDataString(paras), timestamp, sign);


                string searchUrl = string.Format(templateOfSearchUrl, keyWord);
                var    client    = etaoHttpClient;

                // Hand the cookies loaded for this site to the client.
                client.ChangeGlobleCookies(cks, eTaoSiteUrl);
                // The Referrer header is the keyword search page.
                client.Client.DefaultRequestHeaders.Referrer = new Uri(searchUrl);
                // Send the request.
                var clientProxy = new HttpServerProxy()
                {
                    Client = client.Client, KeepAlive = true
                };
                string content = clientProxy.GetRequestTransfer(url, null);

                return(content);
            }