コード例 #1
0
        /// <summary>
        /// Handles one company-search result page for a street address and schedules
        /// the search for the next house number on the same road.
        /// </summary>
        /// <remarks>
        /// The crawl state travels in <c>response.meta</c>: <c>road</c> is the road name,
        /// <c>num</c> is the house number that was last queried and <c>empty</c> counts
        /// consecutive result pages that contained no list item. The chain for a road
        /// stops once <c>empty</c> exceeds <c>this.threshold</c>.
        /// </remarks>
        /// <param name="response">Response whose document is scanned and whose meta carries the crawl state.</param>
        private void ParseCompanyName(HttpContentModel response)
        {
            var doc   = response.response.GetDocument();
            var lists = doc.QuerySelectorAll("div.zx-list-wrap div.zx-list-item");

            foreach (var item in lists)
            {
                // QuerySelector yields null when the item has no "h3 a" link. Skip such
                // items instead of aborting the whole road with a NullReferenceException.
                var link = item.QuerySelector("h3 a");
                if (link == null)
                {
                    continue;
                }

                Console.WriteLine(link.TextContent);
            }
            var meta = response.meta;

            // Track how many pages in a row came back without results.
            if (lists.Length == 0)
            {
                meta["empty"] = (int)meta["empty"] + 1;
            }
            else
            {
                meta["empty"] = 0;
            }

            // Too many empty pages in a row: stop following this road.
            if ((int)meta["empty"] > this.threshold)
            {
                return;
            }

            // Advance to the next house number and queue the corresponding search.
            meta["num"] = (int)meta["num"] + 1;
            string road = meta["road"].ToString();
            string name = road + meta["num"].ToString() + "号";
            string url  = $"https://xin.baidu.com/s?q={System.Web.HttpUtility.UrlEncode(name)}&t=0";

            this.http.Get(url, this.ParseCompanyName, meta);
        }
コード例 #2
0
ファイル: LvdunSpider.cs プロジェクト: Lenshang/MyCrawler
        /// <summary>
        /// Reads the company links from a list page and, for every company name found,
        /// queues a request to the api.php endpoint whose response is handled by
        /// <c>ParseCompanyPage</c>.
        /// </summary>
        /// <param name="response">Response whose HTML contains the company table.</param>
        public void ParseListpage(HttpContentModel response)
        {
            const string apiEndpoint = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php";

            var html         = response.response.GetHtml();
            var page         = parser.ParseDocument(html);
            var companyLinks = page.QuerySelectorAll("div.tableWrap tr td:nth-child(1) a");

            foreach (var link in companyLinks)
            {
                string companyName = link.TextContent;

                // Query-string parameters, added in the order they are sent.
                var query = new Dictionary <string, string>
                {
                    { "resource_id", "6899" },
                    { "query", "失信被执行人名单" },
                    { "cardNum", "" },
                    { "iname", companyName },
                    { "areaName", "" },
                    { "ie", "utf-8" },
                    { "oe", "utf-8" },
                    { "format", "json" },
                    { "t", DateHelper.GetTimestamp(DateTime.Now) },
                    { "cb", "jQuery110208514957267839198_1561021902645" },
                    { "_", DateHelper.GetTimestamp(DateTime.Now) },
                };
                string requestUrl = apiEndpoint + "?" + UrlHelper.ConvertUrlParams(query);

                // Carry the company name along so the callback knows which company it is.
                var meta = new MetaModel();
                meta.Add("companyName", companyName);

                this.http.Get(requestUrl, ParseCompanyPage, meta);
            }
        }
コード例 #3
0
ファイル: IosStoreSpider.cs プロジェクト: Lenshang/MyCrawler
        /// <summary>
        /// Builds an <see cref="IosAppItem"/> from an app detail page by reading the two
        /// JSON payloads embedded in the page's script tags.
        /// </summary>
        /// <remarks>
        /// Every lookup is null-tolerant: when a payload or one of its properties is
        /// absent, the corresponding item fields are left <c>null</c> rather than the
        /// callback failing with a <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <param name="response">Response for the detail page; its meta carries <c>appName</c>.</param>
        private void ParseAppDetail(HttpContentModel response)
        {
            var meta = response.meta as MetaModel;

            Console.WriteLine(meta["appName"]);
            IosAppItem appItem = new IosAppItem();

            appItem.AppName = meta["appName"].ToString();
            appItem.AppUrl  = response.request.Url.ToString();
            string html  = response.response.GetHtml();
            string json1 = RegexHelper.RegexOne(html, "class=\"ember-view\" type=\"application/ld\\+json\"\\>([\\s\\S]*?)\\<\\/script\\>");
            string json2 = RegexHelper.RegexOne(html, "script type=\"fastboot/shoebox\" id=\"shoebox-ember-data-store\"\\>([\\s\\S]*?)\\<\\/script\\>");

            // Only deserialize when the script tag was actually found; a missing
            // payload is treated as "no data" for the fields that depend on it.
            JObject jObj1 = string.IsNullOrEmpty(json1) ? null : JsonConvert.DeserializeObject <JObject>(json1);
            JObject jObj2 = string.IsNullOrEmpty(json2) ? null : JsonConvert.DeserializeObject <JObject>(json2);

            var rating     = jObj1?["aggregateRating"];
            var attributes = jObj2?["data"]?["attributes"];

            // Indexing an empty array throws, so take the first history entry only
            // when there is one.
            var history       = attributes?["versionHistory"] as JArray;
            var latestVersion = (history != null && history.Count > 0) ? history[0] : null;

            appItem.Description      = jObj1?["description"]?.ToString();
            appItem.AppType          = jObj1?["applicationCategory"]?.ToString();
            appItem.FirstReleaseDate = jObj1?["datePublished"]?.ToString();
            appItem.Producer         = jObj1?["author"]?["name"]?.ToString();
            appItem.Star             = rating?["ratingValue"]?.ToString();
            appItem.FeedbackRate     = appItem.Star;
            appItem.FeedbackCount    = rating?["reviewCount"]?.ToString();
            appItem.CommentCount     = appItem.FeedbackCount;
            appItem.Price            = jObj1?["offers"]?["price"]?.ToString();
            appItem.Size             = attributes?["size"]?.ToString();
            appItem.ReleaseDate      = latestVersion?["releaseDate"]?.ToString();
            appItem.VersionNumber    = latestVersion?["versionString"]?.ToString();
            appItem.ImgUrl           = jObj1?["image"]?.ToString();
            //TODO: write the item to the database
        }
コード例 #4
0
        /// <summary>
        /// Queues one road-index request per capital letter A through Z; each response
        /// is handled by <c>ParseRoadName</c>.
        /// </summary>
        /// <param name="response">Not inspected by this callback.</param>
        private void ParseRoadIndex(HttpContentModel response)
        {
            for (char initial = 'A'; initial <= 'Z'; initial++)
            {
                this.http.Get($"http://sh.city8.com/road/{initial}/", ParseRoadName);
            }
        }
コード例 #5
0
ファイル: LvdunSpider.cs プロジェクト: Lenshang/MyCrawler
        /// <summary>
        /// Handles the JSONP response for one company: strips the callback wrapper,
        /// parses the JSON and iterates the entries under <c>data[0].result</c>.
        /// </summary>
        /// <remarks>
        /// Returns without iterating when the body is empty, when <c>data</c> is missing
        /// or empty, or when the first entry has no <c>result</c>.
        /// </remarks>
        /// <param name="response">Response whose meta carries <c>companyName</c> and whose body is the JSONP text.</param>
        public void ParseCompanyPage(HttpContentModel response)
        {
            Console.WriteLine((response.meta)["companyName"]);

            string content = response.response.GetHtml();
            if (string.IsNullOrEmpty(content))
            {
                return;
            }

            // Remove the JSONP callback prefix, then strip the closing ")" / ");" only
            // at the very end so that ");" occurring inside the payload is preserved.
            string jsonStr = content.Replace("/**/jQuery110208514957267839198_1561021902645(", "").Trim();
            if (jsonStr.EndsWith(");", StringComparison.Ordinal))
            {
                jsonStr = jsonStr.Substring(0, jsonStr.Length - 2);
            }
            else if (jsonStr.EndsWith(")", StringComparison.Ordinal))
            {
                jsonStr = jsonStr.Substring(0, jsonStr.Length - 1);
            }

            JObject jsonObj = JsonConvert.DeserializeObject <JObject>(jsonStr);

            // Indexing an empty array throws and foreach over null throws, so resolve
            // the result list step by step and bail out when any part is absent.
            var data    = jsonObj?["data"] as JArray;
            var first   = (data != null && data.Count > 0) ? data[0] as JObject : null;
            var results = first?["result"];
            if (results == null)
            {
                return;
            }

            foreach (var item in results)
            {
                // Parsing of each entry is left to the implementer.
            }
        }
コード例 #6
0
ファイル: IosStoreSpider.cs プロジェクト: Lenshang/MyCrawler
        /// <summary>
        /// Walks the app links on a list page and queues a detail request for each,
        /// passing a copy of the incoming meta extended with the app's name.
        /// </summary>
        /// <param name="response">Response whose document holds the app links and whose meta is copied per link.</param>
        private void ParseAppList(HttpContentModel response)
        {
            var page     = response.response.GetDocument();
            var appLinks = page.QuerySelectorAll("#selectedcontent div a");

            foreach (var link in appLinks)
            {
                // Each request gets its own meta copy so the app names do not overwrite each other.
                var detailMeta = response.meta.Copy();
                detailMeta["appName"] = link.TextContent;

                this.http.Get(link.GetAttribute("href"), ParseAppDetail, detailMeta);
            }
        }
コード例 #7
0
        /// <summary>
        /// Reads the road names from a road-index page and, for each road, queues the
        /// company search for house number 1 with freshly initialised crawl state.
        /// </summary>
        /// <param name="response">Response whose document contains the road links.</param>
        private void ParseRoadName(HttpContentModel response)
        {
            var page      = response.response.GetDocument();
            var roadLinks = page.QuerySelectorAll("div.road_sahngjia.road_zm_list a");

            foreach (var link in roadLinks)
            {
                string roadName = link.TextContent.Trim();

                // Initial state for this road: house number 1, no empty pages seen yet.
                var state = new MetaModel();
                state.Add("num", 1);
                state.Add("empty", 0);
                state.Add("road", roadName);

                string firstAddress = roadName + "1号";
                string searchUrl    = "https://xin.baidu.com/s?q=" + System.Web.HttpUtility.UrlEncode(firstAddress) + "&t=0";

                this.http.Get(searchUrl, this.ParseCompanyName, state);
            }
        }
コード例 #8
0
 /// <summary>
 /// Sends the request carried by <paramref name="model"/> through <c>SendAsync</c>.
 /// </summary>
 /// <param name="model">Content model whose <c>request</c> is sent.</param>
 /// <returns>A task that yields the result of <c>SendAsync</c> for that request.</returns>
 public override async Task <ResponseEntity> CreateHttpRequestSend(HttpContentModel model)
 {
     var result = await SendAsync(model.request);
     return result;
 }
コード例 #9
0
ファイル: BaseHttpClient.cs プロジェクト: Lenshang/MyCrawler
 /// <summary>
 /// Performs the request described by the given content model and returns its response.
 /// </summary>
 /// <param name="model">Content model describing the request to perform.</param>
 /// <returns>A task that yields the <see cref="ResponseEntity"/> for the request.</returns>
 public abstract Task <ResponseEntity> CreateHttpRequestSend(HttpContentModel model);