Example #1
0
        /// <summary>
        /// Crawls the province list from the index page. For every province link it builds an
        /// <c>AreaCrawlingModel</c>, fills in pinyin and coordinates, hands the province to
        /// <c>CrawlingCities</c>, and appends the model to <c>_list</c>.
        /// </summary>
        /// <returns>A task that completes once every province link on the index page has been processed.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// The index page came back empty or contained no province links.
        /// </exception>
        private async Task CrawlingProvinces()
        {
            var url  = BaseUrl + "index.html";
            var html = await GetResponse(url);

            // Fail with a descriptive error instead of a NullReferenceException further down.
            if (string.IsNullOrWhiteSpace(html))
            {
                throw new System.InvalidOperationException("Province index page returned no content: " + url);
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var nodeList = doc.DocumentNode.SelectNodes("//tr[@class='provincetr']//a");
            if (nodeList == null)
            {
                throw new System.InvalidOperationException("No province links found on index page: " + url);
            }

            foreach (var node in nodeList)
            {
                // An anchor without href cannot be followed; skip it rather than crash.
                var hrefAttribute = node.Attributes["href"];
                if (hrefAttribute == null)
                {
                    continue;
                }

                var href = hrefAttribute.Value;

                // The part of the link before the first '.' is used as the province code.
                var code  = href.Split('.')[0];
                var model = new AreaCrawlingModel
                {
                    Code     = CompleteCode(code),
                    Name     = node.InnerText,
                    FullName = node.InnerText
                };

                SetPinyin(model);

                await CrawlingCoord(model);

                CrawlingCities(model, href, code);

                _list.Add(model);
            }
        }
Example #2
0
        /// <summary>
        /// Looks up coordinates for an area through the AMap place-text search endpoint, using the
        /// area's name as the keyword, and stores the first result on the entity.
        /// </summary>
        /// <remarks>
        /// The lookup is best-effort: a failed request is retried a limited number of times with a
        /// one-second pause, after which the entity is left without coordinates. A response without
        /// usable location data is not retried.
        /// </remarks>
        /// <param name="entity">
        /// Area to look up. <c>Name</c> is read; <c>Longitude</c> and <c>Latitude</c> are written when
        /// the first returned POI carries a "lng,lat" location string.
        /// </param>
        /// <returns>A task that completes when the lookup succeeded or all attempts are used up.</returns>
        private async Task CrawlingCoord(AreaCrawlingModel entity)
        {
            // Upper bound on attempts so a permanently failing request cannot retry forever.
            const int maxAttempts = 5;

            // NOTE(review): the API key is hard-coded in source; it should come from configuration.
            // The keyword is escaped so names containing reserved characters do not corrupt the query string.
            var url = "https://restapi.amap.com/v3/place/text?key=8325164e247e15eea68b59e89200988b&keywords=" +
                      System.Uri.EscapeDataString(entity.Name ?? string.Empty);

            for (var attempt = 1; attempt <= maxAttempts; attempt++)
            {
                try
                {
                    var json = await _httpClient.GetStringAsync(url);
                    if (!json.NotNull())
                    {
                        return;
                    }

                    var model = JsonConvert.DeserializeAnonymousType(json, new { pois = new[] { new { location = "" } } });

                    // Error responses may omit "pois" entirely; treat that as "no result", not as a failure.
                    var location = model?.pois?.FirstOrDefault()?.location;
                    if (location.NotNull())
                    {
                        var arr = location.Split(',');
                        if (arr.Length >= 2)
                        {
                            entity.Longitude = arr[0];
                            entity.Latitude  = arr[1];
                        }
                    }

                    return;
                }
                catch
                {
                    // Out of attempts: give up quietly and leave the coordinates unset.
                    if (attempt == maxAttempts)
                    {
                        return;
                    }

                    // Non-blocking pause before the next attempt.
                    await Task.Delay(1000);
                }
            }
        }
Example #3
0
        /// <summary>
        /// Crawls the towns listed on a county page and adds them as children of <paramref name="parent"/>.
        /// </summary>
        /// <remarks>
        /// A failed attempt is logged, <c>parent.Children</c> is reset to an empty list so partial
        /// results are not duplicated, and the page is retried after two seconds, up to a fixed
        /// number of attempts. Coordinate lookups are started but not waited for.
        /// </remarks>
        /// <param name="parent">Area that receives the crawled towns in its <c>Children</c>.</param>
        /// <param name="url">Page path, appended to <c>BaseUrl</c> + <paramref name="provinceCode"/> + "/".</param>
        /// <param name="provinceCode">Province code used as the path segment in front of <paramref name="url"/>.</param>
        private void CrawlingTown(AreaCrawlingModel parent, string url, string provinceCode)
        {
            // Upper bound on attempts so a permanently failing page cannot retry (and recurse) forever.
            const int maxAttempts = 5;

            for (var attempt = 1; attempt <= maxAttempts; attempt++)
            {
                try
                {
                    // GetAwaiter().GetResult() surfaces the real exception instead of an AggregateException,
                    // so the message logged below is meaningful.
                    var html = GetResponse(BaseUrl + provinceCode + "/" + url).GetAwaiter().GetResult();
                    if (!html.NotNull())
                    {
                        return;
                    }

                    var doc = new HtmlDocument();
                    doc.LoadHtml(html);

                    // SelectNodes returns null when the page has no town rows; nothing to retry in that case.
                    var nodeList = doc.DocumentNode.SelectNodes("//tr[@class='towntr']");
                    if (nodeList == null)
                    {
                        _logger.LogDebug("没有数据");
                        return;
                    }

                    foreach (var node in nodeList)
                    {
                        var codeNode = node.SelectSingleNode("td[1]/a");
                        var nameNode = node.SelectSingleNode("td[2]/a");
                        if (codeNode == null)
                        {
                            // Rows without links keep code and name directly in the cells.
                            codeNode = node.SelectSingleNode("td[1]");
                            nameNode = node.SelectSingleNode("td[2]");
                        }

                        if (codeNode == null || nameNode == null)
                        {
                            continue;
                        }

                        var model = new AreaCrawlingModel
                        {
                            Code     = codeNode.InnerText,
                            Name     = nameNode.InnerText,
                            FullName = parent.FullName + nameNode.InnerText
                        };

                        SetPinyin(model);

                        // NOTE(review): fire-and-forget — the lookup is started but never awaited, so
                        // coordinates may still be unset when this method returns.
                        CrawlingCoord(model).ConfigureAwait(false);

                        parent.Children.Add(model);
                        _logger.LogDebug(model.FullName);
                    }

                    return;
                }
                catch (Exception ex)
                {
                    _logger.LogError($"爬取{parent.FullName}城镇失败");
                    _logger.LogDebug(ex.Message);

                    // Drop partial results so a retry does not add duplicates.
                    parent.Children = new List<AreaCrawlingModel>();

                    if (attempt == maxAttempts)
                    {
                        return;
                    }

                    Thread.Sleep(2000);
                }
            }
        }
Example #4
0
 /// <summary>
 /// Fills the entity's <c>Pinyin</c> and <c>Jianpin</c> from its <c>Name</c>, using NPinyin's
 /// <c>GetPinyin</c> and <c>GetInitials</c> respectively.
 /// </summary>
 /// <param name="entity">Area whose name is converted; both pinyin properties are overwritten.</param>
 private void SetPinyin(AreaCrawlingModel entity)
 {
     var areaName = entity.Name;

     entity.Pinyin  = NPinyin.Pinyin.GetPinyin(areaName);
     entity.Jianpin = NPinyin.Pinyin.GetInitials(areaName);
 }
Example #5
0
        /// <summary>
        /// Crawls the counties listed on a city page and adds them as children of <paramref name="parent"/>.
        /// When the page has no county rows it is read as a town page instead, and the rows are added
        /// without descending further.
        /// </summary>
        /// <remarks>
        /// A failed attempt is logged, <c>parent.Children</c> is reset to an empty list so partial
        /// results are not duplicated, and the page is retried after five seconds, up to a fixed
        /// number of attempts. Coordinate lookups are started but not waited for.
        /// </remarks>
        /// <param name="parent">Area that receives the crawled counties (or towns) in its <c>Children</c>.</param>
        /// <param name="url">Page path, appended to <c>BaseUrl</c>.</param>
        /// <param name="provinceCode">Province code, passed through to <c>CrawlingTown</c>.</param>
        private void CrawlingCounty(AreaCrawlingModel parent, string url, string provinceCode)
        {
            // Upper bound on attempts so a permanently failing page cannot retry (and recurse) forever.
            const int maxAttempts = 5;

            for (var attempt = 1; attempt <= maxAttempts; attempt++)
            {
                try
                {
                    var isTown = false;

                    // GetAwaiter().GetResult() surfaces the real exception instead of an AggregateException,
                    // so the message logged below is meaningful.
                    var html = GetResponse(BaseUrl + url).GetAwaiter().GetResult();
                    if (!html.NotNull())
                    {
                        return;
                    }

                    var doc = new HtmlDocument();
                    doc.LoadHtml(html);

                    // SelectNodes returns null when nothing matches; fall back to town rows.
                    var nodeList = doc.DocumentNode.SelectNodes("//tr[@class='countytr']");
                    if (nodeList == null)
                    {
                        nodeList = doc.DocumentNode.SelectNodes("//tr[@class='towntr']");
                        isTown   = true;
                    }

                    if (nodeList == null)
                    {
                        _logger.LogDebug("没有数据");
                        return;
                    }

                    foreach (var node in nodeList)
                    {
                        var codeNode = node.SelectSingleNode("td[1]/a");
                        var nameNode = node.SelectSingleNode("td[2]/a");
                        if (codeNode == null)
                        {
                            // Rows without links keep code and name directly in the cells.
                            codeNode = node.SelectSingleNode("td[1]");
                            nameNode = node.SelectSingleNode("td[2]");
                        }

                        // Rows named "市辖区" are skipped, as are rows with missing cells.
                        if (codeNode == null || nameNode == null || nameNode.InnerText == "市辖区")
                        {
                            continue;
                        }

                        var model = new AreaCrawlingModel
                        {
                            Code     = codeNode.InnerText,
                            Name     = nameNode.InnerText,
                            FullName = parent.FullName + nameNode.InnerText
                        };

                        SetPinyin(model);

                        // NOTE(review): fire-and-forget — the lookup is started but never awaited, so
                        // coordinates may still be unset when this method returns.
                        CrawlingCoord(model).ConfigureAwait(false);

                        if (!isTown)
                        {
                            // Only linked county rows have a town page to descend into.
                            var hrefAttribute = codeNode.Attributes["href"];
                            if (hrefAttribute != null)
                            {
                                CrawlingTown(model, hrefAttribute.Value, provinceCode);
                            }
                        }

                        parent.Children.Add(model);
                    }

                    return;
                }
                catch (Exception ex)
                {
                    _logger.LogError($"爬取{parent.Name}下的区县失败");
                    _logger.LogError(ex.Message);

                    // Drop partial results so a retry does not add duplicates.
                    parent.Children = new List<AreaCrawlingModel>();

                    if (attempt == maxAttempts)
                    {
                        return;
                    }

                    Thread.Sleep(5000);
                }
            }
        }