/// <summary>
/// Crawls the province list from the <c>index.html</c> page under <c>BaseUrl</c>.
/// For every province link found it builds a model, fills in pinyin and coordinates,
/// crawls the province's cities and appends the model to <c>_list</c>.
/// </summary>
/// <returns>A task that completes once every province link on the page has been processed.</returns>
private async Task CrawlingProvinces()
{
    var url = BaseUrl + "index.html";
    var html = await GetResponse(url);
    if (string.IsNullOrEmpty(html))
    {
        // Nothing to parse; LoadHtml would throw on a null document.
        return;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // SelectNodes returns null (not an empty collection) when no node matches the XPath.
    var nodeList = doc.DocumentNode.SelectNodes("//tr[@class='provincetr']//a");
    if (nodeList == null)
    {
        return;
    }

    foreach (var node in nodeList)
    {
        var href = node.Attributes["href"]?.Value;
        if (string.IsNullOrEmpty(href))
        {
            // An anchor without a target cannot be followed or turned into a code.
            continue;
        }

        // The part of the link before the first '.' is used as the province code.
        var code = href.Split('.')[0];
        var model = new AreaCrawlingModel
        {
            Code = CompleteCode(code),
            Name = node.InnerText,
            FullName = node.InnerText
        };

        SetPinyin(model);
        await CrawlingCoord(model);
        CrawlingCities(model, href, code);
        _list.Add(model);
    }
}
/// <summary>
/// Looks up coordinates for an area by sending its name as the keyword to the AMap
/// place-text endpoint, and stores the first result's location on the entity.
/// </summary>
/// <remarks>
/// A failed attempt (an exception, or a response without a <c>pois</c> array) is retried
/// after a one-second delay, up to a fixed number of attempts. If every attempt fails the
/// entity's coordinates are left unchanged; this method does not throw for lookup failures.
/// </remarks>
/// <param name="entity">Area whose <c>Name</c> is searched and whose <c>Longitude</c>/<c>Latitude</c> are assigned.</param>
/// <returns>A task that completes when the lookup has succeeded or the attempts are exhausted.</returns>
private async Task CrawlingCoord(AreaCrawlingModel entity)
{
    const int maxAttempts = 5;
    const int retryDelayMilliseconds = 1000;

    // NOTE(review): the API key is embedded in source; consider moving it to configuration.
    // The keyword is escaped so reserved characters in a name cannot alter the query string.
    var url = "https://restapi.amap.com/v3/place/text?key=8325164e247e15eea68b59e89200988b&keywords="
              + Uri.EscapeDataString(entity.Name ?? string.Empty);

    for (var attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            var json = await _httpClient.GetStringAsync(url);
            if (!json.NotNull())
            {
                return;
            }

            var model = JsonConvert.DeserializeAnonymousType(json, new { pois = new[] { new { location = "" } } });
            if (model?.pois != null)
            {
                var location = model.pois.FirstOrDefault()?.location;
                if (location.NotNull())
                {
                    var arr = location.Split(',');
                    // Expect "longitude,latitude"; ignore anything that is not a pair.
                    if (arr.Length >= 2)
                    {
                        entity.Longitude = arr[0];
                        entity.Latitude = arr[1];
                    }
                }

                return;
            }

            // No "pois" array in the response: fall through and treat it as a failed attempt.
        }
        catch (Exception)
        {
            // Best-effort lookup: request or parse failures fall through to the retry below.
        }

        if (attempt < maxAttempts)
        {
            // Non-blocking delay; Thread.Sleep would hold a thread inside an async method.
            await Task.Delay(retryDelayMilliseconds);
        }
    }
}
/// <summary>
/// Crawls the town rows (<c>tr.towntr</c>) of the page at <c>BaseUrl + provinceCode + "/" + url</c>
/// and adds one child model per row to <paramref name="parent"/>.
/// </summary>
/// <remarks>
/// When an exception occurs the children collected so far are discarded, the failure is
/// logged, and the whole page is retried after a two-second pause. Retrying is done in a loop
/// so repeated failures do not deepen the call stack.
/// </remarks>
/// <param name="parent">Area that receives the crawled towns in its <c>Children</c> list.</param>
/// <param name="url">Link of the town page, appended after the province code segment.</param>
/// <param name="provinceCode">Province code used as the path segment in front of <paramref name="url"/>.</param>
private void CrawlingTown(AreaCrawlingModel parent, string url, string provinceCode)
{
    while (true)
    {
        try
        {
            var html = GetResponse(BaseUrl + provinceCode + "/" + url).Result;
            if (!html.NotNull())
            {
                return;
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            // SelectNodes returns null when no row matches; that is "no data", not an error to retry.
            var nodeList = doc.DocumentNode.SelectNodes("//tr[@class='towntr']");
            if (nodeList == null)
            {
                _logger.LogDebug("没有数据");
                return;
            }

            foreach (var node in nodeList)
            {
                var codeNode = node.SelectSingleNode("td[1]/a");
                var nameNode = node.SelectSingleNode("td[2]/a");
                if (codeNode == null)
                {
                    // Same fallback as CrawlingCounty: rows without links carry the text directly in the cells.
                    codeNode = node.SelectSingleNode("td[1]");
                    nameNode = node.SelectSingleNode("td[2]");
                }

                if (codeNode == null || nameNode == null)
                {
                    continue;
                }

                var model = new AreaCrawlingModel
                {
                    Code = codeNode.InnerText,
                    Name = nameNode.InnerText,
                    FullName = parent.FullName + nameNode.InnerText
                };

                SetPinyin(model);
                // The returned task is not awaited, so coordinates may be filled in after this method returns.
                CrawlingCoord(model).ConfigureAwait(false);
                parent.Children.Add(model);
                _logger.LogDebug(model.FullName);
            }

            return;
        }
        catch (Exception ex)
        {
            _logger.LogError($"爬取{parent.FullName}城镇失败");
            _logger.LogDebug(ex.Message);
            // Drop partially collected towns so the retry starts from a clean list.
            parent.Children = new List<AreaCrawlingModel>();
            Thread.Sleep(2000);
        }
    }
}
/// <summary>
/// Fills in the pinyin fields of an area from its name.
/// </summary>
/// <param name="entity">
/// Area whose <c>Name</c> is read; <c>Pinyin</c> receives the result of
/// <c>NPinyin.Pinyin.GetPinyin</c> and <c>Jianpin</c> the result of <c>NPinyin.Pinyin.GetInitials</c>.
/// </param>
private void SetPinyin(AreaCrawlingModel entity)
{
    var name = entity.Name;

    entity.Pinyin = NPinyin.Pinyin.GetPinyin(name);
    entity.Jianpin = NPinyin.Pinyin.GetInitials(name);
}
/// <summary>
/// Crawls the county rows (<c>tr.countytr</c>) of the page at <c>BaseUrl + url</c> and adds one
/// child model per row to <paramref name="parent"/>. If the page has no county rows, its town
/// rows (<c>tr.towntr</c>) are used instead and no further descent into towns is attempted.
/// </summary>
/// <remarks>
/// When an exception occurs the method pauses five seconds, discards the children collected
/// so far, logs the failure and retries the whole page. Retrying is done in a loop so repeated
/// failures do not deepen the call stack.
/// </remarks>
/// <param name="parent">Area that receives the crawled counties in its <c>Children</c> list.</param>
/// <param name="url">Link of the county page, appended to <c>BaseUrl</c>.</param>
/// <param name="provinceCode">Province code passed through to <c>CrawlingTown</c>.</param>
private void CrawlingCounty(AreaCrawlingModel parent, string url, string provinceCode)
{
    while (true)
    {
        try
        {
            var html = GetResponse(BaseUrl + url).Result;
            if (!html.NotNull())
            {
                return;
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            var isTown = false;
            var nodeList = doc.DocumentNode.SelectNodes("//tr[@class='countytr']");
            if (nodeList == null)
            {
                // No county rows on this page: fall back to town rows.
                nodeList = doc.DocumentNode.SelectNodes("//tr[@class='towntr']");
                isTown = true;
            }

            if (nodeList == null)
            {
                _logger.LogDebug("没有数据");
                return;
            }

            foreach (var node in nodeList)
            {
                var codeNode = node.SelectSingleNode("td[1]/a");
                var nameNode = node.SelectSingleNode("td[2]/a");
                if (codeNode == null)
                {
                    // Rows without links carry the text directly in the cells.
                    codeNode = node.SelectSingleNode("td[1]");
                    nameNode = node.SelectSingleNode("td[2]");
                }

                // Rows named "市辖区" are skipped, as are rows missing either cell.
                if (codeNode == null || nameNode == null || nameNode.InnerText == "市辖区")
                {
                    continue;
                }

                var model = new AreaCrawlingModel
                {
                    Code = codeNode.InnerText,
                    Name = nameNode.InnerText,
                    FullName = parent.FullName + nameNode.InnerText
                };

                SetPinyin(model);
                // The returned task is not awaited, so coordinates may be filled in after this method returns.
                CrawlingCoord(model).ConfigureAwait(false);

                if (!isTown)
                {
                    // Only linked rows have a town page to descend into.
                    var hrefAttribute = codeNode.Attributes["href"];
                    if (hrefAttribute != null)
                    {
                        CrawlingTown(model, hrefAttribute.Value, provinceCode);
                    }
                }

                parent.Children.Add(model);
            }

            return;
        }
        catch (Exception ex)
        {
            Thread.Sleep(5000);
            // Drop partially collected counties so the retry starts from a clean list.
            parent.Children = new List<AreaCrawlingModel>();
            _logger.LogError($"爬取{parent.Name}下的区县失败");
            _logger.LogError(ex.Message);
        }
    }
}