/// <summary>
/// Downloads the page at <paramref name="url"/>, stores one record per row
/// marked with class <c>countytr</c>, and then calls <c>GetTownArea</c> for
/// each of those rows.
/// </summary>
/// <remarks>
/// When the page has no <c>countytr</c> rows, it is checked for
/// <c>towntr</c> and <c>villagetr</c> rows instead and the same URL is handed
/// to <c>GetTownArea</c> / <c>GetVillageArea</c>.
/// </remarks>
/// <param name="url">Address of the page to download and parse.</param>
/// <param name="sheng">Value written to <c>Sheng</c> on every record.</param>
/// <param name="shi">Value written to <c>Shi</c> on every record.</param>
/// <exception cref="Exception">
/// A row could not be processed. The message is the row's inner HTML and the
/// original failure is available as <c>InnerException</c>.
/// </exception>
public void GetCountyArea(string url, string sheng, string shi)
{
    // Common XPath prefix of the data rows; only the row class differs per level.
    // Note the innermost table is addressed without a tbody step.
    const string rowPath = "//html/body/table[2]/tbody/tr[1]/td/table/tbody/tr[2]/td/table/tbody/tr/td/table/tr";

    var html = DownloadPage(url);
    HtmlDocument htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    HtmlNodeCollection countytr = htmlDocument.DocumentNode.SelectNodes(rowPath + "[@class='countytr']");
    if (countytr == null)
    {
        // No county rows on this page: fall through to whichever lower level it
        // lists. The known shi is forwarded so those records keep it
        // (previously an empty string was passed and the value was lost).
        HtmlNodeCollection towntr = htmlDocument.DocumentNode.SelectNodes(rowPath + "[@class='towntr']");
        if (towntr != null)
        {
            GetTownArea(url, sheng, shi, "");
        }

        HtmlNodeCollection villagetr = htmlDocument.DocumentNode.SelectNodes(rowPath + "[@class='villagetr']");
        if (villagetr != null)
        {
            GetVillageArea(url, sheng, shi, "", "");
        }

        return;
    }

    foreach (HtmlNode item in countytr)
    {
        try
        {
            // Prefer the link cells; rows without links fall back to plain cells.
            var nodes = item.SelectNodes(".//a") ?? item.SelectNodes(".//td");
            var codeStr = nodes[0].InnerText.Trim();
            var nameStr = nodes[1].InnerText.Trim();

            // Child page address: <first 2 chars>/<next 2 chars>/<first 6 chars>.html
            var hrefStr = UrlBefore + codeStr.Substring(0, 2) + "/" + codeStr.Substring(2, 2) + "/" + codeStr.Substring(0, 6) + ".html";

            var area = new TongJiJuArea
            {
                AreaCode = codeStr,
                AreaName = nameStr,
                Sheng = sheng,
                Shi = shi
            };
            InsertDB(area);

            GetTownArea(hrefStr, sheng, shi, nameStr);
        }
        catch (Exception ex)
        {
            // Keep the offending row in the message, but preserve the real cause.
            throw new Exception(item.InnerHtml, ex);
        }
    }
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, builds one record per row
/// marked with class <c>villagetr</c>, and stores them with a single
/// <c>InsertDB</c> call. Does nothing when the page has no such rows.
/// </summary>
/// <param name="url">Address of the page to download and parse.</param>
/// <param name="sheng">Value written to <c>Sheng</c> on every record.</param>
/// <param name="shi">Value written to <c>Shi</c> on every record.</param>
/// <param name="xian">Value written to <c>Xian</c> on every record.</param>
/// <param name="xiang">Value written to <c>Xiang</c> on every record.</param>
/// <exception cref="Exception">
/// A row could not be parsed. The message is the row's inner HTML and the
/// original failure is available as <c>InnerException</c>.
/// </exception>
public void GetVillageArea(string url, string sheng, string shi, string xian, string xiang)
{
    var html = DownloadPage(url);
    HtmlDocument htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // The innermost table is addressed without a tbody step.
    HtmlNodeCollection villagetr = htmlDocument.DocumentNode.SelectNodes("//html/body/table[2]/tbody/tr[1]/td/table/tbody/tr[2]/td/table/tbody/tr/td/table/tr[@class='villagetr']");
    if (villagetr == null || villagetr.Count == 0)
    {
        return;
    }

    var list = new List<TongJiJuArea>();
    foreach (HtmlNode item in villagetr)
    {
        try
        {
            // Cells are read positionally: code, type code, name.
            var tds = item.SelectNodes("td");
            var codeStr = tds[0].InnerText.Trim();
            var typeStr = tds[1].InnerText.Trim();
            var nameStr = tds[2].InnerText.Trim();

            list.Add(new TongJiJuArea
            {
                AreaCode = codeStr,
                TypeCode = typeStr,
                AreaName = nameStr,
                Sheng = sheng,
                Shi = shi,
                Xian = xian,
                Xiang = xiang,
                Chun = nameStr
            });
        }
        catch (Exception ex)
        {
            // Keep the offending row in the message, but preserve the real cause.
            throw new Exception(item.InnerHtml, ex);
        }
    }

    InsertDB(list.ToArray());
}
/// <summary>
/// Entry point of the crawl: downloads the 2017 index page, stores one record
/// per link found in the index table, and calls <c>GetCityArea</c> for each
/// link (presumably one per province; confirm against the page).
/// </summary>
/// <exception cref="InvalidOperationException">
/// No links were found on the index page, e.g. because the download failed or
/// the page layout no longer matches the XPath.
/// </exception>
public override void Begin()
{
    var url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/index.html";
    var html = DownloadPage(url);
    HtmlDocument htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // The innermost table is addressed without a tbody step.
    HtmlNodeCollection collection = htmlDocument.DocumentNode.SelectNodes("//html/body/table[2]/tbody/tr[1]/td/table/tbody/tr[2]/td/table/tbody/tr/td/table/tr/td/a");
    if (collection == null)
    {
        // SelectNodes yields null when nothing matches; fail with a clear message
        // instead of a NullReferenceException from the loop below.
        throw new InvalidOperationException("No links found on index page: " + url);
    }

    foreach (HtmlNode item in collection)
    {
        var name = item.InnerText.Trim();
        Console.WriteLine(name);

        var area = new TongJiJuArea { AreaName = name };
        InsertDB(area);

        // Link targets are appended to UrlBefore to form the next page address.
        var href = UrlBefore + item.GetAttributeValue("href", "").Trim();
        GetCityArea(href, name);
    }

    Console.WriteLine("完成!输入任意键结束!");
}