/// <summary>
/// Downloads the page at <paramref name="url"/> (decoded with the encoding held in
/// <c>_httpEncoding</c>) and copies a fixed set of table cells into a new <see cref="HMDInfo"/>.
/// </summary>
/// <param name="url">Address of the page to download and parse.</param>
/// <returns>An <see cref="HMDInfo"/> whose properties hold the cleaned text of the selected cells.</returns>
/// <remarks>
/// NOTE(review): every XPath below is dereferenced without a null check, so a page whose
/// layout differs will presumably fail with a NullReferenceException - confirm against
/// the behavior of SelectSingleNode on the node type returned by HtmlAgilityPackHelper.
/// </remarks>
private HMDInfo GetInfo(string url)
{
    var info = new HMDInfo();

    var client = new HttpHelper { HttpEncoding = _httpEncoding };
    var pageSource = client.GetHtmlByGet(url);
    var root = HtmlAgilityPackHelper.GetDocumentNodeByHtml(pageSource);

    // Reads the inner text of the single node matched by the XPath, trims the ends,
    // then removes every remaining ASCII space character.
    Func<string, string> cellText = xpath => root.SelectSingleNode(xpath).InnerText.Trim().Replace(" ", "");

    // Cells taken from the third table.
    info.PubOrganName = cellText("//table[3]//tr[1]/td[2]");
    info.ProjectName = cellText("//table[3]//tr[2]/td[2]/nobr");

    // Cells taken from the fifth table, row by row.
    info.NatrualName = cellText("//table[5]//tr[1]/td[2]");
    info.IdentityNumber = cellText("//table[5]//tr[1]/td[4]");
    info.OrganName = cellText("//table[5]//tr[2]/td[2]");
    info.OrganCode = cellText("//table[5]//tr[3]/td[2]");
    info.PubTime = cellText("//table[5]//tr[4]/td[2]");
    info.PubDeadline = cellText("//table[5]//tr[4]/td[4]");
    info.PunishmentNumber = cellText("//table[5]//tr[5]/td[2]");
    info.PunishmentTime = cellText("//table[5]//tr[5]/td[4]");
    info.PunishmentFact = cellText("//table[5]//tr[6]/td[2]");
    info.PunishmentBasis = cellText("//table[5]//tr[7]/td[2]");
    info.PunishmentResult = cellText("//table[5]//tr[8]/td[2]");

    return info;
}
/// <summary>
/// Entry point of the crawl: detects the site's encoding, resolves the list page address
/// from the landing page, queues every link found in the list page's second table, and then
/// processes the queue in batches of at most five concurrent tasks running
/// <c>GetInfoInsertDb</c>.
/// </summary>
/// <exception cref="AggregateException">
/// Thrown after a batch has finished when one or more of its tasks faulted.
/// </exception>
public void Run()
{
    // Upper bound on the number of tasks started per batch.
    const int maxConcurrentTasks = 5;

    var firstUrl = "http://www.zjcredit.gov.cn/hmd/hmd.do";
    var httpHelper = new HttpHelper();
    var html = httpHelper.GetHtmlByGet(firstUrl);

    // Get the page encoding from the first response, keep it in _httpEncoding,
    // and download the landing page again so it is decoded with that encoding.
    _httpEncoding = httpHelper.HttpEncoding = HttpHelper.GetHtmlEncoding(html);
    html = httpHelper.GetHtmlByGet(firstUrl);

    // Narrow the page down step by step: the "initData...]" snippet, the first quoted
    // string inside it, and finally the text that follows a '$' up to a closing quote.
    var initDataSnippet = Regex.Match(html, "initData.*?]").Value;
    var quotedValue = Regex.Match(initDataSnippet, "\".*?\"").Value;
    var listPagePath = Regex.Match(quotedValue, @"(?<=\$)[^\$]*(?="")").Value;
    var url = $"http://www.zjcredit.gov.cn/hmd/{listPagePath}";

    html = httpHelper.GetHtmlByGet(url);
    var htmlNode = HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);
    var htmlNodeCollection = htmlNode.SelectNodes("//table[2]//a");

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so guard the enumeration instead of crashing on a page without links.
    if (htmlNodeCollection != null)
    {
        foreach (var node in htmlNodeCollection)
        {
            _urlQueue.Enqueue($"http://www.zjcredit.gov.cn{node.Attributes["href"].Value}");
        }
    }

    // NOTE(review): _urlQueue is read here while the started tasks are running; if
    // GetInfoInsertDb also touches the queue it needs synchronization - confirm.
    while (_urlQueue.Count != 0)
    {
        var batchSize = Math.Min(maxConcurrentTasks, _urlQueue.Count);
        var taskArray = new Task[batchSize];
        var started = 0;

        // Stop early if the queue runs dry; only the slots actually filled are counted.
        while (started < batchSize && _urlQueue.Count != 0)
        {
            url = _urlQueue.Dequeue();
            var task = new Task(GetInfoInsertDb, url);
            task.Start();
            taskArray[started++] = task;
        }

        // Task.WaitAll rejects null elements, so drop any slots that were never filled.
        if (started < taskArray.Length)
        {
            Array.Resize(ref taskArray, started);
        }

        // Wait for the whole batch to finish before starting the next one.
        Task.WaitAll(taskArray);
    }
}