Пример #1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and copies the text of a fixed set of
        /// table cells (addressed by XPath) into a new <c>HMDInfo</c>.
        /// </summary>
        /// <param name="url">Address of the detail page, fetched with an HTTP GET.</param>
        /// <returns>
        /// The populated <c>HMDInfo</c>. Each field holds the cell text with every literal
        /// <c>&amp;nbsp;</c> removed and surrounding whitespace trimmed; a field is an empty
        /// string when its cell is not present in the document.
        /// </returns>
        private HMDInfo GetInfo(string url)
        {
            // Reuse the encoding stored in _httpEncoding so the page is decoded consistently.
            var httpHelper = new HttpHelper {
                HttpEncoding = _httpEncoding
            };
            var html     = httpHelper.GetHtmlByGet(url);
            var htmlNode = HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);

            // Reads one cell. The entity is stripped BEFORE trimming so that whitespace which
            // sat between an "&nbsp;" and the real text does not survive in the result.
            // A missing node yields "" instead of a NullReferenceException.
            // NOTE(review): assumes SelectSingleNode returns null when nothing matches
            // (HtmlAgilityPack behaviour) - confirm against the helper's return type.
            Func<string, string> cellText = xpath =>
                (htmlNode.SelectSingleNode(xpath)?.InnerText ?? string.Empty)
                    .Replace("&nbsp;", "")
                    .Trim();

            var hmdInfo = new HMDInfo
            {
                // Cells taken from the third table of the page.
                PubOrganName     = cellText("//table[3]//tr[1]/td[2]"),
                ProjectName      = cellText("//table[3]//tr[2]/td[2]/nobr"),

                // Cells taken from the fifth table of the page.
                NatrualName      = cellText("//table[5]//tr[1]/td[2]"),
                IdentityNumber   = cellText("//table[5]//tr[1]/td[4]"),
                OrganName        = cellText("//table[5]//tr[2]/td[2]"),
                OrganCode        = cellText("//table[5]//tr[3]/td[2]"),
                PubTime          = cellText("//table[5]//tr[4]/td[2]"),
                PubDeadline      = cellText("//table[5]//tr[4]/td[4]"),
                PunishmentNumber = cellText("//table[5]//tr[5]/td[2]"),
                PunishmentTime   = cellText("//table[5]//tr[5]/td[4]"),
                PunishmentFact   = cellText("//table[5]//tr[6]/td[2]"),
                PunishmentBasis  = cellText("//table[5]//tr[7]/td[2]"),
                PunishmentResult = cellText("//table[5]//tr[8]/td[2]")
            };

            return hmdInfo;
        }
Пример #2
0
        /// <summary>
        /// Crawls the list reachable from the fixed entry page: detects the page encoding,
        /// resolves the list URL embedded in the entry page, queues every link found in the
        /// list's second table, and then processes the queue in batches of up to five tasks,
        /// each running <c>GetInfoInsertDb</c> for one URL.
        /// </summary>
        /// <remarks>
        /// Blocks until every queued URL has been handed to a task and each batch has finished.
        /// Failures inside the tasks surface as an <see cref="AggregateException"/> once the
        /// whole batch has completed.
        /// </remarks>
        public void Run()
        {
            // Upper bound on the number of tasks started per batch.
            const int maxTasksPerBatch = 5;

            var firstUrl   = "http://www.zjcredit.gov.cn/hmd/hmd.do";
            var httpHelper = new HttpHelper();
            var html       = httpHelper.GetHtmlByGet(firstUrl);

            // Detect the page encoding from the first download, remember it for later requests,
            // then download the page again so it is decoded with that encoding.
            _httpEncoding = httpHelper.HttpEncoding = HttpHelper.GetHtmlEncoding(html);
            html          = httpHelper.GetHtmlByGet(firstUrl);

            // The list address is embedded in the page: take the "initData...]" fragment, then
            // its first quoted string, then the part between a '$' and the closing quote.
            var initDataFragment = Regex.Match(html, "initData.*?]").Value;
            var quotedValue      = Regex.Match(initDataFragment, "\".*?\"").Value;
            var relativePath     = Regex.Match(quotedValue, @"(?<=\$)[^\$]*(?="")").Value;
            var listUrl          = $"http://www.zjcredit.gov.cn/hmd/{relativePath}";

            html = httpHelper.GetHtmlByGet(listUrl);

            var htmlNode = HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);

            var htmlNodeCollection = htmlNode.SelectNodes("//table[2]//a");

            // NOTE(review): assumes SelectNodes returns null (not an empty collection) when
            // nothing matches, as HtmlAgilityPack does - the guard avoids a crash in that case.
            if (htmlNodeCollection != null)
            {
                foreach (var node in htmlNodeCollection)
                {
                    // Anchors without an href carry no target to crawl; skip them.
                    var href = node.Attributes["href"]?.Value;
                    if (href == null)
                    {
                        continue;
                    }

                    _urlQueue.Enqueue($"http://www.zjcredit.gov.cn{href}");
                }
            }

            while (_urlQueue.Count != 0)
            {
                // The batch size is derived from the current queue length, so every slot of
                // the array below is filled and none is left null.
                var batchSize = _urlQueue.Count > maxTasksPerBatch ? maxTasksPerBatch : _urlQueue.Count;
                var batch     = new Task[batchSize];

                for (var i = 0; i < batchSize; i++)
                {
                    batch[i] = new Task(GetInfoInsertDb, _urlQueue.Dequeue());
                    batch[i].Start();
                }

                // Wait for the whole batch; unlike waiting task by task, a failure in an early
                // task is only reported after the remaining tasks of the batch have finished.
                Task.WaitAll(batch);
            }
        }