Пример #1
0
        /// <summary>
        /// For every item returned by <c>GetHugeList()</c>, fetches the zujuan.xkw.com page of each
        /// of its textbook details, reads the value of the <c>questioncount</c> element and stores
        /// it through <c>AddJiaocaiDetaiSource</c>.
        /// </summary>
        /// <remarks>
        /// Items are processed by <see cref="Parallel.ForEach{TSource}(System.Collections.Generic.IEnumerable{TSource}, ParallelOptions, System.Action{TSource})"/>
        /// with a maximum degree of parallelism of 2. Pages that come back empty, or that do not
        /// contain a <c>questioncount</c> element, are skipped so that one bad page does not
        /// abort the whole crawl.
        /// </remarks>
        public void StartCrawler()
        {
            var list = GetHugeList();

            Parallel.ForEach(list, new ParallelOptions()
            {
                MaxDegreeOfParallelism = 2
            }, (item) =>
            {
                var listJcDetail = GetJiaocaiDetails(item.JiaocaiId);
                foreach (var jcdetail in listJcDetail)
                {
                    var url  = $"http://zujuan.xkw.com/{item.Prefix}/zj{jcdetail.JiaoCaiDetailId}/a{item.AreaId}/";
                    // NOTE(review): 50000 is presumably a timeout in milliseconds - confirm against the helper.
                    var html = HttpWebResponseUtility.ExecuteCreateGetHttpResponse(url, 50000, null);

                    // Same guard as Recrusion: nothing to parse when the request yields no content.
                    if (string.IsNullOrEmpty(html))
                    {
                        continue;
                    }

                    var doc          = NSoupClient.Parse(html);
                    var countElement = doc.GetElementById("questioncount");

                    // Without this check a page lacking the element throws inside the parallel
                    // loop, which surfaces as an AggregateException and stops the remaining work.
                    if (countElement == null)
                    {
                        continue;
                    }

                    var totalCount = countElement.Text().NullToInt();
                    AddJiaocaiDetaiSource(item.AreaId, item.JiaocaiId, jcdetail.JiaoCaiDetailId, totalCount, url);
                }
            });
        }
Пример #2
0
        /// <summary>
        /// Requests the category-tree handler for the children of <paramref name="id"/>, stores
        /// every returned <c>body&gt;ul&gt;li</c> node through <c>AddJiaocaiDetail</c>, and then
        /// repeats the process for each child.
        /// </summary>
        /// <param name="jiaocaiId">Identifier passed unchanged to <c>AddJiaocaiDetail</c> and to every nested call.</param>
        /// <param name="id">Value sent as the <c>parentid</c> query parameter; also stored as the parent of each child found.</param>
        /// <remarks>
        /// Returns without doing anything when the handler response is null or empty.
        /// </remarks>
        public void Recrusion(int jiaocaiId, int id)
        {
            var url    = $"http://zujuan.xkw.com/Web/Handler1.ashx?action=categorytreewithchild&parentid={id}&iszsd=0";
            // NOTE(review): 10000 is presumably a timeout in milliseconds - confirm against the helper.
            var result = HttpWebResponseUtility.ExecuteCreateGetHttpResponse(url, 10000, null);

            if (string.IsNullOrEmpty(result))
            {
                return;
            }
            var doc      = NSoupClient.Parse(result);
            var elements = doc.Select("body>ul>li");

            foreach (var element in elements)
            {
                var currentId   = element.Attr("id").NullToInt();
                var currentText = element.Select(">a").Text;

                // Record the node under its parent before descending.
                AddJiaocaiDetail(jiaocaiId, id, currentId, currentText);

                // A node that reports its own parent's id would make this method call itself
                // with identical arguments and request the same URL again. Skip the descent
                // rather than recurse until the stack overflows.
                if (currentId == id)
                {
                    continue;
                }

                Recrusion(jiaocaiId, currentId);
            }
        }