Пример #1
0
        /// <summary>
        /// Walks every pending textbook ("jiaocai") entry and, for each combination of
        /// matching grade and area, fetches the zujuan.xkw.com listing page and records
        /// the question count reported by that page.
        /// </summary>
        /// <remarks>
        /// The outer loop keeps running while <c>ExistCrawlerJC()</c> returns true; each pass
        /// processes the batch returned by <c>GetRandom10CrawlerJC()</c> (presumably 10 random
        /// pending entries, judging by the name — TODO confirm).
        /// </remarks>
        public void Crawler()
        {
            // Lookup lists are loaded once and reused for every batch.
            var listSubjectGrade = GetSubjectGrade();
            var listArea         = GetAreas();

            while (ExistCrawlerJC())
            {
                var listJiaocai = GetRandom10CrawlerJC();

                foreach (var jc in listJiaocai)
                {
                    // Only the grades that belong to this textbook's subject.
                    // Deferred LINQ query: it is evaluated when Parallel.ForEach enumerates it.
                    var grades = listSubjectGrade.Where(t => t.SubjectId == jc.SubjectId);
                    Parallel.ForEach(grades, (grade) =>
                    {
                        // Nested parallel loop: every (grade, area) pair is handled independently.
                        Parallel.ForEach(listArea, (area) =>
                        {
                            // Skip combinations that were already recorded. `return` here only
                            // ends this lambda invocation, i.e. this one (grade, area) pair.
                            if (ExistGrabPageSource(area.AreaId, jc.JiaoCaiDetailId, grade.GradeId))
                            {
                                return;
                            }
                            var url =
                                $"http://zujuan.xkw.com/{jc.Prefix}/zj{jc.JiaoCaiDetailId}/a{area.AreaId}g{grade.GradeId}/";


                            Console.WriteLine(url);
                            // The second argument (3000) is presumably a timeout in milliseconds — TODO confirm.
                            var html = HttpWebResponseProxyMayi.ExecuteCreateGetHttpResponseProxy(url, 3000, null);

                            // NOTE(review): `html` is never used below and `doc` is not declared anywhere
                            // in this method as shown. A statement that parses `html` into `doc` appears
                            // to be missing here — confirm against the full source.

                            // Reads the text of the element with id "questioncount" and converts it to int.
                            var totalCount = doc.GetElementById("questioncount").Text().NullToInt();

                            // NOTE(review): pageCount is only used by the commented-out loop below.
                            // Assuming 10 questions per page, this yields one page too many when
                            // totalCount is an exact multiple of 10 (e.g. 20 -> 3).
                            var pageCount = totalCount / 10 + 1;

                            AddGrabPageSource(area.AreaId, jc.JiaoCaiDetailId, grade.GradeId, totalCount);
                            //for (int i = 1; i <= pageCount; i++)
                            //{
                            //    AddGrabPageList(area.AreaId, jc.JiaoCaiDetailId, grade.GradeId, totalCount, i);
                            //}
                        });
                    });


                    // Parallel.ForEach blocks until all iterations finish, so this runs after
                    // every (grade, area) pair of this textbook entry has been processed.
                    UpdateJiaocaiDetailStatus(jc.JiaoCaiDetailId);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Downloads the detail page of a single question from zujuan.com and returns the
        /// question data embedded in that page as a JSON array.
        /// </summary>
        /// <param name="questionId">
        /// Id of the question. It is inserted into the page URL and, converted with
        /// <c>NullToInt</c>, used to look up the matching <c>Question</c> row.
        /// </param>
        /// <returns>
        /// The parsed array, or <c>null</c> when the page reports the question as deleted or
        /// access-restricted, when the first sub-question carries no answer, or when any
        /// exception is raised while downloading or parsing the page.
        /// </returns>
        /// <remarks>
        /// Flags written to the matching <c>Question</c> row (nothing is written if no row exists):
        /// deleted page: <c>IsGrabAns = true</c>, <c>IsRemoteDelete = true</c>;
        /// restricted page: <c>IsGrabAns = false</c>;
        /// missing answer or any exception: <c>IsGrabAns = false</c>, <c>IsRemoteDelete = false</c>.
        /// </remarks>
        public static JArray CrawlerSingleQuestion(string questionId)
        {
            var url = $"https://www.zujuan.com/question/detail-{questionId}.shtml";

            try
            {
#if DEBUG
                var html = HttpWebResponseProxyMayi.ExecuteCreateGetHttpResponseProxy(url, 6000, null);
#else
                var html = HttpClientHolder.Proxy_GetRequestAbyyun(url);
#endif

                // The markers are fixed site texts, so match them ordinally instead of
                // with the culture-sensitive default of IndexOf(string).
                if (html.IndexOf("试题已经被删除", StringComparison.Ordinal) >= 0)
                {
                    Console.WriteLine($"{questionId} has delete!!!");
                    SetQuestionGrabFlags(questionId, true, true);
                    return null;
                }

                if (html.IndexOf("限制访问试题", StringComparison.Ordinal) >= 0)
                {
                    // Restricted page: only IsGrabAns is reset, IsRemoteDelete stays as it is.
                    SetQuestionGrabFlags(questionId, false, null);
                    return null;
                }

                Console.WriteLine($"start crawler {url}");

                // Cut out the text between the "var MockDataTestPaper =" assignment and the
                // following "OT2.renderQList" call, then strip surrounding whitespace and
                // trailing semicolons (same trimming sequence as before).
                string value = GetValue(html, "var MockDataTestPaper =", "OT2.renderQList")
                    .TrimEnd(';')
                    .Trim()
                    .TrimEnd(';')
                    .Trim();

                JArray questions = JArray.Parse(value);

                // If the first question has a "list" of sub-questions, the first of them must
                // carry an answer; otherwise the result is treated as a failed grab.
                var firstQuestion = questions[0]["questions"][0];
                if (!string.IsNullOrWhiteSpace(firstQuestion["list"].NullToString())
                    && string.IsNullOrWhiteSpace(firstQuestion["list"][0]["answer"].NullToString()))
                {
                    Console.WriteLine("明细题答案抓取错误");
                    Debug.WriteLine("明细题答案抓取错误");
                    SetQuestionGrabFlags(questionId, false, false);
                    return null;
                }

                Console.WriteLine($"aleady get {questionId} return value");

                // Return the array parsed above instead of parsing the same text a second time.
                return questions;
            }
            catch (Exception e)
            {
                // Every failure (I/O, network, JSON, unexpected page structure) is handled the
                // same way: log it, mark the question as not grabbed and report "no result".
                Console.WriteLine($"crawler {questionId} failed: {e.GetType().Name}: {e.Message}");
                SetQuestionGrabFlags(questionId, false, false);
                return null;
            }
        }

        /// <summary>
        /// Loads the <c>Question</c> row for <paramref name="questionId"/> and stores the given
        /// grab flags on it. Does nothing when no such row exists.
        /// </summary>
        /// <param name="questionId">Question id; converted with <c>NullToInt</c> for the lookup.</param>
        /// <param name="isGrabAns">New value for <c>IsGrabAns</c>.</param>
        /// <param name="isRemoteDelete">
        /// New value for <c>IsRemoteDelete</c>, or <c>null</c> to leave that column unchanged.
        /// </param>
        private static void SetQuestionGrabFlags(string questionId, bool isGrabAns, bool? isRemoteDelete)
        {
            using (var db = new CrawlerEntities())
            {
                var id     = questionId.NullToInt();
                var entity = db.Question.FirstOrDefault(t => t.Question_Id == id);
                if (entity == null)
                {
                    return;
                }

                entity.IsGrabAns = isGrabAns;
                if (isRemoteDelete.HasValue)
                {
                    entity.IsRemoteDelete = isRemoteDelete.Value;
                }
                db.SaveChanges();
            }
        }