/// <summary>
/// Walks every pending textbook ("jiaocai") entry and records, for each matching grade and
/// each area, how many questions the corresponding zujuan.xkw.com listing page reports.
/// </summary>
/// <remarks>
/// Keeps looping while <c>ExistCrawlerJC()</c> returns true. Each round takes a batch from
/// <c>GetRandom10CrawlerJC()</c>; after all grade/area combinations of an entry have been
/// processed, the entry is passed to <c>UpdateJiaocaiDetailStatus</c>.
/// </remarks>
public void Crawler()
{
    var listSubjectGrade = GetSubjectGrade();
    var listArea = GetAreas();

    while (ExistCrawlerJC())
    {
        var listJiaocai = GetRandom10CrawlerJC();
        foreach (var jc in listJiaocai)
        {
            // Only the grades that belong to this textbook's subject are crawled.
            var grades = listSubjectGrade.Where(t => t.SubjectId == jc.SubjectId);

            Parallel.ForEach(grades, (grade) =>
            {
                Parallel.ForEach(listArea, (area) =>
                {
                    // Skip combinations that already have a recorded page source.
                    if (ExistGrabPageSource(area.AreaId, jc.JiaoCaiDetailId, grade.GradeId))
                    {
                        return;
                    }

                    var url = $"http://zujuan.xkw.com/{jc.Prefix}/zj{jc.JiaoCaiDetailId}/a{area.AreaId}g{grade.GradeId}/";
                    Console.WriteLine(url);

                    // NOTE(review): the response body is never read below; the count is taken from
                    // `doc`, which is not declared in this method. Presumably `html` was meant to be
                    // parsed into `doc` first - confirm where `doc` comes from and whether it is safe
                    // to use from parallel iterations.
                    var html = HttpWebResponseProxyMayi.ExecuteCreateGetHttpResponseProxy(url, 3000, null);
                    var totalCount = doc.GetElementById("questioncount").Text().NullToInt();

                    // Only the total is stored. The previous unused page-count local
                    // (totalCount / 10 + 1) and its commented-out per-page loop were removed.
                    AddGrabPageSource(area.AreaId, jc.JiaoCaiDetailId, grade.GradeId, totalCount);
                });
            });

            UpdateJiaocaiDetailStatus(jc.JiaoCaiDetailId);
        }
    }
}
/// <summary>
/// Downloads the detail page of a single question from zujuan.com and returns the JSON array
/// embedded in the page between <c>var MockDataTestPaper =</c> and <c>OT2.renderQList</c>.
/// </summary>
/// <param name="questionId">
/// Question id. It is inserted into the detail URL and, converted with <c>NullToInt</c>,
/// used to look up the matching <c>Question</c> row whose grab flags are updated.
/// </param>
/// <returns>
/// The parsed array, or <c>null</c> when the page reports the question as deleted or
/// restricted, when the first sub-question has no answer, or when any exception occurs.
/// </returns>
public static JArray CrawlerSingleQuestion(string questionId)
{
    var url = $"https://www.zujuan.com/question/detail-{questionId}.shtml";

    try
    {
#if DEBUG
        var a = HttpWebResponseProxyMayi.ExecuteCreateGetHttpResponseProxy(url, 6000, null);
#else
        var a = HttpClientHolder.Proxy_GetRequestAbyyun(url);
#endif

        // The page says the question was deleted remotely: mark it as finished and deleted.
        if (a.IndexOf("试题已经被删除", StringComparison.Ordinal) >= 0)
        {
            Console.WriteLine($"{questionId} has delete!!!");
            MarkQuestionGrabResult(questionId, true, true);
            return null;
        }

        // Access to the question is restricted: leave it un-grabbed, do not touch the delete flag.
        if (a.IndexOf("限制访问试题", StringComparison.Ordinal) >= 0)
        {
            MarkQuestionGrabResult(questionId, false, null);
            return null;
        }

        Console.WriteLine($"start crawler https://www.zujuan.com/question/detail-{questionId}.shtml");

        // Cut the JSON literal out of the page script and strip trailing ';' and whitespace.
        // The two-step trimming is kept exactly as before because it is not equivalent to a
        // single TrimEnd when semicolons and spaces are interleaved.
        string value = GetValue(a, "var MockDataTestPaper =", "OT2.renderQList").TrimEnd(';');
        value = value.Trim().TrimEnd(';').Trim();

        JArray jObject = JArray.Parse(value);

        // When the question carries a sub-question list, its first entry must have an answer.
        var subQuestions = jObject[0]["questions"][0]["list"];
        if (!string.IsNullOrWhiteSpace(subQuestions.NullToString())
            && string.IsNullOrWhiteSpace(subQuestions[0]["answer"].NullToString()))
        {
            Console.WriteLine("明细题答案抓取错误");
            Debug.WriteLine("明细题答案抓取错误");
            MarkQuestionGrabResult(questionId, false, false);
            return null;
        }

        Console.WriteLine($"aleady get {questionId} return value");

        // Return the already-parsed array instead of parsing the same text a second time.
        return jObject;
    }
    catch (Exception e)
    {
        // Any failure (I/O, network, malformed JSON, unexpected page layout) leaves the
        // question marked as not grabbed and not deleted, exactly as the former separate
        // catch blocks did; the failure is now also logged instead of being swallowed silently.
        Console.WriteLine($"crawler {url} failed: {e.GetType().Name}: {e.Message}");
        MarkQuestionGrabResult(questionId, false, false);
    }

    return null;
}

/// <summary>
/// Updates the grab flags of the <c>Question</c> row whose id matches
/// <paramref name="questionId"/>; does nothing when no such row exists.
/// </summary>
/// <param name="questionId">Question id, converted with <c>NullToInt</c> for the lookup.</param>
/// <param name="isGrabAns">Value written to <c>IsGrabAns</c>.</param>
/// <param name="isRemoteDelete">
/// Value written to <c>IsRemoteDelete</c>, or <c>null</c> to leave that flag unchanged.
/// </param>
private static void MarkQuestionGrabResult(string questionId, bool isGrabAns, bool? isRemoteDelete)
{
    using (var db = new CrawlerEntities())
    {
        var id = questionId.NullToInt();
        var entity = db.Question.FirstOrDefault(t => t.Question_Id == id);
        if (entity == null)
        {
            return;
        }

        if (isRemoteDelete.HasValue)
        {
            entity.IsRemoteDelete = isRemoteDelete.Value;
        }

        entity.IsGrabAns = isGrabAns;
        db.SaveChanges();
    }
}