/// <summary>
        /// For every book returned by <c>DataService.GetBooklist</c>, downloads the book's
        /// question listing page and stores each link of the page's second filter group
        /// as a category of that book via <c>DataService.AddCategory</c>.
        /// </summary>
        public void InitCategory()
        {
            foreach (var bookEntry in DataService.GetBooklist())
            {
                // Listing page filtered by book version, child and degree.
                // Example of the unfiltered form: https://www.zujuan.com/question?chid=2&xd=1
                var pageUrl = string.Format("https://www.zujuan.com/question?bookversion={0}&chid={1}&xd={2}",
                                            bookEntry.BookVersionId, bookEntry.Child, bookEntry.Degree);

                var page = NSoupClient.Parse(new HttpUnitHelper().GetRealHtmlTrice(pageUrl));

                // The second "con-items" group of the search-type filter holds the links
                // whose "data-bcaid" attribute is stored as the category id.
                var filterGroups  = page.Select("div.search-type div.con-items");
                var categoryLinks = filterGroups[1].GetElementsByTag("a");

                // The number shown in "div.total b" is read once per page and stored
                // with every category found on that page.
                var totalText     = page.Select("div.total b")[0].Text();
                var questionTotal = totalText.NullToInt();

                foreach (var link in categoryLinks)
                {
                    var rawCategoryId = link.Attr("data-bcaid");
                    var label         = link.Text();

                    DataService.AddCategory(bookEntry.Id, rawCategoryId.NullToInt(), label, questionTotal);
                }
            }
        }
        /// <summary>
        /// Downloads the question listing page for every child/degree combination and
        /// stores each link of the page's first filter group as a book via
        /// <c>DataService.AddBook</c>.
        /// </summary>
        public void InitBook()
        {
            foreach (var child in typeof(ChildEnum).GetEnumSource())
            {
                // Depends only on the outer item, so it is converted once per child.
                var childId = child.Item1.NullToInt();

                foreach (var degree in typeof(DegreeEnum).GetEnumSource())
                {
                    var degreeId = degree.Item1.NullToInt();
                    var url      = $"https://www.zujuan.com/question?chid={child.Item1}&xd={degree.Item1}";

                    var html = new HttpUnitHelper().GetRealHtmlTrice(url);
                    var doc  = NSoupClient.Parse(html);

                    // The first "con-items" group of the search-type filter lists the
                    // textbook versions available for this child/degree combination.
                    var bookTypeDoc = doc.Select("div.search-type div.con-items")[0].GetElementsByTag("a");

                    foreach (var element in bookTypeDoc)
                    {
                        var elementId = element.Attr("data-bcaid");
                        var name      = element.Text();

                        DataService.AddBook(childId, degreeId, name, elementId.NullToInt());
                    }
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Downloads the detail page of a question and extracts its answer data.
        /// </summary>
        /// <param name="questionId">
        /// Id substituted into the detail page URL (for example "8630746").
        /// </param>
        /// <returns>
        /// A <c>JObject</c> with "kaodian" and "jiexi" (the HTML of the first and second
        /// child of the first "analyticbox-brick" element). When the first "exam-con"
        /// element contains an "exam-qlist", it also has an "answer_list" array holding
        /// one { "question", "answer" } object per nested "exam-con".
        /// </returns>
        public JObject ParseAnswer(string questionId)
        {
            var answer = new JObject();
            var url    = $"https://www.zujuan.com/question/detail-{questionId}.shtml";
            var html   = new HttpUnitHelper().GetRealHtmlOnce(url);

            // A single parse serves both the existence check and the extraction below.
            var subQuestionLists = NSoupClient.Parse(html)
                                              .GetElementById("J_QuestionList")
                                              .GetElementsByClass("exam-con")[0]
                                              .GetElementsByClass("exam-qlist");

            if (subQuestionLists.Count > 0)
            {
                var qlist          = subQuestionLists[0];
                var subQuestions   = qlist.GetElementsByClass("exam-con");
                var subAnswerBoxes = qlist.Select("div.analyticbox.replace_anawer");

                var answerList = new JArray();
                for (int i = 0; i < subQuestions.Count; i++)
                {
                    // Answer boxes are paired with sub-questions by position.
                    var result = new JObject();
                    result["question"] = subQuestions[i].GetElementsByClass("analyticbox")[0].ProcessHtmlImageElement().Html();
                    result["answer"]   = subAnswerBoxes[i].ProcessHtmlImageElement().Html();
                    answerList.Add(result);
                }
                answer["answer_list"] = answerList;
            }

            // Looked up in a freshly parsed document so that it is unaffected by the
            // processing done above.
            // NOTE(review): if ProcessHtmlImageElement does not modify the tree in place,
            // the document parsed above can be reused here - confirm.
            var analyticboxBrick = NSoupClient.Parse(html)
                                              .GetElementById("J_QuestionList")
                                              .GetElementsByClass("analyticbox-brick")[0];

            answer["kaodian"] = analyticboxBrick.Children[0].ProcessHtmlImageElement().Html();
            answer["jiexi"]   = analyticboxBrick.Children[1].ProcessHtmlImageElement().Html();

            return answer;
        }
        /// <summary>
        /// Whole-site crawl: for every category that has not been crawled yet, regenerates
        /// the URL of each result page and stores it, together with the list-API URL and
        /// its JSON response, via <c>DataService.AddCateUrl</c>.
        /// </summary>
        /// <remarks>
        /// The total number of questions is taken from the JSON list endpoint; an earlier
        /// approach that scraped it from the HTML page was abandoned.
        /// Example listing URL:
        /// https://www.zujuan.com/question?categories=47854&amp;bookversion=47832&amp;nianji=47854&amp;chid=3&amp;xd=1
        /// </remarks>
        public void InitCatePageUrl()
        {
            // Page size used for the page count and for the "per-page" query parameter.
            // NOTE(review): assumes the list API also returns 10 items per page - confirm.
            const int pageSize = 10;

            var listCategory = DataService.GetCategorylist();
            var hasCateID    = DataService.GetCrawleredCatelist();

            foreach (var category in listCategory)
            {
                // Skip categories that were already crawled.
                if (hasCateID.Any(t => t == category.CategoryId))
                {
                    continue;
                }

                var sw = Stopwatch.StartNew();

                var api =
                    $"https://www.zujuan.com/question/list?categories={category.CategoryId}&sortField=time&page=1&_=1540841532659";

                var url = $"https://www.zujuan.com/question?categories={category.CategoryId}&bookversion={category.BookVersionId}&nianji={category.CategoryId}&chid={category.Child}&xd={category.Degree}";

                // The listing page is requested before the cookies are read.
                // NOTE(review): presumably this request is what establishes the session
                // cookies sent to the list API below - confirm before removing it.
                new HttpUnitHelper().GetRealHtmlOnceNotWaitJs(url);

                // Value sent as the "_" query parameter of every per-page API request.
                long randomId = 1540840000000 + new Random().Next(0000000, 9999999);
                //todo: filter by question type

                var cookies      = new HttpUnitHelper().webClient.GetCookies(new URL("https://www.zujuan.com/"));
                var headerCookie = string.Empty;
                foreach (var cookie in cookies)
                {
                    headerCookie += $"{cookie.Name}={cookie.Value};";
                }

                var json       = HttpClientHolder.Execute(api, headerCookie);
                var total      = JObject.Parse(json);
                var totalCount = total["total"].NullToInt();
                if (totalCount <= 0)
                {
                    continue;
                }

                // Ceiling division: a partially filled last page still counts as a page.
                var pageCount = (totalCount + pageSize - 1) / pageSize;

                // Parallel.For's upper bound is exclusive, hence pageCount + 1 to visit
                // pages 1..pageCount inclusive.
                Parallel.For(1, pageCount + 1, (page) =>
                {
                    var currentApi =
                        $"https://www.zujuan.com/question/list?categories={category.CategoryId}&sortField=time&page={page}&_={randomId}";
                    var currentJson = HttpClientHolder.Execute(currentApi, headerCookie);

                    // Page 1 uses the plain listing URL; later pages use the paged index URL.
                    var grabUrl = page > 1
                        ? $"https://www.zujuan.com/question/index?categories={category.CategoryId}&bookversion={category.BookVersionId}&nianji=7741&chid={category.Child}&xd={category.Degree}&page={page}&per-page={pageSize}"
                        : url;

                    // The write is started with BeginInvoke and never awaited (EndInvoke is
                    // not called), so it may still be running when this method returns.
                    Action saveAction = () =>
                    {
                        DataService.AddCateUrl(grabUrl, category.CategoryId, page, currentApi, currentJson);
                    };
                    saveAction.BeginInvoke(null, null);
                });

                sw.Stop();

                Debug.WriteLine("cost" + sw.ElapsedMilliseconds);
            }
        }
        /// <summary>
        /// Downloads the detail page of a question and extracts its title and answer data.
        /// </summary>
        /// <param name="questionId">
        /// Id substituted into the detail page URL (for example "8630746").
        /// </param>
        /// <returns>
        /// A <c>JObject</c> that always has "question_title", "kaodian" and "jiexi".
        /// When the first "exam-con" element contains an "exam-qlist", it also has an
        /// "answer_list" array with one { "question", "answer" } object per nested
        /// "exam-con"; otherwise it has a "daan" entry instead.
        /// </returns>
        public JObject ParseAnswer(string questionId)
        {
            var answer = new JObject();
            var url    = $"https://www.zujuan.com/question/detail-{questionId}.shtml";

            var html = new HttpUnitHelper().GetRealHtmlOnce(url, wait: 3000);
            var doc  = NSoupClient.Parse(html);

            // NOTE: a login check based on the "div.analyticbox-tips" banner used to run
            // here and is currently disabled.
            answer["question_title"] = doc.GetElementById("J_QuestionList").GetElementsByClass("exam-con")[0]
                                       .GetElementsByClass("exam-q")[0].ProcessHtmlImageElement().Html();

            // Parsed separately from `doc`, whose "exam-q" element was processed above.
            // One fresh parse serves both the existence check and the extraction.
            // NOTE(review): if ProcessHtmlImageElement does not modify the tree in place,
            // `doc` can be reused for all lookups in this method - confirm.
            var subQuestionLists = NSoupClient.Parse(html)
                                              .GetElementById("J_QuestionList")
                                              .GetElementsByClass("exam-con")[0]
                                              .GetElementsByClass("exam-qlist");
            var hasSubQuestions = subQuestionLists.Count > 0;

            if (hasSubQuestions)
            {
                var qlist          = subQuestionLists[0];
                var subQuestions   = qlist.GetElementsByClass("exam-con");
                var subAnswerBoxes = qlist.Select("div.analyticbox.replace_anawer");

                var answerList = new JArray();
                for (int i = 0; i < subQuestions.Count; i++)
                {
                    // Answer boxes are paired with sub-questions by position.
                    var result = new JObject();
                    result["question"] =
                        subQuestions[i].GetElementsByClass("analyticbox")[0].ProcessHtmlImageElement().Html();
                    result["answer"] = subAnswerBoxes[i].ProcessHtmlImageElement().Html();
                    answerList.Add(result);
                }
                answer["answer_list"] = answerList;
            }

            // Looked up in its own freshly parsed document, independent of the
            // processing done above.
            var analyticboxBrick = NSoupClient.Parse(html)
                                              .GetElementById("J_QuestionList")
                                              .GetElementsByClass("analyticbox-brick")[0];

            answer["kaodian"] = analyticboxBrick.Children[0].ProcessHtmlImageElement().Html();

            if (hasSubQuestions)
            {
                // With sub-questions the second child is stored as "jiexi".
                answer["jiexi"] = analyticboxBrick.Children[1].ProcessHtmlImageElement().Html();
            }
            else
            {
                // Without sub-questions the second child is stored as "daan" and the
                // third as "jiexi".
                answer["daan"]  = analyticboxBrick.Children[1].ProcessHtmlImageElement().Html();
                answer["jiexi"] = analyticboxBrick.Children[2].ProcessHtmlImageElement().Html();
            }

            return answer;
        }