示例#1
0
        /// <summary>
        /// Scrapes one list page: for every element with class <c>post-style-card</c> it reads the
        /// summary fields, opens the detail page in a new tab to read image, content and labels,
        /// then walks the question page to collect each option and the answer shown after choosing it.
        /// </summary>
        /// <param name="url">URL of the list page to load; also recorded on every error job.</param>
        /// <param name="classifyId">Classification stored as <c>TypeId</c>/<c>TypeName</c> on each result.</param>
        /// <returns>
        /// The successfully scraped tests, and one <see cref="TestErrorJob"/> per item (or per window
        /// clean-up) that threw. A failing item never aborts the remaining items.
        /// </returns>
        public static (List<Test> tests, List<TestErrorJob> errorJobs) Take(string url, ClassifyIdEnum classifyId)
        {
            var chromeDriver = GetChromeDriver(url, true);

            var dateTimeNow = DateTime.Now;
            var tests       = new List<Test>();
            var errorJobs   = new List<TestErrorJob>();

            // Builds the error record for a failed item; captures the page-level values.
            TestErrorJob CreateErrorJob(Exception ex, Test failedTest)
            {
                return new TestErrorJob()
                {
                    Url                = url,
                    ExceptionMessage   = ex.Message,
                    ExceptionStackInfo = ex.StackTrace,
                    CreateTime         = dateTimeNow,
                    CreateUnixTime     = DateTimeHelper.GetUnixTimestamp(dateTimeNow),
                    OtherId            = failedTest?.OtherId ?? 0,
                    SourceId           = (int)SourceIdEnum.心评测,
                    SourceName         = SourceIdEnum.心评测.ToString(),
                    TypeId             = (int)classifyId,
                    TypeName           = classifyId.ToString()
                };
            }

            try
            {
                var webElements = chromeDriver.FindElementsByClassName("post-style-card");

                // The list elements belong to this window; we must be back on it before touching the next item.
                var listWindowHandle = chromeDriver.CurrentWindowHandle;

                foreach (var item in webElements)
                {
                    var test = new Test();
                    try
                    {
                        var detailUrl = item.FindElement(By.XPath("./a")).GetAttribute("href");

                        test.Title          = item.FindElement(By.ClassName("post-title"))?.Text;
                        test.LabelName      = item.FindElement(By.XPath("./div[1]/div[2]/span[1]/a")).Text;
                        test.Time           = item.FindElement(By.ClassName("post-time")).Text;
                        test.LikeCount      = int.Parse(item.FindElement(By.XPath("./div[2]/ul/li[4]/span")).Text);
                        test.ViewCount      = int.Parse(item.FindElement(By.XPath("./div[2]/ul/li[2]")).Text.Trim());
                        test.CreateTime     = dateTimeNow;
                        test.CreateUnixTime = DateTimeHelper.GetUnixTimestamp(dateTimeNow);
                        // The numeric id is the last path segment of the detail URL.
                        test.OtherId        = long.Parse(detailUrl.Split('/').Last());
                        test.SourceId       = (int)SourceIdEnum.心评测;
                        test.SourceName     = SourceIdEnum.心评测.ToString();
                        test.TypeId         = (int)classifyId;
                        test.TypeName       = classifyId.ToString();
                        test.UnixTime       = DateTimeHelper.GetUnixTimestamp(DateTime.Parse(test.Time));

                        // Fetch the detail page in a new tab so the list page stays loaded.
                        chromeDriver.ExecuteScript("window.open()");
                        chromeDriver.SwitchTo().Window(chromeDriver.WindowHandles[chromeDriver.WindowHandles.Count - 1]);
                        chromeDriver.Navigate().GoToUrl(detailUrl);

                        test.BigImage   = chromeDriver.FindElementByXPath("//*[@class='article-img']/img").GetAttribute("src");
                        test.SmallImage = test.BigImage;
                        test.Content    = chromeDriver.FindElementByXPath("//*[@class='article-body']/p[1]").Text;
                        // TestCount is derived as a quarter of the view count.
                        test.TestCount  = test.ViewCount / 4;

                        #region Test labels

                        var testLabels        = new List<TestLabel>();
                        var testLabelElements = chromeDriver.FindElementsByXPath("/html/body/main/div/div[1]/div/article/div[3]/div/a");
                        foreach (var labelItem in testLabelElements)
                        {
                            testLabels.Add(new TestLabel()
                            {
                                Label = labelItem.Text
                            });
                        }

                        test.TestLabels = testLabels;

                        #endregion Test labels

                        #region Questions, options and answers

                        var testTitles = new List<TestTitle>();

                        var titleUrl = chromeDriver.FindElementByXPath("/html/body/main/div/div[1]/div/article/div[4]/p[2]/a").GetAttribute("href");
                        chromeDriver.Url = titleUrl;
                        var testTitleElements = chromeDriver.FindElementsById("question_box");
                        foreach (var titleItem in testTitleElements)
                        {
                            var testOptionElemens = chromeDriver.FindElementsByClassName("icheckbox_div");
                            var testOptions       = new List<TestOption>();
                            var testAnswers       = new List<TestAnswer>();

                            for (int optionIndex = 0; optionIndex < testOptionElemens.Count; optionIndex++)
                            {
                                // Open the question page in a fresh tab for every option so that clicking
                                // through to the answer does not invalidate the elements of the current tab.
                                chromeDriver.ExecuteScript("window.open()");
                                chromeDriver.SwitchTo().Window(chromeDriver.WindowHandles[chromeDriver.WindowHandles.Count - 1]);
                                chromeDriver.Navigate().GoToUrl(titleUrl);

                                var newTestOptionElemens = chromeDriver.FindElementsByClassName("icheckbox_div");
                                // FlagId ties an option to the answer produced by selecting it.
                                testOptions.Add(new TestOption()
                                {
                                    FlagId = optionIndex,
                                    Option = newTestOptionElemens[optionIndex].FindElement(By.XPath("./label")).Text
                                });

                                newTestOptionElemens[optionIndex].FindElement(By.XPath("./span/a")).Click();
                                chromeDriver.FindElement(By.Id("next_button")).Click();

                                string answer;
                                try
                                {
                                    answer = chromeDriver.FindElementByXPath("//*[@id='end_desc']/p/span").Text;
                                }
                                catch (NoSuchElementException)
                                {
                                    // Some result pages have no <span>; fall back to the paragraph text.
                                    answer = chromeDriver.FindElementByXPath("//*[@id='end_desc']/p").Text;
                                }

                                testAnswers.Add(new TestAnswer
                                {
                                    FlagId = optionIndex,
                                    Answer = answer
                                });

                                chromeDriver.ExecuteScript("window.close()");
                                chromeDriver.SwitchTo().Window(chromeDriver.WindowHandles[chromeDriver.WindowHandles.Count - 1]);
                            }

                            testTitles.Add(new TestTitle()
                            {
                                Title       = titleItem.FindElement(By.XPath("./div[2]/div[1]")).Text,
                                TestOptions = testOptions,
                                TestAnswers = testAnswers
                            });
                        }

                        test.TestTitles = testTitles;

                        #endregion Questions, options and answers

                        tests.Add(test);
                    }
                    catch (Exception ex)
                    {
                        errorJobs.Add(CreateErrorJob(ex, test));
                    }
                    finally
                    {
                        // Whether the item succeeded or failed, close every tab it opened and return to
                        // the list window; otherwise the next item's elements would be looked up in the
                        // wrong window and tabs would pile up after each failure.
                        try
                        {
                            foreach (var handle in chromeDriver.WindowHandles)
                            {
                                if (handle == listWindowHandle)
                                {
                                    continue;
                                }

                                chromeDriver.SwitchTo().Window(handle);
                                chromeDriver.Close();
                            }

                            chromeDriver.SwitchTo().Window(listWindowHandle);
                        }
                        catch (WebDriverException cleanupEx)
                        {
                            // Record rather than throw so results collected so far are still returned.
                            errorJobs.Add(CreateErrorJob(cleanupEx, test));
                        }
                    }
                }
            }
            finally
            {
                // Always shut the browser down, even if locating the list elements failed.
                chromeDriver.Quit();
            }

            return (tests, errorJobs);
        }
        /// <summary>
        /// Scrapes every page from <paramref name="pageStartIndex"/> to <paramref name="pageEndIndex"/>
        /// (inclusive) via <c>XinCePingSpider.Take</c> and persists the results: error jobs first, then
        /// each new test together with its labels, titles, options and answers.
        /// </summary>
        /// <param name="pageStartIndex">First page index to scrape.</param>
        /// <param name="pageEndIndex">Last page index to scrape (inclusive).</param>
        /// <param name="dataUrl">URL prefix; the page index is appended to it.</param>
        /// <param name="classifyId">Classification passed through to the spider.</param>
        /// <returns>A summary string with the number of pages, saved tests and recorded errors.</returns>
        private async Task<string> PullData(int pageStartIndex, int pageEndIndex, string dataUrl, ClassifyIdEnum classifyId)
        {
            var successCount = 0;
            var errorCount   = 0;
            var pageCount    = 0;

            for (int pageIndex = pageStartIndex; pageIndex <= pageEndIndex; pageIndex++)
            {
                // Count pages actually visited; the range is inclusive, so end - start would under-report by one.
                pageCount++;

                var (tests, errorJobs) = XinCePingSpider.Take($"{dataUrl}{pageIndex}", classifyId);
                if (errorJobs != null && errorJobs.Any())
                {
                    errorCount += await _testErrorJobRepository.Add(errorJobs);
                }

                foreach (var item in tests)
                {
                    // Skip tests already stored for this source.
                    var isExist = await _testRepository.QueryAsQueryable(a => a.OtherId == item.OtherId && a.SourceId == (int)SourceIdEnum.心评测).AnyAsync();
                    if (isExist)
                    {
                        continue;
                    }

                    if (!CheckTest(item))
                    {
                        continue;
                    }

                    // Await instead of blocking on .Result inside an async method.
                    var testId = await _testRepository.Add(item);
                    SetTestId(item, testId);
                    await _testLabelRepository.Add(item.TestLabels);

                    foreach (var title in item.TestTitles)
                    {
                        var testTitleId = await _testTitleRepository.Add(title);

                        SetTestTitleId(title, testTitleId);

                        foreach (var option in title.TestOptions)
                        {
                            var optionId = await _testOptionRepository.Add(option);

                            // Options and answers are paired by FlagId.
                            var answer = title.TestAnswers.FirstOrDefault(a => a.FlagId == option.FlagId);
                            if (answer == null)
                            {
                                // No answer was captured for this option; nothing to link or store.
                                continue;
                            }

                            SetTestOptionId(answer, optionId);
                            await _testAnswerRepository.Add(answer);
                        }
                    }

                    successCount += 1;
                }
            }

            return $"本次爬取页数为:{pageCount} 成功采集数据:{successCount}条 异常采集:{errorCount}条";
        }