/// <summary>
/// Parses an HTML response and builds one exam item for every <c>div.st</c>
/// element for which <c>BuildEntity</c> produces a result.
/// </summary>
/// <param name="strResponse">Raw HTML text. Null or empty yields an empty list.</param>
/// <param name="moduleId">Module identifier passed through to <c>BuildEntity</c>.</param>
/// <returns>The successfully built items; never null.</returns>
public override IList<ExamItem> Process(string strResponse, int moduleId)
{
    var result = new List<ExamItem>();
    if (string.IsNullOrEmpty(strResponse))
    {
        return result;
    }

    var document = new JumonyParser().Parse(strResponse);

    // All exam questions. Materialized once so the selector query is not
    // re-run by the loop and by each count below.
    var htmlExamItems = document.Descendants(@"div.st").ToList();

    foreach (var item in htmlExamItems)
    {
        var model = BuildEntity(moduleId, item);
        if (model == null)
        {
            // Elements that cannot be converted are skipped; the mismatch is logged below.
            continue;
        }

        result.Add(model);
    }

    if (htmlExamItems.Count > result.Count)
    {
        // {0} is the number of elements found in the HTML, {1} the number parsed.
        // The original passed these two arguments in the opposite order.
        string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
        WriteLog(strResponse, msg);
    }

    return result;
}
/// <summary>
/// Parses an HTML response and builds one <c>SubjectModule</c> per
/// <c>tbody&gt;tr</c> row that has both a title cell and a link.
/// </summary>
/// <param name="strResponse">Raw HTML text. Null or empty yields an empty list.</param>
/// <param name="moduleId">Module identifier combined with the row title by <c>GetId</c>.</param>
/// <returns>The modules built from the usable rows; never null.</returns>
public override List<SubjectModule> Process(string strResponse, int moduleId)
{
    var list = new List<SubjectModule>();
    if (string.IsNullOrEmpty(strResponse))
    {
        // Same guard as the sibling Process implementations: nothing to parse.
        return list;
    }

    var document = new JumonyParser().Parse(strResponse);

    // Materialized once; the rows are iterated and then counted for the diagnostic line.
    var trs = document.Descendants("tbody>tr").ToList();

    foreach (IHtmlElement tr in trs)
    {
        // The first cell carries the title; the last link inside a cell carries the target page.
        var titleCell = tr.Find("td").FirstOrDefault();
        var link = tr.Find("td a").LastOrDefault();
        if (titleCell == null || link == null)
        {
            // Skip rows missing either part so one incomplete row does not abort
            // the whole page. The count printed below exposes skipped rows.
            continue;
        }

        string title = titleCell.InnerText();
        string href = link.Attribute("href").Value();

        list.Add(new SubjectModule()
        {
            Id = GetId(moduleId, title),
            Handler = "SweetFly.Job.Handler.NormalHandler,SweetFly.Job",
            HtmlDataSource = new HtmlDataSource()
            {
                Encoding = "GB2312",
                Uri = @"http://learning.cmr.com.cn/student/acourse/HomeworkCenter/" + href
            }
        });
    }

    // Diagnostic output: rows found vs. modules built.
    Console.WriteLine("{0} - {1}", trs.Count, list.Count);
    return list;
}
/// <summary>
/// Parses an HTML response laid out as tables and builds one exam item for every
/// row of the second <c>body&gt;div&gt;table</c> whose inner HTML contains "【"
/// and for which <c>BuildEntity</c> produces a result.
/// </summary>
/// <param name="strResponse">Raw HTML text. Null or empty yields an empty list.</param>
/// <param name="moduleId">Module identifier passed through to <c>BuildEntity</c>.</param>
/// <returns>The successfully built items; never null.</returns>
public override IList<ExamItem> Process(string strResponse, int moduleId)
{
    var result = new List<ExamItem>();
    if (string.IsNullOrEmpty(strResponse))
    {
        return result;
    }

    var document = new JumonyParser().Parse(strResponse);

    // The questions live in the second matching table (index 1).
    // ElementAtOrDefault avoids an ArgumentOutOfRangeException when the page
    // has fewer than two such tables.
    var dataTable = document.Descendants("body>div>table").ElementAtOrDefault(1);
    if (dataTable == null)
    {
        WriteLog(strResponse, "Data table (second body>div>table) not found.");
        return result;
    }

    // All exam questions: rows whose markup contains the "【" marker.
    // Materialized once so the filter is not re-run by the loop and each count below.
    var htmlExamItems = dataTable.Elements("tr")
                                 .Where(row => row.InnerHtml().Contains("【"))
                                 .ToList();

    foreach (var item in htmlExamItems)
    {
        var model = BuildEntity(moduleId, item);
        if (model == null)
        {
            // Rows that cannot be converted are skipped; the mismatch is logged below.
            continue;
        }

        result.Add(model);
    }

    if (htmlExamItems.Count > result.Count)
    {
        // {0} is the number of rows found in the HTML, {1} the number parsed.
        // The original passed these two arguments in the opposite order.
        string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
        WriteLog(strResponse, msg);
    }

    return result;
}
/// <summary>
/// Timing harness for three ways of running CSS selectors against a document
/// loaded from http://www.sina.com.cn/ (GB2312). Each pass runs the same ten
/// selectors 200 times and prints the elapsed time:
///   1. <c>Descendants().FilterBy(selector)</c>
///   2. <c>Descendants().ToArray().FilterBy(selector)</c>
///   3. <c>Find(selector)</c>
/// Waits for a key press before exiting.
/// </summary>
static void Main(string[] args)
{
    var document = new JumonyParser().LoadDocument("http://www.sina.com.cn/", Encoding.GetEncoding("GB2312"));

    // Selectors exercised by every pass, in the order they are run.
    string[] selectors =
    {
        "body p a",
        "p > a",
        "p[class] a",
        "p a[href]",
        "p + a",
        "div a",
        "p div a",
        "a img[src]",
        "div img",
        "body img[src]"
    };
    const int rounds = 200;

    var timer = new Stopwatch();

    // Pass 1: filter the descendants sequence directly.
    timer.Restart();
    for (int round = 0; round < rounds; round++)
    {
        // The snapshot is taken in every pass so all three do the same baseline work.
        var snapshot = document.Descendants().ToArray();
        foreach (string selector in selectors)
        {
            document.Descendants().FilterBy(selector).FirstOrDefault();
        }
    }
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    // Pass 2: copy the descendants into an array before filtering.
    timer.Restart();
    for (int round = 0; round < rounds; round++)
    {
        var snapshot = document.Descendants().ToArray();
        foreach (string selector in selectors)
        {
            document.Descendants().ToArray().FilterBy(selector).FirstOrDefault();
        }
    }
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    // Pass 3: let the document resolve the selector via Find.
    timer.Restart();
    for (int round = 0; round < rounds; round++)
    {
        var snapshot = document.Descendants().ToArray();
        foreach (string selector in selectors)
        {
            document.Find(selector).FirstOrDefault();
        }
    }
    timer.Stop();
    Console.WriteLine(timer.Elapsed);

    Console.ReadKey();
}