/// <summary>
/// Parses an HTML response and builds one <see cref="ExamItem"/> for every question row found.
/// </summary>
/// <param name="strResponse">Raw HTML text. A null or empty value yields an empty list.</param>
/// <param name="moduleId">Module identifier that is forwarded to <c>BuildEntity</c>.</param>
/// <returns>
/// The items <c>BuildEntity</c> produced; rows for which it returned null are skipped.
/// </returns>
public override IList<ExamItem> Process(string strResponse, int moduleId)
{
    var result = new List<ExamItem>();
    if (string.IsNullOrEmpty(strResponse))
    {
        return result;
    }

    var document = new JumonyParser().Parse(strResponse);

    // All questions: rows of the second "body>div>table" match whose markup contains "【".
    // NOTE(review): ElementAt(1) is expected to throw when fewer than two tables match - confirm
    // whether callers rely on that.
    var dataTable = document.Descendants("body>div>table").ElementAt(1);

    // Materialize once so the filter is not re-evaluated by the loop and by each count below.
    var htmlExamItems = dataTable.Elements("tr")
        .Where(x => x.InnerHtml().Contains("【"))
        .ToList();

    foreach (var item in htmlExamItems)
    {
        var model = BuildEntity(moduleId, item);
        if (model != null)
        {
            result.Add(model);
        }
    }

    if (htmlExamItems.Count > result.Count)
    {
        // {0} is labelled "Html" (rows found in the page), {1} is labelled "解析" (rows parsed),
        // so the found count goes first and the parsed count second.
        string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
        WriteLog(strResponse, msg);
    }

    return result;
}
/// <summary>
/// Parses an HTML response and builds one <see cref="SubjectModule"/> per
/// <c>tr[onmouseout]</c> row.
/// </summary>
/// <param name="strResponse">Raw HTML text. A null or empty value yields an empty list.</param>
/// <param name="moduleId">Module identifier that is passed to <c>GetId</c> together with the row title.</param>
/// <returns>One module per matched row, in document order of the matched rows.</returns>
public override List<SubjectModule> Process(string strResponse, int moduleId)
{
    var list = new List<SubjectModule>();

    // Same guard as the sibling Process implementations: nothing to parse, nothing to return.
    if (string.IsNullOrEmpty(strResponse))
    {
        return list;
    }

    var document = new JumonyParser().Parse(strResponse);

    // Materialize once so the selector is not evaluated again for the diagnostic count below.
    var trs = document.Descendants("tr[onmouseout]").ToList();

    foreach (IHtmlElement tr in trs)
    {
        // Title is the text of the row's first cell; the link is the last "td a" in the row.
        string title = tr.FindFirst("td").InnerText();
        string href = tr.FindLast("td a").Attribute("href").Value();

        list.Add(new SubjectModule()
        {
            Id = GetId(moduleId, title),
            Handler = "SweetFly.Job.Handler.OldHandler,SweetFly.Job",
            HtmlDataSource = new HtmlDataSource()
            {
                Encoding = "GB2312",
                Uri = @"http://learning.cmr.com.cn/subject/stupage/" + href
            }
        });
    }

    // Diagnostic output: matched rows versus modules built.
    Console.WriteLine("{0} - {1}", trs.Count, list.Count);
    return list;
}
/// <summary>
/// Parses an HTML response and builds one <see cref="ExamItem"/> for every <c>div.st</c> element.
/// </summary>
/// <param name="strResponse">Raw HTML text. A null or empty value yields an empty list.</param>
/// <param name="moduleId">Module identifier that is forwarded to <c>BuildEntity</c>.</param>
/// <returns>
/// The items <c>BuildEntity</c> produced; elements for which it returned null are skipped.
/// </returns>
public override IList<ExamItem> Process(string strResponse, int moduleId)
{
    var result = new List<ExamItem>();
    if (string.IsNullOrEmpty(strResponse))
    {
        return result;
    }

    var document = new JumonyParser().Parse(strResponse);

    // All questions. Materialized once so the selector is not re-evaluated by the loop
    // and by each count below.
    var htmlExamItems = document.Descendants(@"div.st").ToList();

    foreach (var item in htmlExamItems)
    {
        var model = BuildEntity(moduleId, item);
        if (model != null)
        {
            result.Add(model);
        }
    }

    if (htmlExamItems.Count > result.Count)
    {
        // {0} is labelled "Html" (elements found in the page), {1} is labelled "解析"
        // (elements parsed), so the found count goes first and the parsed count second.
        string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
        WriteLog(strResponse, msg);
    }

    return result;
}
/// <summary>
/// Times three ways of running the same ten CSS selectors against a downloaded page:
/// filtering the lazy descendant sequence, filtering an array snapshot of it, and
/// calling <c>Find</c> on the document. Each variant runs 200 rounds and its elapsed
/// time is printed; the program then waits for a key press.
/// </summary>
static void Main( string[] args )
{
    const int rounds = 200;

    // The selectors exercised by every timing pass, in the order they are run.
    string[] selectors =
    {
        "body p a",
        "p > a",
        "p[class] a",
        "p a[href]",
        "p + a",
        "div a",
        "p div a",
        "a img[src]",
        "div img",
        "body img[src]"
    };

    var document = new JumonyParser().LoadDocument( "http://www.sina.com.cn/", Encoding.GetEncoding( "GB2312" ) );
    Stopwatch timer = new Stopwatch();

    // Pass 1: FilterBy over the descendant sequence as returned.
    timer.Restart();
    for ( int round = 0; round < rounds; round++ )
    {
        // Result is unused; the call is kept because it is part of the timed work.
        document.Descendants().ToArray();
        foreach ( string selector in selectors )
        {
            document.Descendants().FilterBy( selector ).FirstOrDefault();
        }
    }
    timer.Stop();
    Console.WriteLine( timer.Elapsed );

    // Pass 2: FilterBy over an array snapshot of the descendants.
    timer.Restart();
    for ( int round = 0; round < rounds; round++ )
    {
        document.Descendants().ToArray();
        foreach ( string selector in selectors )
        {
            document.Descendants().ToArray().FilterBy( selector ).FirstOrDefault();
        }
    }
    timer.Stop();
    Console.WriteLine( timer.Elapsed );

    // Pass 3: Find called directly on the document.
    timer.Restart();
    for ( int round = 0; round < rounds; round++ )
    {
        document.Descendants().ToArray();
        foreach ( string selector in selectors )
        {
            document.Find( selector ).FirstOrDefault();
        }
    }
    timer.Stop();
    Console.WriteLine( timer.Elapsed );

    Console.ReadKey();
}