/// <summary>
/// Reads the author and username spans from the page, stores them on the context,
/// queues every link matching the GitHub "owner/repo" URL pattern as an extra request,
/// and terminates the flow when no username was found.
/// </summary>
/// <param name="context">Data flow context supplying the page selectable and the originating request.</param>
/// <returns>
/// A completed task holding <c>DataFlowResult.Terminated</c> (after clearing the context data)
/// when the username is null/blank; otherwise <c>DataFlowResult.Success</c>.
/// </returns>
protected override Task<DataFlowResult> Parse(DataFlowContext context)
{
    var page = context.Selectable;

    // Extract the data fields.
    var fullName = page
        .XPath("//span[@class='p-name vcard-fullname d-block overflow-hidden']")
        .GetValue();
    var userName = page
        .XPath("//span[@class='p-nickname vcard-username d-block']")
        .GetValue();

    context.AddData("author", fullName);
    context.AddData("username", userName);

    // Queue the target links as follow-up requests.
    foreach (var link in page.Links().Regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)").GetValues())
    {
        var followUp = CreateFromRequest(context.Response.Request, link);
        context.AddExtraRequests(followUp);
    }

    if (!string.IsNullOrWhiteSpace(userName))
    {
        return Task.FromResult(DataFlowResult.Success);
    }

    // Nothing was parsed: drop the collected data so later steps (storage etc.) are skipped.
    // Extra requests queued above are left in place.
    context.ClearData();
    return Task.FromResult(DataFlowResult.Terminated);
}
/// <summary>
/// Parses a video listing page: builds one <c>VideoInfo</c> per <c>li.item</c> element,
/// fetches each video's parent page inline to collect its play links, stores the result
/// under the key "v", and stops paging when the last pager link is not the "next page" link.
/// </summary>
/// <param name="context">Data flow context supplying the page selectable.</param>
/// <returns>
/// A completed task holding <c>DataFlowResult.Terminated</c> (after clearing the context data)
/// when the page contains no items; otherwise <c>DataFlowResult.Success</c>.
/// </returns>
protected override Task<DataFlowResult> Parse(DataFlowContext context)
{
    ISelectable selectable = context.Selectable;

    // Text of the last pager link with any trailing '>' stripped. GetValue() results are
    // treated as nullable throughout this method, so use ?. to avoid a
    // NullReferenceException on pages without a pager; a null value simply ends paging below.
    string next = selectable.XPath("//div[@class='ew-page']/a[last()]").GetValue()?.TrimEnd('>');

    // Parse the data.
    List<string> data = selectable.XPath("//li[@class='item']").GetValues();

    // If nothing was parsed, skip the subsequent steps (storage etc.).
    if (data == null || data.Count == 0)
    {
        context.ClearData();
        return Task.FromResult(DataFlowResult.Terminated);
    }

    List<VideoInfo> videos = new List<VideoInfo>(data.Count);
    foreach (string item in data)
    {
        Selectable entry = new Selectable(item);

        // Evaluate the 'pay' badge once; the video is paid only when the badge text contains "付费".
        string payLabel = entry.XPath("//span[@class='pay']").GetValue();

        VideoInfo video = new VideoInfo
        {
            Name = entry.XPath("//span[@class='s1']").GetValue(),
            Cover = entry.XPath("//img/@src").GetValue(),
            Year = entry.XPath("//span[@class='hint']").GetValue(),
            Description = entry.XPath("//p[@class='star']").GetValue(),
            IsPay = !string.IsNullOrWhiteSpace(payLabel) && payLabel.Contains("付费"),
            Type = 1,
            // Temporary video address: the item's link, used below to fetch the detail page.
            ParentUrl = entry.XPath("//a/@href").GetValue()
        };
        videos.Add(video);

        if (string.IsNullOrWhiteSpace(video.ParentUrl))
        {
            continue;
        }

        // The detail page is fetched inline here, so no extra requests are enqueued for it.
        string detailHtml = new JsHttpHelper().GetPageContent(video.ParentUrl);
        if (string.IsNullOrWhiteSpace(detailHtml))
        {
            continue;
        }

        var playLinks = new Selectable(detailHtml)
            .XPath("//div[@class='top-list-zd g-clear']//a[@data-daochu]")
            .GetValues(ValueOption.OuterHtml);
        if (playLinks == null)
        {
            continue;
        }

        foreach (var linkHtml in playLinks)
        {
            // Each value is the outer HTML of one <a>; re-parse it to read href and text.
            Selectable link = new Selectable(linkHtml);
            string playUrl = link.XPath("//a/@href").GetValue();
            if (string.IsNullOrWhiteSpace(playUrl))
            {
                continue;
            }

            video.Details.Add(new VideoDetail
            {
                PlayUrl = playUrl,
                // NOTE(review): every detail gets Number "1" — confirm this is intended
                // rather than a per-link sequence number.
                Number = "1",
                IsPay = video.IsPay,
                PlayName = link.XPath("//a").GetValue()
            });
        }
    }

    context.AddData("v", videos);

    // When the last pager link is not "next page" there are no further pages to follow.
    if (next != "下一页")
    {
        FollowRequestQuerier = null;
    }

    return Task.FromResult(DataFlowResult.Success);
}