/// <summary>
/// Click handler for the query button. Reads the text of <c>rbox_Input</c>,
/// runs the CSS selector entered in <c>tbox_CSSSelector</c> against it through
/// <c>angleSharpHelper</c>, and writes the outcome to <c>rbox_Output</c>.
/// </summary>
/// <param name="sender">Event source; not used by this handler.</param>
/// <param name="e">Routed event data; not used by this handler.</param>
private void btn_Query_Click(object sender, RoutedEventArgs e)
{
    var inputDocument = rbox_Input.Document;
    var inputRange = new TextRange(inputDocument.ContentStart, inputDocument.ContentEnd);
    string cssSelector = this.tbox_CSSSelector.Text.Trim();
    string markup = inputRange.Text;

    // Nothing to query with: prompt the user and leave the output untouched.
    if (string.IsNullOrEmpty(cssSelector))
    {
        EMessageBox.Show("请输入CSS选择器");
        return;
    }

    angleSharpHelper.Init(markup);
    var match = angleSharpHelper.CSSQuery(cssSelector);

    // Only a single result is displayed for now (per the original author's note,
    // showing more results was postponed). When nothing matched, a fixed
    // "no result" message is shown instead of the element's outer HTML.
    string outputText = match != null ? match.OuterHtml : "未匹配到结果";
    this.rbox_Output.Document = new FlowDocument(new Paragraph(new Run(outputText)));
}
/// <summary>
/// Builds a <c>Good</c> from the HTML of a product page by probing a fixed set of
/// CSS selectors (title, sales/review count, price, detail section) through
/// <c>angleSharpHelper</c>.
/// </summary>
/// <param name="source">HTML source to parse.</param>
/// <returns>
/// The populated <c>Good</c>; <c>null</c> when <paramref name="source"/> is null or
/// empty, or when any step of the extraction throws (the exception is written to
/// the trace listeners before <c>null</c> is returned).
/// </returns>
private Good GetGood(string source)
{
    if (string.IsNullOrEmpty(source))
    {
        return null;
    }

    try
    {
        // Original author's note: AngleSharp appeared to garble Chinese text
        // (possible encoding issue) - unverified here.
        Good good = new Good();
        angleSharpHelper.Init(source);

        // Product title: most pages use an <h1>; otherwise fall back to an element
        // whose class/id has the word "name". The trailing " i" flag makes the
        // attribute value comparison case-insensitive.
        // NOTE(review): presumably CSSQueryRange tries the selectors in order and
        // returns the first match - confirm against AngleSharpHelper.
        var titleElement = angleSharpHelper.CSSQueryRange("h1", "[class~=name i]", "[id~=name i]");

        // Review count / sales volume ("pinglun" is the pinyin spelling of "comment").
        var salesElement = angleSharpHelper.CSSQueryRange("[class~=comment i]", "[id~=pinglun i]", "[class~=pinglun i]");

        // Price.
        var priceElement = angleSharpHelper.CSSQueryRange("[class~=price i]", "[id~=price i]");

        // The product detail section usually carries "detail" in its id or class.
        var detailElement = angleSharpHelper.CSSQueryRange("[id~=detail i]", "[class~=detail i]");

        // The review list itself is intentionally not extracted: per the original
        // author it is normally loaded by a separate dynamic request (usually JSON)
        // and shares the "comment" keyword with the review count, so it needs
        // site-specific handling.

        good.Name = titleElement?.TextContent;
        good.Sales = RegexUtil.ExtractDigit(salesElement?.TextContent);
        good.Price = RegexUtil.ExtractDigit(priceElement?.TextContent);
        good.DetailContent = detailElement?.TextContent;

        // Stays null when no detail element was found; an <img> without a src
        // attribute contributes a null entry.
        good.DetailImageList = detailElement?
            .QuerySelectorAll("img")
            .Select(img => img.Attributes["src"]?.Value)
            .ToList();

        return good;
    }
    catch (System.Exception ex)
    {
        // Extraction is best-effort, so callers still get null on failure, but the
        // failure is recorded instead of being silently discarded.
        System.Diagnostics.Trace.TraceError("GetGood failed: " + ex);
        return null;
    }
}
/// <summary>
/// Starts a <c>Task.Run</c> work item that parses <paramref name="source"/> with a
/// dedicated <c>AngleSharpHelper</c>, walks every <c>a</c> element, and for each
/// <c>href</c> accepted by <c>RegexUtil.IsUrl</c> calls <c>AppendText</c> and
/// enqueues the URL on <c>queue</c>.
/// </summary>
/// <param name="source">HTML source whose links are extracted.</param>
/// <returns>The task representing the extraction work.</returns>
private Task ExtractLink(string source)
{
    return Task.Run(() =>
    {
        // A fresh helper instance is created for each call.
        var parser = new AngleSharpHelper();
        parser.Init(source);

        foreach (var anchor in parser.CSSQueryAll("a"))
        {
            var href = anchor.Attributes["href"]?.Value;

            // Skip anchors with a missing/empty href or one that is not a URL.
            if (string.IsNullOrEmpty(href) || RegexUtil.IsUrl(href) != true)
            {
                continue;
            }

            // TODO: use a regex grouping construct to exclude file paths,
            // e.g. http://abc.com/test.exe
            AppendText(href);
            queue.Enqueue(href);
        }
    });
}