Exemplo n.º 1
0
        /// <summary>
        /// Click handler for the query button: runs the CSS selector typed by the user against the
        /// HTML held in the input box and writes the outcome to the output box.
        /// (In CSS, a selector is a pattern used to pick the elements that should be styled.)
        /// </summary>
        /// <param name="sender">Event source; not used by this handler.</param>
        /// <param name="e">Routed event data; not used by this handler.</param>
        private void btn_Query_Click(object sender, RoutedEventArgs e)
        {
            // Range spanning the entire content of the input rich text box.
            var inputDocument = rbox_Input.Document;
            var wholeInput = new TextRange(inputDocument.ContentStart, inputDocument.ContentEnd);

            string selector = this.tbox_CSSSelector.Text.Trim();
            if (string.IsNullOrEmpty(selector))
            {
                // Nothing to query with: prompt the user and stop.
                EMessageBox.Show("请输入CSS选择器");
                return;
            }

            string html = wholeInput.Text;

            angleSharpHelper.Init(html);
            var match = angleSharpHelper.CSSQuery(selector);

            // Only a single result is displayed for now; richer output was deferred by the author.
            string outputText = match != null ? match.OuterHtml : "未匹配到结果";
            this.rbox_Output.Document = new FlowDocument(new Paragraph(new Run(outputText)));
        }
Exemplo n.º 2
0
        /// <summary>
        /// Heuristically extracts product information (name, sales/review count, price, detail text
        /// and detail image URLs) from the HTML of a product page.
        /// </summary>
        /// <param name="source">HTML source of the product page.</param>
        /// <returns>
        /// A populated <see cref="Good"/>; individual properties stay null when no element matched.
        /// Returns <c>null</c> when <paramref name="source"/> is null or empty, or when extraction throws.
        /// </returns>
        private Good GetGood(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return null;
            }

            try
            {
                // Original author's note: AngleSharp appeared to produce garbled Chinese text.
                angleSharpHelper.Init(source);

                // NOTE(review): `[attr~=value]` matches a whole whitespace-separated word, not a
                // substring. If "contains" is intended (as the comments below suggest), `*=` would
                // be required - confirm before changing, since it widens every match.
                // NOTE(review): CSSQueryRange presumably tries each selector in turn and returns
                // the first hit - verify against the helper.

                // Most pages use an <h1> for the product title; otherwise fall back to a
                // class/id of "name" (the `i` flag requests case-insensitive matching).
                var nameElement = angleSharpHelper.CSSQueryRange("h1", "[class~=name i]", "[id~=name i]");

                // Review count / sales volume.
                var salesElement = angleSharpHelper.CSSQueryRange("[class~=comment i]", "[id~=pinglun i]", "[class~=pinglun i]");

                // Price.
                var priceElement = angleSharpHelper.CSSQueryRange("[class~=price i]", "[id~=price i]");

                // Product detail sections generally carry "detail" in their id/class.
                var detailElement = angleSharpHelper.CSSQueryRange("[id~=detail i]", "[class~=detail i]");

                // The review list itself is not extracted: per the original author it usually needs
                // a separate dynamic request (typically returning JSON) and site-specific handling.

                Good good = new Good();
                good.Name = nameElement?.TextContent;

                // NOTE(review): when no element matched, null is handed to RegexUtil.ExtractDigit;
                // if that helper throws on null, the catch below discards the whole result - confirm.
                good.Sales = RegexUtil.ExtractDigit(salesElement?.TextContent);
                good.Price = RegexUtil.ExtractDigit(priceElement?.TextContent);
                good.DetailContent = detailElement?.TextContent;

                // Skip <img> tags without a usable src so the list never contains null/empty entries.
                // The list itself remains null when no detail element was found.
                good.DetailImageList = detailElement?.QuerySelectorAll("img")
                    .Select(img => img.Attributes["src"]?.Value)
                    .Where(src => !string.IsNullOrEmpty(src))
                    .ToList();

                return good;
            }
            catch (System.Exception ex)
            {
                // Extraction is best-effort, so callers still receive null, but the failure is
                // traced instead of vanishing silently.
                System.Diagnostics.Trace.TraceError("GetGood failed: " + ex);
                return null;
            }
        }
Exemplo n.º 3
0
 /// <summary>
 /// Starts a task that parses <paramref name="source"/> with a fresh AngleSharpHelper, walks every
 /// <c>a</c> element and, for each href accepted by RegexUtil.IsUrl, passes it to AppendText and
 /// enqueues it on <c>queue</c>.
 /// </summary>
 /// <param name="source">HTML text to scan for links.</param>
 /// <returns>The task running the extraction.</returns>
 private Task ExtractLink(string source)
 {
     return Task.Run(() =>
     {
         AngleSharpHelper parser = new AngleSharpHelper();
         parser.Init(source);

         foreach (var anchor in parser.CSSQueryAll("a"))
         {
             var href = anchor.Attributes["href"]?.Value;

             // Ignore anchors with no href or with an href that is not a URL.
             if (string.IsNullOrEmpty(href) || !RegexUtil.IsUrl(href))
             {
                 continue;
             }

             // TODO: use a regex grouping construct to exclude file paths such as http://abc.com/test.exe
             AppendText(href);
             queue.Enqueue(href);
         }
     });
 }