Example #1
0
        /// <summary>
        /// Collects the image sources returned by <c>HtmlAgilityPackUtil.GetImgFromHtmlAsync</c> for the
        /// given HTML, turns each one into an absolute URL and adds it to the collection through
        /// <c>AddToCollection</c>. Any exception is reported through <c>ShowStatusText</c>.
        /// </summary>
        /// <param name="html">HTML markup handed to the image extractor.</param>
        private async void ExtractImageWithHtmlAgilityPack(string html)
        {
            try
            {
                var imageList = await HtmlAgilityPackUtil.GetImgFromHtmlAsync(html);

                // A null result is handled like an empty one instead of surfacing a
                // NullReferenceException message to the user.
                int total = imageList == null ? 0 : imageList.Count;

                for (int i = 0; i < total; i++)
                {
                    string value = imageList[i];

                    // A missing/blank src would otherwise either abort the whole loop (null)
                    // or be registered as the bare BaseUrl (empty string).
                    if (string.IsNullOrWhiteSpace(value))
                    {
                        continue;
                    }

                    // Protocol-relative URL ("//host/path"): prepend a scheme.
                    // Ordinal comparison: this is a syntactic check, not a linguistic one.
                    if (value.StartsWith("//", StringComparison.Ordinal))
                    {
                        value = "http:" + value;
                    }

                    // No ':' at all means there is no scheme, so the value is treated as
                    // relative and prefixed with BaseUrl.
                    if (!value.Contains(":"))
                    {
                        value = BaseUrl + value;
                    }

                    // Id keeps the 1-based position in the extracted list.
                    AddToCollection(new UrlStruct()
                    {
                        Id = i + 1, Status = "", Title = "", Url = value
                    });
                }
                ShowStatusText($"已抓取到{imageCollection.Count}个图像");
            }
            catch (Exception ex)
            {
                ShowStatusText(ex.Message);
            }
        }
        /// <summary>
        /// Click handler for the XPath query button. Runs the expression from <c>tbox_XPath</c> against the
        /// HTML entered in <c>rbox_XPathInput</c> and writes the outer HTML of every matched node to
        /// <c>rbox_XPathOutput</c>, one per line. The output is cleared when the query returns null.
        /// </summary>
        /// <remarks>
        /// XPath is a language for locating information in an XML document by navigating through its
        /// elements and attributes.
        /// </remarks>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btn_XPathQuery_Click(object sender, RoutedEventArgs e)
        {
            TextRange tr   = new TextRange(rbox_XPathInput.Document.ContentStart, rbox_XPathInput.Document.ContentEnd);
            var       html = tr.Text;

            // The text of a RichTextBox document normally still contains a line break when
            // nothing was typed, so IsNullOrEmpty would not catch an "empty" input box.
            if (string.IsNullOrWhiteSpace(html))
            {
                EMessageBox.Show("请输入html");
                return;
            }

            var result = HtmlAgilityPackUtil.XPathQuery(html, this.tbox_XPath.Text.Trim());

            if (result == null)
            {
                // No result set: wipe whatever a previous query left in the output box.
                this.rbox_XPathOutput.Document.Blocks.Clear();
                return;
            }

            Paragraph paragraph = new Paragraph();
            foreach (var item in result)
            {
                paragraph.Inlines.Add(new Run(item.OuterHtml + Environment.NewLine));
            }
            this.rbox_XPathOutput.Document = new FlowDocument(paragraph);
        }
Example #3
0
        /// <summary>
        /// Queries the Bing image detail URL for <paramref name="keyword"/> and returns the images parsed
        /// from the response by <c>HtmlAgilityPackUtil.GetBingImgFromUrlAsync</c>.
        /// </summary>
        /// <param name="keyword">Search term; it is URL-encoded before being placed in the query.</param>
        /// <param name="page">1-based page number. Values below 1 are treated as the first page.</param>
        /// <returns>The image list produced by the Bing page parser.</returns>
        private async Task <List <TagImg> > SearchBingImage(string keyword, int page = 1)
        {
            // 1-based index of the first result on the requested page:
            // page 1 -> 1, page 2 -> PageImageNum + 1, page 3 -> 2 * PageImageNum + 1, ...
            // (The previous formula, page * PageImageNum + 1, skipped one full page of
            // results between page 1 and page 2.)
            int start = page > 1 ? (page - 1) * PageImageNum + 1 : 1;

            // Encode the keyword so spaces, '&', '#', '+' and non-ASCII text cannot corrupt
            // the query string. A null keyword becomes an empty search term.
            var encodedKeyword = System.Uri.EscapeDataString(keyword ?? string.Empty);

            var url = UrlUtil.CNBingImageDetailUrl
                      .Replace("[keyword]", encodedKeyword)
                      .Replace("[start]", start.ToString());

            return await HtmlAgilityPackUtil.GetBingImgFromUrlAsync(url);
        }
Example #4
0
        /// <summary>
        /// Looks for CSS background images: for every <c>div</c> in the given HTML that carries a
        /// <c>class</c> attribute, asks the embedded browser for the computed <c>backgroundImage</c> of the
        /// first element with that class and, when the value matches <c>RegexPattern.MatchImgPattern</c>,
        /// appends the match to <c>backgroundImageList</c>. Progress and errors are reported through
        /// <c>ShowStatusText</c>.
        /// </summary>
        /// <param name="html">Object whose <c>ToString()</c> yields the HTML to scan; null is treated as no content.</param>
        private async void ExtractBackgroundImage(object html)
        {
            try
            {
                // Only div elements are inspected here; pages may of course put background
                // images on other elements (li, ol, ul, ...).
                var xpath  = "//div";
                var result = html == null ? null : HtmlAgilityPackUtil.XPathQuery(html.ToString(), xpath);

                if (result != null)
                {
                    // The script below always inspects element [0] of a class, so evaluating
                    // the same class value twice can only produce a duplicate entry.
                    var seenClassNames = new System.Collections.Generic.HashSet<string>();

                    foreach (var item in result)
                    {
                        var classAttribute = item.Attributes["class"];
                        if (classAttribute == null)
                        {
                            continue;
                        }

                        var className = classAttribute.Value;
                        if (string.IsNullOrWhiteSpace(className) || !seenClassNames.Add(className))
                        {
                            continue;
                        }

                        // Make the value safe inside a single-quoted JavaScript string literal.
                        // Line breaks become spaces, which getElementsByClassName treats as
                        // class separators anyway.
                        var jsClassName = className.Replace("\\", "\\\\")
                                                   .Replace("'", "\\'")
                                                   .Replace("\r", " ")
                                                   .Replace("\n", " ");

                        var script = $"getComputedStyle(document.getElementsByClassName('{jsClassName}')[0]).backgroundImage";
                        // Run the script in the browser.
                        var backgroundImage = await globalData.Browser.browser.EvaluateScriptAsync(script);

                        if (backgroundImage.Result == null || backgroundImage.Result.ToString() == "none")
                        {
                            continue;
                        }

                        var match = RegexUtil.RegexMatch(backgroundImage.Result.ToString(), RegexPattern.MatchImgPattern);
                        if (!match.Success)
                        {
                            continue;
                        }

                        // NOTE(review): the lock is held across a blocking Dispatcher.Invoke, as in
                        // the original; confirm the UI thread never waits on the same lock object.
                        lock (obj)
                        {
                            Dispatcher.Invoke(() =>
                            {
                                backgroundImageList.Add(match.Value);
                                ShowStatusText($"已抓取到{backgroundImageList.Count}个图像");
                            });
                        }
                    }
                }

                if (backgroundImageList.Count == 0)
                {
                    ShowStatusText("解析已完成,未抓取到任何图像");
                }
            }
            catch (System.Exception ex)
            {
                // async void: an exception escaping this method cannot be observed by the
                // caller, so report it the same way the other status messages are reported.
                ShowStatusText(ex.Message);
            }
        }
Example #5
0
        /// <summary>
        /// Scrolls the embedded browser to the bottom of the page repeatedly until the page height stops
        /// changing, then reads the page source, extracts its images with
        /// <c>HtmlAgilityPackUtil.GetImgFromHtmlAsync</c> and binds them to <c>listbox_ImageDynamic</c>.
        /// </summary>
        /// <param name="html">Not read by this method; the page source is fetched again after scrolling.</param>
        private async void StartScroll(string html)
        {
            // The first content capture is done; start scrolling the page.

            // Script that reads the current page height.
            var getHeightJs = "document.body.clientHeight";

            // Scrolling is driven through JavaScript
            // (Selenium could be used to drive the browser instead).
            var scrollJs = "window.scroll(0,{0})";
            var height   = await globalData.Browser.EvaluateJavaScriptAsync(getHeightJs);

            // Keep scrolling until the height no longer changes.
            while (true)
            {
                globalData.Browser.ExecuteJavaScript(string.Format(scrollJs, height));

                // Give the page time to load more content BEFORE measuring again; measuring
                // right after the scroll call would usually see the old height and stop early.
                // TODO: login step - fill in the login form via JavaScript and simulate the click.
                // This is only a sample and does not target any particular site.
                await Task.Delay(1000);

                var oldHeight = height;
                height = await globalData.Browser.EvaluateJavaScriptAsync(getHeightJs);

                // Value comparison regardless of the static type returned by the browser
                // wrapper ('==' would compare references if that type is object).
                if (object.Equals(height, oldHeight))
                {
                    break;
                }
            }

            // The page is fully scrolled; the images can be extracted now.
            var pageSource = await globalData.Browser.GetHtmlSource();

            var list = await HtmlAgilityPackUtil.GetImgFromHtmlAsync(pageSource);

            this.Dispatcher.Invoke(() => {
                this.listbox_ImageDynamic.ItemsSource = list;
            });
        }
Example #6
0
        /// <summary>
        /// Loads the Bing hot-spot images once: does nothing if <c>grid_Content</c> already has children,
        /// otherwise shows a modal waiting dialog, downloads the images from <c>UrlUtil.CNBingImageUrl</c>,
        /// keeps those whose <c>Src</c> contains "tse1-mm", passes them to <c>ShowImage</c> and closes the dialog.
        /// </summary>
        private async void LoadHotSpots()
        {
            // Content is already present: do not load again.
            if (this.grid_Content.Children.Count > 0)
            {
                return;
            }

            // Create the dialog up front so the reference closed below is always the one
            // created by this call (previously the field was assigned inside the queued
            // callback and could still be null or stale when Close() ran).
            var waiting = new WaitingDailog("正在加载每日热图");
            dialog = waiting;
            bool loadFinished = false;

            // ShowDialog blocks, so it is queued and runs once this method yields at the await.
            this.Dispatcher.BeginInvoke(new Action(() => {
                // If loading finished before this callback ran, the window has already been
                // closed and must not be shown.
                if (!loadFinished)
                {
                    waiting.ShowDialog();
                }
            }));

            try
            {
                var downloaded = await HtmlAgilityPackUtil.GetBingImgFromUrlAsync(UrlUtil.CNBingImageUrl, true);

                // Keep only the entries served from the "tse1-mm" host; a null result or a
                // null Src is treated as "no match".
                List <TagImg> hotSpotsImgList = downloaded == null
                    ? new List <TagImg>()
                    : downloaded.Where(x => x != null && x.Src != null && x.Src.Contains("tse1-mm")).ToList();

                // Display the images.
                ShowImage(hotSpotsImgList, true);
            }
            finally
            {
                // Always dismiss the modal dialog, even when the download or rendering fails.
                loadFinished = true;
                waiting.Close();
            }
        }