Example #1
0
        /// <summary>
        /// Extracts the image URLs from <paramref name="html"/> with
        /// <c>HtmlAgilityPackUtil.GetImgFromHtmlAsync</c>, normalizes each one, passes it to
        /// <c>AddToCollection</c>, and finally reports the collection size in the status text.
        /// </summary>
        /// <param name="html">HTML markup to scan for images.</param>
        /// <remarks>
        /// The method is <c>async void</c>, so callers cannot await it. Any exception raised
        /// while extracting is caught here and its message is shown via <c>ShowStatusText</c>.
        /// </remarks>
        private async void ExtractImageWithHtmlAgilityPack(string html)
        {
            try
            {
                var imageList = await HtmlAgilityPackUtil.GetImgFromHtmlAsync(html);

                for (int i = 0; i < imageList.Count; i++)
                {
                    string url = imageList[i];

                    // Protocol-relative URL ("//host/path"): give it an explicit scheme.
                    // Ordinal comparison because "//" is a protocol token, not linguistic text.
                    if (url.StartsWith("//", StringComparison.Ordinal))
                    {
                        url = "http:" + url;
                    }

                    // No ':' anywhere means there is no scheme, so the value is treated as
                    // relative and prefixed with BaseUrl.
                    if (!url.Contains(":"))
                    {
                        url = BaseUrl + url;
                    }

                    // Ids are 1-based and follow the position in the extracted list.
                    AddToCollection(new UrlStruct()
                    {
                        Id = i + 1, Status = "", Title = "", Url = url
                    });
                }

                ShowStatusText($"已抓取到{imageCollection.Count}个图像");
            }
            catch (Exception ex)
            {
                // Surface the failure in the status text instead of letting it escape async void.
                ShowStatusText(ex.Message);
            }
        }
Example #2
0
        /// <summary>
        /// Repeatedly scrolls the browser page to its current height until the height stops
        /// changing, then re-reads the page source, extracts its images with
        /// <c>HtmlAgilityPackUtil.GetImgFromHtmlAsync</c> and binds them to
        /// <c>listbox_ImageDynamic</c>.
        /// </summary>
        /// <param name="html">
        /// Not read on entry; it is overwritten with the page source fetched after scrolling.
        /// </param>
        /// <remarks>
        /// The method is <c>async void</c> and has no exception handling of its own.
        /// The loop has no iteration cap: it only ends once two consecutive height
        /// measurements are equal.
        /// </remarks>
        private async void StartScroll(string html)
        {
            // The first content fetch is complete; start scrolling the page.

            // JS expression that yields the height: document.body.clientHeight
            var getHeightJs = "document.body.clientHeight";

            // Scrolling is driven through JS.
            // Selenium could be used to drive the browser scrolling instead.
            var scrollJs = "window.scroll(0,{0})";
            var height   = await globalData.Browser.EvaluateJavaScriptAsync(getHeightJs);

            // Keep scrolling until the height no longer changes.
            while (true)
            {
                globalData.Browser.ExecuteJavaScript(string.Format(scrollJs, height));

                // Wait BEFORE re-measuring: measuring right after issuing the scroll gives the
                // page no time to append content, so the height would look unchanged and the
                // loop would stop after the first pass.
                await Task.Delay(1000);

                var oldHeight = height;
                height = await globalData.Browser.EvaluateJavaScriptAsync(getHeightJs);

                // object.Equals compares boxed values by value, whereas == on object-typed
                // results would compare references and never report equality.
                // NOTE(review): the return type of EvaluateJavaScriptAsync is not visible here.
                if (object.Equals(height, oldHeight))
                {
                    break;
                }

                // todo: login step.
                // Fill in the login form via JS and simulate clicking the login button.
                // Left out because this is only an example and targets no particular site.
            }

            // At this point the images on the page can be extracted.
            html = await globalData.Browser.GetHtmlSource();

            var list = await HtmlAgilityPackUtil.GetImgFromHtmlAsync(html);

            // Assign the items source through the dispatcher.
            this.Dispatcher.Invoke(() => {
                this.listbox_ImageDynamic.ItemsSource = list;
            });
        }