/// <summary>
/// Collects the image URLs that HtmlAgilityPackUtil.GetImgFromHtmlAsync finds in the given HTML,
/// turns each one into an absolute URL and adds it to the collection via AddToCollection.
/// Any exception is caught and its message is shown through ShowStatusText.
/// </summary>
/// <param name="html">HTML text passed to HtmlAgilityPackUtil.GetImgFromHtmlAsync.</param>
private async void ExtractImageWithHtmlAgilityPack(string html)
{
    try
    {
        var imageList = await HtmlAgilityPackUtil.GetImgFromHtmlAsync(html);
        for (int i = 0; i < imageList.Count; i++)
        {
            string value = imageList[i];

            // Protocol-relative URL ("//host/path"): prepend a scheme.
            // Ordinal comparison: this is a URL prefix test, not linguistic text.
            if (value.StartsWith("//", StringComparison.Ordinal))
            {
                value = "http:" + value;
            }

            // No ':' at all means no scheme is present, so treat the value as relative to BaseUrl.
            if (!value.Contains(":"))
            {
                value = BaseUrl + value;
            }

            // Ids are 1-based positions in the extracted list.
            AddToCollection(new UrlStruct() { Id = i + 1, Status = "", Title = "", Url = value });
        }

        ShowStatusText($"已抓取到{imageCollection.Count}个图像");
    }
    catch (Exception ex)
    {
        // async void: nothing can observe a thrown exception, so report it in the status text instead.
        ShowStatusText(ex.Message);
    }
}
/// <summary>
/// Runs the XPath expression from the XPath text box against the HTML typed into the input box
/// and writes the outer HTML of every returned node into the output box.
/// XPath is a language for finding information in an XML document; it navigates the document
/// through its elements and attributes.
/// </summary>
/// <param name="sender">Source of the click event (not used).</param>
/// <param name="e">Click event data (not used).</param>
private void btn_XPathQuery_Click(object sender, RoutedEventArgs e)
{
    TextRange tr = new TextRange(rbox_XPathInput.Document.ContentStart, rbox_XPathInput.Document.ContentEnd);
    var html = tr.Text;

    // The text of a whole rich-text document normally still contains the paragraph's line break
    // even when nothing was typed, so an "empty" box is whitespace rather than an empty string.
    if (string.IsNullOrWhiteSpace(html))
    {
        EMessageBox.Show("请输入html");
        return;
    }

    var result = HtmlAgilityPackUtil.XPathQuery(html, this.tbox_XPath.Text.Trim());
    if (result != null)
    {
        // One run per returned node, each followed by a line break.
        Paragraph paragraph = new Paragraph();
        foreach (var item in result)
        {
            paragraph.Inlines.Add(new Run(item.OuterHtml + Environment.NewLine));
        }

        this.rbox_XPathOutput.Document = new FlowDocument(paragraph);
    }
    else
    {
        // Null result: clear whatever the output box showed before.
        this.rbox_XPathOutput.Document.Blocks.Clear();
    }
}
/// <summary>
/// Requests one page of image search results for a keyword.
/// </summary>
/// <param name="keyword">Text substituted for the "[keyword]" placeholder of UrlUtil.CNBingImageDetailUrl.</param>
/// <param name="page">Page number; any value of 1 or less uses start offset 1.</param>
/// <returns>The list produced by HtmlAgilityPackUtil.GetBingImgFromUrlAsync for the built URL.</returns>
private async Task<List<TagImg>> SearchBingImage(string keyword, int page = 1)
{
    // NOTE(review): page 2 starts at 2 * PageImageNum + 1 rather than PageImageNum + 1 -
    // confirm this offset is what the endpoint expects.
    int firstIndex = page > 1 ? page * PageImageNum + 1 : 1;

    // NOTE(review): keyword is inserted as-is; presumably callers pass URL-safe text - verify.
    var requestUrl = UrlUtil.CNBingImageDetailUrl
        .Replace("[keyword]", keyword)
        .Replace("[start]", firstIndex.ToString());

    return await HtmlAgilityPackUtil.GetBingImgFromUrlAsync(requestUrl);
}
/// <summary>
/// Looks for CSS background images on the page: for every div that has a class attribute, asks the
/// browser for the computed backgroundImage of the first element with that class and, when the value
/// matches RegexPattern.MatchImgPattern, adds the match to backgroundImageList and updates the status text.
/// </summary>
/// <param name="html">Page HTML; its ToString() value is what gets queried.</param>
private async void ExtractBackgroundImage(object html)
{
    // Only div elements are inspected here; a page may just as well show images through
    // other elements such as li, ol or ul.
    var xpath = "//div";
    var result = HtmlAgilityPackUtil.XPathQuery(html.ToString(), xpath);

    // XPathQuery can return null (btn_XPathQuery_Click checks for it too); treat that as "no divs"
    // instead of letting a NullReferenceException escape from this async void method.
    if (result != null)
    {
        foreach (var item in result)
        {
            var classAttribute = item.Attributes["class"];
            if (classAttribute == null)
            {
                continue;
            }

            var className = classAttribute.Value;
            var script = $"getComputedStyle(document.getElementsByClassName('{className}')[0]).backgroundImage";

            // Run the JavaScript in the browser.
            var backgroundImage = await globalData.Browser.browser.EvaluateScriptAsync(script);
            if (backgroundImage.Result != null && backgroundImage.Result.ToString() != "none")
            {
                var match = RegexUtil.RegexMatch(backgroundImage.Result.ToString(), RegexPattern.MatchImgPattern);
                if (match.Success)
                {
                    lock (obj)
                    {
                        // The list and the status text are updated through the dispatcher.
                        Dispatcher.Invoke(() =>
                        {
                            backgroundImageList.Add(match.Value);
                            ShowStatusText($"已抓取到{backgroundImageList.Count}个图像");
                        });
                    }
                }
            }
        }
    }

    if (backgroundImageList.Count == 0)
    {
        ShowStatusText("解析已完成,未抓取到任何图像");
    }
}
/// <summary>
/// Keeps scrolling the browser page to its current body height until that height stops changing,
/// then fetches the page source, extracts its images and binds them to listbox_ImageDynamic.
/// </summary>
/// <param name="html">Not read on entry; reused to hold the page source fetched after scrolling.</param>
private async void StartScroll(string html)
{
    // The first content grab is done; from here the page is scrolled.
    // Script that reads the page height.
    const string heightScript = "document.body.clientHeight";

    // Scrolling is driven by JavaScript; Selenium could drive the browser instead.
    const string scrollTemplate = "window.scroll(0,{0})";

    var currentHeight = await globalData.Browser.EvaluateJavaScriptAsync(heightScript);

    // Scroll until the height no longer changes.
    // NOTE(review): there is no iteration cap, and "==" relies on the (unseen) type returned by
    // EvaluateJavaScriptAsync comparing by value - confirm both.
    var reachedBottom = false;
    while (!reachedBottom)
    {
        globalData.Browser.ExecuteJavaScript(string.Format(scrollTemplate, currentHeight));

        var previousHeight = currentHeight;
        currentHeight = await globalData.Browser.EvaluateJavaScriptAsync(heightScript);

        if (currentHeight == previousHeight)
        {
            reachedBottom = true;
        }
        else
        {
            // TODO: login step - fill in the login form with JavaScript and simulate the click.
            // This is only a sample and does not target any particular site.
            await Task.Delay(1000);
        }
    }

    // Scrolling is finished, so the images on the page can be extracted now.
    html = await globalData.Browser.GetHtmlSource();
    var images = await HtmlAgilityPackUtil.GetImgFromHtmlAsync(html);
    Dispatcher.Invoke(() =>
    {
        listbox_ImageDynamic.ItemsSource = images;
    });
}
/// <summary>
/// Loads the daily hot images once: shows a waiting dialog, downloads the image list from
/// UrlUtil.CNBingImageUrl, keeps only entries whose Src contains "tse1-mm", and passes them to ShowImage.
/// Does nothing when grid_Content already has children.
/// </summary>
private async void LoadHotSpots()
{
    // Content is already present: do not load again.
    if (this.grid_Content.Children.Count > 0)
    {
        return;
    }

    // Set once loading has ended, so a dialog action that has not run yet does not open
    // a modal dialog that nothing would close afterwards.
    var loadFinished = false;

    // Queued rather than run inline, so the modal ShowDialog() does not hold up the download below.
    this.Dispatcher.BeginInvoke(new Action(() =>
    {
        if (loadFinished)
        {
            return;
        }

        dialog = new WaitingDailog("正在加载每日热图");
        dialog.ShowDialog();
    }));

    try
    {
        List<TagImg> hotSpotsImgList = await HtmlAgilityPackUtil.GetBingImgFromUrlAsync(UrlUtil.CNBingImageUrl, true);

        // Filter: keep only images whose source contains "tse1-mm".
        hotSpotsImgList = hotSpotsImgList.Where(x => x.Src.Contains("tse1-mm")).ToList();

        // Display.
        ShowImage(hotSpotsImgList, true);
    }
    finally
    {
        // Always dismiss the waiting dialog, including when the download or display throws;
        // dialog may still be null if the queued action never ran.
        loadFinished = true;
        dialog?.Close();
    }
}