public object ExtractProperties(CrawlResult crawlResult, IHtmlCollection <IElement> elements) { Directory.CreateDirectory(@"c:\temp\WebScraper"); using (var client = new HttpClient()) { foreach (var img in elements.SelectMany(e => e.QuerySelectorAll("img"))) { var src = new Uri(crawlResult.RequestUrl, new Uri(img.Attributes["src"].Value, UriKind.RelativeOrAbsolute)); var fileName = Path.Combine(@"c:\temp\WebScraper", Path.GetFileName(src.LocalPath)); if (File.Exists(fileName) == false) { Console.WriteLine($"Downloading {src} to {fileName}"); using (var f = File.OpenWrite(fileName)) using (var s = client.GetStreamAsync(src).Result) { s.CopyTo(f); } } else { Console.WriteLine($"Skipping download of {src} to {fileName}"); } img.SetAttribute("data-local-src", fileName); } } return(elements.Select(this.htmlSelector).Aggregate((prod, next) => prod + "\n" + next)); }
public static IEnumerable <IElement> QuerySelectorAll(this IHtmlCollection <IElement> source, string selector) { return(source.SelectMany(x => x.QuerySelectorAll(selector))); }