/// <summary>
/// Fetches the HTML for <c>product.Link</c> via <c>HtmlHelpers.GetHtml</c> and extracts
/// one image URL per <c>.pDetails__slide</c> element plus the detail-tab list items as tags.
/// </summary>
/// <param name="driver">Browser driver handed to <c>HtmlHelpers.GetHtml</c>.</param>
/// <param name="product">Product whose <c>Link</c> is fetched.</param>
/// <returns>
/// Metadata whose <c>Images</c> holds the last srcset candidate URL of each usable slide and
/// whose <c>Tags</c> holds the whitespace-collapsed text of each matching list item.
/// </returns>
public async override Task<ProductMetadata> GetProductMetadataFromUrl(ChromeDriver driver, Product product)
{
    var html = await HtmlHelpers.GetHtml(driver, product.Link);

    var images = html.QuerySelectorAll(".pDetails__slide")
        .Select(s =>
        {
            // A slide with no <source> child cannot yield an image; return null so the
            // filter below drops it instead of letting Last() throw.
            var source = s.QuerySelectorAll("source").LastOrDefault();
            if (source == null)
            {
                return null;
            }

            // Prefer the eager attribute, falling back to the data-srcset variant.
            var srcset = source.GetAttribute("srcset") ?? source.GetAttribute("data-srcset");
            if (string.IsNullOrWhiteSpace(srcset))
            {
                return null;
            }

            // Use the last candidate listed in the srcset; null when nothing was parsed.
            return SourceSet.Parse(srcset).Select(c => c.Url).LastOrDefault();
        })
        .Where(p => p != null)
        .ToArray();

    var tags = html.QuerySelectorAll("#pdp_details .tab__item li")
        // Collapse every whitespace run to a single space before trimming.
        .Select(p => Regex.Replace(p.TextContent, @"\s+", " ").Trim())
        .ToArray();

    return new ProductMetadata
    {
        Images = images,
        Tags = tags,
    };
}
/// <summary>
/// Fetches the HTML for <c>product.Link</c> via <c>HtmlHelpers.GetHtml</c>, reads the feature
/// list items as tags, and derives nine image URLs from <c>product.Image</c>.
/// </summary>
/// <param name="driver">Browser driver handed to <c>HtmlHelpers.GetHtml</c>.</param>
/// <param name="product">Product supplying <c>Link</c> and the template <c>Image</c> URL.</param>
/// <returns>Metadata with the trimmed feature texts and the nine rewritten image URLs.</returns>
public async override Task<ProductMetadata> GetProductMetadataFromUrl(ChromeDriver driver, Product product)
{
    var document = await HtmlHelpers.GetHtml(driver, product.Link);

    var featureItems = document.QuerySelectorAll(".d-none.features li");
    var featureTexts = featureItems
        .Select(item => item.TextContent.Trim())
        .ToArray();

    // Rewrite the "_<digit>.jpg?sw=..&sh=.." part of the known image URL for indexes 1 through 9,
    // forcing the sw/sh query values to 1200.
    var numberedImages =
        from i in Enumerable.Range(1, 9)
        select Regex.Replace(product.Image, @"_(\d)\.jpg\?sw=(\d+)&sh=(\d+)", $"_{i}.jpg?sw=1200&sh=1200");

    return new ProductMetadata
    {
        Tags = featureTexts,
        Images = numberedImages.ToArray(),
    };
}
/// <summary>
/// Fetches the HTML for <c>product.Link</c> via <c>HtmlHelpers.GetHtml</c> and extracts the
/// slideshow image URLs (all but the last one) and the product-detail list items as tags.
/// </summary>
/// <param name="driver">Browser driver handed to <c>HtmlHelpers.GetHtml</c>.</param>
/// <param name="product">Product whose <c>Link</c> is fetched.</param>
/// <returns>
/// Metadata whose <c>Images</c> holds every slideshow <c>src</c> except the final one, with the
/// <c>_&lt;digits&gt;.jpg</c> suffix rewritten to <c>_1000.jpg</c>, and whose <c>Tags</c> holds
/// the trimmed list-item texts.
/// </returns>
public async override Task<ProductMetadata> GetProductMetadataFromUrl(ChromeDriver driver, Product product)
{
    var html = await HtmlHelpers.GetHtml(driver, product.Link);

    // Materialize once: the result is both counted and sliced below, and a deferred query
    // would re-run the attribute reads and regex replacement for each enumeration.
    var images = html.QuerySelectorAll("div[data-tstid=slideshow] img")
        .Select(p => p.GetAttribute("src"))
        .Select(p => Regex.Replace(p, @"_\d+\.jpg", "_1000.jpg"))
        .ToArray();

    var tags = html.QuerySelectorAll("[data-tstid=productDetails] li")
        .Select(p => p.TextContent.Trim())
        .ToArray();

    return new ProductMetadata
    {
        // Drop the final slideshow entry. Take with a non-positive count yields an empty
        // sequence, so an empty slideshow is handled without a special case.
        Images = images.Take(images.Length - 1).ToArray(),
        Tags = tags,
    };
}
/// <summary>
/// Fetches the HTML for <c>product.Link</c> via <c>HtmlHelpers.GetHtml</c> and builds image
/// URLs from the gallery thumbnails plus tags from the description list items.
/// </summary>
/// <param name="driver">Browser driver handed to <c>HtmlHelpers.GetHtml</c>.</param>
/// <param name="product">Product whose <c>Link</c> is fetched.</param>
/// <returns>
/// Metadata with each thumbnail <c>src</c> after replacing <c>60x68</c> with <c>1556x1770</c>,
/// and the trimmed text of every <c>li</c> whose class starts with <c>desc</c>.
/// </returns>
public async override Task<ProductMetadata> GetProductMetadataFromUrl(ChromeDriver driver, Product product)
{
    // NOTE(review): the meaning of the third argument (2) is defined by HtmlHelpers.GetHtml,
    // which is not visible here — confirm before changing it.
    var document = await HtmlHelpers.GetHtml(driver, product.Link, 2);

    var thumbnails = document.QuerySelectorAll("[data-id$=detailsGalleryThumbnails] img");
    var imageUrls =
        from thumbnail in thumbnails
        select thumbnail.GetAttribute("src").Replace("60x68", "1556x1770");

    var descriptionItems = document.QuerySelectorAll("li[class^=desc]");
    var descriptionTexts =
        from item in descriptionItems
        select item.TextContent.Trim();

    return new ProductMetadata
    {
        Images = imageUrls.ToArray(),
        Tags = descriptionTexts.ToArray(),
    };
}
/// <summary>
/// Fetches the HTML for <c>product.Link</c> via <c>HtmlHelpers.GetHtml</c> and extracts gallery
/// image URLs plus one tag per non-empty line of the <c>.detail</c> element's text.
/// </summary>
/// <param name="driver">Browser driver handed to <c>HtmlHelpers.GetHtml</c>.</param>
/// <param name="product">Product whose <c>Link</c> is fetched.</param>
/// <returns>
/// Metadata whose <c>Images</c> holds each gallery image URL with its query string replaced by a
/// fixed set of rendering parameters, and whose <c>Tags</c> holds the trimmed, non-empty detail lines.
/// </returns>
public async override Task<ProductMetadata> GetProductMetadataFromUrl(ChromeDriver driver, Product product)
{
    // NOTE(review): the meaning of the third argument (2) is defined by HtmlHelpers.GetHtml,
    // which is not visible here — confirm before changing it.
    var html = await HtmlHelpers.GetHtml(driver, product.Link, 2);

    var images = html.QuerySelectorAll(".pdp-gallery button img")
        .Select(i => i.GetAttribute("src"))
        // An img without a src would become the bare string "https:", which Uri rejects.
        .Where(src => !string.IsNullOrEmpty(src))
        // The src values are prefixed with a scheme before parsing (presumably they are
        // protocol-relative — verify against the page markup).
        .Select(src => "https:" + src)
        // Keep scheme, host and path only, then append the fixed rendering parameters.
        .Select(p => new Uri(p).GetLeftPart(UriPartial.Path) + "?wid=1000&op_sharpen=1&resMode=sharp2&qlt=100")
        .ToArray();

    var tags = html.QuerySelector(".detail")
        .TextContent
        .Replace("•", "")
        .Split('\n')
        .Select(p => p.Trim())
        // Blank lines and lines that held only a bullet would otherwise become empty tags.
        .Where(p => p.Length > 0)
        .ToArray();

    return new ProductMetadata
    {
        Images = images,
        Tags = tags,
    };
}
/// <summary>
/// Fetches the HTML for <c>product.Link</c> via <c>HtmlHelpers.GetHtml</c> and extracts the
/// distinct, sorted picture image URLs plus the utility-content list items as tags.
/// </summary>
/// <param name="driver">Browser driver handed to <c>HtmlHelpers.GetHtml</c>.</param>
/// <param name="product">Product whose <c>Link</c> is fetched.</param>
/// <returns>
/// Metadata with one <c>https:</c>-prefixed URL per distinct non-null <c>src</c>, and the
/// trimmed text of each <c>.product-utility-content li</c>.
/// </returns>
public async override Task<ProductMetadata> GetProductMetadataFromUrl(ChromeDriver driver, Product product)
{
    var document = await HtmlHelpers.GetHtml(driver, product.Link);

    var sources = document.QuerySelectorAll(".product-main picture img")
        .Select(img => img.GetAttribute("src"))
        .Where(src => src != null);

    // Grouping collapses duplicate sources. The final OrderBy decides the output order;
    // the preceding sort by occurrence count only survives as a tie-break.
    var imageUrls = sources
        .GroupBy(src => src)
        .OrderByDescending(group => group.Count())
        .Select(group => "https:" + group.Key)
        .OrderBy(url => url)
        .ToArray();

    var utilityItems = document.QuerySelectorAll(".product-utility-content li");
    var utilityTexts = utilityItems
        .Select(item => item.TextContent.Trim())
        .ToArray();

    return new ProductMetadata
    {
        Images = imageUrls,
        Tags = utilityTexts,
    };
}
/// <summary>
/// Fetches the HTML for <c>product.Link</c> via <c>HtmlHelpers.GetHtml</c> and extracts the
/// thumbnail image URLs plus one tag per non-blank line of the product description.
/// </summary>
/// <param name="driver">Browser driver handed to <c>HtmlHelpers.GetHtml</c>.</param>
/// <param name="product">Product whose <c>Link</c> is fetched.</param>
/// <returns>
/// Metadata whose <c>Images</c> holds the URL-decoded match taken from each thumbnail's
/// <c>data-src</c> (thumbnails without a usable value are skipped), and whose <c>Tags</c> holds
/// the description lines stripped of surrounding dashes and whitespace.
/// </returns>
public async override Task<ProductMetadata> GetProductMetadataFromUrl(ChromeDriver driver, Product product)
{
    var html = await HtmlHelpers.GetHtml(driver, product.Link);

    var images = html.QuerySelectorAll(".thumbnails img")
        .Select(p => p.GetAttribute("data-src"))
        // Thumbnails without data-src have nothing to extract.
        .Where(p => p != null)
        // The first result for the "/http..." pattern is used (presumably the captured
        // embedded URL — GetGroupMatches is defined elsewhere; verify).
        .Select(p => p.GetGroupMatches(@"\/(http.+)").FirstOrDefault())
        // No match yields null; drop it so Images never contains null entries.
        .Where(p => p != null)
        .Select(p => System.Web.HttpUtility.UrlDecode(p))
        .ToArray();

    var tags = html.QuerySelector(".product-description")
        .TextContent
        .Split('\n')
        .Where(p => !string.IsNullOrWhiteSpace(p))
        // Strip dashes and spaces first, then any remaining whitespace.
        .Select(p => p.Trim('-', ' ').Trim())
        .ToArray();

    return new ProductMetadata
    {
        Tags = tags,
        Images = images,
    };
}
/// <summary>
/// Fetches the HTML for <c>product.Link</c> via <c>HtmlHelpers.GetHtml</c> and extracts the
/// distinct, sorted gallery image URLs plus the product-detail list items as tags.
/// </summary>
/// <param name="driver">Browser driver handed to <c>HtmlHelpers.GetHtml</c>.</param>
/// <param name="product">Product whose <c>Link</c> is fetched.</param>
/// <returns>
/// Metadata with one entry per distinct non-null image source (<c>data-lazy</c> preferred over
/// <c>src</c>) and the whitespace-collapsed text of each class-less detail list item.
/// </returns>
public async override Task<ProductMetadata> GetProductMetadataFromUrl(ChromeDriver driver, Product product)
{
    // NOTE(review): the meaning of the third argument (2) is defined by HtmlHelpers.GetHtml,
    // which is not visible here — confirm before changing it.
    var html = await HtmlHelpers.GetHtml(driver, product.Link, 2);

    var images = html.QuerySelectorAll("img[class*=product-gallery]")
        // Prefer the data-lazy attribute and fall back to src when it is absent.
        .Select(i => i.GetAttribute("data-lazy") ?? i.GetAttribute("src"))
        // Images carrying neither attribute would otherwise surface as a null entry.
        .Where(p => p != null)
        // Grouping collapses duplicates. The final OrderBy decides the output order;
        // the preceding sort by occurrence count only survives as a tie-break.
        .GroupBy(p => p)
        .OrderByDescending(g => g.Count())
        .Select(g => g.First())
        .OrderBy(p => p)
        .ToArray();

    var tags = html.QuerySelectorAll("[class^=product-detail__list] li:not([class])")
        // Collapse every whitespace run to a single space before trimming.
        .Select(p => Regex.Replace(p.TextContent, @"\s+", " ").Trim())
        .ToArray();

    return new ProductMetadata
    {
        Images = images,
        Tags = tags,
    };
}