/// <summary>
/// Walks every descendant element of <paramref name="htmlDivElement"/> and reports
/// elements and attributes that are not on the allow lists.
/// </summary>
/// <param name="htmlDivElement">Root element whose descendants are inspected.</param>
/// <param name="onInvalidElement">Invoked for each descendant whose node name is not in <c>AllowedElements</c>.</param>
/// <param name="onInvalidAttr">
/// Invoked for each attribute whose name is not in <c>AllowedAttributes</c>, and for each
/// <c>src</c> attribute whose value does not start with any entry of <c>Src</c>.
/// An attribute that fails both checks is reported twice.
/// </param>
private static void CheckHtmlElements(
    IHtmlDivElement htmlDivElement,
    Action<IElement> onInvalidElement,
    Action<IElement, IAttr> onInvalidAttr)
{
    EnsureArg.IsNotNull(htmlDivElement, nameof(htmlDivElement));
    EnsureArg.IsNotNull(onInvalidElement, nameof(onInvalidElement));
    EnsureArg.IsNotNull(onInvalidAttr, nameof(onInvalidAttr));

    // "*" selects the descendants only; the root element itself is not part of the result.
    foreach (var descendant in htmlDivElement.QuerySelectorAll("*"))
    {
        bool elementAllowed = AllowedElements.Contains(descendant.NodeName);
        if (!elementAllowed)
        {
            onInvalidElement(descendant);
        }

        // Iterate over a copy of the attribute list
        // (presumably so that callbacks may remove attributes while we enumerate - confirm with callers).
        var attributeSnapshot = descendant.Attributes.ToArray();
        foreach (var attribute in attributeSnapshot)
        {
            bool nameAllowed = AllowedAttributes.Contains(attribute.Name);
            if (!nameAllowed)
            {
                onInvalidAttr(descendant, attribute);
            }

            // "src" values must additionally begin with one of the permitted prefixes.
            bool isSrc = string.Equals("src", attribute.Name, StringComparison.OrdinalIgnoreCase);
            if (isSrc && !Src.Any(prefix => attribute.Value.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)))
            {
                onInvalidAttr(descendant, attribute);
            }
        }
    }
}
/// <summary>
/// Asserts that a rendered search result contains the expected provider flag,
/// venue details, journey distance and qualification entries.
/// </summary>
/// <param name="searchResults">The element that wraps a single search result.</param>
private void AssertSearchResult(IHtmlDivElement searchResults)
{
    // Provider flag
    var providerFlag = searchResults.QuerySelector("#tl-provider");
    providerFlag.TextContent.Should().Be("T level provider");
    providerFlag.ClassName.Should().Be("tl-search-results-flag");

    // Venue details: first item is the provider name, second is town and postcode
    var venueDetails = searchResults.QuerySelector("#tl-venue-detail-list");

    var providerNameItem = venueDetails.Children[0] as IHtmlListItemElement;
    providerNameItem.TextContent.Should().Be("Part of SQL Search Provider Display Name");

    var locationItem = venueDetails.Children[1] as IHtmlListItemElement;
    locationItem.TextContent.Should().Be("Coventry CV1 2WT");

    // Journey info: exactly one child, holding the distance
    var journey = searchResults.QuerySelector("#tl-journey-info");
    journey.Children.Length.Should().Be(1);

    var distanceParagraph = journey.Children[0] as IHtmlParagraphElement;
    distanceParagraph.TextContent.Should().Be("0.0 miles");

    // Qualifications: first route and the first short title nested beneath it
    var qualifications = searchResults.QuerySelector("#tl-qualification-list") as IHtmlUnorderedListElement;

    var firstRoute = qualifications.Children[0] as IHtmlListItemElement;
    firstRoute.QuerySelector("details > summary > span")
        .TextContent.Should().Be("Agriculture, environmental and animal care");

    var shortTitles = firstRoute.QuerySelector("details > div > ul") as IHtmlUnorderedListElement;
    var firstShortTitle = shortTitles.Children[0] as IHtmlListItemElement;
    firstShortTitle.TextContent.Should().Be("Short Title");
}
/// <summary>
/// Extracts a track's name and path from its container element.
/// </summary>
/// <param name="parent">Container element holding the <c>div.item-info div.item-title</c> markup.</param>
/// <returns>
/// The trimmed text and path name of the title link when one exists; otherwise the trimmed
/// text of the title element together with a path built from the container's <c>id</c> dataset entry.
/// </returns>
private static (string trackName, string trackPath) FindTrackInfo(IHtmlDivElement parent)
{
    IHtmlAnchorElement titleLink = parent.QuerySelector<IHtmlAnchorElement>("div.item-info div.item-title a");

    if (titleLink == null)
    {
        // No link inside the title: fall back to the title's plain text and derive the path from the dataset id.
        IHtmlDivElement titleElement = parent.QuerySelector<IHtmlDivElement>("div.item-info div.item-title");
        string fallbackName = titleElement.TextContent.Trim();
        string fallbackPath = $"/track/{parent.Dataset["id"]}";
        return (fallbackName, fallbackPath);
    }

    return (titleLink.Text.Trim(), titleLink.PathName);
}
/// <summary>
/// Validates the attributes of <paramref name="htmlDivElement"/> itself, then checks every
/// descendant element against <c>AllowedElements</c> and validates its attributes.
/// </summary>
/// <param name="htmlDivElement">Root element to inspect.</param>
/// <param name="onInvalidElement">Invoked for each descendant whose node name is not in <c>AllowedElements</c>.</param>
/// <param name="onInvalidAttr">Passed through to <c>ValidateAttributes</c> for the root and for every descendant.</param>
private static void CheckHtmlElements(
    IHtmlDivElement htmlDivElement,
    Action<IElement> onInvalidElement,
    Action<IElement, IAttr> onInvalidAttr)
{
    EnsureArg.IsNotNull(htmlDivElement, nameof(htmlDivElement));
    EnsureArg.IsNotNull(onInvalidElement, nameof(onInvalidElement));
    EnsureArg.IsNotNull(onInvalidAttr, nameof(onInvalidAttr));

    // The "*" query below yields descendants only, so the root's attributes are handled first.
    ValidateAttributes(htmlDivElement, onInvalidAttr);

    var descendants = htmlDivElement.QuerySelectorAll("*");
    foreach (IElement descendant in descendants)
    {
        bool isAllowedElement = AllowedElements.Contains(descendant.NodeName);
        if (!isAllowedElement)
        {
            onInvalidElement(descendant);
        }

        ValidateAttributes(descendant, onInvalidAttr);
    }
}
/// <summary>
/// Visits a div element: emits the clear-screen sequence when the element's class list
/// marks it as such, recurses into its children, and then appends a line break.
/// </summary>
/// <param name="element">The div to visit; a null value is ignored.</param>
/// <param name="sb">Builder that receives the output.</param>
private static void VisitParser(IHtmlDivElement element, StringBuilder sb)
{
    if (element != null)
    {
        var textAttributes = new AnsiTextAttribute();
        ParseAnsiClass(element.ClassList, textAttributes);

        if (textAttributes.IsCls)
        {
            sb.Append(ClearScreenAndHomeCursor);
        }

        foreach (var child in element.Children)
        {
            // Each child is offered to both visitors; the cast that does not match yields null.
            // The span call presumably resolves to an overload not visible in this block.
            var childAsSpan = child as IHtmlSpanElement;
            var childAsDiv = child as IHtmlDivElement;

            VisitParser(childAsSpan, sb);
            VisitParser(childAsDiv, sb);
        }

        // Every visited div ends its output with a line terminator.
        sb.AppendLine();
    }
}
/// <summary>
/// Loads the store page at <c>URL</c> and collects the tiles that are priced
/// "free to play" and whose free-offer window includes the current time.
/// </summary>
/// <param name="token">Cancellation token passed to the page request and checked once the page has loaded.</param>
/// <returns>The deals that passed all checks; empty when none did.</returns>
/// <exception cref="OperationCanceledException">Thrown when <paramref name="token"/> is cancelled.</exception>
public async Task<IEnumerable<IDeal>> Scrape(CancellationToken token)
{
    List<IDeal> deals = new List<IDeal>();

    DocumentRequest request = DocumentRequest.Get(Url.Create(URL));
    IDocument document = await context.OpenAsync(request, token);
    token.ThrowIfCancellationRequested();

    IHtmlElement body = document.Body;
    IEnumerable<IHtmlListItemElement> items = body.QuerySelectorAll<IHtmlListItemElement>(".grid-tile");

    foreach (IHtmlListItemElement element in items)
    {
        IHtmlDivElement titleElement = element.QuerySelector<IHtmlDivElement>(".card-title");
        IHtmlAnchorElement linkElement = element.QuerySelector<IHtmlAnchorElement>(".thumb-link");
        IHtmlImageElement imageElement = element.QuerySelector<IHtmlImageElement>(".product_image");
        IHtmlDivElement priceElement = element.QuerySelector<IHtmlDivElement>(".card-price");
        IHtmlDivElement availabilityElement = element.QuerySelector<IHtmlDivElement>(".product-availability-label");

        // QuerySelector returns null when nothing matches. A single malformed tile must not
        // abort the whole scrape with a NullReferenceException, so it is skipped instead.
        if (titleElement == null
            || linkElement == null
            || imageElement == null
            || priceElement == null
            || availabilityElement == null)
        {
            logger.Info("Skipping a tile that is missing one of the expected child elements");
            continue;
        }

        string title = titleElement.TextContent.Trim();

        string offerStartDate = availabilityElement.GetAttribute("data-freeofferstartdate");
        string offerEndDate = availabilityElement.GetAttribute("data-freeofferenddate");

        // TryParseExact returns false for a missing (null) attribute, so no extra null check is needed.
        bool hasStartDate = DateTime.TryParseExact(offerStartDate, "ddd MMM dd HH:mm:ss Z yyyy", CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime startDate);
        bool hasEndDate = DateTime.TryParseExact(offerEndDate, "ddd MMM dd HH:mm:ss Z yyyy", CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime endDate);

        if (!hasStartDate && !hasEndDate)
        {
            logger.Info($"{title} has no start or end date");
            continue;
        }

        DateTime now = DateTime.Now;
        if ((hasStartDate && now < startDate) || (hasEndDate && now > endDate))
        {
            logger.Info($"{title} is not active right now");
            continue;
        }

        // Ordinal, case-insensitive comparison: independent of the current culture and allocation-free.
        string price = priceElement.TextContent.Trim();
        if (!string.Equals(price, "free to play", StringComparison.OrdinalIgnoreCase))
        {
            logger.Info($"{title} is not free");
            continue;
        }

        deals.Add(new Deal
        {
            Discount = 100,
            End = hasEndDate ? endDate : (DateTime?)null,
            Start = hasStartDate ? startDate : (DateTime?)null,
            Title = title,
            Link = $"https://store.ubi.com/{linkElement.GetAttribute("href")}",
            Image = imageElement.GetAttribute("data-desktop-src")
        });
    }

    return deals;
}
/// <summary>
/// Fills in the item's data (name, price, old price, technical options, description and images)
/// from the supplier's web page at <c>Url</c>.
/// </summary>
/// <returns>
/// <c>true</c> when the page was loaded and processed; otherwise <c>false</c>.
/// <c>RegisterInventItem</c> is called in both cases.
/// </returns>
public async Task<bool> FillByUrl()
{
    bool succeeded = false;

    if (await Loader.LoadPageAsync(Url))
    {
        // Item name: the first <h1> with non-blank text
        IHtmlHeadingElement heading = (IHtmlHeadingElement)Loader.Document.QuerySelectorAll("h1")
            .FirstOrDefault(item => item.TextContent.Trim().Length > 0);
        if (heading != null)
        {
            Name = heading.TextContent.Trim();
        }

        // Item price: the <span> whose id is "main-price"
        IHtmlSpanElement priceSpan = (IHtmlSpanElement)Loader.Document.QuerySelectorAll("span")
            .FirstOrDefault(item => item.Id != null && item.Id.Trim() == @"main-price");
        if (priceSpan != null)
        {
            string priceText = priceSpan.TextContent.Trim().Replace(" ", string.Empty);
            int parsedPrice;
            if (int.TryParse(priceText, out parsedPrice))
            {
                Price = parsedPrice;
            }
        }

        // Old item price: leading digits of the first <div> whose class contains "old-price"
        IHtmlDivElement oldPriceDiv = (IHtmlDivElement)Loader.Document.QuerySelectorAll("div")
            .FirstOrDefault(item => item.ClassName != null && item.ClassName.Contains(@"old-price"));
        if (oldPriceDiv != null)
        {
            string oldPriceText = oldPriceDiv.TextContent.Trim().Replace(" ", string.Empty);
            Match oldPriceMatch = Regex.Match(oldPriceText, @"^(?<value>\d+)");
            int parsedOldPrice;
            if (oldPriceMatch.Success && int.TryParse(oldPriceMatch.Groups["value"].Value, out parsedOldPrice))
            {
                OldPrice = parsedOldPrice;
            }
        }

        // Technical specifications: one option per row inside the "tech-specs" section
        IHtmlElement specsSection = (IHtmlElement)Loader.Document.QuerySelectorAll("section")
            .FirstOrDefault(item => item.ClassName != null && item.ClassName.Contains(@"tech-specs"));
        if (specsSection != null)
        {
            var specRows = specsSection.QuerySelectorAll("div")
                .Where(item => item.ClassName != null && item.ClassName.Contains(@"row"));

            foreach (IHtmlDivElement specRow in specRows)
            {
                IHtmlSpanElement nameSpan = (IHtmlSpanElement)specRow.QuerySelectorAll("span")
                    .FirstOrDefault();
                IHtmlParagraphElement valueParagraph = (IHtmlParagraphElement)specRow.QuerySelectorAll("p")
                    .FirstOrDefault(item => item.ClassName != null && item.ClassName.Contains(@"p-style2"));

                if (nameSpan == null || valueParagraph == null)
                {
                    continue;
                }

                // Rows with a blank name or a blank value are ignored.
                string optionName = nameSpan.TextContent.Trim();
                string optionValue = valueParagraph.TextContent.Trim();
                if (optionName.Length > 0 && optionValue.Length > 0)
                {
                    Options.Add(new InventItemOption(optionName, optionValue));
                }
            }
        }

        // Item description: the <div> whose id is "descr"
        IHtmlDivElement descriptionDiv = (IHtmlDivElement)Loader.Document.QuerySelectorAll("div")
            .FirstOrDefault(item => item.Id != null && item.Id.Trim() == @"descr");
        if (descriptionDiv != null)
        {
            Descr = descriptionDiv.TextContent.Trim();
        }

        // Item images: every <img> with a non-empty source inside the "pagination" list
        IHtmlUnorderedListElement imageList = (IHtmlUnorderedListElement)Loader.Document.QuerySelectorAll("ul")
            .FirstOrDefault(item => item.ClassName != null && item.ClassName.Contains(@"pagination"));
        if (imageList != null)
        {
            foreach (IHtmlImageElement image in imageList.QuerySelectorAll("img"))
            {
                if (!string.IsNullOrEmpty(image.Source))
                {
                    Images.Add(Site.PrepareUrl(image.Source));
                }
            }
        }

        succeeded = true;
    }

    RegisterInventItem();
    return succeeded;
}