/// <summary>
/// Collects the events listed in the table rows matched by <c>.c-content-three table tr</c>.
/// </summary>
/// <param name="document">Parsed page to read the rows from.</param>
/// <returns>
/// One <see cref="EventItem"/> per row that has exactly two cells, both with non-blank text,
/// in document order. Rows of any other shape are ignored.
/// </returns>
private static List<EventItem> extractEvents(AngleSharp.Dom.IDocument document)
{
    return document
        .QuerySelectorAll(".c-content-three table tr")
        .Select(row => row.QuerySelectorAll("td"))
        .Where(cells => cells.Length == 2
                        && !String.IsNullOrWhiteSpace(cells[0].TextContent)
                        && !String.IsNullOrWhiteSpace(cells[1].TextContent))
        // The first cell is the title and the second the date; EventItem takes the date first.
        .Select(cells => new EventItem(cells[1].TextContent.Trim(), cells[0].TextContent.Trim()))
        .ToList();
}
/// <summary>
/// Determines the current page from the <c>p</c> parameter of the base URL query and
/// takes the link to the following page from the paginator.
/// </summary>
/// <param name="document">Parsed listing page; may be <c>null</c>.</param>
/// <returns>
/// The link to the next page. An empty string if the page is the last or the only one,
/// or if the document has no base URL query.
/// </returns>
private static string GetNextPageUrl(AngleSharp.Dom.IDocument document)
{
    if (document?.BaseUrl?.Query == null)
    {
        return "";
    }

    // Check that the "next" button is present on the page.
    if (document.QuerySelector("span[data-marker='pagination-button/next']") == null)
    {
        return "";
    }

    // The "next" button itself stores no URL, so the link comes from the hidden paginator.
    // The root element's base URL is used when available; the document's own base URL
    // (validated above) is the fallback, so a missing root element no longer throws.
    string query = document.DocumentElement?.BaseUrl?.Query ?? document.BaseUrl.Query;

    // Read the current page number from the query; default to 1 when absent or not a number.
    ushort currentPageId;
    if (!ushort.TryParse(HttpUtility.ParseQueryString(query).Get("p"), out currentPageId))
    {
        currentPageId = 1;
    }

    // Find the link to the following page in the hidden paginator.
    string nextPageLabel = $"{currentPageId + 1}";
    var pageButton = document
        .QuerySelectorAll("a.pagination-page")
        .FirstOrDefault(e => e.InnerHtml == nextPageLabel);

    return pageButton != null
        ? document.Origin + pageButton.GetAttribute("href")
        : "";
}
/// <summary>
/// Returns the text content of every node matched by the selector stored in
/// <c>Constants.Wikipedia.Paragraphs.PARAGRAPHS</c>.
/// </summary>
/// <param name="doc">Parsed page to search.</param>
/// <param name="isMobile">Not read by this method.</param>
/// <returns>A lazily evaluated sequence of text contents; empty when nothing matches.</returns>
public IEnumerable<string> ExtractTextSections(AngleSharp.Dom.IDocument doc, bool isMobile)
{
    var paragraphs = doc.QuerySelectorAll(Constants.Wikipedia.Paragraphs.PARAGRAPHS);

    if (paragraphs == null || paragraphs.Length == 0)
    {
        return Enumerable.Empty<string>();
    }

    return paragraphs.Select(paragraph => paragraph.TextContent);
}
/// <summary>
/// Replaces the contents of <paramref name="categories"/> with one entry per checkbox
/// named <c>abfart[]</c> found in the document.
/// </summary>
/// <param name="categories">List that is cleared and then refilled.</param>
/// <param name="document">Parsed page containing the checkboxes and their labels.</param>
/// <exception cref="InvalidOperationException">
/// A checkbox has no <c>label</c> whose <c>for</c> attribute equals the checkbox id.
/// </exception>
private static void ExtractCategoriesFromDocument(List<Category> categories, AngleSharp.Dom.IDocument document)
{
    // GetAttribute yields null for a missing attribute, so the equality test alone
    // covers both "attribute present" and "attribute has this value".
    var checkboxes = document.QuerySelectorAll("input")
        .Where(m => m.GetAttribute("type") == "checkbox" && m.GetAttribute("name") == "abfart[]");

    // Collect the candidate labels once instead of re-querying the whole document
    // for every checkbox.
    var labels = document.QuerySelectorAll("label")
        .Where(m => m.HasAttribute("for"))
        .ToList();

    categories.Clear();

    foreach (var checkbox in checkboxes)
    {
        var elementId = checkbox.GetAttribute("id");
        var label = labels.First(m => m.GetAttribute("for") == elementId);

        categories.Add(new Category
        {
            Id = checkbox.GetAttribute("value"),
            Name = label.InnerHtml
        });
    }
}
/// <summary>
/// Loads the <c>/print</c> variant of the given page and builds a <see cref="Topic"/> for
/// every <c>div</c> whose <c>style</c> attribute is exactly <c>margin-top:20px;</c>.
/// </summary>
/// <param name="link">Page address; <c>/print</c> is appended when it is not already the suffix.</param>
/// <returns>The result of passing the collected topics to <c>split</c>.</returns>
/// <remarks>
/// Paragraphs whose <c>i</c> element mentions "Источники:" or "Комментарий:" are ignored.
/// The remaining paragraphs are read in pairs: objective first, then answer.
/// </remarks>
public Topic[][] GetPack(string link)
{
    // Ordinal comparison: lower-casing with the current culture can mis-detect the suffix
    // (for example under Turkish casing rules).
    if (!link.EndsWith("/print", StringComparison.OrdinalIgnoreCase))
    {
        link += link.EndsWith("/", StringComparison.Ordinal) ? "print" : "/print";
    }

    var themes = new List<Topic>();
    var config = Configuration.Default.WithDefaultLoader();

    // The method is synchronous by signature, so the asynchronous load is awaited by blocking.
    AngleSharp.Dom.IDocument document = BrowsingContext.New(config).OpenAsync(link).Result;

    var themeBlocks = document.QuerySelectorAll("div")
        .Where(m => m.GetAttribute("style") == "margin-top:20px;");

    foreach (var themeBlock in themeBlocks)
    {
        // Materialised once: the loop below indexes into the paragraphs repeatedly.
        List<AngleSharp.Dom.IElement> paragraphs = themeBlock.QuerySelectorAll("p")
            .Where(p =>
            {
                string note = p.QuerySelector("i")?.TextContent;
                return note == null
                       || (!note.Contains("Источники:") && !note.Contains("Комментарий:"));
            })
            .ToList();

        // The name is taken from the third line of the block's text; fall back when the
        // block has fewer lines instead of throwing.
        string[] lines = themeBlock.TextContent.Split('\n');
        string themeName = lines.Length > 2 ? lines[2] : "Неизвестно";

        int size = paragraphs.Count / 2;
        var questions = new Question[size];
        for (int i = 0; i < size; i++)
        {
            string objective = paragraphs[i * 2].TextContent.Replace("\n", " ").Trim();
            string answer = paragraphs[i * 2 + 1].TextContent.Replace("\n", " ").Replace("Ответ:", "").Trim();
            questions[i] = new Question(objective, answer, (i + 1) * 10);
        }

        themes.Add(new Topic(themeName, questions));
    }

    return split(themes.ToArray());
}
/// <summary>
/// Replaces the contents of <paramref name="years"/> with the non-blank <c>value</c>
/// attributes of the options inside the <c>select</c> element named <c>vJ</c>.
/// </summary>
/// <param name="years">List that is cleared and then refilled.</param>
/// <param name="document">Parsed page containing the select element.</param>
/// <exception cref="InvalidOperationException">The document has no <c>select</c> named <c>vJ</c>.</exception>
private static void ExtractAvailableYearsFromDocument(List<string> years, AngleSharp.Dom.IDocument document)
{
    // A missing attribute reads as null, which never equals "vJ".
    var yearSelect = document.QuerySelectorAll("select").First(m => m.GetAttribute("name") == "vJ");

    var optionValues = yearSelect.QuerySelectorAll("option")
        .Select(option => option.GetAttribute("value"))
        .Where(value => !string.IsNullOrWhiteSpace(value));

    years.Clear();
    years.AddRange(optionValues);
}
/// <summary>
/// Builds the schedule from the cells matched by <c>.stream-plan&gt;table&gt;tbody&gt;tr&gt;td</c>.
/// </summary>
/// <param name="document">Parsed page containing the stream plan table.</param>
/// <returns>
/// One <see cref="ScheduleItem"/> per cell that is not marked <c>free-streaming-slot</c>,
/// contains at least one <c>p</c> element and carries the <c>data-date</c>,
/// <c>data-hour-start</c> and <c>data-hour-end</c> attributes. Start and end are local times.
/// </returns>
/// <exception cref="FormatException">A date or hour attribute does not form a valid date/time.</exception>
private static List<ScheduleItem> extractSchedule(AngleSharp.Dom.IDocument document)
{
    var scheduleItems = new List<ScheduleItem>();

    // Looked up on first use only, so a page without slots never touches the time zone database.
    TimeZoneInfo berlin = null;

    foreach (var element in document.QuerySelectorAll(".stream-plan>table>tbody>tr>td"))
    {
        if (element.Attributes.GetNamedItem("free-streaming-slot") != null)
        {
            continue;
        }

        var content = element.QuerySelectorAll("p");
        if (content.Length < 1)
        {
            continue;
        }

        var title = content.ElementAtOrDefault(0).TextContent.Trim();
        var caster = content.ElementAtOrDefault(1)?.TextContent.Trim() ?? "";
        var date = element.Attributes.GetNamedItem("data-date")?.Value;
        var hourStart = element.Attributes.GetNamedItem("data-hour-start")?.Value;
        var hourEnd = element.Attributes.GetNamedItem("data-hour-end")?.Value;
        var cancelled = element.ClassList.Contains("cancelled-streaming-slot");

        // A cell without its timing attributes cannot be scheduled; skip it rather than
        // fail with a NullReferenceException.
        if (date == null || hourStart == null || hourEnd == null)
        {
            continue;
        }

        var dateParts = date.Split('-');
        var year = dateParts[0];
        var month = padScheduleNumber(dateParts[1]);
        var day = padScheduleNumber(dateParts[2]);

        if (berlin == null)
        {
            berlin = TimeZoneInfo.FindSystemTimeZoneById("Europe/Berlin");
        }

        var startDate = parseBerlinSlotTime(year, month, day, padScheduleNumber(hourStart), berlin);
        var endDate = parseBerlinSlotTime(year, month, day, padScheduleNumber(hourEnd), berlin);

        scheduleItems.Add(new ScheduleItem(title, caster, startDate, endDate, cancelled));
    }

    return scheduleItems;
}

/// <summary>Prepends a zero to any value that is not exactly two characters long.</summary>
private static string padScheduleNumber(string value)
{
    return value.Length == 2 ? value : value.Insert(0, "0");
}

/// <summary>
/// Interprets the given date and full hour as Europe/Berlin wall-clock time and converts it
/// to the local time of the machine.
/// </summary>
private static DateTime parseBerlinSlotTime(string year, string month, string day, string hour, TimeZoneInfo berlin)
{
    // Hour "24" is parsed as 00:00 and moved to the following day.
    var rollsOver = hour == "24";
    var hourText = rollsOver ? "00" : hour;

    var wallClock = DateTime.Parse($"{year}-{month}-{day}T{hourText}:00:00.000", CultureInfo.InvariantCulture);
    if (rollsOver)
    {
        wallClock = wallClock.AddDays(1);
    }

    // Use the offset in force at the slot's own date, not today's, so slots on the other
    // side of a daylight-saving switch are not shifted by an hour.
    return new DateTimeOffset(wallClock, berlin.GetUtcOffset(wallClock)).LocalDateTime;
}
/// <summary>
/// Tells whether the page shows the catalogue's "page does not exist" message.
/// </summary>
/// <param name="document">Parsed page to inspect.</param>
/// <returns><c>true</c> when the message occurs in the text of the document's elements.</returns>
public static bool IsEnd(this AngleSharp.Dom.IDocument document)
{
    const string notFoundMessage = "Страницы с указанным вами адресом в каталоге не существует.";

    // An element's text content is the concatenation of all text beneath it, so the root
    // element's text contains the message exactly when some element's text does. Checking
    // the root once avoids rebuilding the text of every element in the document.
    var root = document.DocumentElement;
    return root != null && root.TextContent.Contains(notFoundMessage);
}
/// <summary>
/// Opens the nuget.org page of every package listed under <c>SiteKeys.NuGet</c> on the input
/// document and collects its download statistics and version history.
/// </summary>
/// <param name="input">Document whose <c>SiteKeys.NuGet</c> list names the packages.</param>
/// <param name="context">Execution context used for logging.</param>
/// <param name="metadata">
/// Receives the collected packages under the key <c>NuGetPackages</c> when at least one was read.
/// </param>
/// <remarks>
/// A failure for one package is logged as a warning and does not stop the remaining packages.
/// </remarks>
private async Task GetProjectNuGetDataAsync(IDocument input, IExecutionContext context, ConcurrentDictionary<string, object> metadata)
{
    char[] separator = { ' ' };
    List<Package> packageData = new List<Package>();
    IReadOnlyList<string> packages = input.GetList(SiteKeys.NuGet, Array.Empty<string>());

    foreach (string package in packages.Where(x => !string.IsNullOrWhiteSpace(x)))
    {
        context.LogInformation($"Getting NuGet data for {package}");
        try
        {
            IBrowsingContext browsingContext = BrowsingContext.New(AngleSharpConfig);
            AngleSharp.Dom.IDocument document = await browsingContext.OpenAsync($"https://www.nuget.org/packages/{package}");

            // The null check must come first: reading StatusCode from a null document would
            // throw before the dedicated warning could ever be logged.
            if (document == null)
            {
                context.LogWarning($"Could not get document for {package}");
                continue;
            }

            if (document.StatusCode != System.Net.HttpStatusCode.OK)
            {
                context.LogWarning($"Bad status code for {package}: {document.StatusCode}");
                continue;
            }

            Package data = new Package
            {
                Id = package
            };

            // Get statistics: the element following the "Statistics" heading holds the entries.
            AngleSharp.Dom.IElement statistics = document
                .QuerySelectorAll(".package-details-info h2")
                .First(x => x.TextContent == "Statistics")
                .NextElementSibling;

            // Each entry's first whitespace-separated token is stored as the figure.
            data.TotalDownloads = statistics.Children
                .First(x => x.TextContent.Contains("total downloads"))
                .TextContent.Trim().Split(separator, StringSplitOptions.RemoveEmptyEntries)[0];
            data.PerDayDownloads = statistics.Children
                .First(x => x.TextContent.Contains("per day"))
                .TextContent.Trim().Split(separator, StringSplitOptions.RemoveEmptyEntries)[0];

            // Get versions
            data.Versions = document
                .QuerySelectorAll("#version-history table tbody tr")
                .Select(x => new PackageVersion(x))
                .ToList();

            // Add the data
            packageData.Add(data);
        }
        catch (Exception ex)
        {
            context.LogWarning($"Error getting NuGet data for {package}: {ex.Message}");
        }
    }

    if (packageData.Count > 0)
    {
        metadata.TryAdd("NuGetPackages", packageData);
    }
}
/// <summary>
/// Passes the model links of a brand page (<c>ul.modelsLinks li a</c>) to
/// <c>GenerateModelList</c> and returns its result.
/// </summary>
/// <param name="document">Parsed brand page.</param>
/// <param name="carBrand">Brand forwarded to <c>GenerateModelList</c>.</param>
/// <returns>The list produced by <c>GenerateModelList</c>.</returns>
private async Task<List<CarModelDto>> GetBrandData(AngleSharp.Dom.IDocument document, string carBrand)
{
    // The configuration object that used to be built here was never read and has been removed.
    var modelLinks = document.QuerySelectorAll("ul.modelsLinks li a");
    return await GenerateModelList(modelLinks, carBrand);
}
/// <summary>
/// Reads the information on a product page that can change over time and stores it on the product.
/// </summary>
/// <param name="product">Product to update; its ASIN is used when no document is supplied.</param>
/// <param name="document">Already loaded product page, or <c>null</c> to fetch one.</param>
/// <returns>The same <paramref name="product"/> instance after it has been updated.</returns>
/// <remarks>
/// The <c>#merchant-info</c>, <c>#SalesRank &gt; .value</c> and <c>#availability &gt; span</c>
/// elements are required: a page without them raises a <see cref="NullReferenceException"/>.
/// </remarks>
public async Task<Product> Update(Product product, AngleSharp.Dom.IDocument document = null)
{
    // Offer summary for new / used items.
    int newStockCount = 0;
    int? newStockPrice = null;
    int usedStockCount = 0;
    int? usedStockPrice = null;

    // If no document was supplied, fetch a new one.
    if (document == null)
    {
        document = await Tools.GetDocument(product.ASIN);
    }

    var stockNodes = document.QuerySelectorAll("#olp_feature_div > div > span:not(.a-color-base)");
    if (stockNodes.Length != 0)
    {
        foreach (var stockNode in stockNodes)
        {
            // A node without a link carries no offer text; skip it instead of failing.
            var offerLink = stockNode.QuerySelector("a");
            if (offerLink == null)
            {
                continue;
            }

            string offerText = offerLink.TextContent;

            // New items: extract the offer count and the lowest price (either may be absent).
            if (offerText.IndexOf("新品の出品:", StringComparison.Ordinal) > -1)
            {
                newStockCount = Convert.ToInt32(offerText.Trim().Replace("新品の出品:", ""));
                newStockPrice = stockNode.QuerySelector("span.a-color-price")?.TextContent.PriceToValue();
            }

            // Used items: extract the offer count and the lowest price (either may be absent).
            if (offerText.IndexOf("中古品の出品:", StringComparison.Ordinal) > -1)
            {
                usedStockCount = Convert.ToInt32(offerText.Trim().Replace("中古品の出品:", ""));
                usedStockPrice = stockNode.QuerySelector("span.a-color-price")?.TextContent.PriceToValue();
            }
        }
    }
    else if (document.QuerySelectorAll("#olp_feature_div > div > a").Length != 0)
    {
        // Retrieval error: the stock notation has an unexpected form.
        newStockCount = -1;
        usedStockCount = -1;
    }
    // Otherwise there is no new/used notation at all, which means no stock: keep the defaults.

    // Extract the current price.
    var amazonPriceNode = document.QuerySelector("#priceblock_ourprice");
    int? amazonPrice = null;
    if (amazonPriceNode != null)
    {
        amazonPrice = amazonPriceNode.TextContent.PriceToValue();
    }

    // Extract the discount (may be absent).
    var priceSavingNode = document.QuerySelector(".priceBlockSavingsString");
    int? priceSaving = null;
    if (priceSavingNode != null)
    {
        // Extract only the amount from the discount text. When the text holds no amount the
        // discount stays null; taking Substring(1) of an empty match would throw.
        Match match = Regex.Match(priceSavingNode.TextContent.Trim(), @"¥\d{1,3}(,\d{1,3})*\b");
        if (match.Success)
        {
            priceSaving = match.Value.Substring(1).PriceToValue();
        }
    }

    // Check whether an offer from Amazon is available.
    string merchantText = document.QuerySelector("#merchant-info").TextContent;

    // Overall ranking: the first child's text without its last character.
    var rankInfo = document.QuerySelector("#SalesRank > .value").FirstChild;
    string ranking = rankInfo.TextContent.Trim();
    if (ranking.Length > 0)
    {
        ranking = ranking.Remove(ranking.Length - 1, 1);
    }

    // Product stock status.
    string status = document.QuerySelector("#availability > span").TextContent.Trim();

    // Decide the displayed price in the order Amazon offer > new > used.
    int? price = amazonPrice ?? newStockPrice ?? usedStockPrice;

    product.Price = price;
    product.PriceHistory[DateTime.Now] = price;
    product.PriceSaving = priceSaving;
    product.SetStockInfo(newStockPrice, newStockCount, usedStockPrice, usedStockCount);
    product.MerchantStatus = this.Status(merchantText);
    product.Ranking = ranking;
    product.Status = status;

    return product;
}
/// <summary>
/// Builds an <see cref="Apartment"/> for every <c>article.product</c> element of a listing page.
/// </summary>
/// <param name="document">Parsed listing page.</param>
/// <param name="disctrict">District assigned to every apartment that is returned.</param>
/// <returns>The apartments in document order.</returns>
/// <remarks>
/// Numbers are parsed with the current culture after "Kč" and "." have been removed.
/// </remarks>
private static List<Apartment> ParseApartments(AngleSharp.Dom.IDocument document, string disctrict)
{
    var apartments = new List<Apartment>();

    foreach (var ad in document.QuerySelectorAll("article.product"))
    {
        var apartment = new Apartment
        {
            Status = Status.Available,
            District = disctrict
        };

        // The numeric id is whatever follows the first '-' of the element id.
        string idAttribute = ad.GetAttribute("id");
        apartment.Id = int.Parse(idAttribute.Substring(idAttribute.IndexOf('-') + 1));

        apartment.Address = ad.QuerySelector("h3 strong").TextContent.Trim();

        string[] metadata = null;
        string note = ad.QuerySelector("p.product__note").TextContent;

        if (note.Contains("Pronájem bytu"))
        {
            // Rent and fees are only read when the price has the form "<rent> + <fees>".
            string price = ad.QuerySelector("strong.product__value").TextContent;
            if (price.Contains("+"))
            {
                string[] prices = price.Split('+');
                apartment.Rent = decimal.Parse(prices[0].Replace("Kč", "").Replace(".", "").Trim());
                apartment.Fees = decimal.Parse(prices[1].Replace("Kč", "").Replace(".", "").Trim());
            }

            metadata = note.Replace("Pronájem bytu ", "").Split(',');
        }

        if (note.Contains("Prodej bytu"))
        {
            // Only the part before a '+' counts as the purchase price.
            string price = ad.QuerySelector("strong.product__value").TextContent;
            if (price.Contains("+"))
            {
                price = price.Split('+')[0];
            }

            apartment.PurchasePrice = decimal.Parse(price.Replace("Kč", "").Replace(".", "").Trim());
            metadata = note.Replace("Prodej bytu ", "").Split(',');
        }

        // metadata stays null when the note is neither a rental nor a sale; such ads simply
        // have no disposition/area instead of failing with a NullReferenceException.
        if (metadata != null && metadata.Length == 2)
        {
            apartment.Disposition = metadata[0].Trim();
            apartment.Area = int.Parse(metadata[1].Replace(" m²", "").Trim());
        }

        apartment.Description = ad.QuerySelector("p.product__info-text").TextContent.Trim();
        apartment.DetailsLink = ad.QuerySelector("a.product__link").GetAttribute("href");

        // additional attributes
        var badge = ad.QuerySelector("div.product__header span.product__label span.badge");
        if (badge != null)
        {
            switch (badge.TextContent)
            {
                case "Nabídka Premium uživatele":
                    apartment.IsPremiumOffer = true;
                    break;
                case "Rezervováno":
                    apartment.Status = Status.Reserved;
                    break;
            }
        }

        apartments.Add(apartment);
    }

    return apartments;
}
/// <summary>
/// Loads the fiction page for the given story id and posts an embed with its title, author,
/// cover image, scores and counts to the current channel.
/// </summary>
/// <param name="story_id">Id appended to the <c>fiction/</c> path of the site.</param>
/// <remarks>
/// Problems met along the way are collected and posted as a separate "Error" embed. When the
/// page cannot be loaded at all, only that error embed is sent.
/// </remarks>
public async Task Stats(int story_id = 0)
{
    string fullQueryLink = royalroadl_domain + "fiction/" + story_id;
    var embed = new EmbedBuilder();
    StringBuilder error_message = new StringBuilder();

    AngleSharp.Dom.IDocument document = null;
    try
    {
        var config = Configuration.Default.WithDefaultLoader();
        document = await BrowsingContext.New(config).OpenAsync(fullQueryLink);
    }
    catch (Exception ex)
    {
        error_message.AppendLine($"Message: {ex.Message}\nSource: searching the site");
    }

    // Without a document there is nothing to parse; dereferencing it would throw and the
    // collected error would never reach the channel.
    if (document != null)
    {
        var existElem = document.QuerySelector("div.col-md-12.page-404");
        if (existElem != null)
        {
            embed.WithErrorColor().WithDescription($"The story with the ID \"{story_id}\" does not exist.");
        }
        else
        {
            // title
            var titleElem = document.QuerySelector("h2.font-white");
            string titleText = "";
            try
            {
                titleText = titleElem.Text();
            }
            catch (Exception ex)
            {
                error_message.AppendLine($"Message: {ex.Message}\nSource: title");
            }

            // author
            var authorElem = document.QuerySelector("h4.font-white");
            string authorText = "";
            try
            {
                authorText = authorElem.Text();
            }
            catch (Exception ex)
            {
                error_message.AppendLine($"Message: {ex.Message}\nSource: author");
            }

            // cover art
            var imageElem = document.QuerySelector("img.img-offset");
            string imageUrl = "";
            try
            {
                imageUrl = ((IHtmlImageElement)imageElem).Source;
            }
            catch (Exception ex)
            {
                error_message.AppendLine($"Message: {ex.Message}\nSource: image");
            }

            var display = new StringBuilder();
            display.AppendLine($"**{titleText}** {authorText}\n");

            // stats base: the first column holds the scores, the second the counts
            var div_stats_elems = document.QuerySelectorAll("div.stats-content > div.col-sm-6");
            var parser = new HtmlParser();

            // score
            display.AppendLine("**Scores**");
            try
            {
                // Indexing happens inside the try so a page without the stats columns is
                // reported like every other parsing problem.
                var score_p_elem = parser.Parse(div_stats_elems[0].InnerHtml);
                foreach (var score_elm in score_p_elem.QuerySelectorAll("span.star"))
                {
                    var s_title = score_elm.GetAttribute("data-original-title");
                    var s_stars = score_elm.GetAttribute("data-content");
                    display.AppendLine($"`{s_title}:` {s_stars} 🌟");
                }
            }
            catch (Exception ex)
            {
                error_message.AppendLine($"Message: {ex.Message}\nSource: scores");
            }

            display.AppendLine("");

            // count
            display.AppendLine("**Count**");
            try
            {
                var count_p_elem = parser.Parse(div_stats_elems[1].InnerHtml);
                var count_elems = count_p_elem.QuerySelectorAll("li.bold.uppercase");

                // Entries come in pairs: a label followed by its value.
                for (var i = 0; i < count_elems.Length; i += 2)
                {
                    var label = count_elems[i].Text();
                    var value = count_elems[i + 1].Text();
                    display.AppendLine($"`{label}` {value}");
                }
            }
            catch (Exception ex)
            {
                error_message.AppendLine($"Message: {ex.Message}\nSource: stats");
            }

            embed.WithOkColor().WithTitle(titleText);
            embed.WithDescription(display.ToString());
            embed.WithUrl(fullQueryLink);
            embed.WithImageUrl(imageUrl);
        }

        try
        {
            await Context.Channel.EmbedAsync(embed).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            error_message.AppendLine($"Message: {ex.Message}\nSource: sending story message");
        }
    }

    if (!string.IsNullOrWhiteSpace(error_message.ToString()))
    {
        await Context.Channel.EmbedAsync(new EmbedBuilder().WithErrorColor().WithTitle("Error").WithDescription(error_message.ToString())).ConfigureAwait(false);
    }
}