/// <summary>
/// Builds the list of events from the table rows found under <c>.c-content-three</c>.
/// </summary>
/// <param name="document">Parsed page to read the rows from.</param>
/// <returns>
/// One <see cref="EventItem"/> per row that has exactly two non-blank cells;
/// the first cell is used as the title and the second as the date.
/// </returns>
private static List<EventItem> extractEvents(AngleSharp.Dom.IDocument document)
        {
            var result = new List<EventItem>();

            foreach (var row in document.QuerySelectorAll(".c-content-three table tr"))
            {
                var cells = row.QuerySelectorAll("td");

                // Only rows shaped as "title | date" are of interest.
                if (cells.Length != 2)
                {
                    continue;
                }

                var rawTitle = cells[0].TextContent;
                var rawDate  = cells[1].TextContent;

                var bothFilled = !String.IsNullOrWhiteSpace(rawTitle) && !String.IsNullOrWhiteSpace(rawDate);
                if (bothFilled)
                {
                    result.Add(new EventItem(rawDate.Trim(), rawTitle.Trim()));
                }
            }

            return result;
        }
示例#2
0
        /// <summary>
        /// Determines the current page from the <c>p</c> parameter of the document's base URL query
        /// and resolves the link to the next page from the paginator.
        /// </summary>
        /// <param name="document">Parsed listing page; may be <c>null</c>.</param>
        /// <returns>
        /// The link to the next page (document origin + the paginator link's <c>href</c>).
        /// <see cref="string.Empty"/> when the document or its base URL query is missing, when there is
        /// no "next" button, or when the paginator has no usable link for the following page.
        /// </returns>
        private static string GetNextPageUrl(AngleSharp.Dom.IDocument document)
        {
            string baseQuery = document?.BaseUrl?.Query;

            if (baseQuery == null)
            {
                return string.Empty;
            }

            // Check that the "Next" button is present on the page.
            if (document.QuerySelector("span[data-marker='pagination-button/next']") == null)
            {
                return string.Empty;
            }

            // The "Next" button itself does not carry a URL, so the link has to be taken from the
            // hidden paginator. First read the current page number from the "p" query parameter,
            // defaulting to 1 when it is absent or not a number.
            // The root element's base URL is preferred (as before); the already validated document
            // query is the fallback so a document without a root element no longer throws.
            string query = document.DocumentElement?.BaseUrl?.Query ?? baseQuery;

            ushort currentPageId;
            if (!ushort.TryParse(HttpUtility.ParseQueryString(query).Get("p"), out currentPageId))
            {
                currentPageId = 1;
            }

            // Find the link to the following page in the hidden paginator.
            string nextPageLabel = $"{currentPageId + 1}";
            var pageButton = document.QuerySelectorAll("a.pagination-page")
                                     .FirstOrDefault(e => e.InnerHtml == nextPageLabel);

            string href = pageButton?.GetAttribute("href");

            // A paginator entry without an href must not yield the bare origin as "next page".
            if (string.IsNullOrEmpty(href))
            {
                return string.Empty;
            }

            return document.Origin + href;
        }
        /// <summary>
        /// Returns the text of every element matched by the Wikipedia paragraph selector.
        /// </summary>
        /// <param name="doc">Parsed page to query.</param>
        /// <param name="isMobile">Not used by this implementation.</param>
        /// <returns>
        /// A lazily evaluated sequence with the text content of each matched element,
        /// or an empty sequence when nothing matches.
        /// </returns>
        public IEnumerable<string> ExtractTextSections(AngleSharp.Dom.IDocument doc, bool isMobile)
        {
            var paragraphs = doc.QuerySelectorAll(Constants.Wikipedia.Paragraphs.PARAGRAPHS);

            if (paragraphs == null || paragraphs.Length == 0)
            {
                return Enumerable.Empty<string>();
            }

            return paragraphs.Select(paragraph => paragraph.TextContent);
        }
        /// <summary>
        /// Replaces the contents of <paramref name="categories"/> with one entry per
        /// <c>abfart[]</c> checkbox found in the document.
        /// </summary>
        /// <param name="categories">List that is cleared and then filled with the categories found.</param>
        /// <param name="document">Parsed page containing the checkboxes and their labels.</param>
        /// <exception cref="System.InvalidOperationException">
        /// A checkbox has no <c>label</c> whose <c>for</c> attribute equals the checkbox id.
        /// </exception>
        private static void ExtractCategoriesFromDocument(List <Category> categories, AngleSharp.Dom.IDocument document)
        {
            // GetAttribute returns null for a missing attribute, so a plain comparison against
            // the expected constant also covers the "attribute absent" case.
            var checkboxes = document.QuerySelectorAll("input").Where(input =>
                input.GetAttribute("type") == "checkbox" &&
                input.GetAttribute("name") == "abfart[]");

            // Query the labels once instead of once per checkbox.
            var labels = document.QuerySelectorAll("label");

            categories.Clear();
            foreach (var checkbox in checkboxes)
            {
                var elementId = checkbox.GetAttribute("id");
                var value     = checkbox.GetAttribute("value");

                // HasAttribute is kept here: elementId may be null, and a label without "for"
                // must not be treated as a match for it.
                var label = labels.First(m => m.HasAttribute("for") && m.GetAttribute("for") == elementId);

                categories.Add(new Category {
                    Id = value, Name = label.InnerHtml
                });
            }
        }
示例#5
0
    /// <summary>
    /// Downloads the printable version of a question pack and converts it into topics.
    /// </summary>
    /// <param name="link">
    /// Address of the pack; <c>/print</c> is appended when the link does not already end with it.
    /// </param>
    /// <returns>The parsed topics, grouped by <c>split</c>.</returns>
    /// <remarks>
    /// Within every theme block the paragraphs that remain after filtering are read as
    /// question/answer pairs; the i-th pair is given the value <c>(i + 1) * 10</c>.
    /// </remarks>
    public Topic[][] GetPack(string link)
    {
        // Ordinal comparison: the suffix is a URL fragment, not linguistic text.
        if (!link.EndsWith("/print", System.StringComparison.OrdinalIgnoreCase))
        {
            link += link.EndsWith("/", System.StringComparison.Ordinal) ? "print" : "/print";
        }

        var themes = new List<Topic>();
        var config = Configuration.Default.WithDefaultLoader();

        // NOTE(review): this blocks on the asynchronous load; the synchronous signature is kept
        // so existing callers continue to work.
        AngleSharp.Dom.IDocument document = BrowsingContext.New(config).OpenAsync(link).Result;
        IEnumerable<AngleSharp.Dom.IElement> themeBlocks =
            document.QuerySelectorAll("div").Where(m => m.GetAttribute("style") == "margin-top:20px;");

        foreach (var themeBlock in themeBlocks)
        {
            // Drop the "sources" and "comment" paragraphs. The result is materialised once so that
            // counting and indexing below do not re-run the query for every access.
            List<AngleSharp.Dom.IElement> paragraphs = themeBlock.QuerySelectorAll("p")
                .Where(p =>
                {
                    string italicText = p.QuerySelector("i")?.TextContent;
                    return italicText == null ||
                           (!italicText.Contains("Источники:") && !italicText.Contains("Комментарий:"));
                })
                .ToList();

            // The theme name is read from the third line of the block's text; fall back to the
            // placeholder when the block has fewer lines instead of throwing.
            string[] lines     = themeBlock.TextContent.Split('\n');
            string   themeName = lines.Length > 2 ? lines[2] : "Неизвестно";

            int size      = paragraphs.Count / 2;
            var questions = new Question[size];
            for (int i = 0; i < size; i++)
            {
                string objective = paragraphs[i * 2].TextContent.Replace("\n", " ").Trim();
                string answer    = paragraphs[i * 2 + 1].TextContent.Replace("\n", " ").Replace("Ответ:", "").Trim();
                questions[i] = new Question(objective, answer, (i + 1) * 10);
            }

            themes.Add(new Topic(themeName, questions));
        }

        return split(themes.ToArray());
    }
        /// <summary>
        /// Replaces the contents of <paramref name="years"/> with the non-blank option values
        /// of the <c>select</c> element named <c>vJ</c>.
        /// </summary>
        /// <param name="years">List that is cleared and then filled with the option values.</param>
        /// <param name="document">Parsed page containing the <c>select</c> element.</param>
        /// <exception cref="System.InvalidOperationException">
        /// The document has no <c>select</c> element named <c>vJ</c>.
        /// </exception>
        private static void ExtractAvailableYearsFromDocument(List <string> years, AngleSharp.Dom.IDocument document)
        {
            // Resolve the drop-down first so that a missing element leaves the list untouched.
            var yearSelect = document
                             .QuerySelectorAll("select")
                             .First(candidate => candidate.GetAttribute("name") == "vJ");

            var optionValues = yearSelect
                               .QuerySelectorAll("option")
                               .Select(opt => opt.GetAttribute("value"))
                               .Where(v => !string.IsNullOrWhiteSpace(v));

            years.Clear();
            years.AddRange(optionValues);
        }
        /// <summary>
        /// Reads the streaming schedule from the cells of the <c>.stream-plan</c> table.
        /// </summary>
        /// <param name="document">Parsed page containing the schedule table.</param>
        /// <returns>
        /// One <see cref="ScheduleItem"/> per occupied slot. Cells marked with
        /// <c>free-streaming-slot</c>, cells without any paragraph and cells lacking one of the
        /// <c>data-date</c>, <c>data-hour-start</c> or <c>data-hour-end</c> attributes are skipped.
        /// Start and end are converted from Europe/Berlin wall-clock time to the machine's local time.
        /// </returns>
        private static List <ScheduleItem> extractSchedule(AngleSharp.Dom.IDocument document)
        {
            var scheduleItems = new List<ScheduleItem>();

            // Looked up on first use only, so a page without slots never touches the time-zone database.
            TimeZoneInfo berlin = null;

            foreach (var element in document.QuerySelectorAll(".stream-plan>table>tbody>tr>td"))
            {
                if (element.Attributes.GetNamedItem("free-streaming-slot") != null)
                {
                    continue;
                }

                var content = element.QuerySelectorAll("p");

                if (content.Length < 1)
                {
                    continue;
                }

                var date      = element.Attributes.GetNamedItem("data-date")?.Value;
                var hourStart = element.Attributes.GetNamedItem("data-hour-start")?.Value;
                var hourEnd   = element.Attributes.GetNamedItem("data-hour-end")?.Value;

                // Without all three attributes the slot cannot be placed in time.
                if (string.IsNullOrWhiteSpace(date) ||
                    string.IsNullOrWhiteSpace(hourStart) ||
                    string.IsNullOrWhiteSpace(hourEnd))
                {
                    continue;
                }

                var title     = content[0].TextContent.Trim();
                var caster    = content.ElementAtOrDefault(1)?.TextContent.Trim() ?? "";
                var cancelled = element.ClassList.Contains("cancelled-streaming-slot");

                // data-date is "year-month-day"; month and day may come without a leading zero.
                var dateParts = date.Split('-');
                var day       = new DateTime(
                    int.Parse(dateParts[0], CultureInfo.InvariantCulture),
                    int.Parse(dateParts[1], CultureInfo.InvariantCulture),
                    int.Parse(dateParts[2], CultureInfo.InvariantCulture));

                berlin = berlin ?? TimeZoneInfo.FindSystemTimeZoneById("Europe/Berlin");

                var startDate = toLocalSlotTime(berlin, day, int.Parse(hourStart, CultureInfo.InvariantCulture));
                var endDate   = toLocalSlotTime(berlin, day, int.Parse(hourEnd, CultureInfo.InvariantCulture));

                scheduleItems.Add(new ScheduleItem(title, caster, startDate, endDate, cancelled));
            }

            return scheduleItems;
        }

        /// <summary>
        /// Converts a full hour on the given day, expressed as wall-clock time in
        /// <paramref name="zone"/>, to the machine's local time.
        /// </summary>
        /// <param name="zone">Time zone the schedule is published in.</param>
        /// <param name="day">Calendar day of the slot (time of day is ignored).</param>
        /// <param name="hour">Hour of the day; 24 denotes midnight of the following day.</param>
        /// <returns>The corresponding local <see cref="DateTime"/>.</returns>
        private static DateTime toLocalSlotTime(TimeZoneInfo zone, DateTime day, int hour)
        {
            var wallClock = hour == 24
                ? day.Date.AddDays(1)
                : new DateTime(day.Year, day.Month, day.Day, hour, 0, 0);

            // Use the offset valid at the slot itself rather than "now", so that slots on the
            // other side of a daylight-saving change get the right UTC offset.
            var offset = zone.GetUtcOffset(wallClock);

            return new DateTimeOffset(wallClock, offset).LocalDateTime;
        }
示例#8
0
        /// <summary>
        /// Tells whether the page is the catalogue's "address does not exist" page.
        /// </summary>
        /// <param name="document">Parsed page to inspect.</param>
        /// <returns>
        /// <c>true</c> when any element's text contains the catalogue's "page does not exist" message.
        /// </returns>
        public static bool IsEnd(this AngleSharp.Dom.IDocument document)
        {
            const string notFoundMessage = "Страницы с указанным вами адресом в каталоге не существует.";

            return document
                   .QuerySelectorAll("*")
                   .Any(node => node.TextContent.Contains(notFoundMessage));
        }
        /// <summary>
        /// Scrapes nuget.org for every package id listed under <c>SiteKeys.NuGet</c> in
        /// <paramref name="input"/> and stores the collected data in <paramref name="metadata"/>.
        /// </summary>
        /// <param name="input">Document whose NuGet package list is read.</param>
        /// <param name="context">Execution context used for logging.</param>
        /// <param name="metadata">
        /// Receives the list of packages under the key <c>NuGetPackages</c> when at least one
        /// package could be read.
        /// </param>
        /// <remarks>
        /// Failures for a single package (missing page, bad status, unexpected markup) are logged
        /// as warnings and do not stop the remaining packages from being processed.
        /// </remarks>
        private async Task GetProjectNuGetDataAsync(IDocument input, IExecutionContext context, ConcurrentDictionary <string, object> metadata)
        {
            List <Package>         packageData = new List <Package>();
            IReadOnlyList <string> packages    = input.GetList(SiteKeys.NuGet, Array.Empty <string>());

            foreach (string package in packages.Where(x => !string.IsNullOrWhiteSpace(x)))
            {
                context.LogInformation($"Getting NuGet data for {package}");
                try
                {
                    IBrowsingContext         browsingContext = BrowsingContext.New(AngleSharpConfig);
                    AngleSharp.Dom.IDocument document        = await browsingContext.OpenAsync($"https://www.nuget.org/packages/{package}");

                    // The null check has to come first: reading StatusCode on a null document throws.
                    if (document == null)
                    {
                        context.LogWarning($"Could not get document for {package}");
                    }
                    else if (document.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        context.LogWarning($"Bad status code for {package}: {document.StatusCode}");
                    }
                    else
                    {
                        Package data = new Package
                        {
                            Id = package
                        };

                        // Get statistics: the element following the "Statistics" heading holds the
                        // download figures; the first whitespace-separated token of each entry is the number.
                        AngleSharp.Dom.IElement statistics = document
                                                             .QuerySelectorAll(".package-details-info h2")
                                                             .First(x => x.TextContent == "Statistics")
                                                             .NextElementSibling;
                        data.TotalDownloads = statistics.Children
                                              .First(x => x.TextContent.Contains("total downloads"))
                                              .TextContent.Trim().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)[0];
                        data.PerDayDownloads = statistics.Children
                                               .First(x => x.TextContent.Contains("per day"))
                                               .TextContent.Trim().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)[0];

                        // Get versions
                        data.Versions = document
                                        .QuerySelectorAll("#version-history table tbody tr")
                                        .Select(x => new PackageVersion(x))
                                        .ToList();

                        // Add the data
                        packageData.Add(data);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a page with unexpected markup must not abort the other packages.
                    context.LogWarning($"Error getting NuGet data for {package}: {ex.Message}");
                }
            }

            if (packageData.Count > 0)
            {
                metadata.TryAdd("NuGetPackages", packageData);
            }
        }
示例#10
0
        /// <summary>
        /// Builds the model list of a car brand from the model links on the brand page.
        /// </summary>
        /// <param name="document">Parsed brand page.</param>
        /// <param name="carBrand">Brand name passed through to <c>GenerateModelList</c>.</param>
        /// <returns>The models produced by <c>GenerateModelList</c> for the links under <c>ul.modelsLinks</c>.</returns>
        private async Task <List <CarModelDto> > GetBrandData(AngleSharp.Dom.IDocument document, string carBrand)
        {
            // The AngleSharp configuration that used to be built here was never used and has been removed.
            var modelLinks = document.QuerySelectorAll("ul.modelsLinks li a");

            return await GenerateModelList(modelLinks, carBrand);
        }
        /// <summary>
        /// Reads the information that can change over time (prices, stock, ranking, availability)
        /// from the product page HTML and writes it to <paramref name="product"/>.
        /// </summary>
        /// <param name="product">Product to update; its ASIN is used when the page has to be fetched.</param>
        /// <param name="document">Already parsed product page, or <c>null</c> to fetch a fresh one.</param>
        /// <returns>The same <paramref name="product"/> instance after it has been updated.</returns>
        public async Task <Product> Update(Product product, AngleSharp.Dom.IDocument document = null)
        {
            // Stock count and lowest price for New / Used offers.
            int newStockCount  = 0;
            int?newStockPrice  = null;
            int usedStockCount = 0;
            int?usedStockPrice = null;

            // No document supplied: fetch a fresh one.
            if (document == null)
            {
                document = await Tools.GetDocument(product.ASIN);
            }

            var stockNodes = document.QuerySelectorAll("#olp_feature_div > div > span:not(.a-color-base)");

            if (stockNodes.Length != 0)
            {
                foreach (var stockNode in stockNodes)
                {
                    // A node without an offer link carries no stock information.
                    var offerLink = stockNode.QuerySelector("a");
                    if (offerLink == null)
                    {
                        continue;
                    }

                    string offerText = offerLink.TextContent;

                    // New offers: extract stock count and lowest price (the price may be absent).
                    if (offerText.Contains("新品の出品:"))
                    {
                        newStockCount = Convert.ToInt32(offerText.Trim().Replace("新品の出品:", ""));
                        newStockPrice = stockNode.QuerySelector("span.a-color-price")?.TextContent.PriceToValue();
                    }

                    // Used offers: extract stock count and lowest price (the price may be absent).
                    if (offerText.Contains("中古品の出品:"))
                    {
                        usedStockCount = Convert.ToInt32(offerText.Trim().Replace("中古品の出品:", ""));
                        usedStockPrice = stockNode.QuerySelector("span.a-color-price")?.TextContent.PriceToValue();
                    }
                }
            }
            else if (document.QuerySelectorAll("#olp_feature_div > div > a").Length != 0)
            {
                // Read error (the stock markup has an unexpected shape): flag both counts with -1.
                newStockCount  = -1;
                usedStockCount = -1;
            }
            // Otherwise there is no New/Used markup at all, i.e. no stock: the defaults above apply.

            // Current Amazon price (may be absent).
            var amazonPriceNode = document.QuerySelector("#priceblock_ourprice");
            int?amazonPrice     = null;

            if (amazonPriceNode != null)
            {
                amazonPrice = amazonPriceNode.TextContent.PriceToValue();
            }

            // Discount amount (may be absent).
            var priceSavingNode = document.QuerySelector(".priceBlockSavingsString");
            int?priceSaving     = null;

            if (priceSavingNode != null)
            {
                string savingText = priceSavingNode.TextContent.Trim();
                // Keep only the amount; without the success check an unmatched text would make
                // Substring(1) throw on the empty match value.
                Match match = Regex.Match(savingText, @"¥\d{1,3}(,\d{1,3})*\b");
                if (match.Success)
                {
                    priceSaving = match.Value.Substring(1).PriceToValue();
                }
            }

            // Merchant text, used to decide whether the offer by Amazon itself is available.
            // NOTE(review): #merchant-info, #SalesRank and #availability are assumed to be present;
            // a page without them still throws here, as before.
            string merchantText = document.QuerySelector("#merchant-info").TextContent;
            // Overall sales ranking.
            var    rankInfo = document.QuerySelector("#SalesRank > .value").FirstChild;
            string ranking  = rankInfo.TextContent.Trim();

            // Drop the last character of the ranking text.
            ranking = ranking.Remove(ranking.Length - 1, 1);
            // Availability status of the product.
            string status = document.QuerySelector("#availability > span").TextContent.Trim();

            // Displayed price is chosen in the order: Amazon offer > new > used.
            int?price = amazonPrice ?? newStockPrice ?? usedStockPrice;

            product.Price = price;
            product.PriceHistory[DateTime.Now] = price;
            product.PriceSaving = priceSaving;
            product.SetStockInfo(newStockPrice, newStockCount, usedStockPrice, usedStockCount);
            product.MerchantStatus = this.Status(merchantText);
            product.Ranking        = ranking;
            product.Status         = status;

            return product;
        }
        /// <summary>
        /// Converts every <c>article.product</c> advertisement of a listing page into an <see cref="Apartment"/>.
        /// </summary>
        /// <param name="document">Parsed listing page.</param>
        /// <param name="disctrict">District stored on every apartment that is produced.</param>
        /// <returns>The apartments found on the page, in document order.</returns>
        private static List <Apartment> ParseApartments(AngleSharp.Dom.IDocument document, string disctrict)
        {
            List <Apartment> apartments = new List <Apartment>();

            var ads = document.QuerySelectorAll("article.product");

            foreach (var ad in ads)
            {
                var apartment = new Apartment
                {
                    Status   = Status.Available,
                    District = disctrict
                };

                // The numeric id is whatever follows the first '-' of the element id.
                string idAttribute = ad.GetAttribute("id");
                int    id          = int.Parse(idAttribute.Substring(idAttribute.IndexOf('-') + 1));
                apartment.Id = id;

                var    detailElement = ad.QuerySelector("h3 strong");
                string street        = detailElement.TextContent.Trim();
                apartment.Address = street;

                // "disposition, area" taken from the note; stays null for notes that are
                // neither a rental nor a sale.
                string[] metadata = null;
                var      element  = ad.QuerySelector("p.product__note");
                if (element.TextContent.Contains("Pronájem bytu"))
                {
                    // Rental: the price is "rent + fees" when it contains a '+'.
                    var    priceElement = ad.QuerySelector("strong.product__value");
                    string price        = priceElement.TextContent;
                    if (price.Contains("+"))
                    {
                        string[] prices = price.Split('+');
                        apartment.Rent = decimal.Parse(prices[0].Replace("Kč", "").Replace(".", "").Trim());
                        apartment.Fees = decimal.Parse(prices[1].Replace("Kč", "").Replace(".", "").Trim());
                    }

                    metadata = element.TextContent.Replace("Pronájem bytu ", "").Split(',');
                }
                if (element.TextContent.Contains("Prodej bytu"))
                {
                    // Sale: only the part before a '+' is the purchase price.
                    var    priceElement = ad.QuerySelector("strong.product__value");
                    string price        = priceElement.TextContent;
                    if (price.Contains("+"))
                    {
                        string[] prices = price.Split('+');
                        price = prices[0];
                    }
                    apartment.PurchasePrice = decimal.Parse(price.Replace("Kč", "").Replace(".", "").Trim());

                    metadata = element.TextContent.Replace("Prodej bytu ", "").Split(',');
                }

                // metadata is null for any other kind of advertisement; without the null check
                // such an ad aborted the whole page with a NullReferenceException.
                if (metadata != null && metadata.Length == 2)
                {
                    apartment.Disposition = metadata[0].Trim();
                    apartment.Area        = int.Parse(metadata[1].Replace(" m²", "").Trim());
                }

                element = ad.QuerySelector("p.product__info-text");
                apartment.Description = element.TextContent.Trim();

                element = ad.QuerySelector("a.product__link");
                apartment.DetailsLink = element.GetAttribute("href");

                // additional attributes
                element = ad.QuerySelector("div.product__header span.product__label span.badge");
                if (element != null)
                {
                    switch (element.TextContent)
                    {
                    case "Nabídka Premium uživatele":
                        apartment.IsPremiumOffer = true;
                        break;

                    case "Rezervováno":
                        apartment.Status = Status.Reserved;
                        break;
                    }
                }

                apartments.Add(apartment);
            }

            return apartments;
        }
示例#13
0
        /// <summary>
        /// Looks up a story by id and posts an embed with its title, author, cover, scores and counters.
        /// </summary>
        /// <param name="story_id">Id of the story to look up.</param>
        /// <returns>A task that completes once the message(s) have been sent.</returns>
        /// <remarks>
        /// Problems while reading individual parts of the page are collected and posted as a
        /// separate error embed after the story embed. When the page cannot be loaded at all,
        /// only the error embed is posted.
        /// </remarks>
        public async Task Stats(int story_id = 0)
        {
            string        fullQueryLink = royalroadl_domain + "fiction/" + story_id;
            var           embed         = new EmbedBuilder();
            StringBuilder error_message = new StringBuilder();

            AngleSharp.Dom.IDocument document = null;
            try
            {
                var config = Configuration.Default.WithDefaultLoader();
                document = await BrowsingContext.New(config).OpenAsync(fullQueryLink);
            }
            catch (Exception ex)
            {
                error_message.AppendLine($"Message: {ex.Message}\nSource: searching the site");
            }

            // Without a document nothing below can work; report the failure instead of
            // running into a NullReferenceException that hides the collected error.
            if (document == null)
            {
                if (error_message.Length == 0)
                {
                    error_message.AppendLine("Message: no document was returned\nSource: searching the site");
                }

                await Context.Channel.EmbedAsync(new EmbedBuilder().WithErrorColor().WithTitle("Error").WithDescription(error_message.ToString())).ConfigureAwait(false);
                return;
            }

            var existElem = document.QuerySelector("div.col-md-12.page-404");

            if (existElem != null)
            {
                embed.WithErrorColor().WithDescription($"The story with the ID \"{story_id}\" does not exist.");
            }
            else
            {
                //  title
                var    titleElem = document.QuerySelector("h2.font-white");
                string titleText = "";
                try
                {
                    titleText = titleElem.Text();
                }
                catch (Exception ex)
                {
                    error_message.AppendLine($"Message: {ex.Message}\nSource: title");
                }

                //  author
                var    authorElem = document.QuerySelector("h4.font-white");
                string authorText = "";
                try
                {
                    authorText = authorElem.Text();
                }
                catch (Exception ex)
                {
                    error_message.AppendLine($"Message: {ex.Message}\nSource: author");
                }

                //  cover art
                var    imageElem = document.QuerySelector("img.img-offset");
                string imageUrl  = "";
                try
                {
                    imageUrl = ((IHtmlImageElement)imageElem).Source;
                }
                catch (Exception ex)
                {
                    error_message.AppendLine($"Message: {ex.Message}\nSource: image");
                }

                var display = new StringBuilder();
                display.AppendLine($"**{titleText}** {authorText}\n");

                //  stats base: the first column holds the scores, the second the counters
                var div_stats_elems = document.QuerySelectorAll("div.stats-content > div.col-sm-6");
                var fragment_parser = new HtmlParser();

                //  score
                display.AppendLine("**Scores**");
                try
                {
                    // Indexing happens inside the try so that a missing column is reported
                    // like every other part instead of aborting the command.
                    var score_p_elem = fragment_parser.Parse(div_stats_elems[0].InnerHtml);
                    var score_elems  = score_p_elem.QuerySelectorAll("span.star");

                    foreach (var score_elm in score_elems)
                    {
                        var s_title = score_elm.GetAttribute("data-original-title");
                        var s_stars = score_elm.GetAttribute("data-content");

                        display.AppendLine($"`{s_title}:` {s_stars} 🌟");
                    }
                }
                catch (Exception ex)
                {
                    error_message.AppendLine($"Message: {ex.Message}\nSource: scores");
                }

                display.AppendLine("");

                //  count
                display.AppendLine("**Count**");
                try
                {
                    var count_p_elem = fragment_parser.Parse(div_stats_elems[1].InnerHtml);
                    var count_elems  = count_p_elem.QuerySelectorAll("li.bold.uppercase");

                    // The list alternates label / value, so two entries are consumed per line.
                    // An unpaired trailing entry raises and is reported through the catch below.
                    for (var i = 0; i < count_elems.Length;)
                    {
                        var str = $"`{count_elems[i++].Text()}`";
                        str += $" {count_elems[i++].Text()}";
                        display.AppendLine(str);
                    }
                }
                catch (Exception ex)
                {
                    error_message.AppendLine($"Message: {ex.Message}\nSource: stats");
                }

                embed.WithOkColor().WithTitle(titleText);
                embed.WithDescription(display.ToString());
                embed.WithUrl(fullQueryLink);
                embed.WithImageUrl(imageUrl);
            }

            try
            {
                await Context.Channel.EmbedAsync(embed).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                error_message.AppendLine($"Message: {ex.Message}\nSource: sending story message");
            }

            if (!string.IsNullOrWhiteSpace(error_message.ToString().Trim()))
            {
                await Context.Channel.EmbedAsync(new EmbedBuilder().WithErrorColor().WithTitle("Error").WithDescription(error_message.ToString())).ConfigureAwait(false);
            }
        }