/// <summary>
/// Collects card names from every "card-list" table on an archetype page and,
/// when the page links to further results, merges in the names found there.
/// </summary>
/// <param name="archetypeUrl">Address of the archetype page to load.</param>
/// <returns>
/// The inner text of each first-column link in the matching tables. When a
/// further-results link exists the combined list is de-duplicated via <c>Union</c>.
/// </returns>
public IEnumerable<string> Cards(Uri archetypeUrl)
{
    var cardList = new List<string>();
    var archetypeWebPage = _htmlWebPage.Load(archetypeUrl);

    // SelectNodes can return null instead of an empty collection when nothing
    // matches, so both lookups are guarded before any LINQ operator touches them.
    var allTables = archetypeWebPage.DocumentNode.SelectNodes("//table");
    if (allTables != null)
    {
        var cardListTables = allTables.Where(table =>
        {
            var cssClass = table.Attributes["class"];
            return cssClass != null && cssClass.Value.Contains("card-list");
        });

        foreach (var table in cardListTables)
        {
            var cardLinks = table.SelectNodes("./tr/td[position() = 1]/a");
            if (cardLinks != null)
            {
                cardList.AddRange(cardLinks.Select(link => link.InnerText));
            }
        }
    }

    var furtherResultsUrl = GetFurtherResultsUrl(archetypeWebPage);
    if (!string.IsNullOrEmpty(furtherResultsUrl))
    {
        // A link without "http" is treated as relative to the configured wiki domain.
        if (!furtherResultsUrl.Contains("http"))
        {
            furtherResultsUrl = _config.WikiaDomainUrl + furtherResultsUrl;
        }

        cardList = cardList.Union(CardsFromFurtherResultsUrl(furtherResultsUrl)).ToList();
    }

    return cardList;
}
/// <summary>
/// Verifies that GetBanlists yields a non-empty result when the banlist page
/// stub reports one URL for 2017 and the loaded HTML carries a wgArticleId.
/// </summary>
public void Given_BanlistType_And_A_BanlistUrl_Should_Return_All_Banlists_GroupedBy_Year()
{
    // Arrange
    const string url = "http://www.youtube.com";
    const BanlistType type = BanlistType.Tcg;

    // Page content the stubbed loader hands back for any address.
    var stubbedPage = new HtmlDocument();
    stubbedPage.DocumentNode.InnerHtml = "\"<script>wgArticleId=296,</script>\"";
    _htmlWebPage.Load(Arg.Any<Uri>()).Returns(stubbedPage);

    // One year bucket holding a single banlist address.
    var urlsByYear = new Dictionary<string, List<Uri>>
    {
        { "2017", new List<Uri> { new Uri("http://www.youtube.com") } }
    };
    _banlistWebPage
        .GetBanlistUrlList(Arg.Any<BanlistType>(), Arg.Any<string>())
        .Returns(urlsByYear);

    // Act
    var banlists = _sut.GetBanlists(type, url);

    // Assert
    banlists.Should().NotBeEmpty();
}
/// <summary>
/// Builds a map from banlist year to the wiki article ids of that year's banlist pages.
/// </summary>
/// <param name="banlistType">Banlist format forwarded to the banlist URL lookup.</param>
/// <param name="banlistUrl">Address forwarded to the banlist URL lookup.</param>
/// <returns>
/// One entry per year returned by the lookup. Each value holds the ids parsed from
/// the <c>wgArticleId=…,</c> token of every page loaded for that year. Pages without
/// a parsable id are skipped.
/// </returns>
/// <exception cref="FormatException">A year key is not a valid integer.</exception>
public IDictionary<int, List<int>> GetBanlists(BanlistType banlistType, string banlistUrl)
{
    var articleIdsList = new Dictionary<int, List<int>>();

    // Built once and reused for every page instead of per URL.
    var articleIdRegex = new Regex("wgArticleId=([^,]*),");

    var banlistUrlsByYear = _banlistWebPage.GetBanlistUrlList(banlistType, banlistUrl);

    foreach (var banlist in banlistUrlsByYear)
    {
        var banlistYear = int.Parse(banlist.Key);
        var articleIds = new List<int>();

        foreach (var url in banlist.Value)
        {
            var banlistPageHtml = _htmlWebPage.Load(url).DocumentNode.InnerHtml;
            var match = articleIdRegex.Match(banlistPageHtml);

            // A page without the token (or with a non-numeric value) used to blow up
            // in int.Parse(""); it is now skipped so one bad page does not abort the run.
            int wgArticleId;
            if (match.Success && int.TryParse(match.Groups[1].Value, out wgArticleId))
            {
                articleIds.Add(wgArticleId);
            }
        }

        articleIdsList.Add(banlistYear, articleIds);
    }

    return articleIdsList;
}
/// <summary>
/// Loads a wiki page and extracts the image URL from its
/// <c>pi-image-thumbnail</c> image element.
/// </summary>
/// <param name="url">Path appended to the configured wiki domain.</param>
/// <returns>
/// The result of <c>ImageHelper.ExtractImageUrl</c> for the image's <c>src</c>
/// attribute, or <c>null</c> when the image element or its <c>src</c> attribute is absent.
/// </returns>
public string FromWebPage(string url)
{
    var archetypeWebPage = _htmlWebPage.Load(_config.WikiaDomainUrl + url);
    var srcElement = archetypeWebPage.DocumentNode.SelectSingleNode("//img[@class='pi-image-thumbnail']");

    // The attribute lookup itself can come back null when the <img> has no src,
    // so the chain must stay null-conditional all the way to .Value.
    var srcAttribute = srcElement?.Attributes?["src"]?.Value;

    return srcAttribute != null ? ImageHelper.ExtractImageUrl(srcAttribute) : null;
}
/// <summary>
/// Walks a paginated semantic search result and collects one entry per table row.
/// </summary>
/// <param name="url">Address of the first result page.</param>
/// <returns>
/// Cards with a non-blank name, in page order. Each card's <c>Name</c> is the trimmed
/// text of the first cell and its <c>Url</c> is the <c>href</c> of that cell's link
/// (null when the link or attribute is missing).
/// </returns>
public List<SemanticCard> CardsByUrl(string url)
{
    HtmlNode nextLink;
    var semanticCardList = new List<SemanticCard>();

    do
    {
        var doc = _htmlWebPage.Load(url);

        // Two table class variants are tried; the second is only queried when the first finds nothing.
        var tableRows = doc.DocumentNode.SelectNodes("//table[@class='sortable wikitable smwtable']/tbody/tr")
                        ?? doc.DocumentNode.SelectNodes("//table[@class='sortable wikitable smwtable card-list']/tbody/tr");

        // Neither variant may be present (e.g. an empty result page); treat that as "no rows".
        if (tableRows != null)
        {
            foreach (var row in tableRows)
            {
                var semanticCard = new SemanticCard
                {
                    Name = row.SelectSingleNode("td[position() = 1]")?.InnerText.Trim(),
                    Url = row.SelectSingleNode("td[position() = 1]/a")?.Attributes["href"]?.Value,
                };

                if (!string.IsNullOrWhiteSpace(semanticCard.Name))
                {
                    semanticCardList.Add(semanticCard);
                }
            }
        }

        nextLink = doc.DocumentNode.SelectSingleNode("//a[contains(text(), 'Next')]");
        if (nextLink != null)
        {
            var nextHref = nextLink.Attributes["href"]?.Value;
            if (nextHref == null)
            {
                // A "Next" anchor without an href cannot be followed; stop paging instead of crashing.
                break;
            }

            // The href is HTML-encoded in the markup, so decode it before reuse as a URL.
            url = WebUtility.HtmlDecode($"{_config.WikiaDomainUrl}{nextHref}");
        }
    } while (nextLink != null);

    return semanticCardList;
}
/// <summary>
/// Loads the page at <paramref name="url"/> and delegates to the overload
/// that builds a card from the loaded document.
/// </summary>
/// <param name="url">Address of the card page.</param>
/// <returns>Whatever the document-based overload returns for the loaded page.</returns>
public YugiohCard GetYugiohCard(Uri url)
{
    return GetYugiohCard(_htmlWebPage.Load(url));
}
/// <summary>
/// Loads the page at <paramref name="banlistUrl"/> and delegates to the overload
/// that locates the banlist node in an already-loaded page.
/// </summary>
/// <param name="banlistType">Banlist format forwarded to the overload.</param>
/// <param name="banlistUrl">Address of the page to load.</param>
/// <returns>Whatever the page-based overload returns.</returns>
public HtmlNode GetBanlistHtmlNode(BanlistType banlistType, string banlistUrl)
{
    var banlistPage = _htmlWebPage.Load(banlistUrl);

    return GetBanlistHtmlNode(banlistType, banlistPage);
}
/// <summary>
/// Loads the page at <paramref name="url"/> and records it, with its URI,
/// as the current page.
/// </summary>
/// <param name="url">Address of the page; must be parsable by <see cref="Uri"/>.</param>
/// <exception cref="UriFormatException"><paramref name="url"/> is not a valid URI.</exception>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
public void Load(string url)
{
    // Parse first so an invalid address fails before any page load is attempted.
    var pageUri = new Uri(url);
    var loadedPage = _htmlWebPage.Load(url);

    // Assign both members only after the load succeeded, so a failed load never
    // leaves CurrentWebPageUri pointing at a page that was not actually loaded.
    CurrentWebPageUri = pageUri;
    _currentWebPage = loadedPage;
}