// Headings on the review pages that are known section titles rather than card names
// (compared as exact, case-sensitive matches).
private static readonly string[] KnownNonCardHeadings =
{
    "Ratings Scale",
    "Artifacts",
    "Gates/Shocks",
    "Lockets",
    "Temples",
    "Dimir Locket, Golgari Locket, Izzet Locket, Selesnya Locket",
    "Boros Guildgate, Izzet Guildgate, Golgari Guildgate, Dimir Guildgate, Selesnya Guildgate",
    "Boros", "Azorius", "Dimir", "Gruul", "Selesnya",
    "Simic", "Orzhov", "Rakdos", "Golgari", "Izzet",
};

// TYPOS ON THE WEBSITE TO MAINTAIN: each entry is { text as it appears on the site, corrected text }.
private static readonly string[][] WebsiteTypoFixes =
{
    // GRN
    new[] { "DEAFENING CLARION", "Deafening Clarion" },
    // RNA
    new[] { "Sagittar's Volley", "Sagittars' Volley" },
    // WAR
    new[] { "Sarkhan, the Masterless", "Sarkhan the Masterless" },
    // M20
    new[] { "Gruesome Scourge", "Gruesome Scourger" },
    // ELD
    new[] { "Ardvenale", "Ardenvale" },
    new[] { "Lochthwain Paladin", "Locthwain Paladin" },
    new[] { "Bake Into a Pie", "Bake into a Pie" },
    new[] { "Edgewall Inkeeper", "Edgewall Innkeeper" },
    new[] { "Torbran, Thane of the Red Fell", "Torbran, Thane of Red Fell" },
};

/// <summary>
/// Downloads the review page of every (set, colour) entry in <c>modelBySet</c>, reads the
/// rating and description that follow each card heading, and collects the numbered
/// "1. " .. "5. " entries of pages whose colour key is a single character.
/// Ratings from <c>manualRatings</c> are merged into every scraped set at the end.
/// </summary>
/// <param name="setFilter">
/// Key of the only set to scrape; the empty string (default) scrapes every set.
/// </param>
/// <returns>The scraped ratings, keyed by set.</returns>
public DraftRatings Scrape(string setFilter = "")
{
    // Hash set instead of an array: the membership test runs once per heading on every page.
    var allCardsNames = new HashSet<string>(allCards.Select(i => i.name));
    var ret = new DraftRatings();

    foreach (var set in modelBySet.Where(i => setFilter == "" || i.Key == setFilter))
    {
        var setInfo = new DraftRatingScraperResultForSet();

        foreach (var c in set.Value.DictUrlPartColor)
        {
            var draftRatings = new List<DraftRating>();
            var urlFormatted = string.Format(UrlToScrapeModel.UrlTemplate, set.Value.UrlPartSet, c.Value);

            HtmlWeb hw = new HtmlWeb();
            HtmlDocument doc = hw.Load(urlFormatted);

            var postContentSelector = "//div[contains(concat(' ', normalize-space(@class), ' '), ' entry-content ')]";

            // Card names are published either as <h1> or as <h2>; use whichever level is more frequent.
            var elemsH2 = doc.DocumentNode.SelectNodes($"{postContentSelector}/h2");
            var elemsH1 = doc.DocumentNode.SelectNodes($"{postContentSelector}/h1");
            var elemsCards = ((elemsH1?.Count ?? 0) > (elemsH2?.Count ?? 0)) ? elemsH1 : elemsH2;

            // SelectNodes yields null when nothing matches; a page without headings simply has no cards.
            if (elemsCards != null)
            {
                foreach (var elemCardName in elemsCards)
                {
                    var cardName = NormalizeScrapedCardName(elemCardName.InnerText);

                    // Skip those elements that don't correspond to actual cards
                    if (allCardsNames.Contains(cardName) == false)
                    {
                        if (IsKnownNonCardHeading(cardName) == false)
                        {
                            // Unknown heading: stop in the debugger so the typo/heading lists can be extended.
                            System.Diagnostics.Debugger.Break();
                        }

                        continue;
                    }

                    string rating;
                    string desc;
                    ReadRatingAndDescription(elemCardName, out rating, out desc);

                    // Special treatment
                    if (cardName == "Roving Keep")
                    {
                        rating = "1.5";
                    }

                    draftRatings.Add(new DraftRating
                    {
                        CardName = cardName,
                        Rating = rating,
                        Description = desc,
                    });
                }
            }

            setInfo.Ratings = setInfo.Ratings.Union(draftRatings).ToArray();

            if (c.Key.Length == 1)
            {
                setInfo.TopCommonCardsByColor[c.Key] = ExtractTopFiveEntries(doc, postContentSelector);
            }
        }

        ret.RatingsBySet.Add(set.Key, setInfo);
    }

    // Added manually because Missing
    // NOTE(review): the indexer is used as-is; presumably manualRatings has an entry for every scraped set - confirm.
    foreach (var set in ret.RatingsBySet)
    {
        set.Value.Ratings = set.Value.Ratings.Union(manualRatings[set.Key]).ToArray();
    }

    return ret;
}

// Decodes a heading's text and applies the known website typo corrections.
private static string NormalizeScrapedCardName(string rawHeadingText)
{
    var name = WebUtility.HtmlDecode(rawHeadingText).Replace("’", "'").Trim();

    foreach (var fix in WebsiteTypoFixes)
    {
        var typo = fix[0];
        var corrected = fix[1];

        // When the corrected spelling itself contains the typo ("Gruesome Scourge" -> "Gruesome Scourger"),
        // replacing an already-correct name would corrupt it ("Gruesome Scourgerr"), so leave it alone.
        if (corrected.Contains(typo) && name.Contains(corrected))
        {
            continue;
        }

        name = name.Replace(typo, corrected);
    }

    return name;
}

// Tells whether a heading that is not a card name is one of the expected section titles.
private static bool IsKnownNonCardHeading(string heading)
{
    return heading.Contains("LSV")
        || heading.StartsWith("Top ")
        || heading.StartsWith("Lifegain Lands ")
        || heading.StartsWith("Most Important")
        || heading.EndsWith("Reviews")
        || heading.EndsWith("Rankings")
        || Array.IndexOf(KnownNonCardHeadings, heading) >= 0;
}

// Walks the siblings after a card heading, picking up the "Limited:" rating and the
// paragraphs/lists that make up the description.
private static void ReadRatingAndDescription(HtmlNode cardHeading, out string rating, out string description)
{
    rating = "";
    var desc = "";
    var lookingForDesc = true;
    var node = cardHeading.NextSibling;

    // Keep going until a rating has been found AND the description block has ended.
    // The null check stops the walk when the siblings run out instead of dereferencing null;
    // whatever was collected up to that point is returned.
    while (node != null && (rating == "" || lookingForDesc))
    {
        if ((node.Name == "h3" || node.Name == "h4") && node.InnerText.StartsWith("Limited:"))
        {
            rating = WebUtility.HtmlDecode(node.InnerText).Replace("Limited:", "").Trim();
        }

        if (node.Name == "p" || node.Name == "ul")
        {
            desc += $"{WebUtility.HtmlDecode(node.InnerText).Trim()}{Environment.NewLine}{Environment.NewLine}";
        }

        node = node.NextSibling;
        if (node == null)
        {
            break;
        }

        // The description continues over text/paragraph/list/h3 nodes, but stops at the
        // last sibling and at a node whose text starts with "Tags:".
        lookingForDesc = (node.Name == "#text" || node.Name == "p" || node.Name == "ul" || node.Name == "h3")
            && node.NextSibling != null
            && node.InnerText.StartsWith("Tags:") == false;
    }

    description = desc.Trim();
}

// Collects the lines starting with "1. " .. "5. " from the paragraphs and h3/h4 headings of the post.
private static DraftRatingTopCard[] ExtractTopFiveEntries(HtmlDocument doc, string postContentSelector)
{
    var nodes = doc.DocumentNode.SelectNodes(
        $"{postContentSelector}/p | {postContentSelector}/h4 | {postContentSelector}/h3");

    // SelectNodes yields null when nothing matches.
    if (nodes == null)
    {
        return new DraftRatingTopCard[0];
    }

    return nodes
        .Select(i => i.InnerText.Trim())
        .SelectMany(i => i.Split('\n').Select(x => x.Trim()))
        .Where(i => i.StartsWith("1. ") || i.StartsWith("2. ") || i.StartsWith("3. ") || i.StartsWith("4. ") || i.StartsWith("5. "))
        // First character is the rank; everything after the "N." prefix is the card name.
        .Select(i => new DraftRatingTopCard(Convert.ToInt32(i.Substring(0, 1)), WebUtility.HtmlDecode(i.Substring(2, i.Length - 2)).Trim()))
        .ToArray();
}
/// <summary>
/// Downloads the rating files listed in <c>jsFilesPerSet</c> for every set in <c>sets</c>,
/// keeps the cards whose name is present in <c>allCards</c>, and builds for each colour
/// the five highest-rated cards of rarity "c".
/// </summary>
/// <param name="setFilter">
/// The only set to scrape; the empty string (default) scrapes every set.
/// </param>
/// <returns>The scraped ratings, keyed by set.</returns>
public DraftRatings Scrape(string setFilter = "")
{
    // Hash set instead of an array: the membership test runs once per downloaded card.
    var allCardsNames = new HashSet<string>(allCards.Select(i => i.name));
    var ret = new DraftRatings();

    // One client for the whole run, disposed when done, instead of a new undisposed client per file.
    using (var client = new HttpClient())
    {
        foreach (var set in sets.Where(i => setFilter == "" || i == setFilter))
        {
            var draftRatings = new List<DraftRating>();

            // Every card downloaded for THIS set, across all of its files. Kept per set so the
            // top-5 lists below never use only the last file or another set's cards.
            var setCards = new List<DraftSimCard>();

            foreach (var fileTemplate in jsFilesPerSet[set])
            {
                var urlFormatted = string.Format(UrlToScrapeModel.UrlTemplate, fileTemplate);

                string jsonData;
                using (var response = client.GetAsync(urlFormatted).Result)
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        continue;
                    }

                    jsonData = response.Content.ReadAsStringAsync().Result.Trim();
                }

                // this comes through as a javascript assignment. remove anything prior to the start of the json array
                var arrayStart = jsonData.IndexOf('[');
                if (arrayStart < 0)
                {
                    // Nothing to parse; skip the file rather than hand non-JSON text to the deserializer.
                    Console.WriteLine($"ACK. No JSON array found in {urlFormatted}");
                    continue;
                }

                jsonData = jsonData.Substring(arrayStart);

                // get rid of the ending javascript ; also
                if (jsonData.EndsWith(";", StringComparison.Ordinal))
                {
                    jsonData = jsonData.Remove(jsonData.Length - 1);
                }

                var fileCards = JsonConvert.DeserializeObject<List<DraftSimCard>>(jsonData);
                setCards.AddRange(fileCards);

                foreach (var card in fileCards)
                {
                    // The files use '_' where the card name has a space.
                    card.name = card.name.Replace("_", " ");

                    if (allCardsNames.Contains(card.name) == false)
                    {
                        Console.WriteLine($"ACK. {card.name} not found in list");
                        continue;
                    }

                    draftRatings.Add(new DraftRating
                    {
                        CardName = card.name,
                        Rating = card.myrating,
                        Description = ""
                    });
                }
            }

            var setinfo = new DraftRatingScraperResultForSet
            {
                Ratings = draftRatings
            };

            for (int colorIndex = 0; colorIndex < colors.Length; colorIndex++)
            {
                // NOTE(review): the ordering key is myrating as-is; if it is a string the order is
                // textual rather than numeric - confirm against DraftSimCard.
                var top5Colors = setCards
                    .Where(i => i.colorsort == colorIndex && string.Equals(i.rarity, "c", StringComparison.OrdinalIgnoreCase))
                    .OrderByDescending(i => i.myrating)
                    .Take(5)
                    .Select((i, idx) => new DraftRatingTopCard(idx + 1, i.name))
                    .ToArray();

                setinfo.TopCommonCardsByColor.Add(colors[colorIndex], top5Colors);
            }

            ret.RatingsBySet.Add(set, setinfo);
        }
    }

    return ret;
}