/// <summary>
/// Extracts a fragment of the currently loaded page (the <c>content</c> field) and wraps it,
/// together with the page title and URL, in a new <see cref="HtmlContent"/>.
/// </summary>
/// <param name="xpath">
/// Selector for the fragment. <c>"all"</c> (any casing) selects the whole document; a value
/// starting with <c>/</c> is evaluated as an XPath expression; any other value is compared
/// against the id and the exact <c>class</c> attribute value of the page's <c>div</c> elements.
/// </param>
/// <returns>The populated content, or <c>null</c> when the selector matches nothing.</returns>
/// <exception cref="InvalidOperationException">No page has been loaded yet.</exception>
/// <exception cref="ArgumentNullException"><paramref name="xpath"/> is <c>null</c>.</exception>
public HtmlContent GetContent(string xpath)
{
    // State check stays first so callers relying on InvalidOperationException keep seeing it.
    if (this.url == null || content == null)
    {
        throw new InvalidOperationException("call IContentExtractor.GotoPage first");
    }

    if (xpath == null)
    {
        throw new ArgumentNullException(nameof(xpath));
    }

    HtmlContent doc = new HtmlContent();
    doc.LastUpdatedDate = DateTime.Now;

    HtmlDocument htmldoc = new HtmlDocument();
    htmldoc.LoadHtml(content);

    HtmlNode titleNode = htmldoc.DocumentNode.SelectSingleNode("//title");
    if (titleNode != null)
    {
        doc.Title = titleNode.InnerText;
    }

    HtmlNode targetNode;
    if (string.Equals(xpath, "all", StringComparison.OrdinalIgnoreCase))
    {
        targetNode = htmldoc.DocumentNode;
    }
    else if (!xpath.StartsWith("/", StringComparison.Ordinal))
    {
        // Not an XPath expression: treat the value as a div id or class name.
        // SelectNodes yields null (not an empty collection) when the page has no div at all.
        HtmlNodeCollection divs = htmldoc.DocumentNode.SelectNodes("//div");
        targetNode = divs == null
            ? null
            : divs.FirstOrDefault(a => a.Id == xpath || (a.Attributes["class"] != null && a.Attributes["class"].Value == xpath));
    }
    else
    {
        // Starts with '/': evaluate as an XPath expression.
        targetNode = htmldoc.DocumentNode.SelectSingleNode(xpath);
    }

    if (targetNode == null)
    {
        // Every kind of miss is reported the same way, including a page without any div.
        if (Logger != null)
        {
            Logger.WarnFormat("[Mine Fails] '{1}' container not found - {0}", url, xpath);
        }

        return null;
    }

    doc.Content = targetNode.OuterHtml;
    doc.Url = url;
    return doc;
}
/// <summary>
/// Copies the trimmed value of the first row labelled "Expansion" into
/// <c>scrappedCard.Expansion</c>.
/// </summary>
/// <param name="scrappedCard">Model that receives the value.</param>
/// <param name="table">
/// Rows to search. The label is read from child node 1 and the value from child node 2 of each row.
/// </param>
/// <remarks>
/// Leaves the model untouched when <paramref name="table"/> is <c>null</c> or no suitable row exists.
/// </remarks>
private static void SetExpansion(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
{
    // HtmlAgilityPack's SelectNodes returns null when nothing matches, so tolerate a null collection.
    if (table == null)
    {
        return;
    }

    // Rows with fewer than three child nodes cannot hold a label/value pair; skip them
    // rather than indexing out of range.
    var row = table.FirstOrDefault(x => x.ChildNodes.Count > 2 && x.ChildNodes[1].InnerText.Contains("Expansion"));
    if (row != null)
    {
        scrappedCard.Expansion = row.ChildNodes[2].InnerText.Trim();
    }
}
/// <summary>
/// Parses the value of the first row labelled "Converted Mana Cost" as an integer and stores it
/// in <c>scrappedCard.ConvertedManaCost</c>.
/// </summary>
/// <param name="scrappedCard">Model that receives the value.</param>
/// <param name="table">
/// Rows to search. The label is read from child node 1 and the value from child node 2 of each row.
/// </param>
/// <remarks>
/// Leaves the model untouched when <paramref name="table"/> is <c>null</c>, no suitable row
/// exists, or the value is not a valid integer.
/// </remarks>
private static void SetConvertedManaCost(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
{
    // HtmlAgilityPack's SelectNodes returns null when nothing matches, so tolerate a null collection.
    if (table == null)
    {
        return;
    }

    // Rows with fewer than three child nodes cannot hold a label/value pair.
    var row = table.FirstOrDefault(x => x.ChildNodes.Count > 2 && x.ChildNodes[1].InnerText.Contains("Converted Mana Cost"));
    if (row == null)
    {
        return;
    }

    // Scraped text is machine data: parse it culture-independently and do not abort on
    // non-integer text.
    int cost;
    if (int.TryParse(
            row.ChildNodes[2].InnerText.Trim(),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out cost))
    {
        scrappedCard.ConvertedManaCost = cost;
    }
}
/// <summary>
/// Splits the value of the first row labelled "Types" into individual words and stores them
/// in <c>scrappedCard.Types</c>.
/// </summary>
/// <param name="scrappedCard">Model that receives the list.</param>
/// <param name="table">
/// Rows to search. The label is read from child node 1 and the value from child node 2 of each row.
/// </param>
/// <remarks>
/// Leaves the model untouched when <paramref name="table"/> is <c>null</c> or no suitable row exists.
/// </remarks>
private static void SetTypes(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
{
    // HtmlAgilityPack's SelectNodes returns null when nothing matches, so tolerate a null collection.
    if (table == null)
    {
        return;
    }

    // Rows with fewer than three child nodes cannot hold a label/value pair.
    var row = table.FirstOrDefault(x => x.ChildNodes.Count > 2 && x.ChildNodes[1].InnerText.Contains("Types"));
    if (row == null)
    {
        return;
    }

    // Drop the " —" separator, then split on spaces. Empty entries (from repeated spaces or an
    // empty value) are discarded so the list contains only real words.
    scrappedCard.Types = row.ChildNodes[2].InnerText
        .Trim()
        .Replace(" —", "")
        .Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries)
        .ToList();
}
/// <summary>
/// Reads the value of the first row labelled "P/T", strips surrounding parentheses, splits it on
/// <c>/</c> and stores the two parts in <c>scrappedCard.Power</c> and <c>scrappedCard.Thoughness</c>.
/// </summary>
/// <param name="scrappedCard">Model that receives the values.</param>
/// <param name="table">
/// Rows to search. The label is read from child node 1 and the value from child node 2 of each row.
/// </param>
/// <remarks>
/// Leaves the model untouched when <paramref name="table"/> is <c>null</c>, no suitable row
/// exists, or the value contains no <c>/</c>.
/// </remarks>
private static void SetPowerAndThoughness(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
{
    // HtmlAgilityPack's SelectNodes returns null when nothing matches, so tolerate a null collection.
    if (table == null)
    {
        return;
    }

    // Rows with fewer than three child nodes cannot hold a label/value pair.
    var row = table.FirstOrDefault(x => x.ChildNodes.Count > 2 && x.ChildNodes[1].InnerText.Contains("P/T"));
    if (row == null)
    {
        return;
    }

    var pt = row.ChildNodes[2].InnerText.Trim().Trim('(', ')').Split('/');

    // Without a '/' there is no second part to read; skip instead of indexing out of range.
    if (pt.Length < 2)
    {
        return;
    }

    // Trim each part so padding around the slash (e.g. "2 / 3") does not leak into the values.
    scrappedCard.Power = pt[0].Trim();
    scrappedCard.Thoughness = pt[1].Trim();
}
/// <summary>
/// Walks the value cell of the first row labelled "Mana Cost" and derives the mana counters of
/// <paramref name="scrappedCard"/> from the <c>alt</c> attribute of each non-text child node.
/// </summary>
/// <param name="scrappedCard">
/// Model to update: <c>ColorlessMana</c> is assigned, the per-colour counters are incremented.
/// </param>
/// <param name="table">
/// Rows to search. The label is read from child node 1 and the value from child node 2 of each row.
/// </param>
/// <remarks>
/// Leaves the model untouched when <paramref name="table"/> is <c>null</c> or no suitable row exists.
/// </remarks>
private static void SetManaCost(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
{
    // HtmlAgilityPack's SelectNodes returns null when nothing matches, so tolerate a null collection.
    if (table == null)
    {
        return;
    }

    var row = table.FirstOrDefault(x =>
    {
        // Rows with fewer than three child nodes cannot hold a label/value pair.
        if (x.ChildNodes.Count <= 2)
        {
            return false;
        }

        // "Converted Mana Cost" also contains "Mana Cost"; make sure that row is never picked here.
        var label = x.ChildNodes[1].InnerText;
        return label.Contains("Mana Cost") && !label.Contains("Converted Mana Cost");
    });

    if (row == null)
    {
        return;
    }

    foreach (var childNode in row.ChildNodes[2].ChildNodes)
    {
        if (childNode.Name == "#text")
        {
            continue;
        }

        var alt = childNode.GetAttributeValue("alt", "");
        if (alt.Contains("CMC"))
        {
            // Takes the one or two characters at index 3 and 4.
            // NOTE(review): presumably alt is "CMC" followed by the amount — confirm against real markup.
            // An alt with nothing after the third character is ignored instead of indexing out of range.
            if (alt.Length > 3)
            {
                int length = alt.Length > 4 ? 2 : 1;
                scrappedCard.ColorlessMana = alt.Substring(3, length);
            }
        }
        else if (alt.Contains("Color R"))
        {
            scrappedCard.RedMana++;
        }
        else if (alt.Contains("Color U"))
        {
            scrappedCard.BlueMana++;
        }
        else if (alt.Contains("Color G"))
        {
            scrappedCard.GreenMana++;
        }
        else if (alt.Contains("Color W"))
        {
            scrappedCard.WhiteMana++;
        }
        else if (alt.Contains("Color B"))
        {
            scrappedCard.BlackMana++;
        }
    }
}
/// <summary>
/// Extends the base filter: when the base filter does not already reject the page, a page with
/// no picture node, or with a single picture whose <c>src</c> contains <c>bctp_28.gif</c>, is
/// filtered out and <paramref name="pageNum"/> is set to 500.
/// </summary>
/// <param name="pictureHtmlNode">Picture nodes found on the page.</param>
/// <param name="pageNum">Page counter; overwritten with 500 when this override filters the page.</param>
/// <returns><c>true</c> when the page is filtered out; otherwise <c>false</c>.</returns>
protected override bool FilterPage(HtmlNodeCollection pictureHtmlNode, ref int pageNum)
{
    // NOTE(review): 500 looks like a sentinel that makes the caller stop paging — confirm against caller.
    const int sentinelPageNum = 500;

    if (base.FilterPage(pictureHtmlNode, ref pageNum))
    {
        return true;
    }

    // HtmlAgilityPack's SelectNodes returns null when nothing matches; treat that like an empty collection.
    if (pictureHtmlNode == null || pictureHtmlNode.Count == 0)
    {
        pageNum = sentinelPageNum;
        return true;
    }

    if (pictureHtmlNode.Count > 1)
    {
        return false;
    }

    // GetAttributeValue falls back to "" so a picture node without a src attribute does not throw.
    var picturePathName = pictureHtmlNode[0].GetAttributeValue("src", "");
    if (picturePathName.Contains("bctp_28.gif"))
    {
        pageNum = sentinelPageNum;
        return true;
    }

    return false;
}
/// <summary>
/// Finds the first node whose <c>href</c> attribute contains <paramref name="url"/>.
/// </summary>
/// <param name="nodes">Candidate nodes, searched in order.</param>
/// <param name="url">Text to look for inside each node's <c>href</c> value.</param>
/// <returns>The first matching node, or <c>null</c> when none matches.</returns>
internal static HtmlNode GetNodeForUrl(HtmlNodeCollection nodes, string url)
{
    // A node without an href is compared as if the attribute were empty.
    var linkingNodes = nodes.Where(candidate =>
    {
        var href = candidate.GetAttributeValue("href", "");
        return href.Contains(url);
    });

    return linkingNodes.FirstOrDefault();
}
/// <summary>
/// Returns the <c>content</c> attribute value of the first node whose <c>property</c> attribute
/// value equals <paramref name="prop"/>.
/// </summary>
/// <param name="nodes">Candidate nodes, searched in order.</param>
/// <param name="prop">Value the <c>property</c> attribute is compared against.</param>
/// <returns>
/// The <c>content</c> value, or <c>null</c> when no node matches or the match has no
/// <c>content</c> attribute.
/// </returns>
private static string GetMetaProperty(HtmlNodeCollection nodes, string prop)
{
    var match = nodes.FirstOrDefault(node =>
    {
        // A missing attribute compares as null, exactly like the null-conditional form.
        var property = node.Attributes["property"];
        var propertyValue = property == null ? null : property.Value;
        return propertyValue == prop;
    });

    if (match == null)
    {
        return null;
    }

    var contentAttribute = match.Attributes["content"];
    return contentAttribute == null ? null : contentAttribute.Value;
}
/// <summary>
/// Finds the first node that has a child <c>span</c> whose text equals
/// <paramref name="title"/> followed by <c>" :"</c>.
/// </summary>
/// <param name="nodes">Candidate nodes, searched in order.</param>
/// <param name="title">Label text without the trailing <c>" :"</c>; may contain quote characters.</param>
/// <returns>The first matching node, or <c>null</c> when none matches.</returns>
private HtmlNode GetInfoElem(HtmlNodeCollection nodes, string title)
{
    // Built once instead of per node; the literal is quoted safely so an apostrophe in the
    // title cannot break the expression.
    string query = "span[text() = " + ToXPathLiteral(title + " :") + "]";

    // SelectNodes returns null (not an empty collection) when nothing matches.
    return nodes.FirstOrDefault(n => n.SelectNodes(query) != null);
}

/// <summary>
/// Renders <paramref name="value"/> as an XPath 1.0 string literal. XPath 1.0 has no escape
/// sequence, so single quotes are preferred, then double quotes, and <c>concat()</c> is used
/// when the value contains both kinds of quote.
/// </summary>
private static string ToXPathLiteral(string value)
{
    if (value.IndexOf('\'') < 0)
    {
        return "'" + value + "'";
    }

    if (value.IndexOf('"') < 0)
    {
        return "\"" + value + "\"";
    }

    // Split on each apostrophe and splice it back in as a double-quoted piece.
    return "concat('" + value.Replace("'", "',\"'\",'") + "')";
}