/// <summary>
/// Returns the first node under <paramref name="element"/> that matches the
/// stored selector text, or <c>null</c> when nothing matches.
/// </summary>
/// <param name="element">Node to run the selector against.</param>
/// <returns>The first match, or <c>null</c> if the result list is null or empty.</returns>
public override HtmlAgilityPack.HtmlNode SelectElement(HtmlAgilityPack.HtmlNode element)
{
    IList<HtmlAgilityPack.HtmlNode> matches = element.QuerySelectorAll(_selectorText);

    // Guard: no result list at all, or a list without entries.
    if (matches == null || matches.Count < 1)
    {
        return null;
    }

    return matches[0];
}
/// <summary>
/// Builds a <see cref="ProductPageInfo"/> from the "name" and "price" hidden
/// fields and the og:description / og:image meta tags of the page.
/// </summary>
/// <param name="document">Root node of the product page.</param>
/// <returns>The parsed product information.</returns>
protected override ProductPageInfo ParsePage(HtmlNode document)
{
    // Deliberately no defensive checks: if an expected element is missing,
    // the resulting exception is allowed to propagate to the caller.
    IEnumerable<HtmlNode> metas = document.QuerySelectorAll("meta");

    // Reads the "content" of the first meta tag whose "property" attribute
    // equals the requested value.
    Func<string, string> contentOfProperty = property =>
        metas.First(m => m.GetAttributeValue("property") == property)
             .GetAttributeValue("content");

    HtmlNode nameField = document.FindHiddenField("name");
    string name = nameField.GetAttributeValue("value");

    string description = contentOfProperty("og:description");
    string imageUrl = contentOfProperty("og:image");

    HtmlNode priceField = document.FindHiddenField("price");
    string priceText = priceField.GetAttributeValue("value");

    decimal price = Decimal.Parse(priceText, NumberStyles.Currency);
    return new ProductPageInfo(name, description, price, imageUrl);
}
/// <summary>
/// Lazily yields one <see cref="Game"/> per list item matched by
/// "ol.GameTiles li" in <paramref name="doc"/>.
/// </summary>
/// <param name="doc">Node to search for game tiles.</param>
/// <returns>
/// A deferred sequence of games; each carries the "src" of the item's
/// "img.ProductBox" element and the inner text of its "h2 a" element.
/// </returns>
public IEnumerable<Game> LoadTopGames(HtmlNode doc)
{
    foreach (var tile in doc.QuerySelectorAll("ol.GameTiles li"))
    {
        var boxImage = tile.QuerySelector("img.ProductBox");
        var titleLink = tile.QuerySelector("h2 a");

        // No null checks: a tile lacking either element raises an exception here.
        var game = new Game();
        game.ThumbUrl = boxImage.Attributes["src"].Value;
        game.Title = titleLink.InnerText;

        yield return game;
    }
}
/// <summary>
/// Builds a <see cref="ProductPageInfo"/> from the title / description /
/// og:image meta tags and the sale-price hidden field of the page.
/// </summary>
/// <param name="document">Root node of the product page.</param>
/// <returns>The parsed product information.</returns>
protected override ProductPageInfo ParsePage(HtmlNode document)
{
    // Deliberately no defensive checks: if an expected element is missing,
    // the resulting exception is allowed to propagate to the caller.
    var metas = document.QuerySelectorAll("meta");

    var titleMeta = metas.First(m => m.GetAttributeValue("name") == "title");
    string name = titleMeta.GetAttributeValue("content");

    var descriptionMeta = metas.First(m => m.GetAttributeValue("name") == "description");
    string description = descriptionMeta.GetAttributeValue("content");

    var imageMeta = metas.First(m => m.GetAttributeValue("property") == "og:image");
    string imageUrl = imageMeta.GetAttributeValue("content");

    // The price lives in one of two hidden fields; the array variant is
    // consulted only when the single-item variant is absent.
    var priceField = document.FindHiddenField("ADD_CART_ITEM<>salePriceAmt")
                     ?? document.FindHiddenField("ADD_CART_ITEM_ARRAY<>salePriceAmt");
    string priceText = priceField.GetAttributeValue("value");

    decimal price = Decimal.Parse(priceText, NumberStyles.Currency);
    return new ProductPageInfo(name, description, price, imageUrl);
}
/// <summary>
/// Generic product parser: looks up name, description, price and image via
/// <c>GetValueSomewhereInThePage</c> and builds a <see cref="ProductPageInfo"/>
/// when at least one of them was found.
/// </summary>
/// <param name="document">Root node of the product page.</param>
/// <returns>
/// The parsed product information, or <c>null</c> when all four values are
/// null, empty or whitespace. A blank price is reported as 0.
/// </returns>
protected override ProductPageInfo ParsePage(HtmlNode document)
{
    IEnumerable<HtmlNode> metas = document.QuerySelectorAll("meta");

    string name = GetValueSomewhereInThePage(document, new[] { "name", "title" }, metas);
    string description = GetValueSomewhereInThePage(document, new[] { "description" }, metas);
    string priceText = GetValueSomewhereInThePage(document, new[] { "price" }, metas);
    string imageUrl = GetValueSomewhereInThePage(document, new[] { "image" }, metas);

    bool nothingFound = String.IsNullOrWhiteSpace(name)
                        && String.IsNullOrWhiteSpace(description)
                        && String.IsNullOrWhiteSpace(priceText)
                        && String.IsNullOrWhiteSpace(imageUrl);
    if (nothingFound)
    {
        return null;
    }

    // Price defaults to zero unless some price text was actually located.
    decimal price = 0;
    if (!String.IsNullOrWhiteSpace(priceText))
    {
        price = Decimal.Parse(priceText, NumberStyles.Currency);
    }

    return new ProductPageInfo(name, description, price, imageUrl);
}
/// <summary>
/// Returns the first node matched by <paramref name="cssSelector"/> under
/// <paramref name="node"/>, or the default value (<c>null</c>) when there is no match.
/// </summary>
/// <param name="node">Node to query.</param>
/// <param name="cssSelector">CSS selector text.</param>
/// <returns>The first matching node, or <c>null</c>.</returns>
public static HtmlAgilityPack.HtmlNode QuerySelector(this HtmlAgilityPack.HtmlNode node, string cssSelector)
{
    var matches = node.QuerySelectorAll(cssSelector);
    return matches.FirstOrDefault();
}
/// <summary>
/// Runs a CSS selector query against a node and returns every match.
/// </summary>
/// <param name="node">Node to query.</param>
/// <param name="query">CSS selector text.</param>
/// <returns>The nodes produced by the selector query on <paramref name="node"/>.</returns>
protected IEnumerable<HtmlNode> QuerySelectorAll(HtmlNode node, string query)
{
    IEnumerable<HtmlNode> matches = node.QuerySelectorAll(query);
    return matches;
}
/// <summary>
/// Returns every node under <paramref name="element"/> that matches the
/// stored selector text.
/// </summary>
/// <param name="element">Node to run the selector against.</param>
/// <returns>The list produced by the selector query, passed through unchanged.</returns>
public override IList<HtmlAgilityPack.HtmlNode> SelectElements(HtmlAgilityPack.HtmlNode element)
{
    IList<HtmlAgilityPack.HtmlNode> matches = element.QuerySelectorAll(_selectorText);
    return matches;
}
/// <summary>
/// Splits the article content into subtopics using HtmlAgilityPack with
/// Fizzler query selector extensions. Paragraph text is accumulated and
/// handed to <c>AddTextData</c> under the most recent headline.
/// </summary>
/// <param name="document">Node containing the "#mw-content-text" element.</param>
/// <param name="initialTopic">
/// Topic name (lower-cased here) used for text that precedes the first headline.
/// </param>
/// <returns>The dictionary that was passed to <c>AddTextData</c> for each subtopic.</returns>
private static Dictionary<string, string> BuildTopicData(HtmlNode document, string initialTopic)
{
    var sections = new Dictionary<string, string>();
    string currentTopic = initialTopic.ToLower();
    var pendingText = new StringBuilder();

    // Only the direct children of the main article content node are walked.
    var contentRoot = document.QuerySelectorAll("#mw-content-text").First();

    foreach (var child in contentRoot.ChildNodes)
    {
        // Elements whose name begins with "h" are candidate headers; they
        // only start a new subtopic when they carry a headline span.
        if (child.Name.StartsWith("h"))
        {
            var headline = child.QuerySelector("span.mw-headline");
            if (headline != null)
            {
                // Flush whatever text belongs to the previous subtopic
                // before switching to the new one.
                if (pendingText.Length > 0)
                {
                    AddTextData(sections, currentTopic, pendingText);
                }

                pendingText.Clear();
                currentTopic = headline.InnerText.ToLower();
            }

            continue;
        }

        // Anything that is not a paragraph contributes no text.
        if (child.Name != "p")
        {
            continue;
        }

        // Consecutive paragraphs are joined with a single space.
        if (pendingText.Length > 0)
        {
            pendingText.Append(" ");
        }

        pendingText.Append(child.InnerText);
    }

    // Hand over the text collected for the final subtopic.
    AddTextData(sections, currentTopic, pendingText);

    return sections;
}
/// <summary>
/// Returns all nodes under <paramref name="cq"/> matched by the given CSS selector.
/// </summary>
/// <param name="cq">Node to query.</param>
/// <param name="css">CSS selector text.</param>
/// <returns>The nodes produced by the selector query on <paramref name="cq"/>.</returns>
protected IEnumerable<HtmlNode> SelectItems(HtmlNode cq, string css)
{
    IEnumerable<HtmlNode> matches = cq.QuerySelectorAll(css);
    return matches;
}
/// <summary>
/// Removes each of the named attributes from every element under
/// <paramref name="document"/> that carries it.
/// </summary>
/// <param name="document">Node whose matching elements are modified in place.</param>
/// <param name="attributes">Names of the attributes to remove.</param>
private static void RemoveAttributesWithName(HtmlNode document, params string[] attributes)
{
    foreach (var attributeName in attributes)
    {
        // "[name]" is the CSS attribute-presence selector.
        string presenceSelector = "[" + attributeName + "]";

        // Snapshot the matches into a list before any of them is modified.
        var carriers = document.QuerySelectorAll(presenceSelector).ToList();

        foreach (var carrier in carriers)
        {
            carrier.Attributes.Remove(attributeName);
        }
    }
}
/// <summary>
/// Collects the disadvantages listed in a review: one
/// <see cref="ProductDisadvantage"/> per node matched by ".cons-cell li".
/// </summary>
/// <param name="reviewNode">Node of the review to inspect.</param>
/// <returns>
/// A list whose items hold the trimmed, HTML-decoded inner text of each match.
/// </returns>
private static List<ProductDisadvantage> GetProductDisadvantages(HtmlNode reviewNode)
{
    var disadvantages =
        from consItem in reviewNode.QuerySelectorAll(".cons-cell li")
        select new ProductDisadvantage
        {
            // Trim first, then decode HTML entities.
            Feature = WebUtility.HtmlDecode(consItem.InnerText.Trim())
        };

    return disadvantages.ToList();
}
/// <summary>
/// Selects the nodes matching the configured search path and converts them
/// with <c>ConvertHtmlNodeToBuses</c>.
/// </summary>
/// <param name="document">Node of the page to query.</param>
/// <returns>The result of converting the matched nodes.</returns>
private RealtimeInfo ParseWebPage(HtmlNode document)
{
    return ConvertHtmlNodeToBuses(document.QuerySelectorAll(_searchPath));
}