Пример #1
0
        /// <summary>
        /// Returns the first descendant of <paramref name="element"/> that matches the
        /// selector stored in <c>_selectorText</c>.
        /// </summary>
        /// <param name="element">Node the CSS selector is evaluated against.</param>
        /// <returns>The first match, or <c>null</c> when the query yields null or no nodes.</returns>
        public override HtmlAgilityPack.HtmlNode SelectElement(HtmlAgilityPack.HtmlNode element)
        {
            IList<HtmlAgilityPack.HtmlNode> matches = element.QuerySelectorAll(_selectorText);

            // Nothing to return when the query produced no list or an empty one.
            if (matches == null || matches.Count == 0)
            {
                return null;
            }

            return matches[0];
        }
        /// <summary>
        /// Builds a <see cref="ProductPageInfo"/> from the "name" and "price" hidden fields
        /// and the <c>og:description</c> / <c>og:image</c> meta tags of the page.
        /// </summary>
        /// <param name="document">Root node of the product page.</param>
        /// <returns>The product information extracted from the page.</returns>
        protected override ProductPageInfo ParsePage(HtmlNode document)
        {
            IEnumerable<HtmlNode> metas = document.QuerySelectorAll("meta");

            // No defensive checks on purpose: a missing element surfaces as an exception
            // from the lookup that failed.
            string productName = document.FindHiddenField("name").GetAttributeValue("value");

            HtmlNode descriptionMeta = metas.First(m => m.GetAttributeValue("property") == "og:description");
            string productDescription = descriptionMeta.GetAttributeValue("content");

            HtmlNode imageMeta = metas.First(m => m.GetAttributeValue("property") == "og:image");
            string productImageUrl = imageMeta.GetAttributeValue("content");

            string rawPrice = document.FindHiddenField("price").GetAttributeValue("value");

            // No format provider is passed, so the price is parsed with the current culture.
            decimal price = Decimal.Parse(rawPrice, NumberStyles.Currency);

            return new ProductPageInfo(productName, productDescription, price, productImageUrl);
        }
 /// <summary>
 /// Lazily yields one <see cref="Game"/> per <c>ol.GameTiles li</c> element found in
 /// <paramref name="doc"/>.
 /// </summary>
 /// <param name="doc">Node to search for game tiles.</param>
 /// <returns>
 /// A deferred sequence; each game carries the <c>src</c> of the tile's
 /// <c>img.ProductBox</c> and the inner text of its <c>h2 a</c> element.
 /// </returns>
 public IEnumerable<Game> LoadTopGames(HtmlNode doc)
 {
     foreach (var tile in doc.QuerySelectorAll("ol.GameTiles li"))
     {
         var coverImage = tile.QuerySelector("img.ProductBox");
         var titleLink = tile.QuerySelector("h2 a");

         // Both elements are dereferenced without a null check, as in the original.
         var game = new Game();
         game.ThumbUrl = coverImage.Attributes["src"].Value;
         game.Title = titleLink.InnerText;

         yield return game;
     }
 }
        /// <summary>
        /// Builds a <see cref="ProductPageInfo"/> from the "title" / "description" /
        /// <c>og:image</c> meta tags and a sale-price hidden field.
        /// </summary>
        /// <param name="document">Root node of the product page.</param>
        /// <returns>The product information extracted from the page.</returns>
        protected override ProductPageInfo ParsePage(HtmlNode document)
        {
            var metas = document.QuerySelectorAll("meta");

            // No defensive checks on purpose: a missing element surfaces as an exception
            // from the lookup that failed.
            var titleMeta = metas.First(m => m.GetAttributeValue("name") == "title");
            string productName = titleMeta.GetAttributeValue("content");

            var descriptionMeta = metas.First(m => m.GetAttributeValue("name") == "description");
            string productDescription = descriptionMeta.GetAttributeValue("content");

            var imageMeta = metas.First(m => m.GetAttributeValue("property") == "og:image");
            string productImageUrl = imageMeta.GetAttributeValue("content");

            // The price lives in one of two hidden fields; the array-style name is only
            // consulted when the first lookup returns null.
            var priceField = document.FindHiddenField("ADD_CART_ITEM<>salePriceAmt")
                ?? document.FindHiddenField("ADD_CART_ITEM_ARRAY<>salePriceAmt");

            string rawPrice = priceField.GetAttributeValue("value");

            // No format provider is passed, so the price is parsed with the current culture.
            decimal price = Decimal.Parse(rawPrice, NumberStyles.Currency);

            return new ProductPageInfo(productName, productDescription, price, productImageUrl);
        }
        /// <summary>
        /// Looks up name, description, price and image values via
        /// <c>GetValueSomewhereInThePage</c> and packs them into a
        /// <see cref="ProductPageInfo"/>.
        /// </summary>
        /// <param name="document">Root node of the product page.</param>
        /// <returns>
        /// The product information when at least one of the four values is non-blank
        /// (a blank price becomes 0); otherwise <c>null</c>.
        /// </returns>
        protected override ProductPageInfo ParsePage(HtmlNode document)
        {
            IEnumerable<HtmlNode> metas = document.QuerySelectorAll("meta");

            string name = GetValueSomewhereInThePage(document, new[] { "name", "title" }, metas);
            string description = GetValueSomewhereInThePage(document, new[] { "description" }, metas);
            string rawPrice = GetValueSomewhereInThePage(document, new[] { "price" }, metas);
            string imageUrl = GetValueSomewhereInThePage(document, new[] { "image" }, metas);

            // Give up only when every single lookup came back blank.
            bool nothingFound = String.IsNullOrWhiteSpace(name)
                && String.IsNullOrWhiteSpace(description)
                && String.IsNullOrWhiteSpace(rawPrice)
                && String.IsNullOrWhiteSpace(imageUrl);
            if (nothingFound)
            {
                return null;
            }

            // A missing price defaults to zero; a present one is parsed with the current culture.
            decimal price = 0;
            if (!String.IsNullOrWhiteSpace(rawPrice))
            {
                price = Decimal.Parse(rawPrice, NumberStyles.Currency);
            }

            return new ProductPageInfo(name, description, price, imageUrl);
        }
Пример #6
0
 /// <summary>
 /// Returns the first node under <paramref name="node"/> that matches a CSS selector.
 /// </summary>
 /// <param name="node">Node the selector is evaluated against.</param>
 /// <param name="cssSelector">CSS selector text.</param>
 /// <returns>The first matching node, or <c>null</c> when there is no match.</returns>
 public static HtmlAgilityPack.HtmlNode QuerySelector(this HtmlAgilityPack.HtmlNode node, string cssSelector)
 {
     var matches = node.QuerySelectorAll(cssSelector);
     return matches.FirstOrDefault();
 }
Пример #7
0
 /// <summary>
 /// Runs a CSS selector query against a node and returns every match.
 /// </summary>
 /// <param name="node">Node the query is evaluated against.</param>
 /// <param name="query">CSS selector text.</param>
 /// <returns>The nodes produced by the node's own <c>QuerySelectorAll</c> call.</returns>
 protected IEnumerable<HtmlNode> QuerySelectorAll(HtmlNode node, string query)
 {
     // Pure pass-through to the selector call available on the node itself.
     IEnumerable<HtmlNode> matches = node.QuerySelectorAll(query);
     return matches;
 }
Пример #8
0
 /// <summary>
 /// Returns every descendant of <paramref name="element"/> that matches the selector
 /// stored in <c>_selectorText</c>.
 /// </summary>
 /// <param name="element">Node the CSS selector is evaluated against.</param>
 /// <returns>The list produced by the selector query, returned unchanged.</returns>
 public override IList<HtmlAgilityPack.HtmlNode> SelectElements(HtmlAgilityPack.HtmlNode element)
 {
     IList<HtmlAgilityPack.HtmlNode> matches = element.QuerySelectorAll(_selectorText);
     return matches;
 }
        /// <summary>
        /// Walks the children of the <c>#mw-content-text</c> element and groups paragraph
        /// text under lower-cased topic names taken from <c>span.mw-headline</c> headings.
        /// </summary>
        /// <param name="document">Root node of the parsed article.</param>
        /// <param name="initialTopic">Topic name used for text that precedes the first headline.</param>
        /// <returns>The dictionary that <c>AddTextData</c> was asked to fill, one call per collected section.</returns>
        private static Dictionary<string, string> BuildTopicData(HtmlNode document, string initialTopic)
        {
            var sections = new Dictionary<string, string>();
            string currentTopic = initialTopic.ToLower();
            var pendingText = new StringBuilder();

            // The main article container; First() throws when the page has none.
            var contentRoot = document.QuerySelectorAll("#mw-content-text").First();

            foreach (var child in contentRoot.ChildNodes)
            {
                if (child.Name.StartsWith("h"))
                {
                    // Only elements that carry a headline span start a new subtopic;
                    // any other "h…" element is ignored.
                    var headlineSpan = child.QuerySelector("span.mw-headline");
                    if (headlineSpan != null)
                    {
                        // Hand over whatever was collected for the previous topic
                        // before switching to the new one.
                        if (pendingText.Length > 0)
                        {
                            AddTextData(sections, currentTopic, pendingText);
                        }

                        pendingText.Clear();
                        currentTopic = headlineSpan.InnerText.ToLower();
                    }
                }
                else if (child.Name == "p")
                {
                    // Paragraphs of the same subtopic are joined with a single space.
                    if (pendingText.Length > 0)
                    {
                        pendingText.Append(" ");
                    }

                    pendingText.Append(child.InnerText);
                }
            }

            // Hand over the trailing section; unlike inside the loop, this call is unconditional.
            AddTextData(sections, currentTopic, pendingText);

            return sections;
        }
Пример #10
0
 /// <summary>
 /// Returns the nodes under <paramref name="cq"/> that match a CSS selector.
 /// </summary>
 /// <param name="cq">Node the selector is evaluated against.</param>
 /// <param name="css">CSS selector text.</param>
 /// <returns>The result of the selector query, returned unchanged.</returns>
 protected IEnumerable<HtmlNode> SelectItems(HtmlNode cq, string css)
 {
     IEnumerable<HtmlNode> matches = cq.QuerySelectorAll(css);
     return matches;
 }
Пример #11
0
 /// <summary>
 /// Removes each of the named attributes from every element under
 /// <paramref name="document"/> that carries it.
 /// </summary>
 /// <param name="document">Node whose matching descendants are modified in place.</param>
 /// <param name="attributes">Names of the attributes to remove.</param>
 private static void RemoveAttributesWithName(HtmlNode document, params string[] attributes)
 {
     foreach (var attributeName in attributes)
     {
         // CSS "has attribute" selector, e.g. [style].
         string hasAttributeSelector = "[" + attributeName + "]";

         // Materialize the matches before the elements are modified.
         var carriers = document.QuerySelectorAll(hasAttributeSelector).ToList();
         foreach (var carrier in carriers)
         {
             carrier.Attributes.Remove(attributeName);
         }
     }
 }
Пример #12
0
 /// <summary>
 /// Collects one <see cref="ProductDisadvantage"/> per <c>.cons-cell li</c> element
 /// found in a review node.
 /// </summary>
 /// <param name="reviewNode">Review node to search.</param>
 /// <returns>
 /// A list whose entries hold the trimmed, HTML-decoded inner text of each list item.
 /// </returns>
 private static List<ProductDisadvantage> GetProductDisadvantages(HtmlNode reviewNode)
 {
     var disadvantages = new List<ProductDisadvantage>();

     foreach (var item in reviewNode.QuerySelectorAll(".cons-cell li"))
     {
         // Trim first, then decode entities — same order as the original expression.
         string feature = WebUtility.HtmlDecode(item.InnerText.Trim());
         disadvantages.Add(new ProductDisadvantage { Feature = feature });
     }

     return disadvantages;
 }
Пример #13
0
        /// <summary>
        /// Selects the nodes matching <c>_searchPath</c> in the page and converts them
        /// with <c>ConvertHtmlNodeToBuses</c>.
        /// </summary>
        /// <param name="document">Root node of the downloaded page.</param>
        /// <returns>The <see cref="RealtimeInfo"/> produced by the conversion.</returns>
        private RealtimeInfo ParseWebPage(HtmlNode document)
        {
            return ConvertHtmlNodeToBuses(document.QuerySelectorAll(_searchPath));
        }