Пример #1
0
        /// <summary>
        /// Builds an <see cref="HtmlContent"/> from the HTML held in <c>content</c>,
        /// keeping only the part of the document selected by <paramref name="xpath"/>.
        /// </summary>
        /// <param name="xpath">
        /// Selector for the wanted fragment:
        /// "all" (any casing) selects the whole document;
        /// a value that does not start with "/" is compared against the id and the
        /// exact class attribute value of every div element;
        /// any other value is evaluated as an XPath expression.
        /// </param>
        /// <returns>
        /// The populated content object, or <c>null</c> when no node matches the selector.
        /// </returns>
        /// <exception cref="InvalidOperationException">No url or content has been set yet.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="xpath"/> is null.</exception>
        public HtmlContent GetContent(string xpath)
        {
            if (this.url == null || content == null)
            {
                throw new InvalidOperationException("call IContentExtractor.GotoPage first");
            }
            if (xpath == null)
            {
                // Fail with a descriptive exception instead of a NullReferenceException further down.
                throw new ArgumentNullException("xpath");
            }

            HtmlContent doc = new HtmlContent();
            doc.LastUpdatedDate = DateTime.Now;

            HtmlDocument htmldoc = new HtmlDocument();
            htmldoc.LoadHtml(content);

            HtmlAgilityPack.HtmlNode titleNode = htmldoc.DocumentNode.SelectSingleNode("//title");
            if (titleNode != null)
            {
                doc.Title = titleNode.InnerText;
            }

            // Resolve the selector to a single node; null means "nothing matched".
            HtmlAgilityPack.HtmlNode targetNode;
            if (string.Equals(xpath, "all", StringComparison.OrdinalIgnoreCase))
            {
                targetNode = htmldoc.DocumentNode;
            }
            else if (!xpath.StartsWith("/", StringComparison.Ordinal))
            {
                // Not an XPath expression: treat the value as a div id or an exact class attribute value.
                // SelectNodes returns null (not an empty collection) when the document has no div.
                HtmlAgilityPack.HtmlNodeCollection divs = htmldoc.DocumentNode.SelectNodes("//div");
                targetNode = divs == null
                    ? null
                    : divs.FirstOrDefault(div =>
                    {
                        if (div.Id == xpath)
                        {
                            return true;
                        }
                        var classAttribute = div.Attributes["class"];
                        return classAttribute != null && classAttribute.Value == xpath;
                    });
            }
            else
            {
                // Starts with "/": evaluate as an XPath expression.
                targetNode = htmldoc.DocumentNode.SelectSingleNode(xpath);
            }

            if (targetNode == null)
            {
                // Single exit for every "not found" case so they are all logged the same way.
                if (Logger != null)
                {
                    Logger.WarnFormat("[Mine Fails] '{1}' container not found - {0}", url, xpath);
                }
                return null;
            }

            doc.Content = targetNode.OuterHtml;
            doc.Url = url;
            return doc;
        }
Пример #2
0
 /// <summary>
 /// Copies the expansion text from the scraped table into <paramref name="scrappedCard"/>.
 /// The first row whose second child node's text contains "Expansion" is used;
 /// the trimmed text of that row's third child node becomes the value.
 /// Nothing is assigned when no such row exists.
 /// </summary>
 private static void SetExpansion(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
 {
     var expansionRow = table.FirstOrDefault(node => node.ChildNodes[1].InnerText.Contains("Expansion"));
     if (expansionRow == null)
     {
         return;
     }

     var valueCell = expansionRow.ChildNodes[2];
     scrappedCard.Expansion = valueCell.InnerText.Trim();
 }
Пример #3
0
 /// <summary>
 /// Reads the converted mana cost from the scraped table into <paramref name="scrappedCard"/>.
 /// The first row whose second child node's text contains "Converted Mana Cost" is used;
 /// the trimmed text of its third child node is parsed as an integer.
 /// The property is left untouched when the row is missing or the text is not a valid integer.
 /// </summary>
 private static void SetConvertedManaCost(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
 {
     var row = table.FirstOrDefault(x => x.ChildNodes[1].InnerText.Contains("Converted Mana Cost"));
     if (row == null)
     {
         return;
     }

     // Scraped text is machine data: parse culture-invariantly and tolerate
     // non-integer cells instead of aborting with a FormatException.
     int convertedManaCost;
     var parsed = int.TryParse(
         row.ChildNodes[2].InnerText.Trim(),
         System.Globalization.NumberStyles.Integer,
         System.Globalization.CultureInfo.InvariantCulture,
         out convertedManaCost);
     if (parsed)
     {
         scrappedCard.ConvertedManaCost = convertedManaCost;
     }
 }
Пример #4
0
 /// <summary>
 /// Reads the type list from the scraped table into <paramref name="scrappedCard"/>.
 /// The first row whose second child node's text contains "Types" is used; the text of
 /// its third child node is trimmed, every " —" is removed, and the remainder is split
 /// on spaces. Nothing is assigned when no such row exists.
 /// </summary>
 private static void SetTypes(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
 {
     var row = table.FirstOrDefault(x => x.ChildNodes[1].InnerText.Contains("Types"));
     if (row == null)
     {
         return;
     }

     // RemoveEmptyEntries keeps consecutive spaces (or an empty cell) from
     // producing empty strings in the resulting list.
     scrappedCard.Types = row.ChildNodes[2].InnerText
         .Trim()
         .Replace(" —", "")
         .Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries)
         .ToList();
 }
Пример #5
0
 /// <summary>
 /// Reads power and toughness from the scraped table into <paramref name="scrappedCard"/>.
 /// The first row whose second child node's text contains "P/T" is used; the text of its
 /// third child node is trimmed, stripped of surrounding parentheses and split on '/'.
 /// Neither property is assigned when the row is missing or the text has no '/' separator.
 /// </summary>
 private static void SetPowerAndThoughness(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
 {
     var row = table.FirstOrDefault(x => x.ChildNodes[1].InnerText.Contains("P/T"));
     if (row == null)
     {
         return;
     }

     var pt = row.ChildNodes[2].InnerText.Trim().Trim('(', ')').Split('/');
     if (pt.Length < 2)
     {
         // No separator: avoid an IndexOutOfRangeException and a half-populated card.
         return;
     }

     // Trim each part so spacing around the separator does not leak into the stored values.
     scrappedCard.Power = pt[0].Trim();
     scrappedCard.Thoughness = pt[1].Trim();
 }
Пример #6
0
        /// <summary>
        /// Reads the mana cost symbols from the scraped table into <paramref name="scrappedCard"/>.
        /// The first row whose second child node's text contains "Mana Cost" is used; every
        /// non-text child of its third child node is inspected through its "alt" attribute:
        /// an alt containing "CMC" sets <c>ColorlessMana</c> to the one or two characters
        /// found at index 3 onward, and an alt containing "Color R/U/G/W/B" increments the
        /// matching colour counter. Nothing happens when no such row exists.
        /// </summary>
        private static void SetManaCost(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
        {
            var row = table.FirstOrDefault(x => x.ChildNodes[1].InnerText.Contains("Mana Cost"));
            if (row == null)
            {
                return;
            }

            foreach (var childNode in row.ChildNodes[2].ChildNodes)
            {
                if (childNode.Name == "#text")
                {
                    continue;
                }

                var alt = childNode.GetAttributeValue("alt", "");
                if (alt.Contains("CMC"))
                {
                    // The value is read from index 3 (presumably right after a leading
                    // "CMC" - confirm against the scraped markup). Skip alts too short
                    // to carry a value instead of throwing IndexOutOfRangeException.
                    if (alt.Length > 3)
                    {
                        scrappedCard.ColorlessMana = alt.Substring(3, alt.Length > 4 ? 2 : 1);
                    }
                }
                else if (alt.Contains("Color R"))
                {
                    scrappedCard.RedMana++;
                }
                else if (alt.Contains("Color U"))
                {
                    scrappedCard.BlueMana++;
                }
                else if (alt.Contains("Color G"))
                {
                    scrappedCard.GreenMana++;
                }
                else if (alt.Contains("Color W"))
                {
                    scrappedCard.WhiteMana++;
                }
                else if (alt.Contains("Color B"))
                {
                    scrappedCard.BlackMana++;
                }
            }
        }
 /// <summary>
 /// Decides whether the current page should be filtered out.
 /// Returns true when the base implementation filters the page, when the collection holds
 /// no picture node, or when its single node's "src" contains "bctp_28.gif"; in the last
 /// two cases <paramref name="pageNum"/> is set to 500.
 /// </summary>
 /// <param name="pictureHtmlNode">Picture nodes found on the page; null is treated like an empty collection.</param>
 /// <param name="pageNum">Current page number; overwritten with 500 in the cases above
 /// (presumably a sentinel that stops paging - confirm with the caller).</param>
 /// <returns>true when the page is filtered out; otherwise false.</returns>
 protected override bool FilterPage(HtmlNodeCollection pictureHtmlNode, ref int pageNum)
 {
     if (base.FilterPage(pictureHtmlNode, ref pageNum))
     {
         return true;
     }

     // HtmlAgilityPack's SelectNodes yields null when nothing matches, so guard before counting.
     if (pictureHtmlNode != null && pictureHtmlNode.Count > 1)
     {
         return false;
     }

     var picNode = pictureHtmlNode == null ? null : pictureHtmlNode.FirstOrDefault();

     // GetAttributeValue falls back to "" so a node without "src" no longer throws.
     if (picNode == null || picNode.GetAttributeValue("src", "").Contains("bctp_28.gif"))
     {
         pageNum = 500;
         return true;
     }

     return false;
 }
Пример #8
0
 /// <summary>
 /// Returns the first node in <paramref name="nodes"/> whose "href" attribute value
 /// contains <paramref name="url"/>, or null when there is none. A node without an
 /// "href" attribute is treated as having an empty value.
 /// </summary>
 internal static HtmlNode GetNodeForUrl(HtmlNodeCollection nodes, string url) {
     return nodes.FirstOrDefault(candidate => {
         var href = candidate.GetAttributeValue("href", "");
         return href.Contains(url);
     });
 }
		/// <summary>
		/// Returns the "content" attribute value of the first node whose "property"
		/// attribute equals <paramref name="prop"/>; null when no node matches or the
		/// matching node has no "content" attribute.
		/// </summary>
		private static string GetMetaProperty(HtmlNodeCollection nodes, string prop)
		{
			var matchingNode = nodes.FirstOrDefault(node => node.Attributes["property"]?.Value == prop);
			var contentAttribute = matchingNode?.Attributes["content"];
			return contentAttribute?.Value;
		}
Пример #10
0
		/// <summary>
		/// Returns the first node in <paramref name="nodes"/> that has a child span whose
		/// text equals <paramref name="title"/> followed by "&amp;nbsp;:", or null when
		/// there is none.
		/// </summary>
		private HtmlNode GetInfoElem(HtmlNodeCollection nodes, string title) {
			// Built once, outside the predicate; the label is quoted so that an
			// apostrophe in the title cannot break the XPath expression.
			var xpath = "span[text() = " + ToXPathLiteral(title + "&nbsp;:") + "]";
			return nodes.FirstOrDefault(n => n.SelectNodes(xpath) != null);
		}

		/// <summary>
		/// Quotes <paramref name="value"/> as an XPath 1.0 string literal. XPath has no
		/// escape character, so a value containing both quote kinds is expressed with concat().
		/// </summary>
		private static string ToXPathLiteral(string value) {
			if (!value.Contains("'")) {
				return "'" + value + "'";
			}
			if (!value.Contains("\"")) {
				return "\"" + value + "\"";
			}
			return "concat('" + value.Replace("'", "', \"'\", '") + "')";
		}