Exemple #1
0
        /// <summary>
        /// Downloads the commodity detail page selected by <c>m.CommodityId</c> and copies
        /// the scraped specification values onto <paramref name="m"/>.
        /// </summary>
        /// <param name="m">Mobile whose <c>CommodityId</c> selects the page and whose properties are filled in.</param>
        public static void LoadMobileInfo(Mobile m)
        {
            // Both disposables are released by the using statements; no explicit Close() is needed.
            using (HttpWebResponse res = (HttpWebResponse)CreateCommodityInfoRequest(m.CommodityId).GetResponse())
            using (Stream s = res.GetResponseStream())
            {
                HtmlDocument doc = new HtmlDocument();
                doc.Load(s, Encoding.GetEncoding("GB2312"));
                HtmlNodeCollection simpleInfoNodes = doc.DocumentNode.SelectNodes(
                    "//ul[@class='parameter2 p-parameter-list']/li");
                HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(
                    "//div[@class='Ptable-item']");

                // Product introduction - product name.
                // The value is taken from the <li> title attribute; a missing list, item or
                // attribute yields null instead of a NullReferenceException.
                m.Name = simpleInfoNodes?.FirstOrDefault(
                    n => n.InnerText.Contains("商品名称"))
                         ?.Attributes["title"]?.Value;
                // Product introduction - product origin.
                m.Origin = simpleInfoNodes?.FirstOrDefault(
                    n => n.InnerText.Contains("商品产地"))
                           ?.Attributes["title"]?.Value;

                // Main body - brand.
                m.Brand = GetMobileInfo(nodes, "主体", "品牌");
                // Main body - model.
                m.ModelNumber = GetMobileInfo(nodes, "主体", "型号");

                // Main body - launch year. Fall back to the current year when the value is
                // missing (0) or outside the range DateTime accepts.
                int year = GetMobileInfo(nodes, "主体", "上市年份").ToInt();
                if (year < 1 || year > 9999)
                {
                    year = DateTime.Now.Year;
                }

                // Main body - launch month. 0 means "not found" and maps to the current month;
                // any other out-of-range value maps to January.
                int month = GetMobileInfo(nodes, "主体", "上市月份").ToInt();
                if (month == 0)
                {
                    month = DateTime.Now.Month;
                }
                if (month < 1 || month > 12)
                {
                    month = 1;
                }
                m.MarketTime = new DateTime(year, month, 1);

                // Storage - ROM / RAM.
                m.ROM = GetMobileInfo(nodes, "存储", "ROM").ToInt();
                m.RAM = GetMobileInfo(nodes, "存储", "RAM").ToInt();

                // Main chip - CPU model and core count.
                m.CPUModelNumber = GetMobileInfo(nodes, "主芯片", "CPU型号");
                m.CPUCoreNumber  = GetMobileInfo(nodes, "主芯片", "CPU核数");
                // Battery info - battery capacity.
                m.BatteryCapacity = GetMobileInfo(nodes, "电池信息", "电池容量(mAh)").ToInt();
            }
        }
Exemple #2
0
        /// <summary>
        /// Copies the card name from the scraped table rows onto <paramref name="scrappedCard"/>.
        /// Does nothing when no row is labelled "Card name".
        /// </summary>
        /// <param name="scrappedCard">Card model that receives the name.</param>
        /// <param name="table">Rows to search; child node 1 of a row is read as the label, child node 2 as the value.</param>
        private static void SetName(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
        {
            var nameRow = table.FirstOrDefault(candidate => candidate.ChildNodes[1].InnerText.Contains("Card name"));

            if (nameRow == null)
            {
                return;
            }

            scrappedCard.CardName = nameRow.ChildNodes[2].InnerText.Trim();
        }
Exemple #3
0
        /// <summary>
        /// Returns the inner text of the first node in <paramref name="col"/>, or an empty
        /// string when the collection is null or has no first node.
        /// </summary>
        /// <param name="col">Node collection to read from; may be null.</param>
        /// <returns>The first node's inner text, or an empty string.</returns>
        private string handleItem(HtmlNodeCollection col)
        {
            if (col == null)
            {
                Log.Information("collection is null, returning empty string");
                return string.Empty;
            }

            var firstNode = col.FirstOrDefault();
            if (firstNode == null)
            {
                // This path is intentionally silent, as in the null-collection-free fallback.
                return string.Empty;
            }

            Log.Information("returning col text");
            return firstNode.InnerText;
        }
Exemple #4
0
        /// <summary>
        /// Gets the user's blurb/status using the profile page.
        ///
        /// Will most likely break due to ROBLOX possibly changing the html format.
        /// </summary>
        /// <returns>
        /// The status/blurb of this user, or null when the profile element carries no
        /// (or an empty) <c>data-statustext</c> attribute.
        /// </returns>
        /// <exception cref="Exception">The profile element for this user was not found in the page.</exception>
        public async Task <string> GetStatus()
        {
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(await HttpHelper.GetStringFromURL(string.Format("https://www.roblox.com/users/{0}/profile", ID)));
            HtmlNodeCollection nodes = document
                                       .DocumentNode
                                       .SelectNodes(string.Format("//div[@data-profileuserid='{0}']", ID));

            // Resolve the first match once instead of re-running FirstOrDefault() for every access.
            var profileNode = nodes == null ? null : nodes.FirstOrDefault();

            if (profileNode == null)
            {
                throw new Exception("User page did not have the correct element. Did the website change?");
            }

            // The attribute indexer returns null for a missing attribute; treat that like an empty status.
            var statusAttribute = profileNode.Attributes["data-statustext"];
            if (statusAttribute == null || string.IsNullOrEmpty(statusAttribute.Value))
            {
                return(null);
            }
            return(statusAttribute.Value);
        }
Exemple #5
0
        /// <summary>
        /// Reads the "P/T" row of the scraped table and stores both halves on
        /// <paramref name="scrappedCard"/>. Nothing is written when the row is missing or
        /// its value does not contain a '/' separator.
        /// </summary>
        /// <param name="scrappedCard">Card model that receives power and toughness.</param>
        /// <param name="table">Rows to search; child node 1 of a row is read as the label, child node 2 as the value.</param>
        private static void SetPowerAndThoughness(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
        {
            var row = table.FirstOrDefault(x => x.ChildNodes[1].InnerText.Contains("P/T"));

            if (row == null)
            {
                return;
            }

            // Strip surrounding whitespace and parentheses, then split on the slash.
            var pt = row.ChildNodes[2].InnerText.Trim().Trim('(', ')').Split('/');

            // Without a '/' there is only one element; indexing pt[1] would throw.
            if (pt.Length < 2)
            {
                return;
            }

            scrappedCard.Power      = pt[0];
            scrappedCard.Thoughness = pt[1];
        }
        /// <summary>
        /// Returns the first node under <paramref name="marketNode"/> that matches
        /// <c>ContainerXPaths.MARKET</c>, or null when the node is null or nothing matches.
        /// </summary>
        /// <param name="marketNode">Node to search from; may be null.</param>
        /// <returns>The first matching node, or null.</returns>
        private HtmlNode GetMarketContainer(HtmlNode marketNode)
        {
            return marketNode?.SelectNodes(ContainerXPaths.MARKET)?.FirstOrDefault();
        }
Exemple #7
0
 /// <summary>
 /// Gets the text from an HtmlNode that may or may not be there based on the label from another node
 /// </summary>
 /// <param name="list">The list to fetch information from</param>
 /// <param name="name">The name of the label to search for; compared against the lower-cased label text</param>
 /// <param name="extraTrimChars">Optional parameter to add more trim characters</param>
 /// <returns>Either an empty string or the text in the specified HtmlNode</returns>
 private string GetAttributeText(HtmlNodeCollection list, string name, char[] extraTrimChars = null)
 {
     // Build the combined trim set once rather than once per inspected node.
     char[] trimChars = _trimChars.Concat(extraTrimChars ?? new char[] { }).ToArray();

     // Child 0 of an entry is read as the label, child 1 as the value.
     var labelled = list.FirstOrDefault(x =>
         x.ChildNodes.Count > 0 &&
         x.ChildNodes[0].InnerText.Trim(trimChars).ToLower() == name);

     // No matching label, or a label without a value node: report "not there".
     if (labelled == null || labelled.ChildNodes.Count < 2)
     {
         return(RemoveDoubleSpaces(""));
     }

     return(RemoveDoubleSpaces(labelled.ChildNodes[1].InnerText.Trim(trimChars)));
 }
        /// <summary>
        /// Downloads the page at <paramref name="link"/>, looks up its Open Graph image
        /// (<c>og:image</c>, falling back to <c>og:image:secure_url</c>), downloads that image
        /// and re-encodes it as a JPEG resized to fit a 300x100 geometry.
        /// </summary>
        /// <param name="link">URL of the page whose preview image is wanted.</param>
        /// <returns>
        /// The thumbnail bytes; null when the page declares no image or the download fails.
        /// If the image was downloaded but resizing failed, the unmodified image bytes are returned.
        /// Failures are logged, not thrown.
        /// </returns>
        private async Task <byte[]> GetThumbnail(string link)
        {
            byte[] imgContent = null;
            try
            {
                using (var httpClient = new HttpClient())
                {
                    using (var response = await httpClient.GetAsync(link))
                    {
                        response.EnsureSuccessStatusCode();

                        using (var stream = await response.Content.ReadAsStreamAsync())
                        {
                            HtmlDocument doc = new HtmlDocument();
                            doc.Load(stream);
                            HtmlNodeCollection metaImageNodes = doc.DocumentNode.SelectNodes("/html/head/meta[@property='og:image']");
                            if (metaImageNodes == null)
                            {
                                metaImageNodes = doc.DocumentNode.SelectNodes("/html/head/meta[@property='og:image:secure_url']");
                            }
                            var imgUrl = metaImageNodes?.FirstOrDefault()?.Attributes["content"]?.Value;
                            if (imgUrl == null)
                            {
                                return(null);
                            }

                            // Reuse the client that fetched the page; a second HttpClient
                            // instance was being created here and never used.
                            using (var imgResponse = await httpClient.GetAsync(imgUrl))
                            {
                                imgResponse.EnsureSuccessStatusCode();
                                imgContent = await imgResponse.Content.ReadAsByteArrayAsync();
                            }
                        }
                    }
                }

                using (MagickImage image = new MagickImage(imgContent))
                {
                    var size = new MagickGeometry(300, 100);
                    size.IgnoreAspectRatio = false;
                    image.Resize(size);
                    image.Format = MagickFormat.Jpg;
                    imgContent   = image.ToByteArray();
                }
            }
            catch (Exception ex)
            {
                // Best effort: a broken thumbnail must not fail the caller.
                _logger.LogError(ex, $"Error downloading image for: {link}");
            }

            return(imgContent);
        }
Exemple #9
0
        /// <summary>
        /// Loads <paramref name="targetUrl"/> and reports the href of the first and last
        /// meeting link found inside the "EventDayScroll" container.
        /// </summary>
        /// <param name="targetUrl">Page to download.</param>
        /// <param name="earliestUrl">Receives the href of the first matching link; left untouched when nothing matches.</param>
        /// <param name="latestUrl">Receives the href of the last matching link; left untouched when nothing matches.</param>
        private void GetEarliestMeeting(string targetUrl, ref string earliestUrl, ref string latestUrl)
        {
            HtmlDocument page = new HtmlWeb().Load(targetUrl);
            HtmlNodeCollection meetingLinks = page.DocumentNode.SelectNodes("//div[@class='EventDayScroll']//a[contains(@href,'ModuleID')]");

            if (meetingLinks == null)
            {
                return;
            }

            earliestUrl = meetingLinks.FirstOrDefault().Attributes["href"].Value;
            latestUrl   = meetingLinks.LastOrDefault().Attributes["href"].Value;
        }
Exemple #10
0
        /// <summary>
        /// Finds the first row containing a <c>td</c> whose text equals
        /// <paramref name="propertyName"/> and returns the inner text of the node two
        /// siblings after that cell.
        /// </summary>
        /// <param name="propertyName">Exact cell text to look for. It is embedded in an XPath string literal, so it must not contain a single quote.</param>
        /// <param name="rows">Rows to search.</param>
        /// <returns>The inner text of the second following sibling, or null when the cell or either sibling is missing.</returns>
        private static string GetValue(string propertyName, HtmlNodeCollection rows)
        {
            string cellXPath = $"td[text()='{propertyName}']";

            // Evaluate the XPath once per row and keep the first hit.
            var cellContainingMatch = rows
                                      .Select(node => node.SelectSingleNode(cellXPath))
                                      .FirstOrDefault(cell => cell != null);

            // Every hop is null-conditional: the label may be the last cell in its row.
            string value = cellContainingMatch?.NextSibling?.NextSibling?.InnerText;

            return(value);
        }
Exemple #11
0
        /// <summary>
        /// Extracts the author name from the first title node: the text before the first
        /// " -" with the "Цитати на тема " prefix removed.
        /// </summary>
        /// <param name="titleNode">Title nodes; may be null or empty.</param>
        /// <returns>
        /// The author name, or null when there is no title node. When the title contains no
        /// " -" separator the whole title (minus the prefix) is used.
        /// </returns>
        private string GetName(HtmlNodeCollection titleNode)
        {
            if (titleNode == null || titleNode.Count == 0)
            {
                return(null);
            }

            var innerText = titleNode[0].InnerText;
            var dashIndex = innerText.IndexOf(" -", System.StringComparison.Ordinal);

            // IndexOf returns -1 when the separator is absent; Substring(0, -1) would throw.
            var firstPartOfTitle = dashIndex >= 0 ? innerText.Substring(0, dashIndex) : innerText;

            return(firstPartOfTitle.Replace("Цитати на тема ", ""));
        }
Exemple #12
0
        /// <summary>
        /// Downloads the icd.codes page at <paramref name="Link"/> (relative to
        /// https://icd.codes), points <c>webBrowser1</c> at the same address, and converts
        /// every "panel panel-default" block into a <c>Diseases_Tbl</c> entry.
        /// </summary>
        /// <param name="Link">Site-relative path of the page to download.</param>
        /// <param name="Section_Fk">Section key stored on every returned entry.</param>
        /// <returns>One entry per panel; empty when the page contains no panels.</returns>
        private List <Diseases_Tbl> GetData4(string Link, int Section_Fk)
        {
            Link            = "https://icd.codes" + Link;
            webBrowser1.Url = new Uri(Link);

            string content;
            using (WebClient client = new WebClient())
            {
                content = client.DownloadString(Link);
            }

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(content);

            List <Diseases_Tbl> dt = new List <Diseases_Tbl>();

            var panels = doc.DocumentNode.SelectNodes(xpath: "//div[@id='content']/div[@class='panel-group']/div[@class='panel panel-default']");

            // SelectNodes returns null, not an empty collection, when nothing matches.
            if (panels == null)
            {
                return(dt);
            }

            foreach (HtmlNode table in panels)
            {
                // Re-parse the panel on its own so the "//" queries below only see this panel.
                HtmlAgilityPack.HtmlDocument docu = new HtmlAgilityPack.HtmlDocument();
                docu.LoadHtml(table.InnerHtml);

                HtmlNodeCollection nodes       = docu.DocumentNode.SelectNodes("//b[@class='text-warning']");
                HtmlNodeCollection muted       = docu.DocumentNode.SelectNodes("//span[@class='text-muted']");
                HtmlNodeCollection success     = docu.DocumentNode.SelectNodes("//b[@class='text-success']");
                HtmlNodeCollection Html_string = docu.DocumentNode.SelectNodes("//li[@class='list-group-item section-group']");

                // The code comes from the "text-warning" element when present, otherwise from "text-success".
                HtmlNodeCollection codeNodes = nodes != null ? nodes : success;

                Diseases_Tbl diseases = new Diseases_Tbl();
                diseases.Code       = codeNodes == null ? "" : codeNodes.FirstOrDefault().InnerText;
                diseases.Name       = muted == null ? "" : muted.FirstOrDefault().InnerText;
                diseases.Section_Fk = Section_Fk;

                // A panel without a section-group item simply has no detail rows.
                diseases.Diseases_destails_tbl = Html_string == null
                    ? new List <Diseases_destails_tbl>()
                    : GetData5(Html_string.FirstOrDefault().InnerHtml.ToString());

                dt.Add(diseases);
            }

            return(dt);
        }
Exemple #13
0
        /// <summary>
        /// Parses an HTML fragment and converts every <c>ul/li[@class='list-group-item']</c>
        /// into a <c>Diseases_destails_tbl</c> entry: code and link from the item's first
        /// anchor, name from its first span.
        /// </summary>
        /// <param name="content">HTML fragment to parse.</param>
        /// <returns>One entry per list item; empty when the fragment has none. Missing parts are stored as empty strings.</returns>
        private List <Diseases_destails_tbl> GetData5(string content)
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(content);

            List <Diseases_destails_tbl> dt = new List <Diseases_destails_tbl>();

            var items = doc.DocumentNode.SelectNodes(xpath: "//ul/li[@class='list-group-item']");

            if (items == null)
            {
                return(dt);
            }

            foreach (HtmlNode table in items)
            {
                // Re-parse the item on its own so the "//" queries below only see this item.
                HtmlAgilityPack.HtmlDocument docu = new HtmlAgilityPack.HtmlDocument();
                docu.LoadHtml(table.InnerHtml);

                HtmlNodeCollection CodeName  = docu.DocumentNode.SelectNodes("//a");
                HtmlNodeCollection LabelName = docu.DocumentNode.SelectNodes("//span");

                HtmlNode anchor = CodeName == null ? null : CodeName.FirstOrDefault();
                HtmlNode label  = LabelName == null ? null : LabelName.FirstOrDefault();

                // An anchor is not guaranteed to carry an href; the indexer returns null in that case.
                var href = anchor == null ? null : anchor.Attributes["href"];

                Diseases_destails_tbl destail = new Diseases_destails_tbl();
                destail.Code      = anchor == null ? "" : anchor.InnerText;
                destail.Name      = label == null ? "" : label.InnerText;
                destail.Link_data = href == null ? "" : href.Value;

                dt.Add(destail);
            }

            return(dt);
        }
Exemple #14
0
        /// <summary>
        /// Returns the inner text of the first node in <paramref name="htmlDocument"/> that
        /// matches <paramref name="xpath"/>.
        /// </summary>
        /// <param name="htmlDocument">Document to query.</param>
        /// <param name="xpath">XPath expression selecting the wanted node(s).</param>
        /// <returns>The first match's inner text, or an empty string when nothing matches.</returns>
        public string GetInnerText(HtmlDocument htmlDocument, string xpath)
        {
            HtmlNodeCollection matches = htmlDocument.DocumentNode.SelectNodes(xpath);
            HtmlNode firstMatch = matches == null ? null : matches.FirstOrDefault();

            return firstMatch == null ? string.Empty : firstMatch.InnerText;
        }
        /// <summary>
        /// Returns the trimmed inner text of the first node in the collection.
        /// </summary>
        /// <param name="htmlNodeCollection">Collection to read from; may be null.</param>
        /// <returns>The trimmed text, or null when the collection is null or has no first node.</returns>
        public static string Text(this HtmlNodeCollection htmlNodeCollection)
        {
            // The null-conditional chain short-circuits to null as soon as either the
            // collection or its first element is missing.
            return htmlNodeCollection?.FirstOrDefault()?.InnerText.Trim();
        }
Exemple #16
0
        /// <summary>
        /// Reads the "Mana Cost" row of the scraped table and updates the mana fields of
        /// <paramref name="scrappedCard"/> from the <c>alt</c> text of each non-text child
        /// of the value cell.
        /// </summary>
        /// <param name="scrappedCard">Card model whose mana fields are updated.</param>
        /// <param name="table">Rows to search; child node 1 of a row is read as the label, child node 2 as the value.</param>
        private static void SetManaCost(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
        {
            var manaRow = table.FirstOrDefault(candidate => candidate.ChildNodes[1].InnerText.Contains("Mana Cost"));

            if (manaRow == null)
            {
                return;
            }

            // Plain text nodes between the symbols carry no "alt" attribute and are skipped.
            foreach (var symbol in manaRow.ChildNodes[2].ChildNodes.Where(child => child.Name != "#text"))
            {
                var altText = symbol.GetAttributeValue("alt", "");

                if (altText.Contains("CMC"))
                {
                    // Take the character at index 3, plus the one at index 4 when the text is long enough.
                    scrappedCard.ColorlessMana = altText.Length > 4
                        ? altText[3].ToString() + altText[4]
                        : altText[3].ToString();
                }
                else if (altText.Contains("Color R"))
                {
                    scrappedCard.RedMana++;
                }
                else if (altText.Contains("Color U"))
                {
                    scrappedCard.BlueMana++;
                }
                else if (altText.Contains("Color G"))
                {
                    scrappedCard.GreenMana++;
                }
                else if (altText.Contains("Color W"))
                {
                    scrappedCard.WhiteMana++;
                }
                else if (altText.Contains("Color B"))
                {
                    scrappedCard.BlackMana++;
                }
            }
        }
Exemple #17
0
        /// <summary>
        /// Reads the bonnegueule.fr RSS feed and, for every item, downloads the linked page
        /// and keeps the inner HTML of its "entry-content clearfix e-content" div as the
        /// article content.
        /// </summary>
        /// <returns>One <c>Article</c> per feed item that has a link and whose page contains the content div; other items are skipped.</returns>
        private List <Article> RebuildFeed()
        {
            List <Article> articles = new List <Article>();

            string feedUrl = "https://www.bonnegueule.fr/feed/";

            // Dispose the reader even when the feed fails to parse.
            SyndicationFeed feed;
            using (XmlReader reader = XmlReader.Create(feedUrl))
            {
                feed = SyndicationFeed.Load(reader);
            }

            string ClassToGet = "entry-content clearfix e-content";
            string xPath      = @"//div[@class='" + ClassToGet + "']";
            var    web        = new HtmlWeb();

            foreach (SyndicationItem item in feed.Items)
            {
                // An item without a link has no page to download.
                string articleUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
                if (articleUrl == null)
                {
                    continue;
                }

                // Get the content of the article
                var    doc     = web.Load(articleUrl);
                string content = doc.DocumentNode.SelectNodes(xPath)?.FirstOrDefault()?.InnerHtml;
                if (content == null)
                {
                    continue;
                }

                articles.Add(new Article
                {
                    Id         = item.Id,
                    HTMLTitle  = item.Title?.Text,
                    Title      = item.Title?.Text,
                    WebsiteUrl = articleUrl,
                    Link       = articleUrl,
                    Summary    = item.Summary?.Text,
                    Content    = content.Replace("\b", ""),
                    MediaLink  = "",
                    Updated    = item.PublishDate.UtcDateTime,
                    // SyndicationCategory does not override ToString(), so use Name to get the category text.
                    Category   = item.Categories.FirstOrDefault()?.Name,
                    Author     = item.Authors.FirstOrDefault()?.Name
                });
            }

            return(articles);
        }
Exemple #18
0
 /// <summary>
 /// Fills every cell of <c>tableLayout</c> with the inner text of the first node matched
 /// by the cell's expression and passes the cell to <c>CellReader</c>. Cells whose
 /// expression matches nothing are left untouched. Does nothing when <c>NotFound</c> is set.
 /// </summary>
 public void Extract()
 {
     if (NotFound)
     {
         return;
     }

     foreach (Cell cell in tableLayout)
     {
         HtmlNodeCollection matches = HtmlDocument.DocumentNode.SelectNodes(cell.Expression());

         if (matches == null || matches.Count == 0)
         {
             continue;
         }

         cell.Value = matches[0].InnerText;
         CellReader(cell);
     }
 }
Exemple #19
0
        /// <summary>
        /// Reads the permanentstyle.com RSS feed and, for every item, downloads the linked
        /// page and keeps the inner HTML of its "siteorigin-widget-tinymce textwidget" div
        /// as the article content.
        /// </summary>
        /// <returns>One <c>Article</c> per feed item that has a link and whose page contains the content div; other items are skipped.</returns>
        private List <Article> RebuildFeed()
        {
            List <Article> articles = new List <Article>();

            string feedUrl = "https://www.permanentstyle.com/feed";

            // Dispose the reader even when the feed fails to parse.
            SyndicationFeed feed;
            using (XmlReader reader = XmlReader.Create(feedUrl))
            {
                feed = SyndicationFeed.Load(reader);
            }

            string ClassToGet = "siteorigin-widget-tinymce textwidget";
            string xPath      = @"//div[@class='" + ClassToGet + "']";
            var    web        = new HtmlWeb();

            foreach (SyndicationItem item in feed.Items)
            {
                // An item without a link has no page to download.
                string articleUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
                if (articleUrl == null)
                {
                    continue;
                }

                // Get the content of the article
                var    doc     = web.Load(articleUrl);
                string content = doc.DocumentNode.SelectNodes(xPath)?.FirstOrDefault()?.InnerHtml;
                if (content == null)
                {
                    continue;
                }

                articles.Add(new Article
                {
                    Id         = item.Id,
                    HTMLTitle  = item.Title?.Text,
                    Title      = item.Title?.Text,
                    WebsiteUrl = articleUrl,
                    Link       = articleUrl,
                    Summary    = item.Summary?.Text,
                    Content    = content,
                    MediaLink  = "",
                    Updated    = item.PublishDate.UtcDateTime,
                    // Items without any category used to throw here. Name is used because
                    // SyndicationCategory does not override ToString().
                    Category   = item.Categories.FirstOrDefault()?.Name,
                    Author     = "Simon Crompton"
                });
            }

            return(articles);
        }
Exemple #20
0
        /// <summary>
        /// Parses one play-list entry fragment and builds a <c>MovieViewModel</c> from its
        /// picture link (url, title, id), image source, optional score and info text.
        /// </summary>
        /// <param name="innerHtml">HTML fragment of a single entry.</param>
        /// <returns>The populated model. The method throws when the picture, link or info element is missing.</returns>
        private static MovieViewModel GetPalyList(string innerHtml)
        {
            HtmlDocument fragment = new HtmlDocument();
            fragment.LoadHtml(innerHtml);
            HtmlNode root = fragment.DocumentNode;

            HtmlNodeCollection images = root.SelectNodes("div[@class='site-piclist_pic']/a/img");
            HtmlNodeCollection links  = root.SelectNodes("div[@class='site-piclist_pic']/a");
            HtmlNodeCollection infos  = root.SelectNodes("div[@class='site-piclist_info']");
            HtmlNodeCollection scores = root.SelectNodes("div[@class='site-piclist_info']/div/span/strong");

            HtmlNode image = images.FirstOrDefault();
            HtmlNode link  = links.FirstOrDefault();
            HtmlNode info  = infos.FirstOrDefault();

            MovieViewModel movie = new MovieViewModel();
            movie.Url      = $"http://jx.vgoodapi.com/jx.php?url={link.Attributes["href"].Value}";
            movie.Title    = link.Attributes["title"].Value;
            movie.Id       = link.Attributes["data-qipuid"].Value;
            movie.ImageUrl = image.Attributes["src"].Value;

            // Score (rating) is optional.
            if (scores != null)
            {
                movie.Score = scores.FirstOrDefault().InnerText;
            }

            // Flatten the info text into comma-separated parts; everything from the third
            // part onwards is concatenated into the role text.
            string flattened = info.InnerText.Replace(" ", "").Replace("\r\n", ",");
            string[] parts   = ProcessRepetition(flattened).Trim(',').Split(",");

            if (parts.Length > 2)
            {
                movie.Role = string.Concat(parts.Skip(2)).Replace(",", " ");
            }

            return(movie);
        }
        /// <summary>
        /// Downloads the wiki page at <c>item.ItemLink</c>, locates the "loot value" row of
        /// the <c>wiki_table</c> and hands its cells to <c>GetLootValue</c>.
        /// Returns silently when the item has no link, the request is unsuccessful, or the
        /// row is not found. The HTTP calls are awaited synchronously via <c>.Result</c>.
        /// </summary>
        /// <param name="item">Item to look up; passed on to <c>GetLootValue</c>.</param>
        /// <exception cref="InvalidDataException">The response was successful but its body was empty.</exception>
        private void GetItemDetailedInfo(ref Item item)
        {
            if (string.IsNullOrEmpty(item.ItemLink))
            {
                return;
            }

            string html;

            // Dispose the response as soon as its body has been read.
            using (HttpResponseMessage response = _httpClient.GetAsync(item.ItemLink).Result)
            {
                if (!response.IsSuccessStatusCode)
                {
                    return;
                }

                html = response.Content.ReadAsStringAsync().Result;
            }

            if (string.IsNullOrEmpty(html))
            {
                throw new InvalidDataException($"Empty html content while getting items in {nameof(GetItemDetailedInfo)}");
            }

            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);

            HtmlNodeCollection infoNodes = document.DocumentNode.SelectNodes("//table[@class='wiki_table']/tr");

            HtmlNodeCollection lootValueNodes = infoNodes?.FirstOrDefault(n => n.InnerText.ToLower().Contains("loot value"))?.SelectNodes("td");

            // At least two cells are needed; the second cell's text is what gets logged below.
            if (lootValueNodes != null && lootValueNodes.Count > 1)
            {
                GetLootValue(item, lootValueNodes);

                _logger.LogInformation($"Tried to get the lootvalue wiki string: {lootValueNodes[1]?.InnerText} " +
                                       $"current item information: " +
                                       $"\n Name: {item.Name} \n LootValue: {item.LootValue}");
            }
        }
        /// <summary>
        /// Reads the philippesilberzahn.com RSS feed and, for every item, downloads the
        /// linked page and keeps the inner HTML of its
        /// "post type-post status-publish format-standard" div as the article content.
        /// </summary>
        /// <returns>One <c>Article</c> per feed item that has a link and whose page contains the content div; other items are skipped.</returns>
        private List <Article> RebuildFeed()
        {
            List <Article> articles = new List <Article>();

            string feedUrl = "https://philippesilberzahn.com/feed/";

            // Dispose the reader even when the feed fails to parse.
            SyndicationFeed feed;
            using (XmlReader reader = XmlReader.Create(feedUrl))
            {
                feed = SyndicationFeed.Load(reader);
            }

            string ClassToGet = "post type-post status-publish format-standard";
            string xPath      = @"//div[@class='" + ClassToGet + "']";
            var    web        = new HtmlWeb();

            foreach (SyndicationItem item in feed.Items)
            {
                // An item without a link has no page to download.
                string articleUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
                if (articleUrl == null)
                {
                    continue;
                }

                // Get the content of the article
                var doc = web.Load(articleUrl);

                // SelectNodes returns null when the div is absent; skip that article
                // instead of aborting the whole feed with a NullReferenceException.
                string content = doc.DocumentNode.SelectNodes(xPath)?.FirstOrDefault()?.InnerHtml;
                if (content == null)
                {
                    continue;
                }

                articles.Add(new Article
                {
                    Id         = item.Id,
                    HTMLTitle  = item.Title?.Text,
                    Title      = item.Title?.Text,
                    WebsiteUrl = articleUrl,
                    Link       = articleUrl,
                    Summary    = item.Summary?.Text,
                    Content    = content.Replace("\b", ""),
                    MediaLink  = "",
                    Updated    = item.PublishDate.UtcDateTime,
                    // SyndicationCategory does not override ToString(), so use Name to get the category text.
                    Category   = item.Categories.FirstOrDefault()?.Name,
                    Author     = item.Authors.FirstOrDefault()?.Name
                });
            }

            return(articles);
        }
Exemple #23
0
        /// <summary>
        /// Parses a product-listing page, checks that at least one listed product matches
        /// the fixed keyword set, and records a <c>Product</c> (keyword plus the optional
        /// "global-text" button text) through <c>_iad</c>.
        /// </summary>
        /// <param name="html">HTML of the listing page.</param>
        /// <returns>
        /// A fixed message when the page lists no products or none of them matches;
        /// otherwise the string form of the stored keyword.
        /// </returns>
        public string CrawlSteepandCheap(string html)
        {
            var page = new HtmlDocument();
            page.LoadHtml(html);

            HtmlNodeCollection listItems = page.DocumentNode.SelectNodes("//ul[contains(@class, 'ui-product-listing-grid')]/li");

            // The keyword search returned no products at all.
            if (listItems == null)
            {
                return("We couldn’t find any results");
            }

            string[] words = new string[] { "alpha", "sv", "men" };

            // None of the listed products satisfies the keyword condition.
            bool anyQualified = listItems.Any(item => ContainWords(words, item.InnerText));
            if (!anyQualified)
            {
                return("No qualified products");
            }

            Product product = new Product();
            product.Keyword = _iad.AddKeyword(words);

            HtmlNode globalTextButton = page.DocumentNode.SelectSingleNode("//button[contains(@class,'global-text')]");
            if (globalTextButton != null)
            {
                product.GlobalText = HttpUtility.HtmlDecode(globalTextButton.InnerText).Trim();
            }

            _iad.AddProduct(product);

            return(product.Keyword.ToString());
        }
    /// <summary>
    /// Loads d:/my.html, prints the outer HTML of every direct <c>img</c> child of the
    /// first element with class "image-section", then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var html = File.ReadAllText(@"d:/my.html");

        var htmlDoc = new HtmlDocument();

        htmlDoc.LoadHtml(html);

        HtmlNodeCollection sections = htmlDoc.DocumentNode.SelectNodes("//*[@class='image-section']");

        // SelectNodes returns null when nothing matches; calling FirstOrDefault() on
        // that null would throw ArgumentNullException.
        var section = sections?.FirstOrDefault();

        if (section != null)
        {
            foreach (var imgElement in section.Elements("img"))
            {
                Console.WriteLine(imgElement.OuterHtml);
            }
        }

        Console.ReadKey();
    }
        /// <summary>
        /// Splits the inner text of the first node in <paramref name="table"/> into trimmed,
        /// non-empty lines (separated by "\r\n") and pairs them up as (name, value):
        /// lines at even positions are names, lines at odd positions are values.
        /// </summary>
        /// <param name="table">Collection whose first node holds the text; may be null.</param>
        /// <returns>The name/value pairs, or an empty sequence when there is no node or the lines do not pair up evenly.</returns>
        private IEnumerable <Tuple <string, string> > ParseValues(HtmlNodeCollection table)
        {
            var node = table?.FirstOrDefault();

            if (node == null)
            {
                return(Enumerable.Empty <Tuple <string, string> >());
            }

            // Materialise once; the sequence is indexed below.
            var items = node.InnerText
                        .Split("\r\n")
                        .Select(v => v.Trim())
                        .Where(v => !string.IsNullOrEmpty(v))
                        .ToArray();

            // An odd number of lines means some name has no value.
            if (items.Length % 2 != 0)
            {
                return(Enumerable.Empty <Tuple <string, string> >());
            }

            return(Enumerable.Range(0, items.Length / 2)
                   .Select(i => new Tuple <string, string>(items[2 * i], items[2 * i + 1])));
        }
        /// <summary>
        /// Downloads <paramref name="url"/> and evaluates <paramref name="nodeExpression"/>
        /// against it, retrying the download up to ten times until the expression matches.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="nodeExpression">XPath expression that must match.</param>
        /// <param name="doc">Receives the document of the last attempt.</param>
        /// <returns>The first node matched by the expression.</returns>
        /// <exception cref="Exception">The expression matched nothing in any of the attempts.</exception>
        private HtmlNode tryLoadNode(string url, string nodeExpression, out HtmlDocument doc)
        {
            doc = null;
            HtmlNodeCollection matches = null;

            for (int attemptsLeft = 10; matches == null && attemptsLeft > 0; attemptsLeft--)
            {
                string pageSource = DownloadContent(url);
                doc = new HtmlDocument();
                doc.LoadHtml(pageSource);
                matches = doc.DocumentNode.SelectNodes(nodeExpression);
            }

            if (matches != null)
            {
                return(matches.FirstOrDefault());
            }

            throw new Exception(string.Format("Unable to retrieve node \"{0}\" for \"{1}\"", nodeExpression, url));
        }
Exemple #27
0
        /// <summary>
        /// Returns the first node that has an attribute named <paramref name="attName"/>
        /// whose value equals <paramref name="attValue"/>.
        /// </summary>
        /// <param name="htmlNodes">Nodes to search; may be null.</param>
        /// <param name="attName">Attribute name to match exactly.</param>
        /// <param name="attValue">Attribute value to match exactly.</param>
        /// <returns>The first matching node, or null when the collection is null or nothing matches.</returns>
        private HtmlNode GetNodeWithAttributeValue(HtmlNodeCollection htmlNodes, string attName, string attValue)
        {
            // The guard used to be inverted ("!= null"): it returned null for every real
            // collection and fell through to dereference a null one.
            if (htmlNodes == null)
            {
                return(null);
            }

            return(htmlNodes.FirstOrDefault(p => p.Attributes.Any(a => a.Name == attName && a.Value == attValue)));
        }
Exemple #28
0
        /// <summary>
        /// Reads the batirama.com RSS feed and, for every item, downloads the linked page
        /// and keeps the inner HTML of its "post post-default post-variant-3" div as the
        /// article content.
        /// </summary>
        /// <returns>One <c>Article</c> per feed item that has a link and whose page contains the content div; other items are skipped.</returns>
        private List <Article> RebuildFeed()
        {
            List <Article> articles = new List <Article>();

            string feedUrl = "https://www.batirama.com/rss/2-l-info-actualites.html";

            // Dispose the reader even when the feed fails to parse.
            SyndicationFeed feed;
            using (XmlReader reader = XmlReader.Create(feedUrl))
            {
                feed = SyndicationFeed.Load(reader);
            }

            string ClassToGet = "post post-default post-variant-3";
            string xPath      = @"//div[@class='" + ClassToGet + "']";
            var    web        = new HtmlWeb();

            foreach (SyndicationItem item in feed.Items)
            {
                // An item without a link has no page to download.
                string articleUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
                if (articleUrl == null)
                {
                    continue;
                }

                // Get the content of the article
                var doc = web.Load(articleUrl);

                // SelectNodes returns null when the div is absent; skip that article
                // instead of aborting the whole feed with a NullReferenceException.
                string content = doc.DocumentNode.SelectNodes(xPath)?.FirstOrDefault()?.InnerHtml;
                if (content == null)
                {
                    continue;
                }

                articles.Add(new Article
                {
                    Id         = item.Id,
                    HTMLTitle  = item.Title?.Text,
                    Title      = item.Title?.Text,
                    WebsiteUrl = articleUrl,
                    Link       = articleUrl,
                    Summary    = item.Summary?.Text,
                    Content    = content,
                    MediaLink  = "",
                    Updated    = item.PublishDate.UtcDateTime,
                    // SyndicationCategory does not override ToString(), so use Name to get the category text.
                    Category   = item.Categories.FirstOrDefault()?.Name,
                    Author     = item.Authors.FirstOrDefault()?.Name
                });
            }

            return(articles);
        }
Exemple #29
0
        /// <summary>
        /// Downloads <paramref name="url"/>, follows the first link selected by the XPath
        /// that <c>GetFristPath</c> returns for <paramref name="checkedIndex"/>, and builds
        /// one numbered play entry per list item found on the linked page.
        /// </summary>
        /// <param name="url">Address of the page to start from.</param>
        /// <param name="checkedIndex">Value passed to <c>GetFristPath</c> to choose the XPath.</param>
        /// <returns>The entries collected before any failure; failures are logged, not thrown.</returns>
        public static List <PlayViewModel> Crawler(string url, string checkedIndex)
        {
            var playList = new List <PlayViewModel>();

            try
            {
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(HttpHelper.Download(url));

                var entryLink = doc.DocumentNode.SelectNodes(GetFristPath(checkedIndex)).FirstOrDefault();
                string playListUrl = entryLink.Attributes["href"].Value;

                // Fetch the episode list; the same document object is reused for the second page.
                doc.LoadHtml(HttpHelper.Download(playListUrl));
                HtmlNodeCollection episodes = doc.DocumentNode.SelectNodes("//div[@data-series-elem='cont']/div/ul/li");

                int number = 0;
                foreach (var episode in episodes)
                {
                    number++;

                    var tvId   = episode.Attributes["data-videolist-tvid"].Value;
                    var vId    = episode.Attributes["data-videolist-vid"].Value;
                    var anchor = episode.ChildNodes.FirstOrDefault(child => child.Name == "a");
                    // Drop the first two characters of the href before prefixing "http://" below.
                    var tvUrl  = anchor.Attributes["href"].Value.Remove(0, 2);

                    playList.Add(new PlayViewModel
                    {
                        Number = number,
                        Url    = $"http://jx.aeidu.cn/index.php?url=http://{tvUrl}#curid={tvId}_{vId}"
                    });
                }
            }
            catch (Exception ex)
            {
                logger.Error("CrawlerMuti出现异常", ex);
            }

            return(playList);
        }
        /// <summary>
        /// Returns the de-entitized, trimmed inner text of the first element matched by
        /// <paramref name="xpath"/> within the first node of <paramref name="nodes"/> that
        /// has any match.
        /// </summary>
        /// <param name="nodes">Nodes to search; may be null or empty.</param>
        /// <param name="xpath">XPath evaluated against each node in turn.</param>
        /// <returns>The cleaned text, or an empty string when nothing matches or the text is empty.</returns>
        private string GetHtmlString(HtmlNodeCollection nodes, string xpath)
        {
            if (nodes == null || !nodes.Any())
            {
                return(String.Empty);
            }

            // Evaluate the XPath once per node and keep the first non-null result set.
            var matches = nodes
                          .Select(x => x.SelectNodes(xpath))
                          .FirstOrDefault(found => found != null);

            if (matches == null)
            {
                return(String.Empty);
            }

            var value = matches.First().InnerText;

            if (String.IsNullOrEmpty(value))
            {
                return(String.Empty);
            }

            // Replace non-breaking spaces with ordinary spaces. The character is written as
            // an escape so it stays visible and survives editors and encoding changes.
            return(HtmlEntity.DeEntitize(value).Replace("\u00A0", " ").Trim());
        }