/// <summary>
/// Downloads the commodity page selected by <c>m.CommodityId</c> and fills the detail
/// properties of <paramref name="m"/> from the parsed HTML.
/// </summary>
/// <param name="m">The mobile to populate; its detail properties are overwritten.</param>
public static void LoadMobileInfo(Mobile m)
{
    // The using blocks dispose (and thereby close) the response and its stream,
    // so no explicit Close() calls are needed.
    using (HttpWebResponse res = (HttpWebResponse)CreateCommodityInfoRequest(m.CommodityId).GetResponse())
    using (Stream s = res.GetResponseStream())
    {
        HtmlDocument doc = new HtmlDocument();
        doc.Load(s, Encoding.GetEncoding("GB2312"));

        HtmlNodeCollection simpleInfoNodes = doc.DocumentNode.SelectNodes(
            "//ul[@class='parameter2 p-parameter-list']/li");
        HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(
            "//div[@class='Ptable-item']");

        // Product introduction - product name.
        // The attribute indexer yields null when "title" is absent, hence the extra "?.".
        m.Name = simpleInfoNodes
            ?.FirstOrDefault(n => n.InnerText.Contains("商品名称"))
            ?.Attributes["title"]?.Value;

        // Product introduction - place of origin.
        m.Origin = simpleInfoNodes
            ?.FirstOrDefault(n => n.InnerText.Contains("商品产地"))
            ?.Attributes["title"]?.Value;

        // Main body - brand / model number.
        m.Brand = GetMobileInfo(nodes, "主体", "品牌");
        m.ModelNumber = GetMobileInfo(nodes, "主体", "型号");

        // Main body - launch year. Anything DateTime cannot represent (including the
        // 0 produced for a missing value) falls back to the current year.
        int year = GetMobileInfo(nodes, "主体", "上市年份").ToInt();
        if (year < DateTime.MinValue.Year || year > DateTime.MaxValue.Year)
        {
            year = DateTime.Now.Year;
        }

        // Main body - launch month. 0 falls back to the current month; any other
        // value outside 1..12 falls back to January.
        int month = GetMobileInfo(nodes, "主体", "上市月份").ToInt();
        if (month == 0)
        {
            month = DateTime.Now.Month;
        }
        else if (month < 1 || month > 12)
        {
            month = 1;
        }

        m.MarketTime = new DateTime(year, month, 1);

        // Storage - ROM / RAM.
        m.ROM = GetMobileInfo(nodes, "存储", "ROM").ToInt();
        m.RAM = GetMobileInfo(nodes, "存储", "RAM").ToInt();

        // Main chip - CPU model / core count.
        m.CPUModelNumber = GetMobileInfo(nodes, "主芯片", "CPU型号");
        m.CPUCoreNumber = GetMobileInfo(nodes, "主芯片", "CPU核数");

        // Battery information - capacity.
        m.BatteryCapacity = GetMobileInfo(nodes, "电池信息", "电池容量(mAh)").ToInt();
    }
}
/// <summary>
/// Copies the trimmed text of the third child of the "Card name" row into
/// <c>scrappedCard.CardName</c>. Does nothing when no such row exists.
/// </summary>
/// <param name="scrappedCard">The card model to update.</param>
/// <param name="table">Rows to search; may be null.</param>
private static void SetName(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
{
    // Rows with fewer than three children cannot hold both the label (index 1) and
    // the value (index 2); skip them instead of letting the indexer throw.
    var row = table?.FirstOrDefault(x =>
        x.ChildNodes.Count > 2 && x.ChildNodes[1].InnerText.Contains("Card name"));

    if (row != null)
    {
        scrappedCard.CardName = row.ChildNodes[2].InnerText.Trim();
    }
}
/// <summary>
/// Returns the inner text of the first node in <paramref name="col"/>, or an empty
/// string when the collection is null or has no first node.
/// </summary>
/// <param name="col">The node collection to read from; may be null.</param>
/// <returns>The first node's inner text, or "".</returns>
private string handleItem(HtmlNodeCollection col)
{
    if (col == null)
    {
        Log.Information("collection is null, returning empty string");
        return string.Empty;
    }

    var head = col.FirstOrDefault();
    if (head == null)
    {
        // Empty collection: nothing is logged on this path.
        return string.Empty;
    }

    Log.Information("returning col text");
    return head.InnerText;
}
/// <summary>
/// Gets the user's blurb/status using the profile page.
///
/// Will most likely break due to ROBLOX possibly changing the html format.
/// </summary>
/// <returns>
/// The status/blurb of this user, or null when the <c>data-statustext</c> attribute
/// is missing or empty.
/// </returns>
/// <exception cref="Exception">The profile page has no element for this user's ID.</exception>
public async Task<string> GetStatus()
{
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(await HttpHelper.GetStringFromURL(string.Format("https://www.roblox.com/users/{0}/profile", ID)));

    HtmlNode profileNode = document
        .DocumentNode
        .SelectNodes(string.Format("//div[@data-profileuserid='{0}']", ID))
        ?.FirstOrDefault();

    if (profileNode == null)
    {
        throw new Exception("User page did not have the correct element. Did the website change?");
    }

    // The attribute indexer yields null for a missing attribute; treat that like an empty status.
    string status = profileNode.Attributes["data-statustext"]?.Value;
    return string.IsNullOrEmpty(status) ? null : status;
}
/// <summary>
/// Reads the "P/T" row, strips surrounding whitespace and parentheses from its third
/// child, splits the text on '/', and stores the first two parts as
/// <c>Power</c> and <c>Thoughness</c>.
/// </summary>
/// <param name="scrappedCard">The card model to update.</param>
/// <param name="table">Rows to search; may be null.</param>
private static void SetPowerAndThoughness(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
{
    // Only rows with a label cell (index 1) and a value cell (index 2) qualify.
    var row = table?.FirstOrDefault(x =>
        x.ChildNodes.Count > 2 && x.ChildNodes[1].InnerText.Contains("P/T"));
    if (row == null)
    {
        return;
    }

    var pt = row.ChildNodes[2].InnerText.Trim().Trim('(', ')').Split('/');

    // Without a '/' separator there is no second part; leave the card untouched
    // rather than indexing past the end of the array.
    if (pt.Length < 2)
    {
        return;
    }

    scrappedCard.Power = pt[0];
    scrappedCard.Thoughness = pt[1];
}
/// <summary>
/// Returns the first node under <paramref name="marketNode"/> matching
/// <c>ContainerXPaths.MARKET</c>, or null when the node is null or nothing matches.
/// </summary>
/// <param name="marketNode">The node to search from; may be null.</param>
/// <returns>The first matching node, or null.</returns>
private HtmlNode GetMarketContainer(HtmlNode marketNode)
{
    HtmlNodeCollection matches = marketNode?.SelectNodes(ContainerXPaths.MARKET);
    return matches == null ? null : matches.FirstOrDefault();
}
/// <summary>
/// Gets the text from an HtmlNode that may or may not be there based on the label from another node
/// </summary>
/// <param name="list">The list to fetch information from; may be null</param>
/// <param name="name">The name of the label to search for (compared against the lower-cased label text)</param>
/// <param name="extraTrimChars">Optional parameter to add more trim characters</param>
/// <returns>Either an empty string or the text in the specified HtmlNode</returns>
private string GetAttributeText(HtmlNodeCollection list, string name, char[] extraTrimChars = null)
{
    // Build the trim set once instead of once per visited node.
    char[] trimChars = _trimChars.Concat(extraTrimChars ?? new char[0]).ToArray();

    // The label lives in child 0; nodes without children cannot match.
    var match = list?.FirstOrDefault(x =>
        x.ChildNodes.Count > 0 &&
        x.ChildNodes[0].InnerText.Trim(trimChars).ToLower() == name);

    // The value lives in child 1; a labelled node without a value yields "".
    string text = match != null && match.ChildNodes.Count > 1
        ? match.ChildNodes[1].InnerText.Trim(trimChars)
        : "";

    return RemoveDoubleSpaces(text);
}
/// <summary>
/// Downloads the page at <paramref name="link"/>, reads its <c>og:image</c> meta tag
/// (falling back to <c>og:image:secure_url</c>), downloads that image and re-encodes
/// it as a JPEG resized to fit a 300x100 geometry with the aspect ratio kept.
/// </summary>
/// <param name="link">URL of the page whose preview image is wanted.</param>
/// <returns>
/// The JPEG bytes; null when the page has no usable meta image or the download failed;
/// the downloaded, unconverted bytes when only the resize step failed.
/// </returns>
private async Task<byte[]> GetThumbnail(string link)
{
    byte[] imgContent = null;
    try
    {
        // One client serves both requests; the original allocated a second client
        // that was never used.
        using (var httpClient = new HttpClient())
        {
            string imgUrl;
            using (var response = await httpClient.GetAsync(link))
            {
                response.EnsureSuccessStatusCode();
                using (var stream = await response.Content.ReadAsStreamAsync())
                {
                    HtmlDocument doc = new HtmlDocument();
                    doc.Load(stream);

                    HtmlNodeCollection metaImageNodes =
                        doc.DocumentNode.SelectNodes("/html/head/meta[@property='og:image']")
                        ?? doc.DocumentNode.SelectNodes("/html/head/meta[@property='og:image:secure_url']");

                    imgUrl = metaImageNodes?.FirstOrDefault()?.Attributes["content"]?.Value;
                }
            }

            if (imgUrl == null)
            {
                return null;
            }

            using (var imgResponse = await httpClient.GetAsync(imgUrl))
            {
                imgResponse.EnsureSuccessStatusCode();
                imgContent = await imgResponse.Content.ReadAsByteArrayAsync();
            }
        }

        using (MagickImage image = new MagickImage(imgContent))
        {
            var size = new MagickGeometry(300, 100);
            size.IgnoreAspectRatio = false;
            image.Resize(size);
            image.Format = MagickFormat.Jpg;
            imgContent = image.ToByteArray();
        }
    }
    catch (Exception ex)
    {
        // Best effort: log and return whatever has been obtained so far.
        _logger.LogError(ex, $"Error downloading image for: {link}");
    }

    return imgContent;
}
/// <summary>
/// Loads <paramref name="targetUrl"/> and, when the page contains links whose href
/// contains "ModuleID" inside an <c>EventDayScroll</c> div, stores the href of the
/// first such link in <paramref name="earliestUrl"/> and of the last one in
/// <paramref name="latestUrl"/>. Both are left untouched when there are no matches.
/// </summary>
/// <param name="targetUrl">Page to load.</param>
/// <param name="earliestUrl">Receives the href of the first matching link.</param>
/// <param name="latestUrl">Receives the href of the last matching link.</param>
private void GetEarliestMeeting(string targetUrl, ref string earliestUrl, ref string latestUrl)
{
    HtmlDocument page = new HtmlWeb().Load(targetUrl);
    HtmlNodeCollection meetingLinks = page.DocumentNode.SelectNodes(
        "//div[@class='EventDayScroll']//a[contains(@href,'ModuleID')]");

    if (meetingLinks == null)
    {
        return;
    }

    HtmlNode firstLink = meetingLinks.FirstOrDefault();
    earliestUrl = firstLink.Attributes["href"].Value;

    HtmlNode lastLink = meetingLinks.LastOrDefault();
    latestUrl = lastLink.Attributes["href"].Value;
}
/// <summary>
/// Finds the first <c>td</c> whose text equals <paramref name="propertyName"/> in any
/// of <paramref name="rows"/> and returns the inner text of the node two siblings
/// after it.
/// </summary>
/// <param name="propertyName">
/// Exact cell text to look for. It is embedded in a single-quoted XPath literal, so it
/// presumably must not contain a single quote - TODO confirm with callers.
/// </param>
/// <param name="rows">Rows to search; may be null.</param>
/// <returns>The value text, or null when the label cell or its second sibling is missing.</returns>
private static string GetValue(string propertyName, HtmlNodeCollection rows)
{
    string cellXPath = $"td[text()='{propertyName}']";

    // Evaluate the XPath once per row and keep the first hit, instead of selecting
    // the matching row and then querying it a second time.
    var cellContainingMatch = rows
        ?.Select(row => row.SelectSingleNode(cellXPath))
        .FirstOrDefault(cell => cell != null);

    // Either sibling may be absent (e.g. the label is the last cell of its row).
    return cellContainingMatch?.NextSibling?.NextSibling?.InnerText;
}
/// <summary>
/// Extracts a name from the first title node: takes the text before the first " -"
/// and removes the "Цитати на тема " prefix text.
/// </summary>
/// <param name="titleNode">Collection whose first node holds the title; may be null or empty.</param>
/// <returns>The extracted name, or null when there is no title node.</returns>
private string GetName(HtmlNodeCollection titleNode)
{
    if (titleNode == null || titleNode.Count == 0)
    {
        return null;
    }

    string innerText = titleNode[0].InnerText;

    // A title without " -" previously made Substring throw; use the whole text instead.
    int dashIndex = innerText.IndexOf(" -", System.StringComparison.Ordinal);
    string firstPartOfTitle = dashIndex >= 0 ? innerText.Substring(0, dashIndex) : innerText;

    return firstPartOfTitle.Replace("Цитати на тема ", "");
}
/// <summary>
/// Downloads <c>https://icd.codes</c> + <paramref name="Link"/>, points
/// <c>webBrowser1</c> at the same URL, and builds one <c>Diseases_Tbl</c> per
/// "panel panel-default" element, including its details parsed by <c>GetData5</c>.
/// </summary>
/// <param name="Link">Site-relative path appended to the host.</param>
/// <param name="Section_Fk">Section key stored on every produced record.</param>
/// <returns>The parsed records; empty when the page contains no panels.</returns>
private List<Diseases_Tbl> GetData4(string Link, int Section_Fk)
{
    Link = "https://icd.codes" + Link;
    webBrowser1.Url = new Uri(Link);

    string content;
    using (WebClient client = new WebClient())
    {
        content = client.DownloadString(Link);
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(content);

    List<Diseases_Tbl> dt = new List<Diseases_Tbl>();

    var n = doc.DocumentNode.SelectNodes(xpath: "//div[@id='content']/div[@class='panel-group']/div[@class='panel panel-default']");
    if (n == null)
    {
        // No panels on the page: nothing to parse.
        return dt;
    }

    foreach (HtmlNode table in n)
    {
        // Re-parse the panel so the "//" queries below only see this panel's content.
        HtmlAgilityPack.HtmlDocument docu = new HtmlAgilityPack.HtmlDocument();
        docu.LoadHtml(table.InnerHtml);

        HtmlNodeCollection nodes = docu.DocumentNode.SelectNodes("//b[@class='text-warning']");
        HtmlNodeCollection muted = docu.DocumentNode.SelectNodes("//span[@class='text-muted']");
        HtmlNodeCollection success = docu.DocumentNode.SelectNodes("//b[@class='text-success']");
        HtmlNodeCollection Html_string = docu.DocumentNode.SelectNodes("//li[@class='list-group-item section-group']");

        // The code comes from the "text-warning" element, else "text-success", else "".
        string Code = (nodes ?? success)?.FirstOrDefault()?.InnerText ?? "";
        string Name = muted?.FirstOrDefault()?.InnerText ?? "";

        // A panel without a section group has no details; previously this crashed.
        HtmlNode detailsNode = Html_string?.FirstOrDefault();
        List<Diseases_destails_tbl> destails = detailsNode == null
            ? new List<Diseases_destails_tbl>()
            : GetData5(detailsNode.InnerHtml);

        Diseases_Tbl diseases = new Diseases_Tbl();
        diseases.Code = Code;
        diseases.Name = Name;
        diseases.Section_Fk = Section_Fk;
        diseases.Diseases_destails_tbl = destails;
        dt.Add(diseases);
    }

    return dt;
}
/// <summary>
/// Parses an HTML fragment and builds one <c>Diseases_destails_tbl</c> per
/// <c>ul/li[@class='list-group-item']</c> element: the first anchor supplies the code
/// and link, the first span supplies the name.
/// </summary>
/// <param name="content">HTML fragment to parse.</param>
/// <returns>The parsed detail records; empty when no list items are found.</returns>
private List<Diseases_destails_tbl> GetData5(string content)
{
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(content);

    List<Diseases_destails_tbl> dt = new List<Diseases_destails_tbl>();

    var n = doc.DocumentNode.SelectNodes(xpath: "//ul/li[@class='list-group-item']");
    if (n == null)
    {
        return dt;
    }

    foreach (HtmlNode table in n)
    {
        // Re-parse the item so the "//" queries below only see this item's content.
        HtmlAgilityPack.HtmlDocument docu = new HtmlAgilityPack.HtmlDocument();
        docu.LoadHtml(table.InnerHtml);

        HtmlNode anchor = docu.DocumentNode.SelectNodes("//a")?.FirstOrDefault();
        HtmlNode label = docu.DocumentNode.SelectNodes("//span")?.FirstOrDefault();

        Diseases_destails_tbl destail = new Diseases_destails_tbl();
        destail.Code = anchor?.InnerText ?? "";
        destail.Name = label?.InnerText ?? "";
        // An anchor without an href yields "" instead of a NullReferenceException.
        destail.Link_data = anchor?.Attributes["href"]?.Value ?? "";
        dt.Add(destail);
    }

    return dt;
}
/// <summary>
/// Returns the inner text of the first node matching <paramref name="xpath"/> in
/// <paramref name="htmlDocument"/>, or an empty string when nothing matches.
/// </summary>
/// <param name="htmlDocument">The document to query.</param>
/// <param name="xpath">The XPath expression to evaluate from the document node.</param>
/// <returns>The first match's inner text, or <c>string.Empty</c>.</returns>
public string GetInnerText(HtmlDocument htmlDocument, string xpath)
{
    HtmlNode firstMatch = htmlDocument.DocumentNode.SelectNodes(xpath)?.FirstOrDefault();
    return firstMatch == null ? string.Empty : firstMatch.InnerText;
}
/// <summary>
/// Returns the trimmed inner text of the first node in the collection.
/// </summary>
/// <param name="htmlNodeCollection">The collection to read; may be null.</param>
/// <returns>The trimmed text, or null when the collection is null or has no first node.</returns>
public static string Text(this HtmlNodeCollection htmlNodeCollection)
{
    var head = htmlNodeCollection?.FirstOrDefault();
    return head?.InnerText.Trim();
}
/// <summary>
/// Reads the "Mana Cost" row and walks the non-text children of its third cell,
/// using each child's <c>alt</c> attribute to set <c>ColorlessMana</c> or bump the
/// matching colour counter on <paramref name="scrappedCard"/>.
/// </summary>
/// <param name="scrappedCard">The card model to update.</param>
/// <param name="table">Rows to search; may be null.</param>
private static void SetManaCost(ScrappedCardModel scrappedCard, HtmlNodeCollection table)
{
    // Only rows with a label cell (index 1) and a value cell (index 2) qualify.
    var row = table?.FirstOrDefault(x =>
        x.ChildNodes.Count > 2 && x.ChildNodes[1].InnerText.Contains("Mana Cost"));
    if (row == null)
    {
        return;
    }

    foreach (var childNode in row.ChildNodes[2].ChildNodes)
    {
        if (childNode.Name == "#text")
        {
            continue;
        }

        var alt = childNode.GetAttributeValue("alt", "");
        if (alt.Contains("CMC"))
        {
            // The amount is read from the characters at index 3 (and 4 when present).
            // An alt too short to have index 3 is ignored instead of throwing.
            if (alt.Length > 4)
            {
                scrappedCard.ColorlessMana = alt[3].ToString() + alt[4];
            }
            else if (alt.Length > 3)
            {
                scrappedCard.ColorlessMana = alt[3].ToString();
            }
        }
        else if (alt.Contains("Color R"))
        {
            scrappedCard.RedMana++;
        }
        else if (alt.Contains("Color U"))
        {
            scrappedCard.BlueMana++;
        }
        else if (alt.Contains("Color G"))
        {
            scrappedCard.GreenMana++;
        }
        else if (alt.Contains("Color W"))
        {
            scrappedCard.WhiteMana++;
        }
        else if (alt.Contains("Color B"))
        {
            scrappedCard.BlackMana++;
        }
    }
}
/// <summary>
/// Reads the bonnegueule.fr RSS feed and, for each item, loads the linked page and
/// captures the inner HTML of its "entry-content clearfix e-content" div as the
/// article content. Items without a link or without that div are skipped.
/// </summary>
/// <returns>The articles that could be rebuilt.</returns>
private List<Article> RebuildFeed()
{
    List<Article> articles = new List<Article>();
    string feedUrl = "https://www.bonnegueule.fr/feed/";

    // Dispose the reader even when loading the feed throws.
    SyndicationFeed feed;
    using (XmlReader reader = XmlReader.Create(feedUrl))
    {
        feed = SyndicationFeed.Load(reader);
    }

    string ClassToGet = "entry-content clearfix e-content";
    string xPath = @"//div[@class='" + ClassToGet + "']";
    var web = new HtmlWeb();

    foreach (SyndicationItem item in feed.Items)
    {
        // An item without any link has no page to fetch.
        string articleUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
        if (articleUrl == null)
        {
            continue;
        }

        // Get the content of the article
        var doc = web.Load(articleUrl);
        string content = doc.DocumentNode.SelectNodes(xPath)?.FirstOrDefault()?.InnerHtml;
        if (content == null)
        {
            continue;
        }

        articles.Add(new Article
        {
            Id = item.Id,
            HTMLTitle = item.Title?.Text,
            Title = item.Title?.Text,
            WebsiteUrl = articleUrl,
            Link = articleUrl,
            Summary = item.Summary?.Text,
            Content = content.Replace("\b", ""),
            MediaLink = "",
            Updated = item.PublishDate.UtcDateTime,
            Category = item.Categories.FirstOrDefault()?.ToString(),
            Author = item.Authors.FirstOrDefault()?.Name
        });
    }

    return articles;
}
/// <summary>
/// For every cell in <c>tableLayout</c>, evaluates the cell's XPath expression
/// against the document; when it matches, stores the first match's inner text in
/// <c>cell.Value</c> and passes the cell to <c>CellReader</c>. Does nothing when
/// <c>NotFound</c> is set.
/// </summary>
public void Extract()
{
    if (NotFound)
    {
        return;
    }

    foreach (Cell cell in tableLayout)
    {
        HtmlNodeCollection matches = HtmlDocument.DocumentNode.SelectNodes(cell.Expression());
        if (matches == null || matches.Count <= 0)
        {
            // Nothing matched for this cell; leave it untouched.
            continue;
        }

        cell.Value = matches.FirstOrDefault().InnerText;
        CellReader(cell);
    }
}
/// <summary>
/// Reads the permanentstyle.com RSS feed and, for each item, loads the linked page
/// and captures the inner HTML of its "siteorigin-widget-tinymce textwidget" div as
/// the article content. Items without a link or without that div are skipped.
/// </summary>
/// <returns>The articles that could be rebuilt.</returns>
private List<Article> RebuildFeed()
{
    List<Article> articles = new List<Article>();
    string feedUrl = "https://www.permanentstyle.com/feed";

    // Dispose the reader even when loading the feed throws.
    SyndicationFeed feed;
    using (XmlReader reader = XmlReader.Create(feedUrl))
    {
        feed = SyndicationFeed.Load(reader);
    }

    string ClassToGet = "siteorigin-widget-tinymce textwidget";
    string xPath = @"//div[@class='" + ClassToGet + "']";
    var web = new HtmlWeb();

    foreach (SyndicationItem item in feed.Items)
    {
        // An item without any link has no page to fetch.
        string articleUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
        if (articleUrl == null)
        {
            continue;
        }

        // Get the content of the article
        var doc = web.Load(articleUrl);
        string content = doc.DocumentNode.SelectNodes(xPath)?.FirstOrDefault()?.InnerHtml;
        if (content == null)
        {
            continue;
        }

        articles.Add(new Article
        {
            Id = item.Id,
            HTMLTitle = item.Title?.Text,
            Title = item.Title?.Text,
            WebsiteUrl = articleUrl,
            Link = articleUrl,
            Summary = item.Summary?.Text,
            Content = content,
            MediaLink = "",
            Updated = item.PublishDate.UtcDateTime,
            // Uncategorised items used to throw here; they now get a null category.
            Category = item.Categories.FirstOrDefault()?.ToString(),
            Author = "Simon Crompton"
        });
    }

    return articles;
}
/// <summary>
/// Parses one list-item HTML fragment into a <c>MovieViewModel</c>: URL, title and id
/// come from the picture link, the image URL from its <c>img</c>, the score from the
/// info block's <c>strong</c>, and the role text from the info block.
/// Elements or attributes missing from the fragment leave the corresponding
/// property null instead of throwing.
/// </summary>
/// <param name="innerHtml">HTML fragment of a single list item.</param>
/// <returns>The populated model (possibly partially populated).</returns>
private static MovieViewModel GetPalyList(string innerHtml)
{
    string imagePath = "div[@class='site-piclist_pic']/a/img";
    string urlPath = "div[@class='site-piclist_pic']/a";
    string rolePath = "div[@class='site-piclist_info']";
    string scorePath = "div[@class='site-piclist_info']/div/span/strong";

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(innerHtml);

    // SelectNodes yields null when nothing matches, so every lookup is null-safe.
    HtmlNode imgNode = doc.DocumentNode.SelectNodes(imagePath)?.FirstOrDefault();
    HtmlNode urlNode = doc.DocumentNode.SelectNodes(urlPath)?.FirstOrDefault();
    HtmlNode roleNode = doc.DocumentNode.SelectNodes(rolePath)?.FirstOrDefault();
    HtmlNode scoreNode = doc.DocumentNode.SelectNodes(scorePath)?.FirstOrDefault();

    MovieViewModel movie = new MovieViewModel();

    string href = urlNode?.Attributes["href"]?.Value;
    if (href != null)
    {
        movie.Url = $"http://jx.vgoodapi.com/jx.php?url={href}";
    }

    movie.Title = urlNode?.Attributes["title"]?.Value;
    movie.Id = urlNode?.Attributes["data-qipuid"]?.Value;
    movie.ImageUrl = imgNode?.Attributes["src"]?.Value;

    if (scoreNode != null)
    {
        movie.Score = scoreNode.InnerText; // rating
    }

    if (roleNode != null)
    {
        // Strip spaces, turn line breaks into commas, de-duplicate, then drop the
        // first two comma-separated fields; whatever remains is the role text.
        var infoStr = roleNode.InnerText.Replace(" ", "").Replace("\r\n", ",");
        infoStr = ProcessRepetition(infoStr).Trim(',');
        var infos = infoStr.Split(",");
        if (infos.Length > 2)
        {
            // The parts contain no commas after the split, so plain concatenation
            // reproduces the original loop plus its comma replacement.
            movie.Role = string.Concat(infos.Skip(2));
        }
    }

    return movie;
}
/// <summary>
/// Fetches the item's wiki page (<c>item.ItemLink</c>), finds the "loot value" row of
/// the <c>wiki_table</c>, and hands its cells to <c>GetLootValue</c>.
/// Returns silently when the item has no link or the request is unsuccessful.
/// </summary>
/// <param name="item">The item to enrich.</param>
/// <exception cref="InvalidDataException">The page was fetched but its body is empty.</exception>
private void GetItemDetailedInfo(ref Item item)
{
    if (string.IsNullOrEmpty(item.ItemLink))
    {
        return;
    }

    string html;
    // Dispose the response once its body has been read.
    using (HttpResponseMessage response = _httpClient.GetAsync(item.ItemLink).Result)
    {
        if (!response.IsSuccessStatusCode)
        {
            return;
        }

        html = response.Content.ReadAsStringAsync().Result;
    }

    if (string.IsNullOrEmpty(html))
    {
        throw new InvalidDataException($"Empty html content while getting items in {nameof(GetItemDetailedInfo)}");
    }

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);

    HtmlNodeCollection infoNodes = document.DocumentNode.SelectNodes("//table[@class='wiki_table']/tr");

    // Case-insensitive, culture-independent search for the row label.
    HtmlNodeCollection lootValueNodes = infoNodes
        ?.FirstOrDefault(n => n.InnerText.IndexOf("loot value", System.StringComparison.OrdinalIgnoreCase) >= 0)
        ?.SelectNodes("td");

    if (lootValueNodes != null && lootValueNodes.Count > 1)
    {
        GetLootValue(item, lootValueNodes);
        _logger.LogInformation($"Tried to get the lootvalue wiki string: {lootValueNodes[1]?.InnerText} " +
                               $"current item information: " +
                               $"\n Name: {item.Name} \n LootValue: {item.LootValue}");
    }
}
/// <summary>
/// Reads the philippesilberzahn.com RSS feed and, for each item, loads the linked
/// page and captures the inner HTML of its
/// "post type-post status-publish format-standard" div as the article content.
/// Items without a link or without that div are skipped.
/// </summary>
/// <returns>The articles that could be rebuilt.</returns>
private List<Article> RebuildFeed()
{
    List<Article> articles = new List<Article>();
    string feedUrl = "https://philippesilberzahn.com/feed/";

    // Dispose the reader even when loading the feed throws.
    SyndicationFeed feed;
    using (XmlReader reader = XmlReader.Create(feedUrl))
    {
        feed = SyndicationFeed.Load(reader);
    }

    string ClassToGet = "post type-post status-publish format-standard";
    string xPath = @"//div[@class='" + ClassToGet + "']";
    var web = new HtmlWeb();

    foreach (SyndicationItem item in feed.Items)
    {
        // An item without any link has no page to fetch.
        string articleUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
        if (articleUrl == null)
        {
            continue;
        }

        // Get the content of the article. (The unused per-item request to
        // google.com that used to precede this load has been removed.)
        var doc = web.Load(articleUrl);

        // SelectNodes yields null when the div is absent; skip such pages.
        string content = doc.DocumentNode.SelectNodes(xPath)?.FirstOrDefault()?.InnerHtml;
        if (content == null)
        {
            continue;
        }

        articles.Add(new Article
        {
            Id = item.Id,
            HTMLTitle = item.Title?.Text,
            Title = item.Title?.Text,
            WebsiteUrl = articleUrl,
            Link = articleUrl,
            Summary = item.Summary?.Text,
            Content = content.Replace("\b", ""),
            MediaLink = "",
            Updated = item.PublishDate.UtcDateTime,
            Category = item.Categories.FirstOrDefault()?.ToString(),
            Author = item.Authors.FirstOrDefault()?.Name
        });
    }

    return articles;
}
/// <summary>
/// Parses a product-listing page. When a listing item contains the fixed keyword set,
/// registers the keywords and a new <c>Product</c> (with the page's "global-text"
/// button text, if present) through <c>_iad</c>.
/// </summary>
/// <param name="html">The listing page HTML.</param>
/// <returns>
/// A fixed message when the page has no listing items or none qualifies; otherwise
/// the string form of the registered keyword.
/// </returns>
public string CrawlSteepandCheap(string html)
{
    var page = new HtmlDocument();
    page.LoadHtml(html);

    HtmlNodeCollection listingItems = page.DocumentNode.SelectNodes(
        "//ul[contains(@class, 'ui-product-listing-grid')]/li");

    // The keyword search returned no products at all.
    if (listingItems == null)
    {
        return "We couldn’t find any results";
    }

    string[] words = { "alpha", "sv", "men" };

    // No listed product satisfies the keyword condition.
    bool anyQualified = listingItems.Any(li => ContainWords(words, li.InnerText));
    if (!anyQualified)
    {
        return "No qualified products";
    }

    Product product = new Product();
    product.Keyword = _iad.AddKeyword(words);

    HtmlNode globalTextButton = page.DocumentNode.SelectSingleNode("//button[contains(@class,'global-text')]");
    if (globalTextButton != null)
    {
        string decoded = HttpUtility.HtmlDecode(globalTextButton.InnerText);
        product.GlobalText = decoded.Trim();
    }

    _iad.AddProduct(product);

    return product.Keyword.ToString();
}
/// <summary>
/// Reads <c>d:/my.html</c>, prints the outer HTML of every direct <c>img</c> child of
/// the first element with class "image-section", then waits for a key press.
/// </summary>
/// <param name="args">Unused.</param>
static void Main(string[] args)
{
    var html = File.ReadAllText(@"d:/my.html");
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(html);

    // SelectNodes yields null when nothing matches; "?." keeps that case from
    // throwing inside FirstOrDefault.
    HtmlNodeCollection sections = htmlDoc.DocumentNode.SelectNodes("//*[@class='image-section']");
    var section = sections?.FirstOrDefault();

    if (section != null)
    {
        foreach (var imgElement in section.Elements("img"))
        {
            Console.WriteLine(imgElement.OuterHtml);
        }
    }

    Console.ReadKey();
}
/// <summary>
/// Splits the first node's inner text into trimmed, non-empty lines and pairs them
/// up as (name, value): even-indexed lines are names, odd-indexed lines are values.
/// </summary>
/// <param name="table">Collection whose first node holds the text; may be null.</param>
/// <returns>
/// The (name, value) pairs; empty when there is no node or the lines do not pair up.
/// </returns>
private IEnumerable<Tuple<string, string>> ParseValues(HtmlNodeCollection table)
{
    var node = table?.FirstOrDefault();
    if (node == null)
    {
        return Enumerable.Empty<Tuple<string, string>>();
    }

    // Materialise once; the lines are filtered twice below.
    string[] items = node.InnerText
        .Split("\r\n")
        .Select(v => v.Trim())
        .Where(v => !string.IsNullOrEmpty(v))
        .ToArray();

    string[] names = items.Where((c, i) => i % 2 == 0).ToArray();
    string[] values = items.Where((c, i) => i % 2 != 0).ToArray();

    // An odd number of lines means a name without a value: treat as unparseable.
    if (names.Length != values.Length)
    {
        return Enumerable.Empty<Tuple<string, string>>();
    }

    return names.Zip(values, (name, value) => new Tuple<string, string>(name, value));
}
/// <summary>
/// Downloads and parses <paramref name="url"/> up to ten times until
/// <paramref name="nodeExpression"/> matches, then returns the first matching node.
/// </summary>
/// <param name="url">Address passed to <c>DownloadContent</c>.</param>
/// <param name="nodeExpression">XPath expression evaluated from the document node.</param>
/// <param name="doc">Receives the document parsed on the last attempt.</param>
/// <returns>The first node matching the expression.</returns>
/// <exception cref="Exception">No attempt produced a match.</exception>
private HtmlNode tryLoadNode(string url, string nodeExpression, out HtmlDocument doc)
{
    const int maxAttempts = 10;

    doc = null;
    HtmlNodeCollection matches = null;

    for (int attempt = 0; attempt < maxAttempts && matches == null; attempt++)
    {
        string html = DownloadContent(url);
        doc = new HtmlDocument();
        doc.LoadHtml(html);
        matches = doc.DocumentNode.SelectNodes(nodeExpression);
    }

    if (matches != null)
    {
        return matches.FirstOrDefault();
    }

    throw new Exception(string.Format("Unable to retrieve node \"{0}\" for \"{1}\"", nodeExpression, url));
}
/// <summary>
/// Returns the first node in <paramref name="htmlNodes"/> that has an attribute named
/// <paramref name="attName"/> whose value equals <paramref name="attValue"/>.
/// </summary>
/// <param name="htmlNodes">The nodes to search; may be null.</param>
/// <param name="attName">Attribute name to look for.</param>
/// <param name="attValue">Attribute value to look for.</param>
/// <returns>The first matching node, or null when the collection is null or nothing matches.</returns>
private HtmlNode GetNodeWithAttributeValue(HtmlNodeCollection htmlNodes, string attName, string attValue)
{
    // The guard used to be inverted ("!= null"), which returned null for every real
    // collection and fell through to a crash for a null one.
    if (htmlNodes == null)
    {
        return null;
    }

    return htmlNodes.FirstOrDefault(p => p.Attributes.Any(a => a.Name == attName && a.Value == attValue));
}
/// <summary>
/// Reads the batirama.com RSS feed and, for each item, loads the linked page and
/// captures the inner HTML of its "post post-default post-variant-3" div as the
/// article content. Items without a link or without that div are skipped.
/// </summary>
/// <returns>The articles that could be rebuilt.</returns>
private List<Article> RebuildFeed()
{
    List<Article> articles = new List<Article>();
    string feedUrl = "https://www.batirama.com/rss/2-l-info-actualites.html";

    // Dispose the reader even when loading the feed throws.
    SyndicationFeed feed;
    using (XmlReader reader = XmlReader.Create(feedUrl))
    {
        feed = SyndicationFeed.Load(reader);
    }

    string ClassToGet = "post post-default post-variant-3";
    string xPath = @"//div[@class='" + ClassToGet + "']";
    var web = new HtmlWeb();

    foreach (SyndicationItem item in feed.Items)
    {
        // An item without any link has no page to fetch.
        string articleUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
        if (articleUrl == null)
        {
            continue;
        }

        // Get the content of the article
        var doc = web.Load(articleUrl);

        // SelectNodes yields null when the div is absent; skip such pages.
        string content = doc.DocumentNode.SelectNodes(xPath)?.FirstOrDefault()?.InnerHtml;
        if (content == null)
        {
            continue;
        }

        articles.Add(new Article
        {
            Id = item.Id,
            HTMLTitle = item.Title?.Text,
            Title = item.Title?.Text,
            WebsiteUrl = articleUrl,
            Link = articleUrl,
            Summary = item.Summary?.Text,
            Content = content,
            MediaLink = "",
            Updated = item.PublishDate.UtcDateTime,
            Category = item.Categories.FirstOrDefault()?.ToString(),
            Author = item.Authors.FirstOrDefault()?.Name
        });
    }

    return articles;
}
/// <summary>
/// Downloads <paramref name="url"/>, follows the href of the first node selected by
/// the path from <c>GetFristPath(checkedIndex)</c>, and builds one
/// <c>PlayViewModel</c> per episode list item found on that second page.
/// Any exception is logged and ends the crawl; entries collected so far are returned.
/// </summary>
/// <param name="url">Address of the first page to download.</param>
/// <param name="checkedIndex">Value passed to <c>GetFristPath</c> to pick the XPath.</param>
/// <returns>The collected play entries (possibly empty or partial).</returns>
public static List<PlayViewModel> Crawler(string url, string checkedIndex)
{
    var result = new List<PlayViewModel>();
    try
    {
        HtmlDocument page = new HtmlDocument();
        page.LoadHtml(HttpHelper.Download(url));

        string firstPath = GetFristPath(checkedIndex);
        HtmlNode entryLink = page.DocumentNode.SelectNodes(firstPath).FirstOrDefault();
        string episodesUrl = entryLink.Attributes["href"].Value;

        // Fetch the episode list page.
        string episodesHtml = HttpHelper.Download(episodesUrl);
        page.LoadHtml(episodesHtml);

        // Episode items live under the element marked data-series-elem="cont".
        HtmlNodeCollection episodeItems = page.DocumentNode.SelectNodes("//div[@data-series-elem='cont']/div/ul/li");

        int number = 0;
        foreach (HtmlNode episode in episodeItems)
        {
            number++;

            string tvId = episode.Attributes["data-videolist-tvid"].Value;
            string vId = episode.Attributes["data-videolist-vid"].Value;

            HtmlNode anchor = episode.ChildNodes.FirstOrDefault(child => child.Name == "a");
            // Drop the first two characters of the href before prefixing "http://".
            string tvUrl = anchor.Attributes["href"].Value.Remove(0, 2);

            // Resulting target looks like:
            // http://www.iqiyi.com/v_19rrbo7gxg.html?#curid=946111000_0277ef6a29cd9967bb93ab76fd1068f9
            var entry = new PlayViewModel();
            entry.Number = number;
            entry.Url = $"http://jx.aeidu.cn/index.php?url=http://{tvUrl}#curid={tvId}_{vId}";
            result.Add(entry);
        }
    }
    catch (Exception ex)
    {
        logger.Error("CrawlerMuti出现异常", ex);
    }

    return result;
}
/// <summary>
/// Evaluates <paramref name="xpath"/> against each node in turn and returns the
/// de-entitized, trimmed inner text of the first match, with non-breaking spaces
/// replaced by ordinary spaces.
/// </summary>
/// <param name="nodes">The nodes to search; may be null or empty.</param>
/// <param name="xpath">XPath expression evaluated relative to each node.</param>
/// <returns>The cleaned text, or <c>String.Empty</c> when nothing matches or the text is empty.</returns>
private string GetHtmlString(HtmlNodeCollection nodes, string xpath)
{
    if (nodes == null)
    {
        return String.Empty;
    }

    // Evaluate the XPath once per node and keep the first non-null result set,
    // instead of locating the node and then running the same query again.
    HtmlNodeCollection matches = nodes
        .Select(x => x.SelectNodes(xpath))
        .FirstOrDefault(found => found != null);

    string value = matches?.FirstOrDefault()?.InnerText;
    if (String.IsNullOrEmpty(value))
    {
        return String.Empty;
    }

    // The "special space" is written as an explicit escape so it cannot be silently
    // turned into an ordinary space by an editor or re-encoding.
    return HtmlEntity.DeEntitize(value).Replace('\u00A0', ' ').Trim();
}