/// <summary>
/// Loads the news page at <paramref name="url"/> and extracts its title,
/// annotation and publication date into a <see cref="NewsItemInfo"/>.
/// </summary>
/// <param name="url">Address of the news page; it is passed to <c>new Uri(url)</c>.</param>
/// <returns>
/// A new item carrying the decoded title, the decoded annotation, the given URL,
/// the source name and the date formatted as <c>yyyy-M-d H:mm:ss</c>.
/// </returns>
/// <remarks>
/// NOTE(review): none of the node lookups are null-checked, so a page whose
/// layout differs from the expected one presumably makes this method throw.
/// </remarks>
public NewsItemInfo ParseWebPage(string url)
{
    WebPage page = web.NavigateToPage(new Uri(url));
    NewsItemInfo newsItem = new NewsItemInfo();

    // "|" characters are stripped from the heading before it is HTML-decoded.
    string rawTitle = page.Html.SelectSingleNode("//h1[@class='page_news_ttl haveselect']").InnerText.Replace("|", "");
    newsItem.SetTitle(HttpUtility.HtmlDecode(rawTitle).Trim());

    // The annotation is the text of the first child of the content container.
    string rawAnnotation = page.Html.SelectSingleNode("//div[@class='universal_content clearfix']").FirstChild.InnerText;
    newsItem.SetAnnotation(HttpUtility.HtmlDecode(rawAnnotation).Trim());

    newsItem.SetNewsUrl(url);
    newsItem.SetSourceName(sourceName);

    // ":00" is appended before parsing - presumably the page shows the time
    // without seconds; TODO confirm against the live markup.
    // NOTE(review): parsing uses the current culture; verify it matches the
    // date format the site emits.
    string rawDate = page.Html.SelectSingleNode("//div[@class='page_news_info clearfix']").ChildNodes[1].InnerText.Replace("|", "").Trim() + ":00";
    DateTime time = DateTime.Parse(HttpUtility.HtmlDecode(rawDate));

    // Format with the invariant culture so the ":" separators and the calendar
    // in the stored string do not depend on the thread's current culture.
    newsItem.SetDate(time.ToString("yyyy-M-d H:mm:ss", System.Globalization.CultureInfo.InvariantCulture));
    return newsItem;
}
/// <summary>
/// Loads the article page at <paramref name="url"/> and builds a
/// <see cref="NewsItemInfo"/> from its heading, article body and time label.
/// </summary>
/// <param name="url">Address of the article page; it is passed to <c>new Uri(url)</c>.</param>
/// <returns>
/// A new item carrying the decoded title, the decoded annotation, the given URL,
/// the source name and the date produced by <c>GetTimeFromString</c>.
/// </returns>
public NewsItemInfo ParseWebPage(string url)
{
    WebPage page = web.NavigateToPage(new Uri(url));
    NewsItemInfo newsItem = new NewsItemInfo();

    string headline = page.Html.SelectSingleNode("//h1[@class='m-t-10 ']").InnerText;
    newsItem.SetTitle(HttpUtility.HtmlDecode(headline).Trim());

    // The annotation is the first child of the second child node of the article body.
    string lead = page.Html.SelectSingleNode("//div[@itemprop='articleBody']").ChildNodes[1].ChildNodes[0].InnerText;
    newsItem.SetAnnotation(HttpUtility.HtmlDecode(lead).Trim());

    newsItem.SetNewsUrl(url);
    newsItem.SetSourceName(sourceName);

    // The decoded, trimmed label text is handed to GetTimeFromString for conversion.
    string timeLabel = page.Html.SelectSingleNode("//div[@class='article_page__left__top__time__label']").InnerText;
    string decodedLabel = HttpUtility.HtmlDecode(timeLabel).Trim();
    newsItem.SetDate(GetTimeFromString(decodedLabel));

    return newsItem;
}
/// <summary>
/// Loads the news page at <paramref name="url"/> and extracts its title,
/// annotation and publication date into a <see cref="NewsItemInfo"/>.
/// </summary>
/// <param name="url">Address of the news page; it is passed to <c>new Uri(url)</c>.</param>
/// <returns>
/// A new item carrying the decoded title, the decoded annotation, the given URL,
/// the source name and the date formatted as <c>yyyy-M-d H:mm:ss</c>.
/// </returns>
/// <remarks>
/// NOTE(review): none of the node lookups are null-checked, so a page whose
/// layout differs from the expected one presumably makes this method throw.
/// </remarks>
public NewsItemInfo ParseWebPage(string url)
{
    WebPage page = web.NavigateToPage(new Uri(url));
    NewsItemInfo newsItem = new NewsItemInfo();

    // Title and date are both read from fixed child positions of the content container.
    HtmlNode content = page.Html.SelectSingleNode("//div[@class='page_content']");
    newsItem.SetTitle(HttpUtility.HtmlDecode(content.ChildNodes[3].InnerText).Trim());

    string rawAnnotation = page.Html.SelectSingleNode("//div[@class='gray']").ChildNodes[1].InnerText;
    newsItem.SetAnnotation(HttpUtility.HtmlDecode(rawAnnotation).Trim());

    newsItem.SetNewsUrl(url);
    newsItem.SetSourceName(sourceName);

    string dateLine = HttpUtility.HtmlDecode(content.ChildNodes[5].InnerText).Trim();

    // Keep only the text after the first comma. The char overload of IndexOf is
    // an ordinal search; when no comma is present it returns -1, so the whole
    // string is kept.
    string dateText = dateLine.Substring(dateLine.IndexOf(',') + 1) + ":00";

    // NOTE(review): parsing uses the current culture; verify it matches the
    // date format the site emits.
    DateTime time = DateTime.Parse(dateText);

    // Format with the invariant culture so the ":" separators and the calendar
    // in the stored string do not depend on the thread's current culture.
    newsItem.SetDate(time.ToString("yyyy-M-d H:mm:ss", System.Globalization.CultureInfo.InvariantCulture));
    return newsItem;
}
/// <summary>
/// Builds a <see cref="NewsItemInfo"/> from a single post panel node.
/// </summary>
/// <param name="node">Panel node whose descendants hold the title link, text and time.</param>
/// <returns>
/// An empty item when the panel has no title link; otherwise an item with the
/// title, URL, source name and date set, plus the annotation when one of the
/// two known text containers is present.
/// </returns>
public NewsItemInfo ParseNewsPanel(HtmlNode node)
{
    NewsItemInfo newsItem = new NewsItemInfo();

    HtmlNode titleLink = node.SelectSingleNode(".//a[@class='post__title_link']");
    if (titleLink == null)
    {
        // Without a title link the panel is not treated as a post.
        return newsItem;
    }
    newsItem.SetTitle(HttpUtility.HtmlDecode(titleLink.InnerText).Trim());

    // The post text is looked up under the "v1" class string first, then "v2".
    HtmlNode textNode = node.SelectSingleNode(".//div[@class='post__text post__text-html post__text_v1 ']");
    if (textNode == null)
    {
        textNode = node.SelectSingleNode(".//div[@class='post__text post__text-html post__text_v2 ']");
    }

    // Neither variant may be present; leave the annotation unset in that case
    // instead of dereferencing a null node.
    if (textNode != null)
    {
        newsItem.SetAnnotation(HttpUtility.HtmlDecode(textNode.InnerText).Trim());
    }

    // Reuse the title link found above rather than running the same query again.
    newsItem.SetNewsUrl(HttpUtility.HtmlDecode(titleLink.GetAttributeValue("href", "").Trim()));
    newsItem.SetSourceName(sourceName);

    // NOTE(review): the time span is not null-checked; a panel without it still throws here.
    string rawTime = node.SelectSingleNode(".//span[@class='post__time']").InnerText;
    newsItem.SetDate(GetTimeFromString(HttpUtility.HtmlDecode(rawTime)));

    return newsItem;
}
/// <summary>
/// Loads the news page at <paramref name="url"/> and extracts its title,
/// annotation and publication date into a <see cref="NewsItemInfo"/>.
/// </summary>
/// <param name="url">Address of the news page; it is passed to <c>new Uri(url)</c>.</param>
/// <returns>
/// An empty item when the page cannot be loaded; otherwise an item carrying the
/// decoded title, the decoded annotation, the given URL, the source name and the
/// date formatted as <c>yyyy-M-d H:mm:ss</c>.
/// </returns>
/// <remarks>
/// NOTE(review): the node lookups after a successful load are not null-checked,
/// so a page with an unexpected layout presumably makes this method throw.
/// </remarks>
public NewsItemInfo ParseWebPage(string url)
{
    NewsItemInfo newsItem = new NewsItemInfo();

    WebPage page;
    try
    {
        page = web.NavigateToPage(new Uri(url));
    }
    catch
    {
        // Best effort: any failure while building the Uri or loading the page
        // yields the empty item instead of propagating.
        return newsItem;
    }

    string rawTitle = page.Html.SelectSingleNode("//h1[@class='new']").InnerText;
    newsItem.SetTitle(HttpUtility.HtmlDecode(rawTitle).Trim());

    // The whole article body text is used as the annotation.
    string rawAnnotation = page.Html.SelectSingleNode("//div[@itemprop='articleBody']").InnerText;
    newsItem.SetAnnotation(HttpUtility.HtmlDecode(rawAnnotation).Trim());

    newsItem.SetNewsUrl(url);
    newsItem.SetSourceName(sourceName);

    // ":00" is appended before parsing - presumably the page shows the time
    // without seconds; TODO confirm against the live markup.
    // NOTE(review): parsing uses the current culture; verify it matches the
    // date format the site emits.
    string rawDate = page.Html.SelectSingleNode("//div[@class='datem']").InnerText.Trim() + ":00";
    DateTime time = DateTime.Parse(HttpUtility.HtmlDecode(rawDate));

    // Format with the invariant culture so the ":" separators and the calendar
    // in the stored string do not depend on the thread's current culture.
    newsItem.SetDate(time.ToString("yyyy-M-d H:mm:ss", System.Globalization.CultureInfo.InvariantCulture));
    return newsItem;
}