/// <summary>
/// Loads the articles for a tag by running a keyword search on the tag's title.
/// </summary>
/// <param name="plist">List handed to DoSearch to receive the results.</param>
/// <param name="tag">Tag whose Title is used as the search keyword; when null nothing is done.</param>
public static async Task LoadItemsFromTag(NewsItemList plist, ItemTag tag)
{
    // Without a tag there is nothing to search for; the list is left untouched.
    if (tag != null)
    {
        await DoSearch(plist, tag.Title);
    }
}
/// <summary>
/// Searches VnExpress for <paramref name="keyword"/> and fills <paramref name="plist"/>
/// with one item per search result found on the result page.
/// </summary>
/// <param name="plist">List to fill; it is cleared before the search runs.</param>
/// <param name="keyword">Search text; it is URL-escaped before being placed in the query string. Null is treated as empty.</param>
/// <remarks>
/// Any exception is caught and reported through a message box; nothing is rethrown.
/// A message box is also shown when no result is found.
/// </remarks>
public static async Task DoSearch(NewsItemList plist, string keyword)
{
    try
    {
        plist.Clear();

        // Escape the keyword so that spaces, '&', '#' and non-ASCII characters
        // cannot corrupt the query string.
        string url = "http://timkiem.vnexpress.net/?q=" + Uri.EscapeDataString(keyword ?? string.Empty);
        string html = await HtmlDownloader.loadFromUrl(url);

        HtmlDocument page = new HtmlDocument();
        page.LoadHtml(html);

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var allResultNodes = page.DocumentNode.SelectNodes("//li[@class='block_search_result_text']");
        if (allResultNodes == null)
        {
            MessageBox.Show("Sorry, no articles found!");
            return;
        }

        foreach (HtmlNode itemNode in allResultNodes)
        {
            // Absolute path of this result; the sub-queries below are anchored to it.
            string itemPath = itemNode.XPath;

            NewsItem article = new NewsItem();
            article.Source = "Vnexpress";

            HtmlNode nodeLink = itemNode.SelectSingleNode(itemPath + "//a[@href]");
            if (nodeLink == null)
            {
                // A result without a link cannot be opened; skip it instead of
                // aborting the whole search with a null dereference.
                continue;
            }

            HtmlNode nodeImage = itemNode.SelectSingleNode(itemPath + "//img[@src]");
            if (nodeImage != null)
            {
                article.ImageLink = nodeImage.Attributes["src"].Value;
            }

            // The title is read from the link's "alt" attribute; fall back to the
            // link text when that attribute is missing.
            if (nodeLink.Attributes.Contains("alt"))
            {
                article.Name = HtmlDownloader.removeHtml(nodeLink.Attributes["alt"].Value);
            }
            else
            {
                article.Name = HtmlDownloader.removeHtml(nodeLink.InnerText);
            }
            article.LinkUrl = nodeLink.Attributes["href"].Value;

            HtmlNode nodeTime = itemNode.SelectSingleNode(itemPath + "//p[@class='txt_gray txt_11 ex_hi']");
            if (nodeTime != null)
            {
                article.DatePublished = HtmlDownloader.removeHtml(nodeTime.InnerHtml);
            }

            // "hightlight" (sic) is the class name queried on the page; do not "fix" the spelling.
            HtmlNode nodeShortContent = itemNode.SelectSingleNode(itemPath + "//span[@class='hightlight']");
            if (nodeShortContent != null)
            {
                article.ShortContent = HtmlDownloader.removeHtml(nodeShortContent.InnerText);
            }

            plist.Add(article);
        }

        if (plist.Count == 0)
        {
            MessageBox.Show("Sorry, no articles found!");
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show("System got an error at DoSearch function with message:\n" + ex.Message);
        return;
    }
}
/// <summary>
/// Searches Dantri for <paramref name="keyword"/> and fills <paramref name="plist"/>
/// with the articles of the first result page.
/// </summary>
/// <param name="plist">List to fill; it is cleared before the search runs.</param>
/// <param name="keyword">Search text; it is URL-escaped before being placed in the query string. Null is treated as empty.</param>
/// <remarks>
/// Any exception is caught and reported through a message box; nothing is rethrown.
/// A message box is also shown when the list is still empty after loading.
/// </remarks>
public static async Task DoSearch(NewsItemList plist, string keyword)
{
    try
    {
        plist.Clear();

        // Escape the keyword: an unescaped '&' or '#' would cut the query short or
        // collide with the PageIndex parameter appended right after it.
        string escapedKeyword = Uri.EscapeDataString(keyword ?? string.Empty);
        string url = "http://search.dantri.com.vn/SearchResult.aspx?s=" + escapedKeyword + "&PageIndex=1";

        await LoadItemsFromPage(plist, url, "Dantri", null);

        if (plist.Count == 0)
        {
            MessageBox.Show("Sorry, no articles found!");
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show("System got an error at DoSearch function with message:\n" + ex.Message);
        return;
    }
}
/// <summary>
/// Loads the articles listed on a VnExpress page into <paramref name="plist"/>.
/// </summary>
/// <param name="plist">List to fill; it is cleared before loading.</param>
/// <param name="url">Page address; "http://vnexpress.net" is prepended when it does not contain "vnexpress.net".</param>
/// <param name="source">Source name stamped on every article and on <paramref name="hottest"/>.</param>
/// <param name="hottest">Optional item passed to getHottestNew and, when not null, added to the list ahead of the regular articles.</param>
/// <remarks>
/// Any exception is caught and reported through a message box; nothing is rethrown.
/// </remarks>
public static async Task LoadItemsFromPage(NewsItemList plist, string url, string source, NewsItem hottest = null)
{
    try
    {
        if (!url.Contains("vnexpress.net"))
        {
            url = "http://vnexpress.net" + url;
        }

        plist.Clear();
        if (hottest != null)
        {
            hottest.Source = source;
        }

        string html = await HtmlDownloader.loadFromUrl(url);
        HtmlDocument page = new HtmlDocument();
        page.LoadHtml(html);

        var allItemNodes = page.DocumentNode.SelectNodes("//div[@class='folder-news']");

        // NOTE(review): getHottestNew is defined elsewhere; presumably it fills
        // 'hottest' from the page - confirm against its implementation.
        getHottestNew(page, hottest);
        if (hottest != null)
        {
            plist.Add(hottest);
        }

        // SelectNodes yields null (not an empty collection) when nothing matches;
        // iterating it would throw.
        if (allItemNodes == null)
        {
            return;
        }

        foreach (HtmlNode itemNode in allItemNodes)
        {
            // Absolute path of this item; the sub-queries below are anchored to it.
            string itemPath = itemNode.XPath;

            NewsItem article = new NewsItem();
            article.Source = source;

            HtmlNode nodeImage = itemNode.SelectSingleNode(itemPath + "//img[@src]");
            if (nodeImage != null)
            {
                article.ImageLink = nodeImage.Attributes["src"].Value;
            }

            HtmlNode nodeTitle = itemNode.SelectSingleNode(itemPath + "//a[@class='link-title14' and @href]");
            if (nodeTitle == null)
            {
                // No title link means no name and no URL; skip this item rather
                // than abort the whole page with a null dereference.
                continue;
            }
            article.Name = HtmlDownloader.removeHtml(nodeTitle.InnerText);
            article.LinkUrl = nodeTitle.Attributes["href"].Value;

            HtmlNode nodeTime = itemNode.SelectSingleNode(itemPath + "//span[@class='timeListHome']");
            if (nodeTime != null)
            {
                article.DatePublished = HtmlDownloader.removeHtml(nodeTime.InnerText);
            }

            HtmlNode nodeShortContent = itemNode.SelectSingleNode(itemPath + "//h3[@class='h3Lead']");
            if (nodeShortContent != null)
            {
                // Cut the related links out: keep only what precedes the first "<br".
                string shortContent = nodeShortContent.InnerHtml;
                int cutIndex = shortContent.IndexOf("<br");
                if (cutIndex > 0)
                {
                    shortContent = shortContent.Substring(0, cutIndex);
                }

                // Drop stray '>' characters before the HTML is stripped.
                shortContent = shortContent.Replace(">", "");
                article.ShortContent = HtmlDownloader.removeHtml(shortContent);
            }

            plist.Add(article);
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show("System got an error at LoadItemsFromPage function with message:\n" + ex.Message);
        return;
    }
}
/// <summary>
/// Gets a list of articles from a Dantri category URL and stores them in <paramref name="plist"/>.
/// </summary>
/// <param name="plist">List to fill; it is cleared before loading.</param>
/// <param name="url">Page address; "http://dantri.com.vn" is prepended when it does not contain "http://".</param>
/// <param name="source">Source name stamped on every article and on <paramref name="hottest"/>.</param>
/// <param name="hottest">Optional item passed to getHottestNew; added to the list first when that call returns true and the item is not null.</param>
/// <remarks>
/// Items whose link contains "tuyensinh.dantri" are skipped.
/// Any exception is caught and reported through a message box; nothing is rethrown.
/// </remarks>
public static async Task LoadItemsFromPage(NewsItemList plist, string url, string source, NewsItem hottest = null)
{
    try
    {
        if (!url.Contains("http://"))
        {
            url = "http://dantri.com.vn" + url;
        }

        plist.Clear();
        if (hottest != null)
        {
            hottest.Source = source;
        }

        string html = await HtmlDownloader.loadFromUrl(url);
        HtmlDocument page = new HtmlDocument();
        page.LoadHtml(html);

        var allItemNodes = page.DocumentNode.SelectNodes("//div[@class='mt3 clearfix']");

        // getHottestNew is always called, even when 'hottest' is null, as in the
        // original code. NOTE(review): presumably it fills 'hottest' - confirm.
        if (getHottestNew(page, hottest) && hottest != null)
        {
            plist.Add(hottest);
        }

        if (allItemNodes == null)
        {
            return;
        }

        foreach (HtmlNode itemNode in allItemNodes)
        {
            // Absolute path of this item; the sub-queries below are anchored to it.
            string itemPath = itemNode.XPath;

            NewsItem article = new NewsItem();
            article.Source = source;

            HtmlNode nodeImage = itemNode.SelectSingleNode(itemPath + "//img[@src]");
            if (nodeImage != null)
            {
                article.ImageLink = nodeImage.Attributes["src"].Value;
            }

            // SelectNodes yields null when the item holds no anchor at all;
            // such an item has neither a link nor a title, so skip it.
            var anchorNodes = itemNode.SelectNodes(itemPath + "//a");
            if (anchorNodes == null)
            {
                continue;
            }

            foreach (HtmlNode node in anchorNodes)
            {
                // The first non-empty href becomes the article link.
                if (node.Attributes.Contains("href") && string.IsNullOrEmpty(article.LinkUrl))
                {
                    article.LinkUrl = node.Attributes["href"].Value;
                }

                // Prefer the first "title" attribute as the article name...
                if (node.Attributes.Contains("title") && string.IsNullOrEmpty(article.Name))
                {
                    article.Name = HtmlDownloader.removeHtml(node.Attributes["title"].Value);
                }

                // ...otherwise use the text of a non-image anchor longer than 10 characters.
                if (!node.InnerHtml.Contains("<img")
                    && node.InnerText.Length > 10
                    && string.IsNullOrEmpty(article.Name))
                {
                    article.Name = HtmlDownloader.removeHtml(node.InnerText);
                }
            }

            // LinkUrl is still null when no anchor carried an href; skip the item
            // instead of dereferencing null. Links containing "tuyensinh.dantri"
            // are skipped as before.
            if (article.LinkUrl == null || article.LinkUrl.Contains("tuyensinh.dantri"))
            {
                continue;
            }

            HtmlNode nodeShortContent = itemNode.SelectSingleNode(itemPath + "//div[@class='fon5 wid324 fl']");
            if (nodeShortContent == null)
            {
                nodeShortContent = itemNode.SelectSingleNode(itemPath + "//div[@class='fon5 fl']");
            }

            if (nodeShortContent != null)
            {
                // Cut the related links out: keep only what precedes the first "<br".
                string shortContent = nodeShortContent.InnerHtml;
                int cutIndex = shortContent.IndexOf("<br");
                if (cutIndex > 0)
                {
                    shortContent = shortContent.Substring(0, cutIndex);
                }

                // Drop stray '>' characters before the HTML is stripped.
                shortContent = shortContent.Replace(">", "");
                article.ShortContent = HtmlDownloader.removeHtml(shortContent);
            }

            plist.Add(article);
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show("System got an error at LoadItemsFromPage function with message:\n" + ex.Message);
        return;
    }
}
/// <summary>
/// Loads the Dantri articles listed on the page a tag links to.
/// </summary>
/// <param name="plist">List handed to LoadItemsFromPage to receive the articles.</param>
/// <param name="tag">Tag whose Link is used as the page address; when null nothing is done.</param>
public static async Task LoadItemsFromTag(NewsItemList plist, ItemTag tag)
{
    // Guard against a missing tag: dereferencing it here would throw outside
    // LoadItemsFromPage's try/catch and fault the returned task.
    if (tag == null)
    {
        return;
    }

    await LoadItemsFromPage(plist, tag.Link, "Dantri", null);
}