Exemplo n.º 1
0
 /// <summary>
 /// Fills <paramref name="plist"/> by running <c>DoSearch</c> with the tag's title as the keyword.
 /// </summary>
 /// <param name="plist">List handed through to <c>DoSearch</c>.</param>
 /// <param name="tag">Tag whose <c>Title</c> is searched for; when null the method does nothing.</param>
 public static async Task LoadItemsFromTag(NewsItemList plist, ItemTag tag)
 {
     // Only search when there is a tag to take the title from.
     if (tag != null)
     {
         await DoSearch(plist, tag.Title);
     }
 }
Exemplo n.º 2
0
        /// <summary>
        /// Searches VnExpress for <paramref name="keyword"/> and fills <paramref name="plist"/>
        /// with one <c>NewsItem</c> per parsed search result.
        /// </summary>
        /// <remarks>
        /// The list is cleared before searching. A message box is shown when no article is found
        /// or when an exception is caught; exceptions are not propagated to the caller.
        /// Results without a link node are skipped instead of aborting the whole search.
        /// </remarks>
        /// <param name="plist">List that receives the parsed articles.</param>
        /// <param name="keyword">Search text; it is URL-escaped before being placed in the query string.</param>
        public static async Task DoSearch(NewsItemList plist, string keyword)
        {
            try
            {
                plist.Clear();
                // Escape the keyword so spaces, '&', '#' and non-ASCII text cannot corrupt the query string.
                string url  = "http://timkiem.vnexpress.net/?q=" + System.Uri.EscapeDataString(keyword ?? string.Empty);
                string html = await HtmlDownloader.loadFromUrl(url);

                HtmlDocument page = new HtmlDocument();
                page.LoadHtml(html);
                var allResultNodes = page.DocumentNode.SelectNodes("//li[@class='block_search_result_text']");
                if (allResultNodes == null)
                {
                    MessageBox.Show("Sorry, no articles found!");
                    return;
                }
                foreach (HtmlNode itemNode in allResultNodes)
                {
                    // Compute the node's absolute XPath once; it prefixes every lookup below.
                    string itemXPath = itemNode.XPath;

                    HtmlNode nodeLink = itemNode.SelectSingleNode(itemXPath + "//a[@href]");
                    if (nodeLink == null)
                    {
                        // A result without a link cannot be opened, so skip it.
                        continue;
                    }

                    NewsItem article = new NewsItem();
                    article.Source = "Vnexpress";

                    HtmlNode nodeImage = itemNode.SelectSingleNode(itemXPath + "//img[@src]");
                    if (nodeImage != null)
                    {
                        article.ImageLink = nodeImage.Attributes["src"].Value;
                    }

                    // The title is read from the link's "alt" attribute; fall back to the link text when it is missing.
                    var altAttribute = nodeLink.Attributes["alt"];
                    string rawTitle  = altAttribute != null ? altAttribute.Value : nodeLink.InnerText;
                    article.Name    = HtmlDownloader.removeHtml(rawTitle);
                    article.LinkUrl = nodeLink.Attributes["href"].Value;

                    HtmlNode nodeTime = itemNode.SelectSingleNode(itemXPath + "//p[@class='txt_gray txt_11 ex_hi']");
                    if (nodeTime != null)
                    {
                        article.DatePublished = HtmlDownloader.removeHtml(nodeTime.InnerHtml);
                    }

                    HtmlNode nodeShortContent = itemNode.SelectSingleNode(itemXPath + "//span[@class='hightlight']");
                    if (nodeShortContent != null)
                    {
                        article.ShortContent = HtmlDownloader.removeHtml(nodeShortContent.InnerText);
                    }
                    plist.Add(article);
                }
                if (plist.Count == 0)
                {
                    MessageBox.Show("Sorry, no articles found!");
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("System got an error at DoSearch function with message:\n" + ex.Message);
                return;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Searches Dantri for <paramref name="keyword"/> by loading the first result page
        /// through <c>LoadItemsFromPage</c> into <paramref name="plist"/>.
        /// </summary>
        /// <remarks>
        /// A message box is shown when the list is still empty afterwards or when an exception
        /// is caught; exceptions are not propagated to the caller.
        /// </remarks>
        /// <param name="plist">List that receives the parsed articles; it is cleared first.</param>
        /// <param name="keyword">Search text; it is URL-escaped before being placed in the query string.</param>
        public static async Task DoSearch(NewsItemList plist, string keyword)
        {
            try
            {
                plist.Clear();
                // Escape the keyword: an unescaped '&' or '#' would otherwise break the "&PageIndex=1" parameter.
                string escapedKeyword = System.Uri.EscapeDataString(keyword ?? string.Empty);
                string url = "http://search.dantri.com.vn/SearchResult.aspx?s=" + escapedKeyword + "&PageIndex=1";
                await LoadItemsFromPage(plist, url, "Dantri", null);

                if (plist.Count == 0)
                {
                    MessageBox.Show("Sorry, no articles found!");
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("System got an error at DoSearch function with message:\n" + ex.Message);
                return;
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Downloads a VnExpress listing page and fills <paramref name="plist"/> with the articles found on it.
        /// </summary>
        /// <remarks>
        /// "http://vnexpress.net" is prepended to <paramref name="url"/> when it does not already contain
        /// "vnexpress.net". The list is cleared first. When <paramref name="hottest"/> is supplied it is
        /// passed to <c>getHottestNew</c> and added as the first entry. Exceptions are caught and reported
        /// in a message box rather than propagated.
        /// </remarks>
        /// <param name="plist">List that receives the parsed articles.</param>
        /// <param name="url">Absolute or site-relative address of the listing page.</param>
        /// <param name="source">Source label stored on every article (and on <paramref name="hottest"/>).</param>
        /// <param name="hottest">Optional item to be populated as the page's top story; may be null.</param>
        public static async Task LoadItemsFromPage(NewsItemList plist, string url, string source, NewsItem hottest = null)
        {
            try
            {
                if (!url.Contains("vnexpress.net"))
                {
                    url = "http://vnexpress.net" + url;
                }
                plist.Clear();
                if (hottest != null)
                {
                    hottest.Source = source;
                }
                string html = await HtmlDownloader.loadFromUrl(url);

                HtmlDocument page = new HtmlDocument();
                page.LoadHtml(html);
                var allItemNodes = page.DocumentNode.SelectNodes("//div[@class='folder-news']");
                getHottestNew(page, hottest);
                if (hottest != null)
                {
                    plist.Add(hottest);
                }
                // SelectNodes yields null (not an empty collection) when nothing matches.
                if (allItemNodes == null)
                {
                    return;
                }
                foreach (HtmlNode itemNode in allItemNodes)
                {
                    // Compute the node's absolute XPath once; it prefixes every lookup below.
                    string itemXPath = itemNode.XPath;

                    HtmlNode nodeTitle = itemNode.SelectSingleNode(itemXPath + "//a[@class='link-title14' and @href]");
                    if (nodeTitle == null)
                    {
                        // Without a title link there is neither a name nor a URL, so skip this entry.
                        continue;
                    }

                    NewsItem article = new NewsItem();
                    article.Source = source;
                    HtmlNode nodeImage = itemNode.SelectSingleNode(itemXPath + "//img[@src]");
                    if (nodeImage != null)
                    {
                        article.ImageLink = nodeImage.Attributes["src"].Value;
                    }
                    article.Name    = HtmlDownloader.removeHtml(nodeTitle.InnerText);
                    article.LinkUrl = nodeTitle.Attributes["href"].Value;
                    HtmlNode nodeTime = itemNode.SelectSingleNode(itemXPath + "//span[@class='timeListHome']");
                    if (nodeTime != null)
                    {
                        article.DatePublished = HtmlDownloader.removeHtml(nodeTime.InnerText);
                    }
                    HtmlNode nodeShortContent = itemNode.SelectSingleNode(itemXPath + "//h3[@class='h3Lead']");
                    if (nodeShortContent != null)
                    {
                        string shortContent = nodeShortContent.InnerHtml;
                        // Everything from the first "<br" onwards is the related-links trailer; cut it off.
                        int cutIndex = shortContent.IndexOf("<br");
                        if (cutIndex > 0)
                        {
                            shortContent = shortContent.Substring(0, cutIndex);
                        }
                        // Drop stray "&gt;" entities left in the lead text.
                        shortContent         = shortContent.Replace("&gt;", "");
                        article.ShortContent = HtmlDownloader.removeHtml(shortContent);
                    }
                    plist.Add(article);
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("System got an error at LoadItemsFromPage function with message:\n" + ex.Message);
                return;
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Gets a list of articles from the URL of a Dantri category page and stores them in
        /// <paramref name="plist"/>.
        /// </summary>
        /// <remarks>
        /// "http://dantri.com.vn" is prepended to <paramref name="url"/> when it does not contain "http://".
        /// The list is cleared first. When <c>getHottestNew</c> returns true and <paramref name="hottest"/>
        /// is supplied, it is added as the first entry. Entries with no link, and entries linking to
        /// "tuyensinh.dantri", are skipped. Exceptions are caught and reported in a message box
        /// rather than propagated.
        /// </remarks>
        /// <param name="plist">List that receives the parsed articles.</param>
        /// <param name="url">Absolute or site-relative address of the page.</param>
        /// <param name="source">Source label stored on every article (and on <paramref name="hottest"/>).</param>
        /// <param name="hottest">Optional item to be populated as the page's top story; may be null.</param>
        public static async Task LoadItemsFromPage(NewsItemList plist, string url, string source, NewsItem hottest = null)
        {
            try
            {
                if (!url.Contains("http://"))
                {
                    url = "http://dantri.com.vn" + url;
                }
                plist.Clear();
                if (hottest != null)
                {
                    hottest.Source = source;
                }
                string html = await HtmlDownloader.loadFromUrl(url);

                HtmlDocument page = new HtmlDocument();
                page.LoadHtml(html);
                var allItemNodes = page.DocumentNode.SelectNodes("//div[@class='mt3 clearfix']");

                if (getHottestNew(page, hottest))
                {
                    if (hottest != null)
                    {
                        plist.Add(hottest);
                    }
                }
                if (allItemNodes == null)
                {
                    return;
                }
                foreach (HtmlNode itemNode in allItemNodes)
                {
                    // Compute the node's absolute XPath once; it prefixes every lookup below.
                    string itemXPath = itemNode.XPath;

                    NewsItem article = new NewsItem();
                    article.Source = source;
                    HtmlNode nodeImage = itemNode.SelectSingleNode(itemXPath + "//img[@src]");
                    if (nodeImage != null)
                    {
                        article.ImageLink = nodeImage.Attributes["src"].Value;
                    }

                    // SelectNodes yields null when the item contains no anchors at all.
                    var nodeTitle = itemNode.SelectNodes(itemXPath + "//a");
                    if (nodeTitle != null)
                    {
                        foreach (HtmlNode node in nodeTitle)
                        {
                            // The first anchor with an href supplies the link.
                            if (node.Attributes.Contains("href") && string.IsNullOrEmpty(article.LinkUrl))
                            {
                                article.LinkUrl = node.Attributes["href"].Value;
                            }
                            // Prefer a title attribute for the name.
                            if (node.Attributes.Contains("title") && string.IsNullOrEmpty(article.Name))
                            {
                                article.Name = HtmlDownloader.removeHtml(node.Attributes["title"].Value);
                            }
                            // Otherwise use the text of a non-image anchor longer than 10 characters.
                            if (!node.InnerHtml.Contains("<img")
                                && node.InnerText.Length > 10
                                && string.IsNullOrEmpty(article.Name))
                            {
                                article.Name = HtmlDownloader.removeHtml(node.InnerText);
                            }
                        }
                    }

                    // No anchor provided a link: nothing to open, so skip instead of dereferencing null.
                    if (article.LinkUrl == null)
                    {
                        continue;
                    }
                    if (article.LinkUrl.Contains("tuyensinh.dantri"))
                    {
                        continue;
                    }

                    HtmlNode nodeShortContent = itemNode.SelectSingleNode(itemXPath + "//div[@class='fon5 wid324 fl']");
                    if (nodeShortContent == null)
                    {
                        nodeShortContent = itemNode.SelectSingleNode(itemXPath + "//div[@class='fon5 fl']");
                    }
                    if (nodeShortContent != null)
                    {
                        string shortContent = nodeShortContent.InnerHtml;
                        // Everything from the first "<br" onwards is the related-links trailer; cut it off.
                        int cutIndex = shortContent.IndexOf("<br");
                        if (cutIndex > 0)
                        {
                            shortContent = shortContent.Substring(0, cutIndex);
                        }
                        // Drop stray "&gt;" entities left in the summary text.
                        shortContent         = shortContent.Replace("&gt;", "");
                        article.ShortContent = HtmlDownloader.removeHtml(shortContent);
                    }
                    plist.Add(article);
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("System got an error at LoadItemsFromPage function with message:\n" + ex.Message);
                return;
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Fills <paramref name="plist"/> by loading the page at the tag's link through
 /// <c>LoadItemsFromPage</c>, with "Dantri" as the source label.
 /// </summary>
 /// <param name="plist">List handed through to <c>LoadItemsFromPage</c>.</param>
 /// <param name="tag">Tag whose <c>Link</c> is loaded; when null the method does nothing.</param>
 public static async Task LoadItemsFromTag(NewsItemList plist, ItemTag tag)
 {
     // Reading tag.Link happens outside any try/catch, so a null tag must be rejected here.
     if (tag == null)
     {
         return;
     }
     await LoadItemsFromPage(plist, tag.Link, "Dantri", null);
 }