Exemple #1
0
        /// <summary>
        /// Downloads a category page and fills <paramref name="plist"/> with the articles found on it.
        /// </summary>
        /// <remarks>
        /// <paramref name="plist"/> is cleared first. When <c>getHottestNew</c> returns true and
        /// <paramref name="hottest"/> is not null, <paramref name="hottest"/> is added first; then one
        /// item is added for each <c>div</c> whose class attribute is exactly <c>mt3 clearfix</c>.
        /// Teasers without any link, and teasers whose link contains <c>tuyensinh.dantri</c>, are skipped.
        /// Any exception is reported through a message box and swallowed; items added before the
        /// failure remain in the list.
        /// </remarks>
        /// <param name="plist">List that receives the parsed articles; its previous content is discarded.</param>
        /// <param name="url">Category URL. Anything not starting with <c>http://</c> or <c>https://</c>
        /// is treated as a path relative to <c>http://dantri.com.vn</c>.</param>
        /// <param name="source">Value stored in the <c>Source</c> property of every produced item.</param>
        /// <param name="hottest">Optional item passed to <c>getHottestNew</c> to be filled in; may be null.</param>
        /// <returns>A task that completes once the page has been downloaded and parsed.</returns>
        public static async Task LoadItemsFromPage(NewsItemList plist, string url, string source, NewsItem hottest = null)
        {
            try
            {
                // Test the scheme as a prefix: a substring test would mangle https:// URLs and
                // would mistake a relative URL that merely embeds "http://" for an absolute one.
                bool isAbsolute = url.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                  || url.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                if (!isAbsolute)
                {
                    url = "http://dantri.com.vn" + url;
                }

                plist.Clear();
                if (hottest != null)
                {
                    hottest.Source = source;
                }

                string html = await HtmlDownloader.loadFromUrl(url);

                HtmlDocument page = new HtmlDocument();
                page.LoadHtml(html);
                var allItemNodes = page.DocumentNode.SelectNodes("//div[@class='mt3 clearfix']");

                // getHottestNew is always invoked (even with a null hottest), exactly as before.
                if (getHottestNew(page, hottest) && hottest != null)
                {
                    plist.Add(hottest);
                }

                // SelectNodes returns null, not an empty collection, when nothing matches.
                if (allItemNodes == null)
                {
                    return;
                }

                foreach (HtmlNode itemNode in allItemNodes)
                {
                    NewsItem article = new NewsItem();
                    article.Source = source;

                    // ".//" keeps the query relative to the current teaser node.
                    HtmlNode nodeImage = itemNode.SelectSingleNode(".//img[@src]");
                    if (nodeImage != null)
                    {
                        article.ImageLink = nodeImage.Attributes["src"].Value;
                    }

                    // A teaser without any <a> yields null here; guard so one odd teaser
                    // cannot abort the whole page.
                    var anchorNodes = itemNode.SelectNodes(".//a");
                    if (anchorNodes != null)
                    {
                        foreach (HtmlNode node in anchorNodes)
                        {
                            // First non-empty href wins.
                            if (string.IsNullOrEmpty(article.LinkUrl) && node.Attributes.Contains("href"))
                            {
                                article.LinkUrl = node.Attributes["href"].Value;
                            }

                            // Prefer the title attribute for the headline...
                            if (string.IsNullOrEmpty(article.Name) && node.Attributes.Contains("title"))
                            {
                                article.Name = HtmlDownloader.removeHtml(node.Attributes["title"].Value);
                            }

                            // ...otherwise fall back to the text of an anchor that does not wrap an
                            // image and has more than 10 characters of text.
                            if (string.IsNullOrEmpty(article.Name)
                                && !node.InnerHtml.Contains("<img")
                                && node.InnerText.Length > 10)
                            {
                                article.Name = HtmlDownloader.removeHtml(node.InnerText);
                            }
                        }
                    }

                    // Skip teasers with no link at all (previously a NullReferenceException that
                    // aborted the load) and links containing "tuyensinh.dantri".
                    if (article.LinkUrl == null || article.LinkUrl.Contains("tuyensinh.dantri"))
                    {
                        continue;
                    }

                    HtmlNode nodeShortContent = itemNode.SelectSingleNode(".//div[@class='fon5 wid324 fl']");
                    if (nodeShortContent == null)
                    {
                        nodeShortContent = itemNode.SelectSingleNode(".//div[@class='fon5 fl']");
                    }
                    if (nodeShortContent != null)
                    {
                        // Everything from the first "<br" onward is dropped (the original comment
                        // calls it the related link); a "<br" at index 0 leaves the text untouched.
                        string shortContent = nodeShortContent.InnerHtml;
                        int cutIndex = shortContent.IndexOf("<br");
                        if (cutIndex > 0)
                        {
                            shortContent = shortContent.Substring(0, cutIndex);
                        }

                        // Strip escaped '>' entities before removing the markup.
                        shortContent = shortContent.Replace("&gt;", "");
                        article.ShortContent = HtmlDownloader.removeHtml(shortContent);
                    }

                    plist.Add(article);
                }
            }
            catch (Exception ex)
            {
                // Best-effort loader: report the problem to the user instead of propagating it.
                MessageBox.Show("System got an error at LoadItemsFromPage function with message:\n" + ex.Message);
                return;
            }
        }