예제 #1
0
        /// <summary>
        /// Manual smoke test for the crawler: loads the link list stored for one
        /// news entry, crawls every distinct link and counts the posts that match
        /// the probe condition inside the loop.
        /// </summary>
        /// <remarks>
        /// Nothing is returned or printed; the counter can only be inspected in a
        /// debugger.
        /// </remarks>
        public static void Crawl_Test()
        {
            // Ordinal of the news entry under test; change it here to test another one.
            Posts p = DS_Get_Link.Get(45);

            // p.url is split on commas; empty entries and duplicate links are dropped.
            List<string> links = p.url.Split(',')
                                      .Where(item => item != "")
                                      .Distinct()
                                      .ToList();

            int dem = 0;

            foreach (string link in links)
            {
                CrawlTotal crawlTotal = new CrawlTotal();
                Posts post = crawlTotal.Crawl(link);

                // Wait 500 ms before moving on to the next link.
                Thread.Sleep(500);

                // Count posts with a non-empty Image whose Ngay equals 28
                // (Ngay is presumably the day of the month - confirm).
                if (post.Image != "" && post.Ngay == 28)
                {
                    dem++;
                }
            }

            // NOTE(review): this unconditional increment is never read; it looks like
            // a debugger breakpoint anchor - confirm before removing.
            dem++;

            // Finally crawl one fixed article URL with the Posts instance loaded above.
            p.Crawl("http://news.ltn.com.tw/news/society/breakingnews/2401545");
        }
예제 #2
0
        /// <summary>
        /// Crawls a single article, choosing the site-specific crawl routine of
        /// <see cref="Posts"/> from the URL.
        /// </summary>
        /// <param name="Link">
        /// Article URL. It is matched against known URL fragments with
        /// <c>String.Contains</c> (case-sensitive); the first matching branch wins.
        /// </param>
        /// <returns>
        /// The <see cref="Posts"/> instance the selected routine was invoked on, or a
        /// freshly constructed instance when <paramref name="Link"/> is null or empty.
        /// </returns>
        public Posts Crawl(string Link)
        {
            Posts p = new Posts();

            // Nothing to crawl. A null link previously slipped past the "" check and
            // threw NullReferenceException on the first Contains call.
            if (string.IsNullOrEmpty(Link))
            {
                return p;
            }

            // Branch order matters: more specific patterns must stay ahead of the
            // shorter ones they overlap with (e.g. mil.news.sina.com.cn/china).
            if (Link.Contains("http://news.ltn.com.tw/news/entertainment"))
            {
                p.Crawl_Entertainment(Link);
            }
            else if (Link.Contains("http://news.ltn.com.tw/news/sports"))
            {
                p.Crawl_Sports(Link);
            }
            else if (Link.Contains("http://istyle.ltn.com.tw/article"))
            {
                p.Crawl_Style(Link);
            }
            else if (Link.Contains("http://opinion.chinatimes.com/")
                  || Link.Contains("http://www.chinatimes.com/realtimenews/"))
            {
                // Both chinatimes sections go through the same routine.
                p.Crawl_Chinatimes_Opinion(Link);
            }
            else if (Link.Contains("http://culture.dwnews.com"))
            {
                p.Crawl_DWnews(Link);
            }
            else if (Link.Contains("https://udn.com/news/story/"))
            {
                p.Crawl_UDN_Sports(Link);
            }
            else if (Link.Contains("http://news.sina.com.cn"))
            {
                p.Crawl_sina(Link);
            }
            else if (Link.Contains("http://mil.news.sina.com.cn/china"))
            {
                p.Crawl_Military(Link);
            }
            else if (Link.Contains("http://mil.news.sina.com.cn"))
            {
                p.Crawl_Military_International(Link);
            }
            else if (Link.Contains("http://sports.sina.com.cn"))
            {
                p.Crawl_Sport_Sina(Link);
            }
            else if (Link.Contains("http://www.aboluowang.com"))
            {
                p.Crawl_abo(Link);
            }
            else if (Link.Contains("http://www.cna.com.tw"))
            {
                p.Crawl_Cna(Link);
            }
            else if (Link.Contains("http://sports.khan.co.kr/news"))
            {
                p.Crawl_Khan(Link);
            }
            else if (Link.Contains("http://www.cwbst.com"))
            {
                p.Crawl_Cwbst(Link);
            }
            else if (Link.Contains("http://www.setn.com/News.aspx?NewsID"))
            {
                // The second argument is presumably an XPath predicate selecting the
                // content node - confirm in Crawl_Setn.
                p.Crawl_Setn(Link, "@id='Content1'");
            }
            else if (Link.Contains("http://www.setn.com/E/News.aspx?NewsID"))
            {
                p.Crawl_Setn(Link, "@class='Content2'");
            }
            else if (Link.Contains("http://www.abc.es"))
            {
                p.Crawl_AbcES(Link);
            }
            else if (Link.Contains("https://www.elespanol.com"))
            {
                p.Crawl_elespanol(Link);
            }
            else if (Link.Contains("http://military.china.com/important")
                  || Link.Contains("http://news.china.com"))
            {
                // Both china.com sections go through the same routine.
                p.Crawl_china(Link);
            }
            else if (Link.Contains("http://www.chinanews.com"))
            {
                p.Crawl_chinanews_com(Link);
            }
            else if (Link.Contains("https://kknews.cc/entertainment"))
            {
                p.Crawl_kknews(Link);
            }
            else if (Link.Contains("huanqiu.com"))
            {
                // Any huanqiu.com URL: this single check covers the taiwan/china/mil/
                // world sub-site patterns that used to be listed next to it.
                p.Crawl_huanqiu_com(Link);
            }
            else if (Link.Contains("https://www.nownews.com"))
            {
                p.Crawl_Nownews(Link);
            }
            else if (Link.Contains("http://www.spotvnews.co.kr"))
            {
                p.Crawl_Sportvnews(Link);
            }
            else if (Link.Contains("http://www.eldawlagia.com"))
            {
                p.Crawl_Arabic_Eldawlagia(Link);
            }
            else
            {
                // Fallback routine; per the original note it covers the general,
                // politics, life and society sections.
                p.Crawl(Link);
            }

            return p;
        }