예제 #1
0
        /// <summary>
        /// Event handler (named for the form's Load event) that describes the
        /// dichvubds.vn Hanoi listing and calls <c>start()</c> on a crawler built for it.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            // PageURL carries a {0} placeholder where FirstPageUrl has the page number 1.
            pageInfo listing = new pageInfo
            {
                FirstPageUrl = "https://dichvubds.vn/nha-dat-ban-ha-noi/page/1",
                PageURL      = "https://dichvubds.vn/nha-dat-ban-ha-noi/page/{0}"
            };

            new crawler(listing).start();
        }
예제 #2
0
        /// <summary>
        /// Console entry point: registers the listing pages to crawl, prints a start
        /// banner, then crawls the registered pages in an endless round-robin loop,
        /// sleeping after every page.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Pause after each crawled page, in milliseconds.
            const int DelayBetweenPagesMs = 6000;

            List<pageInfo> pages = new List<pageInfo>();

            // Active sources: baohiemxahoi.gov.vn news listings, one entry per CateID.
            pages.Add(CreatePage("baohiemxahoi.gov.vn", "https://baohiemxahoi.gov.vn/tintuc/Pages/hoat-dong-bhxh-viet-nam.aspx?CateID=136&date=&Page={0}", 1));
            pages.Add(CreatePage("baohiemxahoi.gov.vn", "https://baohiemxahoi.gov.vn/tintuc/Pages/hoat-dong-bhxh-viet-nam.aspx?CateID=52&date=&Page={0}", 1));
            pages.Add(CreatePage("baohiemxahoi.gov.vn", "https://baohiemxahoi.gov.vn/tintuc/Pages/cai-cach-thu-tuc-hanh-chinh.aspx?CateID=59&date=&Page={0}", 1));
            pages.Add(CreatePage("baohiemxahoi.gov.vn", "https://baohiemxahoi.gov.vn/tintuc/pages/luat-bhxh-bhyt-bat-buoc.aspx?CateID=53&date=&Page={0}", 1));

            // Disabled sources, kept so they can be re-enabled by uncommenting.
            // Note that the bhxhdanang.gov.vn listing starts at page 0, not 1.
            //pages.Add(CreatePage("bhxhdanang.gov.vn", "http://www.bhxhdanang.gov.vn/HoiDap.aspx?&Page={0}", 0));
            //
            // News
            //pages.Add(CreatePage("ketoanthienung.org", "http://ketoanthienung.org/tin-tuc/nhung-diem-moi-ve-bhxh-bhyt-bhtn-kpcd.htm_p{0}", 1));
            //
            // bhxhhn.com.vn Q&A, one listing per pkcm value 1..12. Presumably these map to
            // the site's category drop-down (TODO confirm against the site):
            //   1 health insurance, 2 social insurance, 3 unemployment insurance,
            //   4 other questions, 5 retirement, 6 survivor benefits,
            //   7 sickness and maternity, 8 occupational accidents and diseases,
            //   9 social insurance book, 10 health insurance card,
            //   11 medical treatment under health insurance, 12 one-time social insurance payout.
            //for (int category = 1; category <= 12; category++)
            //{
            //    pages.Add(CreatePage("bhxhhn.com.vn", "http://bhxhhn.com.vn/hoidap/tabid/245/TopMenuId/48/cMenu/48/stParentMenuId/48/Default.aspx?index={0}&pkcm=" + category + "&numberpage=10", 1));
            //}

            Console.WriteLine("Start crawler: " + DateTime.Now.ToString("dd/MM/yyyy HH:mm:ss"));
            Console.WriteLine("-----");

            // Runs until the process is terminated: there is no exit condition.
            while (true)
            {
                foreach (pageInfo pInfo in pages)
                {
                    CrawlPage(pInfo);

                    // The pause applies after every page, whether or not a routine ran for it.
                    Thread.Sleep(DelayBetweenPagesMs);
                }
            }
        }

        /// <summary>
        /// Builds a page description whose first-page URL is the paged URL template
        /// with its {0} placeholder filled in by <paramref name="firstPageNumber"/>.
        /// </summary>
        /// <param name="pageId">Source identifier; selects the crawl routine in <c>CrawlPage</c>.</param>
        /// <param name="pageUrlTemplate">Listing URL containing a {0} placeholder for the page number.</param>
        /// <param name="firstPageNumber">Page number of the first listing page.</param>
        /// <returns>The populated page description.</returns>
        private static pageInfo CreatePage(string pageId, string pageUrlTemplate, int firstPageNumber)
        {
            return new pageInfo
            {
                PageId       = pageId,
                // Invariant culture so the page number is always rendered as plain ASCII digits.
                FirstPageUrl = string.Format(System.Globalization.CultureInfo.InvariantCulture, pageUrlTemplate, firstPageNumber),
                PageURL      = pageUrlTemplate
            };
        }

        /// <summary>
        /// Creates a crawler for <paramref name="page"/> and invokes the start routine
        /// that matches its PageId. An unrecognised PageId is reported on standard
        /// error and otherwise skipped.
        /// </summary>
        /// <param name="page">Page description to crawl.</param>
        private static void CrawlPage(pageInfo page)
        {
            crawler robot = new crawler(page);

            switch (page.PageId)
            {
                case "ketoanthienung.org":
                    robot.start_kttu();
                    break;
                case "bhxhhn.com.vn":
                    robot.start_bhhn();
                    break;
                case "bhxhdanang.gov.vn":
                    robot.start_bhdn();
                    break;
                case "baohiemxahoi.gov.vn":
                    robot.start_bhxhgovvn();
                    break;
                default:
                    // Previously such pages were skipped without any trace.
                    Console.Error.WriteLine("No crawl routine for page id: " + page.PageId);
                    break;
            }
        }
예제 #3
0
파일: crawler.cs 프로젝트: hiepdh/crawler
 /// <summary>
 /// Creates a crawler bound to the given page description.
 /// </summary>
 /// <param name="pageInfo">Page description stored on this instance; no validation is performed here.</param>
 public crawler(pageInfo pageInfo)
 {
     // `this.` is required because the parameter has the same name as the member it is assigned to.
     this.pageInfo = pageInfo;
 }