コード例 #1
0
ファイル: FrmKindleSpider.cs プロジェクト: chcaty/Caty.Zone
        /// <summary>
        /// Timer tick handler. Starts the spider when the tick's hour and minute match the
        /// configured schedule; otherwise appends a status line to the log text box.
        /// </summary>
        /// <param name="sender">The object that raised the event (not used).</param>
        /// <param name="e">Event data; <c>SignalTime</c> supplies the hour and minute that are compared with the schedule.</param>
        private void Timer1_Elapsed(object sender, System.Timers.ElapsedEventArgs e)
        {
            int intHour = e.SignalTime.Hour;
            int intMinute = e.SignalTime.Minute;

            // Fallback schedule (01:01). It stays in effect when "IsSql" is true because the
            // database-backed lookup of the schedule is currently disabled.
            int iHour = 1, iMinute = 1;

            if (!Convert.ToBoolean(ConnectionStrings.GetArgsValue("IsSql")))
            {
                iHour   = Convert.ToInt32(ConnectionStrings.GetArgsValue("Hour").Trim());
                iMinute = Convert.ToInt32(ConnectionStrings.GetArgsValue("Minute").Trim());
            }

            // The spider must only be started from the schedule check below. An unconditional
            // Start() ahead of this check launched the spider on every tick and caused Start()
            // to be called a second time on a matching tick.
            if (intHour == iHour && intMinute == iMinute)
            {
                // NOTE(review): if the timer interval is shorter than one minute this branch is
                // reached several times within the scheduled minute - confirm that spiderTask
                // tolerates repeated Start() calls (Task and Thread both throw on a second Start).
                spiderTask.Start();
            }
            else
            {
                // The log text box is updated through BeginInvoke so the assignment runs on the
                // thread that owns the form.
                this.BeginInvoke(new MethodInvoker(() =>
                {
                    txtLog.Text += String.Format("当前时间:{0},爬虫将在{1}启动\r\n", DateTime.Now.ToString(), iHour + ":" + iMinute);
                }));
            }
        }
コード例 #2
0
ファイル: FrmConfig.cs プロジェクト: chcaty/Caty.Zone
        /// <summary>
        /// Loads the spider settings ("Hour", "Minute", "SpiderType", "IsSql") through
        /// <c>ConnectionStrings.GetArgsValue</c> and shows them in the form's controls.
        /// </summary>
        private void GetSetArgs()
        {
            var settings = new SpiderArgs
            {
                Hour       = ConnectionStrings.GetArgsValue("Hour"),
                Minute     = ConnectionStrings.GetArgsValue("Minute"),
                SpiderType = Convert.ToInt32(ConnectionStrings.GetArgsValue("SpiderType"))
            };

            // Only the flag itself is used here; loading the settings from the database is disabled.
            bool useSql = Convert.ToBoolean(ConnectionStrings.GetArgsValue("IsSql"));

            MethodInvoker applyToControls = () =>
            {
                checkIsSql.Checked = useSql;

                if (settings.SpiderType != 1)
                {
                    // Any type other than 1 selects rbtnHour and only shows the minute.
                    rbtnHour.Checked = true;
                    numMinute2.Value = Convert.ToDecimal(settings.Minute.Trim());
                }
                else
                {
                    // Type 1 selects rbtnDay and shows both hour and minute.
                    rbtnDay.Checked = true;
                    numHour.Value   = Convert.ToDecimal(settings.Hour.Trim());
                    numMinute.Value = Convert.ToDecimal(settings.Minute.Trim());
                }

                SetSetControlEnable(false);
            };

            // Queue the control updates on the thread that owns the form.
            this.BeginInvoke(applyToControls);
        }
コード例 #3
0
ファイル: FrmKindleSpider.cs プロジェクト: chcaty/Caty.Zone
        /// <summary>
        /// Crawls the site's start page, derives the list of result pages from its
        /// "div.pagenavi" element(s) and, for every page, runs the book, book-detail and
        /// download-link crawlers. Unless the "IsSql" setting is true, the books collected
        /// for a page are exported to one Excel file per page. Blocks until the start-page
        /// crawl (including the completion handler) has finished.
        /// </summary>
        public void KindleCrawler()
        {
            var Url           = "http://mebook.cc/";
            var kindleCrawler = new SimpleCrawler();

            // Every progress line goes to both the console and the form's message log.
            Action<string> report = message =>
            {
                Console.WriteLine(message);
                SetMessage(message);
            };

            kindleCrawler.OnStart += (s, e) =>
            {
                report("爬虫开始抓取地址:" + e.Uri.ToString());
            };
            kindleCrawler.OnError += (s, e) =>
            {
                // The console and form messages intentionally differ (the form line carries a timestamp).
                Console.WriteLine("爬虫抓取出现错误:" + e.Uri.ToString() + ",异常消息:" + e.Exception.Message);
                SetMessage("爬虫抓取出现错误: " + e.Uri.ToString() + ",异常消息:" + e.Exception.Message + "时间:" + DateTime.Now.ToString());
            };
            kindleCrawler.OnCompleted += (s, e) =>
            {
                var dom  = htmlParser.Parse(e.PageSource);
                var link = dom.QuerySelectorAll("div.pagenavi");
                var temp = GetPageList(link);

                foreach (var t in temp)
                {
                    // Fill bookList for this page, then enrich every book with its detail data.
                    BookCrawler(t);
                    foreach (var b in bookList)
                    {
                        BookDetailCrawler(b);
                    }

                    // Only books for which a download link was found get the download-link crawl.
                    foreach (var b in bookList)
                    {
                        if (!String.IsNullOrEmpty(b.DownloadLink))
                        {
                            BookDownloadCrawler(b);
                        }
                    }

                    // When "IsSql" is true nothing is exported here (the database save is disabled).
                    if (!Convert.ToBoolean(ConnectionStrings.GetArgsValue("IsSql").Trim()))
                    {
                        // The table is built before bookList is cleared below, because the
                        // export itself runs later on the thread that owns the form.
                        DataTable dt = ListToDataTable.ToDataTable <Book>(bookList);

                        // 1-based page number, looked up once instead of once per use.
                        string pageNumber = Convert.ToString(temp.IndexOf(t) + 1);

                        this.BeginInvoke(new MethodInvoker(() =>
                        {
                            filePath = dirPath + "/Kindle资源爬虫第" + pageNumber + "页书籍信息.xlsx";
                            CreateExcelFile();
                            ExcelHelper excelHelper = new ExcelHelper(filePath);
                            excelHelper.DataTableToExcel(dt, "第" + pageNumber + "页全部书籍信息", true);
                        }));
                    }

                    bookList.Clear();
                }

                // NOTE(review): link.Length is the number of "div.pagenavi" elements, which is
                // presumably not the number of crawled pages - confirm whether the page count
                // from GetPageList was intended here.
                report("爬虫抓取任务完成!合计 " + link.Length + " 个页面。");
                report("爬虫抓取任务完成!合计 " + count + " 个书籍。");
                report("耗时:" + e.Milliseconds + "毫秒");
                report("线程:" + e.ThreadId);
                report("地址:" + e.Uri.ToString());
                report("===============================================");
            };

            // Original author's note: do not use the proxy (60.221.50.118:8090) unless the crawler gets blocked.
            kindleCrawler.Start(new Uri(Url)).Wait();
        }
コード例 #4
0
ファイル: FrmKindleSpider.cs プロジェクト: chcaty/Caty.Zone
        /// <summary>
        /// Crawls the download page of <paramref name="book"/> and fills in its download
        /// links (BDYP / CTWP / TYYP) and extraction passwords (BDYP / TYYP). When the
        /// "IsSql" setting is true the book is also inserted through <c>sqliteDb</c>.
        /// Blocks until the crawl (including the completion handler) has finished.
        /// </summary>
        /// <param name="book">Book whose <c>DownloadLink</c> is crawled; it is updated in place.</param>
        public void BookDownloadCrawler(Book book)
        {
            var Url = book.DownloadLink;
            var bookdownloadCrawler = new SimpleCrawler();

            bookdownloadCrawler.OnStart += (s, e) =>
            {
                Console.WriteLine("爬虫开始抓取地址:" + e.Uri.ToString());
                SetMessage("爬虫开始抓取地址:" + e.Uri.ToString());
            };
            bookdownloadCrawler.OnError += (s, e) =>
            {
                Console.WriteLine("爬虫抓取出现错误:" + e.Uri.ToString() + ",异常消息:" + e.Exception.Message);
                SetMessage("爬虫抓取出现错误: " + e.Uri.ToString() + ",异常消息:" + e.Exception.Message + "时间:" + DateTime.Now.ToString());
            };
            bookdownloadCrawler.OnCompleted += (s, e) =>
            {
                var downloaddom      = htmlParser.Parse(e.PageSource);
                var downloadlinkinfo = downloaddom.QuerySelectorAll("div.list");
                foreach (var Info in downloadlinkinfo)
                {
                    List <string> linklist = new List <string>();
                    foreach (var anchor in Info.QuerySelectorAll("a"))
                    {
                        linklist.Add(anchor.GetAttribute("href"));
                    }

                    // A "div.list" without any anchor carries no links; indexing it would throw.
                    if (linklist.Count == 0)
                    {
                        continue;
                    }

                    // The anchors are taken by position: first, second and third link.
                    book.DownloadLink_BDYP = linklist[0];
                    book.DownloadLink_CTWP = linklist.Count > 1 ? linklist[1] : String.Empty;
                    book.DownloadLink_TYYP = linklist.Count > 2 ? linklist[2] : String.Empty;
                }

                // Takes at most the first four characters, so a short segment cannot make Substring throw.
                Func<string, string> firstFour = text => text.Length > 4 ? text.Substring(0, 4) : text;

                // The passwords are read from the third-from-last paragraph of "div.desc",
                // split on ':'; segments 2 and 3 hold the BDYP and TYYP passwords.
                var      downloadpwdinfo = downloaddom.QuerySelectorAll("div.desc p").ToList();
                string[] str             = downloadpwdinfo.Count >= 3
                    ? downloadpwdinfo[downloadpwdinfo.Count - 3].InnerHtml.Split(':')
                    : new string[0]; // fewer than three paragraphs: no password line to parse
                book.DownloadPsw_BDYP = str.Length > 2 ? firstFour(str[2]) : String.Empty;
                book.DownloadPsw_TYYP = str.Length > 3 ? firstFour(str[3]) : String.Empty;

                if (Convert.ToBoolean(ConnectionStrings.GetArgsValue("IsSql").Trim()))
                {
                    sqliteDb.Insert(book);
                }

                Console.WriteLine(book.BookName + "下载链接抓取任务完成!");
                SetMessage(book.BookName + "下载链接抓取任务完成!");
                Console.WriteLine("耗时:" + e.Milliseconds + "毫秒");
                SetMessage("耗时:" + e.Milliseconds + "毫秒");
                Console.WriteLine("线程:" + e.ThreadId);
                SetMessage("线程:" + e.ThreadId);
                Console.WriteLine("地址:" + e.Uri.ToString());
                SetMessage("地址:" + e.Uri.ToString());
                Console.WriteLine("===============================================");
                SetMessage("===============================================");

                // One-second pause after each download page before the next crawl starts.
                Thread.Sleep(1000);
            };

            // Original author's note: do not use the proxy (60.221.50.118:8090) unless the crawler gets blocked.
            bookdownloadCrawler.Start(new Uri(Url)).Wait();
        }