/// <summary>
/// Reads messages from <c>Socket</c> while it is open and starts one spider task per message.
/// </summary>
/// <remarks>
/// Each message is expected to be the JSON form of a <c>TaskRunOrStopInput</c>. A message may be
/// delivered in several frames (for example when it is larger than the 4 KB receive buffer), so
/// frames are accumulated until <c>EndOfMessage</c> is reported before decoding and parsing.
/// </remarks>
private async Task RunTask()
        {
            var buffer = new byte[1024 * 4];
            var seg    = new ArraySegment <byte>(buffer);

            while (this.Socket.State == WebSocketState.Open)
            {
                string tmp;
                bool   closeRequested = false;

                // Fully qualified so the block does not depend on a "using System.IO" directive.
                using (var message = new System.IO.MemoryStream())
                {
                    bool endOfMessage;
                    do
                    {
                        var input = await this.Socket.ReceiveAsync(seg, CancellationToken.None);

                        if (input.MessageType == WebSocketMessageType.Close)
                        {
                            // The peer is closing the connection; there is nothing left to process.
                            closeRequested = true;
                            break;
                        }

                        message.Write(buffer, 0, input.Count);
                        endOfMessage = input.EndOfMessage;
                    } while (!endOfMessage);

                    // Decode only after the whole message has arrived so that neither the JSON
                    // nor a multi-byte UTF-8 sequence is cut at a frame boundary.
                    tmp = Encoding.UTF8.GetString(message.ToArray());
                }

                if (closeRequested)
                {
                    break;
                }

                if (string.IsNullOrEmpty(tmp))
                {
                    continue;
                }

                var p = JsonHelper.ToObject <TaskRunOrStopInput>(tmp);
                if (p == null)
                {
                    // NOTE(review): assumes JsonHelper.ToObject can return null (e.g. for the JSON
                    // literal "null"); skipping avoids a NullReferenceException on p.Uris below.
                    continue;
                }

                var spiderManager = ContainerManager.Resolve <SpiderManager>();
                var config        = new SpiderConfig()
                {
                    CallBack = (msg) =>
                    {
                        // NOTE(review): the send is not awaited, so failures are unobserved and
                        // overlapping sends are possible — confirm whether CallBack can be made async.
                        Socket.SendAsync(new ArraySegment <byte>(Encoding.UTF8.GetBytes(msg)), WebSocketMessageType.Text, true, CancellationToken.None);
                    },
                    Uris = p.Uris
                };
                spiderManager.RunTask(p.SpiderId, config);
            }
        }
Example #2
0
 /// <summary>
 /// Creates the spider with the supplied configuration, or with a default one when none is given.
 /// </summary>
 /// <param name="spiderConfig">
 /// Configuration to use; when null a default is built with a 1.5 second download delay,
 /// 10 concurrency requests and 3 retry requests.
 /// </param>
 public AbstractSpider(SpiderConfig spiderConfig = null)
 {
     if (object.ReferenceEquals(spiderConfig, null))
     {
         // The default is only constructed when the caller did not pass a configuration.
         SpiderConfig = new SpiderConfig(
             downloadDelay: TimeSpan.FromSeconds(1.5),
             concurrencyRequests: 10,
             retryRequests: 3
             );
     }
     else
     {
         SpiderConfig = spiderConfig;
     }
 }
Example #3
0
        /// <summary>
        /// Starts a spider: resolves the crawler for the given id, creates an instance of it,
        /// initializes it with the configuration and runs it.
        /// </summary>
        /// <param name="crawlerId">Id used to look up the crawler via <c>GetSpiderCrawler</c>.</param>
        /// <param name="config">Configuration handed to the crawler's <c>InitConfig</c>.</param>
        public void RunTask(string crawlerId, SpiderConfig config)
        {
            // The lookup result is passed to Activator.CreateInstance, which builds the crawler instance.
            var instance = (ISpiderCrawler)Activator.CreateInstance(GetSpiderCrawler(crawlerId));

            instance.InitConfig(config);
            instance.Run();
        }
Example #4
0
        /// <summary>
        /// Console entry point: builds the crawl configuration for the movie listing page,
        /// runs a <c>SpiderBase</c> with it and waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            DonetSpider.SaveMessage save = SaveMessage;

            // Follow-up query on a detail page: the href of every link inside "#Zoom table".
            var detailLinkQuery = new SelectQuery
            {
                Query  = new HtmlQuery { Query = "#Zoom table a", },
                Name   = "Details",
                Select = new List <HtmlSelect>
                {
                    new HtmlSelect { Attribute = "href", }
                }
            };

            // A further query ("#Zoom span", Attribute = "html") used to sit between these two
            // and is currently disabled.

            // Follow-up query on a detail page: the src of every image inside "#Zoom span".
            var detailImageQuery = new SelectQuery
            {
                Query  = new HtmlQuery { Query = "#Zoom span img", },
                Select = new List <HtmlSelect>
                {
                    new HtmlSelect { Attribute = "src", },
                }
            };

            // Query on the listing page: every link in the content table.
            var listingQuery = new SelectQuery
            {
                Name   = "name",
                Query  = new HtmlQuery { Query = "div.co_content8 table a" },
                Select = new List <HtmlSelect>
                {
                    new HtmlSelect { ResultKey = "name", Attribute = "html" },
                    new HtmlSelect { ResultKey = "url", Attribute = "href" },
                    // Same key/attribute as the entry above, but this one also carries the follow-up queries.
                    new HtmlSelect
                    {
                        ResultKey = "url",
                        Attribute = "href",
                        Url       = new List <SelectQuery> { detailLinkQuery, detailImageQuery }
                    }
                }
            };

            var config = new SpiderConfig
            {
                MainUrl    = "http://www.dytt8.net/html/gndy/dyzz/index.html",
                HttpConfig = new HttpConfig { Timeout = 20000 },
                Select     = new List <SelectQuery> { listingQuery, },
                NextPage   = new NextPage { next = new NextPageByNext { } }
            };

            var spider = new SpiderBase(new HttpHelper(), config, save);
            spider.Start();

            Console.WriteLine("完毕");
            Console.ReadLine();
        }
Example #5
0
 /// <summary>
 /// Saves the given spider configuration by delegating to a new <c>SpiderConfigDB</c>.
 /// </summary>
 /// <param name="spiderConfig">Configuration to save.</param>
 public void SaveSpiderConfig(SpiderConfig spiderConfig)
 {
     var store = new SpiderConfigDB();
     store.SaveSpiderConfig(spiderConfig);
 }
 /// <summary>
 /// Loads the regex list, the site/class bindings and the class list used by the task,
 /// retrying up to five times with a one minute pause between attempts.
 /// </summary>
 /// <param name="config">Task configuration; its ClassInfoId and SiteInfoId select the site/class bindings.</param>
 /// <remarks>
 /// Every failure is logged. If all attempts fail the method returns without throwing,
 /// leaving whatever was loaded before the failure in place.
 /// </remarks>
 public void CaseInit(SpiderConfig config)
 {
     const int maxAttempts = 5;

     for (int attempt = 1; attempt <= maxAttempts; attempt++)
     {
         try
         {
             RegProList = new RegProListBll().GetRegProList();
             MessageCenter.ShowBox("正则数据导入完毕!", 2);
             SiteClassList = new SiteClassBll().GetBingCat(config.ClassInfoId,config.SiteInfoId);
             MessageCenter.ShowBox("更新数据导入完毕!", 2);
             ClassList = new ClassInfoBll().GetAllCatInfo();
             MessageCenter.ShowBox("分类数据导入完毕!", 2);
             ProListCount = SiteClassList.Count;
             return;
         }
         catch (Exception ex)
         {
             // Log immediately so the failure is visible before the pause, not a minute later.
             LogServer.WriteLog(ex);

             // Pause only when another attempt follows; waiting after the last one just delays the caller.
             if (attempt < maxAttempts)
             {
                 Thread.Sleep(60000);
             }
         }
     }
 }
Example #7
0
 /// <summary>
 /// Dispatches on the configured case type: 1 calls <c>UpdateSiteCat()</c>, 3 calls
 /// <c>UpdateSiteCat(10)</c>; any other value does nothing.
 /// </summary>
 /// <param name="config">Task configuration whose CaseType selects the action.</param>
 private void SpiderSystem(SpiderConfig config)
 {
     // Read once, as the original switch did.
     var caseType = config.CaseType;

     if (caseType == 1)
     {
         UpdateSiteCat();
     }
     else if (caseType == 3)
     {
         UpdateSiteCat(10);
     }
 }
Example #8
0
        /// <summary>
        /// Runs the task described by <paramref name="config"/> in an endless polling loop:
        /// waits for the start time, calls <c>SpiderSystem</c>, then moves the start/stop window
        /// forward by <c>config.TimeSpan</c> seconds, saves the configuration and sleeps until the next start.
        /// </summary>
        /// <param name="config">
        /// Task configuration. An unset StartTime defaults to 08:00 today and an unset StopTime to
        /// <c>DateTime.MaxValue</c>. The method returns immediately when StopTime is before StartTime;
        /// otherwise it never returns.
        /// </param>
        private void CaseSystem(SpiderConfig config)
        {
            if (config.StartTime == DateTime.MinValue)
            {
                config.StartTime = DateTime.Parse(DateTime.Now.ToString("yyyy-MM-dd") + " 8:00:00");
            }

            if (config.StopTime == DateTime.MinValue)
            {
                config.StopTime = DateTime.MaxValue;
            }

            // Length of the allowed run window; kept constant while the window is moved forward.
            var timeArea = config.StopTime - config.StartTime;
            if (timeArea.TotalSeconds < 0)
            {
                return;
            }

            int totalCount = 1;
            while (true)
            {
                if (DateTime.Now < config.StartTime)
                {
                    TimeSpan temp = config.StartTime - DateTime.Now;
                    LogServer.WriteLog(config.TaskName + "将在" + temp + "/s 后 执行 ", "RunInfo");
                    CaseSystemSleep(temp.TotalMilliseconds);
                }

                if (DateTime.Now > config.StopTime)
                {
                    // The window has passed: advance the start time until it lies in the future.
                    DateTime tempStop = config.StopTime;
                    do
                    {
                        config.StartTime = config.TimeSpan < 24*3600 ? config.StartTime.AddDays(1) : config.StartTime.AddSeconds(config.TimeSpan);
                    } while (config.StartTime < DateTime.Now);

                    config.StopTime = CaseSystemAddClamped(config.StartTime, timeArea);

                    if (tempStop.AddSeconds(config.TimeSpan) > DateTime.Now)
                    {
                        config.TaskRemark = "今天更新结束,将在" + config.StartTime + "开始执行 ";
                        new SpiderConfigBll().SaveSpiderConfig(config);

                        // Saving can take long enough for this difference to become negative;
                        // the sleep helper treats that as "do not wait".
                        TimeSpan temp = config.StartTime - DateTime.Now;
                        CaseSystemSleep(temp.TotalMilliseconds);
                    }
                    else
                    {
                        config.TaskRemark = "程序已超过轮询间隔时间没有更新 将立即更新";
                        new SpiderConfigBll().SaveSpiderConfig(config);
                        Thread.Sleep(10);
                    }

                    LogServer.WriteLog(config.TaskName + "\t" + config.TaskRemark, "RunInfo");
                }

                Stopwatch t1 = new Stopwatch();
                t1.Start();
                try
                {
                    LogServer.WriteLog(config.TaskName + "\t开始执行运行 " + totalCount + "次", "RunInfo");
                    SpiderSystem(config);
                }
                catch (Exception ex)
                {
                    // A failed run must not stop the polling loop.
                    LogServer.WriteLog(ex);
                }

                t1.Stop();

                // Move the window to the next interval. With the default StopTime of
                // DateTime.MaxValue a plain Add would overflow and throw, so the stop time is clamped.
                config.StartTime = config.StartTime.AddSeconds(config.TimeSpan);
                config.StopTime  = CaseSystemAddClamped(config.StartTime, timeArea);
                new SpiderConfigBll().SaveSpiderConfig(config);

                double lessTime = (config.StartTime - DateTime.Now).TotalMilliseconds;

                if (lessTime < 0)
                {
                    // The run took longer than the interval: start again almost immediately.
                    lessTime = 10;
                    config.TaskRemark = "更新完毕 运行 " + totalCount + "次,耗时" + t1.Elapsed + " 超出间隔时间 请优化程序或者调整间隔时间";
                    LogServer.WriteLog(config.TaskName + "\t执行时间超过间隔时间 运行 " + totalCount + "次", "RunInfo");
                }
                else
                {
                    // StartTime already holds the next start (advanced above), so it is reported as is.
                    config.TaskRemark = "更新完毕 运行 " + totalCount + "次,耗时" + t1.Elapsed + "/s 将在" + config.StartTime + "开始执行sleep:" + (lessTime/3600000).ToString("0.00")+"小时";
                    LogServer.WriteLog(config.TaskName + "\t" + config.TaskRemark, "RunInfo");
                }

                CaseSystemSleep(lessTime);
                totalCount++;
            }
        }

        /// <summary>
        /// Returns <paramref name="start"/> plus <paramref name="span"/>, or <c>DateTime.MaxValue</c>
        /// when the sum would exceed it. <paramref name="span"/> is expected to be non-negative.
        /// </summary>
        private static DateTime CaseSystemAddClamped(DateTime start, TimeSpan span)
        {
            return span > DateTime.MaxValue - start ? DateTime.MaxValue : start.Add(span);
        }

        /// <summary>
        /// Blocks the current thread for the given number of milliseconds. Values below one
        /// millisecond (including negative ones) do not wait; values above <c>int.MaxValue</c>
        /// are slept in several chunks, because <c>Thread.Sleep(int)</c> cannot take them in one call.
        /// </summary>
        private static void CaseSystemSleep(double milliseconds)
        {
            while (milliseconds >= 1)
            {
                int chunk = milliseconds > int.MaxValue ? int.MaxValue : (int)milliseconds;
                Thread.Sleep(chunk);
                milliseconds -= chunk;
            }
        }
Example #9
0
        /// <summary>
        /// Sends video cover images to the Baidu face detection service and stores one
        /// <c>ImageDetect</c> row per processed cover.
        /// </summary>
        /// <param name="maxGetCount">
        /// Maximum number of results to store in this call. The default, 60 * 2 * 30, is
        /// 30 minutes of data at two requests per second.
        /// </param>
        /// <remarks>
        /// Uploaders with more than 3000 followers are visited in descending follower order.
        /// Covers that already have an <c>ImageDetect</c> row (matched by local file name) are skipped.
        /// Calls to the service are spaced at least 500 ms apart. The method stops as soon as
        /// <c>isExit</c> is set or <paramref name="maxGetCount"/> results have been stored.
        /// </remarks>
        private static void DetectFace2(int maxGetCount = 60 * 2 * 30)
        {
            var      baiduai      = new FaceDetect();
            DateTime nextCallTime = DateTime.Now;

            using (var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili))
            {
                foreach (var up in db.Select <UP>(o => o.follower > 3000).OrderByDescending(o => o.follower).ToArray())
                {
                    foreach (var av in db.Select <AV>(o => o.UpId == up.Id))
                    {
                        if (isExit)
                        {
                            // Leave both loops; a plain break would keep querying every remaining uploader.
                            return;
                        }

                        var pic = new Uri(av.pic).AbsolutePath.Replace("/", "_");

                        // Existing results are matched by local file name only.
                        var detect = db.Single <ImageDetect>(o => o.LocalFile == pic);
                        if (detect != null)
                        {
                            continue;
                        }

                        byte[] bytes = null;

                        if (FromWeb)
                        {
                            try
                            {
                                // Dispose the client so its resources are released after every download.
                                using (var client = new WebClient())
                                {
                                    bytes = client.DownloadData(av.pic);
                                }
                            }
                            catch (Exception e)
                            {
                                // A failed download only skips this cover (bytes stays null).
                                Console.WriteLine(e);
                            }
                        }
                        else
                        {
                            // Covers are read from local storage here. Storing all covers locally is
                            // costly: the original author estimated over 100 GB for the dance section
                            // alone, more than the server disk could take once other sections are added.
                            var imagePath = SpiderConfig.GetPath($"imgs/{av.UpId}/{av.Id}");

                            var imageFile = Path.Combine(imagePath, pic);
                            if (!File.Exists(imageFile))
                            {
                                continue;
                            }

                            bytes = File.ReadAllBytes(imageFile);
                        }

                        if (bytes == null)
                        {
                            continue;
                        }

                        // Throttle: the free Baidu interface only allows 2 requests per second.
                        var wait = (int)(nextCallTime - DateTime.Now).TotalMilliseconds + 1;

                        if (wait > 0)
                        {
                            Console.WriteLine($"wait {wait}");
                            Thread.Sleep(wait);
                        }

                        var start = DateTime.Now;
                        var ret   = baiduai.DetectFromBytes(bytes);
                        Console.Write($"useTime:{ (DateTime.Now - start).TotalMilliseconds} ms ");
                        nextCallTime = DateTime.Now.AddMilliseconds(500);

                        if (ret == null)
                        {
                            continue;
                        }

                        var dbItem = new ImageDetect
                        {
                            AVId      = av.Id,
                            UpId      = av.UpId,
                            LocalFile = pic,
                            Url       = av.pic,
                            Detect    = ret.result,
                        };

                        // Face statistics are only filled in when the service reported success.
                        if (ret.error_code == 0)
                        {
                            dbItem.face_num = ret.result.face_num;
                            if (ret.result.face_num > 0)
                            {
                                dbItem.max_face_probability = ret.result.face_list.Max(o => o.face_probability);
                                dbItem.max_quality          = ret.result.face_list.Max(o => GetQuality(o));
                            }
                        }

                        db.Insert(dbItem);

                        Console.WriteLine(av.title);

                        // Stop once maxGetCount rows have been stored (the previous post-decrement
                        // comparison let two extra rows through).
                        if (--maxGetCount <= 0)
                        {
                            return;
                        }
                    }
                }
            }
        }