Exemple #1
0
        /// <summary>
        /// Feeds the observer a change to the limited app followed by a change to another app,
        /// then checks that no limit has been handled 50 ms after initialization.
        /// </summary>
        public async Task TestLimitsCancel()
        {
            // Arrange
            var observer    = CreateObserver();
            var limitedApp  = CreateAppDailyLimit(100);
            var limitedInfo = AppInfo.Create(limitedApp.Name);
            var otherInfo   = AppInfo.Create("some app");

            var changes = new List<AppChangedArgs>
            {
                new AppChangedArgs(LogInfo.Create(limitedInfo, "")),
                new AppChangedArgs(LogInfo.Create(otherInfo, ""))
            };

            // Awaiting the buffered sequence yields its last buffer, which is replayed as a new observable.
            var buffered     = await changes.ToObservable().Buffer(TimeSpan.FromMilliseconds(100));
            var changeStream = buffered.ToObservable();

            windowChangedNotifier
                .Setup(n => n.AppChangedObservable)
                .Returns(changeStream);
            repository
                .Setup(d => d.GetFiltered<Aplication>(
                    It.IsAny<Expression<Func<Aplication, bool>>>(),
                    It.IsAny<Expression<Func<Aplication, object>>>()))
                .Returns(new List<Aplication> { limitedApp });
            repository
                .Setup(d => d.GetFilteredAsync<Aplication>(It.IsAny<Expression<Func<Aplication, bool>>>()))
                .ReturnsAsync(new List<Aplication> { limitedApp });
            appDurationCalc
                .Setup(t => t.GetDuration(limitedApp.Name, LimitSpan.Day))
                .ReturnsAsync(0);

            // Act
            observer.Initialize(new Setting { TrackingEnabled = true });
            await Task.Delay(50);

            // Assert
            limitHandler.Verify(h => h.Handle(It.IsAny<AppLimit>()), Times.Never);
        }
Exemple #2
0
        /// <summary>
        /// Emits a single change to the limited app and checks that its first limit is not handled
        /// immediately after initialization but has been handled exactly once 200 ms later.
        /// </summary>
        public async Task TestDelayedLimitHandle()
        {
            // Arrange
            var observer     = CreateObserver();
            var limitedApp   = CreateAppDailyLimit(100);
            var limitedInfo  = AppInfo.Create(limitedApp.Name);
            var changeStream = Observable.Return(new AppChangedArgs(LogInfo.Create(limitedInfo, "")));

            windowChangedNotifier
                .Setup(n => n.AppChangedObservable)
                .Returns(changeStream);
            repository
                .Setup(d => d.GetFiltered<Aplication>(
                    It.IsAny<Expression<Func<Aplication, bool>>>(),
                    It.IsAny<Expression<Func<Aplication, object>>>()))
                .Returns(new List<Aplication> { limitedApp });
            repository
                .Setup(d => d.GetFilteredAsync<Aplication>(It.IsAny<Expression<Func<Aplication, bool>>>()))
                .ReturnsAsync(new List<Aplication> { limitedApp });
            appDurationCalc
                .Setup(t => t.GetDuration(limitedApp.Name, LimitSpan.Day))
                .ReturnsAsync(0);

            // Act
            observer.Initialize(new Setting { TrackingEnabled = true });

            // Assert: nothing handled yet ...
            limitHandler.Verify(h => h.Handle(limitedApp.Limits.First()), Times.Never);

            await Task.Delay(200);

            // ... and exactly one handling after the wait.
            limitHandler.Verify(h => h.Handle(limitedApp.Limits.First()), Times.Once);
        }
Exemple #3
0
        /// <summary>
        /// Writes the request URL and every result entry of <paramref name="resultItems"/> to a
        /// ".fd" text file named after the MD5 of the URL, under BasePath/Spider.Identity.
        /// </summary>
        /// <param name="resultItems">The request and its extracted results.</param>
        /// <remarks>Any failure is logged at Warn level and rethrown.</remarks>
        public override void Process(ResultItems resultItems)
        {
            try
            {
                string folder   = $"{BasePath}{PathSeperator}{Spider.Identity}";
                var    urlHash  = Encrypt.Md5Encrypt(resultItems.Request.Url.ToString());
                FileInfo target = PrepareFile($"{folder}{PathSeperator}{urlHash}.fd");

                using (StreamWriter output = new StreamWriter(target.OpenWrite(), Encoding.UTF8))
                {
                    output.WriteLine("url:\t" + resultItems.Request.Url);

                    foreach (var pair in resultItems.Results)
                    {
                        IList items = pair.Value as IList;
                        if (items == null)
                        {
                            // Scalar result: key and value share one line.
                            output.WriteLine(pair.Key + ":\t" + pair.Value);
                            continue;
                        }

                        // List result: key on its own line, then one element per line.
                        output.WriteLine(pair.Key + ":");
                        foreach (var element in items)
                        {
                            output.WriteLine(element);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Logger.SaveLog(LogInfo.Create("Write file error.", Logger.Name, Spider, LogLevel.Warn, e));
                throw;
            }
        }
Exemple #4
0
 /// <summary>
 /// Downloads the resource at <paramref name="value"/> into the "images" folder under
 /// SpiderEnviroment.GlobalDirectory, unless a file with the same name is already there.
 /// </summary>
 /// <param name="value">The URL of the resource; its file-name part becomes the local file name.</param>
 /// <returns>The unchanged <paramref name="value"/>.</returns>
 /// <remarks>Any failure is logged at Error level and rethrown.</remarks>
 public override string Formate(string value)
 {
     try
     {
         var name = Path.GetFileName(value);
         if (name == null)
         {
             return value;
         }

         string file = Path.Combine(SpiderEnviroment.GlobalDirectory, "images", name);

         // Check for the local copy first so an already-saved file costs no download.
         if (File.Exists(file))
         {
             return value;
         }

         var fileData = Client.GetByteArrayAsync(value).Result;

         // 'using' releases the file handle even when the write throws.
         using (var stream = BasePipeline.PrepareFile(file).OpenWrite())
         {
             stream.Write(fileData, 0, fileData.Length);
         }

         return value;
     }
     catch (Exception e)
     {
         Logger.SaveLog(LogInfo.Create($"Download file: {value} failed.", Logger.Name, null, LogLevel.Error, e));
         throw;
     }
 }
Exemple #5
0
        /// <summary>
        /// Starts a background download of the resource at <paramref name="value"/> into the
        /// "images" folder under SpiderEnviroment.GlobalDirectory and returns its file name.
        /// </summary>
        /// <param name="value">The resource address passed to Path.GetFileName and to the HTTP client.</param>
        /// <returns>The file-name part of <paramref name="value"/> (null when GetFileName returns null).</returns>
        /// <remarks>
        /// The method does not wait for the download. A failed download is logged at Warn level.
        /// The continuation task is not observed, so an exception thrown while saving the file is not reported.
        /// </remarks>
        protected override dynamic FormateValue(dynamic value)
        {
            var name = Path.GetFileName(value);

            if (name != null)
            {
                Task<byte[]> task = Client.GetByteArrayAsync(value);
                task.ContinueWith(t =>
                {
                    if (t.Exception != null)
                    {
                        Logger.SaveLog(LogInfo.Create($"下载文件: {value} 失败.", Logger.Name, null, LogLevel.Warn, t.Exception));
                        return;
                    }

                    var fileData = t.Result;
                    string file  = Path.Combine(SpiderEnviroment.GlobalDirectory, "images", name);
                    if (!File.Exists(file))
                    {
                        // 'using' releases the file handle even when the write throws.
                        using (var stream = BasePipeline.PrepareFile(file).OpenWrite())
                        {
                            stream.Write(fileData, 0, fileData.Length);
                        }
                    }
                });
            }

            return name;
        }
Exemple #6
0
        /// <summary>
        /// Resolves the connection string if necessary, initializes the base pipeline and,
        /// unless the pipeline runs in Update mode, executes the create-schema and create-table SQL.
        /// </summary>
        /// <param name="spider">The spider this pipeline belongs to; also used as the log context.</param>
        /// <exception cref="SpiderException">
        /// No connection string is set and no UpdateConnectString is available, or the
        /// connection string is still empty after five attempts to obtain one.
        /// </exception>
        public override void InitPipeline(ISpider spider)
        {
            if (!IsEnabled)
            {
                return;
            }

            if (string.IsNullOrEmpty(ConnectString))
            {
                if (UpdateConnectString == null)
                {
                    throw new SpiderException("Can't find ConnectString or IUpdateConnectString.");
                }

                // Up to five attempts, pausing one second after each failure.
                for (int i = 0; i < 5; ++i)
                {
                    try
                    {
                        ConnectString = UpdateConnectString.GetNew();
                        break;
                    }
                    catch (Exception e)
                    {
                        Logger.SaveLog(LogInfo.Create("Update ConnectString failed.", Logger.Name, spider, LogLevel.Error, e));
                        Thread.Sleep(1000);
                    }
                }

                if (string.IsNullOrEmpty(ConnectString))
                {
                    throw new SpiderException("Can't updadate ConnectString via IUpdateConnectString.");
                }
            }

            base.InitPipeline(spider);

            if (Mode == PipelineMode.Update)
            {
                return;
            }

            NetworkCenter.Current.Execute("db-init", () =>
            {
                using (DbConnection conn = CreateConnection())
                using (var command = conn.CreateCommand())
                {
                    // The command is disposed together with the connection, also when a statement fails.
                    command.CommandText = GetCreateSchemaSql();
                    command.CommandType = CommandType.Text;
                    command.ExecuteNonQuery();

                    command.CommandText = GetCreateTableSql();
                    command.CommandType = CommandType.Text;
                    command.ExecuteNonQuery();
                    conn.Close();
                }
            });
        }
Exemple #7
0
        /// <summary>
        /// Publishes an <c>AppChangedArgs</c> built from the active window handle and title to <c>subject</c>.
        /// </summary>
        private void NotifyAppChanged()
        {
            var changed = new AppChangedArgs(
                LogInfo.Create(AppInfo.Create(activeWindowHandle), activeWindowTitle));

            subject.OnNext(changed);
        }
Exemple #8
0
 /// <summary>
 /// Disables this pipeline, with a Warn log entry, when the entity metadata carries no schema.
 /// </summary>
 /// <param name="metadata">The entity metadata whose Schema is checked.</param>
 public override void InitiEntity(EntityMetadata metadata)
 {
     if (metadata.Schema != null)
     {
         return;
     }

     Logger.SaveLog(LogInfo.Create("Miss pipeline because: Schema is necessary", Logger.Name, Spider, LogLevel.Warn));
     IsEnabled = false;
 }
Exemple #9
0
        /// <summary>
        /// Prepares the MongoDB collection for the entity described by <paramref name="metadata"/>.
        /// Without a schema the pipeline is disabled (with a Warn log entry) and nothing else happens.
        /// </summary>
        /// <param name="metadata">Entity metadata; its Schema supplies the database and table names.</param>
        public override void InitiEntity(EntityMetadata metadata)
        {
            var entitySchema = metadata.Schema;
            if (entitySchema == null)
            {
                Logger.SaveLog(LogInfo.Create("Miss pipeline because: Schema is necessary", Logger.Name, Spider, LogLevel.Warn));
                IsEnabled = false;
                return;
            }

            Schema = BaseEntityDbPipeline.GenerateSchema(metadata.Schema);

            // The collection is named after the schema's table and lives in the schema's database.
            _collection = new MongoClient(ConnectString)
                .GetDatabase(metadata.Schema.Database)
                .GetCollection<BsonDocument>(metadata.Schema.TableName);
        }
Exemple #10
0
        /// <summary>
        /// Runs the VerifyCollectedData callback, if one is set. When Redis is available the run is
        /// guarded by a "locker-validate-" lock and skipped if the stored status is already "verify finished".
        /// </summary>
        /// <remarks>Exceptions are logged and rethrown; with Redis available the lock is released in every case.</remarks>
        private void HandleVerifyCollectData()
        {
            if (VerifyCollectedData == null)
            {
                return;
            }

            string lockKey = "locker-validate-" + Identity;

            try
            {
                bool verificationPending = true;

                if (Redis != null)
                {
                    // Poll once per second until the lock (10 minute expiry) is ours.
                    while (!Db.LockTake(lockKey, "0", TimeSpan.FromMinutes(10)))
                    {
                        Thread.Sleep(1000);
                    }

                    verificationPending = Db.HashGet(ValidateStatusName, Identity) != "verify finished";
                }

                if (verificationPending)
                {
                    Logger.SaveLog(LogInfo.Create("开始执行数据验证...", Logger.Name, this, LogLevel.Info));

                    VerifyCollectedData();
                }

                Logger.SaveLog(LogInfo.Create("数据验证已完成.", Logger.Name, this, LogLevel.Info));

                // Record completion only when this call actually ran the verification.
                if (verificationPending && Redis != null)
                {
                    Db.HashSet(ValidateStatusName, Identity, "verify finished");
                }
            }
            catch (Exception e)
            {
                Logger.Error(e, e.Message);
                throw;
            }
            finally
            {
                if (Redis != null)
                {
                    Db.LockRelease(lockKey, 0);
                }
            }
        }
 /// <summary>
 /// Serializes the results of <paramref name="resultItems"/> as one JSON line into a ".json" file
 /// named after the MD5 of the request URL, under BasePath/Spider.Identity.
 /// </summary>
 /// <param name="resultItems">The request and its extracted results.</param>
 /// <remarks>An <see cref="IOException"/> is logged at Warn level and rethrown; other exceptions propagate unlogged.</remarks>
 public override void Process(ResultItems resultItems)
 {
     try
     {
         string folder   = $"{BasePath}{PathSeperator}{Spider.Identity}";
         var    urlHash  = Encrypt.Md5Encrypt(resultItems.Request.Url.ToString());
         FileInfo target = PrepareFile($"{folder}{PathSeperator}{urlHash}.json");

         using (StreamWriter output = new StreamWriter(target.OpenWrite(), Encoding.UTF8))
         {
             output.WriteLine(JsonConvert.SerializeObject(resultItems.Results));
         }
     }
     catch (IOException e)
     {
         Logger.SaveLog(LogInfo.Create("Write data to json file failed.", Logger.Name, Spider, LogLevel.Warn, e));
         throw;
     }
 }
Exemple #12
0
        /// <summary>
        /// Sends <paramref name="request"/> over HTTP through the site's proxy and converts the response into a Page.
        /// </summary>
        /// <param name="request">The request to download; proxy, status code and retry extras are written to it.</param>
        /// <param name="spider">The spider whose Site supplies proxy, timeout and accepted status codes.</param>
        /// <returns>
        /// The page built by HandleResponse, or a page carrying only the exception when anything
        /// other than a <c>DownloadException</c> is thrown.
        /// </returns>
        /// <exception cref="DownloadException">The response status code is not in the site's accepted list.</exception>
        protected override Page DowloadContent(Request request, ISpider spider)
        {
            Site site = spider.Site;
            HttpResponseMessage response = null;
            var proxy = site.GetHttpProxy();

            request.PutExtra(Request.Proxy, proxy);
            try
            {
                var httpMessage = GenerateHttpRequestMessage(request, site);

                response = NetworkCenter.Current.Execute("http", m =>
                {
                    // A client (with its handler chain) is created per request, so it is disposed here.
                    // SendAsync without a completion option buffers the body, so the returned response
                    // stays readable after the client is gone.
                    using (HttpClient httpClient = new HttpClient(new GlobalRedirectHandler(new HttpClientHandler()
                    {
                        AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip,
                        UseCookies             = false,
                        UseProxy = true,
                        Proxy    = proxy
                    })))
                    {
                        var message     = (HttpRequestMessage)m;
                        var requestTask = httpClient.SendAsync(message);

                        // NOTE(review): Task.Wait(int) takes milliseconds; confirm the unit of site.Timeout,
                        // since dividing by 1000 looks like a seconds conversion.
                        requestTask.Wait(site.Timeout / 1000);
                        if (requestTask.Status == TaskStatus.RanToCompletion)
                        {
                            return requestTask.Result;
                        }

                        // Not finished within the wait: report a timeout response instead.
                        return new HttpResponseMessage(HttpStatusCode.RequestTimeout);
                    }
                }, httpMessage);

                // NOTE(review): this throws for every non-success status before the AcceptStatCode check
                // below is reached — confirm that ordering is intended.
                response.EnsureSuccessStatusCode();
                if (!site.AcceptStatCode.Contains(response.StatusCode))
                {
                    throw new DownloadException($"下载 {request.Url} 失败. Code: {response.StatusCode}");
                }
                var httpStatusCode = response.StatusCode;
                request.PutExtra(Request.StatusCode, httpStatusCode);

                Page page = HandleResponse(request, response, httpStatusCode, site);

                // need update
                page.TargetUrl = request.Url.ToString();

                //page.SetRawText(File.ReadAllText(@"C:\Users\Lewis\Desktop\taobao.html"));

                // Whenever a login page is hit, after a successful redial everything throws and the
                // Spider re-adds the request to the Scheduler. So several "Warning Custom Validate Failed"
                // entries under multithreading are nothing to worry about; a custom Exception could separate them.

                // Must be cleared when done: storing this value in Redis causes a single task to run in an endless loop.
                request.PutExtra(Request.CycleTriedTimes, null);

                //#if !NET_CORE
                //					httpWebRequest.ServicePoint.ConnectionLimit = int.MaxValue;
                //#endif

                return page;

                // A normal result has already been returned above; reaching this point means the download failed.
                //throw new SpiderExceptoin("Download failed.");
            }
            catch (DownloadException)
            {
                throw;
            }
            catch (Exception e)
            {
                // Any other failure is handed back inside the page instead of being thrown.
                Page page = new Page(request, site.ContentType)
                {
                    Exception = e
                };
                return page;
            }
            finally
            {
                // Close the response here so it is released even when an earlier statement threw.
                try
                {
                    //ensure the connection is released back to pool
                    //check:
                    //EntityUtils.consume(httpResponse.getEntity());
                    response?.Dispose();
                }
                catch (Exception e)
                {
                    Logger.SaveLog(LogInfo.Create("Close response fail.", Logger.Name, spider, LogLevel.Warn, e));
                }
            }
        }
Exemple #13
0
        /// <summary>
        /// Builds the pipeline's column, primary-key, update-column, index and unique definitions
        /// from <paramref name="metadata"/>. Without a schema the pipeline is disabled and nothing else happens.
        /// </summary>
        /// <param name="metadata">Entity metadata: schema, fields, primary keys, update columns, indexes and uniques.</param>
        /// <exception cref="SpiderException">
        /// A primary, update, index or unique name does not match a column; Update mode has no
        /// primary key; or no column other than the primary key is left to update.
        /// </exception>
        public override void InitiEntity(EntityMetadata metadata)
        {
            if (metadata.Schema == null)
            {
                Logger.SaveLog(LogInfo.Create("Miss pipeline because: Schema is necessary", Logger.Name, Spider, LogLevel.Warn));
                IsEnabled = false;
                return;
            }

            Schema = GenerateSchema(metadata.Schema);

            // Only fields that declare a data type become columns.
            foreach (var f in metadata.Entity.Fields)
            {
                var field = (Field)f;
                if (!string.IsNullOrEmpty(field.DataType))
                {
                    Columns.Add(field);
                }
            }

            var primary = metadata.Primary;
            if (primary != null)
            {
                foreach (var p in primary)
                {
                    var col = Columns.FirstOrDefault(c => c.Name == p);
                    if (col == null)
                    {
                        throw new SpiderException("Columns set as primary is not a property of your entity.");
                    }
                    Primary.Add(col);
                }
            }

            if (Mode == PipelineMode.Update)
            {
                if (Primary == null || Primary.Count == 0)
                {
                    throw new SpiderException("Set Primary in the Indexex attribute.");
                }

                if (metadata.Updates != null && metadata.Updates.Count > 0)
                {
                    foreach (var column in metadata.Updates)
                    {
                        var col = Columns.FirstOrDefault(c => c.Name == column);
                        if (col == null)
                        {
                            throw new SpiderException("Columns set as update is not a property of your entity.");
                        }
                        UpdateColumns.Add(col);
                    }
                }
                else
                {
                    // Work on a copy: assigning Columns itself would let the RemoveAll below strip the
                    // primary-key columns out of Columns and break the index/unique lookups further down.
                    UpdateColumns = Columns.ToList();
                }

                // Primary-key columns are never updated.
                UpdateColumns.RemoveAll(c => Primary.Contains(c));

                if (UpdateColumns.Count == 0)
                {
                    throw new SpiderException("Can't update primary key.");
                }
            }

            AutoIncrement = metadata.AutoIncrement;

            if (metadata.Indexes != null)
            {
                foreach (var index in metadata.Indexes)
                {
                    List<string> tmpIndex = new List<string>();
                    foreach (var i in index)
                    {
                        var col = Columns.FirstOrDefault(c => c.Name == i);
                        if (col == null)
                        {
                            throw new SpiderException("Columns set as index is not a property of your entity.");
                        }
                        tmpIndex.Add(col.Name);
                    }
                    if (tmpIndex.Count != 0)
                    {
                        Indexs.Add(tmpIndex);
                    }
                }
            }

            if (metadata.Uniques != null)
            {
                foreach (var unique in metadata.Uniques)
                {
                    List<string> tmpUnique = new List<string>();
                    foreach (var i in unique)
                    {
                        var col = Columns.FirstOrDefault(c => c.Name == i);
                        if (col == null)
                        {
                            throw new SpiderException("Columns set as unique is not a property of your entity.");
                        }
                        tmpUnique.Add(col.Name);
                    }
                    if (tmpUnique.Count != 0)
                    {
                        Uniques.Add(tmpUnique);
                    }
                }
            }
        }
Exemple #14
0
        /// <summary>
        /// Invokes this task, provided <c>CanExecute</c> currently allows it.
        /// A task whose state is <c>Done</c> gets a restore request (followed by a 50 ms pause) before the invoke request.
        /// </summary>
        /// <param name="parameter">Not read by this method.</param>
        public void Execute(object parameter)
        {
            if (!CanExecute(new object()))
            {
                return;
            }

            bool alreadyDone = _taskState.Synchron == (short)(eTaskState.Done);
            if (alreadyDone)
            {
                _restoreRequest.Synchron = true;
                System.Threading.Thread.Sleep(50);
            }

            _invokeRequest.Synchron = true;

            TcoAppDomain.Current.Logger.Information(
                $"Task '{LogInfo.NameOrSymbol(this)}' invoked. {{@sender}}",
                LogInfo.Create(this));

            RecordTaskAction?.Invoke(CodeProvider);
        }
Exemple #15
0
 /// <summary>
 /// Wires the can-execute validation to the enabled/serviceable values and creates the
 /// Abort and Restore commands, each of which logs an Information entry through its third argument.
 /// </summary>
 private void InitCommands()
 {
     _enabled.Subscribe(ValidateCanExecute);
     _isServiceable.Subscribe(ValidateCanExecute);
     CanExecuteChanged += TcoTask_CanExecuteChanged;

     Abort = new RelayCommand(
         AbortTask,
         unused => CanAbortTask(),
         () => TcoAppDomain.Current.Logger.Information($"Task '{LogInfo.NameOrSymbol(this)}' aborted. {{@sender}}", LogInfo.Create(this)));

     Restore = new RelayCommand(
         RestoreTask,
         unused => CanRestoreTask(),
         () => TcoAppDomain.Current.Logger.Information($"Task '{LogInfo.NameOrSymbol(this)}' restored. {{@sender}}", LogInfo.Create(this)));

     _isServiceable.Subscribe(ValidateCanExecuteAbortRestore);
 }
Exemple #16
0
        /// <summary>
        /// Builds the entity processor and pipelines, prepares the start requests under a Redis lock
        /// (when a Redis database is configured), runs the spider and finally verifies the collected data.
        /// </summary>
        /// <param name="arguments">
        /// Optional switches: "rerun" re-initializes and clears the scheduler and forces start-request
        /// preparation; "running-test" skips the actual crawl.
        /// </param>
        /// <remarks>The spider and the SpiderMonitor are disposed when the method leaves, whether it succeeded or not.</remarks>
        public override void Run(params string[] arguments)
        {
            InitEnvorimentAndVerify();

            try
            {
#if !NET_CORE
                if (CookieInterceptor != null)
                {
                    Logger.SaveLog(LogInfo.Create("尝试获取 Cookie...", Logger.Name, this, LogLevel.Info));
                    var cookie = CookieInterceptor.GetCookie();
                    if (cookie == null)
                    {
                        Logger.SaveLog(LogInfo.Create("获取 Cookie 失败, 爬虫无法继续.", Logger.Name, this, LogLevel.Error));
                        return;
                    }
                    else
                    {
                        Site.CookiesStringPart = cookie.CookiesStringPart;
                        Site.Cookies           = cookie.CookiesDictionary;
                    }
                }
#endif

                Logger.SaveLog(LogInfo.Create("创建爬虫...", Logger.Name, this, LogLevel.Info));

                EntityProcessor processor = new EntityProcessor(this);

                foreach (var entity in Entities)
                {
                    processor.AddEntity(entity);
                }
                PageProcessor = processor;

                // Each entity gets its own clones of the configured pipelines; clones that
                // disable themselves during InitiEntity are dropped.
                foreach (var entity in Entities)
                {
                    string entiyName = entity.Entity.Name;
                    var    pipelines = new List<BaseEntityPipeline>();
                    foreach (var pipeline in EntityPipelines)
                    {
                        var newPipeline = pipeline.Clone();
                        newPipeline.InitiEntity(entity);
                        if (newPipeline.IsEnabled)
                        {
                            pipelines.Add(newPipeline);
                        }
                    }
                    if (pipelines.Count > 0)
                    {
                        Pipelines.Add(new EntityPipeline(entiyName, pipelines));
                    }
                }

                CheckIfSettingsCorrect();

                string key       = "locker-" + Identity;
                bool   lockTaken = false;
                try
                {
                    bool needInitStartRequest = true;
                    if (Db != null)
                    {
                        // Poll once per second until the lock (10 minute expiry) is ours.
                        while (!Db.LockTake(key, "0", TimeSpan.FromMinutes(10)))
                        {
                            Thread.Sleep(1000);
                        }
                        lockTaken = true;

                        var lockerValue = Db.HashGet(InitStatusSetName, Identity);
                        needInitStartRequest = lockerValue != "init finished";
                    }

                    if (arguments.Contains("rerun"))
                    {
                        Scheduler.Init(this);
                        Scheduler.Clear();
                        //DELETE verify record.
                        Db?.HashDelete(ValidateStatusName, Identity);
                        needInitStartRequest = true;
                    }

                    Logger.SaveLog(LogInfo.Create("构建内部模块、准备爬虫数据...", Logger.Name, this, LogLevel.Info));
                    InitComponent();

                    if (needInitStartRequest)
                    {
                        if (PrepareStartUrls != null)
                        {
                            for (int i = 0; i < PrepareStartUrls.Length; ++i)
                            {
                                var prepareStartUrl = PrepareStartUrls[i];
                                Logger.SaveLog(LogInfo.Create($"[步骤 {i + 2}] 添加链接到调度中心.", Logger.Name, this, LogLevel.Info));
                                prepareStartUrl.Build(this, null);
                            }
                        }
                    }

                    SpiderMonitor.Register(this);
                }
                finally
                {
                    // Release the lock on failure too; otherwise it is held until its 10 minute
                    // expiry when initialization throws.
                    if (lockTaken)
                    {
                        Db?.LockRelease(key, 0);
                    }
                }

                RegisterControl(this);

                if (!arguments.Contains("running-test"))
                {
                    base.Run();
                }
                else
                {
                    IsExited = true;
                }

                TaskFinished();

                HandleVerifyCollectData();
            }
            finally
            {
                Dispose();
                SpiderMonitor.Dispose();
            }
        }
Exemple #17
0
        /// <summary>
        /// Validates that entities and entity pipelines are configured, installs the redial executor
        /// if one is set, and connects to Redis (database 1) when a host is configured or found in Configuration.
        /// </summary>
        /// <exception cref="SpiderException">
        /// No entity or no entity pipeline is configured, or (NET_CORE on non-Windows only) the Redis host cannot be resolved.
        /// </exception>
        private void InitEnvorimentAndVerify()
        {
            bool hasEntities = Entities != null && Entities.Count != 0;
            if (!hasEntities)
            {
                Logger.SaveLog(LogInfo.Create("Count of entity is 0.", Logger.Name, this, LogLevel.Error));
                throw new SpiderException("Count of entity is 0.");
            }

            bool hasPipelines = EntityPipelines != null && EntityPipelines.Count != 0;
            if (!hasPipelines)
            {
                Logger.SaveLog(LogInfo.Create("Need at least one entity pipeline.", Logger.Name, this, LogLevel.Error));
                throw new SpiderException("Need at least one entity pipeline.");
            }

            if (RedialExecutor != null)
            {
                RedialExecutor.Init();
                NetworkCenter.Current.Executor = RedialExecutor;
            }

            // Fall back to the Configuration values when no host was set explicitly.
            if (string.IsNullOrEmpty(RedisHost))
            {
                RedisHost     = Configuration.GetValue("redisHost");
                RedisPassword = Configuration.GetValue("redisPassword");

                int configuredPort;
                if (int.TryParse(Configuration.GetValue("redisPort"), out configuredPort))
                {
                    RedisPort = configuredPort;
                }
                else
                {
                    RedisPort = 6379;
                }
            }

            if (string.IsNullOrEmpty(RedisHost))
            {
                return;
            }

            var options = new ConfigurationOptions();
            options.ServiceName     = "DotnetSpider";
            options.Password        = RedisPassword;
            options.ConnectTimeout  = 65530;
            options.KeepAlive       = 8;
            options.ConnectRetry    = 3;
            options.ResponseTimeout = 3000;
#if NET_CORE
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                // Lewis: This is a Workaround for .NET CORE can't use EndPoint to create Socket.
                var resolved = Dns.GetHostAddressesAsync(RedisHost).Result.FirstOrDefault();
                if (resolved == null)
                {
                    Logger.SaveLog(LogInfo.Create($"Can't resovle host: {RedisHost}", Logger.Name, this, LogLevel.Error));
                    throw new SpiderException($"Can't resovle host: {RedisHost}");
                }
                options.EndPoints.Add(new IPEndPoint(resolved, RedisPort));
            }
            else
            {
                options.EndPoints.Add(new DnsEndPoint(RedisHost, RedisPort));
            }
#else
            options.EndPoints.Add(new DnsEndPoint(RedisHost, RedisPort));
#endif
            Redis = ConnectionMultiplexer.Connect(options);
            Db    = Redis.GetDatabase(1);
        }