/// <summary>
/// One-time component setup: subscribes to the console cancel event, initializes the
/// scheduler, makes sure a downloader and at least one pipeline exist, and hands the
/// start requests over to the scheduler. Returns immediately once <c>_init</c> is set.
/// </summary>
public void InitComponent()
{
    if (_init)
    {
        return;
    }

    Console.CancelKeyPress += ConsoleCancelKeyPress;
    Scheduler.Init(this);

    // Fall back to the HTTP client downloader when none was configured.
    if (Downloader == null)
    {
        Downloader = new HttpClientDownloader();
    }
    Downloader.ThreadNum = ThreadNum;

    // Fall back to a file pipeline when none was configured.
    if (Pipelines.Count == 0)
    {
        Pipelines.Add(new FilePipeline());
    }

    if (StartRequests != null)
    {
        if (StartRequests.Count > 0)
        {
            Logger.Info($"添加网址到调度中心,数量: {StartRequests.Count}");

            var pushOptions = new ParallelOptions { MaxDegreeOfParallelism = 4 };
            bool pushDirectly = Scheduler is QueueDuplicateRemovedScheduler || Scheduler is PriorityScheduler;

            if (pushDirectly)
            {
                // These scheduler types receive the requests one by one.
                Parallel.ForEach(StartRequests, pushOptions, startRequest =>
                {
                    Scheduler.Push(startRequest, this);
                });
            }
            else
            {
                // Other schedulers: collect the requests in a temporary
                // QueueDuplicateRemovedScheduler, then bulk-load its contents.
                var stagingScheduler = new QueueDuplicateRemovedScheduler();
                Parallel.ForEach(StartRequests, pushOptions, startRequest =>
                {
                    stagingScheduler.PushWithoutRedialManager(startRequest, this);
                });

                Scheduler.Load(stagingScheduler.ToList(this), this);
                ClearStartRequests();
            }
        }
        else
        {
            Logger.Info("不需要添加网址到调度中心.", true);
        }
    }

    _init = true;
}
/// <summary>
/// Configures the Wyam engine for the documentation site: applies the Docs recipe and
/// the "Samson" theme, registers the key assemblies, publishes the package assembly
/// files and site settings, and adds the "Package", "PackageCategories" and
/// "RenderPackage" pipelines.
/// </summary>
/// <param name="engine">The Wyam engine being configured; also passed to the base constructor.</param>
/// <param name="build">The build whose <c>PackageSpecs</c> supply the assembly glob patterns.</param>
public WyamConfiguration(Engine engine, Build build) : base(engine)
{
    var configurator = new Configurator(engine);
    configurator.Recipe = new Wyam.Docs.Docs();
    configurator.Theme = "Samson";
    configurator.Configure("");

    configurator.AssemblyLoader.DirectAssemblies.Add(typeof(HtmlKeys).Assembly);
    configurator.AssemblyLoader.DirectAssemblies.Add(typeof(WebKeys).Assembly);
    configurator.AssemblyLoader.DirectAssemblies.Add(typeof(FeedKeys).Assembly);
    configurator.AssemblyLoader.DirectAssemblies.Add(typeof(CodeAnalysisKeys).Assembly);

    // Resolve each package's assembly patterns under the temporary "_packages" folder
    // and express the matches relative to the "input" directory.
    // The query is intentionally left deferred, exactly as before.
    var assemblyFiles = build.PackageSpecs
        .SelectMany(x => x.Assemblies)
        .SelectMany(x => GlobFiles(NukeBuild.TemporaryDirectory / "_packages", x.TrimStart('/', '\\')))
        .Distinct()
        .Select(x => GetRelativePath(NukeBuild.RootDirectory / "input", x));

    // Logger.Info(string.Join(", ", assemblyFiles));
    Settings[DocsKeys.AssemblyFiles] = assemblyFiles;
    // Settings[DocsKeys.SolutionFiles] = GlobFiles(NukeBuild.TemporaryDirectory, "**/*.sln")
    //     .Select(x => GetRelativePath(NukeBuild.RootDirectory / "input", x));
    Settings[DocsKeys.Title] = "Rocket Surgeons Guild";
    Settings[Keys.Host] = "rocketsurgeonsguild.github.io/";
    Settings[Keys.LinksUseHttps] = true;
    // Settings[DocsKeys.SourceFiles] = GetRelativePath(NukeBuild.RootDirectory / "input", NukeBuild.TemporaryDirectory).TrimEnd('/') + "/*/src/**/{!bin,!obj,!packages,!*.Tests,}/**/*.cs";
    Settings[DocsKeys.IncludeDateInPostPath] = true;
    Settings[DocsKeys.BaseEditUrl] = "https://github.com/RocketSurgeonsGuild/rocketsurgeonsguild.github.io/blob/dev/input/";

    // "Package": read the package description YAML files.
    Pipelines.InsertBefore(Docs.Code, "Package",
        new ReadFiles(NukeBuild.RootDirectory.ToString() + "/packages/*.yml"),
        new Yaml()
    );

    // "PackageCategories": one document per category, written to packages/<slug>/index.html.
    Pipelines.InsertAfter("Package", "PackageCategories",
        new GroupByMany((doc, _) => doc.List<string>("Categories"),
            new Documents("Package")
        ),
        // The slug becomes part of a URL, so lower-case it culture-invariantly:
        // the output path must not depend on the culture of the build machine.
        new Meta(Keys.WritePath, (doc, _) =>
            new FilePath("packages/" + doc.String(Keys.GroupKey).ToLowerInvariant().Replace(" ", "-") + "/index.html")),
        new Meta(Keys.RelativeFilePath, (ctx, _) => ctx.FilePath(Keys.WritePath)),
        new OrderBy((ctx, _) => ctx.String(Keys.GroupKey))
    );

    // "RenderPackage": render the category documents with the package layout and write them.
    Pipelines.Add("RenderPackage",
        new Documents("PackageCategories"),
        new Razor().WithLayout("/_PackageLayout.cshtml"),
        new WriteFiles()
    );
}
/// <summary>
/// One-time component setup: subscribes to the console cancel event, ensures a
/// downloader and at least one pipeline exist, initializes every pipeline, and hands
/// the start requests to the scheduler. Returns immediately once <c>_init</c> is set.
/// </summary>
public void InitComponent()
{
    if (_init)
    {
        return;
    }

    Console.CancelKeyPress += ConsoleCancelKeyPress;

    // Fall back to the HTTP client downloader when none was configured.
    if (Downloader == null)
    {
        Downloader = new HttpClientDownloader();
    }

    // Fall back to a file pipeline when none was configured.
    if (Pipelines.Count == 0)
    {
        Pipelines.Add(new FilePipeline());
    }

    foreach (var configuredPipeline in Pipelines)
    {
        configuredPipeline.InitPipeline(this);
    }

    bool hasStartRequests = StartRequests != null && StartRequests.Count > 0;
    if (!hasStartRequests)
    {
        Logger.Info("添加链接到调度中心, 数量: 0.");
    }
    else
    {
        Logger.Info($"添加链接到调度中心, 数量: {StartRequests.Count}.");

        bool pushDirectly = Scheduler is QueueDuplicateRemovedScheduler || Scheduler is PriorityScheduler;
        if (pushDirectly)
        {
            // These scheduler types receive the requests one by one.
            var pushOptions = new ParallelOptions { MaxDegreeOfParallelism = 4 };
            Parallel.ForEach(StartRequests, pushOptions, startRequest =>
            {
                Scheduler.Push(startRequest);
            });
        }
        else
        {
            // Other schedulers get the whole set in one call.
            var uniqueRequests = new HashSet<Request>(StartRequests);
            Scheduler.Load(uniqueRequests);
            ClearStartRequests();
        }
    }

    _init = true;
}
/// <summary>
/// Prepares the spider's components: initializes the scheduler, ensures a downloader,
/// a pipeline and a usable thread pool exist, pushes clones of the start requests to
/// the scheduler, and (once) registers the console handlers and removes the console
/// window's close menu item.
/// </summary>
protected void InitComponent()
{
    Scheduler.Init(this);

    // Fall back to the HTTP client downloader when none was configured.
    if (Downloader == null)
    {
        Downloader = new HttpClientDownloader();
    }
    Downloader.SetThreadNum(ThreadNum);

    // Fall back to a file pipeline when none was configured.
    if (Pipelines.Count == 0)
    {
        Pipelines.Add(new FilePipeline());
    }

    // Create a new pool when there is none or the existing one reports shutdown.
    bool needsNewPool = ThreadPool == null || ThreadPool.IsShutdown;
    if (needsNewPool)
    {
        ThreadPool = new CountableThreadPool(ThreadNum);
    }

    if (StartRequests != null)
    {
        var pushOptions = new ParallelOptions { MaxDegreeOfParallelism = 100 };
        Parallel.ForEach(StartRequests, pushOptions, startRequest =>
        {
            // A clone is pushed, not the original request object.
            Scheduler.Push((Request)startRequest.Clone(), this);
        });
        ClearStartRequests();
        Logger.InfoFormat("Push Request to Scheduler success.");
    }

    if (_registConsoleCtrlHandler)
    {
        return;
    }

    Console.Title = Identify;
    Console.CancelKeyPress += Console_CancelKeyPress;
    _registConsoleCtrlHandler = true;

    // Locate the console window by the title that was just set.
    int consoleWindow = FindWindow(null, Identify);
    // Get the window's system menu, which holds the close item.
    IntPtr systemMenu = GetSystemMenu((IntPtr)consoleWindow, IntPtr.Zero);
    int closeCommand = 0xF060;
    // Remove the close item from the menu.
    RemoveMenu(systemMenu, closeCommand, 0x0);
}
/// <summary>
/// Ensures a pipeline is available, validates that page processors exist, registers the
/// entity definition of every entity processor with every entity pipeline (unless the
/// "skip" argument is given), and finally initializes all pipelines.
/// </summary>
/// <param name="arguments">Run arguments; "skip" disables the entity wiring step. A null array is treated as empty.</param>
/// <exception cref="SpiderException">
/// Thrown when there are no page processors, or when entity processors exist but no
/// entity pipeline is configured.
/// </exception>
protected override void InitPipelines(params string[] arguments)
{
    // NOTE(review): if Pipelines can really be null here, the Add below (and the final
    // foreach) still fail; the null test is kept as in the original - confirm with the owner.
    if (Pipelines == null || Pipelines.Count == 0)
    {
        var defaultPipeline = GetDefaultPipeline();
        if (defaultPipeline != null)
        {
            Pipelines.Add(defaultPipeline);
        }
    }

    // Validate before the list is queried below, so a missing processor list surfaces
    // as the intended SpiderException instead of an ArgumentNullException from LINQ.
    if (PageProcessors == null || PageProcessors.Count == 0)
    {
        throw new SpiderException("Count of PageProcessor is zero.");
    }

    if (arguments == null || !arguments.Contains("skip"))
    {
        var entityProcessors = PageProcessors.OfType<IEntityProcessor>().ToList();
        var entityPipelines = Pipelines.OfType<BaseEntityPipeline>().ToList();

        if (entityProcessors.Count != 0 && entityPipelines.Count == 0)
        {
            throw new SpiderException("You may miss a entity pipeline.");
        }

        foreach (var entityProcessor in entityProcessors)
        {
            foreach (var entityPipeline in entityPipelines)
            {
                entityPipeline.AddEntity(entityProcessor.EntityDefine);
            }
        }
    }

    foreach (var pipeline in Pipelines)
    {
        pipeline.InitPipeline(this);
    }
}
/// <summary>
/// Synchronously runs the spider over the given urls and returns the collected results.
/// </summary>
/// <remarks>
/// <c>DestroyWhenExit</c> and <c>SpawnUrl</c> are switched off for the duration of the
/// call and switched back on afterwards, even when the run throws. A collector pipeline
/// is added to <c>Pipelines</c> to gather the results.
/// </remarks>
/// <typeparam name="T">Type each collected item is cast to.</typeparam>
/// <param name="urls">Urls to convert to requests and process.</param>
/// <returns>The items gathered by the collector pipeline, cast to <typeparamref name="T"/>.</returns>
public IList<T> GetAll<T>(params string[] urls)
{
    DestroyWhenExit = false;
    SpawnUrl = false;

    ICollectorPipeline collectorPipeline;
    try
    {
        foreach (Request request in UrlUtils.ConvertToRequests(urls))
        {
            AddRequest(request);
        }

        collectorPipeline = GetCollectorPipeline();
        Pipelines.Add(collectorPipeline);
        Run();
    }
    finally
    {
        // Restore the flags on every exit path so a failed run does not leave the
        // spider with both flags stuck at false.
        SpawnUrl = true;
        DestroyWhenExit = true;
    }

    ICollection collection = collectorPipeline.GetCollected();
    return collection.Cast<T>().ToList();
}
/// <summary>
/// Appends a pipeline configuration to this context's <c>Pipelines</c> collection.
/// </summary>
/// <param name="pipeline">The pipeline configuration to append.</param>
/// <returns>This context, so calls can be chained.</returns>
public SpiderContext AddPipeline(Configuration.Pipeline pipeline)
{
    Pipelines.Add(pipeline);

    return this;
}
/// <summary>
/// Registers a pipeline with this spider.
/// </summary>
/// <remarks>
/// <c>CheckIfRunning()</c> is invoked before the pipeline is added.
/// </remarks>
/// <param name="pipeline">The pipeline to append to <c>Pipelines</c>.</param>
/// <returns>This spider, so calls can be chained.</returns>
public virtual Spider AddPipeline(IPipeline pipeline)
{
    CheckIfRunning();

    Pipelines.Add(pipeline);

    return this;
}
/// <summary>
/// Appends a pipeline to the <c>Pipelines</c> collection.
/// </summary>
/// <param name="pipeline">The pipeline to append.</param>
public void Add(Pipeline pipeline) => Pipelines.Add(pipeline);
/// <summary>
/// One-time component setup: subscribes to the console cancel event, initializes the
/// scheduler, applies the thread count to the downloader, ensures a pipeline and a
/// thread pool exist, pushes clones of the start requests to the scheduler, and starts
/// a background task that periodically prints scheduler/thread-pool status.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when no <c>Downloader</c> has been assigned. No default downloader is created
/// here, so the previous code failed at this point with a <see cref="NullReferenceException"/>.
/// </exception>
public void InitComponent()
{
    if (_init)
    {
#if NET_CORE
        Logger.Info($"Component already init.", true);
#else
        Logger.Info("Component already init.");
#endif
        return;
    }

    Console.CancelKeyPress += ConsoleCancelKeyPress;
    Scheduler.Init(this);

    // Fail with an actionable message instead of a NullReferenceException on the
    // property access below.
    if (Downloader == null)
    {
        throw new InvalidOperationException("Downloader is null; assign a Downloader before calling InitComponent.");
    }
    Downloader.ThreadNum = ThreadNum;

    // Fall back to a file pipeline when none was configured.
    if (Pipelines.Count == 0)
    {
        Pipelines.Add(new FilePipeline());
    }

    if (ThreadPool == null)
    {
        ThreadPool = new CountableThreadPool(ThreadNum);
    }

    if (StartRequests != null)
    {
        if (StartRequests.Count > 0)
        {
            Parallel.ForEach(StartRequests, new ParallelOptions() { MaxDegreeOfParallelism = 100 }, request =>
            {
                // A clone is pushed, not the original request object.
                Scheduler.Push((Request)request.Clone(), this);
            });
            ClearStartRequests();
#if NET_CORE
            Logger.Info("Push Request to Scheduler success.", true);
#else
            Logger.Info("Push Request to Scheduler success.");
#endif
        }
        else
        {
            // Both build flavours now report that nothing was pushed; the non-NET_CORE
            // branch previously logged the copy-pasted "success" message.
#if NET_CORE
            Logger.Info("Push Zero Request to Scheduler.", true);
#else
            Logger.Info("Push Zero Request to Scheduler.");
#endif
        }
    }

    // Background status reporter: only loops when ShowConsoleStatus is set and the
    // scheduler implements IMonitorableScheduler; prints a status line every 2 seconds
    // while the spider is running and not waiting to exit.
    Task.Factory.StartNew(() =>
    {
        if (ShowConsoleStatus)
        {
            IMonitorableScheduler monitor = Scheduler as IMonitorableScheduler;
            if (monitor != null)
            {
                while (true)
                {
                    try
                    {
                        if (Stat == Status.Running && !_waitingToExit)
                        {
                            Console.WriteLine(
                                $"Left: {monitor.GetLeftRequestsCount(this)} Total: {monitor.GetTotalRequestsCount(this)} AliveThread: {ThreadPool.ThreadAlive} ThreadNum: {ThreadPool.ThreadNum}");
                        }
                    }
                    catch
                    {
                        // Status output is best-effort; a failure here must not stop the reporter.
                    }
                    Thread.Sleep(2000);
                }
            }
        }
    });

    _init = true;
}
/// <summary>
/// Builds and runs the entity spider: optionally obtains cookies, creates the entity
/// processor and per-entity pipelines, prepares the start requests while holding the
/// <c>Db</c> lock (when a <c>Db</c> is configured), runs the crawl, and finally disposes
/// the spider and the monitor center.
/// </summary>
/// <param name="arguments">
/// Run arguments. "rerun" re-initializes and clears the scheduler, deletes the validate
/// record and forces start-request initialization; "running-test" skips the actual crawl.
/// </param>
public override void Run(params string[] arguments)
{
    InitEnvorimentAndVerify();

    try
    {
#if !NET_CORE
        if (CookieInterceptor != null)
        {
            this.Log("尝试获取 Cookie...", LogLevel.Info);
            var cookie = CookieInterceptor.GetCookie();
            if (cookie == null)
            {
                this.Log("获取 Cookie 失败, 爬虫无法继续.", LogLevel.Warn);
                return;
            }
            else
            {
                Site.CookiesStringPart = cookie.CookiesStringPart;
                Site.Cookies = cookie.CookiesDictionary;
            }
        }
#endif

        this.Log("创建爬虫...", LogLevel.Info);

        EntityProcessor processor = new EntityProcessor(this);
        foreach (var entity in Entities)
        {
            processor.AddEntity(entity);
        }
        PageProcessor = processor;

        // For each entity, clone every configured entity pipeline, bind the clone to
        // the entity and keep only the clones that report themselves enabled.
        foreach (var entity in Entities)
        {
            string entityName = entity.Entity.Name;

            var pipelines = new List<BaseEntityPipeline>();
            foreach (var pipeline in EntityPipelines)
            {
                var newPipeline = pipeline.Clone();
                newPipeline.InitiEntity(entity);
                if (newPipeline.IsEnabled)
                {
                    pipelines.Add(newPipeline);
                }
            }

            if (pipelines.Count > 0)
            {
                Pipelines.Add(new EntityPipeline(entityName, pipelines));
            }
        }

        CheckIfSettingsCorrect();

        bool needInitStartRequest = true;
        string key = "locker-" + Identity;
        bool lockTaken = false;

        try
        {
            if (Db != null)
            {
                // Poll once a second until the lock is obtained.
                // NOTE(review): the 10-minute TimeSpan is presumably the lock expiry - confirm against the Db API.
                while (!Db.LockTake(key, "0", TimeSpan.FromMinutes(10)))
                {
                    Thread.Sleep(1000);
                }
                lockTaken = true;

                var lockerValue = Db.HashGet(InitStatusSetName, Identity);
                needInitStartRequest = lockerValue != "init finished";
            }

            if (arguments.Contains("rerun"))
            {
                Scheduler.Init(this);
                Scheduler.Clear();
                // Delete the verify record.
                Db?.HashDelete(ValidateStatusName, Identity);
                needInitStartRequest = true;
            }

            this.Log("构建内部模块、准备爬虫数据...", LogLevel.Info);
            InitComponent();

            if (needInitStartRequest && PrepareStartUrls != null)
            {
                for (int i = 0; i < PrepareStartUrls.Length; ++i)
                {
                    var prepareStartUrl = PrepareStartUrls[i];
                    this.Log($"[步骤 {i + 2}] 添加链接到调度中心.", LogLevel.Info);
                    prepareStartUrl.Build(this, null);
                }
            }

            MonitorCenter.Register(this);
        }
        finally
        {
            // Release the lock on every exit path; previously an exception during
            // preparation left it held. Only release a lock this call actually took.
            if (lockTaken)
            {
                Db?.LockRelease(key, 0);
            }
        }

        RegisterControl(this);

        if (!arguments.Contains("running-test"))
        {
            base.Run();
        }
        else
        {
            IsExited = true;
        }

        TaskFinished();
        HandleVerifyCollectData();
    }
    finally
    {
        Dispose();
        MonitorCenter.Dispose();
    }
}