/** Execute Job ***********************************************************/

/// <summary>
/// Runs the job for the current <c>StartUrl</c>: sanitizes the start URL,
/// registers it with the document collection and the allowed hosts, seeds
/// the URL queue when needed, probes the robots file, applies the crawl
/// delay, spawns the workers and finally notifies the task controller
/// (when one is attached) and the display queue.
/// </summary>
/// <returns>Always <c>true</c>.</returns>
public Boolean Execute()
{
    DebugMsg(string.Format("Start URL: {0}", this.StartUrl));

    //this.LogEntry( string.Format( "Executing with Start URL: {0}", this.StartUrl ) );

    // Every later step works with the sanitized form of the start URL.
    this.StartUrl = MacroscopeUrlUtils.SanitizeUrl(Url: this.StartUrl);

    this.DocCollection.SetStartUrl(Url: this.StartUrl);
    this.DetermineStartingDirectory();
    this.SetThreadsStop(Stopped: false);
    this.AllowedHosts.AddFromUrl(Url: this.StartUrl);

    // NOTE(review): PeekUrlQueue() presumably reports whether the URL queue
    // already holds items; the queue is only seeded when it returns false.
    Boolean queuePeekResult = this.PeekUrlQueue();

    if (!queuePeekResult)
    {
        // The robots URL (when one can be generated) is queued ahead of the start URL.
        string robotsUrl = MacroscopeRobots.GenerateRobotUrl(Url: this.StartUrl);

        if (!string.IsNullOrEmpty(robotsUrl))
        {
            this.AddUrlQueueItem(Url: robotsUrl);
        }

        this.IncludeExcludeUrls.AddExplicitIncludeUrl(Url: this.StartUrl);
        this.AddUrlQueueItem(Url: this.StartUrl);
    }

    this.ProbeRobotsFile(Url: this.StartUrl);
    this.SetCrawlDelay(Url: this.StartUrl);

    this.SpawnWorkers();

    DebugMsg(string.Format("Pages Found: {0}", this.GetPagesFound()));

    // The scan-complete callback is only issued when a task controller is attached.
    if (this.TaskController != null)
    {
        this.TaskController.ICallbackScanComplete();
    }

    this.AddUpdateDisplayQueue(Url: this.StartUrl);

    return true;
}
/**************************************************************************/

/// <summary>
/// Sets up the job master's state: stores the run-time mode, fetches HTTP
/// credentials from the task controller (when one is attached), creates the
/// document collection, allowed hosts, named queues, thread bookkeeping and
/// worker semaphore, reads depth and page limit from the preferences, and
/// resets counters, starting directories, history, progress, locales and
/// robots state.
/// </summary>
/// <param name="JobRunTimeMode">Run-time mode stored in <c>RunTimeMode</c>.</param>
private void InitializeJobMaster(MacroscopeConstants.RunTimeMode JobRunTimeMode)
{
    // NOTE(review): a garbage collection is forced before any state is
    // created; retained as-is.
    GC.Collect();

    /*
     * Event log setup, currently disabled:
     *
     * {
     *   this.JobMasterLog = new EventLog ();
     *   this.JobMasterLog.Source = MacroscopeConstants.MainEventLogSourceName;
     *   this.JobGuid = Guid.NewGuid();
     *   this.LogEntry( string.Format( "Starting Job" ) );
     * }
     */

    this.RunTimeMode = JobRunTimeMode;

    if (this.TaskController != null)
    {
        this.CredentialsHttp = this.TaskController.IGetCredentialsHttp();
    }

    this.DocCollection = new MacroscopeDocumentCollection(JobMaster: this);
    this.AllowedHosts = new MacroscopeAllowedHosts();

    /** BEGIN: Named Queues *************************************************/

    // Queue of job items; created in USE_HISTORY mode.
    this.NamedQueueJobItems = new MacroscopeNamedQueue<MacroscopeJobItem>();
    this.NamedQueueJobItems.CreateNamedQueue(
        Name: MacroscopeConstants.NamedQueueUrlList,
        QueueMode: MacroscopeNamedQueue<MacroscopeJobItem>.MODE.USE_HISTORY
    );

    this.NamedQueue = new MacroscopeNamedQueue<string>();

    // Display queues, created one by one in exactly this order.
    var displayQueueNames = new[]
    {
        MacroscopeConstants.NamedQueueDisplayQueue,
        MacroscopeConstants.NamedQueueDisplayStructure,
        MacroscopeConstants.NamedQueueDisplayStructureLinkCounts,
        MacroscopeConstants.NamedQueueDisplayHierarchy,
        MacroscopeConstants.NamedQueueDisplayCanonicalAnalysis,
        MacroscopeConstants.NamedQueueDisplayHrefLang,
        MacroscopeConstants.NamedQueueDisplayErrors,
        MacroscopeConstants.NamedQueueDisplayHostnames,
        MacroscopeConstants.NamedQueueDisplayRedirectsAudit,
        MacroscopeConstants.NamedQueueDisplayLinks,
        MacroscopeConstants.NamedQueueDisplayHyperlinks,
        MacroscopeConstants.NamedQueueDisplayUriAnalysis,
        MacroscopeConstants.NamedQueueDisplayPageTitles,
        MacroscopeConstants.NamedQueueDisplayPageDescriptions,
        MacroscopeConstants.NamedQueueDisplayPageKeywords,
        MacroscopeConstants.NamedQueueDisplayPageHeadings,
        MacroscopeConstants.NamedQueueDisplayPageText,
        MacroscopeConstants.NamedQueueDisplayStylesheets,
        MacroscopeConstants.NamedQueueDisplayImages,
        MacroscopeConstants.NamedQueueDisplayJavascripts,
        MacroscopeConstants.NamedQueueDisplayAudios,
        MacroscopeConstants.NamedQueueDisplayVideos,
        MacroscopeConstants.NamedQueueDisplaySitemaps,
        MacroscopeConstants.NamedQueueDisplayEmailAddresses,
        MacroscopeConstants.NamedQueueDisplayTelephoneNumbers,
        MacroscopeConstants.NamedQueueDisplayCustomFilters,
        MacroscopeConstants.NamedQueueDisplayDataExtractorsCssSelectors,
        MacroscopeConstants.NamedQueueDisplayDataExtractorsRegexes,
        MacroscopeConstants.NamedQueueDisplayDataExtractorsXpaths,
        MacroscopeConstants.NamedQueueDisplayRemarks
    };

    foreach (var displayQueueName in displayQueueNames)
    {
        this.NamedQueue.CreateNamedQueue(Name: displayQueueName);
    }

    /** END: Named Queues ***************************************************/

    this.CrawlDelay = 0;

    // AdjustThreadsMax() runs before ThreadsMax is used to size the semaphore below.
    this.AdjustThreadsMax();
    this.ThreadsRunning = 0;
    this.ThreadsStop = false;
    this.ThreadsDict = new Dictionary<int, Boolean>();

    // The semaphore starts with zero free slots and is then released up to
    // ThreadsMax, leaving every slot available.
    this.SemaphoreWorkers = new Semaphore(0, this.ThreadsMax);
    this.SemaphoreWorkers.Release(this.ThreadsMax);

    this.Depth = MacroscopePreferencesManager.GetDepth();
    this.PageLimit = MacroscopePreferencesManager.GetPageLimit();
    this.PageLimitCount = 0;
    this.PagesFound = 0;

    this.ParentStartingDirectory = "";
    this.ChildStartingDirectory = "";

    this.JobHistory = new MacroscopeJobHistory();

    this.InitProgress();

    this.Locales = new Dictionary<string, string>(32);

    this.Robots = new MacroscopeRobots();
    this.BlockedByRobots = new Dictionary<string, Boolean>();
}