Example #1
0
        /**************************************************************************/

        /// <summary>
        /// Creates a worker attached to <paramref name="JobMaster"/>, copying the
        /// job history, document collection, allowed hosts and include/exclude URL
        /// references from it, and then resolving the crawl delay to use.
        /// </summary>
        /// <param name="JobMaster">The job master this worker reads its shared state from.</param>
        public MacroscopeJobWorker(MacroscopeJobMaster JobMaster)
        {
            this.SuppressDebugMsg = true;

            // Shared state is obtained from the owning job master.
            this.JobMaster = JobMaster;
            this.JobHistory = this.JobMaster.GetJobHistory();
            this.DocCollection = this.JobMaster.GetDocCollection();
            this.AllowedHosts = this.JobMaster.GetAllowedHosts();
            this.IncludeExcludeUrls = this.JobMaster.GetIncludeExcludeUrls();

            // A positive crawl delay from the preferences is applied first.
            var preferredDelay = MacroscopePreferencesManager.GetCrawlDelay();

            if (preferredDelay > 0)
            {
                this.CrawlDelay = preferredDelay;
            }

            // When the robots protocol is being followed, a positive crawl delay
            // reported by the job master takes precedence over the preference value.
            if (MacroscopePreferencesManager.GetFollowRobotsProtocol())
            {
                var jobMasterDelay = this.JobMaster.GetCrawlDelay();

                if (jobMasterDelay > 0)
                {
                    this.CrawlDelay = jobMasterDelay;
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Sets up the state of this job master for a run: stores the run-time mode,
        /// picks up HTTP credentials from the task controller when one is present,
        /// and creates fresh instances of the document collection, allowed hosts,
        /// named queues, worker-thread bookkeeping, job history, locales and robots
        /// state. Depth and page limit are read from the preferences.
        /// </summary>
        /// <param name="JobRunTimeMode">Run-time mode to store in <c>RunTimeMode</c>.</param>
        private void InitializeJobMaster(MacroscopeConstants.RunTimeMode JobRunTimeMode)
        {
            // Force a garbage collection before the new job state is allocated.
            GC.Collect();

            /*
             * Disabled event-log setup:
             *
             * this.JobMasterLog = new EventLog ();
             * this.JobMasterLog.Source = MacroscopeConstants.MainEventLogSourceName;
             * this.JobGuid = Guid.NewGuid();
             * this.LogEntry( string.Format( "Starting Job" ) );
             */

            this.RunTimeMode = JobRunTimeMode;

            // Credentials are only fetched when a task controller has been set.
            if (this.TaskController != null)
            {
                this.CredentialsHttp = this.TaskController.IGetCredentialsHttp();
            }

            this.DocCollection = new MacroscopeDocumentCollection(JobMaster: this);
            this.AllowedHosts = new MacroscopeAllowedHosts();

            /** BEGIN: Named Queues *************************************************/

            // Queue of job items; the URL list queue is created in USE_HISTORY mode.
            this.NamedQueueJobItems = new MacroscopeNamedQueue<MacroscopeJobItem>();
            this.NamedQueueJobItems.CreateNamedQueue(
                Name: MacroscopeConstants.NamedQueueUrlList,
                QueueMode: MacroscopeNamedQueue<MacroscopeJobItem>.MODE.USE_HISTORY
                );

            // Display queues, listed in the order in which they are created.
            var displayQueueNames = new[]
            {
                MacroscopeConstants.NamedQueueDisplayQueue,

                MacroscopeConstants.NamedQueueDisplayStructure,
                MacroscopeConstants.NamedQueueDisplayStructureLinkCounts,

                MacroscopeConstants.NamedQueueDisplayHierarchy,
                MacroscopeConstants.NamedQueueDisplayCanonicalAnalysis,
                MacroscopeConstants.NamedQueueDisplayHrefLang,
                MacroscopeConstants.NamedQueueDisplayErrors,
                MacroscopeConstants.NamedQueueDisplayHostnames,
                MacroscopeConstants.NamedQueueDisplayRedirectsAudit,
                MacroscopeConstants.NamedQueueDisplayLinks,
                MacroscopeConstants.NamedQueueDisplayHyperlinks,
                MacroscopeConstants.NamedQueueDisplayUriAnalysis,
                MacroscopeConstants.NamedQueueDisplayPageTitles,
                MacroscopeConstants.NamedQueueDisplayPageDescriptions,
                MacroscopeConstants.NamedQueueDisplayPageKeywords,
                MacroscopeConstants.NamedQueueDisplayPageHeadings,
                MacroscopeConstants.NamedQueueDisplayPageText,
                MacroscopeConstants.NamedQueueDisplayStylesheets,
                MacroscopeConstants.NamedQueueDisplayImages,
                MacroscopeConstants.NamedQueueDisplayJavascripts,
                MacroscopeConstants.NamedQueueDisplayAudios,
                MacroscopeConstants.NamedQueueDisplayVideos,
                MacroscopeConstants.NamedQueueDisplaySitemaps,
                MacroscopeConstants.NamedQueueDisplayEmailAddresses,
                MacroscopeConstants.NamedQueueDisplayTelephoneNumbers,
                MacroscopeConstants.NamedQueueDisplayCustomFilters,

                MacroscopeConstants.NamedQueueDisplayDataExtractorsCssSelectors,
                MacroscopeConstants.NamedQueueDisplayDataExtractorsRegexes,
                MacroscopeConstants.NamedQueueDisplayDataExtractorsXpaths,

                MacroscopeConstants.NamedQueueDisplayRemarks
            };

            this.NamedQueue = new MacroscopeNamedQueue<string>();

            foreach (var displayQueueName in displayQueueNames)
            {
                this.NamedQueue.CreateNamedQueue(Name: displayQueueName);
            }

            /** END: Named Queues ***************************************************/

            this.CrawlDelay = 0;

            // Worker-thread bookkeeping; ThreadsMax is established by AdjustThreadsMax().
            this.AdjustThreadsMax();
            this.ThreadsRunning = 0;
            this.ThreadsStop = false;
            this.ThreadsDict = new Dictionary<int, bool>();

            // The semaphore starts with all ThreadsMax slots available.
            this.SemaphoreWorkers = new Semaphore(this.ThreadsMax, this.ThreadsMax);

            // Crawl limits come from the preferences; counters start at zero.
            this.Depth = MacroscopePreferencesManager.GetDepth();
            this.PageLimit = MacroscopePreferencesManager.GetPageLimit();
            this.PageLimitCount = 0;

            this.PagesFound = 0;

            this.ParentStartingDirectory = "";
            this.ChildStartingDirectory = "";

            this.JobHistory = new MacroscopeJobHistory();

            this.InitProgress();

            this.Locales = new Dictionary<string, string>(32);

            this.Robots = new MacroscopeRobots();
            this.BlockedByRobots = new Dictionary<string, bool>();
        }