Beispiel #1
0
        /// <summary>
        /// Composes a crawl folder name by applying <paramref name="templateString"/> to the
        /// name-composer data collected for the given spider and crawl job engine settings.
        /// </summary>
        /// <param name="spider">Spider whose properties are fed into the template (see <c>GetData</c>).</param>
        /// <param name="crawlerJobEngineSettings">Crawl job engine settings fed into the template (see <c>GetData</c>).</param>
        /// <param name="templateString">Template text handed to <c>stringTemplate</c>; placeholder syntax is defined by that type.</param>
        /// <returns>The result of <c>stringTemplate.applyToContent</c> for the collected data.</returns>
        public static String GetCrawlFolderName(ISpiderEvaluatorBase spider, crawlerDomainTaskMachineSettings crawlerJobEngineSettings, String templateString)
        {
            // The template is constructed first, then the data is gathered, then the two are combined.
            return new stringTemplate(templateString)
                .applyToContent(GetData(crawlerJobEngineSettings, spider));
        }
Beispiel #2
0
        /// <summary>
        /// Collects the values used for name composition into a <see cref="PropertyCollection"/>,
        /// keyed by <c>nameComposerFields</c> members.
        /// </summary>
        /// <param name="crawlerJobEngineSettings">Source of the <c>TC_max</c> value.</param>
        /// <param name="crawler">Source of the class name, crawler name and page/link limits.</param>
        /// <returns>A new collection holding the six name-composer entries.</returns>
        public static PropertyCollection GetData(crawlerDomainTaskMachineSettings crawlerJobEngineSettings, ISpiderEvaluatorBase crawler)
        {
            var fields = new PropertyCollection();

            // Crawler identity: runtime type name, title, and a file-system friendly variant.
            fields[nameComposerFields.crawlerClassName] = crawler.GetType().Name;

            var title = crawler.name;
            fields[nameComposerFields.crawlerTitleName] = title;
            // Dashes are stripped from whatever getCleanFilepath() produces.
            fields[nameComposerFields.crawlerFileFriendlyName] = title.getCleanFilepath().Replace("-", "");

            // Limits configured on the crawler itself.
            var crawlerSettings = crawler.settings;
            fields[nameComposerFields.variablePLmax] = crawlerSettings.limitTotalPageLoad;
            fields[nameComposerFields.variableLT] = crawlerSettings.limitIterationNewLinks;

            // Limit configured on the crawl job engine.
            fields[nameComposerFields.variableTCmax] = crawlerJobEngineSettings.TC_max;

            return fields;
        }
Beispiel #3
0
        /// <summary>Defines the Crawl Job Engine settings, which govern the parallel execution of the Crawl Job.</summary>
        /// <remarks><para>Creates a fresh settings object from the supplied limits and stores it on the current context, replacing the previous one.</para></remarks>
        /// <param name="TC_max">Maximum number of parallel DLC executing in the same moment</param>
        /// <param name="Tdl_max">Maximum minutes allowed for single DLC to run</param>
        /// <param name="Tll_max">Maximum minutes of single iteration allowed for a DLC before its termination</param>
        /// <param name="Tcjl_max">Maximum minutes for the complete Crawl Job execution</param>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_defineCrawlJobEngineSettings(
            [Description("Maximum number of parallel DLC executing in the same moment")] int TC_max = 8,
            [Description("Maximum minutes allowed for single DLC to run")] int Tdl_max = 50,
            [Description("Maximum minutes of single iteration allowed for a DLC before its termination")] int Tll_max = 20,
            [Description("Maximum minutes for the complete Crawl Job execution")] int Tcjl_max = 100)
        {
            // In the initializer the left-hand names are settings members; the right-hand names are the parameters.
            var engineSettings = new crawlerDomainTaskMachineSettings
            {
                TC_max = TC_max,
                Tdl_max = Tdl_max,
                Tll_max = Tll_max,
                Tcjl_max = Tcjl_max
            };

            // Publish the fully populated settings to the context in one step.
            context.crawlerJobEngineSettings = engineSettings;
        }