/// <summary>Runs the current crawl job</summary>
/// <remarks>
/// <para>Starts crawl execution: for every child record of <c>context.aRecord</c> a
/// <c>crawlerDomainTaskMachine</c> is created, the plugins from <c>context.pluginStack</c> are installed,
/// the machine is started and the result is reported.</para>
/// <para>A job note is written to the session crawler folder before the crawl and is closed with the
/// line <c>[RunJob completed]</c> once all records were processed.</para>
/// <para>If no job record is defined (<c>context.aRecord</c> is null) an error is logged and nothing is executed.</para>
/// </remarks>
/// <seealso cref="aceOperationSetExecutorBase"/>
public void aceOperation_runRun()
{
    // The parent is not guaranteed to be an advanced console, so every use of it below is null-guarded.
    IAceAdvancedConsole console = parent as IAceAdvancedConsole;

    DateTime start = DateTime.Now;

    if (context.aRecord == null)
    {
        output.log("Error: define Job before calling this command.");
        return;
    }

    int Tdl_max = context.crawlerJobEngineSettings.Tdl_max;
    int Tll_max = context.crawlerJobEngineSettings.Tll_max;
    int TC_max = context.crawlerJobEngineSettings.TC_max;

    var spiderEvals = context.aRecord.GetChildRecords();

    context.aRecord.initializeSoft(context.sampleList);

    DirectoryInfo di = imbWEMManager.index.experimentManager.CurrentSession.sessionReportFolder;

    var notation = appManager.AppInfo;

    // ------------------ note creation -------------------
    analyticJobNote note = new analyticJobNote(imbWEMManager.index.experimentEntry.sessionCrawlerFolder);

    // Previously console.workspace was dereferenced unconditionally, before the null check further below.
    note.WriteAboutJob(context, console != null ? console.workspace : null, console);

    note.AppendLine("--------------- Crawl Job configuration overview -------------------------- ");
    note.AppendLine("   Script var          | Article    - Description                             ");
    note.AppendLine("--------------------------------------------------------------------------- ");
    note.AppendLine("   Tdl_max             | Tdl        - Time limit per domain - in minutes     | : " + Tdl_max);
    note.AppendLine("   Tll_max             | Tac        - Time limit for inactivity - in minutes | : " + Tll_max);
    note.AppendLine("   TC_max              | TC         - Maximum number of JLC threads allowed  | : " + TC_max);
    note.AppendLine("--------------------------------------------------------------------------- ");
    note.AppendHorizontalLine();
    note.AppendLine("-- if the test was finished without problem at the last line it will be message [RunJob completed] ---");
    note.AppendLine("-- if not: something went wrong - check the logs ---");
    note.AppendHorizontalLine();

    // Saved before the crawl so the note exists on disk even if the run is interrupted.
    note.SaveNote();

    foreach (modelSpiderTestRecord tRecord in spiderEvals)
    {
        directAnalyticReporter reporter = new directAnalyticReporter(
            imbWEMManager.index.experimentEntry.CrawlID,
            imbWEMManager.index.experimentEntry.sessionCrawlerFolder,
            notation);

        context.pluginStack.InstallTo(imbWEMManager.index.plugins, plugInGroupEnum.index, true);

        tRecord.performance = imbWEMManager.index.experimentEntry;

        output.log(tRecord.instance.name + " crawl start");

        crawlerDomainTaskMachine cDTM = new crawlerDomainTaskMachine(tRecord, context.aRecord.sample, reporter, di)
        {
            maxThreads = TC_max,
            _timeLimitForDLC = Tdl_max,
            TimeLimitForTask = Tll_max
        };

        context.pluginStack.InstallTo(cDTM.plugins, plugInGroupEnum.engine, false);
        context.pluginStack.InstallTo(tRecord.instance.plugins, plugInGroupEnum.crawler, false);
        context.pluginStack.InstallTo(cDTM.reportPlugins, plugInGroupEnum.report, false);

        cDTM.startAutoParallel(true); // ----- execution

        output.log(tRecord.instance.name + " crawl finished");

        cDTM.webLoaderControler.Save();

        reporter.reportCrawler(tRecord);

        note.WriteAboutCrawlerRun(tRecord, cDTM);

        // scriptRunning is null when the job was not started from an ACE script
        // (WriteAboutJob applies the same check), so guard it together with the console.
        if (console != null && console.scriptRunning != null)
        {
            console.scriptRunning.getContent().saveStringToFile(
                imbWEMManager.index.experimentEntry.sessionCrawlerFolder.pathFor("script.ace"));
        }

        if (imbWEMManager.settings.directReportEngine.doPublishExperimentSessionTable)
        {
            imbWEMManager.index.experimentManager.AddOrUpdate(tRecord.performance as experimentSessionEntry);
        }
    }

    imbWEMManager.index.CloseSession(spiderEvals);

    output.AppendLine("RunJob done in: " + DateTime.Now.Subtract(start).TotalMinutes.ToString("#0.0##") + " min");

    note.AppendLine("[RunJob completed]");
    note.SaveNote();

    // Store the sample list next to the note.
    var sl = context.sampleList.ToList();
    sl.saveContentOnFilePath(note.folder.pathFor("sample.txt"));
}
/// <summary>
/// Initializes a new instance of the <see cref="crawlJobPlugin"/> class.
/// </summary>
/// <remarks>
/// After the base constructor ran, the plugin's <c>output</c> is handed to
/// <c>logToConsoleControl.setAsOutput</c> together with the label "WEM Plugin".
/// </remarks>
/// <param name="__parent">The parent console, forwarded to the base constructor.</param>
public crawlJobPlugin(IAceAdvancedConsole __parent)
    : base(__parent, "crawlJobPlugin", "This is imbACE advanced console plugin for crawlJobPlugin")
{
    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(output, "WEM Plugin");
}
/// <summary>
/// Imports sample from text file
/// </summary>
/// <param name="path">path to file with samples, if * it will open dialog to select the file</param>
/// <param name="inWorkspace">if true, the file path is interpreted as relative to console workspace</param>
/// <param name="sampleName">Name of the sample list, if empty it will not change current sample list name</param>
/// <param name="replace">if set to true it will replace any existing samples in the list</param>
/// <param name="skip">Number of entries to skip, from the start of the imported file; negative values are treated as 0</param>
/// <param name="limit">If set above 0, it limits the total number of domains imported; otherwise a default cap of 10000 is used</param>
/// <param name="debug">if true it will report on link preprocessing</param>
/// <remarks>
/// Loads the file and adds domain urls from it into context's sample list.
/// If the file does not exist, a message is logged and the sample list is left untouched.
/// </remarks>
/// <seealso cref="aceOperationSetExecutorBase" />
public void aceOperation_addSampleFile(
    [Description("path to file with samples, if * it will open dialog to select the file")] String path = "*",
    [Description("if true, the file path is interpreted as relative to console workspace")] Boolean inWorkspace = true,
    [Description("Name of the sample list, if empty it will not change current sample list name")] String sampleName = "",
    [Description("if set to true it will replace any existing samples in the list")] Boolean replace = false,
    [Description("Number of entries to skip, from the imported file")] Int32 skip = 0,
    [Description("If set above 0, it limits the total number of domains imported")] Int32 limit = -1,
    [Description("if true it will report on link preprocessing")] Boolean debug = true)
{
    // Cap applied when the caller did not supply a positive limit.
    const Int32 defaultLimit = 10000;

    IAceAdvancedConsole console = parent as IAceAdvancedConsole;

    if (path == "*")
    {
        // Start the file dialog in the workspace folder when available, otherwise in the projects folder.
        String defPath = appManager.Application.folder_projects.path;
        if (inWorkspace && console != null)
        {
            defPath = console.workspace.folder.path;
        }

        path = dialogs.openSelectFile(imbACE.Services.textBlocks.smart.dialogSelectFileMode.selectFileToOpen, "*.txt", defPath, "Select file to import web domains sample from");

        // The dialog result is used as returned, never re-based onto the workspace.
        inWorkspace = false;
    }

    if (Path.IsPathRooted(path))
    {
        inWorkspace = false;
    }

    if (inWorkspace && console != null)
    {
        path = console.workspace.folder.pathFor(path);
    }

    // The parameter description says the limit only applies "if set above 0".
    if (limit <= 0)
    {
        limit = defaultLimit;
    }

    if (skip < 0)
    {
        skip = 0;
    }

    if (!File.Exists(path))
    {
        output.log("Sample list file not found at [" + path + "]");
        return;
    }

    if (replace)
    {
        context.sampleList = new webSiteSimpleSample();
    }

    if (!sampleName.isNullOrEmpty())
    {
        context.sampleList.name = sampleName;
    }

    var list = path.openFileToList(true);

    // position: index of the current entry within the file (drives skip).
    // imported: number of domains actually added (drives limit).
    // The original used one counter that was never incremented, so neither skip nor limit worked.
    Int32 position = 0;
    Int32 imported = 0;

    foreach (String l in list)
    {
        Boolean isSkipped = position < skip;
        position++;

        if (!isSkipped && imported >= limit)
        {
            break;
        }

        domainAnalysis da = new domainAnalysis(l);

        if (isSkipped)
        {
            if (debug)
            {
                output.Append(String.Format("Skipping {0,-20} => {1,-20}", l, da.urlProper));
            }
            continue;
        }

        if (debug)
        {
            output.Append(String.Format("Adding {0,-20} => {1,-20}", l, da.urlProper));
        }

        context.sampleList.Add(da.urlProper);
        imported++;
    }
}
/// <summary>
/// Obtains a console plugin by name through <c>GetPluginInstance</c>.
/// </summary>
/// <param name="console">Console passed on as the single element of the object array argument.</param>
/// <param name="plugin_name">Name of the plugin, forwarded unchanged.</param>
/// <param name="output">Optional log builder, forwarded unchanged (may be null).</param>
/// <returns>Whatever <c>GetPluginInstance</c> returns for the given name.</returns>
public IAceConsolePlugin GetInstance(IAceAdvancedConsole console, String plugin_name, ILogBuilder output = null)
{
    // The console is the only element handed over in the object array.
    Object[] pluginArguments = new Object[] { console };

    return GetPluginInstance(plugin_name, "", output, pluginArguments);
}
/// <summary>
/// Writes the job overview into this note: test identification, sample information, settings hash,
/// the running ACE script (if any), command line arguments and process information.
/// </summary>
/// <remarks>
/// Side effect: the MD5 hash of the XML-serialized <c>imbWEMManager.settings</c> is stored into
/// <c>state.setupHash_global</c>.
/// </remarks>
/// <param name="state">Crawl job context the job, sample list and job record are read from.</param>
/// <param name="workspace">Console workspace; not read by this method.</param>
/// <param name="console">Console that may be running an ACE script; may be null, in which case the script section is omitted.</param>
public void WriteAboutJob(ICrawlJobContext state, aceAdvancedConsoleWorkspace workspace, IAceAdvancedConsole console)
{
    log(":: " + state.job.name + " ::");

    AppendLine("Test ID:            " + imbWEMManager.index.experimentEntry.TestID);
    AppendLine("Test signature:     " + imbWEMManager.index.experimentEntry.TestSignature);
    AppendLine("Test description:   " + state.job.description);
    AppendLine("Test report folder: " + folder.path);

    AppendHorizontalLine();

    AppendLine("Sample size:        " + state.sampleList.Count());
    AppendLine("Sample block:       " + state.aRecord.sampleBlockOrdinalNumber);
    AppendLine("Sample limit:       " + state.aRecord.sampleTakeLimit);
    AppendLine("Sample order hash:  " + imbWEMManager.index.experimentEntry.SampleListHash);
    AppendLine("Index auto-preloaded: " + imbWEMManager.index.doAutoLoad);

    AppendHorizontalLine();

    AppendLine("Run stamp:          " + state.aRecord.testRunStamp);

    string hash = md5.GetMd5Hash(objectSerialization.ObjectToXML(imbWEMManager.settings));
    state.setupHash_global = hash;

    AppendLine("Settings [imbAnalyticEngine] hash: " + hash);
    AppendLine(" ^-- to make sure multiple tests were running under the same settings compare the hash");

    // The caller obtains the console through an 'as' cast, so it can be null here.
    if (console != null && console.scriptRunning != null)
    {
        AppendHorizontalLine();
        AppendLine("Job started by ACE script: " + console.scriptRunning.info.Name);
        AppendLine("=== Content of the script == start ======");
        AppendLine(console.scriptRunning.getContent());
        AppendLine("=== Content of the script == end   ======");
    }

    AppendHorizontalLine();

    if (imbWEMManager.commandArgs.Any())
    {
        AppendLine("Application was started with commandline arguments: ");

        int a = 1;
        foreach (string arg in imbWEMManager.commandArgs)
        {
            AppendLine(a.ToString("D3") + " [" + arg + "]");
            a++; // was missing: every argument used to be listed as 001
        }
        AppendHorizontalLine();
    }
    else
    {
        AppendLine("Application was started without commandline arguments ");
        AppendHorizontalLine();
    }

    // Process is disposable; release it once the name and id have been written.
    using (var process = Process.GetCurrentProcess())
    {
        process.Refresh();

        AppendLine("Process name:      " + process.ProcessName);
        AppendLine("Process ID:        " + process.Id);
    }

    AppendHorizontalLine();
}
/// <summary>
/// Stores the given console into <see cref="console"/> and then calls <see cref="deployWorkspace"/>.
/// </summary>
/// <param name="__console">The console this workspace is created for.</param>
public aceAdvancedConsoleWorkspace(IAceAdvancedConsole __console)
{
    // The reference is assigned first, so it is already set when deployWorkspace runs.
    console = __console;

    deployWorkspace();
}
/// <summary>
/// Initializes a new instance of the <see cref="posResolverPlugin"/> class.
/// </summary>
/// <remarks>All work is delegated to the base constructor, which receives the parent and two fixed strings.</remarks>
/// <param name="__parent">The parent console, forwarded to the base constructor.</param>
public posResolverPlugin(IAceAdvancedConsole __parent)
    : base(
        __parent,
        "Part-of-Speech resolver",
        "This plug-in resolves string token or tokens into POS information object")
{
}
/// <summary>
/// Initializes a new instance of the <see cref="pipelineModelConsolePlugin"/> class.
/// </summary>
/// <remarks>All work is delegated to the base constructor, which receives the parent and two fixed strings.</remarks>
/// <param name="__parent">The parent console, forwarded to the base constructor.</param>
public pipelineModelConsolePlugin(IAceAdvancedConsole __parent)
    : base(
        __parent,
        "Pipeline Model Console Plugin",
        "This is imbACE advanced console plugin for pipelineModelConsolePlugin")
{
}
/// <summary>
/// Initializes a new instance of the <see cref="imbNLPProjectPlugin"/> class.
/// </summary>
/// <param name="__parent">The parent console; forwarded to the base constructor and kept in <c>parentConsole</c>.</param>
public imbNLPProjectPlugin(IAceAdvancedConsole __parent)
    : base(
        __parent,
        "imbNLPProjectPlugin",
        "This is imbACE advanced console plugin for imbNLPProjectPlugin")
{
    // Keep the console reference for later use by this plugin.
    parentConsole = __parent;
}
/// <summary>
/// Initializes a new instance of the <see cref="emailPlugin"/> class.
/// </summary>
/// <param name="__parent">The parent console; forwarded to the base constructor and kept in <c>parentConsole</c>.</param>
public emailPlugin(IAceAdvancedConsole __parent)
    : base(
        __parent,
        "emailPlugin",
        "Creates and sends email messaged")
{
    // Keep the console reference for later use by this plugin.
    parentConsole = __parent;
}