Exemple #1
0
        /// <summary>Runs the current crawl job</summary>
        /// <remarks>
        /// <para>Starts crawl execution: for every child record of <c>context.aRecord</c> a
        /// <c>crawlerDomainTaskMachine</c> is created, configured from <c>context.crawlerJobEngineSettings</c>,
        /// executed, and reported.</para>
        /// <para>A job note is written before the first crawl and finalized with the line
        /// <c>[RunJob completed]</c> after the session is closed. When <c>context.aRecord</c> is not set,
        /// an error is logged and the command returns without doing anything.</para>
        /// </remarks>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runRun()
        {
            // The parent is not guaranteed to be an advanced console, so every use of `console` below is null-guarded.
            IAceAdvancedConsole console = parent as IAceAdvancedConsole;

            // UTC is used for the elapsed-time measurement so that local clock shifts (DST) cannot skew it.
            DateTime start = DateTime.UtcNow;

            if (context.aRecord == null)
            {
                output.log("Error: define Job before calling this command.");
                return;
            }

            int Tdl_max = context.crawlerJobEngineSettings.Tdl_max;
            int Tll_max = context.crawlerJobEngineSettings.Tll_max;
            int TC_max  = context.crawlerJobEngineSettings.TC_max;

            // Child records are collected before the soft initialization, same order as always.
            var spiderEvals = context.aRecord.GetChildRecords();

            context.aRecord.initializeSoft(context.sampleList);

            DirectoryInfo di = imbWEMManager.index.experimentManager.CurrentSession.sessionReportFolder;

            var notation = appManager.AppInfo;

            // ------------------ note creation -------------------
            analyticJobNote note = new analyticJobNote(imbWEMManager.index.experimentEntry.sessionCrawlerFolder);

            // Avoid dereferencing a null console just to obtain its workspace.
            note.WriteAboutJob(context, console != null ? console.workspace : null, console);

            note.AppendLine("--------------- Crawl Job configuration overview -------------------------- ");
            note.AppendLine("   Script var | Article    - Description                             ");
            note.AppendLine("--------------------------------------------------------------------------- ");
            note.AppendLine("   Tdl_max    | Tdl        - Time limit per domain - in minutes     | : " + Tdl_max);
            note.AppendLine("   Tll_max    | Tac        - Time limit for inactivity - in minutes | : " + Tll_max);
            note.AppendLine("   TC_max     | TC         - Maximum number of JLC threads allowed  | : " + TC_max);
            note.AppendLine("--------------------------------------------------------------------------- ");
            note.AppendHorizontalLine();
            note.AppendLine("-- if the test was finished without problem at the last line it will be message [RunJob completed] ---");
            note.AppendLine("-- if not: something went wrong - check the logs ---");
            note.AppendHorizontalLine();
            // Saved up-front so the note exists on disk even if a crawl below fails.
            note.SaveNote();

            foreach (modelSpiderTestRecord tRecord in spiderEvals)
            {
                directAnalyticReporter reporter = new directAnalyticReporter(imbWEMManager.index.experimentEntry.CrawlID, imbWEMManager.index.experimentEntry.sessionCrawlerFolder, notation);

                context.pluginStack.InstallTo(imbWEMManager.index.plugins, plugInGroupEnum.index, true);

                tRecord.performance = imbWEMManager.index.experimentEntry;

                output.log(tRecord.instance.name + " crawl start");

                crawlerDomainTaskMachine cDTM = new crawlerDomainTaskMachine(tRecord, context.aRecord.sample, reporter, di)
                {
                    maxThreads       = TC_max,
                    _timeLimitForDLC = Tdl_max,
                    TimeLimitForTask = Tll_max
                };

                // Plugins are installed into the engine, the crawler and the report stage before execution.
                context.pluginStack.InstallTo(cDTM.plugins, plugInGroupEnum.engine, false);
                context.pluginStack.InstallTo(tRecord.instance.plugins, plugInGroupEnum.crawler, false);
                context.pluginStack.InstallTo(cDTM.reportPlugins, plugInGroupEnum.report, false);

                cDTM.startAutoParallel(true); // ----- execution

                output.log(tRecord.instance.name + " crawl finished");

                cDTM.webLoaderControler.Save();

                reporter.reportCrawler(tRecord);

                note.WriteAboutCrawlerRun(tRecord, cDTM);

                // The script copy is only written when the job was actually started from a running script.
                if (console != null && console.scriptRunning != null)
                {
                    console.scriptRunning.getContent().saveStringToFile(imbWEMManager.index.experimentEntry.sessionCrawlerFolder.pathFor("script.ace"));
                }

                if (imbWEMManager.settings.directReportEngine.doPublishExperimentSessionTable)
                {
                    imbWEMManager.index.experimentManager.AddOrUpdate(tRecord.performance as experimentSessionEntry);
                }
            }

            imbWEMManager.index.CloseSession(spiderEvals);

            output.AppendLine("RunJob done in: " + DateTime.UtcNow.Subtract(start).TotalMinutes.ToString("#0.0##") + " min");

            note.AppendLine("[RunJob completed]");
            note.SaveNote();

            // The sample list is stored next to the note, as sample.txt.
            var sl = context.sampleList.ToList();

            sl.saveContentOnFilePath(note.folder.pathFor("sample.txt"));
        }
Exemple #2
0
        /// <summary>
        /// Creates a new <see cref="crawlJobPlugin"/> attached to the given console.
        /// </summary>
        /// <param name="__parent">Console that owns this plugin; forwarded to the base constructor.</param>
        public crawlJobPlugin(IAceAdvancedConsole __parent)
            : base(__parent, "crawlJobPlugin", "This is imbACE advanced console plugin for crawlJobPlugin")
        {
            // Hands this plugin's output over to logToConsoleControl; "WEM Plugin" is presumably the label shown for it.
            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(output, "WEM Plugin");
        }
Exemple #3
0
        /// <summary>
        /// Imports sample from text file
        /// </summary>
        /// <param name="path">path to file with samples, if * it will open dialog to select the file</param>
        /// <param name="inWorkspace">if true, the file path is interpreted as relative to console workspace; ignored for rooted paths and for paths picked in the dialog</param>
        /// <param name="sampleName">Name of the sample list, if empty it will not change current sample list name</param>
        /// <param name="replace">if set to true it will replace any existing samples in the list</param>
        /// <param name="skip">Number of entries to skip from the start of the imported file; negative values are treated as 0</param>
        /// <param name="limit">Maximum number of entries imported after the skipped ones; a negative value selects the default cap of 10000</param>
        /// <param name="debug">if true it will report on link preprocessing</param>
        /// <remarks>
        /// Loads the file and adds domain urls from it into context's sample list.
        /// If the file does not exist, a message is logged and the sample list is left untouched.
        /// </remarks>
        /// <seealso cref="aceOperationSetExecutorBase" />
        public void aceOperation_addSampleFile(
            [Description("path to file with samples, if * it will open dialog to select the file")] String path              = "*",
            [Description("if true, the file path is interpreted as relative to console workspace")] Boolean inWorkspace      = true,
            [Description("Name of the sample list, if empty it will not change current sample list name")] String sampleName = "",
            [Description("if set to true it will replace any existing samples in the list")] Boolean replace = false,
            [Description("Number of entries to skip, from the imported file")] Int32 skip = 0,
            [Description("If set above 0, it limits the total number of domains imported")] Int32 limit = -1,
            [Description("if true it will report on link preprocessing")] Boolean debug = true)
        {
            // Cap applied when the caller does not specify a limit.
            const Int32 defaultLimit = 10000;

            IAceAdvancedConsole console = parent as IAceAdvancedConsole;

            if (path == "*")
            {
                // The dialog starts in the workspace folder when requested and available, otherwise in the projects folder.
                String defPath = appManager.Application.folder_projects.path;
                if (inWorkspace && console != null)
                {
                    defPath = console.workspace.folder.path;
                }
                path        = dialogs.openSelectFile(imbACE.Services.textBlocks.smart.dialogSelectFileMode.selectFileToOpen, "*.txt", defPath, "Select file to import web domains sample from");
                // The dialog result is used as-is, never re-resolved against the workspace.
                inWorkspace = false;
            }

            if (Path.IsPathRooted(path))
            {
                inWorkspace = false;
            }

            if (inWorkspace && console != null)
            {
                path = console.workspace.folder.pathFor(path);
            }

            if (limit < 0)
            {
                limit = defaultLimit;
            }
            if (skip < 0)
            {
                skip = 0;
            }

            if (!File.Exists(path))
            {
                output.log("Sample list file not found at [" + path + "]");
                return;
            }

            if (replace)
            {
                context.sampleList = new webSiteSimpleSample();
            }

            if (!sampleName.isNullOrEmpty())
            {
                context.sampleList.name = sampleName;
            }

            var list = path.openFileToList(true);

            Int32 index    = 0; // position of the current entry within the file
            Int32 imported = 0; // number of entries added to the sample list so far

            foreach (String l in list)
            {
                domainAnalysis da = new domainAnalysis(l);

                if (index < skip)
                {
                    if (debug)
                    {
                        output.Append(String.Format("Skipping {0,-20} => {1,-20}", l, da.urlProper));
                    }
                }
                else
                {
                    // The limit counts imported entries only, so skipped entries do not consume it.
                    if (imported >= limit)
                    {
                        break;
                    }

                    if (debug)
                    {
                        output.Append(String.Format("Adding   {0,-20} => {1,-20}", l, da.urlProper));
                    }
                    context.sampleList.Add(da.urlProper);
                    imported++;
                }

                index++;
            }
        }
 /// <summary>
 /// Resolves a console plugin by name through <c>GetPluginInstance</c>.
 /// </summary>
 /// <param name="console">Console passed on as the only element of the argument array.</param>
 /// <param name="plugin_name">Name of the plugin to resolve.</param>
 /// <param name="output">Optional log builder forwarded to <c>GetPluginInstance</c>.</param>
 /// <returns>Whatever <c>GetPluginInstance</c> returns for the given name.</returns>
 public IAceConsolePlugin GetInstance(IAceAdvancedConsole console, String plugin_name, ILogBuilder output = null)
 {
     // The console is the single argument handed over for plugin creation.
     Object[] pluginArguments = { console };

     return GetPluginInstance(plugin_name, "", output, pluginArguments);
 }
Exemple #5
0
        /// <summary>
        /// Writes the job header into this note: test identity, sample information, settings hash,
        /// the running script (if any), command line arguments and process information.
        /// </summary>
        /// <param name="state">Crawl job context to describe; its <c>setupHash_global</c> is set to the computed settings hash.</param>
        /// <param name="workspace">Console workspace; not read by this method.</param>
        /// <param name="console">Console that started the job; may be null, in which case the script section is omitted.</param>
        public void WriteAboutJob(ICrawlJobContext state, aceAdvancedConsoleWorkspace workspace, IAceAdvancedConsole console)
        {
            log(":: " + state.job.name + " ::");

            AppendLine("Test ID: " + imbWEMManager.index.experimentEntry.TestID);
            AppendLine("Test signature: " + imbWEMManager.index.experimentEntry.TestSignature);

            AppendLine("Test description: " + state.job.description);
            AppendLine("Test report folder: " + folder.path);

            AppendHorizontalLine();

            AppendLine("Sample size: " + state.sampleList.Count());

            AppendLine("Sample block: " + state.aRecord.sampleBlockOrdinalNumber);
            AppendLine("Sample limit: " + state.aRecord.sampleTakeLimit);
            AppendLine("Sample order hash: " + imbWEMManager.index.experimentEntry.SampleListHash);

            AppendLine("Index auto-preloaded: " + imbWEMManager.index.doAutoLoad);

            AppendHorizontalLine();
            AppendLine("Run stamp: " + state.aRecord.testRunStamp);

            // The hash is taken over the XML serialization of the settings, so equal settings give equal hashes.
            string hash = md5.GetMd5Hash(objectSerialization.ObjectToXML(imbWEMManager.settings));

            // Side effect: the hash is stored on the job context as well as written to the note.
            state.setupHash_global = hash;

            AppendLine("Settings [imbAnalyticEngine] hash: " + hash);
            AppendLine(" ^-- to make sure multiple tests were running under the same settings compare the hash");

            // The script section is written only when a console is available and a script is running.
            if (console != null && console.scriptRunning != null)
            {
                AppendHorizontalLine();
                AppendLine("Job started by ACE script: " + console.scriptRunning.info.Name);
                AppendLine("=== Content of the script == start ======");
                AppendLine(console.scriptRunning.getContent());
                AppendLine("=== Content of the script == end   ======");
            }

            AppendHorizontalLine();

            if (imbWEMManager.commandArgs.Any())
            {
                AppendLine("Application was started with commandline arguments: ");
                int a = 1;
                foreach (string arg in imbWEMManager.commandArgs)
                {
                    AppendLine(a.ToString("D3") + " [" + arg + "]");
                    a++; // number the arguments 001, 002, ...
                }
                AppendHorizontalLine();
            }
            else
            {
                AppendLine("Application was started without commandline arguments ");
                AppendHorizontalLine();
            }

            // Process is IDisposable; release the component once its name and id are written.
            using (var process = Process.GetCurrentProcess())
            {
                process.Refresh();

                AppendLine("Process name: " + process.ProcessName);
                AppendLine("Process ID: " + process.Id);
            }

            AppendHorizontalLine();
        }
 /// <summary>
 /// Stores the <see cref="console"/> reference and then calls <see cref="deployWorkspace"/>.
 /// </summary>
 /// <param name="__console">Console this workspace belongs to.</param>
 public aceAdvancedConsoleWorkspace(IAceAdvancedConsole __console)
 {
     // The console is assigned first, before deployWorkspace() runs.
     this.console = __console;

     deployWorkspace();
 }
 /// <summary>
 /// Creates a new <see cref="posResolverPlugin"/> attached to the given console.
 /// </summary>
 /// <param name="__parent">Console that owns this plugin; forwarded to the base constructor.</param>
 public posResolverPlugin(IAceAdvancedConsole __parent)
     : base(__parent, "Part-of-Speech resolver", "This plug-in resolves string token or tokens into POS information object")
 {
     // Nothing to initialize beyond what the base constructor does.
 }
 /// <summary>
 /// Creates a new <see cref="pipelineModelConsolePlugin"/> attached to the given console.
 /// </summary>
 /// <param name="__parent">Console that owns this plugin; forwarded to the base constructor.</param>
 public pipelineModelConsolePlugin(IAceAdvancedConsole __parent)
     : base(__parent, "Pipeline Model Console Plugin", "This is imbACE advanced console plugin for pipelineModelConsolePlugin")
 {
     // Nothing to initialize beyond what the base constructor does.
 }
Exemple #9
0
 /// <summary>
 /// Creates a new <see cref="imbNLPProjectPlugin"/> and keeps a reference to its console.
 /// </summary>
 /// <param name="__parent">Console that owns this plugin; forwarded to the base constructor and stored in <c>parentConsole</c>.</param>
 public imbNLPProjectPlugin(IAceAdvancedConsole __parent)
     : base(__parent, "imbNLPProjectPlugin", "This is imbACE advanced console plugin for imbNLPProjectPlugin")
 {
     this.parentConsole = __parent;
 }
 /// <summary>
 /// Creates a new <see cref="emailPlugin"/> and keeps a reference to its console.
 /// </summary>
 /// <param name="__parent">Console that owns this plugin; forwarded to the base constructor and stored in <c>parentConsole</c>.</param>
 public emailPlugin(IAceAdvancedConsole __parent)
     : base(__parent, "emailPlugin", "Creates and sends email messaged")
 {
     this.parentConsole = __parent;
 }