Example #1
        /// <summary>
        /// Sets the web loader controller.
        /// </summary>
        /// <param name="__folder">The folder the controller is prepared with.</param>
        /// <returns>The prepared web loader controller.</returns>
        public spiderWebLoaderControler SetWebLoaderControler(folderNode __folder)
        {
            webLoaderControler = new spiderWebLoaderControler();

            // pass the log builder only when a test record is available
            if (tRecord != null)
            {
                webLoaderControler.prepare(tRecord.logBuilder, __folder);
            }
            else
            {
                webLoaderControler.prepare(null, __folder);
            }

            return webLoaderControler;
        }
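A minimal call-site sketch, assuming a host object (spider below is hypothetical) that exposes this method, plus an already constructed folderNode named crawlFolder; Save() is the same persistence call used on the controller in Example #2:

        // hypothetical call site: 'spider' and 'crawlFolder' are assumed to exist in the host context
        spiderWebLoaderControler loader = spider.SetWebLoaderControler(crawlFolder);
        loader.Save(); // persist the loaded content, as done with cDTM.webLoaderControler in Example #2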
Example #2
        /// <summary>Runs the current crawl job</summary>
        /// <remarks><para>Starts crawl execution</para></remarks>
        /// <seealso cref="aceOperationSetExecutorBase"/>
        public void aceOperation_runRun()
        {
            IAceAdvancedConsole console = parent as IAceAdvancedConsole;

            // record the start time so the total duration can be reported at the end
            DateTime start = DateTime.Now;

            if (context.aRecord == null)
            {
                output.log("Error: define Job before calling this command.");
                return;
            }

            int Tdl_max = context.crawlerJobEngineSettings.Tdl_max; // time limit per domain, in minutes
            int Tll_max = context.crawlerJobEngineSettings.Tll_max; // inactivity time limit, in minutes
            int TC_max  = context.crawlerJobEngineSettings.TC_max;  // maximum number of JLC threads

            var spiderEvals = context.aRecord.GetChildRecords();

            context.aRecord.initializeSoft(context.sampleList);

            int c = 0;

            DirectoryInfo di = imbWEMManager.index.experimentManager.CurrentSession.sessionReportFolder;

            var notation = appManager.AppInfo;

            // ------------------ note creation -------------------
            analyticJobNote note = new analyticJobNote(imbWEMManager.index.experimentEntry.sessionCrawlerFolder);

            note.WriteAboutJob(context, console.workspace, console);

            note.AppendLine("--------------- Crawl Job configuration overview -------------------------- ");
            note.AppendLine("   Script var | Article    - Description                             ");
            note.AppendLine("--------------------------------------------------------------------------- ");
            note.AppendLine("   Tdl_max    | Tdl        - Time limit per domain - in minutes     | : " + Tdl_max);
            note.AppendLine("   Tll_max    | Tac        - Time limit for inactivity - in minutes | : " + Tll_max);
            note.AppendLine("   TC_max     | TC         - Maximum number of JLC threads allowed  | : " + TC_max);
            note.AppendLine("--------------------------------------------------------------------------- ");
            note.AppendHorizontalLine();
            note.AppendLine("-- if the test was finished without problem at the last line it will be message [RunJob completed] ---");
            note.AppendLine("-- if not: something went wrong - check the logs ---");
            note.AppendHorizontalLine();
            note.SaveNote();

            foreach (modelSpiderTestRecord tRecord in spiderEvals)
            {
                c++;
                spiderWebLoaderControler controler = null;

                directAnalyticReporter reporter = new directAnalyticReporter(imbWEMManager.index.experimentEntry.CrawlID, imbWEMManager.index.experimentEntry.sessionCrawlerFolder, notation);

                context.pluginStack.InstallTo(imbWEMManager.index.plugins, plugInGroupEnum.index, true);

                tRecord.performance = imbWEMManager.index.experimentEntry;

                output.log(tRecord.instance.name + " crawl start");

                crawlerDomainTaskMachine cDTM = new crawlerDomainTaskMachine(tRecord, context.aRecord.sample, reporter, di)
                {
                    maxThreads       = TC_max,
                    _timeLimitForDLC = Tdl_max,
                    TimeLimitForTask = Tll_max
                };

                // install the engine, crawler and report plugins from the job's plugin stack
                context.pluginStack.InstallTo(cDTM.plugins, plugInGroupEnum.engine, false);
                context.pluginStack.InstallTo(tRecord.instance.plugins, plugInGroupEnum.crawler, false);
                context.pluginStack.InstallTo(cDTM.reportPlugins, plugInGroupEnum.report, false);

                cDTM.startAutoParallel(true); // run the crawl; returns after all domain tasks complete

                output.log(tRecord.instance.name + " crawl finished");

                cDTM.webLoaderControler.Save();
                controler = cDTM.webLoaderControler;

                reporter.reportCrawler(tRecord);

                note.WriteAboutCrawlerRun(tRecord, cDTM);

                if (console != null)
                {
                    console.scriptRunning.getContent().saveStringToFile(imbWEMManager.index.experimentEntry.sessionCrawlerFolder.pathFor("script.ace"));
                }

                if (imbWEMManager.settings.directReportEngine.doPublishExperimentSessionTable)
                {
                    imbWEMManager.index.experimentManager.AddOrUpdate(tRecord.performance as experimentSessionEntry);
                }
            }

            imbWEMManager.index.CloseSession(spiderEvals);

            output.AppendLine("RunJob done in: " + DateTime.Now.Subtract(start).TotalMinutes.ToString("#0.0##") + " min");

            note.AppendLine("[RunJob completed]");
            note.SaveNote();

            // imbWEMManager.settings.Save(imbWEMManager.index.experimentEntry.sessionCrawlerFolder.pathFor("imbAnalyticEngineSettings.xml"));

            var sl = context.sampleList.ToList();

            sl.saveContentOnFilePath(note.folder.pathFor("sample.txt"));
        }
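A hedged setup sketch for the preconditions this command checks: context.aRecord must be defined by a prior job-definition command, and the engine limits are read from context.crawlerJobEngineSettings. The values below are illustrative assumptions, not recommended defaults:

            // hypothetical configuration, executed before aceOperation_runRun()
            context.crawlerJobEngineSettings.Tdl_max = 10; // time limit per domain, in minutes
            context.crawlerJobEngineSettings.Tll_max = 3;  // inactivity time limit, in minutes
            context.crawlerJobEngineSettings.TC_max  = 8;  // maximum number of JLC threads
            // context.aRecord must already be assigned by a job-definition command;
            // otherwise aceOperation_runRun() logs an error and returns immediately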