/// <summary>
/// Forwards the crawl-job-finished notification to every plug-in in <c>allPlugins</c>, in enumeration order.
/// </summary>
/// <param name="aJob">Passed unchanged to each plug-in.</param>
/// <param name="__machine">Passed unchanged to each plug-in.</param>
/// <param name="__tRecord">Passed unchanged to each plug-in.</param>
public void eventCrawlJobFinished(analyticJob aJob, crawlerDomainTaskMachine __machine, modelSpiderTestRecord __tRecord)
 {
     // The element type is spelled out (not var) so the loop keeps whatever per-item conversion it had.
     foreach (IPlugInCommonBase<indexMaintenanceStageEnum, experimentSessionEntry> plugin in allPlugins)
     {
         plugin.eventCrawlJobFinished(aJob, __machine, __tRecord);
     }
 }
Beispiel #2
0
 /// <summary>
 /// Forwards the crawl-job-finished notification to every plug-in in <c>allPlugins</c>, in enumeration order.
 /// </summary>
 /// <param name="aJob">Passed unchanged to each plug-in.</param>
 /// <param name="__machine">Passed unchanged to each plug-in.</param>
 /// <param name="__tRecord">Passed unchanged to each plug-in.</param>
 public void eventCrawlJobFinished(analyticJob aJob, crawlerDomainTaskMachine __machine, modelSpiderTestRecord __tRecord)
 {
     // The element type is spelled out (not var) so the loop keeps whatever per-item conversion it had.
     foreach (IPlugInCommonBase<crawlReportingStageEnum, directReporterBase> plugin in allPlugins)
     {
         plugin.eventCrawlJobFinished(aJob, __machine, __tRecord);
     }
 }
 /// <summary>
 /// Forwards the crawl-job-finished notification to every plug-in in <c>allPlugins</c>, in enumeration order.
 /// </summary>
 /// <param name="aJob">Passed unchanged to each plug-in.</param>
 /// <param name="__machine">Passed unchanged to each plug-in.</param>
 /// <param name="__tRecord">Passed unchanged to each plug-in.</param>
 public void eventCrawlJobFinished(analyticJob aJob, crawlerDomainTaskMachine __machine, modelSpiderTestRecord __tRecord)
 {
     // The element type is spelled out (not var) so the loop keeps whatever per-item conversion it had.
     foreach (IPlugInCommonBase<crawlerDomainTaskIterationPhase, spiderEvaluatorBase> plugin in allPlugins)
     {
         plugin.eventCrawlJobFinished(aJob, __machine, __tRecord);
     }
 }
Beispiel #4
0
        /// <summary>
        /// Declares one experimental run (an analytic job); this is the first command to call in scripts with experiment definitions.
        /// </summary>
        /// <param name="jobName">Name of the job to define; also passed as the first argument of the default stage control.</param>
        /// <param name="jobDesc">Description for the job.</param>
        /// <param name="defaultStage">If true, a new <c>macroStageControlFullScan</c> is assigned to the context's <c>stageControl</c>.</param>
        /// <param name="stampPrefix">Prefix at timestamp. Not read anywhere in this method body.</param>
        /// <param name="stampCount">Stamp version count. Not read anywhere in this method body.</param>
        /// <remarks>
        /// Replaces <c>context</c> with a new <c>crawlJobContext</c>, attaches a new <c>analyticJob</c> and an
        /// <c>analyticJobRecord</c> built from it, and passes that record to
        /// <c>aceLog.consoleControl.setAsOutput</c> under the name "aRecord".
        /// </remarks>
        /// <seealso cref="aceOperationSetExecutorBase" />
        public void aceOperation_defineJob(
            [Description("Name of the Job to define")] String jobName = "job",
            [Description("Description for the job")] String jobDesc   = "",
            [Description("If true it will prepare default crawler stage to execute crawl in")] Boolean defaultStage = true,
            [Description("Prefix at timestamp")] String stampPrefix = "",
            [Description("Stamp version count")] Int32 stampCount   = 1)

        {
            // The context is replaced first, before the job object is created.
            context = new crawlJobContext();

            var definedJob = new analyticJob
            {
                name        = jobName,
                description = jobDesc
            };

            context.job     = definedJob;
            context.aRecord = new analyticJobRecord(definedJob);

            aceLog.consoleControl.setAsOutput(context.aRecord, "aRecord");

            if (!defaultStage)
            {
                return;
            }

            context.stageControl = new macroStageControlFullScan(jobName, "Common stage control");
        }
Beispiel #5
0
 /// <summary>
 /// Crawl-job-finished hook. This override currently performs no work: its only statement is commented out.
 /// </summary>
 /// <param name="aJob">Not read by this override.</param>
 /// <param name="__machine">Not read by this override.</param>
 /// <param name="__tRecord">Not read by this override.</param>
 public override void eventCrawlJobFinished(analyticJob aJob, crawlerDomainTaskMachine __machine, modelSpiderTestRecord __tRecord)
 {
     // Disabled index recheck, kept for reference:
     //  imbWEMManager.index.Recheck(loger);
 }
Beispiel #6
0
 /// <summary>
 /// Hook that derived types must implement; by its name it is meant to be called when a crawl job finishes.
 /// </summary>
 /// <param name="aJob">The analytic job supplied by the caller.</param>
 /// <param name="__machine">The crawler domain task machine supplied by the caller.</param>
 /// <param name="__tRecord">The spider test record supplied by the caller.</param>
 public abstract void eventCrawlJobFinished(analyticJob aJob, crawlerDomainTaskMachine __machine, modelSpiderTestRecord __tRecord);
Beispiel #7
0
 /// <summary>
 /// Delegates to the two-argument <c>eventCrawlJobFinished</c> overload, dropping <paramref name="aJob"/>.
 /// </summary>
 /// <param name="aJob">Not forwarded; unused by this override.</param>
 /// <param name="__machine">Forwarded unchanged to the two-argument overload.</param>
 /// <param name="__tRecord">Forwarded unchanged to the two-argument overload.</param>
 public override void eventCrawlJobFinished(analyticJob aJob, crawlerDomainTaskMachine __machine, modelSpiderTestRecord __tRecord)
 {
     eventCrawlJobFinished(__machine, __tRecord);
 }
        /// <summary>
        /// Reports on the current session's master TF-IDF table: logs its name and term count, then saves a
        /// data-table report named "master_tf_idf" into the session report folder.
        /// </summary>
        /// <param name="aJob">Not read by this override.</param>
        /// <param name="__machine">Not read by this override.</param>
        /// <param name="__tRecord">Not read by this override.</param>
        /// <remarks>
        /// Earlier drafts of this method survived only as commented-out code and were removed as dead code
        /// (recover them from version control if needed). They covered: saving the aggregated TF-IDF to XML,
        /// compiling a per-term TF-IDF table with an "IP sum" log line, collecting distinct lemmas per domain,
        /// and updating the domain index table. The last of these was marked as not finished.
        /// </remarks>
        public override void eventCrawlJobFinished(analyticJob aJob, crawlerDomainTaskMachine __machine, modelSpiderTestRecord __tRecord)
        {
            // Resolve the session once; it is needed both for the table and for the report folder.
            var session = imbWEMManager.index.experimentManager.CurrentSession;

            var masterTFIDF = session.GetTFIDF_Master(loger, false, true);

            loger.log($"Master TF-IDF [{masterTFIDF.name}] has [{masterTFIDF.Count}] terms defined.");

            masterTFIDF.GetDataTable().GetReportAndSave(session.sessionReportFolder, imbWEMManager.authorNotation, "master_tf_idf", true);

            // TODO: the per-domain index update (domainIndexTable.AddOrUpdate for each index domain) was never finished.
        }