/// <summary>
/// Relays the call to every plug-in found in <c>allPlugins</c>, in enumeration order.
/// </summary>
/// <param name="aJob">Job instance passed through unchanged to each plug-in.</param>
/// <param name="__machine">Task machine passed through unchanged to each plug-in.</param>
/// <param name="__tRecord">Test record passed through unchanged to each plug-in.</param>
public void eventCrawlJobFinished(
    analyticJob aJob,
    crawlerDomainTaskMachine __machine,
    modelSpiderTestRecord __tRecord)
{
    // The element type is spelled out on purpose: it keeps the per-item cast of the original loop.
    foreach (IPlugInCommonBase<indexMaintenanceStageEnum, experimentSessionEntry> plugin in allPlugins)
    {
        plugin.eventCrawlJobFinished(aJob, __machine, __tRecord);
    }
}
/// <summary>
/// Relays the call to every plug-in found in <c>allPlugins</c>, in enumeration order.
/// </summary>
/// <param name="aJob">Job instance passed through unchanged to each plug-in.</param>
/// <param name="__machine">Task machine passed through unchanged to each plug-in.</param>
/// <param name="__tRecord">Test record passed through unchanged to each plug-in.</param>
public void eventCrawlJobFinished(
    analyticJob aJob,
    crawlerDomainTaskMachine __machine,
    modelSpiderTestRecord __tRecord)
{
    // The element type is spelled out on purpose: it keeps the per-item cast of the original loop.
    foreach (IPlugInCommonBase<crawlReportingStageEnum, directReporterBase> plugin in allPlugins)
    {
        plugin.eventCrawlJobFinished(aJob, __machine, __tRecord);
    }
}
/// <summary>
/// Relays the call to every plug-in found in <c>allPlugins</c>, in enumeration order.
/// </summary>
/// <param name="aJob">Job instance passed through unchanged to each plug-in.</param>
/// <param name="__machine">Task machine passed through unchanged to each plug-in.</param>
/// <param name="__tRecord">Test record passed through unchanged to each plug-in.</param>
public void eventCrawlJobFinished(
    analyticJob aJob,
    crawlerDomainTaskMachine __machine,
    modelSpiderTestRecord __tRecord)
{
    // The element type is spelled out on purpose: it keeps the per-item cast of the original loop.
    foreach (IPlugInCommonBase<crawlerDomainTaskIterationPhase, spiderEvaluatorBase> plugin in allPlugins)
    {
        plugin.eventCrawlJobFinished(aJob, __machine, __tRecord);
    }
}
/// <summary>
/// Declares one experimental run (an analytic job). This is the first command to call in scripts with experiment definitions.
/// </summary>
/// <param name="jobName">Name of the job to define.</param>
/// <param name="jobDesc">Description for the job.</param>
/// <param name="defaultStage">If true, a <c>macroStageControlFullScan</c> named after the job is assigned as the stage control of the new context.</param>
/// <param name="stampPrefix">Prefix at timestamp. NOTE(review): not read anywhere in this method body.</param>
/// <param name="stampCount">Stamp version count. NOTE(review): not read anywhere in this method body.</param>
/// <remarks>
/// Replaces <c>context</c> with a new <c>crawlJobContext</c>, attaches a new <c>analyticJob</c> together with
/// its <c>analyticJobRecord</c>, and hands that record to <c>aceLog.consoleControl.setAsOutput</c> under the
/// name "aRecord".
/// </remarks>
/// <seealso cref="aceOperationSetExecutorBase" />
public void aceOperation_defineJob(
    [Description("Name of the Job to define")] String jobName = "job",
    [Description("Description for the job")] String jobDesc = "",
    [Description("If true it will prepare default crawler stage to execute crawl in")] Boolean defaultStage = true,
    [Description("Prefix at timestamp")] String stampPrefix = "",
    [Description("Stamp version count")] Int32 stampCount = 1)
{
    // The context is created before the job, matching the original construction order.
    context = new crawlJobContext();

    analyticJob definedJob = new analyticJob
    {
        name = jobName,
        description = jobDesc
    };

    context.job = definedJob;
    context.aRecord = new analyticJobRecord(definedJob);

    aceLog.consoleControl.setAsOutput(context.aRecord, "aRecord");

    if (!defaultStage)
    {
        return;
    }

    context.stageControl = new macroStageControlFullScan(jobName, "Common stage control");
}
/// <summary>
/// Empty override: no statement is executed for this event.
/// </summary>
/// <remarks>
/// The only content of the body is a commented-out call to <c>imbWEMManager.index.Recheck(loger)</c>.
/// </remarks>
/// <param name="aJob">Not read.</param>
/// <param name="__machine">Not read.</param>
/// <param name="__tRecord">Not read.</param>
public override void eventCrawlJobFinished(
    analyticJob aJob,
    crawlerDomainTaskMachine __machine,
    modelSpiderTestRecord __tRecord)
{
    // Disabled: imbWEMManager.index.Recheck(loger);
}
/// <summary>
/// Abstract hook that every concrete subtype has to implement.
/// NOTE(review): judging by the name, it is meant to be invoked once a crawl job has finished - confirm against the callers.
/// </summary>
/// <param name="aJob">The analyticJob instance supplied by the caller.</param>
/// <param name="__machine">The crawlerDomainTaskMachine instance supplied by the caller.</param>
/// <param name="__tRecord">The modelSpiderTestRecord instance supplied by the caller.</param>
public abstract void eventCrawlJobFinished(analyticJob aJob, crawlerDomainTaskMachine __machine, modelSpiderTestRecord __tRecord);
/// <summary>
/// Delegates to the two-argument <c>eventCrawlJobFinished(__machine, __tRecord)</c> overload.
/// </summary>
/// <param name="aJob">Not forwarded; the two-argument overload does not receive it.</param>
/// <param name="__machine">Passed on as the first argument of the two-argument overload.</param>
/// <param name="__tRecord">Passed on as the second argument of the two-argument overload.</param>
public override void eventCrawlJobFinished(
    analyticJob aJob,
    crawlerDomainTaskMachine __machine,
    modelSpiderTestRecord __tRecord)
{
    eventCrawlJobFinished(__machine, __tRecord);
}
/// <summary>
/// Obtains the master TF-IDF table of the current experiment session, logs its name and term count,
/// and saves a report of its data table (named "master_tf_idf") into the session report folder.
/// </summary>
/// <param name="aJob">Not read by the active code of this method.</param>
/// <param name="__machine">Not read by the active code of this method.</param>
/// <param name="__tRecord">Not read by the active code of this method.</param>
public override void eventCrawlJobFinished(
    analyticJob aJob,
    crawlerDomainTaskMachine __machine,
    modelSpiderTestRecord __tRecord)
{
    // The value is not used by the active code below; the read is kept so the property access still happens.
    string currentIndexId = imbWEMManager.index.current_indexID;

    var masterTable = imbWEMManager.index.experimentManager.CurrentSession.GetTFIDF_Master(loger, false, true);

    string summary = "Master TF-IDF [" + masterTable.name + "] has [" + masterTable.Count + "] terms defined.";
    loger.log(summary);

    // The data table is resolved first and the arguments afterwards, the same evaluation order as the chained call.
    var reportData = masterTable.GetDataTable();
    reportData.GetReportAndSave(
        imbWEMManager.index.experimentManager.CurrentSession.sessionReportFolder,
        imbWEMManager.authorNotation,
        "master_tf_idf",
        true);

    // Disabled legacy steps (they existed here only as commented-out code and were never executed):
    //  - saving the master TF-IDF as XML and as a "lemma" report into the experiment and index folders;
    //  - compiling a global TF-IDF table term by term (freqNorm, freqAbs, df, idf, tf_idf), logging the
    //    tf_idf sum and saving the result as "tf_idf_compiled" (with a note to add supplementary information);
    //  - collecting the distinct lemmas per domain and writing all terms into a per-domain .txt file;
    //  - updating imbWEMManager.index.domainIndexTable for every index domain (marked as not finished).
}