/// <summary>
/// Handles reporting stages that have no dedicated callback. Only
/// <c>crawlReportingStageEnum.DLCPreinitiation</c> is acted upon; for any other
/// stage the method returns without doing anything.
/// </summary>
/// <param name="stage">Reporting stage being signalled.</param>
/// <param name="__parent">Reporter that raised the event (not used here).</param>
/// <param name="__task">Domain task the event refers to (not used here).</param>
/// <param name="wRecord">Site record whose context, domain and domain info are read.</param>
public override void eventUniversal(crawlReportingStageEnum stage, directReporterBase __parent, crawlerDomainTask __task, modelSpiderSiteRecord wRecord)
{
    if (stage != crawlReportingStageEnum.DLCPreinitiation)
    {
        return;
    }

    // Hook this plug-in into the record's context so onTargetPageAttached is called.
    wRecord.context.OnTargetPageAttached += new modelSpiderSiteRecordEvent(onTargetPageAttached);

    imbMCRepository repository = mcm.activeRepository;
    // NOTE(review): the meaning of the 'true' flag is defined by GetWebSite, which is not visible here.
    imbMCWebSite siteRepo = repository.GetWebSite(wRecord.domainInfo, true, loger);

    if (webSiteReposByDomain.ContainsKey(wRecord.domain))
    {
        // The domain is already registered: keep the existing entry and only report it.
        loger.log("DLC sent to CrawlToMC plugin second time: " + wRecord.domain);
    }
    else
    {
        webSiteReposByDomain.Add(wRecord.domain, siteRepo);
    }

    // The site table update and the save run in both cases, registered or not.
    repository.siteTable.AddOrUpdate(siteRepo.entry);
    siteRepo.SaveDataStructure(repository.folder, loger);
}
/// <summary>
/// Called when a DLC has finished: pushes the web site entry registered for the
/// record's domain into the active repository's site table and saves the site's
/// data structure into the repository folder.
/// </summary>
/// <param name="__spider">Reporter that raised the event (not used here).</param>
/// <param name="__task">Domain task that finished (not used here).</param>
/// <param name="__wRecord">Site record; only its <c>domain</c> is read.</param>
public override void eventDLCFinished(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
{
    // Entries are only added to webSiteReposByDomain elsewhere; if this domain was
    // never registered, report it and skip the save rather than fail on the lookup.
    if (!webSiteReposByDomain.ContainsKey(__wRecord.domain))
    {
        loger.log("DLC finished for a domain that was not registered with CrawlToMC plugin: " + __wRecord.domain);
        return;
    }

    imbMCRepository mcRepo = mcm.activeRepository;
    imbMCWebSite wRepo = webSiteReposByDomain[__wRecord.domain];

    mcRepo.siteTable.AddOrUpdate(wRepo.entry);
    wRepo.SaveDataStructure(mcRepo.folder, loger);
}
/// <summary>
/// Called when a DLC is initiated. When the <c>doDomainReport</c> setting is on,
/// writes a markdown description of the task's evaluator to a
/// <c>dlc_config_*.txt</c> file under the session crawler folder's "sites" entry.
/// </summary>
/// <param name="__spider">Reporter that raised the event (not used here).</param>
/// <param name="__task">Domain task whose <c>evaluator</c> is described.</param>
/// <param name="__wRecord">Site record; its domain root name is used for the file name.</param>
public override void eventDLCInitiated(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
{
    if (!imbWEMManager.settings.directReportEngine.doDomainReport)
    {
        return;
    }

    // Resolve the target folder first, then the file name, matching the original evaluation order.
    var sitesFolder = imbWEMManager.index.experimentEntry.sessionCrawlerFolder["sites"];
    string configPath = sitesFolder.pathFor("dlc_config_" + __wRecord.domainInfo.domainRootName.getFilename(".txt"));

    builderForMarkdown markdown = new builderForMarkdown();
    spiderTools.Describe(__task.evaluator, markdown);

    string description = markdown.ToString();
    description.saveStringToFile(configPath);
}
/// <summary>
/// Called when a DLC has finished. Unless the task was aborted or the record has
/// no iteration rows, fills (or creates) the <c>indexDomain</c> entry for the
/// record from its first and last iteration rows and stores it in <c>records</c>.
/// Every <c>reportPlugIn_sideIndexer_DLCToSave</c> processed DLCs the counter is
/// reset and <c>SaveAll()</c> is called.
/// </summary>
/// <param name="__spider">Reporter that raised the event (not used here).</param>
/// <param name="__task">Domain task that finished; its status and parent chain are read.</param>
/// <param name="__wRecord">Site record providing the iteration table and domain data.</param>
public override void eventDLCFinished(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
{
    // Nothing to index for aborted tasks or records without iteration data.
    if (__task.status == crawlerDomainTaskStatusEnum.aborted
        || __wRecord.iterationTableRecord == null
        || __wRecord.iterationTableRecord.Count == 0)
    {
        return;
    }

    indexDomain entry = records.GetOrCreate(__wRecord.instanceID);
    entry.url = __wRecord.domainInfo.urlProper;
    entry.domain = __wRecord.domain;

    var finalIteration = __wRecord.iterationTableRecord.LastOrDefault();
    var landingIteration = __wRecord.iterationTableRecord.FirstOrDefault();

    // Page counts come from the last iteration row.
    entry.relevantPages = finalIteration.relevantPageCount;
    entry.notRelevantPages = finalIteration.irrelevantPageCount;
    entry.detected = __wRecord.web.webActiveLinks.Count();
    entry.Words = __wRecord.context.targets.termsAll.Count();

    // Landing* values come from the first iteration row.
    entry.LandingLanguage = landingIteration.targetLanguage;
    entry.LandingRelevant = landingIteration.relevantPageCount > 0;

    records.AddOrUpdate(entry, objectTableUpdatePolicy.updateIfHigher);

    DLCCount++;
    if (DLCCount < imbWEMManager.settings.supportEngine.reportPlugIn_sideIndexer_DLCToSave)
    {
        return;
    }

    // Threshold reached: restart the counter and persist everything.
    DLCCount = 0;
    SaveAll();
    output.log("Side Index save and publish triggered on [" + __task.parent.parent.taskDone + "] DLC completed");
}
/// <summary>
/// Dispatches a reporting stage to the plug-ins of this collection.
/// <c>domain</c> is routed to <c>eventDLCFinished</c>, <c>init</c> to
/// <c>eventDLCInitiated</c>, and every other stage to the plug-in's own
/// <c>eventUniversal</c>. An exception thrown by a plug-in is logged and saved
/// as a <c>crawlerErrorLog</c>; the loop then continues with the next plug-in.
/// </summary>
/// <param name="stage">Reporting stage being signalled.</param>
/// <param name="__parent">Reporter passed through to the plug-ins.</param>
/// <param name="__task">Domain task passed through to the plug-ins.</param>
/// <param name="wRecord">Site record passed through to the plug-ins.</param>
public void eventUniversal(crawlReportingStageEnum stage, directReporterBase __parent, crawlerDomainTask __task, modelSpiderSiteRecord wRecord)
{
    // Dispatch only happens when at least one plug-in registered for this stage is enabled.
    if (!plugins[stage].Any(x => x.IsEnabled))
    {
        return;
    }

    // NOTE(review): the gate above inspects plugins[stage], but the loop visits
    // allPlugins (enabled or not) - confirm this is intended.
    foreach (reportPlugIn_base plugIn in allPlugins)
    {
        try
        {
            if (stage == crawlReportingStageEnum.domain)
            {
                plugIn.eventDLCFinished(__parent, __task, wRecord);
            }
            else if (stage == crawlReportingStageEnum.init)
            {
                plugIn.eventDLCInitiated(__parent, __task, wRecord);
            }
            else
            {
                plugIn.eventUniversal(stage, __parent, __task, wRecord);
            }
        }
        catch (Exception ex)
        {
            // A failing plug-in must not stop the remaining ones: log it and keep going.
            aceLog.log("Reporting Plugin [" + plugIn.name + "]:" + plugIn.GetType().Name + " at " + stage.ToString() + " execution crashed: " + ex.Message);

            crawlerErrorLog errorLog = new crawlerErrorLog(ex, null, null, crawlerErrorEnum.indexPlugin);
            errorLog.SaveXML();
        }
    }
}
/// <summary>
/// Empty override: this plug-in performs no work for any reporting stage
/// delivered through the universal event. All parameters are unused.
/// </summary>
public override void eventUniversal(crawlReportingStageEnum stage, directReporterBase __parent, crawlerDomainTask __task, modelSpiderSiteRecord wRecord) { }
/// <summary>
/// Called when a DLC has finished: asks <c>plugin_state</c> to check its criteria,
/// handing over the task's grandparent object, the record's <c>tRecord</c>, this
/// plug-in and the current experiment entry.
/// </summary>
/// <param name="__spider">Reporter that raised the event (not used here).</param>
/// <param name="__task">Domain task that finished; <c>parent.parent</c> is forwarded.</param>
/// <param name="__wRecord">Site record whose <c>tRecord</c> is forwarded.</param>
public override void eventDLCFinished(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
{
    // Arguments are gathered in the same left-to-right order the call uses.
    var taskOwner = __task.parent.parent;
    var testRecord = __wRecord.tRecord;
    var experiment = imbWEMManager.index.experimentEntry;

    plugin_state.doCheckCriteria(taskOwner, testRecord, this, experiment);
}
/// <summary>
/// Empty override: this plug-in performs no work when a DLC is initiated.
/// All parameters are unused.
/// </summary>
public override void eventDLCInitiated(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord) { }
/// <summary>
/// Creates the plug-in collection. <paramref name="__parent"/> is passed to the
/// base constructor and also stored in <c>__reporter</c> and <c>parent</c>;
/// <paramref name="__machine"/> is stored in <c>__engine</c>.
/// </summary>
/// <param name="__parent">Reporter this collection is attached to.</param>
/// <param name="__machine">Domain task machine kept as the collection's engine reference.</param>
public reportingPlugInCollection(directReporterBase __parent, crawlerDomainTaskMachine __machine) : base(__parent) { __reporter = __parent; __engine = __machine; parent = __parent; }
/// <summary>
/// Overload taking the reporter: the default implementation ignores
/// <paramref name="__spider"/> and simply calls the parameterless
/// <c>eventPluginInstalled()</c>. Declared virtual, so derived types may override it.
/// </summary>
/// <param name="__spider">Reporter supplied by the caller; unused by this implementation.</param>
public virtual void eventPluginInstalled(directReporterBase __spider) { eventPluginInstalled(); }