Пример #1
0
        /// <summary>
        /// Reacts to the <c>DLCPreinitiation</c> reporting stage: hooks the target-page-attached
        /// event of the site record, obtains the web site entry from the active repository,
        /// remembers it by domain and saves its data structure. Any other stage is ignored.
        /// </summary>
        /// <param name="stage">Reporting stage being signalled.</param>
        /// <param name="__parent">Reporter raising the event (not used here).</param>
        /// <param name="__task">Domain task the event relates to (not used here).</param>
        /// <param name="wRecord">Site record whose domain is being registered.</param>
        public override void eventUniversal(crawlReportingStageEnum stage, directReporterBase __parent, crawlerDomainTask __task, modelSpiderSiteRecord wRecord)
        {
            // Only the pre-initiation stage is handled by this plugin.
            if (stage != crawlReportingStageEnum.DLCPreinitiation)
            {
                return;
            }

            wRecord.context.OnTargetPageAttached += new modelSpiderSiteRecordEvent(onTargetPageAttached);

            imbMCRepository repository = mcm.activeRepository;
            imbMCWebSite    siteRepo   = repository.GetWebSite(wRecord.domainInfo, true, loger);

            if (webSiteReposByDomain.ContainsKey(wRecord.domain))
            {
                // The domain was already registered: keep the first entry and just report it.
                loger.log("DLC sent to CrawlToMC plugin second time: " + wRecord.domain);
            }
            else
            {
                webSiteReposByDomain.Add(wRecord.domain, siteRepo);
            }

            // Performed in both cases, registered or not.
            repository.siteTable.AddOrUpdate(siteRepo.entry);
            siteRepo.SaveDataStructure(repository.folder, loger);
        }
Пример #2
0
        /// <summary>
        /// Called when a domain crawl finishes: refreshes the site entry in the active
        /// repository's site table and saves the web site data structure.
        /// </summary>
        /// <param name="__spider">Reporter raising the event (not used here).</param>
        /// <param name="__task">Finished domain task (not used here).</param>
        /// <param name="__wRecord">Site record whose domain identifies the stored web site entry.</param>
        public override void eventDLCFinished(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
        {
            // The lookup below is only valid for domains previously added to webSiteReposByDomain.
            // Without this guard an unregistered domain would fail inside the indexer with a
            // message that does not even name the domain (assuming dictionary-style semantics).
            if (!webSiteReposByDomain.ContainsKey(__wRecord.domain))
            {
                loger.log("DLC finished for a domain that was not registered with CrawlToMC plugin: " + __wRecord.domain);
                return;
            }

            imbMCRepository mcRepo = mcm.activeRepository;
            imbMCWebSite    wRepo  = webSiteReposByDomain[__wRecord.domain];

            mcRepo.siteTable.AddOrUpdate(wRepo.entry);
            wRepo.SaveDataStructure(mcRepo.folder, loger);
        }
Пример #3
0
 /// <summary>
 /// When domain reporting is switched on in the settings, describes the task's evaluator
 /// into a <c>builderForMarkdown</c> and saves the resulting text as a
 /// "dlc_config_*.txt" file under the session crawler folder's "sites" entry.
 /// </summary>
 /// <param name="__spider">Reporter raising the event (not used here).</param>
 /// <param name="__task">Domain task whose evaluator is described.</param>
 /// <param name="__wRecord">Site record supplying the domain root name for the file name.</param>
 public override void eventDLCInitiated(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
 {
     // Nothing to do unless domain reports were requested.
     if (!imbWEMManager.settings.directReportEngine.doDomainReport)
     {
         return;
     }

     var    sitesFolder = imbWEMManager.index.experimentEntry.sessionCrawlerFolder["sites"];
     string configName  = "dlc_config_" + __wRecord.domainInfo.domainRootName.getFilename(".txt");
     string configPath  = sitesFolder.pathFor(configName);

     builderForMarkdown description = new builderForMarkdown();
     spiderTools.Describe(__task.evaluator, description);

     string content = description.ToString();
     content.saveStringToFile(configPath);
 }
Пример #4
0
        /// <summary>
        /// Called when a domain crawl finishes: records summary figures for the domain in the
        /// side index and, once enough domains have been processed, saves the index.
        /// Aborted tasks and records without any iteration entries are skipped.
        /// </summary>
        /// <param name="__spider">Reporter raising the event (not used here).</param>
        /// <param name="__task">Finished domain task; its status decides whether it is indexed.</param>
        /// <param name="__wRecord">Site record supplying the iteration table and page/term data.</param>
        public override void eventDLCFinished(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
        {
            // Skip aborted crawls and crawls that produced no iteration records.
            if (__task.status == crawlerDomainTaskStatusEnum.aborted
                || __wRecord.iterationTableRecord == null
                || __wRecord.iterationTableRecord.Count == 0)
            {
                return;
            }

            indexDomain entry = records.GetOrCreate(__wRecord.instanceID);

            entry.url    = __wRecord.domainInfo.urlProper;
            entry.domain = __wRecord.domain;

            // Last iteration supplies the final page counts, first iteration the landing data.
            var finalIteration   = __wRecord.iterationTableRecord.LastOrDefault();
            var landingIteration = __wRecord.iterationTableRecord.FirstOrDefault();

            entry.relevantPages    = finalIteration.relevantPageCount;
            entry.notRelevantPages = finalIteration.irrelevantPageCount;
            entry.detected         = __wRecord.web.webActiveLinks.Count();
            entry.Words            = __wRecord.context.targets.termsAll.Count();
            entry.LandingLanguage  = landingIteration.targetLanguage;
            entry.LandingRelevant  = landingIteration.relevantPageCount > 0;

            records.AddOrUpdate(entry, objectTableUpdatePolicy.updateIfHigher);
            DLCCount++;

            // Save only after the configured number of finished domains has accumulated.
            if (DLCCount < imbWEMManager.settings.supportEngine.reportPlugIn_sideIndexer_DLCToSave)
            {
                return;
            }

            DLCCount = 0;
            SaveAll();
            output.log("Side Index save and publish triggered on [" + __task.parent.parent.taskDone + "] DLC completed");
        }
Пример #5
0
        /// <summary>
        /// Dispatches a reporting stage to the plugins of this collection. The <c>domain</c> stage
        /// is routed to <c>eventDLCFinished</c>, the <c>init</c> stage to <c>eventDLCInitiated</c>,
        /// and every other stage to the plugin's own <c>eventUniversal</c>. A failure in one plugin
        /// is logged and stored as a crawler error log; the remaining plugins still run.
        /// </summary>
        /// <param name="stage">Reporting stage being signalled.</param>
        /// <param name="__parent">Reporter passed through to the plugins.</param>
        /// <param name="__task">Domain task passed through to the plugins.</param>
        /// <param name="wRecord">Site record passed through to the plugins.</param>
        public void eventUniversal(crawlReportingStageEnum stage, directReporterBase __parent, crawlerDomainTask __task, modelSpiderSiteRecord wRecord)
        {
            // Nothing is dispatched unless at least one plugin registered for this stage is enabled.
            bool anyEnabledForStage = plugins[stage].Any(x => x.IsEnabled);
            if (!anyEnabledForStage)
            {
                return;
            }

            // NOTE(review): the gate above looks at plugins[stage], but the loop visits allPlugins
            // without checking IsEnabled per plugin - confirm this is intended.
            foreach (reportPlugIn_base plug in allPlugins)
            {
                try
                {
                    if (stage == crawlReportingStageEnum.domain)
                    {
                        plug.eventDLCFinished(__parent, __task, wRecord);
                    }
                    else if (stage == crawlReportingStageEnum.init)
                    {
                        plug.eventDLCInitiated(__parent, __task, wRecord);
                    }
                    else
                    {
                        plug.eventUniversal(stage, __parent, __task, wRecord);
                    }
                }
                catch (Exception ex)
                {
                    // Isolate plugin failures: report, persist the error, continue with the next plugin.
                    string message = "Reporting Plugin [" + plug.name + "]:" + plug.GetType().Name + " at " + stage.ToString() + " execution crashed: " + ex.Message;
                    aceLog.log(message);

                    crawlerErrorLog errorLog = new crawlerErrorLog(ex, null, null, crawlerErrorEnum.indexPlugin);
                    errorLog.SaveXML();
                }
            }
        }
Пример #6
0
 /// <summary>
 /// Override that takes no action for any reporting stage.
 /// </summary>
 /// <param name="stage">Reporting stage being signalled (ignored).</param>
 /// <param name="__parent">Reporter raising the event (ignored).</param>
 /// <param name="__task">Domain task the event relates to (ignored).</param>
 /// <param name="wRecord">Site record the event relates to (ignored).</param>
 public override void eventUniversal(crawlReportingStageEnum stage, directReporterBase __parent, crawlerDomainTask __task, modelSpiderSiteRecord wRecord)
 {
     // Empty body: this plugin does nothing on universal stage events.
 }
Пример #7
0
 /// <summary>
 /// Called when a domain crawl finishes: hands the task's grandparent object, the site
 /// record's <c>tRecord</c>, this plugin and the current experiment entry to
 /// <c>plugin_state.doCheckCriteria</c>.
 /// </summary>
 /// <param name="__spider">Reporter raising the event (not used here).</param>
 /// <param name="__task">Finished domain task; its <c>parent.parent</c> is passed on.</param>
 /// <param name="__wRecord">Site record whose <c>tRecord</c> is passed on.</param>
 public override void eventDLCFinished(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
 {
     // Arguments are resolved in the same left-to-right order as the call expects them.
     var taskRoot   = __task.parent.parent;
     var taskRecord = __wRecord.tRecord;
     var experiment = imbWEMManager.index.experimentEntry;

     plugin_state.doCheckCriteria(taskRoot, taskRecord, this, experiment);
 }
Пример #8
0
 /// <summary>
 /// Override that takes no action when a domain crawl is initiated.
 /// </summary>
 /// <param name="__spider">Reporter raising the event (ignored).</param>
 /// <param name="__task">Domain task being initiated (ignored).</param>
 /// <param name="__wRecord">Site record of the domain (ignored).</param>
 public override void eventDLCInitiated(directReporterBase __spider, crawlerDomainTask __task, modelSpiderSiteRecord __wRecord)
 {
     // Empty body: this plugin does nothing on domain crawl initiation.
 }
Пример #9
0
 /// <summary>
 /// Creates the plugin collection, passing <paramref name="__parent"/> to the base constructor
 /// and keeping references to the reporter and the task machine.
 /// </summary>
 /// <param name="__parent">Reporter stored in both <c>__reporter</c> and <c>parent</c>.</param>
 /// <param name="__machine">Task machine stored in <c>__engine</c>.</param>
 public reportingPlugInCollection(directReporterBase __parent, crawlerDomainTaskMachine __machine) : base(__parent)
 {
     __reporter = __parent;
     __engine   = __machine;
     // The same reporter instance is also assigned to parent.
     parent     = __parent;
 }
Пример #10
0
 /// <summary>
 /// Overridable installation hook that forwards to the parameterless
 /// <c>eventPluginInstalled()</c> overload.
 /// </summary>
 /// <param name="__spider">Reporter the plugin is installed into; not used by this implementation.</param>
 public virtual void eventPluginInstalled(directReporterBase __spider)
 {
     // Delegates to the parameterless overload; __spider is not passed along.
     eventPluginInstalled();
 }