/// <summary>
/// Evaluation procedure for modules without layers: every active input link is forwarded
/// unchanged and the forwarded set is then handed to <c>rankLinks</c>.
/// </summary>
/// <param name="input">Module input; it is cast to <c>spiderModuleData&lt;spiderLink&gt;</c>, so any other type throws.</param>
/// <param name="wRecord">The site record; its iteration is passed to ranking and to the module-level report.</param>
/// <returns>A new module data instance whose active list holds the result of <c>rankLinks</c>.</returns>
public override ISpiderModuleData evaluate(ISpiderModuleData input, modelSpiderSiteRecord wRecord)
{
    List<spiderLink> forwarded = new List<spiderLink>();
    spiderModuleData<spiderLink> result = new spiderModuleData<spiderLink>();

    spiderModuleData<spiderLink> typedInput = (spiderModuleData<spiderLink>)input;
    moduleIterationRecord reportEntry = typedInput.moduleDLCRecordTableEntry;

    // Module-level report: evaluation start.
    if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
    {
        reportEntry.reportEvaluateStart(typedInput, wRecord, this);
    }

    // This is where a layered module would distribute links; here all active links simply pass through.
    foreach (var candidate in input.active)
    {
        forwarded.Add(candidate as spiderLink);
    }

    // Module-level report: evaluation end.
    if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
    {
        reportEntry.reportEvaluateEnd(forwarded, wRecord, this);
    }

    var ranked = rankLinks(forwarded, wRecord.iteration);
    result.active.AddRange(ranked);

    // Module-level report: state after ranking.
    if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
    {
        reportEntry.reportEvaluateAlterRanking(result.active, wRecord, this);
    }

    return result;
}
/// <summary>
/// Records the state of a module's input at the moment its evaluation starts:
/// timestamp, iteration, input size, link age, cycler/recycler ratios and the URL assertion figures.
/// </summary>
/// <param name="input">Module input whose active links are measured.</param>
/// <param name="wRecord">The site record supplying the current iteration.</param>
/// <param name="moduleInstance">The module being evaluated; when it is a layer module, its layer count is added to the targets.</param>
public void reportEvaluateStart(spiderModuleData<spiderLink> input, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
{
    start = DateTime.Now;
    iteration = wRecord.iteration;

    int cyclerCount = 0;
    int recyclerCount = 0;
    int totalAge = 0;

    foreach (spiderLink link in input.active)
    {
        inputTargets_collection.Add(link.url);

        if (link.marks.cycleCount > 0)
        {
            int previousIteration = iteration - 1;

            if (link.marks.cycleLastIteration == previousIteration)
            {
                // Last cycled in the immediately preceding iteration.
                cyclerCount++;
            }
            else if (link.marks.cycleLastIteration < previousIteration)
            {
                // Last cycled at some earlier iteration.
                recyclerCount++;
            }
        }

        totalAge += iteration - link.iterationDiscovery;
    }

    inputTargets = input.active.Count();
    processed = inputTargets; // the only difference is in the aggregation
    age = totalAge.GetRatio(inputTargets);

    inputTargets_assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(inputTargets_collection);
    inputPotentialPrecission = inputTargets_assertion.relevant;
    evaluationCertainty = inputTargets_assertion.certainty;

    inputTargets_assertion.performInfoGainEstimation();
    PotInputIP = inputTargets_assertion.IPnominal;

    targets = inputTargets;
    layerModule = moduleInstance as spiderLayerModuleBase;

    cyclers = cyclerCount.GetRatio(inputTargets);
    recyclers = recyclerCount.GetRatio(inputTargets);

    if (layerModule == null)
    {
        return;
    }

    // Layer modules also hold links inside their layers; count those as targets too.
    accumulation = layerModule.layers.CountAll;
    targets += accumulation;
}
/// <summary>
/// Builds a short textual description of the module data: an optional "module gave up" line,
/// followed by one line with the active count, the inactive count and the iteration.
/// </summary>
/// <param name="data">The module data to describe.</param>
/// <param name="prefix">Text placed at the start of the counts line.</param>
/// <returns>The description; each line is terminated with a line break.</returns>
public static string GetInlineDescription(this spiderModuleData<spiderLink> data, string prefix = "input")
{
    StringBuilder description = new StringBuilder();

    if (data.isModuleGaveUp)
    {
        description.AppendLine(" ---- module gave up ----- ");
    }

    // Counts are zero-padded to four digits.
    string activeCount = data.active.Count().ToString("D4");
    string inactiveCount = data.inactive.Count().ToString("D4");

    description.Append(prefix);
    description.Append(" a[").Append(activeCount);
    description.Append("] p[").Append(inactiveCount);
    description.Append("] i[").Append(data.iteration).Append("]");
    description.AppendLine();

    return description.ToString();
}
/// <summary>
/// Completes the frontier ranking algorithm (FRA) record of one iteration; called after all modules have run.
/// </summary>
/// <remarks>
/// Fills in the output size, accumulation or drain against the input size, the output URL assertion figures,
/// their change against the input figures, per-module accumulation and the duration, then stores the entry
/// in <c>generalRecords</c>.
/// </remarks>
/// <param name="__wRecord">The site record (not read by this method).</param>
/// <param name="entry">The iteration record to complete and store.</param>
/// <param name="output">The module data produced by the last module; its active links are the FRA output.</param>
/// <returns>The same <paramref name="entry"/> instance, after it was stored.</returns>
public frontierRankingAlgorithmIterationRecord reportEndOfFRA(modelSpiderSiteRecord __wRecord, frontierRankingAlgorithmIterationRecord entry, spiderModuleData<spiderLink> output)
{
    entry.output = output.active.Count;

    // Fewer links out than in counts as accumulation; otherwise the difference is drain (0 when equal).
    if (entry.inputTargets > entry.output)
    {
        entry.accumulation = entry.inputTargets - entry.output;
    }
    else
    {
        entry.drain = entry.output - entry.inputTargets;
    }

    // NOTE(review): Dictionary.Add throws on a repeated URL, so this assumes the active links
    // are unique by URL -- confirm against the code that populates the frontier.
    Dictionary<string, spiderLink> urls = new Dictionary<string, spiderLink>();
    foreach (spiderLink link in output.active)
    {
        urls.Add(link.url, link);
    }

    var assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(urls.Keys);

    entry.outputPotentialPrecission = assertion.relevant;

    assertion.performInfoGainEstimation(entry.PLleft);

    entry.PotOutputIP = assertion.IPnominal;
    entry.PotChangeIP = entry.PotOutputIP - entry.PotInputIP;
    entry.potentialPrecissionChange = entry.outputPotentialPrecission - entry.inputPotentialPrecission;

    // Count the modules that have a record for this iteration and copy their accumulation figures.
    entry.moduleUse = 0;

    foreach (var modPair in modRecords)
    {
        moduleIterationRecord moduleReport = modPair.Value.GetFirstWhere(nameof(moduleIterationRecord.iteration) + " = " + entry.iteration);

        if (moduleReport == null)
        {
            continue;
        }

        entry.moduleUse++;

        if (modPair.Key == typeof(languageModule).Name)
        {
            entry.accumulatedLanguage = moduleReport.accumulated;
        }
        else if (modPair.Key == typeof(structureModule).Name)
        {
            // Structure module feeds the structure figure (previously written to accumulatedTemplate).
            entry.accumulatedStructure = moduleReport.accumulated;
        }
        else if (modPair.Key == typeof(templateModule).Name)
        {
            // Template module feeds the template figure (previously written to accumulatedStructure).
            entry.accumulatedTemplate = moduleReport.accumulated;
        }
    }

    // NOTE(review): assumes entry.start was set when the entry was created -- confirm;
    // reportStartOfFRA does not assign it.
    entry.duration = DateTime.Now.Subtract(entry.start).TotalSeconds;

    generalRecords.AddOrUpdate(entry);

    return entry;
}
/// <summary>
/// Reports the start of a frontier ranking algorithm (FRA) iteration: after extraction, before ranking.
/// </summary>
/// <param name="currentIteration">The current iteration.</param>
/// <param name="__wRecord">The site record; used to obtain the number of page loads left before the limit.</param>
/// <param name="input">Module data whose active links are the FRA input.</param>
/// <returns>The iteration record, obtained or created under the key crawler name + three-digit iteration.</returns>
public frontierRankingAlgorithmIterationRecord reportStartOfFRA(int currentIteration, modelSpiderSiteRecord __wRecord, spiderModuleData<spiderLink> input)
{
    string entryKey = crawlerName + currentIteration.ToString("D3");
    var entry = generalRecords.GetOrCreate(entryKey);
    entry.iteration = currentIteration;

    Dictionary<string, spiderLink> frontierByUrl = new Dictionary<string, spiderLink>();
    int discoveredNow = 0;
    int discoveredEarlier = 0;

    foreach (spiderLink link in input.active)
    {
        frontierByUrl.Add(link.url, link);

        // A link is "new" when it was discovered in the iteration being reported.
        bool isNew = link.iterationDiscovery == currentIteration;
        if (isNew)
        {
            discoveredNow++;
        }
        else
        {
            discoveredEarlier++;
        }
    }

    var assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(frontierByUrl.Keys);

    entry.PLleft = __wRecord.context.GetPageLoadsToLimit(__wRecord.tRecord.instance.settings.limitTotalPageLoad);

    entry.evaluationKnown = assertion[indexPageEvaluationEntryState.haveEvaluationEntry].Count();

    // "Unknown" covers both indexed pages without an evaluation entry and pages missing from the index.
    entry.evaluationUnknown = assertion[indexPageEvaluationEntryState.haveNoEvaluationEntry].Count()
        + assertion[indexPageEvaluationEntryState.notInTheIndex].Count();

    entry.evaluationCertainty = assertion.certainty;

    entry.inputTargets = frontierByUrl.Count;
    entry.newTargets = discoveredNow;
    entry.oldTargets = discoveredEarlier;

    entry.inputPotentialPrecission = assertion.relevant;

    assertion.performInfoGainEstimation(entry.PLleft);
    entry.PotInputIP = assertion.IPnominal;

    return entry;
}
/// <summary>
/// Runs the active links of the site through every module in turn, optionally reporting on
/// module activity, and then evaluates the objective control rules.
/// </summary>
/// <param name="wRecord">The site record being crawled; receives the final module data in <c>currentModuleData</c>.</param>
/// <returns>The solution set collected from all control rules.</returns>
public override spiderObjectiveSolutionSet operation_applyLinkRules(modelSpiderSiteRecord wRecord)
{
    // Seed the module chain with the links currently active on the site.
    spiderModuleData<spiderLink> current = new spiderModuleData<spiderLink>();
    current.iteration = wRecord.iteration;
    current.active.AddRange(wRecord.web.webActiveLinks);

    frontierRankingAlgorithmIterationRecord fraEntry = null;

    // Reporting on module activity -- START
    if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
    {
        fraEntry = wRecord.frontierDLC.reportStartOfFRA(wRecord.iteration, wRecord, current);
    }

    foreach (ISpiderModuleBase module in modules)
    {
        module.startIteration(wRecord.iteration, wRecord);
    }

    bool skipRemaining = false;

    foreach (ISpiderModuleBase module in modules)
    {
        if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
        {
            current.moduleDLC = wRecord.frontierDLC.modRecords[module.GetType().Name];
            current.moduleDLCRecordTableEntry = current.moduleDLC.StartNewRecord(wRecord.iteration);
        }

        // Once a module has narrowed the set to a single link, later modules are not evaluated,
        // but their report records are still opened and stored.
        bool runModule = !skipRemaining;

        spiderModuleData<spiderLink> produced = null;
        if (runModule)
        {
            produced = module.evaluate(current, wRecord) as spiderModuleData<spiderLink>;
        }

        if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
        {
            current.moduleDLC.AddOrUpdate(current.moduleDLCRecordTableEntry);
            current.moduleDLCRecordTableEntry.disposeResources();
        }

        if (!runModule)
        {
            continue;
        }

        // The output of this module becomes the input of the next one.
        current = produced.CreateNext();

        if (current.active.Count == 1)
        {
            wRecord.log("Module " + module.name + " returned single link instance -- skipping other modules");
            skipRemaining = true;
        }
    }

    // Reporting on module activity -- END
    if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
    {
        fraEntry = wRecord.frontierDLC.reportEndOfFRA(wRecord, fraEntry, current);
    }

    wRecord.currentModuleData = current;

    // Objective control rules
    spiderObjectiveSolutionSet solutions = new spiderObjectiveSolutionSet();

    foreach (controlObjectiveRuleBase rule in controlRules)
    {
        rule.startIteration(wRecord.iteration, wRecord);

        var verdict = rule.evaluate(wRecord);
        solutions.listen(verdict);
    }

    return solutions;
}