/// <summary>
/// Binds this record to a module and a site record, then fills the identification
/// fields (job, crawler, domain, module name/type/class) and the table name from them.
/// </summary>
/// <param name="__module">Module this record describes; stored in <c>module</c>.</param>
/// <param name="__wRecord">Site record supplying the job, crawler and domain names; stored in <c>wRecord</c>.</param>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public void start(spiderModuleBase __module, modelSpiderSiteRecord __wRecord)
{
    // Fail before any field is overwritten, so a bad call cannot leave the record half-initialized.
    if (__module == null)
    {
        throw new ArgumentNullException("__module");
    }
    if (__wRecord == null)
    {
        throw new ArgumentNullException("__wRecord");
    }

    module = __module;
    wRecord = __wRecord;

    jobName = wRecord.tRecord.aJob.name;
    crawlerName = wRecord.tRecord.instance.name;
    domainName = wRecord.domain;

    // The table name is built from the record name, so name must be assigned first.
    name = module.name;
    table.TableName = name + "_" + wRecord.domainInfo.domainName;

    moduleName = module.name;

    // Resolve the runtime type once; it feeds moduleType, moduleClass and the summary mapping.
    var runtimeType = module.GetType();
    moduleType = runtimeType.BaseType.Name;
    moduleClass = runtimeType;

    // Exact-type match (not "is"): at most one branch can apply. For any other
    // type moduleSummaryEnum keeps whatever value it already had.
    if (runtimeType == typeof(languageModule))
    {
        moduleSummaryEnum = moduleIterationRecordSummary.language;
    }
    else if (runtimeType == typeof(templateModule))
    {
        moduleSummaryEnum = moduleIterationRecordSummary.template;
    }
    else if (runtimeType == typeof(structureModule))
    {
        moduleSummaryEnum = moduleIterationRecordSummary.structure;
    }
    else if (runtimeType == typeof(diversityModule))
    {
        moduleSummaryEnum = moduleIterationRecordSummary.diversity;
    }
}
/// <summary>
/// Records the state at the beginning of a module evaluation: the start time, the
/// current iteration, statistics over the active input links (count, age, cyclers,
/// recyclers) and the index-based assertion of the input URLs.
/// </summary>
/// <param name="input">Module data whose <c>active</c> links are the evaluation input.</param>
/// <param name="wRecord">Site record supplying the current <c>iteration</c>.</param>
/// <param name="moduleInstance">
/// Module being evaluated. When it is a <c>spiderLayerModuleBase</c>, its layer content
/// count is stored in <c>accumulation</c> and added to <c>targets</c>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="input"/> or <paramref name="wRecord"/> is <c>null</c>.</exception>
public void reportEvaluateStart(spiderModuleData<spiderLink> input, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
{
    // Validate before touching any field so a bad call leaves the record unchanged.
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }
    if (wRecord == null)
    {
        throw new ArgumentNullException("wRecord");
    }

    start = DateTime.Now;
    iteration = wRecord.iteration;

    var previousIteration = iteration - 1;

    int cyclerCount = 0;
    int recyclerCount = 0;
    int ageSum = 0;

    foreach (spiderLink link in input.active)
    {
        // NOTE(review): inputTargets_collection is only appended to here; if this record
        // is reused across iterations the URLs accumulate -- confirm that is intended.
        inputTargets_collection.Add(link.url);

        if (link.marks.cycleCount > 0)
        {
            var lastCycle = link.marks.cycleLastIteration;
            if (lastCycle == previousIteration)
            {
                // Link was also cycled in the immediately preceding iteration.
                cyclerCount++;
            }
            else if (lastCycle < previousIteration)
            {
                // Link was cycled earlier and is now back in the input.
                recyclerCount++;
            }
        }

        // Age of a link = iterations elapsed since it was discovered.
        ageSum += iteration - link.iterationDiscovery;
    }

    inputTargets = input.active.Count();
    processed = inputTargets; // the difference is only in the aggregation

    // GetRatio is a project extension not visible here; presumably value / argument -- confirm zero handling.
    age = ageSum.GetRatio(inputTargets);

    inputTargets_assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(inputTargets_collection);
    inputPotentialPrecission = inputTargets_assertion.relevant;
    evaluationCertainty = inputTargets_assertion.certainty;

    // IPnominal is read only after the estimation call, as in the original order.
    inputTargets_assertion.performInfoGainEstimation();
    PotInputIP = inputTargets_assertion.IPnominal;

    targets = inputTargets;

    layerModule = moduleInstance as spiderLayerModuleBase;

    cyclers = cyclerCount.GetRatio(inputTargets);
    recyclers = recyclerCount.GetRatio(inputTargets);

    if (layerModule != null)
    {
        // Links already held in the module's layers also count as targets.
        accumulation = layerModule.layers.CountAll;
        targets += accumulation;
    }
}
/// <summary>
/// Records the effects of the ranking alteration step: which layer supplied the output,
/// whether the output size equals the per-iteration new-links limit, the relevance of
/// the ranked output, and the time elapsed since <c>start</c>.
/// </summary>
/// <param name="output">Ranked links; asserted against the page index.</param>
/// <param name="wRecord">Site record supplying <c>limitIterationNewLinks</c> through its crawler settings.</param>
/// <param name="moduleInstance">Not read by this method.</param>
/// <remarks>
/// Reads <c>layerModule</c>, <c>start</c> and <c>outCut_assertion</c>, which are assigned by
/// <c>reportEvaluateStart</c> / <c>reportEvaluateEnd</c>; those must have run first.
/// </remarks>
public void reportEvaluateAlterRanking(IList<spiderLink> output, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
{
    if (layerModule != null)
    {
        // Flag the layer that the output was drawn from; other ids set no flag.
        var activeLayerId = layerModule.layers.layer_id;
        if (activeLayerId == 0)
        {
            primaryLayerForOutput = 1;
        }
        else if (activeLayerId == 1)
        {
            secondaryLayerForOutput = 1;
        }
        else if (activeLayerId == 2)
        {
            reserveLayerForOutput = 1;
        }
    }

    // Single Output -- reducing the chain
    int outputCount = output.Count;
    var newLinksLimit = wRecord.tRecord.instance.settings.limitIterationNewLinks;
    if (outputCount == newLinksLimit)
    {
        singleTargetOutput = 1;
    }

    indexURLAssertionResult rankedAssertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(output);
    potentialLoadTakePrecission = rankedAssertion.relevant;

    // Ranking effect = relevance after ranking minus relevance of the pre-ranking cut.
    rankingEffects = potentialLoadTakePrecission - outCut_assertion.relevant;

    TimeSpan elapsed = DateTime.Now.Subtract(start);
    duration = elapsed.TotalSeconds;
}
/// <summary>
/// Records the state at the end of a module evaluation: per-layer target counts, ages and
/// distribution indicators (layer modules only), the output size relative to the input
/// (<c>Reduction</c> or <c>drain</c>), and the index-based assertion of the output.
/// </summary>
/// <param name="__output">Links produced by the module evaluation.</param>
/// <param name="wRecord">Site record whose <c>context</c> supplies the next-iteration load-take size.</param>
/// <param name="moduleInstance">
/// Module that was evaluated. Layer statistics are collected only when it is a
/// <c>spiderLayerModuleBase</c>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="__output"/> or <paramref name="wRecord"/> is <c>null</c>.</exception>
public void reportEvaluateEnd(IList<spiderLink> __output, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
{
    // Validate before touching any field so a bad call leaves the record unchanged.
    if (__output == null)
    {
        throw new ArgumentNullException("__output");
    }
    if (wRecord == null)
    {
        throw new ArgumentNullException("wRecord");
    }

    layerModule = moduleInstance as spiderLayerModuleBase;

    if (layerModule != null)
    {
        // Links added to the layers since the count stored in accumulation.
        accumulated = layerModule.layers.CountAll - accumulation;

        var layerCount = layerModule.layers.Count;

        // Primary layer: any not-relevant content counts as improper distribution.
        // Guarded like the other layers so an empty layer set is skipped, not an index error.
        if (layerCount > 0)
        {
            indexURLAssertionResult primaryAssertion = new indexURLAssertionResult();
            int primaryAgeSum = 0;

            primaryLayerTargets = layerModule.layers[0].Count;
            foreach (spiderLink link in layerModule.layers[0])
            {
                primaryAgeSum += (iteration - link.iterationDiscovery);
                primaryAssertion.Add(imbWEMManager.index.pageIndexTable.GetPageAssertion(link.url), link.url);
            }

            primaryLayerTargetAge = primaryAgeSum.GetRatio(primaryLayerTargets);
            primaryLayerInproperDistribution = primaryAssertion.notRelevant;
        }

        // Secondary layer: relevant content is reported only when the primary layer holds targets.
        if (layerCount > 1)
        {
            indexURLAssertionResult secondaryAssertion = new indexURLAssertionResult();
            int secondaryAgeSum = 0;

            secondaryLayerTargets = layerModule.layers[1].Count;
            foreach (spiderLink link in layerModule.layers[1])
            {
                secondaryAgeSum += (iteration - link.iterationDiscovery);
                secondaryAssertion.Add(imbWEMManager.index.pageIndexTable.GetPageAssertion(link.url), link.url);
            }

            secondaryLayerTargetAge = secondaryAgeSum.GetRatio(secondaryLayerTargets);
            if (primaryLayerTargets > 0)
            {
                secondaryLayerInproperDistribution = secondaryAssertion.relevant;
            }
        }

        // Reserve layer: relevant content is reported only when the secondary layer holds targets.
        if (layerCount > 2)
        {
            indexURLAssertionResult reserveAssertion = new indexURLAssertionResult();
            int reserveAgeSum = 0;

            reserveLayerTargets = layerModule.layers[2].Count;
            foreach (spiderLink link in layerModule.layers[2])
            {
                reserveAgeSum += (iteration - link.iterationDiscovery);
                reserveAssertion.Add(imbWEMManager.index.pageIndexTable.GetPageAssertion(link.url), link.url);
            }

            reserveLayerTargetAge = reserveAgeSum.GetRatio(reserveLayerTargets);
            if (secondaryLayerTargets > 0)
            {
                reserveLayerInproperDistribution = reserveAssertion.relevant;
            }
        }
    }

    output = __output.Count;

    if (inputTargets > output)
    {
        // Fewer links out than in: store the reduction as a ratio of the input.
        Reduction = (inputTargets - output).GetRatio(inputTargets);
    }
    else
    {
        // Same or more links out than in: drain is kept as an absolute count, not a ratio.
        drain = output - inputTargets;
    }

    out_assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(__output);
    out_assertion.performInfoGainEstimation();

    // Assertion over only the first LTn output links, i.e. the pre-ranking cut.
    LTn = wRecord.context.GetNextIterationLTSize(__output);
    outCut_assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(__output.Take(LTn));

    PotOutputIP = out_assertion.IPnominal;
    PotChangeIP = PotOutputIP - PotInputIP;

    outputPotentialPrecission = out_assertion.relevant;
    potentialPrecissionChange = outputPotentialPrecission - inputPotentialPrecission;
}