Exemple #1
0
        /// <summary>
        /// Binds this record to a module and a site record: stores both references, copies the job,
        /// crawler, domain and module identifiers, names the backing table, and selects the summary
        /// category that corresponds to the module's concrete type.
        /// </summary>
        /// <param name="__module">Module whose name and runtime type are recorded.</param>
        /// <param name="__wRecord">Site record supplying the job name, crawler name and domain.</param>
        public void start(spiderModuleBase __module, modelSpiderSiteRecord __wRecord)
        {
            module  = __module;
            wRecord = __wRecord;

            jobName     = wRecord.tRecord.aJob.name;
            crawlerName = wRecord.tRecord.instance.name;
            domainName  = wRecord.domain;

            // The table name is the module name suffixed with the domain name.
            name            = module.name;
            table.TableName = name + "_" + wRecord.domainInfo.domainName;

            moduleName = module.name;

            // Resolve the runtime type once; it feeds both the base-type name and the class checks below.
            var runtimeType = module.GetType();
            moduleType  = runtimeType.BaseType.Name;
            moduleClass = runtimeType;

            // Exact type comparison: a subclass of one of these modules does not match,
            // and in that case moduleSummaryEnum keeps whatever value it already had.
            if (runtimeType == typeof(languageModule))
            {
                moduleSummaryEnum = moduleIterationRecordSummary.language;
            }
            else if (runtimeType == typeof(templateModule))
            {
                moduleSummaryEnum = moduleIterationRecordSummary.template;
            }
            else if (runtimeType == typeof(structureModule))
            {
                moduleSummaryEnum = moduleIterationRecordSummary.structure;
            }
            else if (runtimeType == typeof(diversityModule))
            {
                moduleSummaryEnum = moduleIterationRecordSummary.diversity;
            }
        }
        /// <summary>
        /// Captures input-side statistics at the beginning of a module evaluation: the start time, the
        /// current iteration, the number and average age of the active input links, the ratio of
        /// cycling and recycling links, and the URL assertion computed for the input set.
        /// </summary>
        /// <param name="input">Module data whose <c>active</c> links are measured.</param>
        /// <param name="wRecord">Site record providing the current iteration number.</param>
        /// <param name="moduleInstance">
        /// The module being evaluated. When it is a <c>spiderLayerModuleBase</c>, the total count of
        /// its layers is stored as <c>accumulation</c> and added to <c>targets</c>.
        /// </param>
        public void reportEvaluateStart(spiderModuleData <spiderLink> input, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
        {
            start     = DateTime.Now;
            iteration = wRecord.iteration;

            var previousIteration = iteration - 1;

            int cyclerCount   = 0;
            int recyclerCount = 0;
            int totalAge      = 0;

            foreach (spiderLink link in input.active)
            {
                // NOTE(review): the collection is only appended to here; confirm it is cleared
                // elsewhere if this method can run more than once per record.
                inputTargets_collection.Add(link.url);

                if (link.marks.cycleCount > 0)
                {
                    var lastCycle = link.marks.cycleLastIteration;

                    if (lastCycle == previousIteration)
                    {
                        // Link was cycled in the immediately preceding iteration.
                        cyclerCount++;
                    }
                    else if (lastCycle < previousIteration)
                    {
                        // Link was cycled earlier than the preceding iteration.
                        recyclerCount++;
                    }
                }

                totalAge += iteration - link.iterationDiscovery;
            }

            inputTargets = input.active.Count();
            processed    = inputTargets; // <-- the difference is only in the aggregation

            age = totalAge.GetRatio(inputTargets);

            // relevant/certainty are read before performInfoGainEstimation(); IPnominal is read after it.
            inputTargets_assertion   = imbWEMManager.index.pageIndexTable.GetUrlAssertion(inputTargets_collection);
            inputPotentialPrecission = inputTargets_assertion.relevant;
            evaluationCertainty      = inputTargets_assertion.certainty;
            inputTargets_assertion.performInfoGainEstimation();

            PotInputIP = inputTargets_assertion.IPnominal;

            targets = inputTargets;

            layerModule = moduleInstance as spiderLayerModuleBase;

            cyclers   = cyclerCount.GetRatio(inputTargets);
            recyclers = recyclerCount.GetRatio(inputTargets);

            if (layerModule != null)
            {
                accumulation = layerModule.layers.CountAll;

                targets += accumulation;
            }
        }
        /// <summary>
        /// Records ranking-stage statistics: which layer (by <c>layer_id</c>) supplied the output, whether
        /// the output size equals the configured per-iteration new-link limit, the relevance of the
        /// ranked output, its difference from <c>outCut_assertion</c>, and the elapsed time since
        /// <c>start</c> in seconds.
        /// </summary>
        /// <param name="output">Ranked links whose URL assertion is evaluated.</param>
        /// <param name="wRecord">Site record giving access to the crawler settings.</param>
        /// <param name="moduleInstance">Not used by this method.</param>
        public void reportEvaluateAlterRanking(IList <spiderLink> output, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
        {
            if (layerModule != null)
            {
                // Flag the layer currently selected by the layer collection; ids other than 0-2 set nothing.
                var activeLayerId = layerModule.layers.layer_id;

                if (activeLayerId == 0)
                {
                    primaryLayerForOutput = 1;
                }
                else if (activeLayerId == 1)
                {
                    secondaryLayerForOutput = 1;
                }
                else if (activeLayerId == 2)
                {
                    reserveLayerForOutput = 1;
                }
            }

            // Single Output -- reducing the chain
            var newLinksLimit = wRecord.tRecord.instance.settings.limitIterationNewLinks;
            if (output.Count == newLinksLimit)
            {
                singleTargetOutput = 1;
            }

            indexURLAssertionResult rankedAssertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(output);
            potentialLoadTakePrecission = rankedAssertion.relevant;

            // Relevance of the ranked output compared with the relevance held in outCut_assertion.
            rankingEffects = potentialLoadTakePrecission - outCut_assertion.relevant;

            // Measured last so the duration includes all of the work above.
            duration = (DateTime.Now - start).TotalSeconds;
        }
        /// <summary>
        /// Captures output-side statistics at the end of a module evaluation. For layer modules it
        /// records, per layer, the number of links, their average age and a distribution measure; for
        /// every module it records the output size, reduction or drain against the input, and the
        /// output URL assertions together with their differences from the input-side values.
        /// </summary>
        /// <param name="__output">Links produced by the module.</param>
        /// <param name="wRecord">Site record whose context supplies <c>LTn</c> for the given output.</param>
        /// <param name="moduleInstance">
        /// The evaluated module; per-layer statistics are collected only when it is a
        /// <c>spiderLayerModuleBase</c>.
        /// </param>
        public void reportEvaluateEnd(IList <spiderLink> __output, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
        {
            indexURLAssertionResult primaryAssertion   = new indexURLAssertionResult();
            indexURLAssertionResult secondaryAssertion = new indexURLAssertionResult();
            indexURLAssertionResult reserveAssertion   = new indexURLAssertionResult();

            layerModule = moduleInstance as spiderLayerModuleBase;
            if (layerModule != null)
            {
                // Growth of the layers relative to the accumulation value already stored on this record.
                accumulated = layerModule.layers.CountAll - accumulation;

                // Primary layer (index 0): read without a Count check, unlike the two layers below.
                int primaryAgeSum = 0;
                primaryLayerTargets = layerModule.layers[0].Count;
                foreach (spiderLink primaryLink in layerModule.layers[0])
                {
                    primaryAgeSum += iteration - primaryLink.iterationDiscovery;
                    primaryAssertion.Add(imbWEMManager.index.pageIndexTable.GetPageAssertion(primaryLink.url), primaryLink.url);
                }
                primaryLayerTargetAge            = primaryAgeSum.GetRatio(primaryLayerTargets);
                primaryLayerInproperDistribution = primaryAssertion.notRelevant;

                // Secondary layer (index 1), only when the module has one.
                if (layerModule.layers.Count > 1)
                {
                    int secondaryAgeSum = 0;
                    secondaryLayerTargets = layerModule.layers[1].Count;
                    foreach (spiderLink secondaryLink in layerModule.layers[1])
                    {
                        secondaryAgeSum += iteration - secondaryLink.iterationDiscovery;
                        secondaryAssertion.Add(imbWEMManager.index.pageIndexTable.GetPageAssertion(secondaryLink.url), secondaryLink.url);
                    }
                    secondaryLayerTargetAge = secondaryAgeSum.GetRatio(secondaryLayerTargets);

                    // Recorded only while the primary layer holds targets.
                    if (primaryLayerTargets > 0)
                    {
                        secondaryLayerInproperDistribution = secondaryAssertion.relevant;
                    }
                }

                // Reserve layer (index 2), only when the module has one.
                if (layerModule.layers.Count > 2)
                {
                    int reserveAgeSum = 0;
                    reserveLayerTargets = layerModule.layers[2].Count;
                    foreach (spiderLink reserveLink in layerModule.layers[2])
                    {
                        reserveAgeSum += iteration - reserveLink.iterationDiscovery;
                        reserveAssertion.Add(imbWEMManager.index.pageIndexTable.GetPageAssertion(reserveLink.url), reserveLink.url);
                    }
                    reserveLayerTargetAge = reserveAgeSum.GetRatio(reserveLayerTargets);

                    // Recorded only while the secondary layer holds targets.
                    if (secondaryLayerTargets > 0)
                    {
                        reserveLayerInproperDistribution = reserveAssertion.relevant;
                    }
                }
            }

            output = __output.Count;

            if (inputTargets <= output)
            {
                // Output is at least as large as the input: store the absolute surplus (zero when equal).
                drain = output - inputTargets;
            }
            else
            {
                // Output is smaller than the input: store the dropped share as a ratio of the input.
                Reduction = (inputTargets - output).GetRatio(inputTargets);
            }

            out_assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(__output);
            out_assertion.performInfoGainEstimation();

            LTn = wRecord.context.GetNextIterationLTSize(__output);

            // Assertion limited to the first LTn output links.
            outCut_assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(__output.Take(LTn));

            PotOutputIP = out_assertion.IPnominal;
            PotChangeIP = PotOutputIP - PotInputIP;

            outputPotentialPrecission = out_assertion.relevant;
            potentialPrecissionChange = outputPotentialPrecission - inputPotentialPrecission;
        }