Beispiel #1
0
        /// <summary>
        /// Evaluation procedure for modules that have no layers: every active input element is
        /// passed on as a <c>spiderLink</c>, and the collected list is then handed to <c>rankLinks</c>.
        /// </summary>
        /// <param name="input">Module input. It is cast to <c>spiderModuleData&lt;spiderLink&gt;</c>, so another implementation fails the cast.</param>
        /// <param name="wRecord">Site record; supplies the iteration passed to <c>rankLinks</c> and is forwarded to the report calls.</param>
        /// <returns>A new <c>spiderModuleData&lt;spiderLink&gt;</c> whose active list received the result of <c>rankLinks</c>.</returns>
        public override ISpiderModuleData evaluate(ISpiderModuleData input, modelSpiderSiteRecord wRecord)
        {
            List<spiderLink> passedLinks = new List<spiderLink>();
            spiderModuleData<spiderLink> result = new spiderModuleData<spiderLink>();

            // One cast serves every later use of the typed view of the input.
            spiderModuleData<spiderLink> typedInput = (spiderModuleData<spiderLink>)input;

            // The report table is read exactly as before but is not used further in this method.
            moduleDLCRecord reportTable = typedInput.moduleDLC;
            moduleIterationRecord reportEntry = typedInput.moduleDLCRecordTableEntry;

            // Module level report -- start of evaluation.
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                reportEntry.reportEvaluateStart(typedInput, wRecord, this);
            }

            // This is where the layer modules are emulated: the active elements are passed through unchanged.
            // An element that is not a spiderLink ends up in the list as null because of the "as" conversion.
            input.active.ForEach(element => passedLinks.Add(element as spiderLink));

            // Module level report -- end of evaluation, before ranking.
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                reportEntry.reportEvaluateEnd(passedLinks, wRecord, this);
            }

            result.active.AddRange(rankLinks(passedLinks, wRecord.iteration));

            // Module level report -- after the ranking was applied.
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                reportEntry.reportEvaluateAlterRanking(result.active, wRecord, this);
            }

            return result;
        }
        /// <summary>
        /// Records the state of a module's input at the start of its evaluation: start time, iteration,
        /// number and average age of the input links, the share of links that cycled, and the
        /// URL assertion figures obtained from the page index table.
        /// </summary>
        /// <param name="input">Module input whose active links are inspected.</param>
        /// <param name="wRecord">Site record; only its <c>iteration</c> is read here.</param>
        /// <param name="moduleInstance">The reporting module. When it is a <c>spiderLayerModuleBase</c>, its layer count is added to <c>targets</c>.</param>
        public void reportEvaluateStart(spiderModuleData <spiderLink> input, modelSpiderSiteRecord wRecord, spiderModuleBase moduleInstance)
        {
            start     = DateTime.Now;
            iteration = wRecord.iteration;

            var previousIteration = iteration - 1;

            int cycledLastIteration = 0;   // links with cycleCount > 0 whose last cycle was the previous iteration
            int cycledEarlier       = 0;   // links with cycleCount > 0 whose last cycle was before the previous iteration
            int cycledAgeSum        = 0;   // accumulated here but not stored anywhere by this method
            int ageSum              = 0;   // sum of (iteration - iterationDiscovery) over all input links

            foreach (spiderLink link in input.active)
            {
                inputTargets_collection.Add(link.url);

                var linkAge = iteration - link.iterationDiscovery;

                if (link.marks.cycleCount > 0)
                {
                    var lastCycle = link.marks.cycleLastIteration;

                    if (lastCycle == previousIteration)
                    {
                        cycledLastIteration++;
                        cycledAgeSum += linkAge;
                    }
                    else if (lastCycle < previousIteration)
                    {
                        cycledEarlier++;
                    }
                }

                ageSum += linkAge;
            }

            inputTargets = input.active.Count;
            processed    = inputTargets; // the difference is only in the aggregation

            age = ageSum.GetRatio(inputTargets);

            // The relevance and certainty figures are read before the info-gain estimation is run,
            // the nominal IP afterwards -- same order as before.
            inputTargets_assertion   = imbWEMManager.index.pageIndexTable.GetUrlAssertion(inputTargets_collection);
            inputPotentialPrecission = inputTargets_assertion.relevant;
            evaluationCertainty      = inputTargets_assertion.certainty;
            inputTargets_assertion.performInfoGainEstimation();
            PotInputIP = inputTargets_assertion.IPnominal;

            cyclers   = cycledLastIteration.GetRatio(inputTargets);
            recyclers = cycledEarlier.GetRatio(inputTargets);

            targets     = inputTargets;
            layerModule = moduleInstance as spiderLayerModuleBase;

            if (layerModule == null)
            {
                return;
            }

            // Layer modules additionally count everything held in their layers as targets.
            accumulation = layerModule.layers.CountAll;
            targets     += accumulation;
        }
        /// <summary>
        /// Builds a short text description of a module data set: an optional "module gave up" line,
        /// followed by one line with the active count, the inactive count and the iteration.
        /// </summary>
        /// <param name="data">The module data to describe.</param>
        /// <param name="prefix">Text placed at the start of the counts line.</param>
        /// <returns>The description; every line in it is terminated by a line break.</returns>
        public static string GetInlineDescription(this spiderModuleData <spiderLink> data, string prefix = "input")
        {
            StringBuilder text = new StringBuilder();

            if (data.isModuleGaveUp)
            {
                text.AppendLine(" ---- module gave up ----- ");
            }

            // Both counts are rendered with at least four digits.
            string activeCount   = data.active.Count().ToString("D4");
            string inactiveCount = data.inactive.Count().ToString("D4");

            text.AppendLine($"{prefix} a[{activeCount}] p[{inactiveCount}]  i[{data.iteration}]");

            return text.ToString();
        }
Beispiel #4
0
        /// <summary>
        /// Called after all modules have run, at the end of the frontier ranking algorithm (FRA).
        /// Completes the iteration entry with output size, accumulation/drain, potential precision
        /// and IP changes, per-module accumulation and duration, then stores it in <c>generalRecords</c>.
        /// </summary>
        /// <param name="__wRecord">The site record. It is not read by this method.</param>
        /// <param name="entry">The iteration entry to complete; it is modified in place.</param>
        /// <param name="output">The module data left after the last module; its active links are the FRA output.</param>
        /// <returns>The same <paramref name="entry"/> instance, after it was stored.</returns>
        public frontierRankingAlgorithmIterationRecord reportEndOfFRA(modelSpiderSiteRecord __wRecord, frontierRankingAlgorithmIterationRecord entry, spiderModuleData <spiderLink> output)
        {
            entry.output = output.active.Count;

            // Fewer links out than in counts as accumulation, otherwise the difference is drain
            // (zero when both sizes are equal).
            if (entry.inputTargets > entry.output)
            {
                entry.accumulation = entry.inputTargets - entry.output;
            }
            else
            {
                entry.drain = entry.output - entry.inputTargets;
            }

            // Index the output links by URL; Add throws if the same URL occurs twice.
            Dictionary<string, spiderLink> urls = new Dictionary<string, spiderLink>();

            foreach (var link in output.active)
            {
                urls.Add(link.url, link);
            }

            var assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(urls.Keys);

            // The relevance figure is read before the info-gain estimation, the nominal IP after it.
            entry.outputPotentialPrecission = assertion.relevant;

            assertion.performInfoGainEstimation(entry.PLleft);
            entry.PotOutputIP = assertion.IPnominal;

            entry.PotChangeIP               = entry.PotOutputIP - entry.PotInputIP;
            entry.potentialPrecissionChange = entry.outputPotentialPrecission - entry.inputPotentialPrecission;

            // Count the modules that have a record for this iteration and copy their accumulation figures.
            entry.moduleUse = 0;

            foreach (var modPair in modRecords)
            {
                moduleIterationRecord moduleReport = modPair.Value.GetFirstWhere(nameof(moduleIterationRecord.iteration) + " = " + entry.iteration);
                if (moduleReport == null)
                {
                    continue;
                }

                entry.moduleUse++;

                // Each module feeds the field that carries its own name. The structure and
                // template assignments were previously crossed.
                if (modPair.Key == typeof(languageModule).Name)
                {
                    entry.accumulatedLanguage = moduleReport.accumulated;
                }
                else if (modPair.Key == typeof(structureModule).Name)
                {
                    entry.accumulatedStructure = moduleReport.accumulated;
                }
                else if (modPair.Key == typeof(templateModule).Name)
                {
                    entry.accumulatedTemplate = moduleReport.accumulated;
                }
            }

            entry.duration = DateTime.Now.Subtract(entry.start).TotalSeconds;

            generalRecords.AddOrUpdate(entry);

            return entry;
        }
Beispiel #5
0
        /// <summary>
        /// Reports the start of an iteration: after extraction, before ranking. Gets or creates the
        /// iteration entry and fills in the input statistics (new/old targets, evaluation coverage,
        /// potential precision and nominal IP of the input).
        /// </summary>
        /// <param name="currentIteration">The current iteration; also part of the entry key.</param>
        /// <param name="__wRecord">The site record, used to obtain the page loads left until the limit.</param>
        /// <param name="input">The module data whose active links form the FRA input.</param>
        /// <returns>The entry obtained from <c>generalRecords</c>, with the input statistics set.</returns>
        public frontierRankingAlgorithmIterationRecord reportStartOfFRA(int currentIteration, modelSpiderSiteRecord __wRecord, spiderModuleData <spiderLink> input)
        {
            string entryKey = crawlerName + currentIteration.ToString("D3");
            var    entry    = generalRecords.GetOrCreate(entryKey);

            entry.iteration = currentIteration;

            // Index the input links by URL (Add throws on a repeated URL) and split them into
            // links discovered in this iteration and links discovered earlier.
            Dictionary<string, spiderLink> linksByUrl = new Dictionary<string, spiderLink>();
            int discoveredNow     = 0;
            int discoveredEarlier = 0;

            foreach (spiderLink link in input.active)
            {
                linksByUrl.Add(link.url, link);

                bool isNew = link.iterationDiscovery == currentIteration;
                if (isNew)
                {
                    discoveredNow++;
                }
                else
                {
                    discoveredEarlier++;
                }
            }

            var assertion = imbWEMManager.index.pageIndexTable.GetUrlAssertion(linksByUrl.Keys);

            entry.PLleft = __wRecord.context.GetPageLoadsToLimit(__wRecord.tRecord.instance.settings.limitTotalPageLoad);

            // Links without an evaluation entry and links missing from the index both count as unknown.
            int withEntry    = assertion[indexPageEvaluationEntryState.haveEvaluationEntry].Count();
            int withoutEntry = assertion[indexPageEvaluationEntryState.haveNoEvaluationEntry].Count();
            int notIndexed   = assertion[indexPageEvaluationEntryState.notInTheIndex].Count();

            entry.evaluationKnown     = withEntry;
            entry.evaluationUnknown   = withoutEntry + notIndexed;
            entry.evaluationCertainty = assertion.certainty;

            entry.inputTargets             = linksByUrl.Count;
            entry.newTargets               = discoveredNow;
            entry.oldTargets               = discoveredEarlier;
            entry.inputPotentialPrecission = assertion.relevant;

            // The nominal IP is read only after the info-gain estimation was performed.
            assertion.performInfoGainEstimation(entry.PLleft);
            entry.PotInputIP = assertion.IPnominal;

            return entry;
        }
        /// <summary>
        /// Runs the modules over the currently active links of the site, feeding the output of each
        /// module into the next, stores the final module data on the site record and then evaluates
        /// the objective control rules.
        /// </summary>
        /// <remarks>
        /// Once a module leaves exactly one active link, the remaining modules are not evaluated.
        /// When module reporting is enabled, a report record is still opened and stored for each of them.
        /// </remarks>
        /// <param name="wRecord">The site record: source of the active links and iteration, target of the log message and of <c>currentModuleData</c>.</param>
        /// <returns>The solution set that listened to the result of every control rule.</returns>
        public override spiderObjectiveSolutionSet operation_applyLinkRules(modelSpiderSiteRecord wRecord)
        {
            spiderModuleData<spiderLink> current = new spiderModuleData<spiderLink>
            {
                iteration = wRecord.iteration
            };
            current.active.AddRange(wRecord.web.webActiveLinks);

            // Reporting on module activity -- START
            frontierRankingAlgorithmIterationRecord frontierReportEntry = null;
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                frontierReportEntry = wRecord.frontierDLC.reportStartOfFRA(wRecord.iteration, wRecord, current);
            }

            // Every module is told about the iteration before any of them is evaluated.
            foreach (ISpiderModuleBase module in modules)
            {
                module.startIteration(wRecord.iteration, wRecord);
            }

            bool skipRemaining = false;

            foreach (ISpiderModuleBase module in modules)
            {
                if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
                {
                    current.moduleDLC                 = wRecord.frontierDLC.modRecords[module.GetType().Name];
                    current.moduleDLCRecordTableEntry = current.moduleDLC.StartNewRecord(wRecord.iteration);
                }

                // Decided once per pass, so the flag set below only affects the following modules.
                bool evaluateThisModule = !skipRemaining;

                spiderModuleData<spiderLink> produced = evaluateThisModule
                    ? module.evaluate(current, wRecord) as spiderModuleData<spiderLink>
                    : null;

                // The record is stored on the data set that was given to the module,
                // i.e. before "current" is replaced by the next data set.
                if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
                {
                    current.moduleDLC.AddOrUpdate(current.moduleDLCRecordTableEntry);
                    current.moduleDLCRecordTableEntry.disposeResources();
                }

                if (!evaluateThisModule)
                {
                    continue;
                }

                current = produced.CreateNext();

                if (current.active.Count == 1)
                {
                    wRecord.log("Module " + module.name + " returned single link instance -- skipping other modules");
                    skipRemaining = true;
                }
            }

            // Reporting on module activity -- END
            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                frontierReportEntry = wRecord.frontierDLC.reportEndOfFRA(wRecord, frontierReportEntry, current);
            }

            wRecord.currentModuleData = current;

            // Objective control rules
            spiderObjectiveSolutionSet solutions = new spiderObjectiveSolutionSet();

            foreach (controlObjectiveRuleBase rule in controlRules)
            {
                rule.startIteration(wRecord.iteration, wRecord);
                solutions.listen(rule.evaluate(wRecord));
            }

            return solutions;
        }