/// <summary>
        /// E2: Applies passive link rules to new Active links
        /// </summary>
        /// <param name="wRecord">The s record.</param>
        public override spiderObjectiveSolutionSet operation_applyLinkRules(modelSpiderSiteRecord wRecord)
        {
            spiderObjectiveSolutionSet output = new spiderObjectiveSolutionSet();

            int c = 0;

            foreach (spiderLink sLink in Enumerable.Where(wRecord.web.webActiveLinks, x => !x.flags.HasFlag(spiderLinkFlags.passiveEvaluated)))
            {
                foreach (spiderEvalRuleForLinkBase rule in linkPassiveRules)
                {
                    sLink.marks.deploy(rule.evaluate(sLink));
                }
                c++;
                sLink.flags |= spiderLinkFlags.passiveEvaluated;
            }
            if (c > 0)
            {
                wRecord.log("Passive evaluation of [" + c + "] new links");
            }

            /// cleaning rule memory
            foreach (ruleActiveBase aRule in linkActiveRules)
            {
                aRule.startIteration(wRecord.iteration, wRecord);
            }

            /// perceiving current situation
            foreach (spiderLink sLink in wRecord.web.webActiveLinks)
            {
                sLink.linkAge++; // <---------------------------------------------------- adding link age points
                foreach (ruleActiveBase aRule in linkActiveRules)
                {
                    aRule.learn(sLink);
                }
            }

            /// apply update on results
            foreach (spiderLink sLink in wRecord.web.webActiveLinks)
            {
                foreach (ruleActiveBase aRule in linkActiveRules)
                {
                    sLink.marks.deploy(aRule.evaluate(sLink));
                }
                sLink.marks.calculate(wRecord.iteration);
            }


            // <----------------------------sorts the links
            wRecord.web.webActiveLinks.items.Sort((x, y) => y.marks.score.CompareTo(x.marks.score));


            foreach (controlObjectiveRuleBase aRule in controlRules)
            {
                aRule.startIteration(wRecord.iteration, wRecord);
                output.listen(aRule.evaluate(wRecord));
            }



            return(output);
        }
Exemplo n.º 2
0
        //public abstract spiderTask operation_GetLoadTask(modelSpiderSiteRecord wRecord);

        /// <summary>
        /// E3: Performes ranking, selects the next task and drops links below
        /// </summary>
        /// <param name="stResult">The st result.</param>
        /// <param name="wRecord">The s record.</param>
        public virtual List <spiderPage> operation_evaluatePages(modelSpiderSiteRecord wRecord)
        {
            pageScoreRules.prepare();
            List <spiderPage> output = new List <spiderPage>();

            foreach (spiderPage pg in wRecord.web.webPages.items.Values)
            {
                if (pg.webpage.status == pageStatus.loaded)
                {
                    foreach (spiderEvalRuleForPageBase ruleForPage in pageScoreRules)
                    {
                        ruleForPage.learn(pg);
                    }
                    output.Add(pg);
                }
            }

            //foreach (spiderEvalRuleForPageBase ruleForPage in pageScoreRules)
            //{
            //    ruleForPage.AppendDataFields(wRecord.stats);
            //}


            List <spiderPage> outputTwo = new List <spiderPage>();

            foreach (spiderPage pg in output)
            {
                foreach (spiderEvalRuleForPageBase ruleForPage in pageScoreRules)
                {
                    spiderEvalRuleResult ruleResult = ruleForPage.evaluate(pg);
                    pg.marks.deploy(ruleResult);
                }
                int score = pg.marks.calculate(wRecord.iteration);

                if (score > -1)
                {
                    outputTwo.Add(pg);
                }
            }


            // <---------------------------------------------------------------------------------------- Application of page control rules
            spiderObjectiveSolutionSet obSet = new spiderObjectiveSolutionSet();

            foreach (controlPageRuleBase aRule in controlPageRules)
            {
                aRule.startIteration(wRecord.iteration, wRecord);
                foreach (spiderPage pg in output)
                {
                    obSet.listen(aRule.evaluate(pg, wRecord));
                }
            }


            foreach (spiderPage page in obSet.links)
            {
                if (outputTwo.Count() > settings.primaryPageSetSize)
                {
                    outputTwo.Remove(page);
                }
                else
                {
                    break;
                }
            }
            // <-------------------------------------------------------------------------------------------------------------------------

            outputTwo.Sort((x, y) => x.marks.score.CompareTo(y.marks.score)); // <----------------------- sorts the pages after cut


            if (settings.flags.HasFlag(spiderEvaluatorExecutionFlags.doTrimPrimaryOutput)) // <------------------- does the final trim if it is turned on
            {
                int tkc = Math.Min(settings.primaryPageSetSize, outputTwo.Count());
                outputTwo = outputTwo.Take(tkc).ToList();
            }

            wRecord.resultPageSet = outputTwo; // <------------------------------------------------------ transfers the final set to the record

            foreach (spiderPage pg in outputTwo)
            {
                var pRecord = wRecord.children.GetRecord(pg.spiderResult.target);

                pRecord.recordFinish(wRecord.resultPageSet); // <---------------------------------------- calls record finish for page records
            }

            return(outputTwo);
        }
Exemplo n.º 3
0
        protected virtual void operation_doControlAndStats(modelSpiderSiteRecord wRecord)
        {
            var stats = wRecord.web.webActiveLinks.calculateTotalAndAvgScore();

            wRecord.timeseries[wRecord.iteration].avg_score_l = stats.Item2;
            //   wRecord.timeseries[wRecord.iteration].tc_detected_l = stats.Item1;


            // <---------------- Control rules
            spiderObjectiveSolutionSet output = new spiderObjectiveSolutionSet();

            /// cleaning rule memory
            foreach (controlLinkRuleBase aRule in controlLinkRules)
            {
                aRule.startIteration(wRecord.iteration, wRecord);
            }

            /// perceiving current situation
            foreach (spiderLink sLink in wRecord.web.webActiveLinks)
            {
                foreach (controlLinkRuleBase aRule in controlLinkRules)
                {
                    aRule.learn(sLink, wRecord);
                }
            }


            /// --------- TRIM BELOW ZERO ------------ ///
            if (settings.FRONTIER_PullDecayModes.HasFlag(spiderPullDecayModes.belowZeroScoreRemoval))
            {
                foreach (spiderLink sLink in wRecord.web.webActiveLinks.ToList())
                {
                    if (sLink.marks.score < 0)
                    {
                        wRecord.web.webActiveLinks.Remove(sLink);
                        wRecord.log("Link [" + sLink.url + "] had score below zero");
                    }
                }
            }

            /// apply update on results
            foreach (spiderLink sLink in wRecord.web.webActiveLinks)
            {
                foreach (controlLinkRuleBase aRule in controlLinkRules)
                {
                    output.listen(aRule.evaluate(sLink, wRecord));
                }
            }

            int removed = 0;

            foreach (var link in output.links)
            { // <--------------------------------------------------- removes any links found at control solution set
                wRecord.web.webActiveLinks.Remove(link);
                removed++;
            }


            if (removed > 0)
            {
                wRecord.log("Control rules removed: " + removed.ToString() + " links from active links collection");
            }


            //wRecord.logBuilder.log("Link drop-out:" + output.links.Count + ". Now have " + wRecord.web.webActiveLinks.Count() + " links waiting.");

            stats = wRecord.web.webActiveLinks.items.calculateTotalAndAvgScore();
            wRecord.timeseries[wRecord.iteration].avg_scoreADO_l = stats.Item2;
            wRecord.timeseries[wRecord.iteration].tc_scoreADO_l  = stats.Item1;
            wRecord.timeseries[wRecord.iteration].nw_ruledout_l  = output.links.Count;
        }
        public override spiderObjectiveSolutionSet operation_applyLinkRules(modelSpiderSiteRecord wRecord)
        {
            spiderModuleData <spiderLink> dataInput = new spiderModuleData <spiderLink>();

            dataInput.iteration = wRecord.iteration;
            dataInput.active.AddRange(wRecord.web.webActiveLinks);

            frontierRankingAlgorithmIterationRecord frontierReportEntry = null;


            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                frontierReportEntry = wRecord.frontierDLC.reportStartOfFRA(wRecord.iteration, wRecord, dataInput); // <----------------- reporting on module activity -- START
            }

            foreach (ISpiderModuleBase module in modules)
            {
                module.startIteration(wRecord.iteration, wRecord);
            }


            bool breakExecution = false;

            foreach (ISpiderModuleBase module in modules)
            {
                if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
                {
                    dataInput.moduleDLC = wRecord.frontierDLC.modRecords[module.GetType().Name];
                    dataInput.moduleDLCRecordTableEntry = dataInput.moduleDLC.StartNewRecord(wRecord.iteration);
                }

                spiderModuleData <spiderLink> dataOutput = null;
                if (!breakExecution)
                {
                    dataOutput = module.evaluate(dataInput, wRecord) as spiderModuleData <spiderLink>;
                }

                //dataInput.moduleDLC.reportEvaluateAlterRanking(dataOutput.active, wRecord, dataInput.moduleDLCRecordTableEntry, module as spiderModuleBase);

                if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
                {
                    dataInput.moduleDLC.AddOrUpdate(dataInput.moduleDLCRecordTableEntry);
                    dataInput.moduleDLCRecordTableEntry.disposeResources();
                }

                if (!breakExecution)
                {
                    dataInput = dataOutput.CreateNext();

                    if (dataInput.active.Count == 1)
                    {
                        wRecord.log("Module " + module.name + " returned single link instance -- skipping other modules");
                        breakExecution = true;
                    }
                }
            }

            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                frontierReportEntry = wRecord.frontierDLC.reportEndOfFRA(wRecord, frontierReportEntry, dataInput); // <--------------------------------------------- reporting on module activity -- END
            }
            wRecord.currentModuleData = dataInput;



            // <------------------ Objective control rules

            spiderObjectiveSolutionSet output = new spiderObjectiveSolutionSet();

            foreach (controlObjectiveRuleBase aRule in controlRules)
            {
                aRule.startIteration(wRecord.iteration, wRecord);
                output.listen(aRule.evaluate(wRecord));
            }



            return(output);
        }