Esempio n. 1
0
        /// <summary>
        /// Scores the link from the cross links of its origin page: one <c>scoreUnit</c> is added for
        /// every cross-linked page whose outflow links contain the target of the evaluated link.
        /// </summary>
        /// <param name="link">The link to evaluate; its <c>originPage</c> must be set.</param>
        /// <returns>Result carrying the accumulated score (0 when no cross-linked page points at the target).</returns>
        /// <exception cref="aceGeneralException">Thrown when <c>link.originPage</c> is null.</exception>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            result.score = 0;

            // Without an origin page there are no cross links to inspect.
            if (link.originPage == null)
            {
                throw new aceGeneralException("Link origin page not set!", null, link, "ruleActiveCrossLink->evaluate()");
            }

            foreach (var crossLink in link.originPage.relationship.crossLinks)
            {
                // Resolve the page registered under the cross link's target hash.
                spiderPage linkedPage = wRecord.web.webPages[crossLink.Value.targetHash];

                bool pointsAtTarget = linkedPage.relationship.outflowLinks.ContainsAsTarget(link.targetHash);
                if (pointsAtTarget)
                {
                    result.score += scoreUnit;
                }
            }

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Assigns the link to a layer from the TF-IDF balance of its target tokens: tokens known to
        /// <c>language</c> add their TF-IDF, all other tokens subtract it.
        /// </summary>
        /// <param name="link">The link whose target tokens are inspected.</param>
        /// <returns>
        /// Result with <c>layer</c> set to <c>layerID</c> for a positive balance and to <c>layer2ID</c>
        /// otherwise; the balance itself is stored in <c>weightScore</c>.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderTarget target = wRecord.context.targets.GetOrCreateTarget(link, false, false);
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            double balance = 0;
            foreach (IWeightTableTerm token in target.tokens)
            {
                bool isKnown = language.isKnownWord(token.nominalForm);
                double tfIdf = target.tokens.GetTF_IDF(token);

                if (isKnown)
                {
                    balance += tfIdf;
                }
                else
                {
                    balance -= tfIdf;
                }
            }

            result.layer = (balance > 0) ? layerID : layer2ID;
            result.weightScore = balance;

            return result;
        }
Esempio n. 3
0
        /// <summary>
        /// Scores the link from the level of its node in the link hierarchy: the level is divided by
        /// the <c>max - min</c> range and multiplied by <c>penaltyUnit</c>.
        /// </summary>
        /// <param name="link">The link whose <c>url</c> is looked up in <c>wRecord.linkHierarchy</c>.</param>
        /// <returns>
        /// Result with the computed score; the score is not assigned when the node is missing,
        /// sits at level 0, or the range is not positive.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            linknodeElement hierarchyNode = wRecord.linkHierarchy.GetByOriginalPath(link.url);

            if (hierarchyNode == null)
            {
                wRecord.log("Link not found in the hierarchy");
                return result;
            }

            int levelRange = max - min;
            if (hierarchyNode.level == 0 || levelRange <= 0)
            {
                return result;
            }

            double levelRatio = ((double)hierarchyNode.level) / levelRange;
            result.score = Convert.ToInt32(penaltyUnit * levelRatio);

            return result;
        }
Esempio n. 4
0
        /// <summary>
        /// Scores the link by comparing its <c>countOnTheDomain</c> with the first and third quartile
        /// of <c>scoreList</c>: above <c>q3</c> earns <c>scoreUnit</c>, at or below <c>q1</c> earns
        /// <c>penaltyUnit</c>.
        /// </summary>
        /// <remarks>
        /// The quartiles are computed once, while <c>q1</c> still equals <c>int.MinValue</c>, and are
        /// then kept in <c>q1</c>/<c>q3</c>.
        /// NOTE(review): the method returns <c>null</c> whenever <c>scoreList</c> contains items, so the
        /// quartile code is only reached for an empty list. The condition looks inverted - confirm the
        /// intent before changing it.
        /// </remarks>
        /// <param name="link">The link to evaluate.</param>
        /// <returns>The evaluation result, or <c>null</c> when <c>scoreList</c> is not empty.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            bool hasScores = scoreList.Count() > 0;
            if (hasScores)
            {
                return null;
            }

            bool quartilesPending = (q1 == int.MinValue);
            if (quartilesPending)
            {
                double lowerQuartile;
                double upperQuartile;
                Measures.Quartiles(scoreList.ToArray(), out lowerQuartile, out upperQuartile, true);

                q1 = Convert.ToInt32(lowerQuartile);
                q3 = Convert.ToInt32(upperQuartile);
            }

            if (link.countOnTheDomain > q3)
            {
                result.score = scoreUnit;
            }
            else if (link.countOnTheDomain <= q1)
            {
                result.score = penaltyUnit;
            }

            return result;
        }
Esempio n. 5
0
        /// <summary>
        /// Scores the link proportionally to the score of its node in the link hierarchy:
        /// <c>node.score / rootScore * scoreUnit</c>.
        /// </summary>
        /// <param name="link">The link whose <c>link.originalUrl</c> is looked up in <c>wRecord.linkHierarchy</c>.</param>
        /// <returns>
        /// Result with the proportional score; the score stays 0 when the node is missing,
        /// sits at level 0, or <c>rootScore</c> is not positive.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            result.score = 0;

            linknodeElement hierarchyNode = wRecord.linkHierarchy.GetByOriginalPath(link.link.originalUrl);

            if (hierarchyNode == null || hierarchyNode.level == 0)
            {
                return result;
            }

            if (rootScore > 0)
            {
                double shareOfRoot = ((double)hierarchyNode.score) / ((double)rootScore);
                result.score = Convert.ToInt32(shareOfRoot * scoreUnit);
            }

            return result;
        }
Esempio n. 6
0
        /// <summary>
        /// Scores the link by how little the query of its target overlaps with the aggregate
        /// link-token and page-token documents: the more similar the query, the lower the score.
        /// </summary>
        /// <remarks>
        /// When neither aggregate document matches, the full <c>scoreUnit</c> is awarded. Otherwise the
        /// score is <c>scoreUnit - similarity * scoreUnit</c>, where similarity is the sum of both
        /// semantic similarities weighted by <c>target_sd</c> and <c>page_sd</c>. With
        /// <c>doAdjustScoreByLanguageDetection</c> the score is additionally multiplied by the squared
        /// <c>ratioA</c> of a language evaluation of the query terms.
        /// Previously the computed score was only written to the result on the language-adjusted path;
        /// it is now assigned on both paths.
        /// </remarks>
        /// <param name="link">The link to evaluate.</param>
        /// <returns>Result carrying the diversity score; details are appended to <c>wRecord.logBuilder</c>.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            spiderTarget         target = wRecord.context.targets.GetOrCreateTarget(link, false, false);

            termQueryDocument query = target.getQuery(expansionSteps, wRecord.logBuilder);

            wRecord.logBuilder.AppendLine("Target [" + link.url + "] query => [" + query.GetAllTermString().toCsvInLine(",") + "]");

            weightTableMatchCollection <termSpark, termSpark> matchLinks = query.GetSparkMatchAgainst <termSpark>((termDocument)wRecord.context.targets.dlTargetLinkTokens.AggregateDocument);
            weightTableMatchCollection <termSpark, termSpark> matchPage  = query.GetSparkMatchAgainst <termSpark>((termDocument)wRecord.context.targets.dlTargetPageTokens.AggregateDocument);

            // No overlap with anything seen so far: award the full score unit.
            if ((!matchLinks.Any()) && (!matchPage.Any()))
            {
                result.score = scoreUnit;
                wRecord.logBuilder.AppendLine("D[" + link.url + "][" + target.tokens.GetAllTermString().toCsvInLine(",") + "] = no matches with query");
                return result;
            }

            wRecord.logBuilder.AppendLine("matchLinks => " + matchLinks.ToString());
            wRecord.logBuilder.AppendLine("matchPage => " + matchPage.ToString());

            double pLSim = matchLinks.GetSemanticSimilarity() * target_sd;
            double pPSim = matchPage.GetSemanticSimilarity() * page_sd;
            double sim   = pLSim + pPSim;

            // Invert the similarity: the score shrinks as the query gets more similar.
            double score = ((double)scoreUnit) - (sim * (double)scoreUnit);

            if (doAdjustScoreByLanguageDetection)
            {
                // Scale the diversity score by the language evaluation of the query terms.
                List <string> tkns = new List <string>();
                foreach (IWeightTableTerm spark in query)
                {
                    tkns.Add(spark.nominalForm);
                }

                textEvaluation evaluation = new textEvaluation(wRecord.aJob.langTextEvaluator, null);
                evaluation.evaluateTokens(tkns, null, false);

                double evalAdj = Math.Pow(evaluation.ratioA, 2);
                result.score = Convert.ToInt32(score * evalAdj);
                wRecord.logBuilder.AppendLine();
                wRecord.logBuilder.AppendLine("Score is adjusted by language evaluation ratioA ^ 2: " + evalAdj);
            }
            else
            {
                // Fix: without language adjustment the computed score used to be dropped.
                result.score = Convert.ToInt32(score);
            }

            wRecord.logBuilder.AppendLine("D[" + link.url + "][" + target.tokens.GetAllTermString().toCsvInLine(",") + "]=[pL:" + pLSim.ToString("P2") + "][pP:" + pPSim.ToString("P2") + "]=" + sim.ToString("#0.0000") + " (" + result.score + ")");

            return result;
        }
Esempio n. 7
0
        /// <summary>
        /// Awards the same fixed score to every link.
        /// </summary>
        /// <param name="link">The link being evaluated; it is not inspected.</param>
        /// <returns>Result whose score equals <c>scoreUnit</c>.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            result.score = scoreUnit;
            return result;
        }
Esempio n. 8
0
        /// <summary>
        /// Assigns the link a score drawn from <c>rnd.Next(penaltyUnit, scoreUnit)</c>.
        /// </summary>
        /// <param name="link">The link being evaluated; it is not inspected.</param>
        /// <returns>Result carrying the drawn score.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            int drawnScore = rnd.Next(penaltyUnit, scoreUnit);

            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            result.score = drawnScore;
            return result;
        }
Esempio n. 9
0
        /// <summary>
        /// Scores the link by the depth of its XPath: <c>scoreUnit * (1 - depth / max)</c>, so deeper
        /// links receive a smaller score.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>max</c> is not checked for zero here; a zero value would make the factor
        /// non-finite before the integer conversion - confirm it is always positive.
        /// </remarks>
        /// <param name="link">The link whose <c>link.html.XPath</c> is measured.</param>
        /// <returns>Result carrying the depth-based score.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            // Depth is the number of path parts once the XPath separators are turned into backslashes.
            string xPathAsPath = link.link.html.XPath.Replace("/", "\\");
            int    partCount   = xPathAsPath.getPathParts().Count();

            double depthFactor = 1 - ((double)partCount) / ((double)max);

            result.score = Convert.ToInt32(((double)scoreUnit) * depthFactor);
            return result;
        }
Esempio n. 10
0
        /// <summary>
        /// Scores the link by multiplying the <c>hits</c> entry of its target with <c>scoreUnit</c>.
        /// </summary>
        /// <param name="link">The link whose target is looked up in <c>hits</c>.</param>
        /// <returns>Result carrying the product, truncated to an integer by the cast.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            spiderTarget         target = wRecord.context.targets.GetOrCreateTarget(link, false, false);

            double weightedHits = hits[target] * scoreUnit;
            result.score = (int)weightedHits;

            return result;
        }
Esempio n. 11
0
        /// <summary>
        /// Awards <c>scoreUnit</c> when a token of the link's path-and-query, or failing that of its
        /// caption, is contained (lower-cased) in <c>languageNames</c>.
        /// </summary>
        /// <param name="link">The link to evaluate.</param>
        /// <returns>
        /// Result whose comment is <c>url_found</c> or <c>caption_found</c> when a match exists;
        /// the result is returned untouched when the path-and-query is null or empty.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            if (link.link.pathAndQuery.isNullOrEmpty())
            {
                return result;
            }

            bool found = false;

            // First pass: tokens of the URL path and query.
            foreach (string token in link.link.pathAndQuery.getStringTokens())
            {
                if (languageNames.Contains(token.ToLower()))
                {
                    result.comment = "url_found";
                    found          = true;
                    break;
                }
            }

            // Second pass: caption tokens, only when the URL gave nothing.
            if (!found && !link.link.caption.isNullOrEmpty())
            {
                foreach (string token in link.link.caption.getStringTokens())
                {
                    if (languageNames.Contains(token.ToLower()))
                    {
                        result.comment = "caption_found";
                        found          = true;
                        break;
                    }
                }
            }

            if (found)
            {
                result.score = scoreUnit;
            }

            return result;
        }
        /// <summary>
        /// Rewards links whose path and directory path have not been seen before and penalizes
        /// repetitions; unseen values are recorded in <c>paths</c> and <c>folderPaths</c>.
        /// </summary>
        /// <remarks>
        /// Each unseen path or directory path adds <c>scoreUnit</c>. If either was already known,
        /// <c>penaltyUnit</c> is added once at the end. When the directory path is null or empty the
        /// method returns right after the path check, before any penalty is applied.
        /// </remarks>
        /// <param name="link">The link to evaluate.</param>
        /// <returns>Result with the accumulated score; the comment is <c>path null</c> for an empty path.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            if (link.link.path.isNullOrEmpty())
            {
                result.comment = "path null";
                return result;
            }

            bool repeated = paths.Contains(link.link.path);
            if (repeated)
            {
                result.comment = "";
            }
            else
            {
                paths.Add(link.link.path);
                result.score += scoreUnit;
            }

            if (link.link.pathDirectoryPath.isNullOrEmpty())
            {
                return result;
            }

            if (folderPaths.Contains(link.link.pathDirectoryPath))
            {
                repeated = true;
            }
            else
            {
                folderPaths.Add(link.link.pathDirectoryPath);
                result.score += scoreUnit;
            }

            if (repeated)
            {
                result.score += penaltyUnit;
            }

            return result;
        }
Esempio n. 13
0
        /// <summary>
        /// Scores the link relative to the best node of <c>tree</c>:
        /// <c>scoreUnit * (linkNode.score / bestNode.score)</c>.
        /// </summary>
        /// <remarks>
        /// <c>tree.buildGd()</c> is called first when <c>tree.Gd</c> is still null.
        /// NOTE(review): the node lookup by <c>link.url</c> and the division by the best node's score
        /// are unguarded - confirm the url is always present and the best score is never zero.
        /// </remarks>
        /// <param name="link">The link whose <c>url</c> indexes <c>tree.Gd.sourceNodes</c>.</param>
        /// <returns>Result carrying the relative score.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            if (tree.Gd == null)
            {
                tree.buildGd();
            }

            int bestScore = tree.bestNode.score;
            linknodeElement node = tree.Gd.sourceNodes[link.url];

            double shareOfBest = (double)node.score / ((double)bestScore);
            double scaled      = ((double)scoreUnit) * shareOfBest;

            result.score = Convert.ToInt32(scaled);
            return result;
        }
Esempio n. 14
0
        /// <summary>
        /// Rewards a link whose caption has not been seen before and penalizes a repeated caption;
        /// new captions are recorded in <c>knownCaptions</c>.
        /// </summary>
        /// <param name="link">The link whose <c>link.caption</c> is checked.</param>
        /// <returns>
        /// Result with <c>scoreUnit</c> for a new caption and <c>penaltyUnit</c> for a known one.
        /// Empty, numeric and symbol-only captions get no score, only an explanatory comment.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result  = new spiderEvalRuleResult(this);
            string               caption = link.link.caption;

            // Captions that carry no usable text are skipped without scoring.
            if (caption.isNullOrEmpty())
            {
                result.comment = "caption is empty";
                return result;
            }

            if (caption.isNumber())
            {
                result.comment = "caption is numeric";
                return result;
            }

            if (caption.isSymbolicContentOnly())
            {
                result.comment = "caption is symbolic content";
                return result;
            }

            bool alreadyKnown = knownCaptions.Contains(caption);
            if (!alreadyKnown)
            {
                knownCaptions.Add(caption);
                result.score = scoreUnit;
            }
            else
            {
                result.score = penaltyUnit;
            }

            return result;
        }
Esempio n. 15
0
        /// <summary>
        /// Scores the page by its number of inflow links relative to <c>maxInboundLinks</c>.
        /// </summary>
        /// <param name="page">The page whose <c>relationship.inflowLinks</c> are counted.</param>
        /// <returns>
        /// Passive result with score <c>scoreUnit * ratio</c> and the ratio as a percentage comment;
        /// nothing is assigned when <c>maxInboundLinks</c> is not positive.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this, spiderEvalRuleResultEnum.passive);

            int inboundCount = page.relationship.inflowLinks.Count;

            if (maxInboundLinks > 0)
            {
                decimal share  = Convert.ToDecimal(inboundCount) / Convert.ToDecimal(maxInboundLinks);
                decimal scaled = scoreUnit * share;

                result.comment = share.ToString("P2");
                result.score   = Convert.ToInt32(scaled);
            }

            return result;
        }
Esempio n. 16
0
        /// <summary>
        /// Assigns the link to a layer by its position relative to the best node of <c>tree</c>.
        /// </summary>
        /// <remarks>
        /// <c>layerID</c>: the link node is the best node or one of its direct items.
        /// <c>layer2ID</c>: the link node is an item of one of the best node's items.
        /// <c>layer3ID</c>: everything else, including a tree without <c>Gd</c> and an unknown link.
        /// </remarks>
        /// <param name="link">The link whose <c>url</c> is resolved through <c>tree.GetLinkNode</c>.</param>
        /// <returns>Result with the chosen layer.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            // Build Gd on demand; if it is still missing afterwards, fall back to the last layer.
            if (tree.Gd == null)
            {
                tree.buildGd();
            }

            if (tree.Gd == null)
            {
                result.layer = layer3ID;
                return result;
            }

            linknodeElement node = tree.GetLinkNode(link.url);
            if (node == null)
            {
                result.layer = layer3ID;
                return result;
            }

            bool isBestOrDirectItem = tree.bestNode == node || tree.bestNode.items.Values.Contains(node);

            if (isBestOrDirectItem)
            {
                result.layer = layerID;
            }
            else if (tree.bestNode.items.Values.Any(child => child.items.Values.Contains(node)))
            {
                result.layer = layer2ID;
            }
            else
            {
                result.layer = layer3ID;
            }

            return result;
        }
        /// <summary>
        /// Awards <c>scoreUnit</c> when the page caption contains a word that has a <c>titleWords</c>
        /// entry equal to 1 and is known to <c>language</c>; otherwise applies <c>penaltyUnit</c>.
        /// </summary>
        /// <param name="page">The page whose <c>webpage.pageCaption</c> is tokenized.</param>
        /// <returns>
        /// Passive result. The comment names the last word with a count of 1 that was inspected,
        /// with <c>&amp; IsKnown</c> appended when that word earned the score.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this, spiderEvalRuleResultEnum.passive);

            foreach (string word in page.webpage.pageCaption.getStringTokensMinLength())
            {
                bool countIsOne = titleWords[word] == 1;
                if (!countIsOne)
                {
                    continue;
                }

                result.comment = " [" + word + "]:IsUni ";

                if (language.isKnownWord(word))
                {
                    result.comment += "& IsKnown";
                    result.score    = scoreUnit;
                    return result;
                }
            }

            // No word qualified: the caption is penalized.
            result.score = penaltyUnit;
            return result;
        }
Esempio n. 18
0
        /// <summary>
        /// Awards <c>scoreUnit</c> when at least one token of the link's path-and-query is known to
        /// <c>language</c>, and <c>penaltyUnit</c> when none is.
        /// </summary>
        /// <param name="link">The link whose <c>link.pathAndQuery</c> is tokenized.</param>
        /// <returns>
        /// Result with the score; when the path-and-query is empty or yields no tokens, only an
        /// explanatory comment is set.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            if (link.link.pathAndQuery.isNullOrEmpty())
            {
                result.comment = "no pathAndQuery";
                return result;
            }

            List <string> urlWords = link.link.pathAndQuery.getStringTokensMinLength();

            if (!urlWords.Any())
            {
                result.comment = "no words in url";
                return result;
            }

            // Stops at the first known word, like the explicit loop with break it replaces.
            bool hasKnownWord = urlWords.Any(word => language.isKnownWord(word));

            result.score = hasKnownWord ? scoreUnit : penaltyUnit;
            return result;
        }
Esempio n. 19
0
        /// <summary>
        /// Scores the page by the score of the link that led to it, relative to <c>linkScoreMax</c>.
        /// </summary>
        /// <param name="page">The page whose <c>spiderResult.target.marks.score</c> is read.</param>
        /// <returns>
        /// Active result with score <c>scoreUnit * ratio</c> and the ratio as a percentage comment;
        /// nothing is assigned when the page has no spider result or <c>linkScoreMax</c> is not positive.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this, spiderEvalRuleResultEnum.active);

            if (page.spiderResult != null && linkScoreMax > 0)
            {
                int linkScore = page.spiderResult.target.marks.score;

                decimal share  = Convert.ToDecimal(linkScore) / Convert.ToDecimal(linkScoreMax);
                decimal scaled = scoreUnit * share;

                result.comment = share.ToString("P2");
                result.score   = Convert.ToInt32(scaled);
            }

            return result;
        }
Esempio n. 20
0
        /// <summary>
        /// Assigns the link to <c>layerID</c> when any token of its target has a nominal form contained
        /// in <c>needles</c>, and to <c>layer2ID</c> otherwise.
        /// </summary>
        /// <param name="link">The link whose target tokens are scanned.</param>
        /// <returns>Result with the chosen layer.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderTarget         target = wRecord.context.targets.GetOrCreateTarget(link, false, false);
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            // Default layer; promoted below on the first needle hit.
            result.layer = layer2ID;

            foreach (IWeightTableTerm token in target.tokens)
            {
                bool isNeedle = needles.Contains(token.nominalForm);
                if (!isNeedle)
                {
                    continue;
                }

                result.layer = layerID;
                break;
            }

            return result;
        }
Esempio n. 21
0
        /// <summary>
        /// Assigns the link to a layer from the TF-IDF balance of its target tokens, using the lexicon
        /// cache as the judge: tokens answered with <c>cachedLexicon</c>, <c>lexicon</c> or
        /// <c>askingLexiconContext</c> add their TF-IDF, any other response subtracts it.
        /// </summary>
        /// <param name="link">The link whose target tokens are inspected.</param>
        /// <returns>
        /// Result with <c>layer</c> set to <c>layerID</c> for a positive balance and to <c>layer2ID</c>
        /// otherwise; the balance itself is stored in <c>weightScore</c>.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderTarget         target = wRecord.context.targets.GetOrCreateTarget(link, false, false);
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            double balance = 0;

            foreach (IWeightTableTerm token in target.tokens)
            {
                var lookup = semanticLexiconManager.lexiconCache.getLexiconItems(token.nominalForm, wRecord.logBuilder);
                var kind   = lookup.type;

                bool countsInFavor = kind == lexiconResponse.responseType.cachedLexicon
                                     || kind == lexiconResponse.responseType.lexicon
                                     || kind == lexiconResponse.responseType.askingLexiconContext;

                if (countsInFavor)
                {
                    balance += target.tokens.GetTF_IDF(token);
                }
                else
                {
                    balance -= target.tokens.GetTF_IDF(token);
                }
            }

            result.layer       = (balance > 0) ? layerID : layer2ID;
            result.weightScore = balance;

            return result;
        }
Esempio n. 22
0
        /// <summary>
        /// Scores the page by its number of cross links relative to <c>maxCrosslinkScore</c>.
        /// </summary>
        /// <param name="page">The page whose <c>relationship.crossLinks</c> are counted.</param>
        /// <returns>
        /// Result with score <c>scoreUnit * ratio</c> and the ratio as a percentage comment; when
        /// <c>maxCrosslinkScore</c> is not positive only the comment <c> [maxCLS=0] </c> is set.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            int crossLinkCount = page.relationship.crossLinks.Count();

            if (maxCrosslinkScore > 0)
            {
                decimal share  = Convert.ToDecimal(crossLinkCount) / Convert.ToDecimal(maxCrosslinkScore);
                decimal scaled = scoreUnit * share;

                result.comment = share.ToString("P2");
                result.score   = Convert.ToInt32(scaled);
            }
            else
            {
                result.comment = " [maxCLS=0] ";
            }

            return result;
        }
Esempio n. 23
0
        /// <summary>
        /// Assigns the link to <c>layerID</c> when the content block that holds it (found by the link's
        /// XPath in the target returned for the link's origin) has the role <c>semanticRole</c>;
        /// every other case goes to <c>layer2ID</c>.
        /// </summary>
        /// <param name="link">The link to evaluate.</param>
        /// <returns>
        /// Result with the chosen layer; <c>layer2ID</c> is also used when no target or no block is found.
        /// </returns>
        /// <exception cref="aceGeneralException">
        /// Wraps any exception raised during the evaluation; the original exception is passed along as the inner one.
        /// </exception>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            try
            {
                spiderEvalRuleResult result = new spiderEvalRuleResult(this);
                spiderTarget         target = wRecord.context.targets.GetByOrigin(link);

                if (target == null)
                {
                    result.layer = layer2ID;
                    return result;
                }

                nodeBlock block = target.contentBlocks.GetBlockByXPath(link.link.xPath);

                if (block == null)
                {
                    result.layer = layer2ID;
                    return result;
                }

                result.layer = (block.role == semanticRole) ? layerID : layer2ID;
                return result;
            }
            catch (Exception ex)
            {
                throw new aceGeneralException(ex.Message, ex, this, "layerBlockRolePRule broken");
            }

            // The unreachable trailing return was removed: every path above returns or throws.
        }
Esempio n. 24
0
        /// <summary>
        /// Awards <c>scoreUnit</c> when the page caption has a <c>pageTitleCount</c> entry equal to 1,
        /// and <c>penaltyUnit</c> otherwise.
        /// </summary>
        /// <param name="page">The page whose <c>webpage.pageCaption</c> is looked up.</param>
        /// <returns>Result created with <c>mode</c>; its comment reports the looked-up frequency.</returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this, mode);

            result.comment = " freq:[" + pageTitleCount[page.webpage.pageCaption] + "] ";

            bool countIsOne = pageTitleCount[page.webpage.pageCaption] == 1;

            result.score = countIsOne ? scoreUnit : penaltyUnit;
            return result;
        }
Esempio n. 25
0
        /// <summary>
        /// Scores the link with the entry stored for its target in <c>ranks</c>.
        /// </summary>
        /// <param name="link">The link whose target is looked up.</param>
        /// <returns>
        /// Result with <c>scoreUnit</c> while <c>ranks</c> is empty, the stored value when the target
        /// has an entry, and <c>penaltyUnit</c> when it has none.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            spiderTarget         target = wRecord.context.targets.GetOrCreateTarget(link, false, false);

            // Nothing stored yet: every link gets the plain score unit.
            if (!ranks.Any())
            {
                result.score = scoreUnit;
                return result;
            }

            if (!ranks.ContainsKey(target))
            {
                result.score = penaltyUnit;
                return result;
            }

            result.score = ranks[target];
            return result;
        }
Esempio n. 26
0
        /// <summary>
        /// Evaluates the given links with the ranking rules and returns them ordered by descending score.
        /// </summary>
        /// <remarks>
        /// The active rules first learn from every input link. Then, for each link, passive rules are
        /// evaluated only when the link has no mark for them yet, active rules are always evaluated,
        /// and the marks are recalculated for the iteration.
        /// The learning pass previously enumerated the still-empty output list, so the active rules
        /// never learned from any link; it now enumerates <paramref name="input"/>.
        /// </remarks>
        /// <param name="input">The links to rank.</param>
        /// <param name="iteration">The iteration passed to <c>marks.calculate</c>.</param>
        /// <returns>A new list with the input links sorted by <c>marks.score</c>, highest first.</returns>
        public List <spiderLink> rankLinks(List <spiderLink> input, int iteration)
        {
            List <spiderLink> output = new List <spiderLink>();

            // Learning pass: must run over the input links (the output list is still empty here).
            foreach (spiderLink link in input)
            {
                foreach (layerDistributionActiveRuleBase activeRule in rankingTargetActiveRules)
                {
                    activeRule.learn(link);
                }
            }

            // Evaluation pass.
            foreach (spiderLink link in input)
            {
                foreach (spiderEvalRuleForLinkBase passiveRule in rankingTargetPassiveRules)
                {
                    // A passive mark that already exists is reused instead of being re-evaluated.
                    spiderEvalRuleResult existing = link.marks[passiveRule];
                    if (existing == null)
                    {
                        link.marks.deploy(passiveRule.evaluate(link));
                    }
                }

                foreach (IRuleForTarget rule in rankingTargetActiveRules)
                {
                    link.marks.deploy(rule.evaluate(link));
                }

                link.marks.calculate(iteration);

                output.Add(link);
            }

            // Highest score first.
            output.Sort((x, y) => y.marks.score.CompareTo(x.marks.score));
            return output;
        }
Esempio n. 27
0
        /// <summary>
        /// Scores the link by the language of its caption: <c>scoreUnit</c> when every clean word
        /// inspected is known to <c>language</c>, <c>penaltyUnit</c> as soon as one is not.
        /// </summary>
        /// <param name="link">The link whose <c>link.caption</c> is tokenized.</param>
        /// <returns>
        /// Result whose comment lists the accepted words or names the rejected one. Empty, numeric and
        /// symbol-only captions get no score, only an explanatory comment; a caption without any clean
        /// word gets no score either.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result  = new spiderEvalRuleResult(this);
            string               caption = link.link.caption;

            if (caption.isNullOrEmpty())
            {
                result.comment = "caption is empty";
                return result;
            }

            if (caption.isNumber())
            {
                result.comment = "caption is numeric";
                return result;
            }

            if (caption.isSymbolicContentOnly())
            {
                result.comment = "caption is symbolic content";
                return result;
            }

            bool anyKnown     = false;
            bool foundUnknown = false;

            result.comment = "lang_ok:";

            foreach (string word in caption.getStringTokens())
            {
                if (!word.isCleanWord())
                {
                    continue;
                }

                if (language.isKnownWord(word))
                {
                    result.comment += word + ";";
                    anyKnown        = true;
                }
                else
                {
                    // One unknown word settles the verdict; stop scanning.
                    result.comment = word + " is not lang_ok";
                    foundUnknown   = true;
                    break;
                }
            }

            if (foundUnknown)
            {
                result.score = penaltyUnit;
            }
            else if (anyKnown)
            {
                result.score = scoreUnit;
            }

            return result;
        }
Esempio n. 28
0
        /// <summary>
        /// Scores the link by how many of its caption words occur in its path-and-query.
        /// </summary>
        /// <remarks>
        /// At least one word found earns <c>scoreUnit</c>; when every word is found the score is
        /// doubled; when none is found <c>penaltyUnit</c> is applied.
        /// </remarks>
        /// <param name="link">The link whose <c>link.caption</c> and <c>link.pathAndQuery</c> are compared.</param>
        /// <returns>
        /// Result whose comment lists each word as <c>ok</c> or <c>no</c>. Empty, numeric and
        /// symbol-only captions get no score, only an explanatory comment.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result  = new spiderEvalRuleResult(this);
            string               caption = link.link.caption;

            if (caption.isNullOrEmpty())
            {
                result.comment = "caption is empty";
                return result;
            }

            if (caption.isNumber())
            {
                result.comment = "caption is numeric";
                return result;
            }

            if (caption.isSymbolicContentOnly())
            {
                result.comment = "caption is symbolic content";
                return result;
            }

            List <string> captionWords = link.link.caption.getStringTokens();
            string        urlPart      = link.link.pathAndQuery;

            bool anyInUrl = false;
            bool allInUrl = true;

            result.comment = "url[" + urlPart + "]";

            foreach (string word in captionWords)
            {
                if (urlPart.Contains(word))
                {
                    result.comment += "[" + word + "]ok ";
                    anyInUrl        = true;
                }
                else
                {
                    result.comment += "[" + word + "]no ";
                    allInUrl        = false;
                }
            }

            if (!anyInUrl)
            {
                result.score = penaltyUnit;
                return result;
            }

            result.score = scoreUnit;
            if (allInUrl)
            {
                // Every caption word is in the URL: count the score unit a second time.
                result.score += scoreUnit;
            }

            return result;
        }
Esempio n. 29
0
        /// <summary>
        /// Evaluates the specified input with links
        /// </summary>
        /// <param name="input">The input.</param>
        /// <returns></returns>
        public spiderModuleData <spiderLink> evaluate(spiderModuleData <spiderLink> input, modelSpiderSiteRecord wRecord)
        {
            // Iteration-level report entry that arrives with the input; it is only used when module reporting is switched on.
            moduleIterationRecord iterationReport = input.moduleDLCRecordTableEntry;

            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                iterationReport.reportEvaluateStart(input, wRecord, this);
            }

            wRecord.logBuilder.Append(input.GetInlineDescription("Input (" + name + ") "));

            // Inactive links of the input are always carried over to the output.
            spiderModuleData<spiderLink> output = new spiderModuleData<spiderLink>();
            output.inactive.AddRange(input.inactive);

            // Links that none of the rules managed to place into a layer.
            List<spiderLink> unassigned = new List<spiderLink>();

            // Step 1: tell every active rule that a new iteration begins.
            foreach (layerDistributionActiveRuleBase rule in layerActiveRules)
            {
                rule.startIteration(wRecord.iteration, wRecord);
            }

            // Step 2: every active rule learns from every active link before any link is evaluated.
            foreach (spiderLink link in input.active)
            {
                foreach (layerDistributionActiveRuleBase rule in layerActiveRules)
                {
                    rule.learn(link);
                }
            }

            // Step 3: place each active link into the layer chosen by the first rule that returns a layer above -1.
            foreach (spiderLink link in input.active)
            {
                bool                      isPlaced     = false;
                layerCollection           targetLayer  = null;
                layerDistributionRuleBase decidingRule = null;

                // Passive rules are consulted first; a mark already stored on the link is reused instead of re-evaluating.
                foreach (layerDistributionPassiveRuleBase passiveRule in layerPassiveRules)
                {
                    spiderEvalRuleResult mark = link.marks[passiveRule];
                    if (mark == null)
                    {
                        mark = passiveRule.evaluate(link);
                        link.marks.deploy(mark);
                    }

                    if (mark.layer < 0)
                    {
                        continue;
                    }

                    targetLayer = layers[mark.layer];
                    isPlaced    = true;
                    layers[mark.layer].Push<spiderLink>(link);
                    decidingRule = passiveRule;
                    break;
                }

                // Active rules are consulted only when no passive rule placed the link; they are evaluated on every call.
                if (!isPlaced)
                {
                    foreach (layerDistributionActiveRuleBase activeRule in layerActiveRules)
                    {
                        spiderEvalRuleResult mark = activeRule.evaluate(link);
                        link.marks.deploy(mark);

                        if (mark.layer < 0)
                        {
                            continue;
                        }

                        targetLayer = layers[mark.layer];
                        isPlaced    = true;
                        layers[mark.layer].Push<spiderLink>(link);
                        decidingRule = activeRule;
                        break;
                    }
                }

                if (isPlaced)
                {
                    wRecord.logBuilder.AppendLine("Link [" + link.url + "] => " + targetLayer.name + "(" + targetLayer.Count + ") [" + decidingRule.tagName + "]");
                }
                else
                {
                    unassigned.Add(link);
                }
            }

            // Step 4: handle the links left without a layer according to the rest policy.
            // Any policy value other than the two below leaves the unassigned links out of the output.
            var policy = restPolicy;
            if (policy == spiderLayerModuleEvaluationRestPolicy.assignToTheInactive)
            {
                output.inactive.AddRange(unassigned);
            }
            else if (policy == spiderLayerModuleEvaluationRestPolicy.assignToTheDeepestLayer)
            {
                // Whatever Push() returns for the deepest layer is added to the inactive links.
                output.inactive.AddRange(layers.Deepest.Push<spiderLink>(unassigned));
            }

            // Step 5: pull the links out of the layers.
            List<spiderLink> pulled = layers.Pull<spiderLink>(pullLimit, doTakeFromLower);

            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                iterationReport.reportEvaluateEnd(pulled, wRecord, this);
            }

            wRecord.logBuilder.AppendLine("Module output => layers[" + layers.layer_id + "].Pull(" + pullLimit + ", " + doTakeFromLower + ") => " + pulled.Count);

            // Step 6: rank the pulled links.
            List<spiderLink> ranked = rankLinks(pulled, wRecord.iteration);

            if (imbWEMManager.settings.directReportEngine.DR_ReportModules)
            {
                iterationReport.reportEvaluateAlterRanking(ranked, wRecord, this);
            }

            // Step 7: with nothing to offer the module gives up and hands back the active links it received.
            if (!ranked.Any())
            {
                output.active.AddRange(input.active);
                output.isModuleGaveUp = true;
            }
            else
            {
                output.active.AddRange(ranked);
                output.isModuleGaveUp = false;
            }

            wRecord.logBuilder.Append(output.GetInlineDescription("Output"));

            return output;
        }
Esempio n. 30
0
        //public abstract spiderTask operation_GetLoadTask(modelSpiderSiteRecord wRecord);

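        /// <summary>
        /// E3: Scores the loaded pages, applies the page control rules and selects the resulting page set.
        /// </summary>
        /// <param name="wRecord">The site record whose pages are evaluated; it receives the selected set in <c>resultPageSet</c>.</param>
        /// <returns>The selected pages, sorted by their marks score.</returns>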
        public virtual List <spiderPage> operation_evaluatePages(modelSpiderSiteRecord wRecord)
        {
            pageScoreRules.prepare();

            // Phase 1: collect the pages with status "loaded" and let every page score rule learn from each of them.
            List<spiderPage> loadedPages = new List<spiderPage>();

            foreach (spiderPage candidate in wRecord.web.webPages.items.Values)
            {
                if (candidate.webpage.status != pageStatus.loaded)
                {
                    continue;
                }

                foreach (spiderEvalRuleForPageBase scoreRule in pageScoreRules)
                {
                    scoreRule.learn(candidate);
                }
                loadedPages.Add(candidate);
            }

            // Phase 2: evaluate every loaded page with every score rule; pages whose calculated score is above -1 are kept.
            List<spiderPage> selection = new List<spiderPage>();

            foreach (spiderPage loaded in loadedPages)
            {
                foreach (spiderEvalRuleForPageBase scoreRule in pageScoreRules)
                {
                    loaded.marks.deploy(scoreRule.evaluate(loaded));
                }

                if (loaded.marks.calculate(wRecord.iteration) > -1)
                {
                    selection.Add(loaded);
                }
            }

            // Phase 3: page control rules. They are evaluated over all loaded pages, not only the ones kept in phase 2.
            spiderObjectiveSolutionSet solutions = new spiderObjectiveSolutionSet();

            foreach (controlPageRuleBase controlRule in controlPageRules)
            {
                controlRule.startIteration(wRecord.iteration, wRecord);
                foreach (spiderPage loaded in loadedPages)
                {
                    solutions.listen(controlRule.evaluate(loaded, wRecord));
                }
            }

            // Pages listed by the solution set are removed one by one, but only while the selection
            // is larger than the configured primary page set size.
            foreach (spiderPage flagged in solutions.links)
            {
                if (selection.Count <= settings.primaryPageSetSize)
                {
                    break;
                }
                selection.Remove(flagged);
            }

            // Phase 4: order the remaining pages by comparing their marks score (a.CompareTo(b), i.e. ascending order).
            selection.Sort((a, b) => a.marks.score.CompareTo(b.marks.score));

            // Optional final trim to the primary page set size; Take() yields the whole list when it is already shorter.
            // NOTE(review): after an ascending sort this keeps the first, i.e. lowest-scored, entries - confirm that is intended.
            if (settings.flags.HasFlag(spiderEvaluatorExecutionFlags.doTrimPrimaryOutput))
            {
                selection = selection.Take(settings.primaryPageSetSize).ToList();
            }

            // Phase 5: publish the final set on the site record and finish the record of every selected page.
            wRecord.resultPageSet = selection;

            foreach (spiderPage selected in selection)
            {
                wRecord.children.GetRecord(selected.spiderResult.target).recordFinish(wRecord.resultPageSet);
            }

            return selection;
        }