예제 #1
0
        /// <summary>
        /// Scores the link by walking the cross links of its origin page: for every cross link, the page
        /// registered under that cross link's target hash is looked up, and <c>scoreUnit</c> is added
        /// when that page's outflow links contain this link's target hash.
        /// </summary>
        /// <param name="link">The link to evaluate; its <c>originPage</c> must be set.</param>
        /// <returns>A result whose score starts at 0 and grows by <c>scoreUnit</c> per matching cross page.</returns>
        /// <exception cref="aceGeneralException">Thrown when <c>link.originPage</c> is null.</exception>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            result.score = 0;

            // Without an origin page there are no cross links to inspect.
            if (link.originPage == null)
            {
                throw new aceGeneralException("Link origin page not set!", null, link, "ruleActiveCrossLink->evaluate()");
            }

            foreach (var crossEntry in link.originPage.relationship.crossLinks)
            {
                spiderPage candidate = wRecord.web.webPages[crossEntry.Value.targetHash];

                if (!candidate.relationship.outflowLinks.ContainsAsTarget(link.targetHash))
                {
                    continue;
                }

                result.score += scoreUnit;
            }

            return result;
        }
예제 #2
0
        /// <summary>
        /// Scores the link in proportion to its level in the link hierarchy:
        /// <c>penaltyUnit * level / (max - min)</c>, converted to an integer.
        /// </summary>
        /// <param name="link">The link whose URL is looked up in <c>wRecord.linkHierarchy</c>.</param>
        /// <returns>
        /// The result for this rule. The score is left untouched when the link is not in the hierarchy
        /// (which is also logged), when its level is 0, or when <c>max - min</c> is not positive.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            linknodeElement hierarchyNode = wRecord.linkHierarchy.GetByOriginalPath(link.url);

            if (hierarchyNode == null)
            {
                wRecord.log("Link not found in the hierarchy");
            }
            else if (hierarchyNode.level != 0)
            {
                int levelSpan = max - min;

                // A non-positive span cannot be used as a divisor.
                if (levelSpan > 0)
                {
                    double ratio = ((double)hierarchyNode.level) / levelSpan;
                    result.score = Convert.ToInt32(penaltyUnit * ratio);
                }
            }

            return result;
        }
예제 #3
0
        /// <summary>
        /// Gives every link the same score, <c>scoreUnit</c>; the link itself is not inspected.
        /// </summary>
        /// <param name="link">The link being evaluated (unused by this rule).</param>
        /// <returns>A result for this rule with its score set to <c>scoreUnit</c>.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            return new spiderEvalRuleResult(this)
            {
                score = scoreUnit
            };
        }
예제 #4
0
        /// <summary>
        /// Registers the element in <c>Gt</c> under its URL when it is a <c>spiderLink</c>;
        /// any other kind of element is ignored.
        /// </summary>
        /// <param name="element">The element to learn from.</param>
        public void learn(ISpiderElement element)
        {
            spiderLink candidate = element as spiderLink;

            if (candidate == null)
            {
                return;
            }

            Gt.Add(candidate.url, candidate);
        }
예제 #5
0
        /// <summary>
        /// Scores the link by the depth of its HTML XPath: <c>scoreUnit * (1 - depth / max)</c>,
        /// converted to an integer. The depth is the number of path parts of the XPath after its
        /// forward slashes are replaced with backslashes.
        /// </summary>
        /// <param name="link">The link to evaluate; <c>link.link.html.XPath</c> is read.</param>
        /// <returns>
        /// The result for this rule. The score is left untouched when <c>max</c> is 0, because the
        /// depth ratio cannot be computed in that case.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult output = new spiderEvalRuleResult(this);

            // Dividing a double by zero yields Infinity/NaN, and Convert.ToInt32 throws
            // OverflowException for those values, so the link is left unscored instead.
            if (max == 0)
            {
                return output;
            }

            int depth = link.link.html.XPath.Replace("/", "\\").getPathParts().Count();

            double k = 1 - ((double)depth) / ((double)max);

            output.score = Convert.ToInt32(((double)scoreUnit) * k);
            return output;
        }
예제 #6
0
        /// <summary>
        /// Widens the tracked <c>min</c>/<c>max</c> level range with the hierarchy level of the given link.
        /// When the link is not found in the hierarchy a message is logged and the range is unchanged.
        /// </summary>
        /// <param name="link">The link whose URL is looked up in <c>wRecord.linkHierarchy</c>.</param>
        public override void learn(spiderLink link)
        {
            linknodeElement hierarchyNode = wRecord.linkHierarchy.GetByOriginalPath(link.url);

            if (hierarchyNode != null)
            {
                var level = hierarchyNode.level;
                min = Math.Min(level, min);
                max = Math.Max(level, max);
                return;
            }

            wRecord.log("Link not found in the hierarchy");
        }
예제 #7
0
        /// <summary>
        /// Stores the link under its target signature unless an entry with that signature already exists.
        /// </summary>
        /// <param name="link">The link to register.</param>
        /// <returns><c>true</c> when the link was added; <c>false</c> when its signature was already present.</returns>
        public bool Add(spiderLink link)
        {
            string signature = link.getTargetSignature(useCription);
            bool isNew = !items.ContainsKey(signature);

            if (isNew)
            {
                items.Add(signature, link);
            }

            return isNew;
        }
예제 #8
0
        /// <summary>
        /// Assigns <c>scoreUnit</c> to links whose URL (path and query) or caption contains a token
        /// listed in <c>languageNames</c>.
        /// </summary>
        /// <param name="link">The link to evaluate.</param>
        /// <returns>
        /// The result for this rule. On a match the score is <c>scoreUnit</c> and the comment is
        /// <c>"url_found"</c> or <c>"caption_found"</c>; otherwise the result is returned untouched.
        /// Links with an empty path-and-query are never scored, even if the caption would match.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult output = new spiderEvalRuleResult(this);

            if (link.link.pathAndQuery.isNullOrEmpty())
            {
                return output;
            }

            // The URL is checked first; the caption is only tokenized when the URL gave no match.
            if (containsLanguageName(link.link.pathAndQuery.getStringTokens()))
            {
                output.comment = "url_found";
                output.score = scoreUnit;
            }
            else if (!link.link.caption.isNullOrEmpty()
                     && containsLanguageName(link.link.caption.getStringTokens()))
            {
                output.comment = "caption_found";
                output.score = scoreUnit;
            }

            return output;
        }

        /// <summary>
        /// Returns true when any token, lower-cased with the invariant culture, is present in <c>languageNames</c>.
        /// </summary>
        /// <param name="tokens">The tokens to test, in order; scanning stops at the first match.</param>
        private bool containsLanguageName(IEnumerable<string> tokens)
        {
            foreach (string token in tokens)
            {
                // Invariant lower-casing keeps the lookup independent of the current thread culture.
                if (languageNames.Contains(token.ToLowerInvariant()))
                {
                    return true;
                }
            }

            return false;
        }
        /// <summary>
        /// Scores the link by the novelty of its path and of its directory path. Each of the two that
        /// has not been seen before is remembered and adds <c>scoreUnit</c>; if either one was already
        /// seen, <c>penaltyUnit</c> is added once.
        /// </summary>
        /// <param name="link">The link to evaluate.</param>
        /// <returns>
        /// The result for this rule. When the link has no path the result is returned unscored with
        /// the comment <c>"path null"</c>.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult output = new spiderEvalRuleResult(this);

            if (link.link.path.isNullOrEmpty())
            {
                output.comment = "path null";
                return output;
            }

            bool penalty = false;

            if (paths.Contains(link.link.path))
            {
                output.comment = "";
                penalty = true;
            }
            else
            {
                paths.Add(link.link.path);
                output.score += scoreUnit;
            }

            // The directory check is skipped when there is no directory path, but a penalty already
            // earned by a repeated path must still reach the score below instead of being dropped
            // by an early return.
            if (!link.link.pathDirectoryPath.isNullOrEmpty())
            {
                if (folderPaths.Contains(link.link.pathDirectoryPath))
                {
                    penalty = true;
                }
                else
                {
                    folderPaths.Add(link.link.pathDirectoryPath);
                    output.score += scoreUnit;
                }
            }

            if (penalty)
            {
                output.score += penaltyUnit;
            }

            return output;
        }
예제 #10
0
        /// <summary>
        /// Scores the link by the novelty of its caption: a caption not seen before is remembered and
        /// scores <c>scoreUnit</c>, a repeated caption scores <c>penaltyUnit</c>.
        /// </summary>
        /// <param name="link">The link to evaluate; <c>link.link.caption</c> is read.</param>
        /// <returns>
        /// The result for this rule. Empty, numeric and symbol-only captions are not scored; the
        /// comment states which of those cases applied.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            string caption = link.link.caption;

            if (caption.isNullOrEmpty())
            {
                result.comment = "caption is empty";
            }
            else if (caption.isNumber())
            {
                result.comment = "caption is numeric";
            }
            else if (caption.isSymbolicContentOnly())
            {
                result.comment = "caption is symbolic content";
            }
            else if (knownCaptions.Contains(caption))
            {
                result.score = penaltyUnit;
            }
            else
            {
                knownCaptions.Add(caption);
                result.score = scoreUnit;
            }

            return result;
        }
예제 #11
0
        /// <summary>
        /// Assigns the link to a layer according to where its node sits relative to <c>tree.bestNode</c>:
        /// <c>layerID</c> when the node is the best node or one of its items, <c>layer2ID</c> when it is
        /// an item of one of those items, and <c>layer3ID</c> otherwise.
        /// </summary>
        /// <param name="link">The link whose URL is resolved to a node through <c>tree.GetLinkNode</c>.</param>
        /// <returns>
        /// The result for this rule with its layer set. <c>layer3ID</c> is also used when <c>tree.Gd</c>
        /// is still null after <c>tree.buildGd()</c>, or when no node is found for the link.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            if (tree.Gd == null)
            {
                // Build on demand, then give up if that still did not produce anything.
                tree.buildGd();

                if (tree.Gd == null)
                {
                    result.layer = layer3ID;
                    return result;
                }
            }

            linknodeElement node = tree.GetLinkNode(link.url);

            if (node == null)
            {
                result.layer = layer3ID;
                return result;
            }

            if (tree.bestNode == node || tree.bestNode.items.Values.Contains(node))
            {
                result.layer = layerID;
                return result;
            }

            if (tree.bestNode.items.Values.Any(child => child.items.Values.Contains(node)))
            {
                result.layer = layer2ID;
                return result;
            }

            result.layer = layer3ID;
            return result;
        }
예제 #12
0
        /// <summary>
        /// Creates the seed link for the given root URL and stores it, together with the URL and its
        /// host, on this instance (<c>seedLink</c>, <c>name</c>, <c>domain</c>).
        /// </summary>
        /// <param name="rootUrl">The root URL; it is parsed with <see cref="Uri"/> to obtain the host.</param>
        /// <returns>The newly created seed link, with its domain set to the host of <paramref name="rootUrl"/>.</returns>
        public spiderLink setSeedUrl(string rootUrl)
        {
            link seedTarget = new link(rootUrl, linkProcessFlags.standard);
            Uri parsedRoot = new Uri(rootUrl);

            // The seed link hangs off a synthetic origin page rather than a crawled one.
            crawledPage originCrawl = new crawledPage(ORIGIN_OF_ROOTURL, 0);
            spiderPage rootPage = new spiderPage(originCrawl, 0, 0);
            spiderLink seed = new spiderLink(rootPage, seedTarget, 1);

            seedLink = seed;
            name = rootUrl;
            seed.domain = parsedRoot.Host;
            domain = parsedRoot.Host;
            seed.link.domain = domain;

            return seed;
        }
예제 #13
0
        /// <summary>
        /// Scores the link by whether its URL (path and query) contains a word known to <c>language</c>:
        /// <c>scoreUnit</c> when at least one token is known, <c>penaltyUnit</c> when none is.
        /// </summary>
        /// <param name="link">The link to evaluate; <c>link.link.pathAndQuery</c> is tokenized.</param>
        /// <returns>
        /// The result for this rule. It is returned unscored, with an explanatory comment, when the
        /// path-and-query is empty or yields no tokens.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            if (link.link.pathAndQuery.isNullOrEmpty())
            {
                result.comment = "no pathAndQuery";
                return result;
            }

            List<string> tokens = link.link.pathAndQuery.getStringTokensMinLength();

            if (!tokens.Any())
            {
                result.comment = "no words in url";
                return result;
            }

            // Any() stops at the first known word, so no further tokens are checked after a hit.
            bool hasKnownWord = tokens.Any(token => language.isKnownWord(token));

            result.score = hasKnownWord ? scoreUnit : penaltyUnit;
            return result;
        }
예제 #14
0
        /// <summary>
        /// Assigns the link to <c>layerID</c> when any token of its target has a nominal form listed in
        /// <c>needles</c>, and to <c>layer2ID</c> otherwise.
        /// </summary>
        /// <param name="link">The link whose target is obtained from <c>wRecord.context.targets</c>.</param>
        /// <returns>The result for this rule with its layer set.</returns>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderTarget linkTarget = wRecord.context.targets.GetOrCreateTarget(link, false, false);

            spiderEvalRuleResult result = new spiderEvalRuleResult(this)
            {
                layer = layer2ID
            };

            foreach (IWeightTableTerm token in linkTarget.tokens)
            {
                if (!needles.Contains(token.nominalForm))
                {
                    continue;
                }

                // The first matching token is enough to promote the link.
                result.layer = layerID;
                break;
            }

            return result;
        }