Пример #1
0
        /// <summary>
        /// Scores a link by walking the cross links of its origin page: every cross-linked page
        /// whose outflow links report the link's target hash (via <c>ContainsAsTarget</c>) adds
        /// one <c>scoreUnit</c> to the result.
        /// </summary>
        /// <param name="link">The link to evaluate; its <c>originPage</c> must be set.</param>
        /// <returns>The rule result carrying the accumulated score.</returns>
        /// <exception cref="aceGeneralException">Thrown when <c>link.originPage</c> is null.</exception>
        public override spiderEvalRuleResult evaluate(spiderLink link)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);
            result.score = 0;

            // Nothing can be compared without the page the link was found on.
            if (link.originPage == null)
            {
                throw new aceGeneralException("Link origin page not set!", null, link, "ruleActiveCrossLink->evaluate()");
            }

            foreach (var entry in link.originPage.relationship.crossLinks)
            {
                // Resolve the cross-linked page through the web page registry, keyed by target hash.
                spiderPage linkedPage = wRecord.web.webPages[entry.Value.targetHash];

                if (!linkedPage.relationship.outflowLinks.ContainsAsTarget(link.targetHash))
                {
                    continue;
                }

                result.score += scoreUnit;
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Registers the page under its signature key. When the key is already present, the
        /// registered entry is replaced only if it is not crawled and the new page is crawled.
        /// </summary>
        /// <param name="page">The page to register.</param>
        /// <returns>
        /// <c>true</c> when the page was added under a new key; <c>false</c> whenever the key was
        /// already present (including the case where the registered entry was replaced).
        /// </returns>
        public bool Add(spiderPage page)
        {
            string key = page.getPageSignature(useCription);

            if (!items.ContainsKey(key))
            {
                items.Add(key, page);
                hashList.Add(key);
                return true;
            }

            // Hold on to the registered entry: it is needed both for the crawled check and,
            // after a replacement, to name the instance that was actually replaced in the log.
            var existing = items[key];

            if (existing.webpage.isCrawled == false)
            {
                if (page.webpage.isCrawled)
                {
                    items[key] = page;
                    imbWEMManager.log.log("Temp. spiderPage [" + existing.name + "] replaced by new instance for domain [" + page.webpage.domain + "]");
                }
                else
                {
                    // Deliberately a log entry, not an exception: an uncrawled page never replaces a registered one.
                    aceLog.log("---- This page is not crawled yet and it cannot substitute registrated one ----");
                }
            }
            else
            {
                // Deliberately a log entry, not an exception: a crawled registered page is kept as is.
                aceLog.log("---- shouldn't replace existing page if it is crawled yet ----");
            }

            return false;
        }
Пример #3
0
        /// <summary>
        /// Decides whether a page should be aborted by comparing its score with the first
        /// quartile of the values collected in <c>scoreList</c>.
        /// </summary>
        /// <param name="element">The page under evaluation.</param>
        /// <param name="sRecord">Site record; not read by this implementation.</param>
        /// <param name="resources">Additional resources; not read by this implementation.</param>
        /// <returns>
        /// <c>null</c> when <c>max</c> is below <c>treshold</c>; an aborted solution when the page
        /// has more than <c>treshold</c> cross links and its score is at or below <c>q1</c>;
        /// otherwise a default-constructed solution.
        /// </returns>
        public override spiderObjectiveSolution evaluate(spiderPage element, modelSpiderSiteRecord sRecord, params object[] resources)
        {
            spiderObjectiveSolution solution = new spiderObjectiveSolution();

            if (max < treshold)
            {
                return null;
            }

            bool overThreshold = element.relationship.crossLinks.Count() > treshold;
            if (!overThreshold)
            {
                return solution;
            }

            // Quartiles are computed lazily: int.MinValue in q1 means they have not been set yet.
            if (q1 == int.MinValue)
            {
                double lowerQuartile;
                double upperQuartile;
                Measures.Quartiles(scoreList.ToArray(), out lowerQuartile, out upperQuartile, false);
                q1 = Convert.ToInt32(lowerQuartile);
                q3 = Convert.ToInt32(upperQuartile);
            }

            if (element.marks.score <= q1)
            {
                solution = new spiderObjectiveSolution(element, spiderObjectiveStatus.aborted);
            }

            return solution;
        }
Пример #4
0
        /// <summary>
        /// Records the cross-link count of a page: updates the running <c>min</c> and <c>max</c>
        /// and appends the count to <c>scoreList</c>.
        /// </summary>
        /// <param name="element">The page whose cross links are counted.</param>
        /// <param name="sRecord">Site record; not read by this implementation.</param>
        /// <param name="resources">Additional resources; not read by this implementation.</param>
        public override void learn(spiderPage element, modelSpiderSiteRecord sRecord, params object[] resources)
        {
            int crossLinkCount = element.relationship.crossLinks.Count();

            min = Math.Min(crossLinkCount, min);
            max = Math.Max(crossLinkCount, max);

            double sample = Convert.ToDouble(crossLinkCount);
            scoreList.Add(sample);
        }
Пример #5
0
 /// <summary>
 /// Completes this result item: stores the crawled page and its status, measures the time
 /// elapsed since <c>startTime</c>, and creates the <c>spiderPage</c> linked back to this result.
 /// </summary>
 /// <param name="__page">The crawled page this result describes.</param>
 /// <param name="__iteration">Iteration value passed on to the <c>spiderPage</c> constructor.</param>
 public void finish(crawledPage __page, int __iteration)
 {
     page = __page;
     status = page.status;

     // Measured against the local clock; presumably startTime was taken from DateTime.Now too - TODO confirm
     duration = DateTime.Now - startTime;

     sPage = new spiderPage(page, target.iterationDiscovery, __iteration);
     sPage.spiderResult = this;
 }
Пример #6
0
 /// <summary>
 /// Raises <c>linkScoreMax</c> to the score of the page's spider-result target when that score
 /// is higher. Pages without a spider result are skipped.
 /// </summary>
 /// <param name="page">The page to learn from.</param>
 public override void learn(spiderPage page)
 {
     if (page.spiderResult == null)
     {
         return;
     }

     var targetScore = page.spiderResult.target.marks.score;
     linkScoreMax = Math.Max(targetScore, linkScoreMax);
 }
Пример #7
0
        /// <summary>
        /// Returns the signature used to identify a page, which is its <c>originHash</c>.
        /// </summary>
        /// <param name="sPage">The page to get the signature for.</param>
        /// <param name="cripted">Not read by the method body; the returned value is the same for either setting.</param>
        /// <returns>The value of <c>sPage.originHash</c>.</returns>
        public static string getPageSignature(this spiderPage sPage, bool cripted = false)
        {
            return sPage.originHash;
        }
Пример #8
0
 /// <summary>
 /// Marks this record as initiated, binds it to the given page instance and, when
 /// <c>pGeneralRecord</c> is available, adds this record to its side record sets.
 /// </summary>
 /// <param name="__pageInstance">The page instance this record belongs to.</param>
 public void init(spiderPage __pageInstance)
 {
     state = modelRecordStateEnum.initiated;
     pageInstance = __pageInstance;

     // Without a general record there is nowhere to register.
     if (pGeneralRecord == null)
     {
         return;
     }

     pGeneralRecord.sideRecordSets.AddRecord(instance, this);
 }
Пример #9
0
        /// <summary>
        /// Passes every page of the domain's page set to the site record's context as a link,
        /// using a page built from the domain URL as the origin of each link.
        /// </summary>
        /// <param name="wRecord">Site record whose context processes the links.</param>
        /// <param name="domain">Domain supplying the URL and the page set.</param>
        public void SetActiveTargets(modelSpiderSiteRecord wRecord, indexDomain domain)
        {
            List<indexPage> domainPages = domain.getPageSet();

            // One shared origin page, built from the domain URL, is reused for all links.
            spiderPage originPage = new spiderPage(new crawledPage(domain.url, 0), 0, 0);

            foreach (indexPage indexed in domainPages)
            {
                wRecord.context.processLink(new link(indexed.url), originPage, false);
            }
        }
Пример #10
0
        /// <summary>
        /// Scores a page by its inflow link count relative to <c>maxInboundLinks</c>.
        /// </summary>
        /// <param name="page">The page to evaluate.</param>
        /// <returns>
        /// A passive result. When <c>maxInboundLinks</c> is positive, the score is <c>scoreUnit</c>
        /// scaled by the ratio and the comment holds the ratio formatted as a percentage;
        /// otherwise neither is set here.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this, spiderEvalRuleResultEnum.passive);

            int inboundCount = page.relationship.inflowLinks.Count;

            // No reference maximum means no ratio can be formed.
            if (maxInboundLinks <= 0)
            {
                return result;
            }

            decimal share = Convert.ToDecimal(inboundCount) / Convert.ToDecimal(maxInboundLinks);
            decimal weighted = scoreUnit * share;

            result.comment = share.ToString("P2");
            result.score = Convert.ToInt32(weighted);

            return result;
        }
        /// <summary>
        /// Scores a page by the words of its caption: the first word whose <c>titleWords</c> count
        /// is exactly 1 and which the language recognises earns <c>scoreUnit</c>; if no such word
        /// is found the page receives <c>penaltyUnit</c>.
        /// </summary>
        /// <param name="page">The page to evaluate.</param>
        /// <returns>
        /// A passive result. The comment names the last caption word with a count of 1 that was
        /// inspected, if any.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult output = new spiderEvalRuleResult(this, spiderEvalRuleResultEnum.passive);

            List<string> words = page.webpage.pageCaption.getStringTokensMinLength();

            foreach (string wrd in words)
            {
                if (titleWords[wrd] != 1)
                {
                    continue;
                }

                output.comment = " [" + wrd + "]:IsUni ";

                if (language.isKnownWord(wrd))
                {
                    output.comment += "& IsKnown";
                    output.score    = scoreUnit;
                    return output;
                }
            }

            // Reaching this point means no caption word qualified, so the penalty always applies.
            output.score = penaltyUnit;

            return output;
        }
Пример #12
0
        /// <summary>
        /// Builds the seed link for the given root URL and stores it on this instance, together
        /// with the name (the URL itself) and the domain (the URL's host).
        /// </summary>
        /// <param name="rootUrl">Root URL; it is parsed with <see cref="Uri"/>.</param>
        /// <returns>The created seed link, which is also assigned to <c>seedLink</c>.</returns>
        public spiderLink setSeedUrl(string rootUrl)
        {
            link rootLink = new link(rootUrl, linkProcessFlags.standard);
            Uri parsedRoot = new Uri(rootUrl);
            string host = parsedRoot.Host;

            // The seed link needs an origin page; one is built from the ORIGIN_OF_ROOTURL constant.
            spiderPage originPage = new spiderPage(new crawledPage(ORIGIN_OF_ROOTURL, 0), 0, 0);
            spiderLink seed = new spiderLink(originPage, rootLink, 1);

            seedLink = seed;
            name = rootUrl;
            seed.domain = host;
            domain = host;
            seed.link.domain = domain;

            return seed;
        }
Пример #13
0
        /// <summary>
        /// Scores a page by the score of its spider-result target relative to <c>linkScoreMax</c>.
        /// </summary>
        /// <param name="page">The page to evaluate.</param>
        /// <returns>
        /// An active result. Score and comment are set only when the page has a spider result and
        /// <c>linkScoreMax</c> is positive.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this, spiderEvalRuleResultEnum.active);

            if (page.spiderResult == null || linkScoreMax <= 0)
            {
                return result;
            }

            int targetScore = page.spiderResult.target.marks.score;

            decimal share = Convert.ToDecimal(targetScore) / Convert.ToDecimal(linkScoreMax);
            decimal weighted = scoreUnit * share;

            result.comment = share.ToString("P2");
            result.score = Convert.ToInt32(weighted);

            return result;
        }
Пример #14
0
        /// <summary>
        /// Indexes a page and a link under the given URL. An existing entry is never overwritten
        /// and null values are not stored. When both indexes contain the URL afterwards, the URL
        /// is also added to <c>urlRegistar</c>.
        /// </summary>
        /// <param name="url">The URL used as the key in both indexes.</param>
        /// <param name="page">The page to index; ignored when null.</param>
        /// <param name="link">The link to index; ignored when null.</param>
        /// <returns><c>true</c> when both indexes contain the URL after the call; otherwise <c>false</c>.</returns>
        public bool Add(string url, spiderPage page, spiderLink link)
        {
            if (!pageIndex.ContainsKey(url) && page != null)
            {
                pageIndex.Add(url, page);
            }

            if (!linkIndex.ContainsKey(url) && link != null)
            {
                linkIndex.Add(url, link);
            }

            bool indexedInBoth = pageIndex.ContainsKey(url) && linkIndex.ContainsKey(url);

            if (indexedInBoth)
            {
                urlRegistar.Add(url);
            }

            return indexedInBoth;
        }
Пример #15
0
        /// <summary>
        /// Scores a page by its cross-link count relative to <c>maxCrosslinkScore</c>.
        /// </summary>
        /// <param name="page">The page to evaluate.</param>
        /// <returns>
        /// The rule result. When <c>maxCrosslinkScore</c> is positive, the score is <c>scoreUnit</c>
        /// scaled by the ratio and the comment holds the ratio as a percentage; otherwise only the
        /// comment is set, to a fixed marker.
        /// </returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this);

            int crossLinkCount = page.relationship.crossLinks.Count();

            // No reference maximum: flag it in the comment and leave the score untouched.
            if (maxCrosslinkScore <= 0)
            {
                result.comment = " [maxCLS=0] ";
                return result;
            }

            decimal share = Convert.ToDecimal(crossLinkCount) / Convert.ToDecimal(maxCrosslinkScore);
            decimal weighted = scoreUnit * share;

            result.comment = share.ToString("P2");
            result.score = Convert.ToInt32(weighted);

            return result;
        }
Пример #16
0
        /// <summary>
        /// Scores a page by how often its caption occurs in <c>pageTitleCount</c>: a count of
        /// exactly 1 earns <c>scoreUnit</c>, anything else earns <c>penaltyUnit</c>.
        /// </summary>
        /// <param name="page">The page to evaluate.</param>
        /// <returns>The rule result; its comment reports the caption's count.</returns>
        public override spiderEvalRuleResult evaluate(spiderPage page)
        {
            spiderEvalRuleResult result = new spiderEvalRuleResult(this, mode);

            result.comment = " freq:[" + pageTitleCount[page.webpage.pageCaption] + "] ";

            bool captionIsUnique = pageTitleCount[page.webpage.pageCaption] == 1;

            result.score = captionIsUnique ? scoreUnit : penaltyUnit;

            return result;
        }
 /// <summary>
 /// Adds the tokens of the page's caption to <c>titleWords</c>; called before evaluation.
 /// </summary>
 /// <param name="page">The page whose caption is tokenized.</param>
 public override void learn(spiderPage page)
 {
     var captionTokens = page.webpage.pageCaption.getStringTokensMinLength();

     titleWords.AddInstanceRange(captionTokens);
 }
Пример #18
0
 /// <summary>
 /// Raises <c>maxInboundLinks</c> to the page's inflow link count when that count is higher;
 /// called before evaluation.
 /// </summary>
 /// <param name="page">The page whose inflow links are counted.</param>
 public override void learn(spiderPage page)
 {
     var inboundCount = page.relationship.inflowLinks.Count;

     maxInboundLinks = Math.Max(inboundCount, maxInboundLinks);
 }
Пример #19
0
 /// <summary>
 /// Tells whether <c>crossLinks</c> contains an entry keyed by the given page's signature.
 /// </summary>
 /// <param name="whom">The page whose signature is looked up.</param>
 /// <returns><c>true</c> when the signature is a key in <c>crossLinks</c>; otherwise <c>false</c>.</returns>
 public bool HasCrossLinkWith(spiderPage whom)
 {
     string signature = whom.getPageSignature(true);

     return crossLinks.ContainsKey(signature);
 }
 /// <summary>
 /// Evaluates the specified page against this rule.
 /// </summary>
 /// <param name="page">The page to evaluate.</param>
 /// <returns>The <c>spiderEvalRuleResult</c> produced by the implementing rule.</returns>
 public abstract spiderEvalRuleResult evaluate(spiderPage page);
 /// <summary>
 /// Takes information from the page; called before evaluation.
 /// </summary>
 /// <param name="page">The page to take information from.</param>
 public abstract void learn(spiderPage page);
Пример #22
0
 /// <summary>
 /// Raises <c>maxCrosslinkScore</c> to the page's cross-link count when that count is higher;
 /// called before evaluation.
 /// </summary>
 /// <param name="page">The page whose cross links are counted.</param>
 public override void learn(spiderPage page)
 {
     var crossLinkCount = page.relationship.crossLinks.Count();

     maxCrosslinkScore = Math.Max(crossLinkCount, maxCrosslinkScore);
 }
Пример #23
0
 /// <summary>
 /// Adds the page's caption as an instance to <c>pageTitleCount</c>; called before evaluation.
 /// </summary>
 /// <param name="page">The page whose caption is counted.</param>
 public override void learn(spiderPage page)
 {
     var caption = page.webpage.pageCaption;

     pageTitleCount.AddInstance(caption, "webpage.pageCaption @ learn() in pageruleTitleUnique");
 }