Exemplo n.º 1
0
        //    var matrix = wRecord.context.targets.GetLinkMatrix();
        //    pageRank = new PageRank(matrix, alpha, convergence, checkSteps);

        //    Double[] dbl = pageRank.ComputePageRank();
        //    List<Int32> pri = new List<int>();
        //        foreach (Double db in dbl)
        //        {
        //            pri.Add(Convert.ToInt32(db* scoreUnit));
        //}

        //ranks = wRecord.context.targets.linkMatrix.MapToY(pri);

        // public HITSRank(linkMatrix


        /// <summary>
        /// Gets the <see cref="HITSScore"/> stored for the specified target.
        /// </summary>
        /// <param name="target">Target whose <c>targetHash</c> is used as the lookup key.</param>
        /// <returns>The entry of <c>targetToScore</c> registered under the target's hash.</returns>
        public HITSScore this[ISpiderTarget target]
        {
            get
            {
                // Scores are keyed by the target hash, not by the target instance itself.
                var hash = target.targetHash;
                return targetToScore[hash];
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Counts the outflow links of <paramref name="itemX"/>'s page that target <paramref name="itemY"/>.
        /// </summary>
        /// <param name="itemY">Target whose <c>targetHash</c> the links must point to.</param>
        /// <param name="itemX">Target whose page supplies the outflow links.</param>
        /// <returns>
        /// Number of matching outflow links; <c>0</c> when <paramref name="itemX"/> has no
        /// <see cref="spiderPage"/> attached or when no link list is returned.
        /// </returns>
        public int getLinkCountRotated(ISpiderTarget itemY, ISpiderTarget itemX)
        {
            spiderPage page = itemX.page as spiderPage;

            // The 'as' cast yields null when the page is missing or is not a spiderPage;
            // without link information there is nothing to count.
            if (page == null)
            {
                return 0;
            }

            List<spiderLink> links = page.relationship.outflowLinks.GetTargetingTo(itemY.targetHash);

            if (links == null)
            {
                return 0;
            }

            return links.Count;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Handles the "target page attached" event: when the active repository accepts the target,
        /// builds its web page repository, fills in the index entry and source code, and saves it.
        /// </summary>
        /// <param name="__wRecord">Site record that raised the event; its <c>domain</c> selects the web site repository.</param>
        /// <param name="__args">Event arguments carrying the target and its HTML/XML source.</param>
        private void onTargetPageAttached(modelSpiderSiteRecord __wRecord, modelSpiderSiteRecordEventArgs __args)
        {
            imbMCRepository repository = mcm.activeRepository;

            // The site repository is resolved before the filter check, as in the original flow.
            imbMCWebSite siteRepo = webSiteReposByDomain[__wRecord.domain];

            ISpiderTarget attached = __args.Target;

            // Targets rejected by the repository settings are ignored.
            if (!repository.isTargetProper(attached))
            {
                return;
            }

            imbMCWebPage pageRepo = repository.BuildWebPage(attached, siteRepo, loger);

            pageRepo.indexEntry     = imbWEMManager.index.pageIndexTable.GetPageForUrl(attached.url);
            pageRepo.HtmlSourceCode = __args.sourceHtml;
            pageRepo.XmlSourceCode  = __args.sourceXml;

            pageRepo.SaveDataStructure(siteRepo.folder, loger);
        }
 /// <summary>
 /// Creates event arguments for the given target and, when the target is a
 /// <see cref="spiderTarget"/>, captures its source code and HTML document.
 /// </summary>
 /// <param name="__target">Target the event refers to.</param>
 /// <param name="__type">Kind of event being reported.</param>
 public modelSpiderSiteRecordEventArgs(ISpiderTarget __target, modelSpiderSiteRecordEventType __type = modelSpiderSiteRecordEventType.DLCTargetPageAttached)
 {
     Target = __target;

     if (__target is spiderTarget)
     {
         spiderTarget concrete = (spiderTarget)__target;
         var result = concrete.page.webpage.result;

         sourceHtml = result.sourceCode;
         htmlDoc    = result.HtmlDocument;

         // The XML source is filled only when a parsed document with a root node exists.
         // NOTE(review): it receives the same sourceCode as sourceHtml - confirm this is intended.
         if (htmlDoc != null && htmlDoc.DocumentNode != null)
         {
             sourceXml = result.sourceCode;
         }
     }

     type = __type;
 }
Exemplo n.º 5
0
        /// <summary>
        /// Checks whether the target passes the exclusion filters enabled in the repository settings.
        /// </summary>
        /// <param name="target">The target to evaluate.</param>
        /// <returns>
        ///   <c>true</c> when the target is not <c>null</c> and is not excluded as a duplicate
        ///   or as an irrelevant page; otherwise, <c>false</c>.
        /// </returns>
        public bool isTargetProper(ISpiderTarget target)
        {
            // Both filters are always evaluated, each only consulting the target when its switch is on.
            bool excludedAsDuplicate  = doExcludeDuplicatePages && target.isDuplicate;
            bool excludedAsIrrelevant = doExcludeIrrelevantPages && !target.IsRelevant;

            if (excludedAsDuplicate || excludedAsIrrelevant)
            {
                return false;
            }

            // With no filter enabled a null target still counts as not proper.
            return target != null;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Creates or refreshes the web page repository for a crawled <see cref="ISpiderTarget"/>
        /// and saves it into the site folder.
        /// </summary>
        /// <param name="target">Crawl information about the page</param>
        /// <param name="site">The site repository the page belongs to</param>
        /// <param name="output">Optional console/log output</param>
        /// <returns>The web page repository after it was populated and saved</returns>
        public imbMCWebPage BuildWebPage(ISpiderTarget target, imbMCWebSite site, ILogBuilder output = null)
        {
            // NOTE(review): the 'true' argument presumably requests creation when missing - confirm against GetWebPage.
            imbMCWebPage webPage = GetWebPage(site, target.url, true, output);
            ISpiderPage crawledPage = target.page;

            // Fill the table entry first, then let the page deploy itself from that entry.
            webPage.entry.AnchorTextAll = crawledPage.captions.toCsvInLine(",");
            webPage.entry.ClickDepth = crawledPage.iterationDiscovery;
            webPage.entry.ResolvedRelativeURL = site.domainInfo.GetURLWithoutDomainName(target.url);
            webPage.deploy(webPage.entry);

            webPage.indexEntry = target.GetIndexPage();
            webPage.TextContent = target.pageText;
            webPage.name = target.page.name;

            // The HTML source is set only when a parsed document is available.
            var parsedDocument = target.GetHtmlDocument();
            if (parsedDocument != null)
            {
                webPage.HtmlSourceCode = parsedDocument.DocumentNode.OuterHtml;
            }

            webPage.Blocks = new List<imbCommonModels.contentBlock.nodeBlock>();
            webPage.TermTable = target.tokens.GetCompiledTable(output);

            // Copy the target's content blocks into the freshly created list.
            target.contentBlocks.ForEach(block => webPage.Blocks.Add(block));

            site.pageTable.AddOrUpdate(webPage.entry);
            webPage.SaveDataStructure(site.folder, output);

            return webPage;
        }
Exemplo n.º 7
0
 /// <summary>
 /// Explicit <see cref="ISpiderTargetCollection"/> implementation: casts both arguments to
 /// <see cref="spiderTarget"/> and forwards them to <c>getLinkCountRotated</c>.
 /// </summary>
 /// <param name="itemY">First target, passed through as the Y argument.</param>
 /// <param name="itemX">Second target, passed through as the X argument.</param>
 /// <returns>The value returned by the forwarded call.</returns>
 int ISpiderTargetCollection.getLinkCountRotated(ISpiderTarget itemY, ISpiderTarget itemX)
 {
     spiderTarget targetY = (spiderTarget)itemY;
     spiderTarget targetX = (spiderTarget)itemX;

     return getLinkCountRotated(targetY, targetX);
 }
Exemplo n.º 8
0
 /// <summary>
 /// Explicit <see cref="ISpiderTargetCollection"/> implementation: casts both arguments to
 /// <see cref="spiderTarget"/> and forwards them to <c>getLinkCount</c>.
 /// </summary>
 /// <param name="itemX">First target, passed through as the X argument.</param>
 /// <param name="itemY">Second target, passed through as the Y argument.</param>
 /// <returns>The value returned by the forwarded call.</returns>
 int ISpiderTargetCollection.getLinkCount(ISpiderTarget itemX, ISpiderTarget itemY)
 {
     spiderTarget targetX = (spiderTarget)itemX;
     spiderTarget targetY = (spiderTarget)itemY;

     return getLinkCount(targetX, targetY);
 }