/// <summary>
        /// Creates the web site repository for a crawled domain, or extends an existing one, using the loaded spider targets.
        /// </summary>
        /// <param name="targetCollection">Spider targets of the crawl; only the targets returned by <c>GetLoaded()</c> are processed.</param>
        /// <param name="domainInfo">Domain information used to obtain the repository and to label the log messages.</param>
        /// <param name="output">Optional log builder, forwarded to the repository lookup and to the save call.</param>
        /// <returns>
        /// The created or updated web site repository.
        /// </returns>
        /// <remarks>
        /// Every loaded target accepted by <c>isTargetProper</c> is handed to <c>BuildWebPage</c> together with the repository.
        /// Afterwards the repository entry is written to the site table and the repository data structure is saved.
        /// </remarks>
        public imbMCWebSite BuildWebSite(ISpiderTargetCollection targetCollection, domainAnalysis domainInfo, ILogBuilder output = null)
        {
            // NOTE(review): the 'true' flag presumably requests creation when no repository exists yet - confirm against GetWebSite.
            imbMCWebSite repo = GetWebSite(domainInfo, true, output);

            // Page count before this build; the difference afterwards tells how many pages were added.
            int pagesBefore = repo.pageTable.Count;

            if (pagesBefore == 0)
            {
                loger.log("Web site repository created [" + domainInfo.domainName + "]");
            }

            List<ISpiderTarget> loadedTargets = targetCollection.GetLoaded();

            foreach (ISpiderTarget candidate in loadedTargets)
            {
                if (!isTargetProper(candidate))
                {
                    continue;
                }

                BuildWebPage(candidate, repo);
            }

            int pagesAdded = repo.pageTable.Count - pagesBefore;

            if (pagesAdded > 0)
            {
                int pagesTotal = pagesBefore + pagesAdded;
                loger.log("Repository [" + domainInfo.domainName + "] expanded for [" + pagesAdded + "] new pages, in total [" + pagesTotal + "] pages.");
            }

            // Register (or refresh) the repository entry, then persist the repository itself.
            siteTable.AddOrUpdate(repo.entry);
            repo.SaveDataStructure(folder, output);

            return repo;
        }
Esempio n. 2
0
        /// <summary>
        /// Rebuilds the hub/authority score table for <paramref name="targets"/> and iterates the score update
        /// until the largest recorded change drops below <paramref name="convergence"/> or the step limit is reached.
        /// </summary>
        /// <param name="targets">Targets to score; each one receives a new <see cref="HITSScore"/> keyed by its <c>targetHash</c>.</param>
        /// <param name="convergence">The iteration stops as soon as the largest <c>a_delta</c> / <c>h_delta</c> of any target is below this value.</param>
        /// <param name="checkSteps">Maximum number of iterations performed.</param>
        /// <remarks>
        /// When <c>GetAceMatrixRotated()</c> returns <c>null</c> the method returns early and every target keeps its
        /// initial <c>HITSScore(1, 1)</c>.
        /// </remarks>
        public void recalculate(ISpiderTargetCollection targets, double convergence = 0.0001, int checkSteps = 20)
        {
            targetToScore = new Dictionary<string, HITSScore>();
            foreach (ISpiderTarget target in targets)
            {
                targetToScore.Add(target.targetHash, new HITSScore(1, 1));
            }

            aceRelationMatrix<ISpiderTarget, ISpiderTarget, int> matrix = targets.GetAceMatrixRotated();

            if (matrix == null)
            {
                return;
            }

            for (int i = 0; i < checkSteps; i++)
            {
                // Start a fresh change record so the deltas read at the end of this iteration describe this iteration only.
                foreach (ISpiderTarget target in targets)
                {
                    targetToScore[target.targetHash].resetChangeMeasure();
                }

                // Authority update.
                // NOTE(review): only the matrix axes are enumerated here; the relation values stored in the matrix are
                // never read, so every X target accumulates the hub score of every Y target. Confirm this is intended.
                foreach (ISpiderTarget xTarget in matrix.GetXAxis())
                {
                    HITSScore xScore = targetToScore[xTarget.targetHash];
                    foreach (ISpiderTarget yTarget in matrix.GetYAxis())
                    {
                        xScore.a += targetToScore[yTarget.targetHash].h;
                    }
                }

                // Normalize authority by the Euclidean norm of all authority scores.
                double nfact = 0;
                foreach (ISpiderTarget target in targets)
                {
                    nfact += Math.Pow(targetToScore[target.targetHash].a, 2);
                }
                nfact = Math.Sqrt(nfact);

                // A zero norm means every authority score is zero; skip the division to avoid NaN.
                if (nfact != 0)
                {
                    foreach (ISpiderTarget target in targets)
                    {
                        HITSScore score = targetToScore[target.targetHash];
                        score.a = score.a / nfact;
                    }
                }

                // Hub update (same axis traversal as the authority update, roles swapped).
                foreach (ISpiderTarget xTarget in matrix.GetXAxis())
                {
                    HITSScore xScore = targetToScore[xTarget.targetHash];
                    foreach (ISpiderTarget yTarget in matrix.GetYAxis())
                    {
                        targetToScore[yTarget.targetHash].h += xScore.a;
                    }
                }

                // Normalize hub by the Euclidean norm of all hub scores.
                nfact = 0;
                foreach (ISpiderTarget target in targets)
                {
                    nfact += Math.Pow(targetToScore[target.targetHash].h, 2);
                }
                nfact = Math.Sqrt(nfact);

                // The change record must NOT be reset here: the deltas gathered during this iteration are read
                // right below, and resetting them first would drop the authority change from the convergence test.
                if (nfact != 0)
                {
                    foreach (ISpiderTarget target in targets)
                    {
                        HITSScore score = targetToScore[target.targetHash];
                        score.h = score.h / nfact;
                    }
                }

                // Largest authority/hub change recorded for any target during this iteration.
                double maxChange = 0;
                foreach (ISpiderTarget target in targets)
                {
                    HITSScore score = targetToScore[target.targetHash];
                    maxChange = Math.Max(score.a_delta, maxChange);
                    maxChange = Math.Max(score.h_delta, maxChange);
                }

                if (maxChange < convergence)
                {
                    break;
                }
            }
        }