/// <summary>
/// Creates or refreshes the web site repository for a crawled domain, then registers and saves it.
/// </summary>
/// <param name="targetCollection">Crawl targets; only the ones returned by <c>GetLoaded()</c> are considered.</param>
/// <param name="domainInfo">Domain the repository is looked up for; its <c>domainName</c> is used in the log messages.</param>
/// <param name="output">Optional log builder, forwarded to <c>GetWebSite</c> and to <c>SaveDataStructure</c>.</param>
/// <returns>
/// The repository obtained from <c>GetWebSite</c>, after the page-building pass and after it was saved.
/// </returns>
/// <remarks>
/// <c>BuildWebPage</c> is called for every loaded target accepted by <c>isTargetProper</c>
/// (presumably adding <see cref="imbMCWebPage" /> entries to the <see cref="imbMCWebSite" /> page table - confirm against <c>BuildWebPage</c>).
/// </remarks>
public imbMCWebSite BuildWebSite(ISpiderTargetCollection targetCollection, domainAnalysis domainInfo, ILogBuilder output = null)
{
    // NOTE(review): the meaning of the 'true' flag is defined by GetWebSite, which is not visible here.
    imbMCWebSite repo = GetWebSite(domainInfo, true, output);

    // Page count on entry; zero is reported as a freshly created repository.
    int initialPages = repo.pageTable.Count;
    if (initialPages == 0)
    {
        loger.log("Web site repository created [" + domainInfo.domainName + "]");
    }

    List<ISpiderTarget> loadedTargets = targetCollection.GetLoaded();
    foreach (ISpiderTarget candidate in loadedTargets)
    {
        if (!isTargetProper(candidate))
        {
            continue;
        }

        BuildWebPage(candidate, repo);
    }

    // Report growth only when the page table actually got bigger during this call.
    int totalPages = repo.pageTable.Count;
    int addedPages = totalPages - initialPages;
    if (addedPages > 0)
    {
        loger.log("Repository [" + domainInfo.domainName + "] expanded for [" + addedPages + "] new pages, in total [" + totalPages + "] pages.");
    }

    siteTable.AddOrUpdate(repo.entry);
    repo.SaveDataStructure(folder, output);

    return repo;
}
/// <summary>
/// Rebuilds <c>targetToScore</c> for the given targets and iteratively updates the authority (<c>a</c>) and hub (<c>h</c>) values.
/// </summary>
/// <param name="targets">Targets to score; each one gets a new <see cref="HITSScore" /> stored under its <c>targetHash</c>.</param>
/// <param name="convergence">Iteration stops early once the largest observed <c>a_delta</c> / <c>h_delta</c> is below this value.</param>
/// <param name="checkSteps">Maximum number of iterations to run.</param>
/// <remarks>
/// The score table is rebuilt before the matrix is requested, so when <c>GetAceMatrixRotated()</c> returns null
/// every target keeps its initial <c>HITSScore(1, 1)</c>.
/// </remarks>
public void recalculate(ISpiderTargetCollection targets, double convergence = 0.0001, int checkSteps = 20)
{
    targetToScore = new Dictionary<string, HITSScore>();

    foreach (ISpiderTarget seed in targets)
    {
        targetToScore.Add(seed.targetHash, new HITSScore(1, 1));
    }

    aceRelationMatrix<ISpiderTarget, ISpiderTarget, int> relations = targets.GetAceMatrixRotated();
    if (relations == null)
    {
        return;
    }

    for (int step = 0; step < checkSteps; step++)
    {
        // Clear the change record of every score so that convergence can be judged at the end of this iteration.
        foreach (ISpiderTarget target in targets)
        {
            targetToScore[target.targetHash].resetChangeMeasure();
        }

        // Authority pass: the X-axis entry's authority grows by the Y-axis entry's hub value.
        // NOTE(review): every X/Y pair is visited and no cell value is read from the matrix - confirm this is intended.
        foreach (ISpiderTarget column in relations.GetXAxis())
        {
            foreach (ISpiderTarget row in relations.GetYAxis())
            {
                HITSScore authorityScore = targetToScore[column.targetHash];
                authorityScore.a += targetToScore[row.targetHash].h;
            }
        }

        // Scale authority values by the square root of the sum of their squares.
        double authorityNorm = 0;
        foreach (ISpiderTarget target in targets)
        {
            authorityNorm += Math.Pow(targetToScore[target.targetHash].a, 2);
        }

        authorityNorm = Math.Sqrt(authorityNorm);

        foreach (ISpiderTarget target in targets)
        {
            // A zero norm would divide by zero, so the values are left untouched in that case.
            if (authorityNorm != 0)
            {
                HITSScore score = targetToScore[target.targetHash];
                score.a = score.a / authorityNorm;
            }
        }

        // Hub pass: the Y-axis entry's hub value grows by the X-axis entry's (already normalized) authority.
        foreach (ISpiderTarget column in relations.GetXAxis())
        {
            foreach (ISpiderTarget row in relations.GetYAxis())
            {
                HITSScore hubScore = targetToScore[row.targetHash];
                hubScore.h += targetToScore[column.targetHash].a;
            }
        }

        // Scale hub values the same way.
        double hubNorm = 0;
        foreach (ISpiderTarget target in targets)
        {
            hubNorm += Math.Pow(targetToScore[target.targetHash].h, 2);
        }

        hubNorm = Math.Sqrt(hubNorm);

        foreach (ISpiderTarget target in targets)
        {
            HITSScore score = targetToScore[target.targetHash];

            // NOTE(review): the change record is reset a second time here, after the authority pass;
            // what exactly it clears is defined in HITSScore - confirm the a_delta read below is still meaningful.
            score.resetChangeMeasure();

            if (hubNorm != 0)
            {
                score.h = score.h / hubNorm;
            }
        }

        // Largest change recorded on any score decides whether iteration can stop.
        double largestChange = 0;
        foreach (ISpiderTarget target in targets)
        {
            HITSScore score = targetToScore[target.targetHash];
            largestChange = Math.Max(score.a_delta, largestChange);
            largestChange = Math.Max(score.h_delta, largestChange);
        }

        if (largestChange < convergence)
        {
            break;
        }
    }
}