/// <summary>
/// Builds the initial citation model for this vertical: fetches the Articles the
/// Pickscomb repository returns for the window [now - MaxHoursConsidered hours, now]
/// (UTC) and passes each one to <c>AddCitationsInArticle</c>.
/// </summary>
/// <exception cref="Exception">
/// Thrown when the repository fetch fails. The original failure is attached as
/// <see cref="Exception.InnerException"/> so its type and stack trace are preserved.
/// </exception>
private void initModelFromRepo()
{
    string msg;

    // Take a single clock reading so the window is exactly MaxHoursConsidered hours wide.
    DateTime endt = DateTime.UtcNow;
    DateTime startt = endt.AddHours(-1 * MaxHoursConsidered);

    msg = String.Format("initModelFromRepo(): Building Initial Model for Articles in Vertical {0} \n\t from startTime [{1}] to endTime [{2}].", VerticalConstants.GetFriendlyName(VerticalId), startt.ToString(), endt.ToString());
    logger.Info(msg);
    logger.Debug("Retrieving Articles from Pickscomb repository.");

    List<Article> articles = null;
    try
    {
        IPickscombRepository repo = PickscombRepository.Instance;
        logger.Info("PickscombRepository instantiated. Fetching Articles . . .");
        articles = repo.GetArticles(VerticalId, startt, endt).ToList<Article>();
    }
    catch (Exception ex)
    {
        const string failure = "VerticalCitationStore::initModelFromRepo() failed to fetch Articles from Pickscomb repository.";
        logger.Error(failure);
        // Log the full exception (type, message, stack trace), not just the message.
        logger.Error(ex.ToString());
        // Keep the root cause attached for callers and upstream logging.
        throw new Exception(failure, ex);
    }

    msg = String.Format("Retrieved {0} Articles from the repository. Building the initial model . . .", articles.Count);
    logger.Info(msg);

    uint count = 0;
    foreach (Article art in articles)
    {
        msg = string.Format("Adding #[{0}]", ++count);
        logger.Info(msg);
        AddCitationsInArticle(art);
    }

    logger.Info("Initial CitationStore Model build complete.\n\n");
}
/// <summary>
/// Ranks the cited URLs of one vertical by time-weighted citation popularity,
/// highest score first, and records the result in this instance's per-vertical tables.
/// </summary>
/// <remarks>
/// URLs present in <c>CitationStore.RepoArticleUrlGuidIndex</c> for the vertical are
/// added to <c>_allRankedArticleMetrics[vIndex]</c> (score, article Guid); all other
/// URLs are added to <c>_rankedURLNotInDB[vIndex]</c> with their rank and score.
/// <c>dataSetV[vIndex]</c> is cleared on entry and set to true only when the method
/// runs to completion. <c>NPreFetched</c> is reset to 0.
/// Articles are deliberately not fetched from or saved to the repository here; the
/// earlier code that did so was disabled as too expensive.
/// </remarks>
/// <param name="vIndex">Index of the vertical in <c>VerticalConstants.AllVerticals</c>.</param>
/// <param name="numRankedArticlesToExtract">
/// Currently unused. It was only consumed by the disabled code that wrote scores back
/// to the repository for the top-ranked articles. Kept for interface compatibility.
/// </param>
public void rankOnCitationPopularity(uint vIndex, uint numRankedArticlesToExtract)
{
    NPreFetched = 0;
    dataSetV[vIndex] = false;

    Guid vid = VerticalConstants.AllVerticals[vIndex];

    // NOTE(review): 'repo' is not used below. It is retained only because reading
    // PickscombRepository.Instance may initialize the singleton; confirm and remove.
    IPickscombRepository repo = PickscombRepository.Instance;

    Guid artid;
    uint ranking = 0, inDBCount = 0;

    CitationStore.Instance.CStore[vid].setTimeWeightedCitationPopularity();
    Dictionary<string, double> rankedURLs = CitationStore.Instance.CStore[vid].getTimeWeightedCitationPopularityScores();

    // Iterate the ordered sequence directly. Sorting into a new Dictionary and
    // discarding it (as before) had no effect, and Dictionary enumeration order is
    // unspecified anyway, so 'ranking' would not have followed the scores.
    foreach (KeyValuePair<string, double> url in rankedURLs.OrderByDescending(y => y.Value))
    {
        ++ranking;

        if (CitationStore.RepoArticleUrlGuidIndex[vid].TryGetValue(url.Key, out artid))
        {
            // Article for this URL is in DB; map to Guid.
            ++inDBCount;

            // NOTE(review): this collection is keyed by score. If it rejects duplicate
            // keys, two articles with equal scores will throw here - confirm its type.
            _allRankedArticleMetrics[vIndex].Add(url.Value, artid);

            // Log only the top 100 (inDBCount is already incremented, hence <=).
            if (inDBCount <= 100)
            {
                logger.DebugFormat(" Found Article Scored [{0}] in DB with Guid [{1}] Ranking [{2}]", url.Value, artid, inDBCount);
            }
        }
        else
        {
            // No Article in the DB for this cited URL.
            _rankedURLNotInDB[vIndex].Add(url.Key, new RankingMetrics(ranking, url.Value));
        }
    }

    logger.Debug("\n");
    logger.DebugFormat("~~~ Found [{0}] Cited Articles in DB.", inDBCount);
    logger.DebugFormat("~~~ Found [{0}] Total Cited Article URLs.", ranking);
    if (ranking > 0)
    {
        uint notInDb = ranking - inDBCount;
        // Floating-point division: unsigned integer division truncated the percentage
        // before the F4 format was applied.
        logger.DebugFormat("~~~ [{0}] ({1:F4}%) Citated URL's do not have Articles in the database.\n\n", notInDb, notInDb * 100.0 / ranking);
    }

    // Articles in the DB that have no citations are intentionally not appended to the
    // ranking here: that would require fetching them from the repository, which is
    // too expensive.

    _averagePopularityScore[vIndex] = CitationStore.Instance.CStore[vid].AveragePopularityScore;
    dataSetV[vIndex] = true;
}