public void rankOnCitationPopularity(Guid vertical, CancellationTokenSource cancellationToken)
{
    uint vIndex = VerticalConstants.getVerticalIndex(vertical);
    rankOnCitationPopularity(vIndex, NToIndex, cancellationToken);
    dataSetV[vIndex] = true;
}
public void reRank(Guid vertical, CancellationTokenSource cancellationToken)
{
    uint vIndex = VerticalConstants.getVerticalIndex(vertical);
    dataSetV[vIndex] = false;
    rankOnCitationPopularity(vertical, cancellationToken);
}
// Rank for a particular vertical only.
public void rankOnCitationPopularity(Guid vertical)
{
    uint vIndex = VerticalConstants.getVerticalIndex(vertical);
    rankOnCitationPopularity(vIndex, NToIndex);
    dataSetV[vIndex] = true;
}
/*
 * public void reRank()
 * {
 *     dataSetV[vIndex] = false;
 *     rankOnCitationPopularity();
 * }
 */
public void reRank(Guid vertical)
{
    uint vIndex = VerticalConstants.getVerticalIndex(vertical);
    dataSetV[vIndex] = false;
    rankOnCitationPopularity(vertical);
}
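/* A minimal usage sketch for the ranking entry points above (the caller code is
 * hypothetical; "store" and "vertical" are illustrative names, not part of this class):
 *
 *   Guid vertical = ...;                        // a vertical id known to VerticalConstants
 *   store.rankOnCitationPopularity(vertical);   // builds the ranking and sets dataSetV
 *   // ... later, after new citations have been added ...
 *   store.reRank(vertical);                     // invalidates dataSetV and recomputes
 */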
public Dictionary<string, RankingMetrics> getAllRankedURLNotInDB(Guid vertical)
{
    uint vIndex = VerticalConstants.getVerticalIndex(vertical);
    if (!dataSetV[vIndex])
    {
        rankOnCitationPopularity(vertical);
    }
    return _rankedURLNotInDB[vIndex];
}
/* Don't fetch from the DB here. The operation is too time-expensive.
 * // Top N ranked Articles pre-fetched for you
 * public SortedList<double, Article> getTopRankedNArticles(Guid vertical)
 * {
 *     if (!dataSetV[VerticalConstants.getVerticalIndex(vertical)])
 *     {
 *         rankOnCitationPopularity(vertical);
 *     }
 *     return _topNRankedArticles[VerticalConstants.getVerticalIndex(vertical)];
 * }
 */
public SortedList<double, Guid> getAllRankedArticleMetrics(Guid vertical)
{
    uint vIndex = VerticalConstants.getVerticalIndex(vertical);
    if (!dataSetV[vIndex])
    {
        rankOnCitationPopularity(vertical);
    }
    return _allRankedArticleMetrics[vIndex];
}
public double getAveragePopularityScore(Guid vertical)
{
    uint vIndex = VerticalConstants.getVerticalIndex(vertical);
    if (!dataSetV[vIndex])
    {
        rankOnCitationPopularity(vertical);
    }
    return _averagePopularityScore[vIndex];
}
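/* The three getters above lazily (re)build the ranking on first access, so callers
 * never have to rank explicitly. An illustrative sketch (variable names are assumptions):
 *
 *   SortedList<double, Guid> ranked = store.getAllRankedArticleMetrics(vertical);
 *   double avg = store.getAveragePopularityScore(vertical);
 *   Dictionary<string, RankingMetrics> offDb = store.getAllRankedURLNotInDB(vertical);
 */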
// Reclaim memory for a particular vertical.
public void clearVertical(Guid vertical)
{
    uint vid = VerticalConstants.getVerticalIndex(vertical);
    // _topNRankedArticles[vid].Clear();
    _allRankedArticleMetrics[vid].Clear();
    _rankedURLNotInDB[vid].Clear();
    _averagePopularityScore[vid] = 0.0;
    dataSetV[vid] = false;
}
private void initModelFromRepo()
{
    DateTime endt   = DateTime.UtcNow;
    DateTime startt = DateTime.UtcNow.AddHours(-1 * MaxHoursConsidered);
    //DateTime startt = DateTime.UtcNow.AddHours(-1 * 36); // for testing

    string msg = String.Format(
        "initModelFromRepo(): Building Initial Model for Articles in Vertical {0} \n\t from startTime [{1}] to endTime [{2}].",
        VerticalConstants.GetFriendlyName(VerticalId), startt.ToString(), endt.ToString());
    logger.Info(msg);

    logger.Debug("Retrieving Articles from Pickscomb repository.");
    List<Article> articles = null;
    try
    {
        IPickscombRepository repo = PickscombRepository.Instance;
        logger.Info("PickscombRepository instantiated. Fetching Articles . . .");
        articles = repo.GetArticles(VerticalId, startt, endt).ToList();
    }
    catch (Exception ex)
    {
        logger.Error("VerticalCitationStore::initModelFromRepo() failed to fetch Articles from Pickscomb repository.");
        logger.Error(ex.Message);
        // Preserve the original exception as the inner exception.
        throw new Exception("VerticalCitationStore::initModelFromRepo() failed to fetch Articles from Pickscomb repository.", ex);
    }

    msg = String.Format("Retrieved {0} Articles from the repository. Building the initial model . . .", articles.Count);
    logger.Info(msg);

    uint count = 0;
    foreach (Article art in articles)
    {
        // Test function.
        // testArticleURLs(art);
        msg = string.Format("Adding #[{0}]", ++count);
        logger.Info(msg);
        AddCitationsInArticle(art);
        //logger.Info("ADDED");
    }
    logger.Info("Initial CitationStore Model build complete.\n\n");
    // testCitationStore();
}
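/* AddCitationsInArticle(art) is implemented elsewhere in this class. For context,
 * a minimal sketch of the shape it is assumed to have, given how _cstore is consumed
 * in setTimeWeightedCitationPopularity() ("CitedURLs" and "PublishedAt" are
 * hypothetical Article members):
 *
 *   foreach (string url in art.CitedURLs)
 *   {
 *       if (!_cstore.ContainsKey(url))
 *       {
 *           _cstore[url] = new SortedList<DateTimeOffset, uint>();
 *       }
 *       SortedList<DateTimeOffset, uint> times = _cstore[url];
 *       if (times.ContainsKey(art.PublishedAt))
 *       {
 *           times[art.PublishedAt] += 1;   // another citation at the same timestamp
 *       }
 *       else
 *       {
 *           times[art.PublishedAt] = 1;
 *       }
 *   }
 */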
public void setTimeWeightedCitationPopularity()
{
    Dictionary<string, uint[]> cCountsPerURL = new Dictionary<string, uint[]>(); // per-period citation counts for each URL
    uint[] totalCCounts;                                                         // per-period total citation counts
    _popularitySet = false;

    if (_cstore.Count == 0)
    {
        logger.ErrorFormat("[setTimeWeightedCitationPopularity()] CitationStore empty for vertical {0}.",
                           VerticalConstants.GetFriendlyName(VerticalId));
        return;
    }

    // How many Period-minute slots fit in the Article lifetime of MaxHoursConsidered hours.
    uint numPeriods = MaxHoursConsidered * 60 / Period;
    if ((MaxHoursConsidered * 60) % Period != 0) // One extra period for any time left over.
    {
        ++numPeriods;
    }

    /* Count the citations per URL and the total citations for each period. */
    List<DateTimeOffset> removeList; // for cleaning up citations past the max hours limit
    uint p;                          // period index, counting backwards from now (0 = most recent)
    DateTimeOffset now = DateTime.UtcNow;

    totalCCounts = new uint[numPeriods];
    foreach (KeyValuePair<string, SortedList<DateTimeOffset, uint>> i in _cstore)
    {
        cCountsPerURL[i.Key] = new uint[numPeriods];
        removeList = new List<DateTimeOffset>();

        // Count citations per URL per period, and total citations per period.
        foreach (KeyValuePair<DateTimeOffset, uint> j in i.Value)
        {
            // Compute the period this citation falls into directly. (The previous
            // one-step period advance misbinned citations more than one period apart
            // and could index one past the end of the per-period arrays.)
            double minutesAgo = (now - j.Key).TotalMinutes;
            p = minutesAgo <= 0.0 ? 0u : (uint)(minutesAgo / Period);
            if (p >= numPeriods) // Mark this citation as past the allowed MaxHoursConsidered.
            {
                removeList.Add(j.Key);
                continue;
            }
            cCountsPerURL[i.Key][p] += j.Value;
            totalCCounts[p] += j.Value;
        }

        // Now remove citations past the last MaxHoursConsidered hours for this URL.
        foreach (DateTimeOffset dt in removeList)
        {
            i.Value.Remove(dt);
        }
    }

    // Cleanup continued: Remove any URLs with no citations within the allowed max hours.
    // Snapshot the keys first so we don't modify the dictionary while enumerating it.
    List<string> emptyURLs = _cstore.Keys.Where(url => _cstore[url].Count == 0).ToList();
    foreach (string url in emptyURLs)
    {
        _cstore.Remove(url);
    }

    /* Total and per-URL citation counts for each period are now set.
     * The citation store has also been cleaned of citations past the last MaxHoursConsidered hours,
     * and of URLs with no citations within that window.
     * Now all URLs in the citation store have citations within the past MaxHoursConsidered hours.
     * Proceed to Popularity scoring. */

    // Get the correct time-weighting for each period.
    RecencyCalculator rec = new RecencyCalculator();
    double[] timeWeighting = rec.GetRecencyCurve(numPeriods);

    /* Estimate the Time-Weighted Citation-Based Popularity of each article. */
    Dictionary<string, double> tmpp = new Dictionary<string, double>();
    foreach (KeyValuePair<string, uint[]> i in cCountsPerURL)
    {
        tmpp[i.Key] = 0.0;
        for (uint j = 0; j < numPeriods; ++j)
        {
            if (totalCCounts[j] > 0)
            {
                if (cCountsPerURL[i.Key][j] > 0)
                {
                    tmpp[i.Key] += 1000 * timeWeighting[j] * cCountsPerURL[i.Key][j] / totalCCounts[j];
                }
            }
            else if (cCountsPerURL[i.Key][j] > 0)
            {
                string msg = "[CitationStore::setTimeWeightedCitationPopularity()]"
                           + " Something went wrong for URL " + i.Key
                           + " in time period " + j.ToString()
                           + "\n URL citation count " + cCountsPerURL[i.Key][j].ToString()
                           + " / total citation count " + totalCCounts[j].ToString();
                logger.Error(msg);
            }
        }
    }

    /*
     * _averageScore = 0.0;
     * foreach (KeyValuePair<string, double> i in _popularityScore)
     * {
     *     _averageScore += i.Value;
     * }
     * _averageScore = _averageScore / _popularityScore.Count;
     */

    /* Now sort the Article URLs according to their popularity score. */
    _popularityScore.Clear();
    _popularityScore = tmpp.OrderByDescending(y => y.Value).ToDictionary(y => y.Key, y => y.Value);

    /*
     * _popularityRankedURLs = new string[_popularityScore.Count];
     * double maxScore = _popularityScore.ElementAt(0).Value;
     *
     * KeyValuePair<string, double> pr;
     * for (int j = 0; j < _popularityScore.Count(); ++j)
     * {
     *     pr = _popularityScore.ElementAt(j);
     *     // _popularityScore[pr.Key] = pr.Value;            // un-normalized score
     *     // _popularityScore[pr.Key] = pr.Value / maxScore; // Normalize to [0.0 .. 1.0] range
     *     _popularityRankedURLs[j] = pr.Key;
     * }
     */

    _popularitySet = true;
}
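/* RecencyCalculator.GetRecencyCurve(numPeriods) is defined elsewhere. For intuition,
 * a plausible sketch is an exponentially decaying weight per period, normalized so
 * the weights sum to 1.0 (the decay constant here is purely an assumption):
 *
 *   public double[] GetRecencyCurve(uint numPeriods)
 *   {
 *       const double decay = 0.9;              // assumed per-period decay factor
 *       double[] w = new double[numPeriods];
 *       double sum = 0.0;
 *       for (uint i = 0; i < numPeriods; ++i)
 *       {
 *           w[i] = Math.Pow(decay, i);         // period 0 = most recent, weighted highest
 *           sum += w[i];
 *       }
 *       for (uint i = 0; i < numPeriods; ++i)
 *       {
 *           w[i] /= sum;                       // normalize so the weights sum to 1.0
 *       }
 *       return w;
 *   }
 */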