// Ranks a single vertical, with cancellation support.
// Resolves the vertical's index through VerticalConstants.getVerticalIndex, delegates the
// work to the index-based overload (passing NToIndex and the cancellation token source),
// and then sets dataSetV[vIndex] to true.
// NOTE(review): the method's enclosing braces are not present in this excerpt.
public void rankOnCitationPopularity(Guid vertical, CancellationTokenSource cancellationToken)
            uint vIndex = VerticalConstants.getVerticalIndex(vertical);

            // The flag is only set after the index-based overload returns.
            rankOnCitationPopularity(vIndex, NToIndex, cancellationToken);
            dataSetV[vIndex] = true;
        // Forces a fresh ranking of the vertical, with cancellation support.
        // Clears dataSetV for the vertical's index and then calls
        // rankOnCitationPopularity(vertical, cancellationToken).
        // NOTE(review): the method's enclosing braces are not present in this excerpt.
        public void reRank(Guid vertical, CancellationTokenSource cancellationToken)
            uint vindex = VerticalConstants.getVerticalIndex(vertical);

            dataSetV[vindex] = false;
            rankOnCitationPopularity(vertical, cancellationToken);
        // Rank for a particular vertical only.
        // Resolves the vertical's index through VerticalConstants.getVerticalIndex, delegates to
        // the index-based overload (passing NToIndex), and then sets dataSetV[vIndex] to true.
        // NOTE(review): the method's enclosing braces are not present in this excerpt.
        public void rankOnCitationPopularity(Guid vertical)
            uint vIndex = VerticalConstants.getVerticalIndex(vertical);

            // The flag is only set after the index-based overload returns.
            rankOnCitationPopularity(vIndex, NToIndex);
            dataSetV[vIndex] = true;
        /*  public void reRank()
         * {
         *    dataSetV[vIndex] = false;
         *    rankOnCitationPopularity();
         * }
         * */

        // Clears dataSetV for the vertical's index.
        // NOTE(review): the cancellation-token overload of reRank calls rankOnCitationPopularity
        // right after clearing the flag; no such call (and no braces) is visible here, so this
        // excerpt is probably truncated -- confirm against the full source.
        public void reRank(Guid vertical)
            uint vindex = VerticalConstants.getVerticalIndex(vertical);

            dataSetV[vindex] = false;
 // Returns a Dictionary<string, RankingMetrics> for the given vertical (presumably keyed by URL -- confirm).
 // The visible body starts with a guard on the vertical's dataSetV flag being unset.
 // NOTE(review): the guarded statement, the return statement and the braces are not visible in this excerpt.
 public Dictionary <string, RankingMetrics> getAllRankedURLNotInDB(Guid vertical)
     if (!dataSetV[VerticalConstants.getVerticalIndex(vertical)])
        /* Don't fetch from DB here. The operation is too time-expensive.
         * // Top N ranking Articles pre-fetched for you
         * public SortedList<double, Article> getTopRankedNArticles( Guid vertical )
         * {
         *  if (!dataSetV[VerticalConstants.getVerticalIndex(vertical)])
         *  {
         *      rankOnCitationPopularity(vertical);
         *  }
         *  return _topNRankedArticles[VerticalConstants.getVerticalIndex(vertical)];
         * }
         * */

        // Returns a SortedList<double, Guid> for the given vertical
        // (presumably score -> article id -- confirm against the full source).
        // The visible body starts with a guard on the vertical's dataSetV flag being unset.
        // NOTE(review): the guarded statement, the return statement and the braces are not visible in this excerpt.
        public SortedList <double, Guid> getAllRankedArticleMetrics(Guid vertical)
            if (!dataSetV[VerticalConstants.getVerticalIndex(vertical)])
 // Returns a double for the given vertical (the average popularity score, going by the method name).
 // The visible body starts with a guard on the vertical's dataSetV flag being unset.
 // NOTE(review): the guarded statement, the return statement and the braces are not visible in this excerpt.
 public double getAveragePopularityScore(Guid vertical)
     if (!dataSetV[VerticalConstants.getVerticalIndex(vertical)])
        // Reclaim memory for a particular Vertical.
        // As visible here, this resets the vertical's average popularity score to 0.0 and clears
        // its dataSetV flag; the only collection-clearing call is commented out.
        // NOTE(review): the method's enclosing braces are not present in this excerpt.
        public void clearVertical(Guid vertical)
            uint vid = VerticalConstants.getVerticalIndex(vertical);

//            _topNRankedArticles[vid].Clear();
            _averagePopularityScore[vid] = 0.0;
            dataSetV[vid] = false;
Esempio n. 9
        // Builds the initial model from Articles fetched from the Pickscomb repository.
        // Fetches the Articles of VerticalId published in the window
        // [UtcNow - MaxHoursConsidered hours, UtcNow], then iterates over them.
        // Throws an Exception if the repository fetch fails.
        // NOTE(review): this excerpt has lost its braces and some statements (the `try` keyword,
        // most of the foreach body); the notes below flag what to confirm against the full source.
        private void initModelFromRepo()
            string msg;

            // The window is computed from two separate DateTime.UtcNow reads.
            DateTime endt   = DateTime.UtcNow;
            DateTime startt = DateTime.UtcNow.AddHours(-1 * MaxHoursConsidered);

            //DateTime startt = DateTime.UtcNow.AddHours(-1 * 36);    // for testing

            // NOTE(review): msg is formatted here (and again below) but no call that logs it is
            // visible in this excerpt -- confirm the logging statements were not dropped.
            msg = String.Format("initModelFromRepo(): Building Initial Model for Articles in Vertical {0} \n\t from startTime [{1}] to endTime [{2}].",
                                VerticalConstants.GetFriendlyName(VerticalId), startt.ToString(), endt.ToString());

            logger.Debug("Retrieving Articles from Pickscomb repository.");
            List <Article> articles = null;

            // NOTE(review): the three statements below are presumably the body of a `try` whose
            // keyword is missing from this excerpt (a `catch` follows).
                IPickscombRepository repo = PickscombRepository.Instance;
                logger.Info("PickscombRepository instantiated. Fetching Articles . . .");
                articles = repo.GetArticles(VerticalId, startt, endt).ToList <Article>();
            // NOTE(review): the caught exception `ex` is neither logged nor passed as the inner
            // exception of the new Exception, so the original cause and stack trace are lost.
            catch (Exception ex)
                logger.Error("VerticalCitationStore::initModelFromRepo() failed to fetch Articles from Pickscomb repository.");
                throw new Exception("VerticalCitationStore::initModelFromRepo() failed to fetch Articles from Pickscomb repository.");

            msg = String.Format("Retrieved {0} Articles from the repository. Building the initial model . . .", articles.Count());
            uint count = 0;

            // NOTE(review): the only visible per-article statement formats a progress message;
            // the code that actually adds the article to the model is not visible here.
            foreach (Article art in articles)
                // Test function.
//                testArticleURLs(art);
                msg = string.Format("Adding #[{0}]", ++count);
            logger.Info("Initial CitationStore Model build complete.\n\n");

//            testCitationStore();
Esempio n. 10
        // Recomputes the time-weighted citation popularity score of every URL in _cstore.
        // Visible steps:
        //   1. Clear _popularitySet and check for an empty _cstore (an error is logged).
        //   2. Split the MaxHoursConsidered window into Period-minute buckets (numPeriods).
        //   3. Accumulate per-URL (cCountsPerURL) and overall (totalCCounts) citation counts per
        //      bucket, using removeList for timestamps that fall outside the window.
        //   4. Score each URL as the sum over buckets of
        //      1000 * timeWeighting[j] * (URL count in bucket j) / (total count in bucket j),
        //      with the weights returned by RecencyCalculator.GetRecencyCurve(numPeriods).
        //   5. Store the scores, ordered by descending value, in _popularityScore and set _popularitySet.
        // NOTE(review): this excerpt has lost its braces, several statements (bodies of some
        // if/foreach statements) and some block-comment delimiters; the notes below flag what
        // to confirm against the full source.
        public void setTimeWeightedCitationPopularity()
            Dictionary <string, uint[]> cCountsPerURL = new Dictionary <string, uint[]>(); // per period citations counts for each URL

            uint[] totalCCounts;                                                           // per period total citation counts

            _popularitySet = false;

            // NOTE(review): the argument list of this ErrorFormat call is cut off, and whatever
            // follows the log call (presumably an early return) is not visible.
            if (_cstore.Count == 0)
                logger.ErrorFormat("[setTimeWeightedCitationPopularity()] CitationStore empty for vertical {0}.",

            uint numPeriods = MaxHoursConsidered * 60 / Period;   // How many Period minutes in the Article lifetime of Max hours.

            // NOTE(review): the statement guarded by this check (presumably an increment of
            // numPeriods, per the trailing comment) is not visible.
            if ((MaxHoursConsidered * 60) % Period != 0)          // One extra period if any time left over.

            /* Count the citations per URL and total citations per each period. */
            List <DateTimeOffset> removeList;       // for cleaning up citations past the max hours limit
            uint           p;                       // processed period count
            DateTimeOffset now = DateTime.UtcNow;
            DateTimeOffset periodEnd;

            totalCCounts = new uint[numPeriods];
            foreach (KeyValuePair <string, SortedList <DateTimeOffset, uint> > i in _cstore)
                cCountsPerURL[i.Key] = new uint[numPeriods];
                removeList           = new List <DateTimeOffset>();

                // now count citations per url per period
                // and total citations per period
                p         = 0; // processed period count
                periodEnd = now.AddMinutes(-1 * Period);
                // NOTE(review): SortedList enumerates in ascending key order unless a custom comparer
                // was supplied (not visible here), i.e. oldest timestamp first, while periodEnd walks
                // backwards from now -- confirm the intended iteration order.
                foreach (KeyValuePair <DateTimeOffset, uint> j in i.Value)
                    if (j.Key < periodEnd)    // Advance to next period backwards
                        // NOTE(review): the increment of p and the body of the check below are not
                        // visible. Both count arrays have length numPeriods and are indexed by p,
                        // so p == numPeriods would already be out of range; `>=` may be intended.
                        if (p > numPeriods)  // Mark this citation as past the last max allowed hours.

                        periodEnd = periodEnd.AddMinutes(-1 * Period);

                    cCountsPerURL[i.Key][p] += j.Value;
                    totalCCounts[p]         += j.Value;

                // Now remove citations older than the max allowed hours for this URL.
                // NOTE(review): the loop body (presumably the removal itself) is not visible.
                foreach (DateTimeOffset dt in removeList)

            // Cleanup continued: Remove any URLs with no citations within the allowed Max hours.
            // NOTE(review): the guarded statement is not visible. If it removes the entry from
            // _cstore directly, it mutates the dictionary while its Keys collection is being
            // enumerated -- confirm how the removal is done.
            foreach (string url in _cstore.Keys)
                if (_cstore[url].Count == 0)

            // NOTE(review): the block comment below has no visible terminator, so as written it
            // swallows the RecencyCalculator and timeWeighting statements up to the end of the
            // "Estimate ..." comment. It also hard-codes 36 hours, whereas the code derives the
            // window from MaxHoursConsidered.
            /* Total and per-URL citation counts for each period are now set.
             * Citation store has also been cleaned of citations past the last 36 hours,
             * and of URL with no citations within the past 36 hours.
             * Now all URLs in citation store have citations within the past 36 hours.
             * Proceed to Popularity scoring.

            // Get the correct time-weighting for each period.
            RecencyCalculator rec = new RecencyCalculator();

            double[] timeWeighting = rec.GetRecencyCurve(numPeriods);

            /* Estimate the Time Weighted Citation Based Popularity of each article. */
            Dictionary <string, double> tmpp = new Dictionary <string, double>();

            foreach (KeyValuePair <string, uint[]> i in cCountsPerURL)
                tmpp[i.Key] = 0.0;
                for (uint j = 0; j < numPeriods; ++j)
                    if (totalCCounts[j] > 0)
                        if (cCountsPerURL[i.Key][j] > 0)
                            // timeWeighting is double[], so the whole expression is evaluated in
                            // floating point (no integer division).
                            tmpp[i.Key] += 1000 * timeWeighting[j] * cCountsPerURL[i.Key][j] / totalCCounts[j];
                        // NOTE(review): this second check presumably sits in an `else` branch of the
                        // totalCCounts test (URL has citations although the period total is zero);
                        // the `else` and whatever consumes msg are not visible.
                        if (cCountsPerURL[i.Key][j] > 0)
                            string msg = "[CitationStore::setTimeWeightedCitationPopularity()]"
                                         + " Something went wrong for URL " + i.Key + " in time period " + j.ToString()
                                         + "\n URL citation count " + cCountsPerURL[i.Key][j].ToString()
                                         + " / total citation count " + totalCCounts[j].ToString();

            // NOTE(review): the lines below that start with " * " read like the interior of a
            // commented-out block whose opening delimiter is missing from this excerpt.
             * _averageScore = 0.0;
             * foreach (KeyValuePair<string, double> i in _popularityScore)
             * {
             *  _averageScore += i.Value;
             * }
             * _averageScore = _averageScore / _popularityScore.Count;

            /* Now sort the Article URLs according to their popularity score. */
            // NOTE(review): the ordered sequence is copied into a Dictionary, whose enumeration
            // order is not guaranteed by its documentation; any consumer that relies on
            // _popularityScore being sorted should be checked.
            _popularityScore = tmpp.OrderByDescending(y => y.Value).ToDictionary(y => y.Key, y => y.Value);

            // NOTE(review): as above, the opening delimiter of this commented-out block is missing.
             * _popularityRankedURLs = new string[_popularityScore.Count];
             * double maxScore = _popularityScore.ElementAt(0).Value;
             * KeyValuePair<string, double> pr;
             * for (int j = 0; j < _popularityScore.Count(); ++j )
             * {
             *  pr = _popularityScore.ElementAt(j);
             * // _popularityScore[pr.Key] = pr.Value; // un-normalized score
             * // _popularityScore[pr.Key] = pr.Value / maxScore; // Normalize to [0.0 .. 1.0] range
             *  _popularityRankedURLs[j] = pr.Key;
             * }
             * */

            _popularitySet = true;