/// <summary>
/// Ranks a single vertical, handing the caller's cancellation source to the
/// index-based overload, and then flags that vertical's data as set.
/// </summary>
/// <param name="vertical">Identifier of the vertical to rank.</param>
/// <param name="cancellationToken">Cancellation source forwarded to the ranking overload.</param>
public void rankOnCitationPopularity(Guid vertical, CancellationTokenSource cancellationToken)
        {
            // Resolve the vertical's slot once; it addresses both the ranking call and the flag.
            uint slot = VerticalConstants.getVerticalIndex(vertical);

            rankOnCitationPopularity(slot, NToIndex, cancellationToken);

            // The flag is raised only after the ranking call has returned.
            dataSetV[slot] = true;
        }
        /// <summary>
        /// Forces a fresh ranking of a vertical: its data-set flag is lowered first,
        /// then the cancellable ranking overload is invoked.
        /// </summary>
        /// <param name="vertical">Identifier of the vertical to rank again.</param>
        /// <param name="cancellationToken">Cancellation source forwarded to the ranking call.</param>
        public void reRank(Guid vertical, CancellationTokenSource cancellationToken)
        {
            // Lower the flag before ranking so the vertical reads as "not set" until ranking completes.
            dataSetV[VerticalConstants.getVerticalIndex(vertical)] = false;

            rankOnCitationPopularity(vertical, cancellationToken);
        }
        // Rank for a particular vertical only.
        /// <summary>
        /// Ranks a single vertical through the index-based overload and then flags
        /// that vertical's data as set.
        /// </summary>
        /// <param name="vertical">Identifier of the vertical to rank.</param>
        public void rankOnCitationPopularity(Guid vertical)
        {
            // One lookup serves both the ranking call and the flag update.
            uint slot = VerticalConstants.getVerticalIndex(vertical);

            rankOnCitationPopularity(slot, NToIndex);

            // The flag is raised only after the ranking call has returned.
            dataSetV[slot] = true;
        }
        /*  public void reRank()
         * {
         *    dataSetV[vIndex] = false;
         *    rankOnCitationPopularity();
         * }
         * */

        /// <summary>
        /// Forces a fresh ranking of a vertical: its data-set flag is lowered first,
        /// then the vertical is ranked again.
        /// </summary>
        /// <param name="vertical">Identifier of the vertical to rank again.</param>
        public void reRank(Guid vertical)
        {
            // Lower the flag before ranking so the vertical reads as "not set" until ranking completes.
            dataSetV[VerticalConstants.getVerticalIndex(vertical)] = false;

            rankOnCitationPopularity(vertical);
        }
 /// <summary>
 /// Returns the ranked-URLs-not-in-DB entry stored for a vertical, ranking the
 /// vertical first when its data-set flag is not yet raised.
 /// </summary>
 /// <param name="vertical">Identifier of the vertical to look up.</param>
 /// <returns>The entry of <c>_rankedURLNotInDB</c> at the vertical's index.</returns>
 public Dictionary <string, RankingMetrics> getAllRankedURLNotInDB(Guid vertical)
 {
     // Resolve the slot once and reuse it for the flag check and the result lookup.
     var slot = VerticalConstants.getVerticalIndex(vertical);

     bool alreadyRanked = dataSetV[slot];
     if (alreadyRanked == false)
     {
         // Rank on demand.
         rankOnCitationPopularity(vertical);
     }

     return _rankedURLNotInDB[slot];
 }
        /* Don't fetch from DB here. Operation too time-expensive.
         * // Top N ranking Articles pre-fetched for you
         * public SortedList<double, Article> getTopRankedNArticles( Guid vertical )
         * {
         *  if (!dataSetV[VerticalConstants.getVerticalIndex(vertical)])
         *  {
         *      rankOnCitationPopularity(vertical);
         *  }
         *  return _topNRankedArticles[VerticalConstants.getVerticalIndex(vertical)];
         * }
         * */

        /// <summary>
        /// Returns the ranked article metrics stored for a vertical, ranking the
        /// vertical first when its data-set flag is not yet raised.
        /// </summary>
        /// <param name="vertical">Identifier of the vertical to look up.</param>
        /// <returns>The entry of <c>_allRankedArticleMetrics</c> at the vertical's index.</returns>
        public SortedList <double, Guid> getAllRankedArticleMetrics(Guid vertical)
        {
            // Resolve the slot once and reuse it for the flag check and the result lookup.
            var slot = VerticalConstants.getVerticalIndex(vertical);

            bool alreadyRanked = dataSetV[slot];
            if (alreadyRanked == false)
            {
                // Rank on demand.
                rankOnCitationPopularity(vertical);
            }

            return _allRankedArticleMetrics[slot];
        }
 /// <summary>
 /// Returns the average popularity score stored for a vertical, ranking the
 /// vertical first when its data-set flag is not yet raised.
 /// </summary>
 /// <param name="vertical">Identifier of the vertical to look up.</param>
 /// <returns>The entry of <c>_averagePopularityScore</c> at the vertical's index.</returns>
 public double getAveragePopularityScore(Guid vertical)
 {
     // Resolve the slot once and reuse it for the flag check and the result lookup.
     var slot = VerticalConstants.getVerticalIndex(vertical);

     bool alreadyRanked = dataSetV[slot];
     if (alreadyRanked == false)
     {
         // Rank on demand.
         rankOnCitationPopularity(vertical);
     }

     return _averagePopularityScore[slot];
 }
        // Reclaim memory for a particular Vertical.
        /// <summary>
        /// Empties the ranking results held for one vertical and lowers its data-set
        /// flag, so the next getter call for that vertical ranks it again.
        /// </summary>
        /// <param name="vertical">Identifier of the vertical whose results are discarded.</param>
        public void clearVertical(Guid vertical)
        {
            uint slot = VerticalConstants.getVerticalIndex(vertical);

            // The top-N article list is not cleared here: that line is disabled.
            // _topNRankedArticles[slot].Clear();
            _allRankedArticleMetrics[slot].Clear();
            _rankedURLNotInDB[slot].Clear();

            // Reset the scalar score and mark the vertical as not ranked.
            _averagePopularityScore[slot] = 0.0;
            dataSetV[slot]                = false;
        }
Beispiel #9
0
        /// <summary>
        /// Builds the initial citation model: fetches the Articles the Pickscomb
        /// repository returns for <c>VerticalId</c> between (now - MaxHoursConsidered
        /// hours) and now (UTC), and passes each one to <c>AddCitationsInArticle</c>.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when fetching the Articles fails. The original failure is attached
        /// as <see cref="Exception.InnerException"/> so its type and stack trace survive.
        /// </exception>
        private void initModelFromRepo()
        {
            string msg;

            // Read the clock once so the window is exactly MaxHoursConsidered hours wide.
            DateTime endt   = DateTime.UtcNow;
            DateTime startt = endt.AddHours(-1 * MaxHoursConsidered);

            msg = String.Format("initModelFromRepo(): Building Initial Model for Articles in Vertical {0} \n\t from startTime [{1}] to endTime [{2}].",
                                VerticalConstants.GetFriendlyName(VerticalId), startt.ToString(), endt.ToString());
            logger.Info(msg);

            logger.Debug("Retrieving Articles from Pickscomb repository.");
            List <Article> articles = null;

            try
            {
                IPickscombRepository repo = PickscombRepository.Instance;
                logger.Info("PickscombRepository instantiated. Fetching Articles . . .");
                articles = repo.GetArticles(VerticalId, startt, endt).ToList <Article>();
            }
            catch (Exception ex)
            {
                const string failure = "VerticalCitationStore::initModelFromRepo() failed to fetch Articles from Pickscomb repository.";

                logger.Error(failure);
                // Log the full exception (type, message, stack trace, inner exceptions),
                // not just its message, so the root cause can be diagnosed from the log.
                logger.Error(ex.ToString());

                // Keep the exception type callers already see, but chain the cause.
                throw new Exception(failure, ex);
            }

            msg = String.Format("Retrieved {0} Articles from the repository. Building the initial model . . .", articles.Count);
            logger.Info(msg);
            uint count = 0;

            foreach (Article art in articles)
            {
                msg = string.Format("Adding #[{0}]", ++count);
                logger.Info(msg);
                AddCitationsInArticle(art);
            }
            logger.Info("Initial CitationStore Model build complete.\n\n");
        }
Beispiel #10
0
        /// <summary>
        /// Recomputes the time-weighted, citation-based popularity score of every URL
        /// in the citation store and stores the result in <c>_popularityScore</c>.
        /// </summary>
        /// <remarks>
        /// The last MaxHoursConsidered hours are divided into periods of <c>Period</c>
        /// minutes (period 0 is the most recent). For each period a URL's share of all
        /// citations in that period is multiplied by the period's recency weight and
        /// by 1000, and the products are summed over all periods.
        /// As a side effect, citations older than the last period are removed from the
        /// store, and URLs left without citations are removed from the store and get
        /// no score entry.
        /// <c>_popularitySet</c> is false while the method runs and is set to true only
        /// on successful completion; it stays false when the store is empty.
        /// </remarks>
        public void setTimeWeightedCitationPopularity()
        {
            _popularitySet = false;

            if (_cstore.Count == 0)
            {
                logger.ErrorFormat("[setTimeWeightedCitationPopularity()] CitationStore empty for vertical {0}.",
                                   VerticalConstants.GetFriendlyName(VerticalId));
                return;
            }

            uint numPeriods = MaxHoursConsidered * 60 / Period;   // How many Period minutes in the Article lifetime of Max hours.

            if ((MaxHoursConsidered * 60) % Period != 0)          // One extra period if any time left over.
            {
                ++numPeriods;
            }

            /* Count the citations per URL and total citations per each period. */
            Dictionary <string, uint[]> cCountsPerURL = new Dictionary <string, uint[]>(); // per period citation counts for each URL
            uint[]         totalCCounts = new uint[numPeriods];                            // per period total citation counts
            List <string>  emptyUrls    = new List <string>();                             // URLs left with no citations
            DateTimeOffset now          = DateTime.UtcNow;
            long           periodTicks  = (long)Period * TimeSpan.TicksPerMinute;

            foreach (KeyValuePair <string, SortedList <DateTimeOffset, uint> > entry in _cstore)
            {
                uint[] urlCounts = new uint[numPeriods];
                List <DateTimeOffset> expired = new List <DateTimeOffset>();   // citations past the max hours limit

                foreach (KeyValuePair <DateTimeOffset, uint> citation in entry.Value)
                {
                    // Derive the period directly from the citation's age, so the result
                    // depends neither on the iteration order of the list nor on every
                    // intermediate period containing a citation.
                    // Period p covers ages in (p * Period, (p + 1) * Period]; anything
                    // not in the past (age <= 0) is counted in period 0.
                    long ageTicks = (now - citation.Key).Ticks;
                    long p        = ageTicks <= 0 ? 0 : (ageTicks - 1) / periodTicks;

                    if (p >= numPeriods)   // Past the last allowed period: schedule for removal.
                    {
                        expired.Add(citation.Key);
                        continue;
                    }

                    urlCounts[p]    += citation.Value;
                    totalCCounts[p] += citation.Value;
                }

                // Remove expired citations only after the enumeration of this list has finished.
                foreach (DateTimeOffset dt in expired)
                {
                    entry.Value.Remove(dt);
                }

                if (entry.Value.Count == 0)
                {
                    // Cannot remove from _cstore while enumerating it; remember the key.
                    emptyUrls.Add(entry.Key);
                }
                else
                {
                    cCountsPerURL[entry.Key] = urlCounts;
                }
            }

            // Cleanup continued: remove any URLs with no citations within the allowed Max hours.
            foreach (string url in emptyUrls)
            {
                _cstore.Remove(url);
            }

            /* Total and per-URL citation counts for each period are now set.
             * Citation store has also been cleaned of citations past the max hours limit,
             * and of URLs with no citations within that limit.
             * Proceed to Popularity scoring.
             */

            // Get the correct time-weighting for each period.
            // NOTE(review): assumes GetRecencyCurve returns at least numPeriods weights - confirm.
            RecencyCalculator rec = new RecencyCalculator();

            double[] timeWeighting = rec.GetRecencyCurve(numPeriods);

            /* Estimate the Time Weighted Citation Based Popularity of each article. */
            Dictionary <string, double> tmpp = new Dictionary <string, double>();

            foreach (KeyValuePair <string, uint[]> perUrl in cCountsPerURL)
            {
                double score = 0.0;

                for (uint j = 0; j < numPeriods; ++j)
                {
                    uint urlCount = perUrl.Value[j];
                    if (urlCount == 0)
                    {
                        continue;
                    }

                    if (totalCCounts[j] > 0)
                    {
                        score += 1000 * timeWeighting[j] * urlCount / totalCCounts[j];
                    }
                    else
                    {
                        // Defensive: a URL has citations in a period whose total is zero.
                        string msg = "[CitationStore::setTimeWeightedCitationPopularity()]"
                                     + " Something went wrong for URL " + perUrl.Key + " in time period " + j.ToString()
                                     + "\n URL citation count " + urlCount.ToString()
                                     + " / total citation count " + totalCCounts[j].ToString();
                        logger.Error(msg);
                    }
                }

                tmpp[perUrl.Key] = score;
            }


            /*
             * _averageScore = 0.0;
             * foreach (KeyValuePair<string, double> i in _popularityScore)
             * {
             *  _averageScore += i.Value;
             * }
             * _averageScore = _averageScore / _popularityScore.Count;
             */

            /* Now sort the Article URLs according to their popularity score. */
            // NOTE(review): Dictionary<TKey,TValue> does not document an enumeration order,
            // so readers that rely on descending order should re-sort - confirm against callers.
            _popularityScore.Clear();
            _popularityScore = tmpp.OrderByDescending(y => y.Value).ToDictionary(y => y.Key, y => y.Value);

            /*
             * _popularityRankedURLs = new string[_popularityScore.Count];
             * double maxScore = _popularityScore.ElementAt(0).Value;
             *
             * KeyValuePair<string, double> pr;
             * for (int j = 0; j < _popularityScore.Count(); ++j )
             * {
             *  pr = _popularityScore.ElementAt(j);
             * // _popularityScore[pr.Key] = pr.Value; // un-normalized score
             * // _popularityScore[pr.Key] = pr.Value / maxScore; // Normalize to [0.0 .. 1.0] range
             *  _popularityRankedURLs[j] = pr.Key;
             * }
             * */

            _popularitySet = true;
        }