Exemple #1
        /// <summary>
        /// Groups the popularity-scored articles into <c>PopularGroup</c> clusters.
        /// Repeatedly takes the entry returned by <c>getMaxVal</c>, asks the similarity
        /// model for the articles most similar to it, records them in the group and
        /// removes them from the working set; whatever remains at the end is emitted as
        /// single-article groups.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this excerpt has lost its braces and part of the final statement
        /// (the database write), so the block boundaries described in the comments below
        /// are inferred from indentation only -- confirm against the real file.
        /// </remarks>
        /// <param name="citationModel">Supplies <c>PopularityScoredArticles</c> (article id -> score).</param>
        /// <param name="similarityModel">Supplies <c>MaxNumSimilar</c> and <c>GetTopSimilarArticlesTo</c>.</param>
        private void RunSimilarityModel(CitationPopularityModel citationModel,
                                        SimilarityRanker similarityModel)
            // NOTE(review): this is whatever the property returns; if it is the model's own
            // dictionary (not a copy), the Remove calls below mutate the model -- confirm.
            Dictionary <string, double> scoredArticles = citationModel.PopularityScoredArticles;

            // Keyed by popularity score, ordered by the custom comparer.
            // NOTE(review): presumably DescDuplicateDoubleComp sorts descending and lets equal
            // scores coexist; with a plain comparer, equal scores would overwrite each other
            // through the indexer assignments below -- verify.
            SortedDictionary <double, PopularGroup> result =
                new SortedDictionary <double, PopularGroup>(new DescDuplicateDoubleComp());

            // Cluster while more than MaxNumSimilar articles are still unassigned.
            // NOTE(review): Count() is the LINQ extension; Dictionary exposes a Count property.
            while (scoredArticles.Count() > similarityModel.MaxNumSimilar)

                // Seed article for this group (getMaxVal is defined elsewhere in the class).
                KeyValuePair <string, double> thisArt = getMaxVal(scoredArticles);

                var similarArt = similarityModel.GetTopSimilarArticlesTo(thisArt.Key);

                PopularGroup pop = new PopularGroup();
                pop.TitleArticle = thisArt.Key;
                // Each pair is enumerated as (double key, article id string value).
                foreach (KeyValuePair <double, string> p in similarArt)

                    // NOTE(review): any exception is silently swallowed here.
                    try { pop.SimilarArticles[p.Key] = p.Value; }
                    catch (Exception) { }

                    // Drop the similar article from the working set so it is not grouped again.
                    // NOTE(review): Dictionary.Remove returns false for a missing key rather than
                    // throwing, so this catch only hides a null-key error.
                    try { scoredArticles.Remove(p.Value); }
                    catch (Exception) { }
                // File the finished group under the seed article's popularity score.
                result[thisArt.Value] = pop;

                // remove already ranked articles
                try { scoredArticles.Remove(thisArt.Key); }
                catch (Exception) { }

            // The few left overs: each remaining article becomes a group with no similar articles.
            foreach (string art in scoredArticles.Keys)

                PopularGroup pop = new PopularGroup();
                pop.TitleArticle            = art;
                result[scoredArticles[art]] = pop;

            // Write the fully ranked cluster results to the database
            // NOTE(review): the start of this call is missing from the excerpt; only its
            // trailing arguments are visible.
                VerticalId, result, _cToken);
Exemple #2
        /// <summary>
        /// This is the Run() function, the sole entry point, to start the Ranker.
        /// NOTE: You do not start the Ranker every 15 minutes.
        /// You start the Ranker only once, and it runs forever.
        /// You restart only if it crashes or if the server re-starts
        /// </summary>
        /// <remarks>
        /// Flow visible in this excerpt: build the citation model (and, for
        /// <c>RankingType_T.PopularitySimilarity</c>, the similarity model) from the last
        /// <c>RankingHoursConsidered + RankingHoursBuffer</c> hours of data, perform a first
        /// ranking run, then enter an endless loop that re-reads only the data added since
        /// the previous retrieval and re-ranks.
        /// NOTE(review): the excerpt has lost its braces, its <c>try</c> keywords and several
        /// statements; comments below mark the places where code is visibly missing.
        /// </remarks>
        /// <param name="cancellationToken">
        /// Stored in <c>_cToken</c>; its <c>Token</c> is also placed in the parallel options.
        /// </param>
        public void Run(CancellationTokenSource cancellationToken)
            _cToken = cancellationToken;
            // Parallel options: one worker per processor, cancellable through the caller's token.
            _PrlOpt = new ParallelOptions()
                MaxDegreeOfParallelism = System.Environment.ProcessorCount,
                CancellationToken      = cancellationToken.Token

            // NOTE(review): tail of a logging call whose beginning is missing from the excerpt.
                     + "Ranker started for vertical {0}.\n", VerticalId.ToString());

            CitationPopularityModel citationModel = CitationPopularityModel.Instance;

            Log.Info("Citation Model instantiated.");

            // The similarity ranker instance is fetched regardless of ranking type;
            // only the log line below is conditional.
            SimilarityRanker similarityModel = SimilarityRanker.Instance;

            if (_rankingType == RankingType_T.PopularitySimilarity)
                Log.Info("TF-IDF Similarity Ranker instantiated.");

            // Initial window: from (now - considered hours - buffer hours) up to now.
            DateTimeOffset toTime     = DateTimeOffset.Now;
            uint           readPeriod = Ranker.RankingHoursConsidered + Ranker.RankingHoursBuffer;
            DateTimeOffset fromTime   = toTime.AddHours(-1 * readPeriod);

            // Get data from fromTime to Now from the CITATIONS table
            Dictionary <string, string> rankingData;

                rankingData = RankerDbAccess.ReadRankingDataFromDb(_rankingType, _verticalId,
                                                                   fromTime, toTime);
            catch (Exception ex)
                Log.Error("Reading CITATIONS table from the database failed.");
                // NOTE(review): "throw ex;" resets the stack trace and the exception itself is
                // never logged; "throw;" would preserve the trace. Applies to every rethrow below.
                throw ex;
            Log.Info("Ranking Data successfully read in from the CITATIONS table.");

                citationModel.Init(VerticalId, RankingType, rankingData, _cToken);
            catch (Exception ex)
                Log.Error("[FATAL]  Citation Popularity Ranker initialization failed!");
                throw ex;
            // NOTE(review): the argument list of this logging call is cut off in the excerpt.
            Log.Info("Citation Model built for the past {0} hours.\n",

            // The first ranking run.
            // Estimate the top ranking Articles and their popularity scores
            // NOTE(review): the guarded call itself is not visible; only its catch remains.
            catch (Exception ex)
                Log.Error("Citation Popularity Calculation failed in re-rank. ");
                throw ex;
            Log.Info("Time weighted Citation Popularity set.");

            if (_rankingType == RankingType_T.Popularity)
                // Write top ranking Articles and Scores to the RANKING table
                // NOTE(review): the write call is not visible in the excerpt.
                Log.Info("Wrote the latest Citation Popularity scores to the database.");

            // Similarity Ranker initial run
            if (_rankingType == RankingType_T.PopularitySimilarity)
                    similarityModel.Init(VerticalId, rankingData, _cToken);
                catch (Exception ex)
                    Log.Error("[FATAL]  TF-IDF Similarity Ranker initialization failed!");
                    throw ex;
                Log.Info("TF-IDF Similarity Model built for the past {0} hours.\n",
                         Ranker.RankingHoursConsidered + Ranker.RankingHoursBuffer);

                RunSimilarityModel(citationModel, similarityModel);

            Log.Info("------- Ranker initialization and first run completed. -------\n\n");

            // The last time the data was retrieved and the models successfully built from it.
            _lastDbRetrievalTime = toTime;

            // Ranker initialization completed.  Now run the Ranker infinite loop.
            TimeSpan processingTime, sleepTime, rankerInterval;

            rankerInterval = TimeSpan.FromMinutes(Ranker.RankerRunPeriod);

            while (true)

                // Time elapsed since the previous retrieval; if it is shorter than the run
                // period, the remainder is computed as sleepTime.
                // NOTE(review): the statement that actually waits for sleepTime is not visible.
                processingTime = DateTimeOffset.Now - LastDbRetrievalTime;
                if (rankerInterval > processingTime)
                    sleepTime = rankerInterval - processingTime;


                // Get last interval's data from the CITATIONS table
                // (incremental window: previous retrieval time up to now)
                toTime   = DateTimeOffset.Now;
                fromTime = LastDbRetrievalTime;

                // get data from fromTime to Now from the CITATIONS table
                Log.Info("New run. Retrieving Ranking data from the DB tables.");
                    rankingData = RankerDbAccess.ReadRankingDataFromDb(_rankingType, _verticalId,
                                                                       fromTime, toTime);
                catch (Exception ex)
                    Log.Error("Reading CITATIONS table from the database failed.");
                    throw ex;
                // Reached only when the read did not throw, so the retrieval time advances
                // only after a successful fetch.
                // NOTE(review): fields (_lastDbRetrievalTime, _rankingType, _verticalId) and
                // properties (LastDbRetrievalTime, RankingType, VerticalId) are used
                // interchangeably; presumably the properties wrap these fields -- confirm.
                _lastDbRetrievalTime = toTime;
                Log.Info("Ranking data fetched from the database.");

                // Update the Citation Model for the last period from the data retrieved
                    citationModel.LoadCitationsModel(RankingType, rankingData, toTime);
                catch (Exception ex)
                    Log.Error("Ranker Update: LoadCitationsModel() failed.");
                    throw ex;
                Log.Info("Citation Model successfully re-loaded since the last run.");

                // Estimate the top ranking Articles and their popularity scores
                // NOTE(review): the guarded call itself is not visible; only its catch remains.
                catch (Exception ex)
                    Log.Error("Citation Popularity Calculation failed in re-rank. ");
                    throw ex;
                Log.Info("Time weighted Citation Popularity set.");

                if (_rankingType == RankingType_T.Popularity)
                    // Write top ranking Articles and Scores to the RANKING table
                    // NOTE(review): the write call is not visible in the excerpt.
                    Log.Info("Wrote the latest Citation Popularity scores to the database.");

                // The front end then reads the top ranked articles directly from the RANKING table
                // RANKING table:
                // Key: Article Guid/ unique string ID
                // Value: double RankingScore in range [0.00 , 1.00]
                // The front end will then be able to retrieve the most popular articles from the
                // database using the above info and display on the front page.

                // TF-IDF Similarity Ranking

                if (RankingType == RankingType_T.PopularitySimilarity)
                    // Unlike the failures above, a similarity failure here is logged and the
                    // run jumps to cleanup instead of rethrowing.
                    // NOTE(review): the guarded call and the CleanupCitations label are not
                    // visible in the excerpt.
                    catch (Exception)
                        Log.Error("[ERROR] TF-IDF Similarity Model initialization failed!");
                        goto CleanupCitations;
                    Log.Info("TF-IDF Similarity Model initialized successfully.");

                    RunSimilarityModel(citationModel, similarityModel);

                // cleanup and housekeeping
                // NOTE(review): the housekeeping statements themselves are not visible.
                Log.Info("Housekeeping tasks completed.\n");

                Log.Info("------- Ranker rerun completed. -------\n\n");