Ejemplo n.º 1
0
        /// <summary>
        /// Trains a 16-trait recommender with item features and shared user thresholds enabled on the
        /// training split of the ratings file, predicts rating distributions for the test split and
        /// converts the resulting prediction errors into MAE values grouped by item popularity.
        /// </summary>
        /// <returns>
        /// The dictionary produced by <c>CreateItemPopularityPredictions</c>: MAE of movies grouped by
        /// the number of ratings given for them.
        /// </returns>
        public Dictionary <string, double> GetRatingsToMaeOnFeaturePredictions()
        {
            // Number of traits used by the single model trained in this experiment.
            const int traitCount = 16;

            var starsMapping = RecommenderMappingFactory.GetStarsMapping(false);

            var trainingData = SplitInstanceSource.Training(RatingsPath);
            var testData     = SplitInstanceSource.Test(RatingsPath);

            Console.WriteLine("Calculation of mean absolute error for movies with different numbers of ratings in the training set for data with feature info.");

            // Restart the random number generator from the configured seed before building the model.
            Rand.Restart(RandomSeed);

            var model = GetRecommender(starsMapping, traitCount);

            var trainingSettings = model.Settings.Training;
            trainingSettings.UseItemFeatures                     = true;
            trainingSettings.UseSharedUserThresholds             = true;
            trainingSettings.Advanced.UserThresholdPriorVariance = 10;

            model.Train(trainingData);

            var predictedDistributions = model.PredictDistribution(testData);
            var errors = PredictionError(testData, starsMapping, predictedDistributions);

            return CreateItemPopularityPredictions(trainingData, starsMapping, errors);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Trains one star-rating recommender with item features enabled per requested trait count
        /// and evaluates each of them on the test split of the ratings file.
        /// </summary>
        /// <param name="traitsCounts"> Numbers of item traits; a separate model is trained for each entry. </param>
        /// <returns>
        /// Metrics keyed by the trait count (converted to a string): fraction of correct binarized
        /// predictions, NDCG of the recommended rated items, and MAE divided by 2.
        /// </returns>
        public MetricValues PredictionsOnDataWithFeatures(IList <int> traitsCounts)
        {
            var starsMapping  = RecommenderMappingFactory.GetStarsMapping(true);
            var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

            var trainingData = SplitInstanceSource.Training(RatingsPath);
            var testData     = SplitInstanceSource.Test(RatingsPath);

            // One evaluator per rating scale: binary for the zero-one error, stars for NDCG and MAE.
            var binaryEvaluator = new RecommenderEvaluator <SplitInstanceSource <string>, string, Movie, int, int, IDictionary <int, double> >(binaryMapping.ForEvaluation());
            var starsEvaluator  = new RecommenderEvaluator <SplitInstanceSource <string>, string, Movie, int, int, IDictionary <int, double> >(starsMapping.ForEvaluation());

            var accuracyByTraits = new Dictionary <string, double>();
            var ndcgByTraits     = new Dictionary <string, double>();
            var maeByTraits      = new Dictionary <string, double>();

            foreach (var traitCount in traitsCounts)
            {
                // All result dictionaries share this key.
                var key = traitCount.ToString();

                Console.WriteLine($"Running metrics calculation for data with features and a model with {traitCount} traits.");

                // Restart the random number generator from the configured seed for every model.
                Rand.Restart(RandomSeed);

                var model = GetRecommender(starsMapping, traitCount);

                var trainingSettings = model.Settings.Training;
                trainingSettings.UseItemFeatures                     = true;
                trainingSettings.UseSharedUserThresholds             = true;
                trainingSettings.Advanced.UserThresholdPriorVariance = 10;

                model.Train(trainingData);

                var binarized        = BinarizePredictions(model.PredictDistribution(testData));
                var pointPredictions = model.Predict(testData);

                // Fraction correct is the complement of the zero-one error on the binarized predictions.
                accuracyByTraits.Add(key, 1.0 - binaryEvaluator.RatingPredictionMetric(testData, binarized, Metrics.ZeroOneError));

                var recommendations = starsEvaluator.RecommendRatedItems(model, testData, 5, 5);
                ndcgByTraits.Add(key, starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));

                var absoluteError = starsEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.AbsoluteError);

                // Halve the MAE to convert the 10-star rating scale to a 5-star one.
                maeByTraits.Add(key, absoluteError / 2.0);
            }

            return new MetricValues(accuracyByTraits, ndcgByTraits, maeByTraits);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains one recommender on the binarized (like/dislike) mapping per requested trait count
        /// and evaluates each of them on the test split of the ratings file.
        /// </summary>
        /// <param name="traitsCounts"> Numbers of item traits; a separate model is trained for each entry. </param>
        /// <returns>
        /// A tuple of the like probabilities (as returned by <c>GetLikeProbability</c>) and the metrics
        /// (fraction of correct predictions and NDCG), both keyed by the trait count converted to a string.
        /// </returns>
        public (Dictionary <string, double[][]> likeProbability, MetricValues metricValues) PredictionsOnBinaryData(
            IList <int> traitsCounts
            )
        {
            var starsMapping  = RecommenderMappingFactory.GetStarsMapping(true);
            var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

            var trainingData = SplitInstanceSource.Training(RatingsPath);
            var testData     = SplitInstanceSource.Test(RatingsPath);

            // One evaluator per rating scale: binary for the zero-one error, stars for NDCG.
            var binaryEvaluator = new RecommenderEvaluator <SplitInstanceSource <string>, string, Movie, int, int, IDictionary <int, double> >(binaryMapping.ForEvaluation());
            var starsEvaluator  = new RecommenderEvaluator <SplitInstanceSource <string>, string, Movie, int, int, IDictionary <int, double> >(starsMapping.ForEvaluation());

            var accuracyByTraits        = new Dictionary <string, double>();
            var ndcgByTraits            = new Dictionary <string, double>();
            var likeProbabilityByTraits = new Dictionary <string, double[][]>();

            foreach (var traitCount in traitsCounts)
            {
                // All result dictionaries share this key.
                var key = traitCount.ToString();

                Console.WriteLine($"Running metrics calculation for binarized data and a model with {traitCount} traits.");

                // Restart the random number generator from the configured seed for every model.
                Rand.Restart(RandomSeed);

                var model = GetRecommender(binaryMapping, traitCount);
                model.Settings.Training.Advanced.UserThresholdPriorVariance = EpsilonPriorVariance;

                model.Train(trainingData);

                var pointPredictions = model.Predict(testData);

                var predictedDistributions = model.PredictDistribution(testData);
                likeProbabilityByTraits.Add(key, GetLikeProbability(predictedDistributions));

                // Fraction correct is the complement of the zero-one error.
                accuracyByTraits.Add(key, 1.0 - binaryEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.ZeroOneError));

                var recommendations = starsEvaluator.RecommendRatedItems(model, testData, 5, 5);
                ndcgByTraits.Add(key, starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));
            }

            var metrics = new MetricValues(accuracyByTraits, ndcgByTraits);

            return (likeProbabilityByTraits, metrics);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Creates a model runner bound to a ratings file and a recommender mapping factory.
 /// </summary>
 /// <param name="recommenderMappingFactory">Factory stored in the <c>RecommenderMappingFactory</c> member; must not be null.</param>
 /// <param name="ratingsPath">Path stored in the <c>RatingsPath</c> member; must not be null.</param>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="ratingsPath"/> or <paramref name="recommenderMappingFactory"/> is null.
 /// </exception>
 public ModelRunner(RecommenderMappingFactory recommenderMappingFactory, string ratingsPath)
 {
     // Fail fast here: a null argument would otherwise only surface later, when an experiment
     // dereferences the stored value.
     RatingsPath = ratingsPath ?? throw new ArgumentNullException(nameof(ratingsPath));
     RecommenderMappingFactory = recommenderMappingFactory ?? throw new ArgumentNullException(nameof(recommenderMappingFactory));
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Trains one star-rating recommender with shared user thresholds per requested trait count
        /// and evaluates each of them on the test split of the ratings file.
        /// </summary>
        /// <param name="traitsCounts"> Numbers of item traits; a separate model is trained for each entry. </param>
        /// <returns>
        /// A tuple of the posterior distributions of the thresholds (taken from the first user of the
        /// learned posteriors), the predicted ratings converted to doubles, and the metrics (fraction
        /// of correct binarized predictions, NDCG, and MAE divided by 2). All three are keyed by the
        /// trait count converted to a string.
        /// </returns>
        public (Dictionary <string, IDictionary <string, Gaussian> > posteriorDistributionsOfThresholds, Dictionary <string, double[][]> mostProbableRatings, MetricValues metricValues) PredictionsOnStarRatings(
            IList <int> traitsCounts
            )
        {
            var starsMapping  = RecommenderMappingFactory.GetStarsMapping(true);
            var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

            var trainingData = SplitInstanceSource.Training(RatingsPath);
            var testData     = SplitInstanceSource.Test(RatingsPath);

            // One evaluator per rating scale: binary for the zero-one error, stars for NDCG and MAE.
            var binaryEvaluator = new RecommenderEvaluator <SplitInstanceSource <string>, string, Movie, int, int, IDictionary <int, double> >(binaryMapping.ForEvaluation());
            var starsEvaluator  = new RecommenderEvaluator <SplitInstanceSource <string>, string, Movie, int, int, IDictionary <int, double> >(starsMapping.ForEvaluation());

            var accuracyByTraits = new Dictionary <string, double>();
            var ndcgByTraits     = new Dictionary <string, double>();
            var maeByTraits      = new Dictionary <string, double>();

            var ratingsByTraits    = new Dictionary <string, double[][]>();
            var thresholdsByTraits = new Dictionary <string, IDictionary <string, Gaussian> >();

            foreach (var traitCount in traitsCounts)
            {
                // All result dictionaries share this key.
                var key = traitCount.ToString();

                Console.WriteLine($"Running metrics calculation for 10-star data and a model with {traitCount} traits.");

                // Restart the random number generator from the configured seed for every model.
                Rand.Restart(RandomSeed);

                var model = GetRecommender(starsMapping, traitCount);

                var trainingSettings = model.Settings.Training;
                trainingSettings.UseSharedUserThresholds             = true;
                trainingSettings.Advanced.UserThresholdPriorVariance = 10;

                model.Train(trainingData);

                var predictedDistributions = model.PredictDistribution(testData);
                var pointPredictions       = model.Predict(testData);

                // Project every user's predicted ratings to doubles (lazily) before flattening them into arrays.
                var ratingsAsDoubles = pointPredictions.Select(
                    perUser => perUser.Value.Select(perMovie => (double)perMovie.Value));
                ratingsByTraits.Add(key, GetJaggedDoubles(ratingsAsDoubles));

                // The thresholds are read from the first user's posterior only.
                var firstUserThresholds = model.GetPosteriorDistributions().Users.First().Value.Thresholds.ToList();
                thresholdsByTraits.Add(key, BeautifyPosteriorDistribution(firstUserThresholds));

                var binarized = BinarizePredictions(predictedDistributions);

                // Fraction correct is the complement of the zero-one error on the binarized predictions.
                accuracyByTraits.Add(key, 1.0 - binaryEvaluator.RatingPredictionMetric(testData, binarized, Metrics.ZeroOneError));

                var recommendations = starsEvaluator.RecommendRatedItems(model, testData, 5, 5);
                ndcgByTraits.Add(key, starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));

                var absoluteError = starsEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.AbsoluteError);

                // Halve the MAE to convert the 10-star rating scale to a 5-star one.
                maeByTraits.Add(key, absoluteError / 2.0);
            }

            var metrics = new MetricValues(accuracyByTraits, ndcgByTraits, maeByTraits);

            return (thresholdsByTraits, ratingsByTraits, metrics);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs the experiments of sections 3 to 6 through a <c>ModelRunner</c> and passes every result,
        /// together with its section name and a caption, to <paramref name="outputter"/>.
        /// </summary>
        /// <param name="outputter">A container for experiments output.</param>
        /// <param name="experimentRunType">
        /// Selects the size of the run.
        /// <see cref="ExperimentRunType.FullRun"/> uses the trait counts 0, 1, 2, 4, 8, 16 and sets <c>IterationCount</c> to 200.
        /// <see cref="ExperimentRunType.FastRun"/> uses the trait counts 0, 1, 2, 4 and sets <c>IterationCount</c> to 30.
        /// Any other value (<see cref="ExperimentRunType.TestRun"/>) uses the trait counts 0, 4 and also sets <c>IterationCount</c> to 30.
        /// </param>
        public static void RunExperiments(Outputter outputter, ExperimentRunType experimentRunType)
        {
            var isFullRun = experimentRunType == ExperimentRunType.FullRun;

            // Numbers of traits to use in the experiments. A separate set of experiments is run for each entry.
            int[] traitCounts;
            if (isFullRun)
            {
                traitCounts = new int[] { 0, 1, 2, 4, 8, 16 };
            }
            else if (experimentRunType == ExperimentRunType.FastRun)
            {
                traitCounts = new int[] { 0, 1, 2, 4 };
            }
            else
            {
                // experimentRunType == ExperimentRunType.TestRun
                traitCounts = new int[] { 0, 4 };
            }

            var movies         = GetMovies();
            var mappingFactory = new RecommenderMappingFactory(movies);

            var runner = new ModelRunner(mappingFactory, RatingsPath)
            {
                IterationCount = isFullRun ? 200 : 30
            };

            #region Section3

            var section3 = Contents.S3TrainingOurRecommender.NumberedName;

            Console.WriteLine($"\n{section3}.\n");

            var(priorRatings, starsHistogram, ratingsPerMovieRank) = PriorRatings(movies);
            outputter.Out(priorRatings, section3, "Ratings");
            outputter.Out(starsHistogram, section3, "The number of ratings given for each possible number of stars");

            #endregion

            #region Section4

            var section4 = Contents.S4OurFirstRecommendations.NumberedName;

            Console.WriteLine($"\n{section4}.\n");

            outputter.Out(runner.GetGroundTruth(mappingFactory.GetBinaryMapping(true)), section4, "Ground truth");

            var(binaryPredictions, binaryMetrics) = runner.PredictionsOnBinaryData(traitCounts);

            outputter.Out(binaryPredictions, section4, "Predictions");
            outputter.Out(binaryMetrics.CorrectFractions, section4, "Fraction of predictions correct");
            outputter.Out(binaryMetrics.Ndcgs, section4, "Average NDCG@5");

            #endregion

            #region Section5

            var section5 = Contents.S5ModellingStarRatings.NumberedName;

            Console.WriteLine($"\n{section5}.\n");

            outputter.Out(runner.GetGroundTruth(mappingFactory.GetStarsMapping(true)), section5, "Ground truth");

            var(thresholdPosteriors, starsPredictions, starsMetrics) = runner.PredictionsOnStarRatings(traitCounts);

            // Computed here, but only reported in section 6.
            var maeByRatingsNumStars = runner.GetRatingsNumToMaeOnStarsPredictions();

            outputter.Out(thresholdPosteriors, section5, "Posterior distributions for star ratings thresholds");
            outputter.Out(starsPredictions, section5, "Predictions");

            var correctFractionComparison = new Dictionary <string, IDictionary <string, double> >()
            {
                ["Initial"]    = binaryMetrics.CorrectFractions,
                ["With stars"] = starsMetrics.CorrectFractions
            };

            var ndcgComparisonSection5 = new Dictionary <string, IDictionary <string, double> >()
            {
                ["Initial"]    = binaryMetrics.Ndcgs,
                ["With stars"] = starsMetrics.Ndcgs
            };

            outputter.Out(correctFractionComparison, section5, "Fraction of predictions correct");
            outputter.Out(ndcgComparisonSection5, section5, "Average NDCG@5");
            outputter.Out(starsMetrics.Maes, section5, "Mean absolute error (MAE)");

            #endregion

            #region Section6

            var section6 = Contents.S6AnotherColdStartProblem.NumberedName;

            Console.WriteLine($"\n{section6}.\n");

            outputter.Out(ratingsPerMovieRank, section6, "The number of ratings given for each movie in the data set as a whole. ");

            var featureMetrics = runner.PredictionsOnDataWithFeatures(traitCounts);

            var maeByRatingsNumFeatures = runner.GetRatingsToMaeOnFeaturePredictions();

            outputter.Out(maeByRatingsNumStars, section6, "MAE for movies with different numbers of ratings.");

            var maeByRatingsNumComparison = new Dictionary <string, Dictionary <string, double> >
            {
                ["With stars"]              = maeByRatingsNumStars,
                ["With stars and features"] = maeByRatingsNumFeatures
            };

            outputter.Out(maeByRatingsNumComparison, section6,
                          "MAE for movies with different numbers of ratings. A model including feature information.");

            var maeComparison = new Dictionary <string, IDictionary <string, double> >()
            {
                ["With stars"]              = starsMetrics.Maes,
                ["With stars and features"] = featureMetrics.Maes,
            };

            outputter.Out(maeComparison, section6, "Mean absolute error (MAE)");

            var ndcgComparison = new Dictionary <string, IDictionary <string, double> >()
            {
                ["Initial"]                 = binaryMetrics.Ndcgs,
                ["With stars"]              = starsMetrics.Ndcgs,
                ["With stars and features"] = featureMetrics.Ndcgs,
            };

            outputter.Out(ndcgComparison, section6, "Average NDCG@5");

            #endregion

            Console.WriteLine("\nCompleted all experiments.");
        }