/// <summary>
/// Trains a recommender with item features enabled on the star-rating data and
/// measures the error of its predicted rating distributions on the test split.
/// </summary>
/// <returns>
/// The dictionary produced by <c>CreateItemPopularityPredictions</c>; per the original
/// documentation it holds the MAE of movies grouped by the number of ratings given for them.
/// </returns>
public Dictionary<string, double> GetRatingsToMaeOnFeaturePredictions()
{
    var starsMapping = RecommenderMappingFactory.GetStarsMapping(false);
    var trainingData = SplitInstanceSource.Training(RatingsPath);
    var testData = SplitInstanceSource.Test(RatingsPath);

    Console.WriteLine("Calculation of mean absolute error for movies with different numbers of ratings in the training set for data with feature info.");

    // Restart the random generator from the configured seed before building the model.
    Rand.Restart(RandomSeed);

    // This experiment always passes 16 as the trait count.
    var recommender = GetRecommender(starsMapping, 16);
    recommender.Settings.Training.UseItemFeatures = true;
    recommender.Settings.Training.UseSharedUserThresholds = true;
    recommender.Settings.Training.Advanced.UserThresholdPriorVariance = 10;
    recommender.Train(trainingData);

    var predictedDistributions = recommender.PredictDistribution(testData);
    var errors = PredictionError(testData, starsMapping, predictedDistributions);

    return CreateItemPopularityPredictions(trainingData, starsMapping, errors);
}
/// <summary>
/// Trains one recommender per requested trait count on the star-rating data with item
/// features enabled, and collects evaluation metrics for each of them.
/// </summary>
/// <param name="traitsCounts">Trait counts to evaluate; one model is trained for each entry.</param>
/// <returns>
/// A <c>MetricValues</c> built from three dictionaries keyed by the trait count as a string:
/// fraction of correct binarized predictions, NDCG, and mean absolute error.
/// </returns>
public MetricValues PredictionsOnDataWithFeatures(IList<int> traitsCounts)
{
    var starsMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

    var trainingData = SplitInstanceSource.Training(RatingsPath);
    var testData = SplitInstanceSource.Test(RatingsPath);

    var binaryEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            binaryMapping.ForEvaluation());
    var starsEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            starsMapping.ForEvaluation());

    var correctFractionByTraitCount = new Dictionary<string, double>();
    var ndcgByTraitCount = new Dictionary<string, double>();
    var maeByTraitCount = new Dictionary<string, double>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for data with features and a model with {traitCount} traits.");

        // Every result dictionary uses the trait count, rendered as text, as its key.
        var key = traitCount.ToString();

        // Restart the random generator from the configured seed for each model.
        Rand.Restart(RandomSeed);

        var recommender = GetRecommender(starsMapping, traitCount);
        recommender.Settings.Training.UseItemFeatures = true;
        recommender.Settings.Training.UseSharedUserThresholds = true;
        recommender.Settings.Training.Advanced.UserThresholdPriorVariance = 10;
        recommender.Train(trainingData);

        var predictedDistributions = recommender.PredictDistribution(testData);
        var binarized = BinarizePredictions(predictedDistributions);
        var pointPredictions = recommender.Predict(testData);

        // Accuracy is the complement of the zero-one error on the binarized predictions.
        correctFractionByTraitCount.Add(
            key,
            1.0 - binaryEvaluator.RatingPredictionMetric(testData, binarized, Metrics.ZeroOneError));

        // NOTE(review): the two 5s are presumably the recommendation count and the minimum
        // pool size for RecommendRatedItems - confirm against its signature.
        var recommendations = starsEvaluator.RecommendRatedItems(recommender, testData, 5, 5);
        ndcgByTraitCount.Add(
            key,
            starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));

        var absoluteError = starsEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.AbsoluteError);

        // Halve the error to express it on a 5-star scale instead of the 10-star scale.
        maeByTraitCount.Add(key, absoluteError / 2.0);
    }

    return new MetricValues(correctFractionByTraitCount, ndcgByTraitCount, maeByTraitCount);
}
/// <summary>
/// Trains one recommender per requested trait count on the binarized (like/dislike)
/// ratings and collects like probabilities together with evaluation metrics.
/// </summary>
/// <param name="traitsCounts">Trait counts to evaluate; one model is trained for each entry.</param>
/// <returns>
/// A tuple of (1) the output of <c>GetLikeProbability</c> for each model and (2) a
/// <c>MetricValues</c> holding the fraction of correct predictions and NDCG. All
/// dictionaries are keyed by the trait count as a string.
/// </returns>
public (Dictionary<string, double[][]> likeProbability, MetricValues metricValues) PredictionsOnBinaryData(
    IList<int> traitsCounts)
{
    var starsMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

    var trainingData = SplitInstanceSource.Training(RatingsPath);
    var testData = SplitInstanceSource.Test(RatingsPath);

    var binaryEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            binaryMapping.ForEvaluation());
    var starsEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            starsMapping.ForEvaluation());

    var correctFractionByTraitCount = new Dictionary<string, double>();
    var ndcgByTraitCount = new Dictionary<string, double>();
    var likeProbabilityByTraitCount = new Dictionary<string, double[][]>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for binarized data and a model with {traitCount} traits.");

        // Every result dictionary uses the trait count, rendered as text, as its key.
        var key = traitCount.ToString();

        // Restart the random generator from the configured seed for each model.
        Rand.Restart(RandomSeed);

        // The model itself is trained through the binary mapping.
        var recommender = GetRecommender(binaryMapping, traitCount);
        recommender.Settings.Training.Advanced.UserThresholdPriorVariance = EpsilonPriorVariance;
        recommender.Train(trainingData);

        var pointPredictions = recommender.Predict(testData);

        var predictedDistributions = recommender.PredictDistribution(testData);
        likeProbabilityByTraitCount.Add(key, GetLikeProbability(predictedDistributions));

        // Accuracy is the complement of the zero-one error.
        correctFractionByTraitCount.Add(
            key,
            1.0 - binaryEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.ZeroOneError));

        // NOTE(review): the two 5s are presumably the recommendation count and the minimum
        // pool size for RecommendRatedItems - confirm against its signature.
        var recommendations = starsEvaluator.RecommendRatedItems(recommender, testData, 5, 5);
        ndcgByTraitCount.Add(
            key,
            starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));
    }

    return (likeProbabilityByTraitCount, new MetricValues(correctFractionByTraitCount, ndcgByTraitCount));
}
/// <summary>
/// Creates a runner that stores the mapping factory and the ratings path it is given.
/// </summary>
/// <param name="recommenderMappingFactory">Factory stored in <c>RecommenderMappingFactory</c>.</param>
/// <param name="ratingsPath">Path stored in <c>RatingsPath</c>.</param>
public ModelRunner(RecommenderMappingFactory recommenderMappingFactory, string ratingsPath)
{
    (RatingsPath, RecommenderMappingFactory) = (ratingsPath, recommenderMappingFactory);
}
/// <summary>
/// Trains one recommender per requested trait count on the star-rating data (without
/// item features) and collects threshold posteriors, predicted ratings and metrics.
/// </summary>
/// <param name="traitsCounts">Trait counts to evaluate; one model is trained for each entry.</param>
/// <returns>
/// A tuple of (1) the threshold posterior distributions of the first user in the learned
/// posteriors, (2) the predicted ratings converted to doubles, and (3) a <c>MetricValues</c>
/// holding the fraction of correct binarized predictions, NDCG and mean absolute error.
/// All dictionaries are keyed by the trait count as a string.
/// </returns>
public (Dictionary<string, IDictionary<string, Gaussian>> posteriorDistributionsOfThresholds,
        Dictionary<string, double[][]> mostProbableRatings,
        MetricValues metricValues) PredictionsOnStarRatings(IList<int> traitsCounts)
{
    var starsMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

    var trainingData = SplitInstanceSource.Training(RatingsPath);
    var testData = SplitInstanceSource.Test(RatingsPath);

    var binaryEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            binaryMapping.ForEvaluation());
    var starsEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            starsMapping.ForEvaluation());

    var correctFractionByTraitCount = new Dictionary<string, double>();
    var ndcgByTraitCount = new Dictionary<string, double>();
    var maeByTraitCount = new Dictionary<string, double>();
    var ratingsByTraitCount = new Dictionary<string, double[][]>();
    var thresholdPosteriorsByTraitCount = new Dictionary<string, IDictionary<string, Gaussian>>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for 10-star data and a model with {traitCount} traits.");

        // Every result dictionary uses the trait count, rendered as text, as its key.
        var key = traitCount.ToString();

        // Restart the random generator from the configured seed for each model.
        Rand.Restart(RandomSeed);

        var recommender = GetRecommender(starsMapping, traitCount);
        recommender.Settings.Training.UseSharedUserThresholds = true;
        recommender.Settings.Training.Advanced.UserThresholdPriorVariance = 10;
        recommender.Train(trainingData);

        var predictedDistributions = recommender.PredictDistribution(testData);
        var pointPredictions = recommender.Predict(testData);

        // Project each user's predicted ratings to doubles for output.
        var ratingsAsDoubles = pointPredictions.Select(
            userRating => userRating.Value.Select(movieRating => (double)movieRating.Value));
        ratingsByTraitCount.Add(key, GetJaggedDoubles(ratingsAsDoubles));

        // Only the first user's thresholds are read; shared user thresholds are enabled above.
        var firstUserThresholds = recommender.GetPosteriorDistributions().Users.First().Value.Thresholds.ToList();
        thresholdPosteriorsByTraitCount.Add(key, BeautifyPosteriorDistribution(firstUserThresholds));

        // Accuracy is the complement of the zero-one error on the binarized predictions.
        var binarized = BinarizePredictions(predictedDistributions);
        correctFractionByTraitCount.Add(
            key,
            1.0 - binaryEvaluator.RatingPredictionMetric(testData, binarized, Metrics.ZeroOneError));

        // NOTE(review): the two 5s are presumably the recommendation count and the minimum
        // pool size for RecommendRatedItems - confirm against its signature.
        var recommendations = starsEvaluator.RecommendRatedItems(recommender, testData, 5, 5);
        ndcgByTraitCount.Add(
            key,
            starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));

        var absoluteError = starsEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.AbsoluteError);

        // Halve the error to express it on a 5-star scale instead of the 10-star scale.
        maeByTraitCount.Add(key, absoluteError / 2.0);
    }

    return (
        thresholdPosteriorsByTraitCount,
        ratingsByTraitCount,
        new MetricValues(correctFractionByTraitCount, ndcgByTraitCount, maeByTraitCount));
}
/// <summary>
/// Runs every experiment through a <c>ModelRunner</c> and hands the results to the outputter,
/// section by section.
/// </summary>
/// <param name="outputter">Receives each experiment result together with its section name and a title.</param>
/// <param name="experimentRunType">
/// Selects how much work is done.
/// <see cref="ExperimentRunType.FullRun"/> uses trait counts 0, 1, 2, 4, 8, 16 and an iteration count of 200.
/// <see cref="ExperimentRunType.FastRun"/> uses trait counts 0, 1, 2, 4 and an iteration count of 30.
/// Any other value (i.e. <see cref="ExperimentRunType.TestRun"/>) uses trait counts 0, 4 and an iteration count of 30.
/// </param>
public static void RunExperiments(Outputter outputter, ExperimentRunType experimentRunType)
{
    var isFullRun = experimentRunType == ExperimentRunType.FullRun;

    // Numbers of traits to use in experiments. A separate set of experiments is run for each of them.
    int[] traitCounts;
    if (isFullRun)
    {
        traitCounts = new int[] { 0, 1, 2, 4, 8, 16 };
    }
    else if (experimentRunType == ExperimentRunType.FastRun)
    {
        traitCounts = new int[] { 0, 1, 2, 4 };
    }
    else
    {
        // experimentRunType == ExperimentRunType.TestRun
        traitCounts = new int[] { 0, 4 };
    }

    var iterationCount = isFullRun ? 200 : 30;

    var movies = GetMovies();
    var recommenderMappingFactory = new RecommenderMappingFactory(movies);
    var modelRunner = new ModelRunner(recommenderMappingFactory, RatingsPath)
    {
        IterationCount = iterationCount
    };

    #region Section3
    var section3 = Contents.S3TrainingOurRecommender.NumberedName;
    Console.WriteLine($"\n{section3}.\n");

    var (ratings, starsHistogram, rankToRatingsDistributions) = PriorRatings(movies);
    outputter.Out(ratings, section3, "Ratings");
    outputter.Out(starsHistogram, section3, "The number of ratings given for each possible number of stars");
    #endregion

    #region Section4
    var section4 = Contents.S4OurFirstRecommendations.NumberedName;
    Console.WriteLine($"\n{section4}.\n");

    var binaryGroundTruth = modelRunner.GetGroundTruth(recommenderMappingFactory.GetBinaryMapping(true));
    outputter.Out(binaryGroundTruth, section4, "Ground truth");

    var (binaryPredictions, binaryMetrics) = modelRunner.PredictionsOnBinaryData(traitCounts);
    outputter.Out(binaryPredictions, section4, "Predictions");
    outputter.Out(binaryMetrics.CorrectFractions, section4, "Fraction of predictions correct");
    outputter.Out(binaryMetrics.Ndcgs, section4, "Average NDCG@5");
    #endregion

    #region Section5
    var section5 = Contents.S5ModellingStarRatings.NumberedName;
    Console.WriteLine($"\n{section5}.\n");

    var starsGroundTruth = modelRunner.GetGroundTruth(recommenderMappingFactory.GetStarsMapping(true));
    outputter.Out(starsGroundTruth, section5, "Ground truth");

    var (thresholdPosteriors, starsPredictions, starsMetrics) = modelRunner.PredictionsOnStarRatings(traitCounts);
    var ratingsNumToMaeStars = modelRunner.GetRatingsNumToMaeOnStarsPredictions();
    outputter.Out(thresholdPosteriors, section5, "Posterior distributions for star ratings thresholds");
    outputter.Out(starsPredictions, section5, "Predictions");

    var correctFractionsSection5 = new Dictionary<string, IDictionary<string, double>>
    {
        ["Initial"] = binaryMetrics.CorrectFractions,
        ["With stars"] = starsMetrics.CorrectFractions
    };
    var ndcgsSection5 = new Dictionary<string, IDictionary<string, double>>
    {
        ["Initial"] = binaryMetrics.Ndcgs,
        ["With stars"] = starsMetrics.Ndcgs
    };
    outputter.Out(correctFractionsSection5, section5, "Fraction of predictions correct");
    outputter.Out(ndcgsSection5, section5, "Average NDCG@5");
    outputter.Out(starsMetrics.Maes, section5, "Mean absolute error (MAE)");
    #endregion

    #region Section6
    var section6 = Contents.S6AnotherColdStartProblem.NumberedName;
    Console.WriteLine($"\n{section6}.\n");

    outputter.Out(rankToRatingsDistributions, section6, "The number of ratings given for each movie in the data set as a whole. ");

    var featureMetrics = modelRunner.PredictionsOnDataWithFeatures(traitCounts);
    var ratingsNumToMaeFeatures = modelRunner.GetRatingsToMaeOnFeaturePredictions();

    // The stars-only result computed for section 5 is shown here first, then again next to the feature model.
    outputter.Out(ratingsNumToMaeStars, section6, "MAE for movies with different numbers of ratings.");

    var ratingsNumToMae = new Dictionary<string, Dictionary<string, double>>
    {
        ["With stars"] = ratingsNumToMaeStars,
        ["With stars and features"] = ratingsNumToMaeFeatures
    };
    outputter.Out(ratingsNumToMae, section6, "MAE for movies with different numbers of ratings. A model including feature information.");

    var maeComparison = new Dictionary<string, IDictionary<string, double>>
    {
        ["With stars"] = starsMetrics.Maes,
        ["With stars and features"] = featureMetrics.Maes
    };
    outputter.Out(maeComparison, section6, "Mean absolute error (MAE)");

    var ndcgComparison = new Dictionary<string, IDictionary<string, double>>
    {
        ["Initial"] = binaryMetrics.Ndcgs,
        ["With stars"] = starsMetrics.Ndcgs,
        ["With stars and features"] = featureMetrics.Ndcgs
    };
    outputter.Out(ndcgComparison, section6, "Average NDCG@5");
    #endregion

    Console.WriteLine("\nCompleted all experiments.");
}