/// <summary>
/// Calculates MAE on 10-star rating input data with feature info.
/// </summary>
/// <returns>MAE of movies grouped by the number of ratings given for them in the training set.</returns>
public Dictionary<string, double> GetRatingsToMaeOnFeaturePredictions()
{
    var starRatingTrainTestSplittingMapping = RecommenderMappingFactory.GetStarsMapping(false);
    var trainSource = SplitInstanceSource.Training(RatingsPath);
    var testSource = SplitInstanceSource.Test(RatingsPath);

    // Plain literal: the original used '$' interpolation with no placeholders.
    Console.WriteLine("Calculation of mean absolute error for movies with different numbers of ratings in the training set for data with feature info.");

    // Reseed so the run is reproducible.
    Rand.Restart(RandomSeed);

    var recommender = GetRecommender(starRatingTrainTestSplittingMapping, 16);
    recommender.Settings.Training.UseItemFeatures = true;
    recommender.Settings.Training.UseSharedUserThresholds = true;
    recommender.Settings.Training.Advanced.UserThresholdPriorVariance = 10;
    recommender.Train(trainSource);

    var distribution = recommender.PredictDistribution(testSource);
    var predictionError = PredictionError(testSource, starRatingTrainTestSplittingMapping, distribution);
    var ratingsNumToMae = CreateItemPopularityPredictions(trainSource, starRatingTrainTestSplittingMapping, predictionError);
    return ratingsNumToMae;
}
/// <summary>
/// Runs the module.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>True if the run was successful, false otherwise.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    // Split parameters with their defaults; overwritten by the handlers below.
    string inputDatasetFile = string.Empty;
    string outputTrainingDatasetFile = string.Empty;
    string outputTestDatasetFile = string.Empty;
    double trainingOnlyUserFraction = 0.5;
    double testUserRatingTrainingFraction = 0.25;
    double coldUserFraction = 0;
    double coldItemFraction = 0;
    double ignoredUserFraction = 0;
    double ignoredItemFraction = 0;
    bool removeOccasionalColdItems = false;

    var parser = new CommandLineParser();
    parser.RegisterParameterHandler("--input-data", "FILE", "Dataset to split", v => inputDatasetFile = v, CommandLineParameterType.Required);
    parser.RegisterParameterHandler("--output-data-train", "FILE", "Training part of the split dataset", v => outputTrainingDatasetFile = v, CommandLineParameterType.Required);
    parser.RegisterParameterHandler("--output-data-test", "FILE", "Test part of the split dataset", v => outputTestDatasetFile = v, CommandLineParameterType.Required);
    parser.RegisterParameterHandler("--training-users", "NUM", "Fraction of training-only users; defaults to 0.5", (double v) => trainingOnlyUserFraction = v, CommandLineParameterType.Optional);
    parser.RegisterParameterHandler("--test-user-training-ratings", "NUM", "Fraction of test user ratings for training; defaults to 0.25", (double v) => testUserRatingTrainingFraction = v, CommandLineParameterType.Optional);
    parser.RegisterParameterHandler("--cold-users", "NUM", "Fraction of cold (test-only) users; defaults to 0", (double v) => coldUserFraction = v, CommandLineParameterType.Optional);
    parser.RegisterParameterHandler("--cold-items", "NUM", "Fraction of cold (test-only) items; defaults to 0", (double v) => coldItemFraction = v, CommandLineParameterType.Optional);
    parser.RegisterParameterHandler("--ignored-users", "NUM", "Fraction of ignored users; defaults to 0", (double v) => ignoredUserFraction = v, CommandLineParameterType.Optional);
    parser.RegisterParameterHandler("--ignored-items", "NUM", "Fraction of ignored items; defaults to 0", (double v) => ignoredItemFraction = v, CommandLineParameterType.Optional);
    parser.RegisterParameterHandler("--remove-occasional-cold-items", "Remove occasionally produced cold items", () => removeOccasionalColdItems = true);

    // Bail out early when the command line is malformed.
    if (!parser.TryParse(args, usagePrefix))
    {
        return false;
    }

    var splittingMapping = Mappings.StarRatingRecommender.SplitToTrainTest(
        trainingOnlyUserFraction,
        testUserRatingTrainingFraction,
        coldUserFraction,
        coldItemFraction,
        ignoredUserFraction,
        ignoredItemFraction,
        removeOccasionalColdItems);

    // Materialize both halves of the split and persist each one.
    var inputDataset = RecommenderDataset.Load(inputDatasetFile);

    var outputTrainingDataset = new RecommenderDataset(
        splittingMapping.GetInstances(SplitInstanceSource.Training(inputDataset)),
        inputDataset.StarRatingInfo);
    outputTrainingDataset.Save(outputTrainingDatasetFile);

    var outputTestDataset = new RecommenderDataset(
        splittingMapping.GetInstances(SplitInstanceSource.Test(inputDataset)),
        inputDataset.StarRatingInfo);
    outputTestDataset.Save(outputTestDatasetFile);

    return true;
}
/// <summary>
/// Computes prediction metrics on 10-star rating input data with item features,
/// one model per requested trait count.
/// </summary>
/// <param name="traitsCounts">The numbers of item traits to evaluate.</param>
/// <returns>Correct-fraction, NDCG, and MAE metrics keyed by trait count.</returns>
public MetricValues PredictionsOnDataWithFeatures(IList<int> traitsCounts)
{
    var starRatingTrainTestSplittingMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryRatingTrainTestSplittingMapping = RecommenderMappingFactory.BinarizeMapping(starRatingTrainTestSplittingMapping);
    var trainSource = SplitInstanceSource.Training(RatingsPath);
    var testSource = SplitInstanceSource.Test(RatingsPath);

    var binaryRatingEvaluator = new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
        binaryRatingTrainTestSplittingMapping.ForEvaluation());
    var starsRatingEvaluator = new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
        starRatingTrainTestSplittingMapping.ForEvaluation());

    var correctFractions = new Dictionary<string, double>();
    var ndcgs = new Dictionary<string, double>();
    var maes = new Dictionary<string, double>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for data with features and a model with {traitCount} traits.");

        // Reseed so every trait count trains from the same random state.
        Rand.Restart(RandomSeed);

        var recommender = GetRecommender(starRatingTrainTestSplittingMapping, traitCount);
        recommender.Settings.Training.UseItemFeatures = true;
        recommender.Settings.Training.UseSharedUserThresholds = true;
        recommender.Settings.Training.Advanced.UserThresholdPriorVariance = 10;
        recommender.Train(trainSource);

        var distribution = recommender.PredictDistribution(testSource);
        var binarizedPredictions = BinarizePredictions(distribution);
        var predictions = recommender.Predict(testSource);

        var key = traitCount.ToString();

        // Zero-one error is the misclassification rate, so its complement is the correct fraction.
        var correctFraction = 1.0 - binaryRatingEvaluator.RatingPredictionMetric(testSource, binarizedPredictions, Metrics.ZeroOneError);
        correctFractions.Add(key, correctFraction);

        var itemRecommendationsForEvaluation = starsRatingEvaluator.RecommendRatedItems(recommender, testSource, 5, 5);
        var ndcg = starsRatingEvaluator.ItemRecommendationMetric(testSource, itemRecommendationsForEvaluation, Metrics.Ndcg);
        ndcgs.Add(key, ndcg);

        var mae = starsRatingEvaluator.RatingPredictionMetric(testSource, predictions, Metrics.AbsoluteError);

        // Halve the absolute error to map the 10-star scale back to 5 stars.
        maes.Add(key, mae / 2.0);
    }

    return new MetricValues(correctFractions, ndcgs, maes);
}
/// <summary>
/// Extracts the test-set ratings as a jagged array: the first dimension represents users,
/// the second represents the movies each user rated.
/// </summary>
/// <param name="mapping">A mapping to convert ratings to the scale used exactly in the current experiment.</param>
/// <returns>The ground-truth ratings as a jagged array of doubles.</returns>
public double[][] GetGroundTruth(
    IStarRatingRecommenderMapping<SplitInstanceSource<string>, RatingTriple, string, Movie, int, NoFeatureSource, Vector> mapping)
{
    Rand.Restart(RandomSeed);
    var testSource = SplitInstanceSource.Test(RatingsPath);
    var evaluationMapping = mapping.ForEvaluation();

    // One inner sequence per user; each element is that user's rating for one movie.
    var perUserRatings = evaluationMapping
        .GetUsers(testSource)
        .Select(user => evaluationMapping
            .GetItemsRatedByUser(testSource, user)
            .Select(movie => (double)evaluationMapping.GetRating(testSource, user, movie)));

    return GetJaggedDoubles(perUserRatings);
}
/// <summary>
/// Computes prediction metrics on like/dislike (binarized) input data,
/// one model per requested trait count.
/// </summary>
/// <param name="traitsCounts">The numbers of item traits to evaluate.</param>
/// <returns>A tuple of like probabilities per trait count and the collected metrics.</returns>
public (Dictionary<string, double[][]> likeProbability, MetricValues metricValues) PredictionsOnBinaryData(
    IList<int> traitsCounts)
{
    var starRatingTrainTestSplittingMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryRatingTrainTestSplittingMapping = RecommenderMappingFactory.BinarizeMapping(starRatingTrainTestSplittingMapping);
    var trainSource = SplitInstanceSource.Training(RatingsPath);
    var testSource = SplitInstanceSource.Test(RatingsPath);

    var binaryRatingEvaluator = new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
        binaryRatingTrainTestSplittingMapping.ForEvaluation());
    var starsRatingEvaluator = new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
        starRatingTrainTestSplittingMapping.ForEvaluation());

    var correctFractions = new Dictionary<string, double>();
    var ndcgs = new Dictionary<string, double>();
    var likeProbability = new Dictionary<string, double[][]>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for binarized data and a model with {traitCount} traits.");

        // Reseed so every trait count trains from the same random state.
        Rand.Restart(RandomSeed);

        var recommender = GetRecommender(binaryRatingTrainTestSplittingMapping, traitCount);
        recommender.Settings.Training.Advanced.UserThresholdPriorVariance = EpsilonPriorVariance;
        recommender.Train(trainSource);

        var key = traitCount.ToString();

        var predictions = recommender.Predict(testSource);
        likeProbability.Add(key, GetLikeProbability(recommender.PredictDistribution(testSource)));

        // Zero-one error is the misclassification rate, so its complement is the correct fraction.
        var correctFraction = 1.0 - binaryRatingEvaluator.RatingPredictionMetric(testSource, predictions, Metrics.ZeroOneError);
        correctFractions.Add(key, correctFraction);

        var itemRecommendationsForEvaluation = starsRatingEvaluator.RecommendRatedItems(recommender, testSource, 5, 5);
        var ndcg = starsRatingEvaluator.ItemRecommendationMetric(testSource, itemRecommendationsForEvaluation, Metrics.Ndcg);
        ndcgs.Add(key, ndcg);
    }

    return (likeProbability, new MetricValues(correctFractions, ndcgs));
}
/// <summary>
/// Generates a random dataset of the specified size, splits it as requested and checks the correctness of the resulting split.
/// </summary>
/// <param name="userCount">The number of users in the dataset.</param>
/// <param name="itemCount">The number of items in the dataset.</param>
/// <param name="sparsity">The probability of a random item to be rated by a random user.</param>
/// <param name="trainingOnlyUserFraction">The fraction of users presented only in the training set.</param>
/// <param name="testUserTrainingRatingFraction">The fraction of ratings in the training set for each user who is presented in both sets.</param>
/// <param name="coldUserFraction">The fraction of users presented only in test set.</param>
/// <param name="coldItemFraction">The fraction of items presented only in test set.</param>
/// <param name="ignoredUserFraction">The fraction of users not presented in any of the sets.</param>
/// <param name="ignoredItemFraction">The fraction of items not presented in any of the sets.</param>
/// <param name="removeOccasionalColdItems">Specifies whether the occasionally produced cold items should be removed from the test set.</param>
/// <returns>A triple containing the generated dataset, the training subset, and the test subset.</returns>
private static Tuple<Dataset, Dataset, Dataset> TestSplittingHelper(
    int userCount,
    int itemCount,
    double sparsity,
    double trainingOnlyUserFraction,
    double testUserTrainingRatingFraction,
    double coldUserFraction,
    double coldItemFraction,
    double ignoredUserFraction,
    double ignoredItemFraction,
    bool removeOccasionalColdItems)
{
    Dataset dataset = GenerateDataset(userCount, itemCount, sparsity);

    var mapping = new Mapping();
    var splittingMapping = mapping.SplitToTrainTest(
        trainingOnlyUserFraction,
        testUserTrainingRatingFraction,
        coldUserFraction,
        coldItemFraction,
        ignoredUserFraction,
        ignoredItemFraction,
        removeOccasionalColdItems);

    Dataset trainingDataset = splittingMapping.GetInstances(SplitInstanceSource.Training(dataset));
    Dataset testDataset = splittingMapping.GetInstances(SplitInstanceSource.Test(dataset));

    // Validate the split; boolean flags tell the checker which categories were requested.
    CheckDatasetSplitCorrectness(
        dataset,
        trainingDataset,
        testDataset,
        coldUserFraction > 0,
        coldItemFraction > 0,
        ignoredUserFraction > 0,
        ignoredItemFraction > 0,
        removeOccasionalColdItems);

    return Tuple.Create(dataset, trainingDataset, testDataset);
}
/// <summary>
/// Computes prediction metrics on 10-star rating input data (no item features),
/// one model per requested trait count.
/// </summary>
/// <param name="traitsCounts">The numbers of item traits to evaluate.</param>
/// <returns>A tuple of threshold posterior distributions, most probable ratings, and the collected metrics.</returns>
public (Dictionary<string, IDictionary<string, Gaussian>> posteriorDistributionsOfThresholds, Dictionary<string, double[][]> mostProbableRatings, MetricValues metricValues) PredictionsOnStarRatings(
    IList<int> traitsCounts)
{
    var starRatingTrainTestSplittingMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryRatingTrainTestSplittingMapping = RecommenderMappingFactory.BinarizeMapping(starRatingTrainTestSplittingMapping);
    var trainSource = SplitInstanceSource.Training(RatingsPath);
    var testSource = SplitInstanceSource.Test(RatingsPath);

    var binaryRatingEvaluator = new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
        binaryRatingTrainTestSplittingMapping.ForEvaluation());
    var starsRatingEvaluator = new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
        starRatingTrainTestSplittingMapping.ForEvaluation());

    var correctFractions = new Dictionary<string, double>();
    var ndcgs = new Dictionary<string, double>();
    var maes = new Dictionary<string, double>();
    var mostProbableRatings = new Dictionary<string, double[][]>();
    var posteriorDistributionsOfThresholds = new Dictionary<string, IDictionary<string, Gaussian>>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for 10-star data and a model with {traitCount} traits.");

        // Reseed so every trait count trains from the same random state.
        Rand.Restart(RandomSeed);

        var recommender = GetRecommender(starRatingTrainTestSplittingMapping, traitCount);
        recommender.Settings.Training.UseSharedUserThresholds = true;
        recommender.Settings.Training.Advanced.UserThresholdPriorVariance = 10;
        recommender.Train(trainSource);

        var distributions = recommender.PredictDistribution(testSource);
        var predictions = recommender.Predict(testSource);

        var key = traitCount.ToString();

        // Point predictions flattened into a jagged user-by-movie array of doubles.
        var ratingsAsDoubles = predictions.Select(userRating => userRating.Value.Select(movieRating => (double)movieRating.Value));
        mostProbableRatings.Add(key, GetJaggedDoubles(ratingsAsDoubles));

        // Thresholds are shared across users, so the first user's posterior represents them all.
        var posteriorDistributionOfThresholds = recommender.GetPosteriorDistributions().Users.First().Value.Thresholds.ToList();
        posteriorDistributionsOfThresholds.Add(key, BeautifyPosteriorDistribution(posteriorDistributionOfThresholds));

        var binarizedPredictions = BinarizePredictions(distributions);

        // Zero-one error is the misclassification rate, so its complement is the correct fraction.
        var correctFraction = 1.0 - binaryRatingEvaluator.RatingPredictionMetric(testSource, binarizedPredictions, Metrics.ZeroOneError);
        correctFractions.Add(key, correctFraction);

        var itemRecommendationsForEvaluation = starsRatingEvaluator.RecommendRatedItems(recommender, testSource, 5, 5);
        var ndcg = starsRatingEvaluator.ItemRecommendationMetric(testSource, itemRecommendationsForEvaluation, Metrics.Ndcg);
        ndcgs.Add(key, ndcg);

        var mae = starsRatingEvaluator.RatingPredictionMetric(testSource, predictions, Metrics.AbsoluteError);

        // Halve the absolute error to map the 10-star scale back to 5 stars.
        maes.Add(key, mae / 2.0);
    }

    return (posteriorDistributionsOfThresholds, mostProbableRatings, new MetricValues(correctFractions, ndcgs, maes));
}
/// <summary>
/// Executes the configured recommender tests over all cross-validation folds,
/// raising Started/FoldProcessed/Completed (or Interrupted on failure) events as it goes.
/// </summary>
public void Execute()
{
    // Report that the run has been started.
    // '?.Invoke' snapshots the delegate, closing the race between the null check
    // and the call that the original 'if (X != null) X(...)' pattern allowed
    // when a subscriber unsubscribes concurrently.
    this.Started?.Invoke(this, EventArgs.Empty);

    try
    {
        Rand.Restart(1984); // Run should produce the same results every time

        TimeSpan totalTrainingTime = TimeSpan.Zero;
        TimeSpan totalPredictionTime = TimeSpan.Zero;
        TimeSpan totalEvaluationTime = TimeSpan.Zero;
        Stopwatch totalTimer = Stopwatch.StartNew();
        MetricValueDistributionCollection metrics = null;

        for (int i = 0; i < this.FoldCount; ++i)
        {
            // Start timer measuring total time spent on this fold
            Stopwatch totalFoldTimer = Stopwatch.StartNew();

            SplittingMapping splittingMapping = this.SplittingMappingFactory();
            Recommender recommender = this.RecommenderFactory(splittingMapping);
            Evaluator evaluator = new Evaluator(new EvaluatorMapping(splittingMapping));

            // Train the recommender
            Stopwatch foldTrainingTimer = Stopwatch.StartNew();
            recommender.Train(SplitInstanceSource.Training(this.RecommenderDataset));
            TimeSpan foldTrainingTime = foldTrainingTimer.Elapsed;

            // Run each test on the trained recommender
            var foldMetrics = new MetricValueDistributionCollection();
            TimeSpan foldPredictionTime = TimeSpan.Zero;
            TimeSpan foldEvaluationTime = TimeSpan.Zero;
            foreach (RecommenderTest test in this.Tests)
            {
                // Perform the test
                TimeSpan testPredictionTime, testEvaluationTime;
                MetricValueDistributionCollection testMetrics;
                test.Execute(
                    recommender,
                    evaluator,
                    SplitInstanceSource.Test(this.RecommenderDataset),
                    out testPredictionTime,
                    out testEvaluationTime,
                    out testMetrics);

                // Merge the timings and the metrics
                foldPredictionTime += testPredictionTime;
                foldEvaluationTime += testEvaluationTime;
                foldMetrics.SetToUnionWith(testMetrics);
            }

            // Stop timer measuring total time spent on this fold
            TimeSpan totalFoldTime = totalFoldTimer.Elapsed;

            // Report that the fold has been processed
            this.FoldProcessed?.Invoke(
                this,
                new RecommenderRunFoldProcessedEventArgs(i, totalFoldTime, foldTrainingTime, foldPredictionTime, foldEvaluationTime, foldMetrics));

            // Merge the timings
            totalTrainingTime += foldTrainingTime;
            totalPredictionTime += foldPredictionTime;
            totalEvaluationTime += foldEvaluationTime;

            // Merge the metrics; the first fold's collection seeds the accumulator.
            if (metrics == null)
            {
                metrics = foldMetrics;
            }
            else
            {
                metrics.MergeWith(foldMetrics);
            }
        }

        // Report that the run has been completed
        TimeSpan totalTime = totalTimer.Elapsed;
        this.Completed?.Invoke(
            this,
            new RecommenderRunCompletedEventArgs(totalTime, totalTrainingTime, totalPredictionTime, totalEvaluationTime, metrics));
    }
    catch (Exception e)
    {
        // Deliberate: failures are surfaced to subscribers via the Interrupted event
        // rather than crashing the host. NOTE(review): the exception is swallowed
        // if nobody subscribes — confirm that is the intended contract.
        this.Interrupted?.Invoke(this, new RecommenderRunInterruptedEventArgs(e));
    }
}