/// <summary>
/// Runs the module: reads the command line options, loads the dataset and the trained model,
/// looks up related items and writes them to the predictions file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>False if the command line parser rejects the arguments; true once the related items have been saved.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    // Option values; the handlers registered below overwrite these defaults.
    string dataPath = string.Empty;
    string modelPath = string.Empty;
    string outputPath = string.Empty;
    int maxItems = 5;
    int minCommonUsers = 5;
    int minPoolSize = 5;

    var commandLine = new CommandLineParser();
    commandLine.RegisterParameterHandler(
        "--data",
        "FILE",
        "Dataset to make predictions for",
        path => dataPath = path,
        CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--model",
        "FILE",
        "File with trained model",
        path => modelPath = path,
        CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--predictions",
        "FILE",
        "File with generated predictions",
        path => outputPath = path,
        CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--max-items",
        "NUM",
        "Maximum number of related items for a single item; defaults to 5",
        count => maxItems = count,
        CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--min-common-users",
        "NUM",
        "Minimum number of users that the query item and the related item should have been rated by in common; defaults to 5",
        count => minCommonUsers = count,
        CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--min-pool-size",
        "NUM",
        "Minimum size of the related item pool for a single item; defaults to 5",
        count => minPoolSize = count,
        CommandLineParameterType.Optional);

    bool parsed = commandLine.TryParse(args, usagePrefix);
    if (!parsed)
    {
        return false;
    }

    // The dataset is loaded before the model, matching the established load order.
    RecommenderDataset dataset = RecommenderDataset.Load(dataPath);
    var model = MatchboxRecommender.Load<RecommenderDataset, User, Item, RatingDistribution, DummyFeatureSource>(modelPath);

    var itemEvaluator = new RecommenderEvaluator<RecommenderDataset, User, Item, int, int, RatingDistribution>(
        Mappings.StarRatingRecommender.ForEvaluation());

    IDictionary<Item, IEnumerable<Item>> related = itemEvaluator.FindRelatedItemsRatedBySameUsers(
        model,
        dataset,
        maxItems,
        minCommonUsers,
        minPoolSize);

    RecommenderPersistenceUtils.SaveRelatedItems(outputPath, related);
    return true;
}
/// <summary>
/// Trains and evaluates a recommender on 10-star rating input data with item features switched on,
/// once per requested trait count.
/// </summary>
/// <param name="traitsCounts">Trait counts to evaluate; a separate model is trained for each entry.</param>
/// <returns>Metrics (fraction of correct binarized predictions, NDCG and MAE), each keyed by the trait count.</returns>
public MetricValues PredictionsOnDataWithFeatures(IList<int> traitsCounts)
{
    var starsMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

    var trainingData = SplitInstanceSource.Training(RatingsPath);
    var testData = SplitInstanceSource.Test(RatingsPath);

    var binaryEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            binaryMapping.ForEvaluation());
    var starsEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            starsMapping.ForEvaluation());

    // One entry per trait count in each of the result tables.
    var accuracyByTraits = new Dictionary<string, double>();
    var ndcgByTraits = new Dictionary<string, double>();
    var maeByTraits = new Dictionary<string, double>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for data with features and a model with {traitCount} traits.");
        Rand.Restart(RandomSeed);

        string key = traitCount.ToString();

        var model = GetRecommender(starsMapping, traitCount);
        model.Settings.Training.UseItemFeatures = true;
        model.Settings.Training.UseSharedUserThresholds = true;
        model.Settings.Training.Advanced.UserThresholdPriorVariance = 10;
        model.Train(trainingData);

        var ratingDistributions = model.PredictDistribution(testData);
        var binarized = BinarizePredictions(ratingDistributions);
        var pointPredictions = model.Predict(testData);

        // Accuracy is the complement of the zero-one error on the binarized predictions.
        var zeroOneError = binaryEvaluator.RatingPredictionMetric(testData, binarized, Metrics.ZeroOneError);
        accuracyByTraits.Add(key, 1.0 - zeroOneError);

        var recommendations = starsEvaluator.RecommendRatedItems(model, testData, 5, 5);
        ndcgByTraits.Add(key, starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));

        // Halve the MAE to express it on a 5-star scale instead of the 10-star one.
        var absoluteError = starsEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.AbsoluteError);
        maeByTraits.Add(key, absoluteError / 2.0);
    }

    return new MetricValues(accuracyByTraits, ndcgByTraits, maeByTraits);
}
/// <summary>
/// Trains and evaluates a recommender on like/dislike (binarized) input data,
/// once per requested trait count.
/// </summary>
/// <param name="traitsCounts">Trait counts to evaluate; a separate model is trained for each entry.</param>
/// <returns>
/// A tuple of the like probabilities and the metrics (fraction of correct predictions and NDCG),
/// each keyed by the trait count.
/// </returns>
public (Dictionary<string, double[][]> likeProbability, MetricValues metricValues) PredictionsOnBinaryData(
    IList<int> traitsCounts)
{
    var starsMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

    var trainingData = SplitInstanceSource.Training(RatingsPath);
    var testData = SplitInstanceSource.Test(RatingsPath);

    var binaryEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            binaryMapping.ForEvaluation());
    var starsEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            starsMapping.ForEvaluation());

    // One entry per trait count in each of the result tables.
    var accuracyByTraits = new Dictionary<string, double>();
    var ndcgByTraits = new Dictionary<string, double>();
    var likeProbabilityByTraits = new Dictionary<string, double[][]>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for binarized data and a model with {traitCount} traits.");
        Rand.Restart(RandomSeed);

        string key = traitCount.ToString();

        // The model is built on the binarized mapping here.
        var model = GetRecommender(binaryMapping, traitCount);
        model.Settings.Training.Advanced.UserThresholdPriorVariance = EpsilonPriorVariance;
        model.Train(trainingData);

        var pointPredictions = model.Predict(testData);
        var ratingDistributions = model.PredictDistribution(testData);
        likeProbabilityByTraits.Add(key, GetLikeProbability(ratingDistributions));

        // Accuracy is the complement of the zero-one error.
        var zeroOneError = binaryEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.ZeroOneError);
        accuracyByTraits.Add(key, 1.0 - zeroOneError);

        var recommendations = starsEvaluator.RecommendRatedItems(model, testData, 5, 5);
        ndcgByTraits.Add(key, starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));
    }

    var metrics = new MetricValues(accuracyByTraits, ndcgByTraits);
    return (likeProbabilityByTraits, metrics);
}
/// <summary>
/// Trains and evaluates a recommender on 10-star rating input data,
/// once per requested trait count.
/// </summary>
/// <param name="traitsCounts">Trait counts to evaluate; a separate model is trained for each entry.</param>
/// <returns>
/// A tuple of the posterior distributions of the thresholds (read from the first user in the
/// posterior), the most probable ratings, and the metrics (fraction of correct binarized
/// predictions, NDCG and MAE), each keyed by the trait count.
/// </returns>
public (Dictionary<string, IDictionary<string, Gaussian>> posteriorDistributionsOfThresholds,
        Dictionary<string, double[][]> mostProbableRatings,
        MetricValues metricValues) PredictionsOnStarRatings(IList<int> traitsCounts)
{
    var starsMapping = RecommenderMappingFactory.GetStarsMapping(true);
    var binaryMapping = RecommenderMappingFactory.BinarizeMapping(starsMapping);

    var trainingData = SplitInstanceSource.Training(RatingsPath);
    var testData = SplitInstanceSource.Test(RatingsPath);

    var binaryEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            binaryMapping.ForEvaluation());
    var starsEvaluator =
        new RecommenderEvaluator<SplitInstanceSource<string>, string, Movie, int, int, IDictionary<int, double>>(
            starsMapping.ForEvaluation());

    // One entry per trait count in each of the result tables.
    var accuracyByTraits = new Dictionary<string, double>();
    var ndcgByTraits = new Dictionary<string, double>();
    var maeByTraits = new Dictionary<string, double>();
    var ratingsByTraits = new Dictionary<string, double[][]>();
    var thresholdsByTraits = new Dictionary<string, IDictionary<string, Gaussian>>();

    foreach (var traitCount in traitsCounts)
    {
        Console.WriteLine($"Running metrics calculation for 10-star data and a model with {traitCount} traits.");
        Rand.Restart(RandomSeed);

        string key = traitCount.ToString();

        var model = GetRecommender(starsMapping, traitCount);
        model.Settings.Training.UseSharedUserThresholds = true;
        model.Settings.Training.Advanced.UserThresholdPriorVariance = 10;
        model.Train(trainingData);

        var ratingDistributions = model.PredictDistribution(testData);
        var pointPredictions = model.Predict(testData);

        // Convert the predicted ratings to doubles, one inner sequence per user.
        var ratingsAsDoubles = pointPredictions.Select(
            perUser => perUser.Value.Select(perMovie => (double)perMovie.Value));
        ratingsByTraits.Add(key, GetJaggedDoubles(ratingsAsDoubles));

        // Thresholds are read from the first user of the posterior.
        var firstUserThresholds = model.GetPosteriorDistributions().Users.First().Value.Thresholds.ToList();
        thresholdsByTraits.Add(key, BeautifyPosteriorDistribution(firstUserThresholds));

        // Accuracy is the complement of the zero-one error on the binarized predictions.
        var binarized = BinarizePredictions(ratingDistributions);
        var zeroOneError = binaryEvaluator.RatingPredictionMetric(testData, binarized, Metrics.ZeroOneError);
        accuracyByTraits.Add(key, 1.0 - zeroOneError);

        var recommendations = starsEvaluator.RecommendRatedItems(model, testData, 5, 5);
        ndcgByTraits.Add(key, starsEvaluator.ItemRecommendationMetric(testData, recommendations, Metrics.Ndcg));

        // Halve the MAE to express it on a 5-star scale instead of the 10-star one.
        var absoluteError = starsEvaluator.RatingPredictionMetric(testData, pointPredictions, Metrics.AbsoluteError);
        maeByTraits.Add(key, absoluteError / 2.0);
    }

    var metrics = new MetricValues(accuracyByTraits, ndcgByTraits, maeByTraits);
    return (thresholdsByTraits, ratingsByTraits, metrics);
}