/// <summary>
/// Trains the recommender on a given dataset. Mahout offers no model persistence, so training
/// only records the rating information and the user/item identifiers, and writes the training
/// data to a dataset file that is used later during prediction.
/// </summary>
/// <param name="instanceSource">The instances of the dataset.</param>
/// <param name="featureSource">The parameter is not used.</param>
public void Train(TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
{
    // Materialize the instances once: they are enumerated several times below.
    IEnumerable<RatedUserItem> ratedInstances = this.mapping.GetInstances(instanceSource).ToList();
    this.starRatingInfo = this.mapping.GetRatingInfo(instanceSource);
    this.trainingSetRatingDistribution = this.EstimateRatingDistribution(ratedInstances);

    // Mahout requires dense integer identifiers, so every previously unseen user or item
    // is assigned the next free index (the current size of the corresponding lookup).
    foreach (RatedUserItem ratedInstance in ratedInstances)
    {
        var user = ratedInstance.User;
        bool isKnownUser = this.userToId.ContainsKey(user);
        if (!isKnownUser)
        {
            this.idToUser.Add(user);
            this.userToId.Add(user, this.userToId.Count);
        }

        var item = ratedInstance.Item;
        bool isKnownItem = this.itemToId.ContainsKey(item);
        if (!isKnownItem)
        {
            this.idToItem.Add(item);
            this.itemToId.Add(item, this.itemToId.Count);
        }
    }

    // The user and item subsets are the entities seen so far.
    this.userSubset = this.userToId.Keys;
    this.itemSubset = this.itemToId.Keys;

    // Persist the training data in Mahout format.
    this.trainingDatasetFile = this.CreateDatasetFile(ratedInstances);
}
/// <summary>
/// Computes the probability calibration plot for a particular rating value.
/// Predicted probabilities of <paramref name="rating"/> are grouped into equally sized bins, and for
/// each bin the fraction of instances whose ground truth equals <paramref name="rating"/> is reported.
/// </summary>
/// <param name="instanceSource">The instance source providing the ground truth.</param>
/// <param name="predictions">A sparse users-by-items matrix of predicted rating distributions.</param>
/// <param name="rating">The rating value to generate the calibration plot for.</param>
/// <param name="bins">The number of bins to use.</param>
/// <returns>
/// The computed probability calibration plot, one value per bin.
/// The value of a bin that received no predictions is computed as 0/0 in floating point (NaN).
/// </returns>
public double[] ProbabilityCalibrationPlot(
    TInstanceSource instanceSource,
    IDictionary<TUser, IDictionary<TItem, Discrete>> predictions,
    int rating,
    int bins)
{
    IStarRatingInfo<TGroundTruthRating> ratingInfo = this.mapping.GetRatingInfo(instanceSource);

    var predictionsPerBin = new int[bins];
    var hitsPerBin = new int[bins];

    foreach (var userPredictions in predictions)
    {
        TUser user = userPredictions.Key;
        foreach (var itemPrediction in userPredictions.Value)
        {
            TItem item = itemPrediction.Key;
            double probability = itemPrediction.Value[rating];
            int trueRating = ratingInfo.ToStarRating(this.mapping.GetRating(instanceSource, user, item));

            // A probability of exactly 1.0 would index one past the end, so clamp to the last bin.
            int bin = Math.Min((int)(probability * bins), bins - 1);

            predictionsPerBin[bin]++;
            if (trueRating == rating)
            {
                hitsPerBin[bin]++;
            }
        }
    }

    return Util.ArrayInit(bins, i => (double)hitsPerBin[i] / predictionsPerBin[i]);
}
/// <summary>
/// Computes the average of a given rating prediction metric whose ground truth is expressed in model domain.
/// The metric is wrapped so that each ground truth rating is first converted to a star rating, and the
/// aggregation over <paramref name="predictions"/> is delegated to <c>RatingPredictionMetric</c> using
/// the method given in <paramref name="aggregationMethod"/>.
/// </summary>
/// <param name="instanceSource">The instance source providing the ground truth.</param>
/// <param name="predictions">A sparse users-by-items matrix of predicted rating distributions.</param>
/// <param name="metric">The rating prediction metric using ground truth in model domain.</param>
/// <param name="aggregationMethod">A method specifying how metrics are aggregated over all instances.</param>
/// <returns>The computed average of the given rating prediction metric.</returns>
public double ModelDomainRatingPredictionMetric(
    TInstanceSource instanceSource,
    IDictionary<TUser, IDictionary<TItem, Discrete>> predictions,
    Func<int, Discrete, double> metric,
    RecommenderMetricAggregationMethod aggregationMethod = RecommenderMetricAggregationMethod.Default)
{
    IStarRatingInfo<TGroundTruthRating> ratingInfo = this.mapping.GetRatingInfo(instanceSource);

    // Adapt the model-domain metric so that it accepts the raw ground truth rating.
    Func<TGroundTruthRating, Discrete, double> groundTruthMetric =
        (groundTruth, prediction) => metric(ratingInfo.ToStarRating(groundTruth), prediction);

    return this.RatingPredictionMetric(instanceSource, predictions, groundTruthMetric, aggregationMethod);
}
/// <summary>
/// Initializes a new instance of the <see cref="RecommenderDataset"/> class.
/// </summary>
/// <param name="observations">The list of observations to create the dataset from.</param>
/// <param name="starRatingInfo">The information about ratings in the dataset.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="starRatingInfo"/> or <paramref name="observations"/> is null.
/// </exception>
public RecommenderDataset(IEnumerable<RatedUserItem> observations, IStarRatingInfo<int> starRatingInfo)
{
    // Validate both arguments before touching any state, so that a failed construction
    // reports the offending argument instead of a NullReferenceException from the loop below.
    if (starRatingInfo == null)
    {
        throw new ArgumentNullException("starRatingInfo");
    }

    if (observations == null)
    {
        throw new ArgumentNullException("observations");
    }

    this.StarRatingInfo = starRatingInfo;

    foreach (RatedUserItem observation in observations)
    {
        this.AddObservation(observation);
    }
}
/// <summary>
/// Trains the recommender on the given dataset.
/// The native mapping is configured first (rating info, feature usage, batch count and indexed
/// entity sets), then the wrapped recommender is trained, and finally the native mapping
/// has its batch count set back to 1 and is marked as trained.
/// </summary>
/// <param name="instanceSource">The instances of the dataset.</param>
/// <param name="featureSource">The source of the features for the given instances.</param>
public void Train(TInstanceSource instanceSource, TFeatureSource featureSource = default(TFeatureSource))
{
    // Rating information comes from the top-level mapping and is shared with the native mapping.
    this.starRatingInfo = this.topLevelMapping.GetRatingInfo(instanceSource);
    this.nativeMapping.SetRatingInfo(this.starRatingInfo);

    // Feature usage is taken from this recommender's own training settings.
    var trainingSettings = this.Settings.Training;
    this.nativeMapping.UseUserFeatures = trainingSettings.UseUserFeatures;
    this.nativeMapping.UseItemFeatures = trainingSettings.UseItemFeatures;

    // The batch count is taken from the wrapped recommender's settings.
    var batchCount = this.recommender.Settings.Training.BatchCount;
    this.nativeMapping.SetBatchCount(batchCount);

    // The indexed user and item sets have to be built before they are handed to the native mapping.
    this.BuildIndexedEntitySets(instanceSource);
    this.nativeMapping.SetIndexedEntitySets(this.indexedUserSet, this.indexedItemSet);

    this.recommender.Train(instanceSource, featureSource);

    // After training the native mapping works with a single batch and is flagged as trained.
    this.nativeMapping.SetBatchCount(1);
    this.nativeMapping.SetTrained();
}
/// <summary>
/// Trains the recommender on the specified instances. The random recommender learns nothing from
/// the ratings themselves: training only stores the rating info and collects the distinct users
/// and items that occur in the training set.
/// </summary>
/// <param name="instanceSource">The source of instances to train on.</param>
/// <param name="featureSource">The source of features for the specified instances. Not used by this method.</param>
public void Train(TInstanceSource instanceSource, TFeatureSource featureSource = default(TFeatureSource))
{
    this.starRatingInfo = this.mapping.GetRatingInfo(instanceSource);

    // Hash sets drop duplicates, so each user and item is kept once.
    var distinctUsers = new HashSet<TUser>();
    var distinctItems = new HashSet<TItem>();

    foreach (TInstance trainingInstance in this.mapping.GetInstances(instanceSource))
    {
        TUser user = this.mapping.GetUser(instanceSource, trainingInstance);
        distinctUsers.Add(user);

        TItem item = this.mapping.GetItem(instanceSource, trainingInstance);
        distinctItems.Add(item);
    }

    this.userSubset = distinctUsers.ToArray();
    this.itemSubset = distinctItems.ToArray();
}
/// <summary>
/// Prepares the environment (dataset, predictions, evaluation engine etc) before each test.
/// </summary>
public RecommenderEvaluatorTests()
{
    // Each entry is: user, item, predicted rating, prediction distribution, true rating (data domain).
    this.dataset = new[]
    {
        new Tuple<string, string, int, RatingDistribution, double>(
            "A",
            "a",
            4,
            new SortedDictionary<int, double> { { 0, 0.0 }, { 1, 0.2 }, { 2, 0.0 }, { 3, 0.0 }, { 4, 0.6 }, { 5, 0.2 } },
            1.1),
        new Tuple<string, string, int, RatingDistribution, double>(
            "B",
            "a",
            1,
            new SortedDictionary<int, double> { { 0, 0.0 }, { 1, 0.5 }, { 2, 0.0 }, { 3, 0.0 }, { 4, 0.5 }, { 5, 0.0 } },
            4.1),
        new Tuple<string, string, int, RatingDistribution, double>(
            "D",
            "b",
            2,
            new SortedDictionary<int, double> { { 0, 0.0 }, { 1, 0.0 }, { 2, 1.0 }, { 3, 0.0 }, { 4, 0.0 }, { 5, 0.0 } },
            1.9),
        new Tuple<string, string, int, RatingDistribution, double>(
            "A",
            "b",
            5,
            new SortedDictionary<int, double> { { 0, 0.0 }, { 1, 0.0 }, { 2, 0.0 }, { 3, 0.0 }, { 4, 0.2 }, { 5, 0.8 } },
            5.3),
        new Tuple<string, string, int, RatingDistribution, double>(
            "A",
            "c",
            3,
            new SortedDictionary<int, double> { { 0, 0.0 }, { 1, 0.0 }, { 2, 0.0 }, { 3, 0.6 }, { 4, 0.2 }, { 5, 0.2 } },
            4.7),
        new Tuple<string, string, int, RatingDistribution, double>(
            "A",
            "e",
            1,
            new SortedDictionary<int, double> { { 0, 0.0 }, { 1, 0.5 }, { 2, 0.0 }, { 3, 0.0 }, { 4, 0.5 }, { 5, 0.0 } },
            3.6),
        new Tuple<string, string, int, RatingDistribution, double>(
            "B",
            "c",
            2,
            new SortedDictionary<int, double> { { 0, 0.0 }, { 1, 0.0 }, { 2, 0.8 }, { 3, 0.2 }, { 4, 0.0 }, { 5, 0.0 } },
            3.1)
    };

    // The evaluator works on the evaluation view of the recommender mapping.
    var ratingMapping = new StarRatingRecommenderMapping();
    var evaluationMapping = ratingMapping.ForEvaluation();

    this.starRatingInfo = ratingMapping.GetRatingInfo(null);
    this.evaluator =
        new StarRatingRecommenderEvaluator<IEnumerable<Instance>, string, string, double>(evaluationMapping);

    // Point predictions use the predicted rating; uncertain predictions use the full distribution.
    this.predictions = BuildPredictionDictionary(this.dataset, entry => entry.Item3);
    this.uncertainPredictions = BuildPredictionDictionary(this.dataset, entry => entry.Item4);
}
/// <summary>
/// Computes the expected confusion matrix.
/// Every cell (predicted rating, true rating) is the expectation of an indicator metric that is 1
/// when the prediction and the ground truth match that pair of ratings and 0 otherwise.
/// </summary>
/// <param name="instanceSource">The instance source providing the ground truth.</param>
/// <param name="predictions">A sparse users-by-items matrix of predicted rating distributions.</param>
/// <param name="aggregationMethod">A method specifying how metrics are aggregated over all instances.</param>
/// <returns>The computed expected confusion matrix.</returns>
public RatingMatrix ExpectedConfusionMatrix(
    TInstanceSource instanceSource,
    IDictionary<TUser, IDictionary<TItem, Discrete>> predictions,
    RecommenderMetricAggregationMethod aggregationMethod = RecommenderMetricAggregationMethod.Default)
{
    IStarRatingInfo<TGroundTruthRating> ratingInfo = this.mapping.GetRatingInfo(instanceSource);
    int minRating = ratingInfo.MinStarRating;
    int maxRating = ratingInfo.MaxStarRating;

    var confusion = new RatingMatrix(minRating, maxRating);

    for (int predicted = minRating; predicted <= maxRating; ++predicted)
    {
        for (int actual = minRating; actual <= maxRating; ++actual)
        {
            // Per-iteration copies keep the lambda independent of the loop counters.
            int predictedRating = predicted;
            int trueRating = actual;

            confusion[predictedRating, trueRating] = this.ModelDomainRatingPredictionMetricExpectation(
                instanceSource,
                predictions,
                (p, t) => (p == predictedRating && t == trueRating) ? 1.0 : 0.0,
                aggregationMethod);
        }
    }

    return confusion;
}
/// <summary>
/// Stores the given rating info in this instance, replacing any previously stored one.
/// </summary>
/// <param name="starRatingInfo">The rating info to set.</param>
public void SetRatingInfo(IStarRatingInfo<TDataRating> starRatingInfo)
{
    this.starRatingInfo = starRatingInfo;
}