Exemplo n.º 1
0
        /// <summary>
        /// Trains the recommender on a given dataset. Since Mahout doesn't support model persistence,
        /// training only captures the rating info and the rating distribution of the training set,
        /// assigns integer identifiers to users and items, and writes the training data to a dataset file
        /// that is kept for later use.
        /// </summary>
        /// <param name="instanceSource">The instances of the dataset.</param>
        /// <param name="featureSource">The parameter is not used.</param>
        public void Train(TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
        {
            // Materialize the instances once, because they are traversed several times below.
            IEnumerable<RatedUserItem> observations = this.mapping.GetInstances(instanceSource).ToList();

            this.starRatingInfo = this.mapping.GetRatingInfo(instanceSource);
            this.trainingSetRatingDistribution = this.EstimateRatingDistribution(observations);

            // Mahout needs users and items as dense integers: every previously unseen entity
            // receives the next free identifier, which equals the number of entities seen so far.
            foreach (RatedUserItem observation in observations)
            {
                var user = observation.User;
                if (!this.userToId.ContainsKey(user))
                {
                    var nextUserId = this.userToId.Count;
                    this.idToUser.Add(user);
                    this.userToId.Add(user, nextUserId);
                }

                var item = observation.Item;
                if (!this.itemToId.ContainsKey(item))
                {
                    var nextItemId = this.itemToId.Count;
                    this.idToItem.Add(item);
                    this.itemToId.Add(item, nextItemId);
                }
            }

            // The user and item subsets are the keys of the identifier maps
            this.userSubset = this.userToId.Keys;
            this.itemSubset = this.itemToId.Keys;

            // Persist the training dataset in Mahout format and remember the resulting file
            this.trainingDatasetFile = this.CreateDatasetFile(observations);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Computes the probability calibration plot for a particular rating value. Every prediction is
        /// placed into a bin according to the probability it assigns to <paramref name="rating"/>, and each
        /// bin reports the fraction of its predictions whose ground truth star rating equals
        /// <paramref name="rating"/>.
        /// </summary>
        /// <param name="instanceSource">The instance source providing the ground truth.</param>
        /// <param name="predictions">A sparse users-by-items matrix of predicted rating distributions.</param>
        /// <param name="rating">The rating value to generate the calibration plot for.</param>
        /// <param name="bins">The number of bins to use.</param>
        /// <returns>
        /// An array with one entry per bin. A bin that received no predictions yields
        /// <see cref="double.NaN"/>, because its value is computed as zero divided by zero.
        /// </returns>
        public double[] ProbabilityCalibrationPlot(
            TInstanceSource instanceSource,
            IDictionary<TUser, IDictionary<TItem, Discrete>> predictions,
            int rating,
            int bins)
        {
            IStarRatingInfo<TGroundTruthRating> ratingInfo = this.mapping.GetRatingInfo(instanceSource);

            int[] totalPerBin = new int[bins];
            int[] matchesPerBin = new int[bins];
            int lastBin = bins - 1;

            foreach (KeyValuePair<TUser, IDictionary<TItem, Discrete>> userEntry in predictions)
            {
                TUser user = userEntry.Key;

                foreach (KeyValuePair<TItem, Discrete> itemEntry in userEntry.Value)
                {
                    TItem item = itemEntry.Key;
                    double probability = itemEntry.Value[rating];

                    var groundTruthRating = this.mapping.GetRating(instanceSource, user, item);
                    int groundTruthStars = ratingInfo.ToStarRating(groundTruthRating);

                    // A probability of exactly 1 would index one past the end, so clamp to the last bin.
                    int bin = (int)(probability * bins);
                    if (bin > lastBin)
                    {
                        bin = lastBin;
                    }

                    totalPerBin[bin]++;
                    if (groundTruthStars == rating)
                    {
                        matchesPerBin[bin]++;
                    }
                }
            }

            return Util.ArrayInit(bins, bin => (double)matchesPerBin[bin] / totalPerBin[bin]);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Computes a rating prediction metric whose ground truth is expressed in model domain.
        /// The ground truth rating of every instance is converted to a star rating before being passed to
        /// <paramref name="metric"/>; iteration over <paramref name="predictions"/> and aggregation according
        /// to <paramref name="aggregationMethod"/> are delegated to <c>RatingPredictionMetric</c>.
        /// </summary>
        /// <param name="instanceSource">The instance source providing the ground truth.</param>
        /// <param name="predictions">A sparse users-by-items matrix of predicted rating distributions.</param>
        /// <param name="metric">The rating prediction metric using ground truth in model domain.</param>
        /// <param name="aggregationMethod">A method specifying how metrics are aggregated over all instances.</param>
        /// <returns>The computed average of the given rating prediction metric.</returns>
        public double ModelDomainRatingPredictionMetric(
            TInstanceSource instanceSource,
            IDictionary<TUser, IDictionary<TItem, Discrete>> predictions,
            Func<int, Discrete, double> metric,
            RecommenderMetricAggregationMethod aggregationMethod = RecommenderMetricAggregationMethod.Default)
        {
            IStarRatingInfo<TGroundTruthRating> ratingInfo = this.mapping.GetRatingInfo(instanceSource);

            // Adapter: translate the ground truth into a star rating, then invoke the caller's metric.
            Func<TGroundTruthRating, Discrete, double> groundTruthMetric = (groundTruth, prediction) =>
            {
                int starRating = ratingInfo.ToStarRating(groundTruth);
                return metric(starRating, prediction);
            };

            return this.RatingPredictionMetric(instanceSource, predictions, groundTruthMetric, aggregationMethod);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Initializes a new instance of the <see cref="RecommenderDataset"/> class.
        /// </summary>
        /// <param name="observations">The list of observations to create the dataset from.</param>
        /// <param name="starRatingInfo">The information about ratings in the dataset.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="starRatingInfo"/> or <paramref name="observations"/> is null.
        /// </exception>
        public RecommenderDataset(IEnumerable <RatedUserItem> observations, IStarRatingInfo <int> starRatingInfo)
        {
            if (starRatingInfo == null)
            {
                throw new ArgumentNullException("starRatingInfo");
            }

            // Fail with a descriptive argument error instead of a NullReferenceException raised by the loop below.
            if (observations == null)
            {
                throw new ArgumentNullException("observations");
            }

            this.StarRatingInfo = starRatingInfo;

            // The observations are added one by one, after the rating info has been stored.
            foreach (RatedUserItem observation in observations)
            {
                this.AddObservation(observation);
            }
        }
        /// <summary>
        /// Trains the recommender on the given dataset.
        /// </summary>
        /// <remarks>
        /// Configures the native mapping (rating info, feature usage flags, batch count and indexed entity sets),
        /// delegates the actual training to <c>this.recommender</c>, and afterwards resets the batch count of the
        /// native mapping to 1 and marks the mapping as trained.
        /// </remarks>
        /// <param name="instanceSource">The instances of the dataset.</param>
        /// <param name="featureSource">The source of the features for the given instances.</param>
        public void Train(TInstanceSource instanceSource, TFeatureSource featureSource = default(TFeatureSource))
        {
            // The rating info is read from the top-level mapping and handed over to the native mapping.
            this.starRatingInfo = this.topLevelMapping.GetRatingInfo(instanceSource);
            this.nativeMapping.SetRatingInfo(this.starRatingInfo);

            // Feature usage flags come from this object's settings, whereas the batch count
            // is taken from the settings of the inner recommender.
            this.nativeMapping.UseUserFeatures = this.Settings.Training.UseUserFeatures;
            this.nativeMapping.UseItemFeatures = this.Settings.Training.UseItemFeatures;
            this.nativeMapping.SetBatchCount(this.recommender.Settings.Training.BatchCount);

            // NOTE(review): BuildIndexedEntitySets presumably populates indexedUserSet and indexedItemSet,
            // which are passed to the native mapping right after - confirm against its implementation.
            this.BuildIndexedEntitySets(instanceSource);
            this.nativeMapping.SetIndexedEntitySets(this.indexedUserSet, this.indexedItemSet);

            // Train the inner recommender, then set the batch count back to 1 and flag the mapping as trained.
            this.recommender.Train(instanceSource, featureSource);
            this.nativeMapping.SetBatchCount(1);
            this.nativeMapping.SetTrained();
        }
Exemplo n.º 6
0
        /// <summary>
        /// Trains the recommender on the specified instances. For the random recommender this amounts to
        /// storing the rating info and collecting the distinct users and the distinct items
        /// that occur in the training set.
        /// </summary>
        /// <param name="instanceSource">The source of instances to train on.</param>
        /// <param name="featureSource">The source of features for the specified instances.</param>
        public void Train(TInstanceSource instanceSource, TFeatureSource featureSource = default(TFeatureSource))
        {
            this.starRatingInfo = this.mapping.GetRatingInfo(instanceSource);

            // Sets discard repeated users and items, so each entity is kept exactly once.
            var distinctUsers = new HashSet<TUser>();
            var distinctItems = new HashSet<TItem>();

            foreach (TInstance observation in this.mapping.GetInstances(instanceSource))
            {
                TUser user = this.mapping.GetUser(instanceSource, observation);
                distinctUsers.Add(user);

                TItem item = this.mapping.GetItem(instanceSource, observation);
                distinctItems.Add(item);
            }

            // Snapshot the collected entities into arrays
            this.userSubset = distinctUsers.ToArray();
            this.itemSubset = distinctItems.ToArray();
        }
        /// <summary>
        /// Prepares the environment used by the tests: the dataset, the rating info, the evaluation engine,
        /// and the prediction dictionaries built from the predicted ratings and from the prediction distributions.
        /// </summary>
        public RecommenderEvaluatorTests()
        {
            // Builds a distribution over the ratings 0..5 from the six given probabilities.
            Func<double, double, double, double, double, double, RatingDistribution> distribution =
                (p0, p1, p2, p3, p4, p5) => new SortedDictionary<int, double>
                {
                    { 0, p0 }, { 1, p1 }, { 2, p2 }, { 3, p3 }, { 4, p4 }, { 5, p5 }
                };

            // Builds one dataset row:
            // user, item, predicted rating, prediction distribution, true rating (data domain)
            Func<string, string, int, RatingDistribution, double, Tuple<string, string, int, RatingDistribution, double>> row =
                Tuple.Create<string, string, int, RatingDistribution, double>;

            this.dataset = new[]
            {
                row("A", "a", 4, distribution(0.0, 0.2, 0.0, 0.0, 0.6, 0.2), 1.1),
                row("B", "a", 1, distribution(0.0, 0.5, 0.0, 0.0, 0.5, 0.0), 4.1),
                row("D", "b", 2, distribution(0.0, 0.0, 1.0, 0.0, 0.0, 0.0), 1.9),
                row("A", "b", 5, distribution(0.0, 0.0, 0.0, 0.0, 0.2, 0.8), 5.3),
                row("A", "c", 3, distribution(0.0, 0.0, 0.0, 0.6, 0.2, 0.2), 4.7),
                row("A", "e", 1, distribution(0.0, 0.5, 0.0, 0.0, 0.5, 0.0), 3.6),
                row("B", "c", 2, distribution(0.0, 0.0, 0.8, 0.2, 0.0, 0.0), 3.1)
            };

            var recommenderMapping = new StarRatingRecommenderMapping();
            var evaluatorMapping = recommenderMapping.ForEvaluation();

            this.starRatingInfo = recommenderMapping.GetRatingInfo(null);
            this.evaluator = new StarRatingRecommenderEvaluator<IEnumerable<Instance>, string, string, double>(evaluatorMapping);

            // Point predictions use the predicted rating (third element),
            // uncertain predictions use the prediction distribution (fourth element).
            this.predictions = BuildPredictionDictionary(this.dataset, instance => instance.Item3);
            this.uncertainPredictions = BuildPredictionDictionary(this.dataset, instance => instance.Item4);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Computes the expected confusion matrix. The entry at [predicted rating, true rating] is the
        /// expectation, obtained via <c>ModelDomainRatingPredictionMetricExpectation</c>, of an indicator
        /// that is 1 when both ratings match that entry and 0 otherwise.
        /// </summary>
        /// <param name="instanceSource">The instance source providing the ground truth.</param>
        /// <param name="predictions">A sparse users-by-items matrix of predicted rating distributions.</param>
        /// <param name="aggregationMethod">A method specifying how metrics are aggregated over all instances.</param>
        /// <returns>The computed expected confusion matrix.</returns>
        public RatingMatrix ExpectedConfusionMatrix(
            TInstanceSource instanceSource,
            IDictionary<TUser, IDictionary<TItem, Discrete>> predictions,
            RecommenderMetricAggregationMethod aggregationMethod = RecommenderMetricAggregationMethod.Default)
        {
            IStarRatingInfo<TGroundTruthRating> ratingInfo = this.mapping.GetRatingInfo(instanceSource);
            int lowestRating = ratingInfo.MinStarRating;
            int highestRating = ratingInfo.MaxStarRating;

            var confusion = new RatingMatrix(lowestRating, highestRating);

            // One full evaluation pass is performed per matrix entry.
            for (int predictedCell = lowestRating; predictedCell <= highestRating; predictedCell++)
            {
                for (int trueCell = lowestRating; trueCell <= highestRating; trueCell++)
                {
                    confusion[predictedCell, trueCell] = this.ModelDomainRatingPredictionMetricExpectation(
                        instanceSource,
                        predictions,
                        (p, t) => (p == predictedCell && t == trueCell) ? 1.0 : 0.0,
                        aggregationMethod);
                }
            }

            return confusion;
        }
 /// <summary>
 /// Stores the given rating info in this object, replacing the previously stored one.
 /// </summary>
 /// <param name="starRatingInfo">The rating info to set.</param>
 public void SetRatingInfo(IStarRatingInfo<TDataRating> starRatingInfo) => this.starRatingInfo = starRatingInfo;