Ejemplo n.º 1
0
        /// <summary>
        /// Prepares the recommender for prediction using the given dataset. Since Mahout doesn't support
        /// model persistence, this only records the rating info, the user/item identifier mappings and
        /// the dataset file produced by <c>CreateDatasetFile</c>, which are used later during prediction.
        /// </summary>
        /// <param name="instanceSource">The source of the training instances.</param>
        /// <param name="featureSource">The parameter is ignored.</param>
        public void Train(TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
        {
            // Materialize the instances once, since they are enumerated several times below
            IEnumerable<RatedUserItem> observations = this.mapping.GetInstances(instanceSource).ToList();

            this.starRatingInfo = this.mapping.GetRatingInfo(instanceSource);
            this.trainingSetRatingDistribution = this.EstimateRatingDistribution(observations);

            // Mahout requires dense integer identifiers: every unseen user/item gets the next free index
            foreach (RatedUserItem observation in observations)
            {
                var user = observation.User;
                bool isKnownUser = this.userToId.ContainsKey(user);
                if (!isKnownUser)
                {
                    int userId = this.userToId.Count;
                    this.idToUser.Add(user);
                    this.userToId.Add(user, userId);
                }

                var item = observation.Item;
                bool isKnownItem = this.itemToId.ContainsKey(item);
                if (!isKnownItem)
                {
                    int itemId = this.itemToId.Count;
                    this.idToItem.Add(item);
                    this.itemToId.Add(item, itemId);
                }
            }

            // By default, every mapped user and item is eligible for prediction results
            this.userSubset = this.userToId.Keys;
            this.itemSubset = this.itemToId.Keys;

            // Write the training data in Mahout format and remember where it is stored
            this.trainingDatasetFile = this.CreateDatasetFile(observations);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Predicts integer ratings for the instances provided by a given instance source.
        /// </summary>
        /// <param name="instanceSource">The source providing the instances to predict ratings for.</param>
        /// <param name="featureSource">The parameter is ignored.</param>
        /// <returns>
        /// For every user, the predicted rating of each queried item. The ratings are the fractional
        /// predictions converted with <see cref="Convert.ToInt32(double)"/>, which rounds to the nearest
        /// integer (midpoints go to the even integer).
        /// </returns>
        public IDictionary<User, IDictionary<Item, int>> Predict(
            TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
        {
            IDictionary<User, IDictionary<Item, double>> fractionalPredictions =
                this.PredictFractionalRatings(this.mapping.GetInstances(instanceSource));

            var roundedPredictions = new Dictionary<User, IDictionary<Item, int>>();
            foreach (var userPredictions in fractionalPredictions)
            {
                // Round every fractional prediction made for this user
                var roundedForUser = new Dictionary<Item, int>();
                foreach (var itemPrediction in userPredictions.Value)
                {
                    roundedForUser.Add(itemPrediction.Key, Convert.ToInt32(itemPrediction.Value));
                }

                roundedPredictions.Add(userPredictions.Key, roundedForUser);
            }

            return roundedPredictions;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains the recommender on a given dataset by invoking the external VW executable.
        /// The resulting model is written to a temporary file whose name is kept in <c>modelFileName</c>.
        /// </summary>
        /// <param name="instanceSource">The instances of the dataset.</param>
        /// <param name="featureSource">The parameter is not used.</param>
        public void Train(TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
        {
            string trainingDatasetFile = null;
            string cacheFileName       = null;

            try
            {
                List<RatedUserItem> trainingInstanceList = this.mapping.GetInstances(instanceSource).ToList();

                // Create temporary files
                trainingDatasetFile = this.CreateDatasetFile(trainingInstanceList);
                cacheFileName       = Path.GetTempFileName();
                this.modelFileName  = Path.GetTempFileName();

                // Invoke VW to train the model.
                // The command line is machine-readable, so it is formatted with the invariant culture:
                // otherwise numeric settings could be rendered with a culture-specific decimal separator
                // (e.g. "0,01"), which the external tool would not parse as intended.
                string trainingCommand =
                    string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        "{0} -d \"{1}\" -b {2} -q ui --rank {3} --l1 {4} --l2 {5} --learning_rate {6} --decay_learning_rate {7} --passes {8} --power_t 0 -f \"{9}\" --cache_file \"{10}\" -k",
                        PathToExe,
                        trainingDatasetFile,
                        this.Settings.BitPrecision,
                        this.Settings.TraitCount,
                        this.Settings.L1Regularization,
                        this.Settings.L2Regularization,
                        this.Settings.LearningRate,
                        this.Settings.LearningRateDecay,
                        this.Settings.PassCount,
                        this.modelFileName,
                        cacheFileName);
                WrapperUtils.ExecuteExternalCommand(trainingCommand);

                // Remember users and items used for training
                this.trainingItems = new HashSet<Item>();
                this.trainingUsers = new HashSet<User>();
                foreach (RatedUserItem observation in trainingInstanceList)
                {
                    this.trainingItems.Add(observation.Item);
                    this.trainingUsers.Add(observation.User);
                }

                // Setup item subset
                this.itemSubset = this.trainingItems;
            }
            finally
            {
                // The dataset and cache files are only needed while the training command runs
                if (trainingDatasetFile != null)
                {
                    File.Delete(trainingDatasetFile);
                }

                if (cacheFileName != null)
                {
                    File.Delete(cacheFileName);
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Finds items related to <paramref name="item"/>.
        /// </summary>
        /// <param name="item">The item to find related items for.</param>
        /// <param name="relatedItemCount">The maximum number of related items to return.</param>
        /// <param name="featureSource">The parameter is ignored.</param>
        /// <returns>The related items, produced through the current lazy prediction context.</returns>
        /// <exception cref="NotSupportedException">Thrown if <paramref name="item"/> has no identifier mapping (a cold item).</exception>
        /// <remarks>Only items specified in <see cref="ItemSubset"/> will be returned.</remarks>
        public IEnumerable<Item> GetRelatedItems(Item item, int relatedItemCount, DummyFeatureSource featureSource = null)
        {
            Debug.Assert(item != null, "A valid item should be provided.");

            bool isKnownItem = this.itemToId.ContainsKey(item);
            if (!isKnownItem)
            {
                throw new NotSupportedException("Cold items are not supported by this recommender.");
            }

            // Reuse the previous context unless it reports that a new one is needed for this query
            var context = this.lastFindRelatedItemsContext;
            if (context == null || context.IsNewContextNeeded(item, relatedItemCount))
            {
                context = new LazyFindRelatedItemsContext(this, relatedItemCount);
                this.lastFindRelatedItemsContext = context;
            }

            return context.CreateLazyPredictionResults(item, this.itemSubset);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Finds users related to <paramref name="user"/>.
        /// </summary>
        /// <param name="user">The user to find related users for.</param>
        /// <param name="relatedUserCount">The maximum number of related users to return.</param>
        /// <param name="featureSource">The parameter is ignored.</param>
        /// <returns>The related users, produced through the current lazy prediction context.</returns>
        /// <exception cref="NotSupportedException">Thrown if <paramref name="user"/> has no identifier mapping (a cold user).</exception>
        /// <remarks>Only users specified in <see cref="UserSubset"/> will be returned.</remarks>
        public IEnumerable<User> GetRelatedUsers(User user, int relatedUserCount, DummyFeatureSource featureSource = null)
        {
            Debug.Assert(user != null, "A valid user should be provided.");

            bool isKnownUser = this.userToId.ContainsKey(user);
            if (!isKnownUser)
            {
                throw new NotSupportedException("Cold users are not supported by this recommender.");
            }

            // Reuse the previous context unless it reports that a new one is needed for this query
            var context = this.lastFindRelatedUsersContext;
            if (context == null || context.IsNewContextNeeded(user, relatedUserCount))
            {
                context = new LazyFindRelatedUsersContext(this, relatedUserCount);
                this.lastFindRelatedUsersContext = context;
            }

            return context.CreateLazyPredictionResults(user, this.userSubset);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Recommends items to a single user.
        /// </summary>
        /// <param name="user">The user to recommend items to.</param>
        /// <param name="recommendationCount">The maximum number of items to recommend.</param>
        /// <param name="featureSource">The parameter is ignored.</param>
        /// <returns>The recommended items, produced through the current lazy prediction context.</returns>
        /// <exception cref="NotSupportedException">Thrown if <paramref name="user"/> is not among the training users (a cold user).</exception>
        /// <remarks>Only items specified in <see cref="ItemSubset"/> can be recommended.</remarks>
        public IEnumerable<Item> Recommend(User user, int recommendationCount, DummyFeatureSource featureSource = null)
        {
            Debug.Assert(user != null, "A valid user should be provided.");

            bool isTrainingUser = this.trainingUsers.Contains(user);
            if (!isTrainingUser)
            {
                throw new NotSupportedException("Cold users are not supported by this recommender.");
            }

            // Reuse the previous context unless it reports that a new one is needed for this query
            var context = this.lastRecommendationContext;
            if (context == null || context.IsNewContextNeeded(user, recommendationCount))
            {
                context = new LazyRecommendationContext(this, recommendationCount);
                this.lastRecommendationContext = context;
            }

            return context.CreateLazyPredictionResults(user, this.itemSubset);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Bulk related item prediction. This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="items">The parameter is ignored.</param>
 /// <param name="relatedItemCount">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public IDictionary<Item, IEnumerable<Item>> GetRelatedItems(
     IEnumerable<Item> items, int relatedItemCount, DummyFeatureSource featureSource = null)
 {
     const string message = "Bulk related item prediction is not supported by this recommender.";
     throw new NotSupportedException(message);
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Bulk related user prediction. This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="users">The parameter is ignored.</param>
 /// <param name="relatedUserCount">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public IDictionary<User, IEnumerable<User>> GetRelatedUsers(
     IEnumerable<User> users, int relatedUserCount, DummyFeatureSource featureSource = null)
 {
     const string message = "Bulk related user prediction is not supported by this recommender.";
     throw new NotSupportedException(message);
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Item recommendation with rating distributions for a list of users.
 /// This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="users">The parameter is ignored.</param>
 /// <param name="recommendationCount">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public IDictionary<User, IEnumerable<Tuple<Item, RatingDistribution>>> RecommendDistribution(
     IEnumerable<User> users, int recommendationCount, DummyFeatureSource featureSource = null)
 {
     const string message = "Item recommendation with rating distributions is not supported by this recommender.";
     throw new NotSupportedException(message);
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Bulk item recommendation. This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="users">The parameter is ignored.</param>
 /// <param name="recommendationCount">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public IDictionary<User, IEnumerable<Item>> Recommend(
     IEnumerable<User> users, int recommendationCount, DummyFeatureSource featureSource = null)
 {
     const string message = "Item recommendation is not supported by this recommender.";
     throw new NotSupportedException(message);
 }
Ejemplo n.º 11
0
 /// <summary>
 /// Bulk prediction of rating distributions. This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="instanceSource">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public IDictionary<User, IDictionary<Item, RatingDistribution>> PredictDistribution(
     TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
 {
     const string message = "Uncertain rating prediction is not supported by this recommender.";
     throw new NotSupportedException(message);
 }
Ejemplo n.º 12
0
 /// <summary>
 /// Prediction of the rating distribution for a single user-item pair.
 /// This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="user">The parameter is ignored.</param>
 /// <param name="item">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public RatingDistribution PredictDistribution(User user, Item item, DummyFeatureSource featureSource = null)
 {
     const string message = "Uncertain rating prediction is not supported by this recommender.";
     throw new NotSupportedException(message);
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Predicts ratings for the instances provided by a given instance source by running
        /// the Mahout wrapper with the rating prediction algorithm selected in the settings.
        /// </summary>
        /// <param name="instanceSource">The instances to predict ratings for.</param>
        /// <param name="featureSource">The parameter is not used.</param>
        /// <returns>The predicted ratings.</returns>
        /// <exception cref="NotSupportedException">
        /// Thrown if any instance refers to a user or item without an identifier mapping (cold users/items),
        /// or if the configured rating prediction algorithm is not one of the supported values.
        /// </exception>
        public IDictionary<User, IDictionary<Item, int>> Predict(
            TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
        {
            string queryFileName       = null;
            string predictionsFileName = null;

            try
            {
                IEnumerable<RatedUserItem> observations = this.mapping.GetInstances(instanceSource).ToList();
                if (observations.Any(o => !this.userToId.ContainsKey(o.User) || !this.itemToId.ContainsKey(o.Item)))
                {
                    throw new NotSupportedException("Cold users and items are not supported by this recommender.");
                }

                predictionsFileName = Path.GetTempFileName();
                queryFileName       = this.CreateRatingPredictionQueryFile(observations);

                // The command is machine-readable, so it is always formatted with the invariant culture
                System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
                string command;
                switch (this.Settings.RatingPredictionAlgorithm)
                {
                    case MahoutRatingPredictionAlgorithm.UserBased:
                        command = string.Format(
                            invariant,
                            "PredictRatings_UserBased \"{0}\" \"{1}\" \"{2}\" {3} {4}",
                            this.trainingDatasetFile,
                            queryFileName,
                            predictionsFileName,
                            this.Settings.RatingSimilarity,
                            this.Settings.UserNeighborhoodSize);
                        break;

                    case MahoutRatingPredictionAlgorithm.ItemBased:
                        command = string.Format(
                            invariant,
                            "PredictRatings_ItemBased \"{0}\" \"{1}\" \"{2}\" {3}",
                            this.trainingDatasetFile,
                            queryFileName,
                            predictionsFileName,
                            this.Settings.RatingSimilarity);
                        break;

                    case MahoutRatingPredictionAlgorithm.SlopeOne:
                        command = string.Format(
                            invariant,
                            "PredictRatings_SlopeOne \"{0}\" \"{1}\" \"{2}\"",
                            this.trainingDatasetFile,
                            queryFileName,
                            predictionsFileName);
                        break;

                    case MahoutRatingPredictionAlgorithm.Svd:
                        command = string.Format(
                            invariant,
                            "PredictRatings_Svd \"{0}\" \"{1}\" \"{2}\" {3} {4}",
                            this.trainingDatasetFile,
                            queryFileName,
                            predictionsFileName,
                            this.Settings.TraitCount,
                            this.Settings.IterationCount);
                        break;

                    default:
                        // Debug.Fail is compiled out of release builds, so fail explicitly
                        // instead of handing a null command to the wrapper.
                        Debug.Fail("Unsupported rating prediction algorithm.");
                        throw new NotSupportedException("Unsupported rating prediction algorithm.");
                }

                this.RunMahoutWrapper(command);
                return this.ReadRatingPredictions(predictionsFileName);
            }
            finally
            {
                // The query and prediction files are only needed for the duration of this call
                if (queryFileName != null)
                {
                    File.Delete(queryFileName);
                }

                if (predictionsFileName != null)
                {
                    File.Delete(predictionsFileName);
                }
            }
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Rating prediction for a single user-item pair. This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="user">The parameter is ignored.</param>
 /// <param name="item">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public int Predict(User user, Item item, DummyFeatureSource featureSource = null)
 {
     const string message = "Only bulk rating prediction is supported by this recommender.";
     throw new NotSupportedException(message);
 }
Ejemplo n.º 15
0
 /// <summary>
 /// Related item prediction for a single item. This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="item">The parameter is ignored.</param>
 /// <param name="relatedItemCount">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public IEnumerable<Item> GetRelatedItems(Item item, int relatedItemCount, DummyFeatureSource featureSource = null)
 {
     const string message = "Related item prediction is not supported by this recommender";
     throw new NotSupportedException(message);
 }
Ejemplo n.º 16
0
 /// <summary>
 /// Related user prediction for a single user. This query is not supported, so the method always throws.
 /// </summary>
 /// <param name="user">The parameter is ignored.</param>
 /// <param name="relatedUserCount">The parameter is ignored.</param>
 /// <param name="featureSource">The parameter is ignored.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public IEnumerable<User> GetRelatedUsers(User user, int relatedUserCount, DummyFeatureSource featureSource = null)
 {
     const string message = "Related user prediction is not supported by this recommender";
     throw new NotSupportedException(message);
 }