/// <summary>
/// Prepares the recommender for prediction using the given dataset.
/// Mahout offers no model persistence, so "training" only records the data:
/// the instances are written to a dataset file whose name is kept for use at prediction time.
/// </summary>
/// <param name="instanceSource">The source of the training instances.</param>
/// <param name="featureSource">Ignored by this recommender.</param>
public void Train(TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
{
    // Materialize the instances once, because they are traversed several times below.
    IEnumerable<RatedUserItem> ratedPairs = this.mapping.GetInstances(instanceSource).ToList();

    this.starRatingInfo = this.mapping.GetRatingInfo(instanceSource);
    this.trainingSetRatingDistribution = this.EstimateRatingDistribution(ratedPairs);

    // Mahout needs dense integer identifiers: every user or item not registered yet
    // receives the next free index, i.e. the number of entries registered so far.
    foreach (RatedUserItem ratedPair in ratedPairs)
    {
        var user = ratedPair.User;
        if (this.userToId.ContainsKey(user) == false)
        {
            this.idToUser.Add(user);
            int userId = this.userToId.Count;
            this.userToId.Add(user, userId);
        }

        var item = ratedPair.Item;
        if (this.itemToId.ContainsKey(item) == false)
        {
            this.idToItem.Add(item);
            int itemId = this.itemToId.Count;
            this.itemToId.Add(item, itemId);
        }
    }

    // By default the user and item subsets cover everything that has an identifier.
    this.userSubset = this.userToId.Keys;
    this.itemSubset = this.itemToId.Keys;

    // Write the training data in the format expected by Mahout and remember where it is.
    this.trainingDatasetFile = this.CreateDatasetFile(ratedPairs);
}
/// <summary>
/// Produces integer rating predictions for every instance supplied by the instance source.
/// </summary>
/// <param name="instanceSource">The source of the instances whose ratings should be predicted.</param>
/// <param name="featureSource">Ignored by this recommender.</param>
/// <returns>For each user, the predicted integer rating of each queried item.</returns>
public IDictionary<User, IDictionary<Item, int>> Predict(
    TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
{
    var instances = this.mapping.GetInstances(instanceSource);
    IDictionary<User, IDictionary<Item, double>> fractionalRatings = this.PredictFractionalRatings(instances);

    // Convert.ToInt32(double) rounds to the nearest integer, with midpoints going to the even neighbor.
    Func<IDictionary<Item, double>, IDictionary<Item, int>> roundRatings =
        itemRatings => itemRatings.ToDictionary(entry => entry.Key, entry => Convert.ToInt32(entry.Value));

    return fractionalRatings.ToDictionary(userEntry => userEntry.Key, userEntry => roundRatings(userEntry.Value));
}
/// <summary>
/// Trains the recommender on a given dataset by invoking the external VW executable.
/// </summary>
/// <remarks>
/// The training data is written to a temporary dataset file, which is removed together with
/// the temporary cache file once the call finishes (successfully or not).
/// The path of the model file handed to the executable is kept in <c>modelFileName</c>.
/// </remarks>
/// <param name="instanceSource">The instances of the dataset.</param>
/// <param name="featureSource">The parameter is not used.</param>
public void Train(TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
{
    string trainingDatasetFile = null;
    string cacheFileName = null;
    try
    {
        List<RatedUserItem> trainingInstanceList = this.mapping.GetInstances(instanceSource).ToList();

        // Create temporary files
        trainingDatasetFile = this.CreateDatasetFile(trainingInstanceList);
        cacheFileName = Path.GetTempFileName();
        this.modelFileName = Path.GetTempFileName();

        // Invoke VW to train the model.
        // The command line is machine-readable, so it must be formatted with the invariant culture:
        // with the current culture, locales that use a decimal comma would turn a value such as
        // 0.01 into "0,01", which is not a valid numeric argument for an external tool.
        string trainingCommand = string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "{0} -d \"{1}\" -b {2} -q ui --rank {3} --l1 {4} --l2 {5} --learning_rate {6} --decay_learning_rate {7} --passes {8} --power_t 0 -f \"{9}\" --cache_file \"{10}\" -k",
            PathToExe,
            trainingDatasetFile,
            this.Settings.BitPrecision,
            this.Settings.TraitCount,
            this.Settings.L1Regularization,
            this.Settings.L2Regularization,
            this.Settings.LearningRate,
            this.Settings.LearningRateDecay,
            this.Settings.PassCount,
            this.modelFileName,
            cacheFileName);
        WrapperUtils.ExecuteExternalCommand(trainingCommand);

        // Remember users and items used for training
        this.trainingItems = new HashSet<Item>();
        this.trainingUsers = new HashSet<User>();
        foreach (RatedUserItem observation in trainingInstanceList)
        {
            this.trainingItems.Add(observation.Item);
            this.trainingUsers.Add(observation.User);
        }

        // Setup item subset
        this.itemSubset = this.trainingItems;
    }
    finally
    {
        // The dataset and cache files are only needed while the external command runs.
        if (trainingDatasetFile != null)
        {
            File.Delete(trainingDatasetFile);
        }

        if (cacheFileName != null)
        {
            File.Delete(cacheFileName);
        }
    }
}
/// <summary>
/// Finds items related to <paramref name="item"/>.
/// </summary>
/// <param name="item">The query item.</param>
/// <param name="relatedItemCount">The maximum number of related items to return.</param>
/// <param name="featureSource">Ignored by this recommender.</param>
/// <returns>The related items, produced by the current find-related-items context.</returns>
/// <remarks>The results are restricted to the items specified in <see cref="ItemSubset"/>.</remarks>
/// <exception cref="NotSupportedException">
/// Thrown if <paramref name="item"/> has no identifier assigned, i.e. it is a cold item.
/// </exception>
public IEnumerable<Item> GetRelatedItems(Item item, int relatedItemCount, DummyFeatureSource featureSource = null)
{
    Debug.Assert(item != null, "A valid item should be provided.");

    bool isKnownItem = this.itemToId.ContainsKey(item);
    if (!isKnownItem)
    {
        throw new NotSupportedException("Cold items are not supported by this recommender.");
    }

    // The previous context is kept as long as it does not ask to be replaced for this query.
    bool canReuseContext =
        this.lastFindRelatedItemsContext != null
        && !this.lastFindRelatedItemsContext.IsNewContextNeeded(item, relatedItemCount);
    if (!canReuseContext)
    {
        this.lastFindRelatedItemsContext = new LazyFindRelatedItemsContext(this, relatedItemCount);
    }

    return this.lastFindRelatedItemsContext.CreateLazyPredictionResults(item, this.itemSubset);
}
/// <summary>
/// Finds users related to <paramref name="user"/>.
/// </summary>
/// <param name="user">The query user.</param>
/// <param name="relatedUserCount">The maximum number of related users to return.</param>
/// <param name="featureSource">Ignored by this recommender.</param>
/// <returns>The related users, produced by the current find-related-users context.</returns>
/// <remarks>The results are restricted to the users specified in <see cref="UserSubset"/>.</remarks>
/// <exception cref="NotSupportedException">
/// Thrown if <paramref name="user"/> has no identifier assigned, i.e. it is a cold user.
/// </exception>
public IEnumerable<User> GetRelatedUsers(User user, int relatedUserCount, DummyFeatureSource featureSource = null)
{
    Debug.Assert(user != null, "A valid user should be provided.");

    bool isKnownUser = this.userToId.ContainsKey(user);
    if (!isKnownUser)
    {
        throw new NotSupportedException("Cold users are not supported by this recommender.");
    }

    // The previous context is kept as long as it does not ask to be replaced for this query.
    bool canReuseContext =
        this.lastFindRelatedUsersContext != null
        && !this.lastFindRelatedUsersContext.IsNewContextNeeded(user, relatedUserCount);
    if (!canReuseContext)
    {
        this.lastFindRelatedUsersContext = new LazyFindRelatedUsersContext(this, relatedUserCount);
    }

    return this.lastFindRelatedUsersContext.CreateLazyPredictionResults(user, this.userSubset);
}
/// <summary>
/// Recommends items to a single user.
/// </summary>
/// <param name="user">The user to produce recommendations for.</param>
/// <param name="recommendationCount">The maximum number of items to recommend.</param>
/// <param name="featureSource">Ignored by this recommender.</param>
/// <returns>The recommended items, produced by the current recommendation context.</returns>
/// <remarks>Recommendations are restricted to the items specified in <see cref="ItemSubset"/>.</remarks>
/// <exception cref="NotSupportedException">
/// Thrown if <paramref name="user"/> is not among the training users, i.e. it is a cold user.
/// </exception>
public IEnumerable<Item> Recommend(User user, int recommendationCount, DummyFeatureSource featureSource = null)
{
    Debug.Assert(user != null, "A valid user should be provided.");

    bool isTrainingUser = this.trainingUsers.Contains(user);
    if (!isTrainingUser)
    {
        throw new NotSupportedException("Cold users are not supported by this recommender.");
    }

    // The previous context is kept as long as it does not ask to be replaced for this query.
    bool canReuseContext =
        this.lastRecommendationContext != null
        && !this.lastRecommendationContext.IsNewContextNeeded(user, recommendationCount);
    if (!canReuseContext)
    {
        this.lastRecommendationContext = new LazyRecommendationContext(this, recommendationCount);
    }

    return this.lastRecommendationContext.CreateLazyPredictionResults(user, this.itemSubset);
}
/// <summary>
/// Bulk related item query. This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="items">Ignored.</param>
/// <param name="relatedItemCount">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public IDictionary<Item, IEnumerable<Item>> GetRelatedItems(
    IEnumerable<Item> items, int relatedItemCount, DummyFeatureSource featureSource = null)
{
    const string Message = "Bulk related item prediction is not supported by this recommender.";
    throw new NotSupportedException(Message);
}
/// <summary>
/// Bulk related user query. This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="users">Ignored.</param>
/// <param name="relatedUserCount">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public IDictionary<User, IEnumerable<User>> GetRelatedUsers(
    IEnumerable<User> users, int relatedUserCount, DummyFeatureSource featureSource = null)
{
    const string Message = "Bulk related user prediction is not supported by this recommender.";
    throw new NotSupportedException(Message);
}
/// <summary>
/// Bulk item recommendation with rating distributions.
/// This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="users">Ignored.</param>
/// <param name="recommendationCount">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public IDictionary<User, IEnumerable<Tuple<Item, RatingDistribution>>> RecommendDistribution(
    IEnumerable<User> users, int recommendationCount, DummyFeatureSource featureSource = null)
{
    const string Message = "Item recommendation with rating distributions is not supported by this recommender.";
    throw new NotSupportedException(Message);
}
/// <summary>
/// Bulk item recommendation. This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="users">Ignored.</param>
/// <param name="recommendationCount">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public IDictionary<User, IEnumerable<Item>> Recommend(
    IEnumerable<User> users, int recommendationCount, DummyFeatureSource featureSource = null)
{
    const string Message = "Item recommendation is not supported by this recommender.";
    throw new NotSupportedException(Message);
}
/// <summary>
/// Bulk prediction of rating distributions.
/// This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="instanceSource">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public IDictionary<User, IDictionary<Item, RatingDistribution>> PredictDistribution(
    TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
{
    const string Message = "Uncertain rating prediction is not supported by this recommender.";
    throw new NotSupportedException(Message);
}
/// <summary>
/// Prediction of the rating distribution for a single user-item pair.
/// This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="user">Ignored.</param>
/// <param name="item">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public RatingDistribution PredictDistribution(User user, Item item, DummyFeatureSource featureSource = null)
{
    const string Message = "Uncertain rating prediction is not supported by this recommender.";
    throw new NotSupportedException(Message);
}
/// <summary>
/// Predicts ratings for the instances provided by a given instance source by running
/// the Mahout wrapper with the rating prediction algorithm selected in the settings.
/// </summary>
/// <param name="instanceSource">The instances to predict ratings for.</param>
/// <param name="featureSource">The parameter is not used.</param>
/// <returns>The predicted ratings.</returns>
/// <exception cref="NotSupportedException">
/// Thrown if any instance refers to a user or an item that has no identifier assigned (cold user or item),
/// or if the configured rating prediction algorithm is not one of the handled ones.
/// </exception>
public IDictionary<User, IDictionary<Item, int>> Predict(
    TInstanceSource instanceSource, DummyFeatureSource featureSource = null)
{
    string queryFileName = null;
    string predictionsFileName = null;
    try
    {
        IEnumerable<RatedUserItem> observations = this.mapping.GetInstances(instanceSource).ToList();
        if (observations.Any(o => !this.userToId.ContainsKey(o.User) || !this.itemToId.ContainsKey(o.Item)))
        {
            throw new NotSupportedException("Cold users and items are not supported by this recommender.");
        }

        predictionsFileName = Path.GetTempFileName();
        queryFileName = this.CreateRatingPredictionQueryFile(observations);

        // The command is consumed by an external tool, so it is formatted with the invariant culture
        // to keep its numeric arguments independent of the locale of the current thread.
        System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;
        string command;
        switch (this.Settings.RatingPredictionAlgorithm)
        {
            case MahoutRatingPredictionAlgorithm.UserBased:
                command = string.Format(
                    culture,
                    "PredictRatings_UserBased \"{0}\" \"{1}\" \"{2}\" {3} {4}",
                    this.trainingDatasetFile,
                    queryFileName,
                    predictionsFileName,
                    this.Settings.RatingSimilarity,
                    this.Settings.UserNeighborhoodSize);
                break;
            case MahoutRatingPredictionAlgorithm.ItemBased:
                command = string.Format(
                    culture,
                    "PredictRatings_ItemBased \"{0}\" \"{1}\" \"{2}\" {3}",
                    this.trainingDatasetFile,
                    queryFileName,
                    predictionsFileName,
                    this.Settings.RatingSimilarity);
                break;
            case MahoutRatingPredictionAlgorithm.SlopeOne:
                command = string.Format(
                    culture,
                    "PredictRatings_SlopeOne \"{0}\" \"{1}\" \"{2}\"",
                    this.trainingDatasetFile,
                    queryFileName,
                    predictionsFileName);
                break;
            case MahoutRatingPredictionAlgorithm.Svd:
                command = string.Format(
                    culture,
                    "PredictRatings_Svd \"{0}\" \"{1}\" \"{2}\" {3} {4}",
                    this.trainingDatasetFile,
                    queryFileName,
                    predictionsFileName,
                    this.Settings.TraitCount,
                    this.Settings.IterationCount);
                break;
            default:
                // Debug.Fail is compiled out of release builds, where execution used to continue
                // with a null command. Fail explicitly instead of handing null to the wrapper.
                Debug.Fail("Unsupported rating prediction algorithm.");
                throw new NotSupportedException("Unsupported rating prediction algorithm.");
        }

        this.RunMahoutWrapper(command);

        return this.ReadRatingPredictions(predictionsFileName);
    }
    finally
    {
        // The query and prediction files are only needed for the duration of this call.
        if (queryFileName != null)
        {
            File.Delete(queryFileName);
        }

        if (predictionsFileName != null)
        {
            File.Delete(predictionsFileName);
        }
    }
}
/// <summary>
/// Rating prediction for a single user-item pair.
/// This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="user">Ignored.</param>
/// <param name="item">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public int Predict(User user, Item item, DummyFeatureSource featureSource = null)
{
    const string Message = "Only bulk rating prediction is supported by this recommender.";
    throw new NotSupportedException(Message);
}
/// <summary>
/// Related item query for a single item.
/// This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="item">Ignored.</param>
/// <param name="relatedItemCount">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public IEnumerable<Item> GetRelatedItems(Item item, int relatedItemCount, DummyFeatureSource featureSource = null)
{
    const string Message = "Related item prediction is not supported by this recommender";
    throw new NotSupportedException(Message);
}
/// <summary>
/// Related user query for a single user.
/// This recommender does not support it, so the method throws unconditionally.
/// </summary>
/// <param name="user">Ignored.</param>
/// <param name="relatedUserCount">Ignored.</param>
/// <param name="featureSource">Ignored.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public IEnumerable<User> GetRelatedUsers(User user, int relatedUserCount, DummyFeatureSource featureSource = null)
{
    const string Message = "Related user prediction is not supported by this recommender";
    throw new NotSupportedException(Message);
}