/// <summary>
/// Runs the module: parses the command line, loads the dataset and the trained model,
/// finds related users for the dataset and saves them to the predictions file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>False if the command line could not be parsed; true once the related users have been saved.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    // Numeric defaults mirror the values advertised in the option descriptions below.
    int relatedUserLimit = 5;
    int commonRatingThreshold = 5;
    int relatedUserPoolThreshold = 5;
    string datasetPath = string.Empty;
    string modelPath = string.Empty;
    string outputPath = string.Empty;

    CommandLineParser parser = new CommandLineParser();
    parser.RegisterParameterHandler(
        "--data",
        "FILE",
        "Dataset to make predictions for",
        value => datasetPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--model",
        "FILE",
        "File with trained model",
        value => modelPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--predictions",
        "FILE",
        "File with generated predictions",
        value => outputPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--max-users",
        "NUM",
        "Maximum number of related users for a single user; defaults to 5",
        value => relatedUserLimit = value,
        CommandLineParameterType.Optional);
    parser.RegisterParameterHandler(
        "--min-common-items",
        "NUM",
        "Minimum number of items that the query user and the related user should have rated in common; defaults to 5",
        value => commonRatingThreshold = value,
        CommandLineParameterType.Optional);
    parser.RegisterParameterHandler(
        "--min-pool-size",
        "NUM",
        "Minimum size of the related user pool for a single user; defaults to 5",
        value => relatedUserPoolThreshold = value,
        CommandLineParameterType.Optional);

    bool parsedSuccessfully = parser.TryParse(args, usagePrefix);
    if (!parsedSuccessfully)
    {
        return false;
    }

    var dataset = RecommenderDataset.Load(datasetPath);
    var model = MatchboxRecommender.Load<RecommenderDataset, User, Item, DummyFeatureSource>(modelPath);

    var evaluationMapping = Mappings.StarRatingRecommender.ForEvaluation();
    var evaluator = new RecommenderEvaluator<RecommenderDataset, User, Item, int, int, Discrete>(evaluationMapping);

    IDictionary<User, IEnumerable<User>> relatedUsers = evaluator.FindRelatedUsersWhoRatedSameItems(
        model,
        dataset,
        relatedUserLimit,
        commonRatingThreshold,
        relatedUserPoolThreshold);
    RecommenderPersistenceUtils.SaveRelatedUsers(outputPath, relatedUsers);

    return true;
}
/// <summary>
/// Runs the module: compares predicted ratings with a test dataset and writes
/// the mean absolute error and the root mean squared error to a report file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>False if the command line could not be parsed; true once the report has been written.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string groundTruthPath = string.Empty;
    string predictionsPath = string.Empty;
    string reportPath = string.Empty;

    CommandLineParser parser = new CommandLineParser();
    parser.RegisterParameterHandler(
        "--test-data",
        "FILE",
        "Test dataset used to obtain ground truth",
        value => groundTruthPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--predictions",
        "FILE",
        "Predictions to evaluate",
        value => predictionsPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--report",
        "FILE",
        "Evaluation report file",
        value => reportPath = value,
        CommandLineParameterType.Required);

    bool parsedSuccessfully = parser.TryParse(args, usagePrefix);
    if (!parsedSuccessfully)
    {
        return false;
    }

    var groundTruth = RecommenderDataset.Load(groundTruthPath);
    IDictionary<User, IDictionary<Item, int>> predictedRatings =
        RecommenderPersistenceUtils.LoadPredictedRatings(predictionsPath);
    var evaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
        Mappings.StarRatingRecommender.ForEvaluation());

    // The report file is opened before any metric is computed, and each metric is
    // written right after it is computed, so the sequence of side effects stays fixed.
    using (StreamWriter report = new StreamWriter(reportPath))
    {
        var meanAbsoluteError = evaluator.RatingPredictionMetric(groundTruth, predictedRatings, Metrics.AbsoluteError);
        report.WriteLine("Mean absolute error: {0:0.000}", meanAbsoluteError);

        // The squared-error metric is square-rooted here to report RMSE.
        double rootMeanSquaredError = Math.Sqrt(
            evaluator.RatingPredictionMetric(groundTruth, predictedRatings, Metrics.SquaredError));
        report.WriteLine("Root mean squared error: {0:0.000}", rootMeanSquaredError);
    }

    return true;
}
/// <summary>
/// Runs the module: loads an input dataset, passes it through the mapping that generates
/// negative data, and saves the resulting dataset to the output file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>False if the command line could not be parsed; true once the output dataset has been saved.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string inputDatasetFile = string.Empty;
    string outputDatasetFile = string.Empty;

    var parser = new CommandLineParser();
    parser.RegisterParameterHandler(
        "--input-data",
        "FILE",
        "Input dataset, treated as if all the ratings are positive",
        v => inputDatasetFile = v,
        CommandLineParameterType.Required);

    // Help text fix: the original description misspelled "positive".
    parser.RegisterParameterHandler(
        "--output-data",
        "FILE",
        "Output dataset with both positive and negative data",
        v => outputDatasetFile = v,
        CommandLineParameterType.Required);

    if (!parser.TryParse(args, usagePrefix))
    {
        return false;
    }

    var generatorMapping = Mappings.StarRatingRecommender.WithGeneratedNegativeData();
    var inputDataset = RecommenderDataset.Load(inputDatasetFile);

    // Instances produced by the generating mapping are converted back to RatedUserItem
    // so that they can be stored in a regular dataset together with the mapping's rating info.
    var outputDataset = new RecommenderDataset(
        generatorMapping.GetInstances(inputDataset).Select(i => new RatedUserItem(i.User, i.Item, i.Rating)),
        generatorMapping.GetRatingInfo(inputDataset));
    outputDataset.Save(outputDatasetFile);

    return true;
}
/// <summary>
/// Runs the module: splits an input dataset into a training part and a test part
/// according to the fractions given on the command line and saves both parts.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>False if the command line could not be parsed; true once both datasets have been saved.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    // File locations (all required on the command line).
    string sourcePath = string.Empty;
    string trainingPath = string.Empty;
    string testPath = string.Empty;

    // Splitting options; defaults mirror the values advertised in the option descriptions.
    double trainingOnlyUsers = 0.5;
    double testUserTrainingRatings = 0.25;
    double coldUsers = 0;
    double coldItems = 0;
    double ignoredUsers = 0;
    double ignoredItems = 0;
    bool dropOccasionalColdItems = false;

    CommandLineParser parser = new CommandLineParser();
    parser.RegisterParameterHandler(
        "--input-data",
        "FILE",
        "Dataset to split",
        value => sourcePath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--output-data-train",
        "FILE",
        "Training part of the split dataset",
        value => trainingPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--output-data-test",
        "FILE",
        "Test part of the split dataset",
        value => testPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--training-users",
        "NUM",
        "Fraction of training-only users; defaults to 0.5",
        (double value) => trainingOnlyUsers = value,
        CommandLineParameterType.Optional);
    parser.RegisterParameterHandler(
        "--test-user-training-ratings",
        "NUM",
        "Fraction of test user ratings for training; defaults to 0.25",
        (double value) => testUserTrainingRatings = value,
        CommandLineParameterType.Optional);
    parser.RegisterParameterHandler(
        "--cold-users",
        "NUM",
        "Fraction of cold (test-only) users; defaults to 0",
        (double value) => coldUsers = value,
        CommandLineParameterType.Optional);
    parser.RegisterParameterHandler(
        "--cold-items",
        "NUM",
        "Fraction of cold (test-only) items; defaults to 0",
        (double value) => coldItems = value,
        CommandLineParameterType.Optional);
    parser.RegisterParameterHandler(
        "--ignored-users",
        "NUM",
        "Fraction of ignored users; defaults to 0",
        (double value) => ignoredUsers = value,
        CommandLineParameterType.Optional);
    parser.RegisterParameterHandler(
        "--ignored-items",
        "NUM",
        "Fraction of ignored items; defaults to 0",
        (double value) => ignoredItems = value,
        CommandLineParameterType.Optional);
    parser.RegisterParameterHandler(
        "--remove-occasional-cold-items",
        "Remove occasionally produced cold items",
        () => dropOccasionalColdItems = true);

    bool parsedSuccessfully = parser.TryParse(args, usagePrefix);
    if (!parsedSuccessfully)
    {
        return false;
    }

    var splitter = Mappings.StarRatingRecommender.SplitToTrainTest(
        trainingOnlyUsers,
        testUserTrainingRatings,
        coldUsers,
        coldItems,
        ignoredUsers,
        ignoredItems,
        dropOccasionalColdItems);

    RecommenderDataset source = RecommenderDataset.Load(sourcePath);

    // Both output datasets reuse the rating info of the input dataset.
    RecommenderDataset trainingPart = new RecommenderDataset(
        splitter.GetInstances(SplitInstanceSource.Training(source)),
        source.StarRatingInfo);
    trainingPart.Save(trainingPath);

    RecommenderDataset testPart = new RecommenderDataset(
        splitter.GetInstances(SplitInstanceSource.Test(source)),
        source.StarRatingInfo);
    testPart.Save(testPath);

    return true;
}
/// <summary>
/// Runs the module: loads a dataset and a trained model, predicts ratings for the dataset
/// and saves the predictions to a file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>False if the command line could not be parsed; true once the predictions have been saved.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string datasetPath = string.Empty;
    string modelPath = string.Empty;
    string outputPath = string.Empty;

    CommandLineParser parser = new CommandLineParser();
    parser.RegisterParameterHandler(
        "--data",
        "FILE",
        "Dataset to make predictions for",
        value => datasetPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--model",
        "FILE",
        "File with trained model",
        value => modelPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--predictions",
        "FILE",
        "File with generated predictions",
        value => outputPath = value,
        CommandLineParameterType.Required);

    bool parsedSuccessfully = parser.TryParse(args, usagePrefix);
    if (!parsedSuccessfully)
    {
        return false;
    }

    var dataset = RecommenderDataset.Load(datasetPath);
    var model = MatchboxRecommender.Load<RecommenderDataset, User, Item, DummyFeatureSource>(modelPath);

    IDictionary<User, IDictionary<Item, int>> predictedRatings = model.Predict(dataset);
    RecommenderPersistenceUtils.SavePredictedRatings(outputPath, predictedRatings);

    return true;
}
/// <summary>
/// Runs the module: evaluates item recommendations against a test dataset
/// and writes the NDCG value to a report file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>False if the command line could not be parsed; true once the report has been written.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string groundTruthPath = string.Empty;
    string predictionsPath = string.Empty;
    string reportPath = string.Empty;

    CommandLineParser parser = new CommandLineParser();
    parser.RegisterParameterHandler(
        "--test-data",
        "FILE",
        "Test dataset used to obtain ground truth",
        value => groundTruthPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--predictions",
        "FILE",
        "Predictions to evaluate",
        value => predictionsPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--report",
        "FILE",
        "Evaluation report file",
        value => reportPath = value,
        CommandLineParameterType.Required);

    bool parsedSuccessfully = parser.TryParse(args, usagePrefix);
    if (!parsedSuccessfully)
    {
        return false;
    }

    var groundTruth = RecommenderDataset.Load(groundTruthPath);
    int lowestRating = Mappings.StarRatingRecommender.GetRatingInfo(groundTruth).MinStarRating;
    IDictionary<User, IEnumerable<Item>> recommendations =
        RecommenderPersistenceUtils.LoadRecommendedItems(predictionsPath);
    var evaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
        Mappings.StarRatingRecommender.ForEvaluation());

    // The report file is opened before the metric is computed, as in the original flow.
    using (StreamWriter report = new StreamWriter(reportPath))
    {
        // The rating conversion shifts ratings so that the minimum star rating maps to 1.
        var ndcg = evaluator.ItemRecommendationMetric(
            groundTruth,
            recommendations,
            Metrics.Ndcg,
            r => Convert.ToDouble(r) - lowestRating + 1);
        report.WriteLine("NDCG: {0:0.000}", ndcg);
    }

    return true;
}
/// <summary>
/// Runs the module: evaluates related item predictions against a test dataset and writes
/// NDCG values computed with two different similarity functions to a report file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>False if the command line could not be parsed; true once the report has been written.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    // Default mirrors the value advertised in the "--min-common-items" description.
    int commonRatingThreshold = 5;
    string groundTruthPath = string.Empty;
    string predictionsPath = string.Empty;
    string reportPath = string.Empty;

    CommandLineParser parser = new CommandLineParser();
    parser.RegisterParameterHandler(
        "--test-data",
        "FILE",
        "Test dataset used to obtain ground truth",
        value => groundTruthPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--predictions",
        "FILE",
        "Predictions to evaluate",
        value => predictionsPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--report",
        "FILE",
        "Evaluation report file",
        value => reportPath = value,
        CommandLineParameterType.Required);
    parser.RegisterParameterHandler(
        "--min-common-items",
        "NUM",
        "Minimum number of users that the query item and the related item should have been rated by in common; defaults to 5",
        value => commonRatingThreshold = value,
        CommandLineParameterType.Optional);

    bool parsedSuccessfully = parser.TryParse(args, usagePrefix);
    if (!parsedSuccessfully)
    {
        return false;
    }

    var groundTruth = RecommenderDataset.Load(groundTruthPath);
    IDictionary<Item, IEnumerable<Item>> relatedItems = RecommenderPersistenceUtils.LoadRelatedItems(predictionsPath);
    var evaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
        Mappings.StarRatingRecommender.ForEvaluation());

    // The report file is opened first; each metric is written right after it is computed.
    using (StreamWriter report = new StreamWriter(reportPath))
    {
        var manhattanNdcg = evaluator.RelatedItemsMetric(
            groundTruth,
            relatedItems,
            commonRatingThreshold,
            Metrics.Ndcg,
            Metrics.NormalizedManhattanSimilarity);
        report.WriteLine("L1 Sim NDCG: {0:0.000}", manhattanNdcg);

        var euclideanNdcg = evaluator.RelatedItemsMetric(
            groundTruth,
            relatedItems,
            commonRatingThreshold,
            Metrics.Ndcg,
            Metrics.NormalizedEuclideanSimilarity);
        report.WriteLine("L2 Sim NDCG: {0:0.000}", euclideanNdcg);
    }

    return true;
}
/// <summary>
/// Loads dataset from a given file.
/// <para>
/// Data file format:
/// The first non-empty, non-comment row describes min and max ratings and has form 'R,Min,Max'.
/// Rows starting with 'U' describe a single user and have form 'U,UserId,UserFeatures'.
/// Rows starting with 'I' describe a single item and have form 'I,ItemId,ItemFeatures'.
/// Rows other than that describe instances and should have form 'UserID,ItemID,Rating'.
/// Empty rows and rows starting with '#' are skipped.
/// Feature description has form 'FeatureIndex1:Value1|FeatureIndex2:Value2|...'
/// If all the user features are zero or there are no user features in the dataset at all, the user description can be omitted. Same is true for items.
/// </para>
/// </summary>
/// <param name="fileName">File to load data from.</param>
/// <returns>The loaded dataset.</returns>
public static RecommenderDataset Load(string fileName)
{
    var rawObservations = new List<Tuple<string, string, int>>();
    var userIdToFeatures = new Dictionary<string, Vector>();
    var itemIdToFeatures = new Dictionary<string, Vector>();
    int? minRating = null, maxRating = null;
    int userFeatureCount = 0, itemFeatureCount = 0;

    // NOTE(review): the code below presumably relies on RaiseError / RaiseGlobalError
    // throwing, since execution continues to use the offending fields afterwards - confirm.
    var parsingContext = new FileParsingContext(fileName);
    using (var reader = new StreamReader(fileName))
    {
        string line;
        bool isFirstRecord = true;
        while ((line = reader.ReadLine()) != null)
        {
            parsingContext.NextLine(line);

            // The comment marker is a plain character, so it is matched ordinally
            // rather than with the current culture's collation rules.
            if (line.Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
            {
                continue; // Skip comments and empty lines
            }

            string[] splits = line.Split(',');
            if (isFirstRecord)
            {
                //// Parse rating record
                int minRatingValue = 0, maxRatingValue = 0;
                if (splits.Length != 3 ||
                    splits[0].Trim() != "R" ||
                    !int.TryParse(splits[1], out minRatingValue) ||
                    !int.TryParse(splits[2], out maxRatingValue))
                {
                    parsingContext.RaiseError("Invalid rating info record.");
                }

                minRating = minRatingValue;
                maxRating = maxRatingValue;
                isFirstRecord = false;
            }
            else if (splits[0].Trim() == "U")
            {
                //// Parse user record
                if (splits.Length != 3)
                {
                    parsingContext.RaiseError("Invalid user record.");
                }

                string userId = splits[1].Trim();
                if (userIdToFeatures.ContainsKey(userId))
                {
                    parsingContext.RaiseError("Record describing user '{0}' is presented more than once.", userId);
                }

                Vector features = ParseFeatures(splits[2], parsingContext, ref userFeatureCount);
                userIdToFeatures.Add(userId, features);
            }
            else if (splits[0].Trim() == "I")
            {
                //// Parse item record
                if (splits.Length != 3)
                {
                    parsingContext.RaiseError("Invalid item record.");
                }

                string itemId = splits[1].Trim();
                if (itemIdToFeatures.ContainsKey(itemId))
                {
                    parsingContext.RaiseError("Record describing item '{0}' is presented more than once.", itemId);
                }

                Vector features = ParseFeatures(splits[2], parsingContext, ref itemFeatureCount);
                itemIdToFeatures.Add(itemId, features);
            }
            else
            {
                //// Parse instance record
                // Validate the field count before touching splits[1]: a row without commas
                // has a single field and must be reported as a parsing error with line
                // context instead of failing with an index-out-of-range exception.
                int rating = 0;
                if (splits.Length != 3 || !int.TryParse(splits[2], out rating))
                {
                    parsingContext.RaiseError("Invalid instance record.", line);
                }

                string userId = splits[0].Trim();
                string itemId = splits[1].Trim();
                rawObservations.Add(Tuple.Create(userId, itemId, rating));
            }
        }
    }

    // No rating record was seen at all (the file had no data rows).
    if (!minRating.HasValue)
    {
        parsingContext.RaiseGlobalError("Rating info is missing.");
    }

    var result = new RecommenderDataset { StarRatingInfo = new StarRatingInfo(minRating.Value, maxRating.Value) };

    // Observations are materialized only after the whole file has been read, because
    // user and item records may appear anywhere after the rating record.
    foreach (var observation in rawObservations)
    {
        string userId = observation.Item1;
        string itemId = observation.Item2;
        int rating = observation.Item3;

        if (rating < minRating.Value || rating > maxRating.Value)
        {
            parsingContext.RaiseGlobalError("One of the ratings is inconsistent with the specified rating info.");
        }

        User user = RetrieveEntity(
            userId, result.idToUser, userIdToFeatures, userFeatureCount, (id, features) => new User(id, features));
        Item item = RetrieveEntity(
            itemId, result.idToItem, itemIdToFeatures, itemFeatureCount, (id, features) => new Item(id, features));
        result.observations.Add(new RatedUserItem(user, item, rating));
    }

    return result;
}