/// <summary>
/// Runs the module: loads the input dataset, passes it through the negative data generating mapping
/// and saves the resulting dataset.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>True if the run was successful, false otherwise.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string inputDatasetFile = string.Empty;
    string outputDatasetFile = string.Empty;

    var parser = new CommandLineParser();
    parser.RegisterParameterHandler(
        "--input-data",
        "FILE",
        "Input dataset, treated as if all the ratings are positive",
        v => inputDatasetFile = v,
        CommandLineParameterType.Required);

    // Help text previously read "posisitve"; this string is shown to the user in the usage message.
    parser.RegisterParameterHandler(
        "--output-data",
        "FILE",
        "Output dataset with both positive and negative data",
        v => outputDatasetFile = v,
        CommandLineParameterType.Required);

    if (!parser.TryParse(args, usagePrefix))
    {
        return false;
    }

    var generatorMapping = Mappings.StarRatingRecommender.WithGeneratedNegativeData();
    var inputDataset = RecommenderDataset.Load(inputDatasetFile);

    // The instances produced by the mapping are converted back to rated user-item pairs,
    // and the rating info is taken from the same mapping so that both stay consistent.
    var outputDataset = new RecommenderDataset(
        generatorMapping.GetInstances(inputDataset).Select(i => new RatedUserItem(i.User, i.Item, i.Rating)),
        generatorMapping.GetRatingInfo(inputDataset));
    outputDataset.Save(outputDatasetFile);

    return true;
}
/// <summary>
/// Runs the module: loads a dataset and a trained model, predicts ratings for the dataset
/// and saves the predictions to a file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>True if the run was successful, false otherwise.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string dataPath = string.Empty;
    string modelPath = string.Empty;
    string outputPath = string.Empty;

    var commandLine = new CommandLineParser();
    commandLine.RegisterParameterHandler(
        "--data", "FILE", "Dataset to make predictions for", value => dataPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--model", "FILE", "File with trained model", value => modelPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--predictions", "FILE", "File with generated predictions", value => outputPath = value, CommandLineParameterType.Required);

    bool argumentsParsed = commandLine.TryParse(args, usagePrefix);
    if (!argumentsParsed)
    {
        return false;
    }

    RecommenderDataset dataset = RecommenderDataset.Load(dataPath);
    var recommender =
        MatchboxRecommender.Load<RecommenderDataset, User, Item, RatingDistribution, DummyFeatureSource>(modelPath);

    IDictionary<User, IDictionary<Item, int>> predictedRatings = recommender.Predict(dataset);
    RecommenderPersistenceUtils.SavePredictedRatings(outputPath, predictedRatings);

    return true;
}
/// <summary>
/// Runs the module: loads a dataset and a trained model, finds related items rated by the same users
/// and saves them to a file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>True if the run was successful, false otherwise.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    // Required file arguments.
    string dataPath = string.Empty;
    string modelPath = string.Empty;
    string outputPath = string.Empty;

    // Optional numeric arguments with their defaults (as stated in the help text).
    int maxItems = 5;
    int minCommonUsers = 5;
    int minPoolSize = 5;

    var commandLine = new CommandLineParser();
    commandLine.RegisterParameterHandler(
        "--data", "FILE", "Dataset to make predictions for", value => dataPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--model", "FILE", "File with trained model", value => modelPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--predictions", "FILE", "File with generated predictions", value => outputPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--max-items",
        "NUM",
        "Maximum number of related items for a single item; defaults to 5",
        value => maxItems = value,
        CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--min-common-users",
        "NUM",
        "Minimum number of users that the query item and the related item should have been rated by in common; defaults to 5",
        value => minCommonUsers = value,
        CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--min-pool-size",
        "NUM",
        "Minimum size of the related item pool for a single item; defaults to 5",
        value => minPoolSize = value,
        CommandLineParameterType.Optional);

    bool argumentsParsed = commandLine.TryParse(args, usagePrefix);
    if (!argumentsParsed)
    {
        return false;
    }

    RecommenderDataset dataset = RecommenderDataset.Load(dataPath);
    var recommender =
        MatchboxRecommender.Load<RecommenderDataset, User, Item, RatingDistribution, DummyFeatureSource>(modelPath);
    var relatedItemFinder = new RecommenderEvaluator<RecommenderDataset, User, Item, int, int, RatingDistribution>(
        Mappings.StarRatingRecommender.ForEvaluation());

    IDictionary<Item, IEnumerable<Item>> itemToRelatedItems = relatedItemFinder.FindRelatedItemsRatedBySameUsers(
        recommender, dataset, maxItems, minCommonUsers, minPoolSize);
    RecommenderPersistenceUtils.SaveRelatedItems(outputPath, itemToRelatedItems);

    return true;
}
/// <summary>
/// Runs the module: evaluates predicted ratings against a test dataset and writes
/// the mean absolute error and the root mean squared error to a report file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>True if the run was successful, false otherwise.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string groundTruthPath = string.Empty;
    string predictionsPath = string.Empty;
    string reportPath = string.Empty;

    var commandLine = new CommandLineParser();
    commandLine.RegisterParameterHandler(
        "--test-data", "FILE", "Test dataset used to obtain ground truth", value => groundTruthPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--predictions", "FILE", "Predictions to evaluate", value => predictionsPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--report", "FILE", "Evaluation report file", value => reportPath = value, CommandLineParameterType.Required);

    bool argumentsParsed = commandLine.TryParse(args, usagePrefix);
    if (!argumentsParsed)
    {
        return false;
    }

    RecommenderDataset groundTruth = RecommenderDataset.Load(groundTruthPath);
    IDictionary<User, IDictionary<Item, int>> predictedRatings =
        RecommenderPersistenceUtils.LoadPredictedRatings(predictionsPath);

    var mapping = Mappings.StarRatingRecommender.ForEvaluation();
    var ratingEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(mapping);

    // The report file is opened before any metric is computed, and each metric is
    // written as soon as it is available.
    using (var report = new StreamWriter(reportPath))
    {
        var meanAbsoluteError = ratingEvaluator.RatingPredictionMetric(groundTruth, predictedRatings, Metrics.AbsoluteError);
        report.WriteLine("Mean absolute error: {0:0.000}", meanAbsoluteError);

        double rootMeanSquaredError = Math.Sqrt(
            ratingEvaluator.RatingPredictionMetric(groundTruth, predictedRatings, Metrics.SquaredError));
        report.WriteLine("Root mean squared error: {0:0.000}", rootMeanSquaredError);
    }

    return true;
}
/// <summary>
/// Runs the module: splits the input dataset into a training part and a test part
/// according to the specified fractions and saves both parts.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>True if the run was successful, false otherwise.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    // Required file arguments.
    string sourcePath = string.Empty;
    string trainingPath = string.Empty;
    string testPath = string.Empty;

    // Optional split settings with their defaults (as stated in the help text).
    double trainingUsers = 0.5;
    double testUserTrainingRatings = 0.25;
    double coldUsers = 0;
    double coldItems = 0;
    double ignoredUsers = 0;
    double ignoredItems = 0;
    bool dropOccasionalColdItems = false;

    var commandLine = new CommandLineParser();
    commandLine.RegisterParameterHandler(
        "--input-data", "FILE", "Dataset to split", value => sourcePath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--output-data-train", "FILE", "Training part of the split dataset", value => trainingPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--output-data-test", "FILE", "Test part of the split dataset", value => testPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--training-users", "NUM", "Fraction of training-only users; defaults to 0.5", (double value) => trainingUsers = value, CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--test-user-training-ratings", "NUM", "Fraction of test user ratings for training; defaults to 0.25", (double value) => testUserTrainingRatings = value, CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--cold-users", "NUM", "Fraction of cold (test-only) users; defaults to 0", (double value) => coldUsers = value, CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--cold-items", "NUM", "Fraction of cold (test-only) items; defaults to 0", (double value) => coldItems = value, CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--ignored-users", "NUM", "Fraction of ignored users; defaults to 0", (double value) => ignoredUsers = value, CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--ignored-items", "NUM", "Fraction of ignored items; defaults to 0", (double value) => ignoredItems = value, CommandLineParameterType.Optional);
    commandLine.RegisterParameterHandler(
        "--remove-occasional-cold-items", "Remove occasionally produced cold items", () => dropOccasionalColdItems = true);

    bool argumentsParsed = commandLine.TryParse(args, usagePrefix);
    if (!argumentsParsed)
    {
        return false;
    }

    var splitter = Mappings.StarRatingRecommender.SplitToTrainTest(
        trainingUsers,
        testUserTrainingRatings,
        coldUsers,
        coldItems,
        ignoredUsers,
        ignoredItems,
        dropOccasionalColdItems);

    var source = RecommenderDataset.Load(sourcePath);

    // The training part is built and saved first, then the test part.
    // Both parts reuse the rating info of the source dataset.
    var trainingInstances = splitter.GetInstances(SplitInstanceSource.Training(source));
    var trainingPart = new RecommenderDataset(trainingInstances, source.StarRatingInfo);
    trainingPart.Save(trainingPath);

    var testInstances = splitter.GetInstances(SplitInstanceSource.Test(source));
    var testPart = new RecommenderDataset(testInstances, source.StarRatingInfo);
    testPart.Save(testPath);

    return true;
}
/// <summary>
/// Loads the dataset using the settings from this configuration element,
/// generating the dataset file first if it does not exist yet.
/// </summary>
/// <returns>The loaded dataset.</returns>
public override RecommenderDataset Load()
{
    // Nothing to generate when the file is already there.
    if (File.Exists(FileName))
    {
        return RecommenderDataset.Load(this.FileName);
    }

    // Resolve the generator type by name and let it produce the missing file.
    Type generatorType = Type.GetType(Generator);
    if (generatorType == null)
    {
        throw new InvalidOperationException($"{Generator} type is undefined");
    }

    var datasetGenerator = (IDatasetGenerator)Activator.CreateInstance(generatorType);
    datasetGenerator.Generate(FileName);

    return RecommenderDataset.Load(this.FileName);
}
/// <summary>
/// Initializes a new instance of the <see cref="RecommenderRun"/> class.
/// </summary>
/// <param name="name">The name of the run.</param>
/// <param name="dataset">The dataset to run the tests on.</param>
/// <param name="foldCount">The number of folds to split the dataset into.</param>
/// <param name="splittingMappingFactory">The factory which creates instances of the splitting mapping.</param>
/// <param name="recommenderFactory">The factory which creates a recommender for a given splitting mapping.</param>
/// <param name="tests">The tests for the recommender.</param>
public RecommenderRun(
    string name,
    RecommenderDataset dataset,
    int foldCount,
    Func<SplittingMapping> splittingMappingFactory,
    Func<SplittingMapping, Recommender> recommenderFactory,
    IEnumerable<RecommenderTest> tests)
{
    // Argument sanity checks; these are Debug.Assert calls, so they only run in builds with DEBUG defined.
    Debug.Assert(
        !string.IsNullOrEmpty(name),
        "Test run name can not be null or empty.");
    Debug.Assert(
        dataset != null,
        "A valid dataset should be provided.");
    Debug.Assert(
        foldCount >= 1,
        "A valid number of folds should be provided.");
    Debug.Assert(
        splittingMappingFactory != null,
        "A valid splitting mapping factory should be provided.");
    Debug.Assert(
        recommenderFactory != null,
        "A valid recommender factory should be provided.");
    Debug.Assert(
        tests != null,
        "A valid collection of recommender tests should be provided.");

    this.Name = name;
    this.RecommenderDataset = dataset;
    this.FoldCount = foldCount;
    this.SplittingMappingFactory = splittingMappingFactory;
    this.RecommenderFactory = recommenderFactory;

    // The tests are copied into a list of their own.
    this.Tests = tests.ToList();
}
/// <summary>
/// Runs the module: evaluates recommended items against a test dataset
/// and writes the NDCG value to a report file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>True if the run was successful, false otherwise.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string groundTruthPath = string.Empty;
    string predictionsPath = string.Empty;
    string reportPath = string.Empty;

    var commandLine = new CommandLineParser();
    commandLine.RegisterParameterHandler(
        "--test-data", "FILE", "Test dataset used to obtain ground truth", value => groundTruthPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--predictions", "FILE", "Predictions to evaluate", value => predictionsPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--report", "FILE", "Evaluation report file", value => reportPath = value, CommandLineParameterType.Required);

    bool argumentsParsed = commandLine.TryParse(args, usagePrefix);
    if (!argumentsParsed)
    {
        return false;
    }

    RecommenderDataset groundTruth = RecommenderDataset.Load(groundTruthPath);
    int minRating = Mappings.StarRatingRecommender.GetRatingInfo(groundTruth).MinStarRating;
    IDictionary<User, IEnumerable<Item>> userToRecommendedItems =
        RecommenderPersistenceUtils.LoadRecommendedItems(predictionsPath);

    var mapping = Mappings.StarRatingRecommender.ForEvaluation();
    var recommendationEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(mapping);

    using (var report = new StreamWriter(reportPath))
    {
        // The rating-to-gain conversion shifts ratings so that the minimum star rating maps to 1.
        var ndcg = recommendationEvaluator.ItemRecommendationMetric(
            groundTruth,
            userToRecommendedItems,
            Metrics.Ndcg,
            rating => Convert.ToDouble(rating) - minRating + 1);
        report.WriteLine("NDCG: {0:0.000}", ndcg);
    }

    return true;
}
/// <summary>
/// Runs the module: evaluates related item predictions against a test dataset and writes
/// the NDCG values computed with two different similarity functions to a report file.
/// </summary>
/// <param name="args">The command line arguments for the module.</param>
/// <param name="usagePrefix">The prefix to print before the usage string.</param>
/// <returns>True if the run was successful, false otherwise.</returns>
public override bool Run(string[] args, string usagePrefix)
{
    string groundTruthPath = string.Empty;
    string predictionsPath = string.Empty;
    string reportPath = string.Empty;
    int minCommonRatings = 5;

    var commandLine = new CommandLineParser();
    commandLine.RegisterParameterHandler(
        "--test-data", "FILE", "Test dataset used to obtain ground truth", value => groundTruthPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--predictions", "FILE", "Predictions to evaluate", value => predictionsPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--report", "FILE", "Evaluation report file", value => reportPath = value, CommandLineParameterType.Required);
    commandLine.RegisterParameterHandler(
        "--min-common-items",
        "NUM",
        "Minimum number of users that the query item and the related item should have been rated by in common; defaults to 5",
        value => minCommonRatings = value,
        CommandLineParameterType.Optional);

    bool argumentsParsed = commandLine.TryParse(args, usagePrefix);
    if (!argumentsParsed)
    {
        return false;
    }

    RecommenderDataset groundTruth = RecommenderDataset.Load(groundTruthPath);
    IDictionary<Item, IEnumerable<Item>> itemToRelatedItems =
        RecommenderPersistenceUtils.LoadRelatedItems(predictionsPath);

    var mapping = Mappings.StarRatingRecommender.ForEvaluation();
    var relatedItemEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(mapping);

    // The report file is opened before any metric is computed, and each metric is
    // written as soon as it is available.
    using (var report = new StreamWriter(reportPath))
    {
        var manhattanNdcg = relatedItemEvaluator.RelatedItemsMetric(
            groundTruth, itemToRelatedItems, minCommonRatings, Metrics.Ndcg, Metrics.NormalizedManhattanSimilarity);
        report.WriteLine("L1 Sim NDCG: {0:0.000}", manhattanNdcg);

        var euclideanNdcg = relatedItemEvaluator.RelatedItemsMetric(
            groundTruth, itemToRelatedItems, minCommonRatings, Metrics.Ndcg, Metrics.NormalizedEuclideanSimilarity);
        report.WriteLine("L2 Sim NDCG: {0:0.000}", euclideanNdcg);
    }

    return true;
}
/// <summary>
/// Loads the dataset from the file specified by this configuration element.
/// </summary>
/// <returns>The loaded dataset.</returns>
public virtual RecommenderDataset Load() => RecommenderDataset.Load(this.FileName);
/// <summary>
/// Loads a dataset from a given file.
/// <para>
/// Data file format:
/// The first record (the first line that is neither empty nor a comment starting with '#')
/// describes the min and max ratings and has the form 'R,Min,Max'.
/// Rows starting with 'U' describe a single user and have the form 'U,UserId,UserFeatures'.
/// Rows starting with 'I' describe a single item and have the form 'I,ItemId,ItemFeatures'.
/// All other rows describe instances and should have the form 'UserID,ItemID,Rating'.
/// A feature description has the form 'FeatureIndex1:Value1|FeatureIndex2:Value2|...'.
/// If all the user features are zero or there are no user features in the dataset at all,
/// the user description can be omitted. The same is true for items.
/// </para>
/// </summary>
/// <param name="fileName">File to load data from.</param>
/// <returns>The loaded dataset.</returns>
public static RecommenderDataset Load(string fileName)
{
    var rawObservations = new List<Tuple<string, string, int>>();
    var userIdToFeatures = new Dictionary<string, Vector>();
    var itemIdToFeatures = new Dictionary<string, Vector>();
    int? minRating = null, maxRating = null;
    int userFeatureCount = 0, itemFeatureCount = 0;

    // Format problems are reported through the parsing context.
    // NOTE(review): the code below assumes RaiseError / RaiseGlobalError throw - confirm against FileParsingContext.
    var parsingContext = new FileParsingContext(fileName);
    using (var reader = new StreamReader(fileName))
    {
        string line;
        bool isFirstRecord = true;
        while ((line = reader.ReadLine()) != null)
        {
            parsingContext.NextLine(line);

            // Ordinal comparison: '#' is a format marker, not linguistic text.
            if (line.Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
            {
                continue; // Skip comments and empty lines
            }

            string[] splits = line.Split(',');

            if (isFirstRecord)
            {
                //// Parse rating record
                int minRatingValue = 0, maxRatingValue = 0;
                if (splits.Length != 3 ||
                    splits[0].Trim() != "R" ||
                    !int.TryParse(splits[1], out minRatingValue) ||
                    !int.TryParse(splits[2], out maxRatingValue))
                {
                    parsingContext.RaiseError("Invalid rating info record.");
                }

                minRating = minRatingValue;
                maxRating = maxRatingValue;
                isFirstRecord = false;
            }
            else if (splits[0].Trim() == "U")
            {
                //// Parse user record
                if (splits.Length != 3)
                {
                    parsingContext.RaiseError("Invalid user record.");
                }

                string userId = splits[1].Trim();
                if (userIdToFeatures.ContainsKey(userId))
                {
                    parsingContext.RaiseError("Record describing user '{0}' is presented more than once.", userId);
                }

                Vector features = ParseFeatures(splits[2], parsingContext, ref userFeatureCount);
                userIdToFeatures.Add(userId, features);
            }
            else if (splits[0].Trim() == "I")
            {
                //// Parse item record
                if (splits.Length != 3)
                {
                    parsingContext.RaiseError("Invalid item record.");
                }

                string itemId = splits[1].Trim();
                if (itemIdToFeatures.ContainsKey(itemId))
                {
                    parsingContext.RaiseError("Record describing item '{0}' is presented more than once.", itemId);
                }

                Vector features = ParseFeatures(splits[2], parsingContext, ref itemFeatureCount);
                itemIdToFeatures.Add(itemId, features);
            }
            else
            {
                //// Parse instance record
                // Validate the field count BEFORE indexing into the fields. Previously splits[1] was read first,
                // so a line without any comma failed with IndexOutOfRangeException instead of being
                // reported as an invalid instance record.
                int rating = 0;
                if (splits.Length != 3 || !int.TryParse(splits[2], out rating))
                {
                    parsingContext.RaiseError("Invalid instance record.", line);
                }

                string userId = splits[0].Trim();
                string itemId = splits[1].Trim();
                rawObservations.Add(Tuple.Create(userId, itemId, rating));
            }
        }
    }

    // No rating record was seen, i.e. the file had no records at all.
    if (!minRating.HasValue)
    {
        parsingContext.RaiseGlobalError("Rating info is missing.");
    }

    var result = new RecommenderDataset { StarRatingInfo = new StarRatingInfo(minRating.Value, maxRating.Value) };

    // Observations are converted only after the whole file has been read, because user and item
    // records may appear anywhere in the file, including after the instances which refer to them.
    foreach (var observation in rawObservations)
    {
        string userId = observation.Item1;
        string itemId = observation.Item2;
        int rating = observation.Item3;

        if (rating < minRating.Value || rating > maxRating.Value)
        {
            parsingContext.RaiseGlobalError("One of the ratings is inconsistent with the specified rating info.");
        }

        User user = RetrieveEntity(
            userId, result.idToUser, userIdToFeatures, userFeatureCount, (id, features) => new User(id, features));
        Item item = RetrieveEntity(
            itemId, result.idToItem, itemIdToFeatures, itemFeatureCount, (id, features) => new Item(id, features));
        result.observations.Add(new RatedUserItem(user, item, rating));
    }

    return result;
}