Пример #1
0
        /// <summary>
        /// Runs the module: parses the command line, loads a dataset and a trained model,
        /// finds related users and saves them to the predictions file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // Option values; the numeric options keep these defaults unless overridden on the command line.
            string dataPath = string.Empty;
            string modelPath = string.Empty;
            string outputPath = string.Empty;
            int maxUsers = 5;
            int minCommonItems = 5;
            int minPoolSize = 5;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--data",
                "FILE",
                "Dataset to make predictions for",
                value => dataPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File with trained model",
                value => modelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "File with generated predictions",
                value => outputPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--max-users",
                "NUM",
                "Maximum number of related users for a single user; defaults to 5",
                value => maxUsers = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--min-common-items",
                "NUM",
                "Minimum number of items that the query user and the related user should have rated in common; defaults to 5",
                value => minCommonItems = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--min-pool-size",
                "NUM",
                "Minimum size of the related user pool for a single user; defaults to 5",
                value => minPoolSize = value,
                CommandLineParameterType.Optional);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            // The dataset is loaded before the model, so a bad dataset path fails first.
            var dataset = RecommenderDataset.Load(dataPath);
            var model = MatchboxRecommender.Load<RecommenderDataset, User, Item, DummyFeatureSource>(modelPath);

            var evaluationMapping = Mappings.StarRatingRecommender.ForEvaluation();
            var relatedUserFinder =
                new RecommenderEvaluator<RecommenderDataset, User, Item, int, int, Discrete>(evaluationMapping);

            IDictionary<User, IEnumerable<User>> usersToRelatedUsers =
                relatedUserFinder.FindRelatedUsersWhoRatedSameItems(
                    model,
                    dataset,
                    maxUsers,
                    minCommonItems,
                    minPoolSize);

            RecommenderPersistenceUtils.SaveRelatedUsers(outputPath, usersToRelatedUsers);
            return true;
        }
Пример #2
0
        /// <summary>
        /// Runs the module: parses the command line, loads the test dataset and the predicted ratings,
        /// and writes the mean absolute error and root mean squared error to the report file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string groundTruthPath = string.Empty;
            string predictedRatingsPath = string.Empty;
            string reportPath = string.Empty;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--test-data",
                "FILE",
                "Test dataset used to obtain ground truth",
                value => groundTruthPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "Predictions to evaluate",
                value => predictedRatingsPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--report",
                "FILE",
                "Evaluation report file",
                value => reportPath = value,
                CommandLineParameterType.Required);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var groundTruth = RecommenderDataset.Load(groundTruthPath);
            IDictionary<User, IDictionary<Item, int>> predictedRatings =
                RecommenderPersistenceUtils.LoadPredictedRatings(predictedRatingsPath);

            var ratingEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
                Mappings.StarRatingRecommender.ForEvaluation());

            using (var report = new StreamWriter(reportPath))
            {
                // Each metric is computed right before its line is written, one after the other.
                var meanAbsoluteError = ratingEvaluator.RatingPredictionMetric(
                    groundTruth,
                    predictedRatings,
                    Metrics.AbsoluteError);
                report.WriteLine("Mean absolute error: {0:0.000}", meanAbsoluteError);

                // The squared-error metric is square-rooted to report the RMSE.
                var rootMeanSquaredError = Math.Sqrt(
                    ratingEvaluator.RatingPredictionMetric(groundTruth, predictedRatings, Metrics.SquaredError));
                report.WriteLine("Root mean squared error: {0:0.000}", rootMeanSquaredError);
            }

            return true;
        }
        /// <summary>
        /// Runs the module: parses the command line, loads the input dataset, builds an output dataset
        /// from the instances and rating info produced by the negative-data-generating mapping,
        /// and saves it to the output file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string inputDatasetFile = string.Empty;
            string outputDatasetFile = string.Empty;

            var parser = new CommandLineParser();
            parser.RegisterParameterHandler(
                "--input-data",
                "FILE",
                "Input dataset, treated as if all the ratings are positive",
                v => inputDatasetFile = v,
                CommandLineParameterType.Required);
            parser.RegisterParameterHandler(
                "--output-data",
                "FILE",
                "Output dataset with both positive and negative data",
                v => outputDatasetFile = v,
                CommandLineParameterType.Required);

            if (!parser.TryParse(args, usagePrefix))
            {
                return false;
            }

            var generatorMapping = Mappings.StarRatingRecommender.WithGeneratedNegativeData();

            var inputDataset = RecommenderDataset.Load(inputDatasetFile);

            // Both the instances and the rating info are taken from the generator mapping
            // rather than from the input dataset directly.
            var outputDataset = new RecommenderDataset(
                generatorMapping.GetInstances(inputDataset).Select(i => new RatedUserItem(i.User, i.Item, i.Rating)),
                generatorMapping.GetRatingInfo(inputDataset));

            outputDataset.Save(outputDatasetFile);

            return true;
        }
Пример #4
0
        /// <summary>
        /// Runs the module: parses the command line, loads the input dataset and saves
        /// its training and test parts, as produced by the train/test splitting mapping, to two files.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // File options.
            string sourcePath = string.Empty;
            string trainPath = string.Empty;
            string testPath = string.Empty;

            // Split options with their defaults (mirrored in the usage strings below).
            double trainingUsers = 0.5;
            double testUserTrainingRatings = 0.25;
            double coldUsers = 0;
            double coldItems = 0;
            double ignoredUsers = 0;
            double ignoredItems = 0;
            bool dropOccasionalColdItems = false;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--input-data",
                "FILE",
                "Dataset to split",
                value => sourcePath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--output-data-train",
                "FILE",
                "Training part of the split dataset",
                value => trainPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--output-data-test",
                "FILE",
                "Test part of the split dataset",
                value => testPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--training-users",
                "NUM",
                "Fraction of training-only users; defaults to 0.5",
                (double value) => trainingUsers = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--test-user-training-ratings",
                "NUM",
                "Fraction of test user ratings for training; defaults to 0.25",
                (double value) => testUserTrainingRatings = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--cold-users",
                "NUM",
                "Fraction of cold (test-only) users; defaults to 0",
                (double value) => coldUsers = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--cold-items",
                "NUM",
                "Fraction of cold (test-only) items; defaults to 0",
                (double value) => coldItems = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--ignored-users",
                "NUM",
                "Fraction of ignored users; defaults to 0",
                (double value) => ignoredUsers = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--ignored-items",
                "NUM",
                "Fraction of ignored items; defaults to 0",
                (double value) => ignoredItems = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--remove-occasional-cold-items",
                "Remove occasionally produced cold items",
                () => dropOccasionalColdItems = true);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var splitter = Mappings.StarRatingRecommender.SplitToTrainTest(
                trainingUsers,
                testUserTrainingRatings,
                coldUsers,
                coldItems,
                ignoredUsers,
                ignoredItems,
                dropOccasionalColdItems);

            var source = RecommenderDataset.Load(sourcePath);

            // The training part is built and saved before the test part is built.
            var trainingPart = new RecommenderDataset(
                splitter.GetInstances(SplitInstanceSource.Training(source)),
                source.StarRatingInfo);
            trainingPart.Save(trainPath);

            var testPart = new RecommenderDataset(
                splitter.GetInstances(SplitInstanceSource.Test(source)),
                source.StarRatingInfo);
            testPart.Save(testPath);

            return true;
        }
        /// <summary>
        /// Runs the module: parses the command line, loads a dataset and a trained model,
        /// predicts ratings for the dataset and saves them to the predictions file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string dataPath = string.Empty;
            string modelPath = string.Empty;
            string outputPath = string.Empty;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--data",
                "FILE",
                "Dataset to make predictions for",
                value => dataPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File with trained model",
                value => modelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "File with generated predictions",
                value => outputPath = value,
                CommandLineParameterType.Required);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            // The dataset is loaded before the model, so a bad dataset path fails first.
            var dataset = RecommenderDataset.Load(dataPath);
            var model = MatchboxRecommender.Load<RecommenderDataset, User, Item, DummyFeatureSource>(modelPath);

            IDictionary<User, IDictionary<Item, int>> predictedRatings = model.Predict(dataset);
            RecommenderPersistenceUtils.SavePredictedRatings(outputPath, predictedRatings);

            return true;
        }
        /// <summary>
        /// Runs the module: parses the command line, loads the test dataset and the recommended items,
        /// and writes the NDCG of the recommendations to the report file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string groundTruthPath = string.Empty;
            string recommendationsPath = string.Empty;
            string reportPath = string.Empty;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--test-data",
                "FILE",
                "Test dataset used to obtain ground truth",
                value => groundTruthPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "Predictions to evaluate",
                value => recommendationsPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--report",
                "FILE",
                "Evaluation report file",
                value => reportPath = value,
                CommandLineParameterType.Required);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var groundTruth = RecommenderDataset.Load(groundTruthPath);
            int lowestRating = Mappings.StarRatingRecommender.GetRatingInfo(groundTruth).MinStarRating;

            IDictionary<User, IEnumerable<Item>> recommendations =
                RecommenderPersistenceUtils.LoadRecommendedItems(recommendationsPath);

            var recommendationEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
                Mappings.StarRatingRecommender.ForEvaluation());

            using (var report = new StreamWriter(reportPath))
            {
                // Ratings are shifted so that the minimum star rating is passed to the metric as 1.
                var ndcg = recommendationEvaluator.ItemRecommendationMetric(
                    groundTruth,
                    recommendations,
                    Metrics.Ndcg,
                    rating => Convert.ToDouble(rating) - lowestRating + 1);
                report.WriteLine("NDCG: {0:0.000}", ndcg);
            }

            return true;
        }
Пример #7
0
        /// <summary>
        /// Runs the module: parses the command line, loads the test dataset and the related items,
        /// and writes the NDCG of the related items, computed with two similarity functions, to the report file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string groundTruthPath = string.Empty;
            string relatedItemsPath = string.Empty;
            string reportPath = string.Empty;
            int minCommonRatings = 5;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--test-data",
                "FILE",
                "Test dataset used to obtain ground truth",
                value => groundTruthPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "Predictions to evaluate",
                value => relatedItemsPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--report",
                "FILE",
                "Evaluation report file",
                value => reportPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--min-common-items",
                "NUM",
                "Minimum number of users that the query item and the related item should have been rated by in common; defaults to 5",
                value => minCommonRatings = value,
                CommandLineParameterType.Optional);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var groundTruth = RecommenderDataset.Load(groundTruthPath);
            IDictionary<Item, IEnumerable<Item>> itemsToRelatedItems =
                RecommenderPersistenceUtils.LoadRelatedItems(relatedItemsPath);

            var relatedItemEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
                Mappings.StarRatingRecommender.ForEvaluation());

            using (var report = new StreamWriter(reportPath))
            {
                // Each metric is computed right before its line is written, one after the other.
                var manhattanNdcg = relatedItemEvaluator.RelatedItemsMetric(
                    groundTruth,
                    itemsToRelatedItems,
                    minCommonRatings,
                    Metrics.Ndcg,
                    Metrics.NormalizedManhattanSimilarity);
                report.WriteLine("L1 Sim NDCG: {0:0.000}", manhattanNdcg);

                var euclideanNdcg = relatedItemEvaluator.RelatedItemsMetric(
                    groundTruth,
                    itemsToRelatedItems,
                    minCommonRatings,
                    Metrics.Ndcg,
                    Metrics.NormalizedEuclideanSimilarity);
                report.WriteLine("L2 Sim NDCG: {0:0.000}", euclideanNdcg);
            }

            return true;
        }
Пример #8
0
        /// <summary>
        /// Loads dataset from a given file.
        /// <para>
        /// Data file format:
        /// Row starting with 'R' describes min and max ratings and has form 'R,Min,Max'.
        /// It must be the first record in the file (empty lines and lines starting with '#' are skipped).
        /// Rows starting with 'U' describe a single user and have form 'U,UserId,UserFeatures'.
        /// Rows starting with 'I' describe a single item and have form 'I,ItemId,ItemFeatures'.
        /// Rows other than that describe instances and should have form 'UserID,ItemID,Rating'.
        /// Feature description has form 'FeatureIndex1:Value1|FeatureIndex2:Value2|...'
        /// If all the user features are zero or there are no user features in the dataset at all, the user description can be omitted. Same is true for items.
        /// </para>
        /// <para>
        /// Malformed records, duplicate user or item records, missing rating info and ratings outside
        /// of the declared range are reported through the file parsing context.
        /// </para>
        /// </summary>
        /// <param name="fileName">File to load data from.</param>
        /// <returns>The loaded dataset.</returns>
        public static RecommenderDataset Load(string fileName)
        {
            var rawObservations = new List<Tuple<string, string, int>>();
            var userIdToFeatures = new Dictionary<string, Vector>();
            var itemIdToFeatures = new Dictionary<string, Vector>();
            int? minRating = null, maxRating = null;
            int userFeatureCount = 0, itemFeatureCount = 0;

            // NOTE(review): the code below relies on RaiseError / RaiseGlobalError not returning
            // (presumably they throw) - confirm against FileParsingContext.
            var parsingContext = new FileParsingContext(fileName);

            using (var reader = new StreamReader(fileName))
            {
                string line;
                bool isFirstRecord = true;
                while ((line = reader.ReadLine()) != null)
                {
                    parsingContext.NextLine(line);

                    // Ordinal comparison: '#' is a format marker, not linguistic text.
                    if (line.Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
                    {
                        continue; // Skip comments and empty lines
                    }

                    string[] splits = line.Split(',');

                    if (isFirstRecord)
                    {
                        //// Parse rating record: the first meaningful line must be 'R,Min,Max'

                        int minRatingValue = 0, maxRatingValue = 0;
                        if (splits.Length != 3 ||
                            splits[0].Trim() != "R" ||
                            !int.TryParse(splits[1], out minRatingValue) ||
                            !int.TryParse(splits[2], out maxRatingValue))
                        {
                            parsingContext.RaiseError("Invalid rating info record.");
                        }

                        minRating = minRatingValue;
                        maxRating = maxRatingValue;
                        isFirstRecord = false;
                    }
                    else if (splits[0].Trim() == "U")
                    {
                        //// Parse user record

                        if (splits.Length != 3)
                        {
                            parsingContext.RaiseError("Invalid user record.");
                        }

                        string userId = splits[1].Trim();
                        if (userIdToFeatures.ContainsKey(userId))
                        {
                            parsingContext.RaiseError("Record describing user '{0}' is presented more than once.", userId);
                        }

                        Vector features = ParseFeatures(splits[2], parsingContext, ref userFeatureCount);
                        userIdToFeatures.Add(userId, features);
                    }
                    else if (splits[0].Trim() == "I")
                    {
                        //// Parse item record

                        if (splits.Length != 3)
                        {
                            parsingContext.RaiseError("Invalid item record.");
                        }

                        string itemId = splits[1].Trim();
                        if (itemIdToFeatures.ContainsKey(itemId))
                        {
                            parsingContext.RaiseError("Record describing item '{0}' is presented more than once.", itemId);
                        }

                        Vector features = ParseFeatures(splits[2], parsingContext, ref itemFeatureCount);
                        itemIdToFeatures.Add(itemId, features);
                    }
                    else
                    {
                        //// Parse instance record

                        // Validate the field count before touching splits[1]: a line without commas
                        // has a single field, and indexing it first would fail with an
                        // IndexOutOfRangeException instead of a parsing error.
                        int rating = 0;
                        if (splits.Length != 3 || !int.TryParse(splits[2], out rating))
                        {
                            parsingContext.RaiseError("Invalid instance record.");
                        }

                        string userId = splits[0].Trim();
                        string itemId = splits[1].Trim();
                        rawObservations.Add(Tuple.Create(userId, itemId, rating));
                    }
                }
            }

            // No rating record was seen (the file had no records at all).
            if (!minRating.HasValue)
            {
                parsingContext.RaiseGlobalError("Rating info is missing.");
            }

            var result = new RecommenderDataset
            {
                StarRatingInfo = new StarRatingInfo(minRating.Value, maxRating.Value)
            };

            // Instances are materialized only after the whole file is read, so user and item
            // records may appear after the instances that refer to them.
            foreach (var observation in rawObservations)
            {
                string userId = observation.Item1;
                string itemId = observation.Item2;
                int rating = observation.Item3;

                if (rating < minRating.Value || rating > maxRating.Value)
                {
                    parsingContext.RaiseGlobalError("One of the ratings is inconsistent with the specified rating info.");
                }

                User user = RetrieveEntity(userId, result.idToUser, userIdToFeatures, userFeatureCount, (id, features) => new User(id, features));
                Item item = RetrieveEntity(itemId, result.idToItem, itemIdToFeatures, itemFeatureCount, (id, features) => new Item(id, features));
                result.observations.Add(new RatedUserItem(user, item, rating));
            }

            return result;
        }