/// <summary>
        /// Runs the module: loads the input dataset, passes it through the
        /// negative-data-generating star rating mapping and saves the resulting dataset.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string inputDatasetFile = string.Empty;
            string outputDatasetFile = string.Empty;

            var parser = new CommandLineParser();
            parser.RegisterParameterHandler(
                "--input-data",
                "FILE",
                "Input dataset, treated as if all the ratings are positive",
                v => inputDatasetFile = v,
                CommandLineParameterType.Required);
            parser.RegisterParameterHandler(
                "--output-data",
                "FILE",
                "Output dataset with both positive and negative data",
                v => outputDatasetFile = v,
                CommandLineParameterType.Required);

            if (!parser.TryParse(args, usagePrefix))
            {
                return false;
            }

            var generatorMapping = Mappings.StarRatingRecommender.WithGeneratedNegativeData();
            var inputDataset = RecommenderDataset.Load(inputDatasetFile);

            // The output dataset is built from the instances and the rating info
            // that the generator mapping reports for the input dataset.
            var outputDataset = new RecommenderDataset(
                generatorMapping.GetInstances(inputDataset).Select(i => new RatedUserItem(i.User, i.Item, i.Rating)),
                generatorMapping.GetRatingInfo(inputDataset));

            outputDataset.Save(outputDatasetFile);

            return true;
        }
        /// <summary>
        /// Runs the module: loads a dataset and a trained model, predicts ratings
        /// for the dataset and saves the predictions to a file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string dataPath = string.Empty;
            string modelPath = string.Empty;
            string outputPath = string.Empty;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--data",
                "FILE",
                "Dataset to make predictions for",
                value => dataPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File with trained model",
                value => modelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "File with generated predictions",
                value => outputPath = value,
                CommandLineParameterType.Required);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            // The dataset is loaded before the model, then predictions are made and persisted.
            RecommenderDataset dataset = RecommenderDataset.Load(dataPath);
            var recommender = MatchboxRecommender.Load<RecommenderDataset, User, Item, RatingDistribution, DummyFeatureSource>(modelPath);

            IDictionary<User, IDictionary<Item, int>> predictedRatings = recommender.Predict(dataset);
            RecommenderPersistenceUtils.SavePredictedRatings(outputPath, predictedRatings);

            return true;
        }
        /// <summary>
        /// Runs the module: loads a dataset and a trained model, finds related items
        /// rated by the same users and saves them to a file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // Required file locations.
            string dataPath = string.Empty;
            string modelPath = string.Empty;
            string outputPath = string.Empty;

            // Optional numeric settings, each defaulting to 5.
            int maxItems = 5;
            int minCommonRatings = 5;
            int minPoolSize = 5;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--data",
                "FILE",
                "Dataset to make predictions for",
                value => dataPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File with trained model",
                value => modelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "File with generated predictions",
                value => outputPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--max-items",
                "NUM",
                "Maximum number of related items for a single item; defaults to 5",
                value => maxItems = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--min-common-users",
                "NUM",
                "Minimum number of users that the query item and the related item should have been rated by in common; defaults to 5",
                value => minCommonRatings = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--min-pool-size",
                "NUM",
                "Minimum size of the related item pool for a single item; defaults to 5",
                value => minPoolSize = value,
                CommandLineParameterType.Optional);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            RecommenderDataset dataset = RecommenderDataset.Load(dataPath);
            var recommender = MatchboxRecommender.Load<RecommenderDataset, User, Item, RatingDistribution, DummyFeatureSource>(modelPath);

            var relatedItemFinder = new RecommenderEvaluator<RecommenderDataset, User, Item, int, int, RatingDistribution>(
                Mappings.StarRatingRecommender.ForEvaluation());

            IDictionary<Item, IEnumerable<Item>> related = relatedItemFinder.FindRelatedItemsRatedBySameUsers(
                recommender,
                dataset,
                maxItems,
                minCommonRatings,
                minPoolSize);

            RecommenderPersistenceUtils.SaveRelatedItems(outputPath, related);

            return true;
        }
        /// <summary>
        /// Runs the module: evaluates predicted ratings against a test dataset and
        /// writes the mean absolute error and root mean squared error to a report file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string groundTruthPath = string.Empty;
            string predictionsPath = string.Empty;
            string reportPath = string.Empty;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--test-data",
                "FILE",
                "Test dataset used to obtain ground truth",
                value => groundTruthPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "Predictions to evaluate",
                value => predictionsPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--report",
                "FILE",
                "Evaluation report file",
                value => reportPath = value,
                CommandLineParameterType.Required);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            RecommenderDataset groundTruth = RecommenderDataset.Load(groundTruthPath);
            IDictionary<User, IDictionary<Item, int>> predictedRatings =
                RecommenderPersistenceUtils.LoadPredictedRatings(predictionsPath);

            var ratingEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
                Mappings.StarRatingRecommender.ForEvaluation());

            using (var report = new StreamWriter(reportPath))
            {
                // Each metric is computed right before its line is written, so the
                // report is produced incrementally in the same order as before.
                var meanAbsoluteError = ratingEvaluator.RatingPredictionMetric(groundTruth, predictedRatings, Metrics.AbsoluteError);
                report.WriteLine("Mean absolute error: {0:0.000}", meanAbsoluteError);

                var meanSquaredError = ratingEvaluator.RatingPredictionMetric(groundTruth, predictedRatings, Metrics.SquaredError);
                report.WriteLine("Root mean squared error: {0:0.000}", Math.Sqrt(meanSquaredError));
            }

            return true;
        }
Esempio n. 5
0
        /// <summary>
        /// Runs the module: splits the input dataset into training and test parts
        /// using the configured fractions and saves each part to its own file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // Required file locations.
            string sourcePath = string.Empty;
            string trainingPath = string.Empty;
            string testPath = string.Empty;

            // Optional split settings with their defaults.
            double trainingOnlyUsers = 0.5;
            double testUserTrainingRatings = 0.25;
            double coldUsers = 0;
            double coldItems = 0;
            double ignoredUsers = 0;
            double ignoredItems = 0;
            bool dropOccasionalColdItems = false;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--input-data",
                "FILE",
                "Dataset to split",
                value => sourcePath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--output-data-train",
                "FILE",
                "Training part of the split dataset",
                value => trainingPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--output-data-test",
                "FILE",
                "Test part of the split dataset",
                value => testPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--training-users",
                "NUM",
                "Fraction of training-only users; defaults to 0.5",
                (double value) => trainingOnlyUsers = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--test-user-training-ratings",
                "NUM",
                "Fraction of test user ratings for training; defaults to 0.25",
                (double value) => testUserTrainingRatings = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--cold-users",
                "NUM",
                "Fraction of cold (test-only) users; defaults to 0",
                (double value) => coldUsers = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--cold-items",
                "NUM",
                "Fraction of cold (test-only) items; defaults to 0",
                (double value) => coldItems = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--ignored-users",
                "NUM",
                "Fraction of ignored users; defaults to 0",
                (double value) => ignoredUsers = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--ignored-items",
                "NUM",
                "Fraction of ignored items; defaults to 0",
                (double value) => ignoredItems = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--remove-occasional-cold-items",
                "Remove occasionally produced cold items",
                () => dropOccasionalColdItems = true);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var splitter = Mappings.StarRatingRecommender.SplitToTrainTest(
                trainingOnlyUsers,
                testUserTrainingRatings,
                coldUsers,
                coldItems,
                ignoredUsers,
                ignoredItems,
                dropOccasionalColdItems);

            var source = RecommenderDataset.Load(sourcePath);

            // The training part is built and saved before the test part is built.
            var trainingPart = new RecommenderDataset(
                splitter.GetInstances(SplitInstanceSource.Training(source)),
                source.StarRatingInfo);
            trainingPart.Save(trainingPath);

            var testPart = new RecommenderDataset(
                splitter.GetInstances(SplitInstanceSource.Test(source)),
                source.StarRatingInfo);
            testPart.Save(testPath);

            return true;
        }
        /// <summary>
        /// Loads the dataset from the file configured in this element, generating
        /// the file first if it does not exist yet.
        /// </summary>
        /// <returns>The loaded dataset.</returns>
        /// <exception cref="InvalidOperationException">
        /// The file is missing and the generator type name cannot be resolved to a type.
        /// </exception>
        public override RecommenderDataset Load()
        {
            // Nothing to generate when the file is already there.
            if (File.Exists(FileName))
            {
                return RecommenderDataset.Load(this.FileName);
            }

            Type generatorType = Type.GetType(Generator);
            if (generatorType == null)
            {
                throw new InvalidOperationException($"{Generator} type is undefined");
            }

            var datasetGenerator = (IDatasetGenerator)Activator.CreateInstance(generatorType);
            datasetGenerator.Generate(FileName);

            return RecommenderDataset.Load(this.FileName);
        }
Esempio n. 7
0
        /// <summary>
        /// Initializes a new instance of the <see cref="RecommenderRun"/> class.
        /// </summary>
        /// <param name="name">The name of the run. Asserted to be non-empty in debug builds.</param>
        /// <param name="dataset">The dataset to run the test on. Asserted to be non-null in debug builds.</param>
        /// <param name="foldCount">The number of folds to split the dataset into. Asserted to be positive in debug builds.</param>
        /// <param name="splittingMappingFactory">The factory to create instances of the splitting mapping.</param>
        /// <param name="recommenderFactory">The factory to create instances of the recommender with the specified mapping.</param>
        /// <param name="tests">The tests for the recommender; the sequence is copied into a list.</param>
        public RecommenderRun(
            string name,
            RecommenderDataset dataset,
            int foldCount,
            Func<SplittingMapping> splittingMappingFactory,
            Func<SplittingMapping, Recommender> recommenderFactory,
            IEnumerable<RecommenderTest> tests)
        {
            // Argument checks (active in debug builds only).
            Debug.Assert(
                !string.IsNullOrEmpty(name),
                "Test run name can not be null or empty.");
            Debug.Assert(
                dataset != null,
                "A valid dataset should be provided.");
            Debug.Assert(
                foldCount > 0,
                "A valid number of folds should be provided.");
            Debug.Assert(
                splittingMappingFactory != null,
                "A valid splitting mapping factory should be provided.");
            Debug.Assert(
                recommenderFactory != null,
                "A valid recommender factory should be provided.");
            Debug.Assert(
                tests != null,
                "A valid collection of recommender tests should be provided.");

            this.Name = name;
            this.RecommenderDataset = dataset;
            this.FoldCount = foldCount;
            this.SplittingMappingFactory = splittingMappingFactory;
            this.RecommenderFactory = recommenderFactory;

            // Snapshot the tests so later changes to the source sequence are not observed.
            this.Tests = tests.ToList();
        }
        /// <summary>
        /// Runs the module: evaluates recommended items against a test dataset and
        /// writes the NDCG value to a report file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string groundTruthPath = string.Empty;
            string predictionsPath = string.Empty;
            string reportPath = string.Empty;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--test-data",
                "FILE",
                "Test dataset used to obtain ground truth",
                value => groundTruthPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "Predictions to evaluate",
                value => predictionsPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--report",
                "FILE",
                "Evaluation report file",
                value => reportPath = value,
                CommandLineParameterType.Required);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            RecommenderDataset groundTruth = RecommenderDataset.Load(groundTruthPath);
            int minRating = Mappings.StarRatingRecommender.GetRatingInfo(groundTruth).MinStarRating;

            IDictionary<User, IEnumerable<Item>> recommendations =
                RecommenderPersistenceUtils.LoadRecommendedItems(predictionsPath);

            var recommendationEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
                Mappings.StarRatingRecommender.ForEvaluation());

            using (var report = new StreamWriter(reportPath))
            {
                // Ratings are shifted so that the minimum star rating maps to a gain of 1.
                var ndcg = recommendationEvaluator.ItemRecommendationMetric(
                    groundTruth,
                    recommendations,
                    Metrics.Ndcg,
                    rating => Convert.ToDouble(rating) - minRating + 1);

                report.WriteLine("NDCG: {0:0.000}", ndcg);
            }

            return true;
        }
Esempio n. 9
0
        /// <summary>
        /// Runs the module: evaluates related item predictions against a test dataset and
        /// writes NDCG values for the L1 and L2 similarity measures to a report file.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false if the command line could not be parsed.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string groundTruthPath = string.Empty;
            string predictionsPath = string.Empty;
            string reportPath = string.Empty;
            int minCommonRatings = 5;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--test-data",
                "FILE",
                "Test dataset used to obtain ground truth",
                value => groundTruthPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "Predictions to evaluate",
                value => predictionsPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--report",
                "FILE",
                "Evaluation report file",
                value => reportPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--min-common-items",
                "NUM",
                "Minimum number of users that the query item and the related item should have been rated by in common; defaults to 5",
                value => minCommonRatings = value,
                CommandLineParameterType.Optional);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            RecommenderDataset groundTruth = RecommenderDataset.Load(groundTruthPath);
            IDictionary<Item, IEnumerable<Item>> predictedRelatedItems =
                RecommenderPersistenceUtils.LoadRelatedItems(predictionsPath);

            var relatedItemEvaluator = new StarRatingRecommenderEvaluator<RecommenderDataset, User, Item, int>(
                Mappings.StarRatingRecommender.ForEvaluation());

            using (var report = new StreamWriter(reportPath))
            {
                // Each metric is computed right before its line is written.
                var manhattanNdcg = relatedItemEvaluator.RelatedItemsMetric(
                    groundTruth,
                    predictedRelatedItems,
                    minCommonRatings,
                    Metrics.Ndcg,
                    Metrics.NormalizedManhattanSimilarity);
                report.WriteLine("L1 Sim NDCG: {0:0.000}", manhattanNdcg);

                var euclideanNdcg = relatedItemEvaluator.RelatedItemsMetric(
                    groundTruth,
                    predictedRelatedItems,
                    minCommonRatings,
                    Metrics.Ndcg,
                    Metrics.NormalizedEuclideanSimilarity);
                report.WriteLine("L2 Sim NDCG: {0:0.000}", euclideanNdcg);
            }

            return true;
        }
Esempio n. 10
0
 /// <summary>
 /// Loads the dataset from the file named by this configuration element.
 /// </summary>
 /// <returns>The loaded dataset.</returns>
 public virtual RecommenderDataset Load()
 {
     RecommenderDataset loaded = RecommenderDataset.Load(this.FileName);
     return loaded;
 }
        /// <summary>
        /// Loads dataset from a given file.
        /// <para>
        /// Data file format:
        /// The first record (ignoring empty lines and lines starting with '#') describes
        /// min and max ratings and has form 'R,Min,Max'.
        /// Rows starting with 'U' describe a single user and have form 'U,UserId,UserFeatures'.
        /// Rows starting with 'I' describe a single item and have form 'I,ItemId,ItemFeatures'.
        /// Rows other than that describe instances and should have form 'UserID,ItemID,Rating'.
        /// Feature description has form 'FeatureIndex1:Value1|FeatureIndex2:Value2|...'
        /// If all the user features are zero or there are no user features in the dataset at all, the user description can be omitted. Same is true for items.
        /// </para>
        /// </summary>
        /// <param name="fileName">File to load data from.</param>
        /// <returns>The loaded dataset.</returns>
        public static RecommenderDataset Load(string fileName)
        {
            var rawObservations = new List<Tuple<string, string, int>>();
            var userIdToFeatures = new Dictionary<string, Vector>();
            var itemIdToFeatures = new Dictionary<string, Vector>();
            int? minRating = null;
            int? maxRating = null;
            int userFeatureCount = 0;
            int itemFeatureCount = 0;

            // NOTE(review): the code below relies on RaiseError / RaiseGlobalError aborting
            // the load (presumably by throwing) - confirm against FileParsingContext.
            var parsingContext = new FileParsingContext(fileName);

            using (var reader = new StreamReader(fileName))
            {
                string line;
                bool isFirstRecord = true;
                while ((line = reader.ReadLine()) != null)
                {
                    parsingContext.NextLine(line);

                    // Ordinal comparison: '#' is a format marker, not linguistic text.
                    if (line.Length == 0 || line.StartsWith("#", StringComparison.Ordinal))
                    {
                        continue; // Skip comments and empty lines
                    }

                    string[] splits = line.Split(',');

                    if (isFirstRecord)
                    {
                        //// Parse rating record

                        int minRatingValue = 0, maxRatingValue = 0;
                        if (splits.Length != 3 ||
                            splits[0].Trim() != "R" ||
                            !int.TryParse(splits[1], out minRatingValue) ||
                            !int.TryParse(splits[2], out maxRatingValue))
                        {
                            parsingContext.RaiseError("Invalid rating info record.");
                        }

                        minRating = minRatingValue;
                        maxRating = maxRatingValue;
                        isFirstRecord = false;
                    }
                    else if (splits[0].Trim() == "U")
                    {
                        //// Parse user record

                        if (splits.Length != 3)
                        {
                            parsingContext.RaiseError("Invalid user record.");
                        }

                        string userId = splits[1].Trim();
                        if (userIdToFeatures.ContainsKey(userId))
                        {
                            parsingContext.RaiseError("Record describing user '{0}' is presented more than once.", userId);
                        }

                        Vector features = ParseFeatures(splits[2], parsingContext, ref userFeatureCount);
                        userIdToFeatures.Add(userId, features);
                    }
                    else if (splits[0].Trim() == "I")
                    {
                        //// Parse item record

                        if (splits.Length != 3)
                        {
                            parsingContext.RaiseError("Invalid item record.");
                        }

                        string itemId = splits[1].Trim();
                        if (itemIdToFeatures.ContainsKey(itemId))
                        {
                            parsingContext.RaiseError("Record describing item '{0}' is presented more than once.", itemId);
                        }

                        Vector features = ParseFeatures(splits[2], parsingContext, ref itemFeatureCount);
                        itemIdToFeatures.Add(itemId, features);
                    }
                    else
                    {
                        //// Parse instance record

                        // Validate the field count BEFORE indexing into the fields, so that a
                        // line without enough commas is reported as a parse error instead of
                        // failing with an IndexOutOfRangeException.
                        int rating = 0;
                        if (splits.Length != 3 || !int.TryParse(splits[2], out rating))
                        {
                            parsingContext.RaiseError("Invalid instance record.", line);
                        }

                        string userId = splits[0].Trim();
                        string itemId = splits[1].Trim();

                        rawObservations.Add(Tuple.Create(userId, itemId, rating));
                    }
                }
            }

            // minRating and maxRating are always assigned together, so checking one is enough.
            if (!minRating.HasValue)
            {
                parsingContext.RaiseGlobalError("Rating info is missing.");
            }

            var result = new RecommenderDataset
            {
                StarRatingInfo = new StarRatingInfo(minRating.Value, maxRating.Value)
            };

            // Observations are resolved only after the whole file has been read, so user and
            // item records may appear anywhere in the file relative to the instances.
            foreach (var observation in rawObservations)
            {
                string userId = observation.Item1;
                string itemId = observation.Item2;
                int rating = observation.Item3;

                if (rating < minRating.Value || rating > maxRating.Value)
                {
                    parsingContext.RaiseGlobalError("One of the ratings is inconsistent with the specified rating info.");
                }

                User user = RetrieveEntity(userId, result.idToUser, userIdToFeatures, userFeatureCount, (id, features) => new User(id, features));
                Item item = RetrieveEntity(itemId, result.idToItem, itemIdToFeatures, itemFeatureCount, (id, features) => new Item(id, features));
                result.observations.Add(new RatedUserItem(user, item, rating));
            }

            return result;
        }