Example #1
        /// <summary>Write item predictions (scores) to a TextWriter object</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="candidate_items">list of candidate items</param>
        /// <param name="num_predictions">number of items to return per user, -1 if there should be no limit</param>
        /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
        /// <param name="users">a list of users to make recommendations for; if null, all users in train will be provided with recommendations</param>
        /// <param name="user_mapping">an <see cref="IMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IMapping"/> object for the item IDs</param>
        /// <param name="repeated_items">true if items that a user has already accessed shall also be predicted</param>
        static public void WritePredictions(
            this IRecommender recommender,
            IPosOnlyFeedback train,
            ICollection <int> candidate_items,
            int num_predictions,
            TextWriter writer,
            IList <int> users     = null,
            IMapping user_mapping = null, IMapping item_mapping = null,
            bool repeated_items   = false)
        {
            if (users == null)
            {
                users = new List <int>(train.AllUsers);
            }

            ICollection <int> ignore_items = new int[0];

            foreach (int user_id in users)
            {
                if (!repeated_items)
                {
                    ignore_items = train.UserMatrix[user_id];
                }
                WritePredictions(recommender, user_id, candidate_items, ignore_items, num_predictions, writer, user_mapping, item_mapping);
            }
        }
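A rough usage sketch for this overload, assuming the MyMediaLite version that uses IMapping (Mapping, ItemData and MostPopular come from the usual MyMediaLite namespaces; the file names and the recommender choice are placeholders):

    // hypothetical usage sketch -- not from the original page
    var user_mapping = new Mapping();
    var item_mapping = new Mapping();
    var train = ItemData.Read("train.txt", user_mapping, item_mapping);
    var recommender = new MostPopular() { Feedback = train };
    recommender.Train();
    using (var writer = new StreamWriter("predictions.txt"))
        recommender.WritePredictions(train, train.AllItems, 10, writer, null, user_mapping, item_mapping);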
Example #2
        /// <summary>Create a simple split of positive-only item prediction data</summary>
        /// <param name="feedback">the dataset</param>
        /// <param name="ratio">the ratio of positive events to use for validation</param>
        public PosOnlyFeedbackSimpleSplit(IPosOnlyFeedback feedback, double ratio)
        {
            if (ratio <= 0)
            {
                throw new ArgumentException();
            }

            // create train/test data structures
            var train = new T();
            var test  = new T();

            // assign indices to training or validation part
            Random random = new Random();

            foreach (int user_id in feedback.AllUsers)
            {
                foreach (int item_id in feedback.UserMatrix[user_id])
                {
                    if (random.NextDouble() < ratio)
                    {
                        test.Add(user_id, item_id);
                    }
                    else
                    {
                        train.Add(user_id, item_id);
                    }
                }
            }

            // create split data structures
            Train = new List <IPosOnlyFeedback>(NumberOfFolds);
            Test  = new List <IPosOnlyFeedback>(NumberOfFolds);
            Train.Add(train);
            Test.Add(test);
        }
Example #3
        /// <summary>Create a simple split of positive-only item prediction data</summary>
        /// <param name="feedback">the dataset</param>
        /// <param name="ratio">the ratio of positive events to use for validation</param>
        public PosOnlyFeedbackSimpleSplit(IPosOnlyFeedback feedback, double ratio)
        {
            if (ratio <= 0)
            {
                throw new ArgumentException("ratio must be greater than 0");
            }

            // create train/test data structures
            var train = new T();
            var test  = new T();

            // assign indices to training or validation part
            Random random = MyMediaLite.Random.GetInstance();

            foreach (int index in feedback.RandomIndex)
            {
                if (random.NextDouble() < ratio)
                {
                    test.Add(feedback.Users[index], feedback.Items[index]);
                }
                else
                {
                    train.Add(feedback.Users[index], feedback.Items[index]);
                }
            }

            this.Train = new IPosOnlyFeedback[] { train };
            this.Test  = new IPosOnlyFeedback[] { test };
        }
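Both constructors end up with single-element Train and Test collections; a minimal sketch of consuming such a split, assuming a concrete generic argument and an existing feedback dataset:

    // hypothetical usage sketch -- hold out roughly 20% of the positive events for validation
    var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(feedback, 0.2);
    IPosOnlyFeedback train_part = split.Train[0];
    IPosOnlyFeedback test_part  = split.Test[0];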
Example #4
		/// <param name="candidate_items">a list of integers with all candidate items</param>
		/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
		/// <param name="test">test cases</param>
		/// <param name="training">training data</param>
		static public IList<int> Candidates(
			IList<int> candidate_items,
			CandidateItems candidate_item_mode,
			IPosOnlyFeedback test,
			IPosOnlyFeedback training)
		{
			IList<int> test_items = (test != null) ? test.AllItems : new int[0];
			IList<int> result = null;

			switch (candidate_item_mode)
			{
				case CandidateItems.TRAINING:
					result = training.AllItems.ToArray();
					break;
				case CandidateItems.TEST:
					result = test.AllItems.ToArray();
					break;
				case CandidateItems.OVERLAP:
					result = test_items.Intersect(training.AllItems).ToList();
					break;
				case CandidateItems.UNION:
					result = test_items.Union(training.AllItems).ToList();
					break;
				case CandidateItems.EXPLICIT:
					if (candidate_items == null)
						throw new ArgumentNullException("candidate_items");
					result = candidate_items.ToArray();
					break;
				default:
					throw new ArgumentException("Unknown candidate_item_mode: " + candidate_item_mode.ToString());
			}

			result.Shuffle();
			return result;
		}
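A rough usage sketch; as the call in Example #25 below suggests, this helper is a static method of the Items evaluation class (test_data and training_data are assumed IPosOnlyFeedback instances):

    // hypothetical usage sketch -- candidate items are those occurring in both test and training data
    IList<int> candidates = Items.Candidates(null, CandidateItems.OVERLAP, test_data, training_data);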
Example #5
        /// <summary>Display data statistics for item recommendation datasets</summary>
        /// <param name="training_data">the training dataset</param>
        /// <param name="test_data">the test dataset</param>
        /// <param name="user_attributes">the user attributes</param>
        /// <param name="item_attributes">the item attributes</param>
        public static string Statistics(
            this IPosOnlyFeedback training_data, IPosOnlyFeedback test_data = null,
            List <IBooleanMatrix> user_attributes = null, List <IBooleanMatrix> item_attributes = null)
        {
            // training data stats
            int    num_users   = training_data.AllUsers.Count;
            int    num_items   = training_data.AllItems.Count;
            long   matrix_size = (long)num_users * num_items;
            long   empty_size  = (long)matrix_size - training_data.Count;
            double sparsity    = (double)100L * empty_size / matrix_size;
            string s           = string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n", num_users, num_items, training_data.Count, sparsity);

            // test data stats
            if (test_data != null)
            {
                num_users   = test_data.AllUsers.Count;
                num_items   = test_data.AllItems.Count;
                matrix_size = (long)num_users * num_items;
                empty_size  = (long)matrix_size - test_data.Count;
                sparsity    = (double)100L * empty_size / matrix_size;               // TODO depends on the eval scheme whether this is correct
                s          += string.Format(CultureInfo.InvariantCulture, "test data:     {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n", num_users, num_items, test_data.Count, sparsity);
            }

            return(s + Statistics(user_attributes, item_attributes));
        }
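Because this is an extension method on IPosOnlyFeedback, a one-line usage sketch looks like this (the datasets are assumed to be loaded already):

    // hypothetical usage sketch -- print statistics for a training/test pair
    Console.Write(training_data.Statistics(test_data));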
Example #6
        /// <param name="candidate_items">a list of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        public static IList<int> Candidates(
			IList<int> candidate_items,
			CandidateItems candidate_item_mode,
			IPosOnlyFeedback test,
			IPosOnlyFeedback training)
        {
            switch (candidate_item_mode)
            {
                case CandidateItems.TRAINING: return training.AllItems;
                case CandidateItems.TEST:     return test.AllItems;
                case CandidateItems.OVERLAP:
                    var result = test.AllItems.Intersect(training.AllItems).ToList();
                    result.Shuffle();
                    return result;
                case CandidateItems.UNION:
                    result = test.AllItems.Union(training.AllItems).ToList();
                    result.Shuffle();
                    return result;
                case CandidateItems.EXPLICIT:
                    if (candidate_items == null)
                        throw new ArgumentNullException("candidate_items");
                    return candidate_items;
                default:
                    throw new ArgumentException("Unknown candidate_item_mode: " + candidate_item_mode.ToString());
            }
        }
Example #7
        /// <param name="candidate_items">a list of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        static public IList <int> Candidates(
            IList <int> candidate_items,
            CandidateItems candidate_item_mode,
            IPosOnlyFeedback test,
            IPosOnlyFeedback training)
        {
            switch (candidate_item_mode)
            {
            case CandidateItems.TRAINING: return(training.AllItems);

            case CandidateItems.TEST:     return(test.AllItems);

            case CandidateItems.OVERLAP:
                var result = test.AllItems.Intersect(training.AllItems).ToList();
                result.Shuffle();
                return(result);

            case CandidateItems.UNION:
                result = test.AllItems.Union(training.AllItems).ToList();
                result.Shuffle();
                return(result);

            case CandidateItems.EXPLICIT:
                if (candidate_items == null)
                {
                    throw new ArgumentNullException("candidate_items");
                }
                return(candidate_items);

            default:
                throw new ArgumentException("Unknown candidate_item_mode: " + candidate_item_mode.ToString());
            }
        }
Example #8
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// The evaluation protocol works as follows:
        /// For every test user, evaluate on the test items, then add those test items to the training set and perform an incremental update.
        /// The sequence of users is random.
        /// </remarks>
        /// <param name="recommender">the item recommender to be evaluated</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data (must be connected to the recommender's training data)</param>
        /// <param name="test_users">a list of all test user IDs</param>
        /// <param name="candidate_items">a list of all candidate item IDs</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        public static ItemRecommendationEvaluationResults EvaluateOnline(
			this IRecommender recommender,
			IPosOnlyFeedback test, IPosOnlyFeedback training,
			IList<int> test_users, IList<int> candidate_items,
			CandidateItems candidate_item_mode)
        {
            var incremental_recommender = recommender as IIncrementalItemRecommender;
            if (incremental_recommender == null)
                throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");

            // prepare candidate items once to avoid recreating them
            switch (candidate_item_mode)
            {
                case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
                case CandidateItems.TEST:     candidate_items = test.AllItems; break;
                case CandidateItems.OVERLAP:  candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
                case CandidateItems.UNION:    candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
            }

            test_users.Shuffle();
            var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();
            foreach (int user_id in test_users)
            {
                if (candidate_items.Intersect(test.ByUser[user_id]).Count() == 0)
                    continue;

                // prepare data
                var current_test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
                foreach (int index in test.ByUser[user_id])
                    current_test_data.Add(user_id, test.Items[index]);
                // evaluate user
                var current_result = Items.Evaluate(recommender, current_test_data, training, current_test_data.AllUsers, candidate_items, CandidateItems.EXPLICIT);
                results_by_user[user_id] = current_result;

                // update recommender
                var tuples = new List<Tuple<int, int>>();
                foreach (int index in test.ByUser[user_id])
                    tuples.Add(Tuple.Create(user_id, test.Items[index]));
                incremental_recommender.AddFeedback(tuples);
            }

            var results = new ItemRecommendationEvaluationResults();

            foreach (int u in results_by_user.Keys)
                foreach (string measure in Items.Measures)
                    results[measure] += results_by_user[u][measure];

            foreach (string measure in Items.Measures)
                results[measure] /= results_by_user.Count;

            results["num_users"] = results_by_user.Count;
            results["num_items"] = candidate_items.Count;
            results["num_lists"] = results_by_user.Count;

            return results;
        }
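A rough usage sketch of the online protocol; the recommender must implement IIncrementalItemRecommender, and the dataset variables are assumptions:

    // hypothetical usage sketch -- online evaluation with incremental updates after each test user
    var online_results = recommender.EvaluateOnline(
        test_data, training_data,
        test_data.AllUsers, null, CandidateItems.UNION);
    foreach (string measure in new string[] { "AUC", "MAP", "NDCG", "MRR" })
        Console.WriteLine("{0} {1:F4}", measure, online_results[measure]);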
Example #9
        /// <summary>Create a k-fold split of positive-only item prediction data</summary>
        /// <remarks>See the class description for details.</remarks>
        /// <param name="feedback">the dataset</param>
        /// <param name="num_folds">the number of folds</param>
        public PosOnlyFeedbackCrossValidationSplit(IPosOnlyFeedback feedback, uint num_folds)
        {
            if (num_folds < 2)
            {
                throw new ArgumentException("num_folds must be at least 2.");
            }

            NumberOfFolds = num_folds;
            Train         = new IPosOnlyFeedback[num_folds];
            Test          = new IPosOnlyFeedback[num_folds];
            for (int f = 0; f < num_folds; f++)
            {
                Train[f] = new T();
                Test[f]  = new T();
            }

            // assign events to folds
            int pos = 0;

            foreach (int item_id in feedback.AllItems)
            {
                var item_indices = feedback.ByItem[item_id];

                if (item_indices.Count < num_folds)
                {
                    foreach (int index in item_indices)
                    {
                        for (int f = 0; f < num_folds; f++)
                        {
                            Train[f].Add(feedback.Users[index], feedback.Items[index]);
                        }
                    }
                }
                else
                {
                    item_indices.Shuffle();

                    foreach (int index in item_indices)
                    {
                        int user_id = feedback.Users[index];
                        for (int f = 0; f < num_folds; f++)
                        {
                            if (pos % num_folds == f)
                            {
                                Test[f].Add(user_id, item_id);
                            }
                            else
                            {
                                Train[f].Add(user_id, item_id);
                            }
                        }
                        pos++;
                    }
                }
            }
        }
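A minimal sketch of iterating over the folds; the concrete generic argument and the feedback variable are assumptions:

    // hypothetical usage sketch -- 5-fold cross-validation over positive-only feedback
    var cv = new PosOnlyFeedbackCrossValidationSplit<PosOnlyFeedback<SparseBooleanMatrix>>(feedback, 5);
    for (int fold = 0; fold < cv.NumberOfFolds; fold++)
    {
        IPosOnlyFeedback fold_train = cv.Train[fold];
        IPosOnlyFeedback fold_test  = cv.Test[fold];
        // train a recommender on fold_train and evaluate it on fold_test here
    }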
Example #10
        /// <summary>Write item predictions (scores) to a file</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="candidate_items">list of candidate items</param>
        /// <param name="num_predictions">number of items to return per user, -1 if there should be no limit</param>
        /// <param name="filename">the name of the file to write to</param>
        /// <param name="users">a list of users to make recommendations for</param>
        /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs</param>
        public static void WritePredictions(
			this IRecommender recommender,
			IPosOnlyFeedback train,
			System.Collections.Generic.IList<int> candidate_items,
			int num_predictions,
			string filename,
			System.Collections.Generic.IList<int> users = null,
			IEntityMapping user_mapping = null, IEntityMapping item_mapping = null)
        {
            using (var writer = new StreamWriter(filename))
                WritePredictions(recommender, train, candidate_items, num_predictions, writer, users, user_mapping, item_mapping);
        }
Example #11
        /// <summary>Write item predictions (scores) for all users to a TextWriter object</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="relevant_items">the list of candidate items</param>
        /// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
        /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs</param>
        /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
        static public void WritePredictions(
            IRecommender recommender,
            IPosOnlyFeedback train,
            ICollection <int> relevant_items,
            int num_predictions,
            IEntityMapping user_mapping, IEntityMapping item_mapping,
            TextWriter writer)
        {
            var relevant_users = new List <int>(user_mapping.InternalIDs);

            WritePredictions(recommender, train, relevant_users, relevant_items, num_predictions, user_mapping, item_mapping, writer);
        }
Example #12
        /// <summary>Write item predictions (scores) to a file</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="candidate_items">list of candidate items</param>
        /// <param name="num_predictions">number of items to return per user, -1 if there should be no limit</param>
        /// <param name="filename">the name of the file to write to</param>
        /// <param name="users">a list of users to make recommendations for</param>
        /// <param name="user_mapping">an <see cref="IMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IMapping"/> object for the item IDs</param>
        /// <param name="repeated_items">true if items that a user has already accessed shall also be predicted</param>
        public static void WritePredictions(
			this IRecommender recommender,
			IPosOnlyFeedback train,
			IList<int> candidate_items,
			int num_predictions,
			string filename,
			IList<int> users = null,
			IMapping user_mapping = null, IMapping item_mapping = null,
			bool repeated_items = false)
        {
            using (var writer = new StreamWriter(filename))
                WritePredictions(recommender, train, candidate_items, num_predictions, writer, users, user_mapping, item_mapping, repeated_items);
        }
Example #13
 /// <summary>Write item predictions (scores) to a file</summary>
 /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
 /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
 /// <param name="candidate_items">list of candidate items</param>
 /// <param name="num_predictions">number of items to return per user, -1 if there should be no limit</param>
 /// <param name="filename">the name of the file to write to</param>
 /// <param name="users">a list of users to make recommendations for</param>
 /// <param name="user_mapping">an <see cref="IMapping"/> object for the user IDs</param>
 /// <param name="item_mapping">an <see cref="IMapping"/> object for the item IDs</param>
 /// <param name="repeated_items">true if items that a user has already accessed shall also be predicted</param>
 static public void WritePredictions(
     this IRecommender recommender,
     IPosOnlyFeedback train,
     IList <int> candidate_items,
     int num_predictions,
     string filename,
     IList <int> users     = null,
     IMapping user_mapping = null, IMapping item_mapping = null,
     bool repeated_items   = false)
 {
     using (var writer = FileSystem.CreateStreamWriter(filename))
         WritePredictions(recommender, train, candidate_items, num_predictions, writer, users, user_mapping, item_mapping, repeated_items);
 }
Example #14
		// TODO generalize more to save code ...
		// TODO generalize that normal protocol is just an instance of this? Only if w/o performance penalty ...

		/// <summary>For a given user and the test dataset, return a dictionary of items filtered by attributes</summary>
		/// <param name="user_id">the user ID</param>
		/// <param name="test">the test dataset</param>
		/// <param name="item_attributes"></param>
		/// <returns>a dictionary containing a mapping from attribute IDs to collections of item IDs</returns>
		static public Dictionary<int, ICollection<int>> GetFilteredItems(int user_id, IPosOnlyFeedback test,
		                                                                 SparseBooleanMatrix item_attributes)
		{
			var filtered_items = new Dictionary<int, ICollection<int>>();

			foreach (int item_id in test.UserMatrix[user_id])
				foreach (int attribute_id in item_attributes[item_id])
					if (filtered_items.ContainsKey(attribute_id))
						filtered_items[attribute_id].Add(item_id);
					else
						filtered_items[attribute_id] = new HashSet<int>() { item_id };

			return filtered_items;
		}
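A rough usage sketch (called from within, or qualified by, its containing class; user_id, test_data and item_attributes are assumed to exist):

    // hypothetical usage sketch -- the test items of one user, grouped by item attribute
    var items_by_attribute = GetFilteredItems(user_id, test_data, item_attributes);
    foreach (var pair in items_by_attribute)
        Console.WriteLine("attribute {0}: {1} items", pair.Key, pair.Value.Count);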
Example #15
 /// <summary>Write item predictions (scores) to a TextWriter object</summary>
 /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
 /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
 /// <param name="relevant_users">a list of users to make recommendations for</param>
 /// <param name="relevant_items">the list of candidate items</param>
 /// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
 /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs</param>
 /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs</param>
 /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
 static public void WritePredictions(
     IRecommender recommender,
     IPosOnlyFeedback train,
     IList <int> relevant_users,
     ICollection <int> relevant_items,
     int num_predictions,
     IEntityMapping user_mapping, IEntityMapping item_mapping,
     TextWriter writer)
 {
     foreach (int user_id in relevant_users)
     {
         var ignore_items = train.UserMatrix[user_id];
         WritePredictions(recommender, user_id, relevant_items, ignore_items, num_predictions, user_mapping, item_mapping, writer);
     }
 }
Example #16
        /// <summary>Write item predictions (scores) to a TextWriter object</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="candidate_items">list of candidate items</param>
        /// <param name="num_predictions">number of items to return per user, -1 if there should be no limit</param>
        /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
        /// <param name="users">a list of users to make recommendations for; if null, all users in train will be provided with recommendations</param>
        /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs</param>
        public static void WritePredictions(
			this IRecommender recommender,
			IPosOnlyFeedback train,
			System.Collections.Generic.IList<int> candidate_items,
			int num_predictions,
			TextWriter writer,
			System.Collections.Generic.IList<int> users = null,
			IEntityMapping user_mapping = null, IEntityMapping item_mapping = null)
        {
            if (users == null)
                users = new List<int>(train.AllUsers);

            foreach (int user_id in users)
            {
                var ignore_items = train.UserMatrix[user_id];
                WritePredictions(recommender, user_id, candidate_items, ignore_items, num_predictions, writer, user_mapping, item_mapping);
            }
        }
Example #17
        // TODO there are too many different versions of this method interface - we should simplify the API

        /// <summary>Write item predictions (scores) for all users to a file</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="relevant_items">the list of candidate items</param>
        /// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
        /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs</param>
        /// <param name="filename">the name of the file to write to</param>
        static public void WritePredictions(
            IRecommender recommender,
            IPosOnlyFeedback train,
            ICollection <int> relevant_items,
            int num_predictions,
            IEntityMapping user_mapping, IEntityMapping item_mapping,
            string filename)
        {
            if (filename.Equals("-"))
            {
                WritePredictions(recommender, train, relevant_items, num_predictions, user_mapping, item_mapping, Console.Out);
            }
            else
            {
                using (var writer = new StreamWriter(filename))
                    WritePredictions(recommender, train, relevant_items, num_predictions, user_mapping, item_mapping, writer);
            }
        }
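A minimal sketch for this overload, which writes to standard output when the filename is "-"; the EntityMapping objects and datasets are assumptions:

    // hypothetical usage sketch -- top-10 predictions for every known user, written to stdout
    WritePredictions(recommender, train_data, train_data.AllItems, 10, user_mapping, item_mapping, "-");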
Example #18
    static TimeSpan EvaluateRecommender(BPRMF_Mapping recommender, IPosOnlyFeedback test_data, IPosOnlyFeedback train_data)
    {
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "fit {0}", recommender.ComputeFit()));

        TimeSpan seconds = Utils.MeasureTime(delegate()
        {
            var result = Items.Evaluate(
                recommender,
                test_data,
                train_data,
                test_data.AllUsers,
                relevant_items
                );
            DisplayResults(result);
        });

        Console.Write(" testing " + seconds);

        return(seconds);
    }
Example #19
		public void SetUp()
		{
			training_data = new PosOnlyFeedback<SparseBooleanMatrix>();
			training_data.Add(1, 1);
			training_data.Add(1, 2);
			training_data.Add(2, 2);
			training_data.Add(2, 3);
			training_data.Add(3, 1);
			training_data.Add(3, 2);

			recommender = new MostPopular() { Feedback = training_data };
			recommender.Train();

			test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
			test_data.Add(2, 3);
			test_data.Add(2, 4);
			test_data.Add(4, 4);

			all_users = Enumerable.Range(1, 4).ToList();
			candidate_items = Enumerable.Range(1, 5).ToList();
		}
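A hedged sketch of how a test might continue from this fixture, using the Evaluate extension shown in Example #24 below (the exact check is an assumption):

    // hypothetical test body using the fixture above
    var results = recommender.Evaluate(test_data, training_data, all_users, candidate_items, CandidateItems.EXPLICIT);
    Console.WriteLine("AUC on the toy data: {0:F4}", results["AUC"]);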
Example #20
        /// <summary>Write item predictions (scores) to a TextWriter object</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="candidate_items">list of candidate items</param>
        /// <param name="num_predictions">number of items to return per user, -1 if there should be no limit</param>
        /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
        /// <param name="users">a list of users to make recommendations for; if null, all users in train will be provided with recommendations</param>
        /// <param name="user_mapping">an <see cref="IMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IMapping"/> object for the item IDs</param>
        /// <param name="repeated_items">true if items that a user has already accessed shall also be predicted</param>
        public static void WritePredictions(
			this IRecommender recommender,
			IPosOnlyFeedback train,
			ICollection<int> candidate_items,
			int num_predictions,
			TextWriter writer,
			IList<int> users = null,
			IMapping user_mapping = null, IMapping item_mapping = null,
			bool repeated_items = false)
        {
            if (users == null)
                users = new List<int>(train.AllUsers);

            ICollection<int> ignore_items = new int[0];
            foreach (int user_id in users)
            {
                if (!repeated_items)
                    ignore_items = train.UserMatrix[user_id];
                WritePredictions(recommender, user_id, candidate_items, ignore_items, num_predictions, writer, user_mapping, item_mapping);
            }
        }
Example #21
        /// <summary>Display data statistics for item recommendation datasets</summary>
        /// <param name="training_data">the training dataset</param>
        /// <param name="test_data">the test dataset</param>
        /// <param name="recommender">the recommender that will be used</param>
        public static void DisplayDataStats(IPosOnlyFeedback training_data, IPosOnlyFeedback test_data, IItemRecommender recommender)
        {
            // training data stats
            int    num_users   = training_data.AllUsers.Count;
            int    num_items   = training_data.AllItems.Count;
            long   matrix_size = (long)num_users * num_items;
            long   empty_size  = (long)matrix_size - training_data.Count;
            double sparsity    = (double)100L * empty_size / matrix_size;

            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}", num_users, num_items, training_data.Count, sparsity));

            // test data stats
            if (test_data != null)
            {
                num_users   = test_data.AllUsers.Count;
                num_items   = test_data.AllItems.Count;
                matrix_size = (long)num_users * num_items;
                empty_size  = (long)matrix_size - test_data.Count;
                sparsity    = (double)100L * empty_size / matrix_size;
                Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "test data:     {0} users, {1} items, {2} events, sparsity {3,0:0.#####}", num_users, num_items, test_data.Count, sparsity));
            }

            // attribute stats
            if (recommender is IUserAttributeAwareRecommender)
            {
                Console.WriteLine("{0} user attributes for {1} users",
                                  ((IUserAttributeAwareRecommender)recommender).NumUserAttributes,
                                  ((IUserAttributeAwareRecommender)recommender).UserAttributes.NumberOfRows);
            }
            if (recommender is IItemAttributeAwareRecommender)
            {
                Console.WriteLine("{0} item attributes for {1} items",
                                  ((IItemAttributeAwareRecommender)recommender).NumItemAttributes,
                                  ((IItemAttributeAwareRecommender)recommender).ItemAttributes.NumberOfRows);
            }
        }
Example #22
        public void SetUp()
        {
            training_data = new PosOnlyFeedback <SparseBooleanMatrix>();
            training_data.Add(1, 1);
            training_data.Add(1, 2);
            training_data.Add(2, 2);
            training_data.Add(2, 3);
            training_data.Add(3, 1);
            training_data.Add(3, 2);

            recommender = new MostPopular()
            {
                Feedback = training_data
            };
            recommender.Train();

            test_data = new PosOnlyFeedback <SparseBooleanMatrix>();
            test_data.Add(2, 3);
            test_data.Add(2, 4);
            test_data.Add(4, 4);

            all_users       = Enumerable.Range(1, 4).ToList();
            candidate_items = Enumerable.Range(1, 5).ToList();
        }
Example #23
    public static void Main(string[] args)
    {
        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);

        // check number of command line parameters
        if (args.Length < 4)
            Usage("Not enough arguments.");

        // read command line parameters
        RecommenderParameters parameters = null;
        try	{ parameters = new RecommenderParameters(args, 4);	}
        catch (ArgumentException e)	{ Usage(e.Message); 		}

        // other parameters
        string data_dir             = parameters.GetRemoveString( "data_dir");
        string relevant_items_file  = parameters.GetRemoveString( "relevant_items");
        string item_attributes_file = parameters.GetRemoveString( "item_attributes");
        string user_attributes_file = parameters.GetRemoveString( "user_attributes");
        //string save_mapping_file    = parameters.GetRemoveString( "save_model");
        int random_seed             = parameters.GetRemoveInt32(  "random_seed", -1);
        bool no_eval                = parameters.GetRemoveBool(   "no_eval", false);
        bool compute_fit            = parameters.GetRemoveBool(   "compute_fit", false);

        if (random_seed != -1)
            MyMediaLite.Util.Random.InitInstance(random_seed);

        // main data files and method
        string trainfile = args[0].Equals("-") ? "-" : Path.Combine(data_dir, args[0]);
        string testfile  = args[1].Equals("-") ? "-" : Path.Combine(data_dir, args[1]);
        string load_model_file = args[2];
        string method    = args[3];

        // set correct recommender
        switch (method)
        {
            case "BPR-MF-ItemMapping":
                recommender = Recommender.Configure(bprmf_map, parameters, Usage);
                break;
            case "BPR-MF-ItemMapping-Optimal":
                recommender = Recommender.Configure(bprmf_map_bpr, parameters, Usage);
                break;
            case "BPR-MF-ItemMapping-Complex":
                recommender = Recommender.Configure(bprmf_map_com, parameters, Usage);
                break;
            case "BPR-MF-ItemMapping-kNN":
                recommender = Recommender.Configure(bprmf_map_knn, parameters, Usage);
                break;
            case "BPR-MF-ItemMapping-SVR":
                recommender = Recommender.Configure(bprmf_map_svr, parameters, Usage);
                break;
            case "BPR-MF-UserMapping":
                recommender = Recommender.Configure(bprmf_user_map, parameters, Usage);
                break;
            case "BPR-MF-UserMapping-Optimal":
                recommender = Recommender.Configure(bprmf_user_map_bpr, parameters, Usage);
                break;
            default:
                Usage(string.Format("Unknown method: '{0}'", method));
                break;
        }

        if (parameters.CheckForLeftovers())
            Usage(-1);

        // ID mapping objects
        var user_mapping = new EntityMapping();
        var item_mapping = new EntityMapping();

        // training data
        training_data = ItemRecommendation.Read(Path.Combine(data_dir, trainfile), user_mapping, item_mapping);
        recommender.Feedback = training_data;

        // relevant items
        if (! relevant_items_file.Equals(string.Empty) )
            relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
        else
            relevant_items = training_data.AllItems;

        // user attributes
        if (recommender is IUserAttributeAwareRecommender)
        {
            if (user_attributes_file.Equals(string.Empty))
                Usage("Recommender expects user_attributes=FILE.");
            else
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
        }

        // item attributes
        if (recommender is IItemAttributeAwareRecommender)
        {
            if (item_attributes_file.Equals(string.Empty))
                Usage("Recommender expects item_attributes=FILE.");
            else
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
        }

        // test data
        test_data = ItemRecommendation.Read( Path.Combine(data_dir, testfile), user_mapping, item_mapping );

        TimeSpan seconds;

        Recommender.LoadModel(recommender, load_model_file);

        // set the maximum user and item IDs in the recommender - this is important for the cold start use case
        recommender.MaxUserID = user_mapping.InternalIDs.Max();
        recommender.MaxItemID = item_mapping.InternalIDs.Max();

        DisplayDataStats();

        Console.Write(recommender.ToString() + " ");

        if (compute_fit)
        {
            seconds = Utils.MeasureTime( delegate() {
                int num_iter = recommender.NumIterMapping;
                recommender.NumIterMapping = 0;
                recommender.LearnAttributeToFactorMapping();
                Console.Error.WriteLine();
                Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "iteration {0} fit {1}", -1, recommender.ComputeFit()));

                recommender.NumIterMapping = 1;
                for (int i = 0; i < num_iter; i++, i++)
                {
                    recommender.IterateMapping();
                    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "iteration {0} fit {1}", i, recommender.ComputeFit()));
                }
                recommender.NumIterMapping = num_iter; // restore
            } );
        }
        else
        {
            seconds = Utils.MeasureTime( delegate() {
                recommender.LearnAttributeToFactorMapping();
            } );
        }
        Console.Write("mapping_time " + seconds + " ");

        if (!no_eval)
            seconds = EvaluateRecommender(recommender, test_data, training_data);
        Console.WriteLine();
    }
Example #24
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// unless the boolean argument repeated_events is set.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="test_users">a list of integers with all test users; if null, use all users in the test cases</param>
        /// <param name="candidate_items">a list of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
        /// <returns>a dictionary containing the evaluation results (default is false)</returns>
        public static ItemRecommendationEvaluationResults Evaluate(
            this IRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback training,
            IList<int> test_users = null,
            IList<int> candidate_items = null,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            bool repeated_events = false)
        {
            switch (candidate_item_mode)
            {
                case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
                case CandidateItems.TEST: candidate_items = test.AllItems; break;
                case CandidateItems.OVERLAP: candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
                case CandidateItems.UNION: candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
            }
            if (candidate_items == null)
                throw new ArgumentNullException("candidate_items");
            if (test_users == null)
                test_users = test.AllUsers;

            int num_users = 0;
            var result = new ItemRecommendationEvaluationResults();

            // make sure that UserMatrix is completely initialized before entering parallel code
            var training_user_matrix = training.UserMatrix;
            var test_user_matrix = test.UserMatrix;

            Parallel.ForEach(test_users, user_id =>
            {
                try
                {
                    var correct_items = new HashSet<int>(test_user_matrix[user_id]);
                    correct_items.IntersectWith(candidate_items);

                    // the number of items that will be used for this user
                    var candidate_items_in_train = training_user_matrix[user_id] == null ? new HashSet<int>() : new HashSet<int>(training_user_matrix[user_id]);
                    candidate_items_in_train.IntersectWith(candidate_items);
                    int num_eval_items = candidate_items.Count - (repeated_events ? 0 : candidate_items_in_train.Count());

                    // skip all users that have 0 or #candidate_items test items
                    if (correct_items.Count == 0)
                        return;
                    if (num_eval_items == correct_items.Count)
                        return;

                    IList<int> prediction_list = recommender.PredictItems(user_id, candidate_items);
                    if (prediction_list.Count != candidate_items.Count)
                        throw new Exception("Not all items have been ranked.");

                    ICollection<int> ignore_items = (repeated_events || training_user_matrix[user_id] == null) ? new int[0] : training_user_matrix[user_id];

                    double auc = AUC.Compute(prediction_list, correct_items, ignore_items);
                    double map = PrecisionAndRecall.AP(prediction_list, correct_items, ignore_items);
                    double ndcg = NDCG.Compute(prediction_list, correct_items, ignore_items);
                    double rr = ReciprocalRank.Compute(prediction_list, correct_items, ignore_items);
                    var positions = new int[] { 3, 5, 10 };  // DH: added for p@3 & r@3
                    var prec = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, ignore_items, positions);
                    var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, ignore_items, positions);

                    // thread-safe incrementing
                    lock (result)
                    {
                        num_users++;
                        result["AUC"] += (float)auc;
                        result["MAP"] += (float)map;
                        result["NDCG"] += (float)ndcg;
                        result["MRR"] += (float)rr;
                        result["prec@3"] += (float)prec[3];
                        result["prec@5"] += (float)prec[5];
                        result["prec@10"] += (float)prec[10];
                        result["recall@3"] += (float)recall[3];
                        result["recall@5"] += (float)recall[5];
                        result["recall@10"] += (float)recall[10];
                    }

                    if (num_users % 1000 == 0)
                        Console.Error.Write(".");
                    if (num_users % 60000 == 0)
                        Console.Error.WriteLine();
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    Console.Error.WriteLine("===> ERROR: user_id=" + user_id);
                    Console.Error.WriteLine("===> ERROR: training_user_matrix[user_id]=" + training_user_matrix[user_id]);
                    throw e;
                }
            });

            foreach (string measure in Measures)
                result[measure] /= num_users;
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = candidate_items.Count;

            return result;
        }
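A rough usage sketch; with the default mode the candidate items are the overlap of test and training items (dataset variables are assumptions):

    // hypothetical usage sketch -- offline ranking evaluation with default settings
    var eval_results = recommender.Evaluate(test_data, training_data);
    Console.WriteLine("AUC {0:F4} prec@5 {1:F4} recall@5 {2:F4}",
        eval_results["AUC"], eval_results["prec@5"], eval_results["recall@5"]);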
Example #25
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// The evaluation protocol works as follows:
        /// For every test user, evaluate on the test items, then add those test items to the training set and perform an incremental update.
        /// The sequence of users is random.
        /// </remarks>
        /// <param name="recommender">the item recommender to be evaluated</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data (must be connected to the recommender's training data)</param>
        /// <param name="test_users">a list of all test user IDs</param>
        /// <param name="candidate_items">a list of all candidate item IDs</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        static public ItemRecommendationEvaluationResults EvaluateOnline(
            this IRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback training,
            IList <int> test_users, IList <int> candidate_items,
            CandidateItems candidate_item_mode)
        {
            var incremental_recommender = recommender as IIncrementalItemRecommender;

            if (incremental_recommender == null)
            {
                throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");
            }

            candidate_items = Items.Candidates(candidate_items, candidate_item_mode, test, training);

            test_users.Shuffle();
            var results_by_user = new Dictionary <int, ItemRecommendationEvaluationResults>();

            foreach (int user_id in test_users)
            {
                if (candidate_items.Intersect(test.ByUser[user_id]).Count() == 0)
                {
                    continue;
                }

                // prepare data
                var current_test_data = new PosOnlyFeedback <SparseBooleanMatrix>();
                foreach (int index in test.ByUser[user_id])
                {
                    current_test_data.Add(user_id, test.Items[index]);
                }
                // evaluate user
                var current_result = Items.Evaluate(recommender, current_test_data, training, current_test_data.AllUsers, candidate_items, CandidateItems.EXPLICIT);
                results_by_user[user_id] = current_result;

                // update recommender
                var tuples = new List <Tuple <int, int> >();
                foreach (int index in test.ByUser[user_id])
                {
                    tuples.Add(Tuple.Create(user_id, test.Items[index]));
                }
                incremental_recommender.AddFeedback(tuples);
                // TODO candidate_items should be updated properly
            }

            var results = new ItemRecommendationEvaluationResults();

            foreach (int u in results_by_user.Keys)
            {
                foreach (string measure in Items.Measures)
                {
                    results[measure] += results_by_user[u][measure];
                }
            }

            foreach (string measure in Items.Measures)
            {
                results[measure] /= results_by_user.Count;
            }

            results["num_users"] = results_by_user.Count;
            results["num_items"] = candidate_items.Count;
            results["num_lists"] = results_by_user.Count;

            return(results);
        }
Example #26
        /// <summary>Evaluation for rankings of items recommended to groups</summary>
        /// <remarks>
        /// </remarks>
        /// <param name="recommender">group recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="group_to_user">group to user relation</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="ignore_overlap">if true, ignore items that appear for a group in the training set when evaluating for that user</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        public static ItemRecommendationEvaluationResults Evaluate(
			this GroupRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
			SparseBooleanMatrix group_to_user,
			ICollection<int> candidate_items,
			bool ignore_overlap = true)
        {
            var result = new ItemRecommendationEvaluationResults();

            int num_groups = 0;

            foreach (int group_id in group_to_user.NonEmptyRowIDs)
            {
                var users = group_to_user.GetEntriesByRow(group_id);

                var correct_items = new HashSet<int>();
                foreach (int user_id in users)
                    correct_items.UnionWith(test.UserMatrix[user_id]);
                correct_items.IntersectWith(candidate_items);

                var candidate_items_in_train = new HashSet<int>();
                foreach (int user_id in users)
                    candidate_items_in_train.UnionWith(train.UserMatrix[user_id]);
                candidate_items_in_train.IntersectWith(candidate_items);
                int num_eval_items = candidate_items.Count - (ignore_overlap ? candidate_items_in_train.Count() : 0);

                // skip all groups that have 0 or #candidate_items test items
                if (correct_items.Count == 0)
                    continue;
                if (num_eval_items - correct_items.Count == 0)
                    continue;

                IList<int> prediction_list = recommender.RankItems(users, candidate_items);
                if (prediction_list.Count != candidate_items.Count)
                    throw new Exception("Not all items have been ranked.");

                var ignore_items = ignore_overlap ? candidate_items_in_train : new HashSet<int>();

                double auc  = AUC.Compute(prediction_list, correct_items, ignore_items);
                double map  = PrecisionAndRecall.AP(prediction_list, correct_items, ignore_items);
                double ndcg = NDCG.Compute(prediction_list, correct_items, ignore_items);
                double rr   = ReciprocalRank.Compute(prediction_list, correct_items, ignore_items);
                var positions = new int[] { 5, 10 };
                var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, ignore_items, positions);
                var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, ignore_items, positions);

                // thread-safe incrementing
                lock(result)
                {
                    num_groups++;
                    result["AUC"]       += (float) auc;
                    result["MAP"]       += (float) map;
                    result["NDCG"]      += (float) ndcg;
                    result["MRR"]       += (float) rr;
                    result["prec@5"]    += (float) prec[5];
                    result["prec@10"]   += (float) prec[10];
                    result["recall@5"]  += (float) recall[5];
                    result["recall@10"] += (float) recall[10];
                }

                if (num_groups % 1000 == 0)
                    Console.Error.Write(".");
                if (num_groups % 60000 == 0)
                    Console.Error.WriteLine();
            }

            result["num_groups"] = num_groups;
            result["num_lists"]  = num_groups;
            result["num_items"]  = candidate_items.Count;

            return result;
        }
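A rough usage sketch for group evaluation; group_recommender (a GroupRecommender) and group_to_user (a SparseBooleanMatrix of group-to-member assignments) are assumptions:

    // hypothetical usage sketch -- rank candidate items for each group and average the measures
    var group_results = group_recommender.Evaluate(test_data, training_data, group_to_user, training_data.AllItems);
    Console.WriteLine("group AUC {0:F4}, {1} groups evaluated", group_results["AUC"], group_results["num_groups"]);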
Example #27
        /// <summary>Write item predictions (scores) to a TextWriter object</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="relevant_users">a list of users to make recommendations for</param>
        /// <param name="relevant_items">the list of candidate items</param>
        /// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
        /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs</param>
        /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
        public static void WritePredictions(
			IRecommender recommender,
			IPosOnlyFeedback train,
			IList<int> relevant_users,
			ICollection<int> relevant_items,
			int num_predictions,
			IEntityMapping user_mapping, IEntityMapping item_mapping,
			TextWriter writer)
        {
            foreach (int user_id in relevant_users)
            {
                var ignore_items = train.UserMatrix[user_id];
                WritePredictions(recommender, user_id, relevant_items, ignore_items, num_predictions, user_mapping, item_mapping, writer);
            }
        }
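A minimal usage sketch for this overload (namespaces such as System.IO, System.Collections.Generic, MyMediaLite.Data, MyMediaLite.IO and MyMediaLite.ItemRecommendation are assumed to be imported; the data file name, the EntityMapping class and the MostPopular recommender are illustrative assumptions, not part of the example above):

        static void WritePredictionsSketch()  // hypothetical helper, mirrors the fragment style of these examples
        {
            var user_mapping = new EntityMapping();  // assumed IEntityMapping implementation
            var item_mapping = new EntityMapping();
            var train = ItemData.Read("u1.base", user_mapping, item_mapping);  // hypothetical file name

            var recommender = new MostPopular() { Feedback = train };  // any item recommender should do
            recommender.Train();

            // write at most 10 predicted items per training user
            using (var writer = new StreamWriter("predictions.txt"))
                WritePredictions(
                    recommender, train,
                    new List<int>(train.AllUsers),  // relevant_users
                    train.AllItems,                 // relevant_items
                    10, user_mapping, item_mapping, writer);
        }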
Example #28
        /// <summary>Write item predictions (scores) for all users to a TextWriter object</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="relevant_items">the list of candidate items</param>
        /// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
        /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs</param>
        /// <param name="writer">the <see cref="TextWriter"/> to write to</param>
        public static void WritePredictions(
			IRecommender recommender,
			IPosOnlyFeedback train,
			ICollection<int> relevant_items,
			int num_predictions,
			IEntityMapping user_mapping, IEntityMapping item_mapping,
			TextWriter writer)
        {
            var relevant_users = new List<int>(user_mapping.InternalIDs);
            WritePredictions(recommender, train, relevant_users, relevant_items, num_predictions, user_mapping, item_mapping, writer);
        }
Example #29
        // TODO generalize more to save code ...
        // TODO generalize that normal protocol is just an instance of this? Only if w/o performance penalty ...
        /// <summary>For a given user and the test dataset, return a dictionary of items filtered by attributes</summary>
        /// <param name="user_id">the user ID</param>
        /// <param name="test">the test dataset</param>
        /// <param name="item_attributes"></param>
        /// <returns>a dictionary containing a mapping from attribute IDs to collections of item IDs</returns>
        public static Dictionary<int, ICollection<int>> GetFilteredItems(int user_id, IPosOnlyFeedback test,
		                                                                 SparseBooleanMatrix item_attributes)
        {
            var filtered_items = new Dictionary<int, ICollection<int>>();

            foreach (int item_id in test.UserMatrix[user_id])
                foreach (int attribute_id in item_attributes[item_id])
                    if (filtered_items.ContainsKey(attribute_id))
                        filtered_items[attribute_id].Add(item_id);
                    else
                        filtered_items[attribute_id] = new HashSet<int>() { item_id };

            return filtered_items;
        }
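A toy-data sketch of how this helper can be fed, assuming SparseBooleanMatrix exposes a boolean [row, column] indexer (an assumption; the IDs below are made up for illustration):

        static void GetFilteredItemsSketch()  // hypothetical helper
        {
            // test feedback: user 0 accessed items 1 and 2
            var test = new PosOnlyFeedback<SparseBooleanMatrix>();
            test.Add(0, 1);
            test.Add(0, 2);

            // item attributes: item 1 carries attribute 7, item 2 carries attributes 7 and 8
            var item_attributes = new SparseBooleanMatrix();
            item_attributes[1, 7] = true;
            item_attributes[2, 7] = true;
            item_attributes[2, 8] = true;

            var filtered_items = GetFilteredItems(0, test, item_attributes);
            // expected: filtered_items[7] == {1, 2}, filtered_items[8] == {2}
        }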
Example #30
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation.
        /// The evaluation measures are listed in the ItemPredictionMeasures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        static public Dictionary <string, double> Evaluate(
            IItemRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback train,
            ICollection <int> relevant_users,
            ICollection <int> relevant_items)
        {
            if (train.Overlap(test) > 0)
            {
                Console.Error.WriteLine("WARNING: Overlapping train and test data");
            }

            // compute evaluation measures
            double auc_sum     = 0;
            double map_sum     = 0;
            double prec_5_sum  = 0;
            double prec_10_sum = 0;
            double prec_15_sum = 0;
            double ndcg_sum    = 0;
            int    num_users   = 0;

            foreach (int user_id in relevant_users)
            {
                var correct_items = new HashSet <int>(test.UserMatrix[user_id]);
                correct_items.IntersectWith(relevant_items);

                // the number of items that are really relevant for this user
                var relevant_items_in_train = new HashSet <int>(train.UserMatrix[user_id]);
                relevant_items_in_train.IntersectWith(relevant_items);
                int num_eval_items = relevant_items.Count - relevant_items_in_train.Count();

                // skip all users that have 0 or #relevant_items test items
                if (correct_items.Count == 0)
                {
                    continue;
                }
                if (num_eval_items - correct_items.Count == 0)
                {
                    continue;
                }

                num_users++;
                int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_items);

                auc_sum     += AUC(prediction, correct_items, train.UserMatrix[user_id]);
                map_sum     += MAP(prediction, correct_items, train.UserMatrix[user_id]);
                ndcg_sum    += NDCG(prediction, correct_items, train.UserMatrix[user_id]);
                prec_5_sum  += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 5);
                prec_10_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
                prec_15_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

                if (prediction.Length != relevant_items.Count)
                {
                    throw new Exception("Not all items have been ranked.");
                }

                if (num_users % 1000 == 0)
                {
                    Console.Error.Write(".");
                }
                if (num_users % 20000 == 0)
                {
                    Console.Error.WriteLine();
                }
            }

            var result = new Dictionary <string, double>();

            result["AUC"]       = auc_sum / num_users;
            result["MAP"]       = map_sum / num_users;
            result["NDCG"]      = ndcg_sum / num_users;
            result["prec@5"]    = prec_5_sum / num_users;
            result["prec@10"]   = prec_10_sum / num_users;
            result["prec@15"]   = prec_15_sum / num_users;
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = relevant_items.Count;

            return(result);
        }
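A usage sketch for this routine with a small in-memory split, so no data files are needed; the BPRMF recommender is an assumption, and any item recommender with its Feedback property set should work the same way:

        static void EvaluateSketch()  // hypothetical helper
        {
            var train = new PosOnlyFeedback<SparseBooleanMatrix>();
            var test  = new PosOnlyFeedback<SparseBooleanMatrix>();
            train.Add(0, 0); train.Add(0, 1); train.Add(1, 2); train.Add(1, 3);
            test.Add(0, 2);  test.Add(1, 0);

            var recommender = new BPRMF() { Feedback = train };  // assumed recommender class
            recommender.Train();

            var results = Evaluate(recommender, test, train, train.AllUsers, train.AllItems);
            Console.WriteLine("AUC {0:0.000}  prec@5 {1:0.000}  ({2} users)",
                results["AUC"], results["prec@5"], results["num_users"]);
        }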
Example #31
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing)
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data (must be connected to the recommender's training data)</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        public static Dictionary<string, double> EvaluateOnline(
			IItemRecommender recommender,
			IPosOnlyFeedback test, IPosOnlyFeedback train,
		    ICollection<int> relevant_users, ICollection<int> relevant_items)
        {
            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos = 0;
            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];
            for (int index = 0; index < random_index.Length; index++)
                random_index[index] = index;
            Util.Utils.Shuffle<int>(random_index);

            var results_by_user = new Dictionary<int, Dictionary<string, double>>();

            foreach (int index in random_index)
            {
                if (relevant_users.Contains(users[index]) && relevant_items.Contains(items[index]))
                {
                    // evaluate user
                    var current_test = new PosOnlyFeedback<SparseBooleanMatrix>();
                    current_test.Add(users[index], items[index]);
                    var current_result = Evaluate(recommender, current_test, train, current_test.AllUsers, relevant_items);

                    if (current_result["num_users"] == 1)
                        if (results_by_user.ContainsKey(users[index]))
                        {
                            foreach (string measure in Measures)
                                results_by_user[users[index]][measure] += current_result[measure];
                            results_by_user[users[index]]["num_items"]++;
                        }
                        else
                        {
                            results_by_user[users[index]] = current_result;
                            results_by_user[users[index]]["num_items"] = 1;
                            results_by_user[users[index]].Remove("num_users");
                        }
                }

                // update recommender
                recommender.AddFeedback(users[index], items[index]);
            }

            var results = new Dictionary<string, double>();
            foreach (string measure in Measures)
                results[measure] = 0;

            foreach (int u in results_by_user.Keys)
                foreach (string measure in Measures)
                    results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];

            foreach (string measure in Measures)
                results[measure] /= results_by_user.Count;

            results["num_users"] = results_by_user.Count;
            results["num_items"] = relevant_items.Count;
            results["num_lists"] = test.Count; // FIXME this is not exact

            return results;
        }
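A sketch of how this online protocol could be driven; note that every evaluated event is immediately fed back into the model via AddFeedback, so the recommender must support incremental updates (BPRMF is assumed to do so here):

        static void EvaluateOnlineSketch()  // hypothetical helper
        {
            var train = new PosOnlyFeedback<SparseBooleanMatrix>();
            var test  = new PosOnlyFeedback<SparseBooleanMatrix>();
            train.Add(0, 0); train.Add(0, 1); train.Add(1, 2); train.Add(1, 3);
            test.Add(0, 2);  test.Add(0, 3);  test.Add(1, 0);

            var recommender = new BPRMF() { Feedback = train };  // assumed recommender class
            recommender.Train();

            // events are visited in random order: evaluate, then add to the model
            var results = EvaluateOnline(recommender, test, train, train.AllUsers, train.AllItems);
            Console.WriteLine("online AUC {0:0.000} over {1} users", results["AUC"], results["num_users"]);
        }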
Example #32
        /// <summary>Evaluation for rankings of filtered items</summary>
        /// <remarks>
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="item_attributes">the item attributes to be used for filtering</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        public static Dictionary<string, double> Evaluate(
			IItemRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
		    SparseBooleanMatrix item_attributes,
		    ICollection<int> relevant_users,
			ICollection<int> relevant_items)
        {
            if (train.Overlap(test) > 0)
                Console.Error.WriteLine("WARNING: Overlapping train and test data");

            SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

            // compute evaluation measures
            double auc_sum     = 0;
            double map_sum     = 0;
            double prec_5_sum  = 0;
            double prec_10_sum = 0;
            double prec_15_sum = 0;
            double ndcg_sum    = 0;

            // for counting the users and the evaluation lists
            int num_lists = 0;
            int num_users = 0;
            int last_user_id = -1;

            foreach (int user_id in relevant_users)
            {
                var filtered_items = GetFilteredItems(user_id, test, item_attributes);

                foreach (int attribute_id in filtered_items.Keys)
                {
                    // TODO optimize this a bit, currently it is quite naive
                    var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
                    relevant_filtered_items.IntersectWith(relevant_items);

                    var correct_items = new HashSet<int>(filtered_items[attribute_id]);
                    correct_items.IntersectWith(relevant_filtered_items);

                    // the number of items that are really relevant for this user
                    var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
                    relevant_items_in_train.IntersectWith(relevant_filtered_items);
                    int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count();

                    // skip all users that have 0 or #relevant_filtered_items test items
                    if (correct_items.Count == 0)
                        continue;
                    if (num_eval_items - correct_items.Count == 0)
                        continue;

                    // counting stats
                    num_lists++;
                    if (last_user_id != user_id)
                    {
                        last_user_id = user_id;
                        num_users++;
                    }

                    // evaluation
                    int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);

                    auc_sum     += Items.AUC(prediction, correct_items, train.UserMatrix[user_id]);
                    map_sum     += Items.MAP(prediction, correct_items, train.UserMatrix[user_id]);
                    ndcg_sum    += Items.NDCG(prediction, correct_items, train.UserMatrix[user_id]);
                    prec_5_sum  += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
                    prec_10_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
                    prec_15_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

                    if (prediction.Length != relevant_filtered_items.Count)
                        throw new Exception("Not all items have been ranked.");

                    if (num_lists % 1000 == 0)
                        Console.Error.Write(".");
                    if (num_lists % 20000 == 0)
                        Console.Error.WriteLine();
                }
            }

            var result = new Dictionary<string, double>();
            result.Add("AUC",     auc_sum / num_lists);
            result.Add("MAP",     map_sum / num_lists);
            result.Add("NDCG",    ndcg_sum / num_lists);
            result.Add("prec@5",  prec_5_sum / num_lists);
            result.Add("prec@10", prec_10_sum / num_lists);
            result.Add("prec@15", prec_15_sum / num_lists);
            result.Add("num_users", num_users);
            result.Add("num_lists", num_lists);
            result.Add("num_items", relevant_items.Count);

            return result;
        }
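A sketch for the attribute-filtered protocol with toy attribute data; it is written as if it lived in the same evaluation class as the overload above (in the library this sits in the filtered-evaluation class, name assumed), and again assumes SparseBooleanMatrix has a boolean [row, column] indexer:

        static void FilteredEvaluateSketch()  // hypothetical helper
        {
            var train = new PosOnlyFeedback<SparseBooleanMatrix>();
            var test  = new PosOnlyFeedback<SparseBooleanMatrix>();
            train.Add(0, 0); train.Add(0, 1); train.Add(1, 2); train.Add(1, 3);
            test.Add(0, 2);  test.Add(1, 0);

            // every item carries attribute 0; items 0 and 2 additionally carry attribute 1
            var item_attributes = new SparseBooleanMatrix();
            for (int item_id = 0; item_id < 4; item_id++)
                item_attributes[item_id, 0] = true;
            item_attributes[0, 1] = true;
            item_attributes[2, 1] = true;

            var recommender = new BPRMF() { Feedback = train };  // assumed recommender class
            recommender.Train();

            var results = Evaluate(recommender, test, train, item_attributes, train.AllUsers, train.AllItems);
            Console.WriteLine("filtered prec@5 {0:0.000} over {1} lists", results["prec@5"], results["num_lists"]);
        }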
Example #33
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation.
        /// The evaluation measures are listed in the ItemPredictionMeasures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        public static Dictionary<string, double> Evaluate(
			IItemRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
		    ICollection<int> relevant_users,
			ICollection<int> relevant_items)
        {
            if (train.Overlap(test) > 0)
                Console.Error.WriteLine("WARNING: Overlapping train and test data");

            // compute evaluation measures
            double auc_sum     = 0;
            double map_sum     = 0;
            double prec_5_sum  = 0;
            double prec_10_sum = 0;
            double prec_15_sum = 0;
            double ndcg_sum    = 0;
            int num_users      = 0;

            foreach (int user_id in relevant_users)
            {
                var correct_items = new HashSet<int>(test.UserMatrix[user_id]);
                correct_items.IntersectWith(relevant_items);

                // the number of items that are really relevant for this user
                var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
                relevant_items_in_train.IntersectWith(relevant_items);
                int num_eval_items = relevant_items.Count - relevant_items_in_train.Count();

                // skip all users that have 0 or #relevant_items test items
                if (correct_items.Count == 0)
                    continue;
                if (num_eval_items - correct_items.Count == 0)
                    continue;

                num_users++;
                int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_items);

                auc_sum     += AUC(prediction, correct_items, train.UserMatrix[user_id]);
                map_sum     += MAP(prediction, correct_items, train.UserMatrix[user_id]);
                ndcg_sum    += NDCG(prediction, correct_items, train.UserMatrix[user_id]);
                prec_5_sum  += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
                prec_10_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
                prec_15_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

                if (prediction.Length != relevant_items.Count)
                    throw new Exception("Not all items have been ranked.");

                if (num_users % 1000 == 0)
                    Console.Error.Write(".");
                if (num_users % 20000 == 0)
                    Console.Error.WriteLine();
            }

            var result = new Dictionary<string, double>();
            result["AUC"]       = auc_sum / num_users;
            result["MAP"]       = map_sum / num_users;
            result["NDCG"]      = ndcg_sum / num_users;
            result["prec@5"]    = prec_5_sum / num_users;
            result["prec@10"]   = prec_10_sum / num_users;
            result["prec@15"]   = prec_15_sum / num_users;
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = relevant_items.Count;

            return result;
        }
    static void LoadData()
    {
        TimeSpan loading_time = Wrap.MeasureTime(delegate() {
            // training data
            training_file = Path.Combine(data_dir, training_file);
            training_data = double.IsNaN(rating_threshold)
                ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

            // user attributes
            if (user_attributes_file != null)
                user_attributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            if (recommender is IUserAttributeAwareRecommender)
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = user_attributes;

            // item attributes
            if (item_attributes_file != null)
                item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            if (recommender is IItemAttributeAwareRecommender)
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = item_attributes;

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);
            }

            // user groups
            if (user_groups_file != null)
            {
                group_to_user = RelationData.Read(Path.Combine(data_dir, user_groups_file), user_mapping); // assumption: user and user group IDs are disjoint
                user_groups = group_to_user.NonEmptyRowIDs;
                Console.WriteLine("{0} user groups", user_groups.Count);
            }

            // test data
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_file = Path.Combine(data_dir, test_file);
                    test_data = double.IsNaN(rating_threshold)
                        ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                        : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
                }
            }
            else
            {
                var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }

            if (group_method == "GroupsAsUsers")
            {
                Console.WriteLine("group recommendation strategy: {0}", group_method);
                // TODO verify what is going on here

                //var training_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
                // transform groups to users
                foreach (int group_id in group_to_user.NonEmptyRowIDs)
                    foreach (int user_id in group_to_user[group_id])
                        foreach (int item_id in training_data.UserMatrix.GetEntriesByRow(user_id))
                            training_data.Add(group_id, item_id);
                // add the users that do not belong to groups

                //training_data = training_data_group;

                // transform groups to users
                var test_data_group = new PosOnlyFeedback<SparseBooleanMatrix>();
                foreach (int group_id in group_to_user.NonEmptyRowIDs)
                    foreach (int user_id in group_to_user[group_id])
                        foreach (int item_id in test_data.UserMatrix.GetEntriesByRow(user_id))
                            test_data_group.Add(group_id, item_id);

                test_data = test_data_group;

                group_method = null; // deactivate s.t. the normal eval routines are used
            }

            if (user_prediction)
            {
                // swap file names for test users and candidate items
                var ruf = test_users_file;
                var rif = candidate_items_file;
                test_users_file = rif;
                candidate_items_file = ruf;

                // swap user and item mappings
                var um = user_mapping;
                var im = item_mapping;
                user_mapping = im;
                item_mapping = um;

                // transpose training and test data
                training_data = training_data.Transpose();

                // transpose test data
                if (test_data != null)
                    test_data = test_data.Transpose();
            }

            if (recommender is MyMediaLite.ItemRecommendation.ItemRecommender)
                ((ItemRecommender)recommender).Feedback = training_data;

            // test users
            if (test_users_file != null)
                test_users = user_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray() );
            else
                test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;

            // if necessary, perform user sampling
            if (num_test_users > 0 && num_test_users < test_users.Count)
            {
                var old_test_users = new HashSet<int>(test_users);
                var new_test_users = new int[num_test_users];
                for (int i = 0; i < num_test_users; i++)
                {
                    int random_index = MyMediaLite.Util.Random.GetInstance().Next(old_test_users.Count - 1);
                    new_test_users[i] = old_test_users.ElementAt(random_index);
                    old_test_users.Remove(new_test_users[i]);
                }
                test_users = new_test_users;
            }

            // candidate items
            if (candidate_items_file != null)
                candidate_items = item_mapping.ToInternalID( File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray() );
            else if (all_items)
                candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();

            if (candidate_items != null)
                eval_item_mode = CandidateItems.EXPLICIT;
            else if (in_training_items)
                eval_item_mode = CandidateItems.TRAINING;
            else if (in_test_items)
                eval_item_mode = CandidateItems.TEST;
            else if (overlap_items)
                eval_item_mode = CandidateItems.OVERLAP;
            else
                eval_item_mode = CandidateItems.UNION;
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }
Example #35
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// unless the boolean argument repeated_events is set.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="test_users">a list of integers with all test users; if null, use all users in the test cases</param>
        /// <param name="candidate_items">a list of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
        /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        static public ItemRecommendationEvaluationResults Evaluate(
            this IRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback training,
            IList <int> test_users             = null,
            IList <int> candidate_items        = null,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            RepeatedEvents repeated_events     = RepeatedEvents.No,
            int n = -1)
        {
            if (test_users == null)
            {
                test_users = test.AllUsers;
            }
            candidate_items = Candidates(candidate_items, candidate_item_mode, test, training);

            var result = new ItemRecommendationEvaluationResults();

            // make sure that the user matrix is completely initialized before entering parallel code
            var training_user_matrix = training.UserMatrix;
            var test_user_matrix     = test.UserMatrix;

            int num_users = 0;

            Parallel.ForEach(test_users, user_id => {
                try
                {
                    var correct_items = new HashSet <int>(test_user_matrix[user_id]);
                    correct_items.IntersectWith(candidate_items);
                    if (correct_items.Count == 0)
                    {
                        return;
                    }

                    var ignore_items_for_this_user = new HashSet <int>(
                        repeated_events == RepeatedEvents.Yes || training_user_matrix[user_id] == null ? new int[0] : training_user_matrix[user_id]
                        );

                    ignore_items_for_this_user.IntersectWith(candidate_items);
                    int num_candidates_for_this_user = candidate_items.Count - ignore_items_for_this_user.Count;
                    if (correct_items.Count == num_candidates_for_this_user)
                    {
                        return;
                    }

                    var prediction      = recommender.Recommend(user_id, candidate_items: candidate_items, n: n, ignore_items: ignore_items_for_this_user);
                    var prediction_list = (from t in prediction select t.Item1).ToArray();

                    int num_dropped_items = num_candidates_for_this_user - prediction.Count;
                    double auc            = AUC.Compute(prediction_list, correct_items, num_dropped_items);
                    double map            = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg           = NDCG.Compute(prediction_list, correct_items);
                    double rr             = ReciprocalRank.Compute(prediction_list, correct_items);
                    var positions         = new int[] { 5, 10 };
                    var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    // thread-safe incrementing
                    lock (result)
                    {
                        num_users++;
                        result["AUC"]       += (float)auc;
                        result["MAP"]       += (float)map;
                        result["NDCG"]      += (float)ndcg;
                        result["MRR"]       += (float)rr;
                        result["prec@5"]    += (float)prec[5];
                        result["prec@10"]   += (float)prec[10];
                        result["recall@5"]  += (float)recall[5];
                        result["recall@10"] += (float)recall[10];
                    }

                    if (num_users % 1000 == 0)
                    {
                        Console.Error.Write(".");
                    }
                    if (num_users % 60000 == 0)
                    {
                        Console.Error.WriteLine();
                    }
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            foreach (string measure in Measures)
            {
                result[measure] /= num_users;
            }
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = candidate_items.Count;

            return(result);
        }
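Because this variant is an extension method on IRecommender, it can be called directly on the recommender object (a using directive for the evaluation namespace, e.g. MyMediaLite.Eval, is assumed); a sketch with in-memory toy data and an assumed MostPopular model:

        static void EvaluateExtensionSketch()  // hypothetical helper
        {
            var training = new PosOnlyFeedback<SparseBooleanMatrix>();
            var test     = new PosOnlyFeedback<SparseBooleanMatrix>();
            training.Add(0, 0); training.Add(0, 1); training.Add(1, 2); training.Add(1, 3);
            test.Add(0, 2);     test.Add(1, 0);

            var recommender = new MostPopular() { Feedback = training };  // assumed recommender class
            recommender.Train();

            // evaluate with all training items as candidates, scoring only the top-10 list per user
            var results = recommender.Evaluate(
                test, training,
                candidate_item_mode: CandidateItems.TRAINING,
                n: 10);
            Console.WriteLine("AUC {0:0.000}  MRR {1:0.000}  recall@10 {2:0.000}",
                results["AUC"], results["MRR"], results["recall@10"]);
        }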
    protected override void LoadData()
    {
        TimeSpan loading_time = Wrap.MeasureTime(delegate() {
            base.LoadData();

            // training data
            training_data = double.IsNaN(rating_threshold)
                                ? ItemData.Read(training_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                                : ItemDataRatingThreshold.Read(training_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);

            // test data
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_data = double.IsNaN(rating_threshold)
                                                ? ItemData.Read(test_file, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE)
                                                : ItemDataRatingThreshold.Read(test_file, rating_threshold, user_mapping, item_mapping, file_format == ItemDataFileFormat.IGNORE_FIRST_LINE);
                }
            }
            else
            {
                var split     = new PosOnlyFeedbackSimpleSplit <PosOnlyFeedback <SparseBooleanMatrix> >(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }

            if (user_prediction)
            {
                // swap file names for test users and candidate items
                var ruf              = test_users_file;
                var rif              = candidate_items_file;
                test_users_file      = rif;
                candidate_items_file = ruf;

                // swap user and item mappings
                var um       = user_mapping;
                var im       = item_mapping;
                user_mapping = im;
                item_mapping = um;

                // transpose training and test data
                training_data = training_data.Transpose();

                // transpose test data
                if (test_data != null)
                {
                    test_data = test_data.Transpose();
                }
            }

            for (int i = 0; i < recommenders.Count; i++)
            {
                if (recommenders[i] is MyMediaLite.ItemRecommendation.ItemRecommender)
                {
                    ((ItemRecommender)recommenders[i]).Feedback = training_data;
                }
            }
            // test users
            if (test_users_file != null)
            {
                test_users = user_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, test_users_file)).ToArray());
            }
            else
            {
                test_users = test_data != null ? test_data.AllUsers : training_data.AllUsers;
            }

            // if necessary, perform user sampling
            if (num_test_users > 0 && num_test_users < test_users.Count)
            {
                var old_test_users = new HashSet <int>(test_users);
                var new_test_users = new int[num_test_users];
                for (int i = 0; i < num_test_users; i++)
                {
                    int random_index  = MyMediaLite.Random.GetInstance().Next(old_test_users.Count - 1);
                    new_test_users[i] = old_test_users.ElementAt(random_index);
                    old_test_users.Remove(new_test_users[i]);
                }
                test_users = new_test_users;
            }

            // candidate items
            if (candidate_items_file != null)
            {
                candidate_items = item_mapping.ToInternalID(File.ReadLines(Path.Combine(data_dir, candidate_items_file)).ToArray());
            }
            else if (all_items)
            {
                candidate_items = Enumerable.Range(0, item_mapping.InternalIDs.Max() + 1).ToArray();
            }

            if (candidate_items != null)
            {
                eval_item_mode = CandidateItems.EXPLICIT;
            }
            else if (in_training_items)
            {
                eval_item_mode = CandidateItems.TRAINING;
            }
            else if (in_test_items)
            {
                eval_item_mode = CandidateItems.TEST;
            }
            else if (overlap_items)
            {
                eval_item_mode = CandidateItems.OVERLAP;
            }
            else
            {
                eval_item_mode = CandidateItems.UNION;
            }
        });

        // save the training and test data to plain text files


        List <string> linesToWrite = new List <string>();

        for (int i = 0; i < training_data.UserMatrix.NumberOfRows; i++)
        {
            IList <int> columns = training_data.UserMatrix.GetEntriesByRow(i);
            for (int j = 0; j < columns.Count; j++)
            {
                StringBuilder line = new StringBuilder();
                line.Append(i.ToString() + " " + columns[j].ToString());
                linesToWrite.Add(line.ToString());
            }
        }
        System.IO.File.WriteAllLines("training.data", linesToWrite.ToArray());


        linesToWrite = new List <string>();
        for (int i = 0; i < test_data.UserMatrix.NumberOfRows; i++)
        {
            IList <int> columns = test_data.UserMatrix.GetEntriesByRow(i);
            for (int j = 0; j < columns.Count; j++)
            {
                StringBuilder line = new StringBuilder();
                line.Append(i.ToString() + " " + columns[j].ToString());
                linesToWrite.Add(line.ToString());
            }
        }
        System.IO.File.WriteAllLines("test.data", linesToWrite.ToArray());


        /*
         * List<string> linesToWrite = new List<string>();
         * for (int rowIndex = 0; rowIndex < training_data.AllItems.Count; rowIndex++)
         * {
         *
         * }*/

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }
 /// <summary>Compute the number of overlapping events in two feedback datasets</summary>
 /// <param name="s">the feedback dataset to compare to</param>
 /// <returns>the number of overlapping events, i.e. events that have the same user and item ID</returns>
 public int Overlap(IPosOnlyFeedback s)
 {
     return(UserMatrix.Overlap(s.UserMatrix));
 }
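This is the check that the evaluation routines above rely on for their overlap warning; a quick toy-data sanity check might look like this:

 static void OverlapSketch()  // hypothetical helper
 {
     var a = new PosOnlyFeedback<SparseBooleanMatrix>();
     var b = new PosOnlyFeedback<SparseBooleanMatrix>();
     a.Add(0, 1); a.Add(0, 2); a.Add(1, 3);
     b.Add(0, 2); b.Add(1, 3); b.Add(1, 4);

     int n_overlap = a.Overlap(b);  // 2 here: (0, 2) and (1, 3) appear in both datasets
     if (n_overlap > 0)
         Console.Error.WriteLine("WARNING: {0} overlapping events", n_overlap);
 }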
Example #38
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing)
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data (must be connected to the recommender's training data)</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        static public Dictionary <string, double> EvaluateOnline(
            IItemRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback train,
            ICollection <int> relevant_users, ICollection <int> relevant_items)
        {
            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos   = 0;

            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
            {
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }
            }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];

            for (int index = 0; index < random_index.Length; index++)
            {
                random_index[index] = index;
            }
            Util.Utils.Shuffle <int>(random_index);

            var results_by_user = new Dictionary <int, Dictionary <string, double> >();

            foreach (int index in random_index)
            {
                if (relevant_users.Contains(users[index]) && relevant_items.Contains(items[index]))
                {
                    // evaluate user
                    var current_test = new PosOnlyFeedback <SparseBooleanMatrix>();
                    current_test.Add(users[index], items[index]);
                    var current_result = Evaluate(recommender, current_test, train, current_test.AllUsers, relevant_items);

                    if (current_result["num_users"] == 1)
                    {
                        if (results_by_user.ContainsKey(users[index]))
                        {
                            foreach (string measure in Measures)
                            {
                                results_by_user[users[index]][measure] += current_result[measure];
                            }
                            results_by_user[users[index]]["num_items"]++;
                        }
                        else
                        {
                            results_by_user[users[index]] = current_result;
                            results_by_user[users[index]]["num_items"] = 1;
                            results_by_user[users[index]].Remove("num_users");
                        }
                    }
                }

                // update recommender
                recommender.AddFeedback(users[index], items[index]);
            }

            var results = new Dictionary <string, double>();

            foreach (string measure in Measures)
            {
                results[measure] = 0;
            }

            foreach (int u in results_by_user.Keys)
            {
                foreach (string measure in Measures)
                {
                    results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];
                }
            }

            foreach (string measure in Measures)
            {
                results[measure] /= results_by_user.Count;
            }

            results["num_users"] = results_by_user.Count;
            results["num_items"] = relevant_items.Count;
            results["num_lists"] = test.Count;             // FIXME this is not exact

            return(results);
        }
Example #39
        // TODO there are too many different versions of this method interface - we should simplify the API
        /// <summary>Write item predictions (scores) for all users to a file</summary>
        /// <param name="recommender">the <see cref="IRecommender"/> to use for making the predictions</param>
        /// <param name="train">a user-wise <see cref="IPosOnlyFeedback"/> containing the items already observed</param>
        /// <param name="relevant_items">the list of candidate items</param>
        /// <param name="num_predictions">the number of items to return per user, -1 if there should be no limit</param>
        /// <param name="user_mapping">an <see cref="IEntityMapping"/> object for the user IDs</param>
        /// <param name="item_mapping">an <see cref="IEntityMapping"/> object for the item IDs</param>
        /// <param name="filename">the name of the file to write to</param>
        public static void WritePredictions(
			IRecommender recommender,
			IPosOnlyFeedback train,
			ICollection<int> relevant_items,
			int num_predictions,
			IEntityMapping user_mapping, IEntityMapping item_mapping,
			string filename)
        {
            if (filename.Equals("-"))
                WritePredictions(recommender, train, relevant_items, num_predictions, user_mapping, item_mapping, Console.Out);
            else
                using ( var writer = new StreamWriter(filename) )
                    WritePredictions(recommender, train, relevant_items, num_predictions, user_mapping, item_mapping, writer);
        }
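Reusing the setup from the earlier WritePredictions sketch (recommender, train and the two mappings, all assumed), the file variant then routes the output either to stdout or to a path:

            // "-" writes to Console.Out, any other string is treated as a file name
            WritePredictions(recommender, train, train.AllItems, -1, user_mapping, item_mapping, "-");
            WritePredictions(recommender, train, train.AllItems, 20, user_mapping, item_mapping, "predictions.txt");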
Example #40
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// unless the boolean argument repeated_events is set.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="test_users">a list of integers with all test users; if null, use all users in the test cases</param>
        /// <param name="candidate_items">a list of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
        /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        public static ItemRecommendationEvaluationResults Evaluate(
			this IRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback training,
			IList<int> test_users = null,
			IList<int> candidate_items = null,
			CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
			RepeatedEvents repeated_events = RepeatedEvents.No,
			int n = -1)
        {
            if (test_users == null)
                test_users = test.AllUsers;
            candidate_items = Candidates(candidate_items, candidate_item_mode, test, training);

            var result = new ItemRecommendationEvaluationResults();

            // make sure that the user matrix is completely initialized before entering parallel code
            var training_user_matrix = training.UserMatrix;
            var test_user_matrix     = test.UserMatrix;

            int num_users = 0;
            Parallel.ForEach(test_users, user_id => {
                try
                {
                    var correct_items = new HashSet<int>(test_user_matrix[user_id]);
                    correct_items.IntersectWith(candidate_items);
                    if (correct_items.Count == 0)
                        return;

                    var ignore_items_for_this_user = new HashSet<int>(
                        repeated_events == RepeatedEvents.Yes || training_user_matrix[user_id] == null ? new int[0] : training_user_matrix[user_id]
                    );

                    ignore_items_for_this_user.IntersectWith(candidate_items);
                    int num_candidates_for_this_user = candidate_items.Count - ignore_items_for_this_user.Count;
                    if (correct_items.Count == num_candidates_for_this_user)
                        return;

                    var prediction = recommender.Recommend(user_id, candidate_items:candidate_items, n:n, ignore_items:ignore_items_for_this_user);
                    var prediction_list = (from t in prediction select t.Item1).ToArray();

                    int num_dropped_items = num_candidates_for_this_user - prediction.Count;
                    double auc  = AUC.Compute(prediction_list, correct_items, num_dropped_items);
                    double map  = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg = NDCG.Compute(prediction_list, correct_items);
                    double rr   = ReciprocalRank.Compute(prediction_list, correct_items);
                    var positions = new int[] { 5, 10 };
                    var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    // thread-safe incrementing
                    lock (result)
                    {
                        num_users++;
                        result["AUC"]       += (float) auc;
                        result["MAP"]       += (float) map;
                        result["NDCG"]      += (float) ndcg;
                        result["MRR"]       += (float) rr;
                        result["prec@5"]    += (float) prec[5];
                        result["prec@10"]   += (float) prec[10];
                        result["recall@5"]  += (float) recall[5];
                        result["recall@10"] += (float) recall[10];
                    }

                    if (num_users % 1000 == 0)
                        Console.Error.Write(".");
                    if (num_users % 60000 == 0)
                        Console.Error.WriteLine();
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            foreach (string measure in Measures)
                result[measure] /= num_users;
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = candidate_items.Count;

            return result;
        }
Example #41
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing); repeated events
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// </remarks>
        /// <param name="recommender">the item recommender to be evaluated</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data (must be connected to the recommender's training data)</param>
        /// <param name="test_users">a list of all test user IDs</param>
        /// <param name="candidate_items">a list of all candidate item IDs</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        public static ItemRecommendationEvaluationResults EvaluateOnline(
			this IRecommender recommender,
			IPosOnlyFeedback test, IPosOnlyFeedback training,
			IList<int> test_users, IList<int> candidate_items,
			CandidateItems candidate_item_mode)
        {
            var incremental_recommender = recommender as IIncrementalItemRecommender;
            if (incremental_recommender == null)
                throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");

            // prepare candidate items once to avoid recreating them
            switch (candidate_item_mode)
            {
                case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
                case CandidateItems.TEST:     candidate_items = test.AllItems; break;
                case CandidateItems.OVERLAP:  candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
                case CandidateItems.UNION:    candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
            }
            candidate_item_mode = CandidateItems.EXPLICIT;

            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos = 0;
            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];
            for (int index = 0; index < random_index.Length; index++)
                random_index[index] = index;
            random_index.Shuffle();

            var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();

            int num_lists = 0;

            foreach (int index in random_index)
            {
                if (test_users.Contains(users[index]) && candidate_items.Contains(items[index]))
                {
                    // evaluate user
                    var current_test = new PosOnlyFeedback<SparseBooleanMatrix>();
                    current_test.Add(users[index], items[index]);
                    var current_result = Items.Evaluate(recommender, current_test, training, current_test.AllUsers, candidate_items, candidate_item_mode);

                    if (current_result["num_users"] == 1)
                        if (results_by_user.ContainsKey(users[index]))
                        {
                            foreach (string measure in Items.Measures)
                                results_by_user[users[index]][measure] += current_result[measure];
                            results_by_user[users[index]]["num_items"]++;
                            num_lists++;
                        }
                        else
                        {
                            results_by_user[users[index]] = current_result;
                            results_by_user[users[index]]["num_items"] = 1;
                            results_by_user[users[index]].Remove("num_users");
                        }
                }

                // update recommender
                incremental_recommender.AddFeedback(users[index], items[index]);
            }

            var results = new ItemRecommendationEvaluationResults();

            foreach (int u in results_by_user.Keys)
                foreach (string measure in Items.Measures)
                    results[measure] += results_by_user[u][measure] / results_by_user[u]["num_items"];

            foreach (string measure in Items.Measures)
                results[measure] /= results_by_user.Count;

            results["num_users"] = results_by_user.Count;
            results["num_items"] = candidate_items.Count;
            results["num_lists"] = num_lists;

            return results;
        }
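A minimal sketch of driving the online evaluation above. The enclosing class and method name (assumed here to be ItemsOnline.EvaluateOnline) and the exact parameter order are not visible in this snippet and are assumptions, as is the choice of recommender:

// The recommender must implement IIncrementalItemRecommender, as checked at the top of the routine.
var recommender = new MyMediaLite.ItemRecommendation.MostPopular();  // assumed to support incremental updates
recommender.Feedback = training_data;
recommender.Train();
var online_results = ItemsOnline.EvaluateOnline(
    recommender, test_data, training_data,
    test_data.AllUsers,       // test_users
    training_data.AllItems,   // candidate_items (replaced anyway when the mode is TRAINING)
    CandidateItems.TRAINING);
foreach (string measure in Items.Measures)
    Console.WriteLine("{0} {1}", measure, online_results[measure]);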
Exemple #42
0
    static void LoadData()
    {
        TimeSpan loading_time = Utils.MeasureTime(delegate() {
            // training data
            training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

            // relevant users and items
            if (relevant_users_file != null)
            {
                relevant_users = new HashSet <int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
            }
            else
            {
                relevant_users = training_data.AllUsers;
            }
            if (relevant_items_file != null)
            {
                relevant_items = new HashSet <int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
            }
            else
            {
                relevant_items = training_data.AllItems;
            }

            if (!(recommender is MyMediaLite.ItemRecommendation.Random))
            {
                ((ItemRecommender)recommender).Feedback = training_data;
            }

            // user attributes
            if (recommender is IUserAttributeAwareRecommender)
            {
                if (user_attributes_file == null)
                {
                    Usage("Recommender expects --user-attributes=FILE.");
                }
                else
                {
                    ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
                }
            }

            // item attributes
            if (recommender is IItemAttributeAwareRecommender)
            {
                if (item_attributes_file == null)
                {
                    Usage("Recommender expects --item-attributes=FILE.");
                }
                else
                {
                    ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
                }
            }
            if (filtered_eval)
            {
                if (item_attributes_file == null)
                {
                    Usage("--filtered-evaluation expects --item-attributes=FILE.");
                }
                else
                {
                    item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
                }
            }

            // user relation
            if (recommender is IUserRelationAwareRecommender)
            {
                if (user_relations_file == null)
                {
                    Usage("Recommender expects --user-relation=FILE.");
                }
                else
                {
                    ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                    Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers);                     // TODO move to DisplayDataStats
                }
            }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
            {
                if (item_relations_file == null)
                {
                    Usage("Recommender expects --item-relation=FILE.");
                }
                else
                {
                    ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                    Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems);                     // TODO move to DisplayDataStats
                }
            }

            // test data
            if (test_ratio == 0)
            {
                if (test_file != null)
                {
                    test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
                }
            }
            else
            {
                var split     = new PosOnlyFeedbackSimpleSplit <PosOnlyFeedback <SparseBooleanMatrix> >(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }
        });

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
    }
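When test_ratio is non-zero, LoadData() falls back to PosOnlyFeedbackSimpleSplit instead of reading a test file; a minimal standalone sketch of that path (the file name and the 0.2 ratio are placeholders):

// Read positive-only feedback and split 20% of the events off for testing.
var user_mapping = new EntityMapping();
var item_mapping = new EntityMapping();
var all_data = ItemRecommendation.Read("feedback.txt", user_mapping, item_mapping);  // placeholder file
var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(all_data, 0.2);
var train = split.Train[0];
var test  = split.Test[0];
Console.WriteLine("train: {0} events, test: {1} events", train.Count, test.Count);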
Exemple #43
0
    static void LoadData()
    {
        TimeSpan loading_time = Utils.MeasureTime(delegate() {
            // training data
            training_data = ItemRecommendation.Read(Path.Combine(data_dir, training_file), user_mapping, item_mapping);

            // relevant users and items
            if (relevant_users_file != null)
                relevant_users = new HashSet<int>(user_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_users_file))));
            else
                relevant_users = training_data.AllUsers;
            if (relevant_items_file != null)
                relevant_items = new HashSet<int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
            else
                relevant_items = training_data.AllItems;

            if (! (recommender is MyMediaLite.ItemRecommendation.Random))
                ((ItemRecommender)recommender).Feedback = training_data;

            // user attributes
            if (recommender is IUserAttributeAwareRecommender)
            {
                if (user_attributes_file == null)
                    Usage("Recommender expects --user-attributes=FILE.");
                else
                    ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            }

            // item attributes
            if (recommender is IItemAttributeAwareRecommender)
            {
                if (item_attributes_file == null)
                    Usage("Recommender expects --item-attributes=FILE.");
                else
                    ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }
            if (filtered_eval)
            {
                if (item_attributes_file == null)
                    Usage("--filtered-evaluation expects --item-attributes=FILE.");
                else
                    item_attributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }

            // user relation
            if (recommender is IUserRelationAwareRecommender)
                if (user_relations_file == null)
                {
                    Usage("Recommender expects --user-relation=FILE.");
                }
                else
                {
                    ((IUserRelationAwareRecommender)recommender).UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
                    Console.WriteLine("relation over {0} users", ((IUserRelationAwareRecommender)recommender).NumUsers); // TODO move to DisplayDataStats
                }

            // item relation
            if (recommender is IItemRelationAwareRecommender)
                if (item_relations_file == null)
                {
                    Usage("Recommender expects --item-relation=FILE.");
                }
                else
                {
                    ((IItemRelationAwareRecommender)recommender).ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
                    Console.WriteLine("relation over {0} items", ((IItemRelationAwareRecommender)recommender).NumItems); // TODO move to DisplayDataStats
                }

            // test data
            if (test_ratio == 0)
            {
                if (test_file != null)
                    test_data = ItemRecommendation.Read(Path.Combine(data_dir, test_file), user_mapping, item_mapping);
            }
            else
            {
                var split = new PosOnlyFeedbackSimpleSplit<PosOnlyFeedback<SparseBooleanMatrix>>(training_data, test_ratio);
                training_data = split.Train[0];
                test_data     = split.Test[0];
            }
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
    }
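The attribute-loading pattern above (check the recommender's interface, then assign the attribute matrix) also works outside LoadData(); a minimal sketch, where the concrete recommender class (ItemAttributeKNN) is an assumption:

// Attach item attributes to an attribute-aware recommender (concrete class assumed).
var knn = new MyMediaLite.ItemRecommendation.ItemAttributeKNN();
if (knn is IItemAttributeAwareRecommender)
    ((IItemAttributeAwareRecommender)knn).ItemAttributes =
        AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);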
    public static void Main(string[] args)
    {
        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);

        // check number of command line parameters
        if (args.Length < 4)
        {
            Usage("Not enough arguments.");
        }

        // read command line parameters
        RecommenderParameters parameters = null;

        try     { parameters = new RecommenderParameters(args, 4); }
        catch (ArgumentException e)     { Usage(e.Message); }

        // other parameters
        string data_dir             = parameters.GetRemoveString("data_dir");
        string relevant_items_file  = parameters.GetRemoveString("relevant_items");
        string item_attributes_file = parameters.GetRemoveString("item_attributes");
        string user_attributes_file = parameters.GetRemoveString("user_attributes");
        //string save_mapping_file    = parameters.GetRemoveString( "save_model");
        int  random_seed = parameters.GetRemoveInt32("random_seed", -1);
        bool no_eval     = parameters.GetRemoveBool("no_eval", false);
        bool compute_fit = parameters.GetRemoveBool("compute_fit", false);

        if (random_seed != -1)
        {
            MyMediaLite.Util.Random.InitInstance(random_seed);
        }

        // main data files and method
        string trainfile       = args[0].Equals("-") ? "-" : Path.Combine(data_dir, args[0]);
        string testfile        = args[1].Equals("-") ? "-" : Path.Combine(data_dir, args[1]);
        string load_model_file = args[2];
        string method          = args[3];

        // set correct recommender
        switch (method)
        {
        case "BPR-MF-ItemMapping":
            recommender = Recommender.Configure(bprmf_map, parameters, Usage);
            break;

        case "BPR-MF-ItemMapping-Optimal":
            recommender = Recommender.Configure(bprmf_map_bpr, parameters, Usage);
            break;

        case "BPR-MF-ItemMapping-Complex":
            recommender = Recommender.Configure(bprmf_map_com, parameters, Usage);
            break;

        case "BPR-MF-ItemMapping-kNN":
            recommender = Recommender.Configure(bprmf_map_knn, parameters, Usage);
            break;

        case "BPR-MF-ItemMapping-SVR":
            recommender = Recommender.Configure(bprmf_map_svr, parameters, Usage);
            break;

        case "BPR-MF-UserMapping":
            recommender = Recommender.Configure(bprmf_user_map, parameters, Usage);
            break;

        case "BPR-MF-UserMapping-Optimal":
            recommender = Recommender.Configure(bprmf_user_map_bpr, parameters, Usage);
            break;

        default:
            Usage(string.Format("Unknown method: '{0}'", method));
            break;
        }

        if (parameters.CheckForLeftovers())
        {
            Usage(-1);
        }

        // ID mapping objects
        var user_mapping = new EntityMapping();
        var item_mapping = new EntityMapping();

        // training data
        training_data        = ItemRecommendation.Read(Path.Combine(data_dir, trainfile), user_mapping, item_mapping);
        recommender.Feedback = training_data;

        // relevant items
        if (!relevant_items_file.Equals(string.Empty))
        {
            relevant_items = new HashSet <int>(item_mapping.ToInternalID(Utils.ReadIntegers(Path.Combine(data_dir, relevant_items_file))));
        }
        else
        {
            relevant_items = training_data.AllItems;
        }

        // user attributes
        if (recommender is IUserAttributeAwareRecommender)
        {
            if (user_attributes_file.Equals(string.Empty))
            {
                Usage("Recommender expects user_attributes=FILE.");
            }
            else
            {
                ((IUserAttributeAwareRecommender)recommender).UserAttributes = AttributeData.Read(Path.Combine(data_dir, user_attributes_file), user_mapping);
            }
        }

        // item attributes
        if (recommender is IItemAttributeAwareRecommender)
        {
            if (item_attributes_file.Equals(string.Empty))
            {
                Usage("Recommender expects item_attributes=FILE.");
            }
            else
            {
                ((IItemAttributeAwareRecommender)recommender).ItemAttributes = AttributeData.Read(Path.Combine(data_dir, item_attributes_file), item_mapping);
            }
        }

        // test data
        test_data = ItemRecommendation.Read(Path.Combine(data_dir, testfile), user_mapping, item_mapping);

        TimeSpan seconds;

        Recommender.LoadModel(recommender, load_model_file);

        // set the maximum user and item IDs in the recommender - this is important for the cold start use case
        recommender.MaxUserID = user_mapping.InternalIDs.Max();
        recommender.MaxItemID = item_mapping.InternalIDs.Max();

        DisplayDataStats();

        Console.Write(recommender.ToString() + " ");

        if (compute_fit)
        {
            seconds = Utils.MeasureTime(delegate() {
                int num_iter = recommender.NumIterMapping;
                recommender.NumIterMapping = 0;
                recommender.LearnAttributeToFactorMapping();
                Console.Error.WriteLine();
                Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "iteration {0} fit {1}", -1, recommender.ComputeFit()));

                recommender.NumIterMapping = 1;
                for (int i = 0; i < num_iter; i++)
                {
                    recommender.IterateMapping();
                    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "iteration {0} fit {1}", i, recommender.ComputeFit()));
                }
                recommender.NumIterMapping = num_iter;                 // restore
            });
        }
        else
        {
            seconds = Utils.MeasureTime(delegate() {
                recommender.LearnAttributeToFactorMapping();
            });
        }
        Console.Write("mapping_time " + seconds + " ");

        if (!no_eval)
        {
            seconds = EvaluateRecommender(recommender, test_data, training_data);
        }
        Console.WriteLine();
    }
    static TimeSpan EvaluateRecommender(BPRMF_Mapping recommender, IPosOnlyFeedback test_data, IPosOnlyFeedback train_data)
    {
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "fit {0}", recommender.ComputeFit()));

        TimeSpan seconds = Utils.MeasureTime( delegate()
            {
                var result = Items.Evaluate(
                                recommender,
                                test_data,
                                train_data,
                                test_data.AllUsers,
                                relevant_items
                );
                DisplayResults(result);
            } );
        Console.Write(" testing " + seconds);

        return seconds;
    }
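For reference, a plausible invocation of this mapping program. The executable name is an assumption, and the key=value syntax for the optional parameters is inferred from the RecommenderParameters calls in Main:

// Hypothetical command line (positional arguments correspond to args[0..3] in Main):
//   MappingItemPrediction.exe train.txt test.txt model.bin BPR-MF-ItemMapping data_dir=/data random_seed=1
// The same call expressed as an argument array:
string[] example_args = {
    "train.txt", "test.txt", "model.bin", "BPR-MF-ItemMapping",
    "data_dir=/data", "random_seed=1"
};
Main(example_args);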
Exemple #46
0
		/// <summary>Get a string with data statistics for item recommendation datasets</summary>
		/// <param name="training_data">the training dataset</param>
		/// <param name="test_data">the test dataset</param>
		/// <param name="user_attributes">the user attributes</param>
		/// <param name="item_attributes">the item attributes</param>
		/// <returns>a string with the dataset statistics</returns>
		public static string Statistics(
			this IPosOnlyFeedback training_data, IPosOnlyFeedback test_data = null,
			IBooleanMatrix user_attributes = null, IBooleanMatrix item_attributes = null)
		{
			// training data stats
			int num_users = training_data.AllUsers.Count;
			int num_items = training_data.AllItems.Count;
			long matrix_size = (long) num_users * num_items;
			long empty_size  = (long) matrix_size - training_data.Count;
			double sparsity = (double) 100L * empty_size / matrix_size;
			string s = string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n", num_users, num_items, training_data.Count, sparsity);

			// test data stats
			if (test_data != null)
			{
				num_users = test_data.AllUsers.Count;
				num_items = test_data.AllItems.Count;
				matrix_size = (long) num_users * num_items;
				empty_size  = (long) matrix_size - test_data.Count;
				sparsity = (double) 100L * empty_size / matrix_size; // TODO depends on the eval scheme whether this is correct
				s += string.Format(CultureInfo.InvariantCulture, "test data:     {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n", num_users, num_items, test_data.Count, sparsity);
			}

			return s + Statistics(user_attributes, item_attributes);
		}
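Since Statistics() is an extension method on IPosOnlyFeedback, it can be called directly on a dataset; a minimal sketch (the attribute matrices are optional and left out here):

// Print training/test statistics before running an experiment.
Console.Write(training_data.Statistics(test_data));
// or, for the training data alone:
Console.Write(training_data.Statistics());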
Exemple #47
0
		/// <summary>Evaluation for rankings of filtered items</summary>
		/// <remarks>
		/// </remarks>
		/// <param name="recommender">item recommender</param>
		/// <param name="test">test cases</param>
		/// <param name="train">training data</param>
		/// <param name="item_attributes">the item attributes to be used for filtering</param>
		/// <param name="relevant_users">a collection of integers with all relevant users</param>
		/// <param name="relevant_items">a collection of integers with all relevant items</param>
		/// <returns>a dictionary containing the evaluation results</returns>
		static public Dictionary<string, double> Evaluate(
			IItemRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
		    SparseBooleanMatrix item_attributes,
		    ICollection<int> relevant_users,
			ICollection<int> relevant_items)
		{
			if (train.Overlap(test) > 0)
				Console.Error.WriteLine("WARNING: Overlapping train and test data");

			SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

			// compute evaluation measures
			double auc_sum     = 0;
			double map_sum     = 0;
			double prec_5_sum  = 0;
			double prec_10_sum = 0;
			double prec_15_sum = 0;
			double ndcg_sum    = 0;

			// for counting the users and the evaluation lists
			int num_lists = 0;
			int num_users = 0;
			int last_user_id = -1;

			foreach (int user_id in relevant_users)
			{
				var filtered_items = GetFilteredItems(user_id, test, item_attributes);

				foreach (int attribute_id in filtered_items.Keys)
				{
					// TODO optimize this a bit, currently it is quite naive
					var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
					relevant_filtered_items.IntersectWith(relevant_items);

					var correct_items = new HashSet<int>(filtered_items[attribute_id]);
					correct_items.IntersectWith(relevant_filtered_items);

					// number of items actually evaluated for this user:
					// the relevant filtered items minus those already seen in the training data
					var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
					relevant_items_in_train.IntersectWith(relevant_filtered_items);
					int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count;

					// skip lists without correct items, or where all evaluated items are correct
					// (there would be nothing to rank against)
					if (correct_items.Count == 0)
						continue;
					if (num_eval_items - correct_items.Count == 0)
						continue;

					// counting stats
					num_lists++;
					if (last_user_id != user_id)
					{
						last_user_id = user_id;
						num_users++;
					}

					// evaluation
					int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);

					auc_sum     += Items.AUC(prediction, correct_items, train.UserMatrix[user_id]);
					map_sum     += Items.MAP(prediction, correct_items, train.UserMatrix[user_id]);
					ndcg_sum    += Items.NDCG(prediction, correct_items, train.UserMatrix[user_id]);
					prec_5_sum  += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
					prec_10_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
					prec_15_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

					if (prediction.Length != relevant_filtered_items.Count)
						throw new Exception("Not all items have been ranked.");

					if (num_lists % 1000 == 0)
						Console.Error.Write(".");
					if (num_lists % 20000 == 0)
						Console.Error.WriteLine();
				}
			}

			var result = new Dictionary<string, double>();
			result.Add("AUC",     auc_sum / num_lists);
			result.Add("MAP",     map_sum / num_lists);
			result.Add("NDCG",    ndcg_sum / num_lists);
			result.Add("prec@5",  prec_5_sum / num_lists);
			result.Add("prec@10", prec_10_sum / num_lists);
			result.Add("prec@15", prec_15_sum / num_lists);
			result.Add("num_users", num_users);
			result.Add("num_lists", num_lists);
			result.Add("num_items", relevant_items.Count);

			return result;
		}
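A minimal sketch of calling the filtered evaluation above; the containing class name (ItemsFiltered) is an assumption, and item_attributes is assumed to have been read elsewhere (e.g. via AttributeData.Read):

// Evaluate rankings restricted to the attributes of each user's test items.
var filtered_results = ItemsFiltered.Evaluate(
    recommender,
    test_data,
    training_data,
    item_attributes,          // SparseBooleanMatrix: item ID -> attribute IDs
    test_data.AllUsers,       // relevant_users
    training_data.AllItems);  // relevant_items
foreach (var entry in filtered_results)
    Console.WriteLine("{0} {1}", entry.Key, entry.Value);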