예제 #1
0
        /// <summary>Cross-validate an item recommender over all folds of a dataset split</summary>
        /// <remarks>
        /// Every fold is trained and evaluated on its own clone of the recommender; the folds are processed in parallel.
        /// All result keys are summed over the folds. The entries for Items.Measures, "num_users", "num_items" and
        /// (if requested) "fit" are then divided by the number of folds; any other key keeps its sum.
        /// </remarks>
        /// <param name="recommender">an item recommender; must be an ItemRecommender</param>
        /// <param name="split">a dataset split</param>
        /// <param name="test_users">a collection of integers with all test users</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="compute_fit">if set to true, measure the fit on the training data as well (stored under "fit")</param>
        /// <param name="show_results">set to true to print the results of each fold to STDERR</param>
        /// <returns>a dictionary containing the average results over the different folds of the split</returns>
        /// <exception cref="ArgumentException">recommender is not an ItemRecommender</exception>
        public static ItemRecommendationEvaluationResults DoCrossValidation(
            this IRecommender recommender,
            ISplit<IPosOnlyFeedback> split,
            IList<int> test_users,
            IList<int> candidate_items,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            bool compute_fit = false,
            bool show_results = false)
        {
            if (!(recommender is ItemRecommender))
                throw new ArgumentException("recommender must be of type ItemRecommender");

            var summed_results = new ItemRecommendationEvaluationResults();
            var num_folds = split.NumberOfFolds;

            Parallel.For(0, (int) num_folds, fold_index =>
            {
                try
                {
                    // train on a private copy so that the caller's recommender stays untouched
                    var fold_recommender = (ItemRecommender) recommender.Clone();
                    var fold_training = split.Train[fold_index];
                    fold_recommender.Feedback = fold_training;
                    fold_recommender.Train();

                    var fold_eval = Items.Evaluate(fold_recommender, split.Test[fold_index], fold_training, test_users, candidate_items, candidate_item_mode);
                    if (compute_fit)
                        fold_eval["fit"] = (float) fold_recommender.ComputeFit();

                    // the accumulator is shared between the fold workers, so guard every update
                    lock (summed_results)
                    {
                        foreach (var measure in fold_eval.Keys)
                            summed_results[measure] = summed_results.ContainsKey(measure)
                                ? summed_results[measure] + fold_eval[measure]
                                : fold_eval[measure];
                    }

                    if (show_results)
                        Console.Error.WriteLine("fold {0} {1}", fold_index, fold_eval);
                }
                catch (Exception e)
                {
                    // report the failure right away, then let Parallel.For propagate it
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            // turn the sums into averages
            foreach (var measure in Items.Measures)
                summed_results[measure] /= num_folds;
            summed_results["num_users"] /= num_folds;
            summed_results["num_items"] /= num_folds;
            if (compute_fit)
                summed_results["fit"] /= num_folds;

            return summed_results;
        }
		/// <summary>Evaluate a rating predictor as an item ranker on the folds of a rating dataset split</summary>
		/// <remarks>
		/// For every fold, a clone of the recommender is trained on the fold's training ratings. Training and test
		/// ratings are then converted to positive-only feedback and passed to Items.Evaluate, using all users of the
		/// fold's test data as test users. The folds are processed in parallel.
		/// All result keys are summed over the folds. The entries for Items.Measures, "num_users", "num_items" and
		/// (if requested) "fit" are then divided by the number of folds; any other key keeps its sum.
		/// </remarks>
		/// <param name="recommender">a rating predictor</param>
		/// <param name="split">a rating dataset split</param>
		/// <param name="candidate_items">a collection of integers with all candidate items</param>
		/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
		/// <param name="compute_fit">if set to true, measure the fit on the training data as well (stored under "fit")</param>
		/// <param name="show_results">set to true to print the results of each fold to STDERR</param>
		/// <returns>a dictionary containing the average results over the different folds of the split</returns>
		public static EvaluationResults DoRatingBasedRankingCrossValidation(
			this RatingPredictor recommender,
			ISplit<IRatings> split,
			IList<int> candidate_items,
			CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
			bool compute_fit = false,
			bool show_results = false)
		{
			var avg_results = new ItemRecommendationEvaluationResults();

			Parallel.For(0, (int) split.NumberOfFolds, fold =>
			{
				try
				{
					var split_recommender = (RatingPredictor) recommender.Clone(); // avoid changes in recommender
					split_recommender.Ratings = split.Train[fold];
					split_recommender.Train();

					var test_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Test[fold]);
					var training_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Train[fold]);
					IList<int> test_users = test_data_posonly.AllUsers;
					var fold_results = Items.Evaluate(split_recommender, test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode);
					if (compute_fit)
						fold_results["fit"] = (float) split_recommender.ComputeFit();

					// thread-safe stats: the accumulator is shared between the fold workers
					lock (avg_results)
					{
						foreach (var key in fold_results.Keys)
							if (avg_results.ContainsKey(key))
								avg_results[key] += fold_results[key];
							else
								avg_results[key] = fold_results[key];
					}

					if (show_results)
						Console.Error.WriteLine("fold {0} {1}", fold, fold_results);
				}
				catch (Exception e)
				{
					// report the failure right away, then let Parallel.For propagate it
					Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
					throw;
				}
			});

			// turn the sums into averages
			foreach (var key in Items.Measures)
				avg_results[key] /= split.NumberOfFolds;
			avg_results["num_users"] /= split.NumberOfFolds;
			avg_results["num_items"] /= split.NumberOfFolds;
			// "fit" was summed over the folds as well, so it has to be averaged like the other entries
			if (compute_fit)
				avg_results["fit"] /= split.NumberOfFolds;

			return avg_results;
		}
예제 #3
0
		/// <summary>Online evaluation for rankings of items</summary>
		/// <remarks>
		/// The evaluation protocol works as follows:
		/// For every test user, evaluate on the test items, and then add those test items to the training set and perform an incremental update.
		/// The sequence of users is random.
		/// Users none of whose test items are among the candidate items are skipped.
		/// </remarks>
		/// <param name="recommender">the item recommender to be evaluated; must implement IIncrementalItemRecommender</param>
		/// <param name="test">test cases</param>
		/// <param name="training">training data (must be connected to the recommender's training data)</param>
		/// <param name="test_users">a list of all test user IDs; Shuffle() is called on this list, so its order may change for the caller</param>
		/// <param name="candidate_items">a list of all candidate item IDs</param>
		/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
		/// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
		/// <exception cref="ArgumentException">recommender does not implement IIncrementalItemRecommender</exception>
		public static ItemRecommendationEvaluationResults EvaluateOnline(
			this IRecommender recommender,
			IPosOnlyFeedback test, IPosOnlyFeedback training,
			IList<int> test_users, IList<int> candidate_items,
			CandidateItems candidate_item_mode)
		{
			var incremental_recommender = recommender as IIncrementalItemRecommender;
			if (incremental_recommender == null)
				throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");

			candidate_items = Items.Candidates(candidate_items, candidate_item_mode, test, training);

			test_users.Shuffle();
			var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();
			foreach (int user_id in test_users)
			{
				// test.ByUser yields indices into the test data, not item IDs,
				// so translate them before comparing against the candidate items
				var user_items = new List<int>();
				foreach (int index in test.ByUser[user_id])
					user_items.Add(test.Items[index]);

				if (!candidate_items.Intersect(user_items).Any())
					continue;

				// prepare data
				var current_test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
				foreach (int item_id in user_items)
					current_test_data.Add(user_id, item_id);
				// evaluate user; the candidate items are already resolved, hence EXPLICIT
				var current_result = Items.Evaluate(recommender, current_test_data, training, current_test_data.AllUsers, candidate_items, CandidateItems.EXPLICIT);
				results_by_user[user_id] = current_result;

				// update recommender
				var tuples = new List<Tuple<int, int>>();
				foreach (int item_id in user_items)
					tuples.Add(Tuple.Create(user_id, item_id));
				incremental_recommender.AddFeedback(tuples);
				// TODO candidate_items should be updated properly
			}

			var results = new ItemRecommendationEvaluationResults();

			// sum up the per-user results, then average them
			foreach (int u in results_by_user.Keys)
				foreach (string measure in Items.Measures)
					results[measure] += results_by_user[u][measure];

			foreach (string measure in Items.Measures)
				results[measure] /= results_by_user.Count;

			results["num_users"] = results_by_user.Count;
			results["num_items"] = candidate_items.Count;
			results["num_lists"] = results_by_user.Count;

			return results;
		}
예제 #4
0
    /// <summary>Print the per-key mean of a list of evaluation results to STDOUT</summary>
    /// <remarks>
    /// Every key is summed over all results in which it occurs and divided by the total number of results.
    /// </remarks>
    /// <param name="result_list">the evaluation results to average</param>
    private static void writeAvgResults(List <MyMediaLite.Eval.ItemRecommendationEvaluationResults> result_list)
    {
        var sums = new MyMediaLite.Eval.ItemRecommendationEvaluationResults();
        int num_results = 0;

        foreach (var current in result_list)
        {
            foreach (var measure in current.Keys)
            {
                // make sure that the key exists before adding to it
                if (!sums.ContainsKey(measure))
                    sums[measure] = 0f;
                sums[measure] += current[measure];
            }
            num_results++;
        }

        Console.WriteLine("Avg results after " + num_results.ToString() + " iterations.");
        foreach (var measure in sums.Keys)
            Console.WriteLine("{0}={1}", measure, sums[measure] / num_results);
    }
예제 #5
0
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// The evaluation protocol works as follows:
        /// For every test user, evaluate on the test items, and then add those test items to the training set and perform an incremental update.
        /// The sequence of users is random.
        /// Users none of whose test items are among the candidate items are skipped.
        /// </remarks>
        /// <param name="recommender">the item recommender to be evaluated; must implement IIncrementalItemRecommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data (must be connected to the recommender's training data)</param>
        /// <param name="test_users">a list of all test user IDs; Shuffle() is called on this list, so its order may change for the caller</param>
        /// <param name="candidate_items">a list of all candidate item IDs</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        /// <exception cref="ArgumentException">recommender does not implement IIncrementalItemRecommender</exception>
        public static ItemRecommendationEvaluationResults EvaluateOnline(
            this IRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback training,
            IList<int> test_users, IList<int> candidate_items,
            CandidateItems candidate_item_mode)
        {
            var incremental_recommender = recommender as IIncrementalItemRecommender;

            if (incremental_recommender == null)
            {
                throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");
            }

            candidate_items = Items.Candidates(candidate_items, candidate_item_mode, test, training);

            test_users.Shuffle();
            var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();

            foreach (int user_id in test_users)
            {
                // test.ByUser yields indices into the test data, not item IDs,
                // so translate them before comparing against the candidate items
                var user_items = new List<int>();
                foreach (int index in test.ByUser[user_id])
                {
                    user_items.Add(test.Items[index]);
                }

                if (!candidate_items.Intersect(user_items).Any())
                {
                    continue;
                }

                // prepare data
                var current_test_data = new PosOnlyFeedback<SparseBooleanMatrix>();
                foreach (int item_id in user_items)
                {
                    current_test_data.Add(user_id, item_id);
                }
                // evaluate user; the candidate items are already resolved, hence EXPLICIT
                var current_result = Items.Evaluate(recommender, current_test_data, training, current_test_data.AllUsers, candidate_items, CandidateItems.EXPLICIT);
                results_by_user[user_id] = current_result;

                // update recommender
                var tuples = new List<Tuple<int, int>>();
                foreach (int item_id in user_items)
                {
                    tuples.Add(Tuple.Create(user_id, item_id));
                }
                incremental_recommender.AddFeedback(tuples);
                // TODO candidate_items should be updated properly
            }

            var results = new ItemRecommendationEvaluationResults();

            // sum up the per-user results, then average them
            foreach (int u in results_by_user.Keys)
            {
                foreach (string measure in Items.Measures)
                {
                    results[measure] += results_by_user[u][measure];
                }
            }

            foreach (string measure in Items.Measures)
            {
                results[measure] /= results_by_user.Count;
            }

            results["num_users"] = results_by_user.Count;
            results["num_items"] = candidate_items.Count;
            results["num_lists"] = results_by_user.Count;

            return results;
        }
예제 #6
0
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// unless repeated_events is set to RepeatedEvents.Yes.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account; 'num_lists' is set to the same value as 'num_users'.
        ///
        /// Users without correct items among the candidates, and users for whom every remaining candidate is a
        /// correct item, are skipped.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="test_users">a list of integers with all test users; if null, use all users in the test cases</param>
        /// <param name="candidate_items">a list of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list); default is RepeatedEvents.No</param>
        /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        public static ItemRecommendationEvaluationResults Evaluate(
            this IRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback training,
            IList<int> test_users = null,
            IList<int> candidate_items = null,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            RepeatedEvents repeated_events = RepeatedEvents.No,
            int n = -1)
        {
            if (test_users == null)
                test_users = test.AllUsers;
            candidate_items = Candidates(candidate_items, candidate_item_mode, test, training);

            var result = new ItemRecommendationEvaluationResults();

            // make sure that the user matrix is completely initialized before entering parallel code
            var training_user_matrix = training.UserMatrix;
            var test_user_matrix     = test.UserMatrix;

            int num_users = 0;
            Parallel.ForEach(test_users, user_id => {
                try
                {
                    var correct_items = new HashSet<int>(test_user_matrix[user_id]);
                    correct_items.IntersectWith(candidate_items);
                    if (correct_items.Count == 0)
                        return;

                    var ignore_items_for_this_user = new HashSet<int>(
                        repeated_events == RepeatedEvents.Yes || training_user_matrix[user_id] == null ? new int[0] : training_user_matrix[user_id]
                    );

                    ignore_items_for_this_user.IntersectWith(candidate_items);
                    int num_candidates_for_this_user = candidate_items.Count - ignore_items_for_this_user.Count;
                    // nothing to rank if every remaining candidate is a correct item
                    if (correct_items.Count == num_candidates_for_this_user)
                        return;

                    var prediction = recommender.Recommend(user_id, candidate_items:candidate_items, n:n, ignore_items:ignore_items_for_this_user);
                    var prediction_list = (from t in prediction select t.Item1).ToArray();

                    // candidates that did not make it into the returned list
                    int num_dropped_items = num_candidates_for_this_user - prediction.Count;
                    double auc  = AUC.Compute(prediction_list, correct_items, num_dropped_items);
                    double map  = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg = NDCG.Compute(prediction_list, correct_items);
                    double rr   = ReciprocalRank.Compute(prediction_list, correct_items);
                    var positions = new int[] { 5, 10 };
                    var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    // thread-safe incrementing; remember the counter value seen under the lock,
                    // so that the progress output below does not read the shared counter unsynchronized
                    int users_so_far;
                    lock (result)
                    {
                        users_so_far = ++num_users;
                        result["AUC"]       += (float) auc;
                        result["MAP"]       += (float) map;
                        result["NDCG"]      += (float) ndcg;
                        result["MRR"]       += (float) rr;
                        result["prec@5"]    += (float) prec[5];
                        result["prec@10"]   += (float) prec[10];
                        result["recall@5"]  += (float) recall[5];
                        result["recall@10"] += (float) recall[10];
                    }

                    // progress indicator on STDERR
                    if (users_so_far % 1000 == 0)
                        Console.Error.Write(".");
                    if (users_so_far % 60000 == 0)
                        Console.Error.WriteLine();
                }
                catch (Exception e)
                {
                    // report the failure right away, then let Parallel.ForEach propagate it
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            // average the summed measures over the evaluated users
            foreach (string measure in Measures)
                result[measure] /= num_users;
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = candidate_items.Count;

            return result;
        }
예제 #7
0
        /// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
        /// <remarks>
        /// Every fold gets its own clone of the recommender, which is trained and evaluated once, and then iterated
        /// until max_iter is reached. The folds are processed in parallel. After the initial training and after every
        /// iteration, a result object built from the most recent per-fold results is printed.
        /// </remarks>
        /// <param name="recommender">an item recommender; must be an ItemRecommender that implements IIterativeModel</param>
        /// <param name="split">a positive-only feedback dataset split</param>
        /// <param name="test_users">a collection of integers with all test users</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
        /// <param name="max_iter">the maximum number of iterations</param>
        /// <param name="find_iter">the report interval: the folds are re-evaluated whenever the iteration number is a multiple of it</param>
        /// <param name="show_fold_results">set to true to print per-fold results to STDOUT</param>
        /// <exception cref="ArgumentException">recommender is not an ItemRecommender or does not implement IIterativeModel</exception>
        public static void DoIterativeCrossValidation(
            this IRecommender recommender,
            ISplit<IPosOnlyFeedback> split,
            IList<int> test_users,
            IList<int> candidate_items,
            CandidateItems candidate_item_mode,
            RepeatedEvents repeated_events,
            uint max_iter,
            uint find_iter = 1,
            bool show_fold_results = false)
        {
            if (!(recommender is IIterativeModel))
                throw new ArgumentException("recommender must be of type IIterativeModel");
            if (!(recommender is ItemRecommender))
                throw new ArgumentException("recommender must be of type ItemRecommender");

            var split_recommenders     = new ItemRecommender[split.NumberOfFolds];
            var iterative_recommenders = new IIterativeModel[split.NumberOfFolds];
            var fold_results = new ItemRecommendationEvaluationResults[split.NumberOfFolds];

            // initial training and evaluation
            Parallel.For(0, (int) split.NumberOfFolds, i =>
            {
                try
                {
                    split_recommenders[i] = (ItemRecommender) recommender.Clone(); // to avoid changes in recommender
                    split_recommenders[i].Feedback = split.Train[i];
                    split_recommenders[i].Train();
                    iterative_recommenders[i] = (IIterativeModel) split_recommenders[i];
                    fold_results[i] = Items.Evaluate(split_recommenders[i], split.Test[i], split.Train[i], test_users, candidate_items, candidate_item_mode, repeated_events);
                    // print the results of this fold only, not the whole array
                    if (show_fold_results)
                        Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], iterative_recommenders[i].NumIter);
                }
                catch (Exception e)
                {
                    // report the failure right away, then let Parallel.For propagate it
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });
            Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), iterative_recommenders[0].NumIter);

            // iterative training and evaluation
            for (int it = (int) iterative_recommenders[0].NumIter + 1; it <= max_iter; it++)
            {
                Parallel.For(0, (int) split.NumberOfFolds, i =>
                {
                    try
                    {
                        iterative_recommenders[i].Iterate();

                        if (it % find_iter == 0)
                        {
                            fold_results[i] = Items.Evaluate(split_recommenders[i], split.Test[i], split.Train[i], test_users, candidate_items, candidate_item_mode, repeated_events);
                            // print the results of this fold only, not the whole array
                            if (show_fold_results)
                                Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], it);
                        }
                    }
                    catch (Exception e)
                    {
                        // report the failure right away, then let Parallel.For propagate it
                        Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                        throw;
                    }
                });
                Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), it);
            }
        }
예제 #8
0
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// unless the boolean argument repeated_events is set.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account; 'num_lists' is set to the same value as 'num_users'.
        ///
        /// Users without correct items among the candidates, and users for whom every item to be evaluated is a
        /// correct item, are skipped.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="test_users">a list of integers with all test users; if null, use all users in the test cases</param>
        /// <param name="candidate_items">a list of integers with all candidate items; only used if candidate_item_mode is not TRAINING, TEST, OVERLAP, or UNION</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list); default is false</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        /// <exception cref="ArgumentNullException">no candidate items are available for the given mode</exception>
        public static ItemRecommendationEvaluationResults Evaluate(
            this IRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback training,
            IList<int> test_users = null,
            IList<int> candidate_items = null,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            bool repeated_events = false)
        {
            switch (candidate_item_mode)
            {
                case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
                case CandidateItems.TEST: candidate_items = test.AllItems; break;
                case CandidateItems.OVERLAP: candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
                case CandidateItems.UNION: candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
            }
            if (candidate_items == null)
                throw new ArgumentNullException("candidate_items");
            if (test_users == null)
                test_users = test.AllUsers;

            int num_users = 0;
            var result = new ItemRecommendationEvaluationResults();

            // make sure that UserMatrix is completely initialized before entering parallel code
            var training_user_matrix = training.UserMatrix;
            var test_user_matrix = test.UserMatrix;

            Parallel.ForEach(test_users, user_id =>
            {
                try
                {
                    var correct_items = new HashSet<int>(test_user_matrix[user_id]);
                    correct_items.IntersectWith(candidate_items);

                    // the number of items that will be used for this user
                    var candidate_items_in_train = training_user_matrix[user_id] == null ? new HashSet<int>() : new HashSet<int>(training_user_matrix[user_id]);
                    candidate_items_in_train.IntersectWith(candidate_items);
                    int num_eval_items = candidate_items.Count - (repeated_events ? 0 : candidate_items_in_train.Count);

                    // skip all users that have 0 or #candidate_items test items
                    if (correct_items.Count == 0)
                        return;
                    if (num_eval_items == correct_items.Count)
                        return;

                    IList<int> prediction_list = recommender.PredictItems(user_id, candidate_items);
                    if (prediction_list.Count != candidate_items.Count)
                        throw new Exception("Not all items have been ranked.");

                    ICollection<int> ignore_items = (repeated_events || training_user_matrix[user_id] == null) ? new int[0] : training_user_matrix[user_id];

                    double auc = AUC.Compute(prediction_list, correct_items, ignore_items);
                    double map = PrecisionAndRecall.AP(prediction_list, correct_items, ignore_items);
                    double ndcg = NDCG.Compute(prediction_list, correct_items, ignore_items);
                    double rr = ReciprocalRank.Compute(prediction_list, correct_items, ignore_items);
                    var positions = new int[] { 3, 5, 10 };  // DH: added for p@3 & r@3
                    var prec = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, ignore_items, positions);
                    var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, ignore_items, positions);

                    // thread-safe incrementing; remember the counter value seen under the lock,
                    // so that the progress output below does not read the shared counter unsynchronized
                    int users_so_far;
                    lock (result)
                    {
                        users_so_far = ++num_users;
                        result["AUC"] += (float)auc;
                        result["MAP"] += (float)map;
                        result["NDCG"] += (float)ndcg;
                        result["MRR"] += (float)rr;
                        result["prec@3"] += (float)prec[3];
                        result["prec@5"] += (float)prec[5];
                        result["prec@10"] += (float)prec[10];
                        result["recall@3"] += (float)recall[3];
                        result["recall@5"] += (float)recall[5];
                        result["recall@10"] += (float)recall[10];
                    }

                    // progress indicator on STDERR
                    if (users_so_far % 1000 == 0)
                        Console.Error.Write(".");
                    if (users_so_far % 60000 == 0)
                        Console.Error.WriteLine();
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    Console.Error.WriteLine("===> ERROR: user_id=" + user_id);
                    Console.Error.WriteLine("===> ERROR: training_user_matrix[user_id]=" + training_user_matrix[user_id]);
                    // plain rethrow keeps the original stack trace ("throw e" would reset it)
                    throw;
                }
            });

            // average the summed measures over the evaluated users
            foreach (string measure in Measures)
                result[measure] /= num_users;
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = candidate_items.Count;

            return result;
        }
예제 #9
0
파일: Ensemble.cs 프로젝트: wendelad/RecSys
        /// <summary>Learns per-user ensemble information from probe data</summary>
        /// <remarks>
        /// The users in <c>test_users[0]</c> are processed one after another. For each of them, every recommender in
        /// <c>m_recommenders</c> produces a ranked list, whose item IDs are translated into the ID space of
        /// <c>item_mapping[0]</c>. The index of the recommender with the highest average precision is stored in
        /// <c>best_alg</c>, and a genetic algorithm (fitness function <c>CalculateFitness</c>, stop condition
        /// <c>Terminate</c>) yields one weight in [0, 1] per recommender, stored in <c>ga_weights</c>.
        /// Both dictionaries are keyed by the original (external) user ID.
        /// </remarks>
        /// <param name="test_probe_data">probe test feedback, one entry per recommender</param>
        /// <param name="training_probe_data">probe training feedback, one entry per recommender</param>
        /// <param name="test_users">test users, one list per recommender; a null entry is replaced by all users of the matching probe test data. Only entry 0 is iterated.</param>
        /// <param name="user_mapping">user ID mappings, one per recommender</param>
        /// <param name="item_mapping">item ID mappings, one per recommender</param>
        /// <param name="n">list length passed on to each recommender's Recommend call (default -1)</param>
        public void EvaluateProbe(List<IPosOnlyFeedback> test_probe_data, List<IPosOnlyFeedback> training_probe_data, List<IList<int>> test_users, List<IMapping> user_mapping,
            List<IMapping> item_mapping,
         int n = -1)
        {
            // number of chromosome bits used to encode the weight of one recommender
            const int bits_per_weight = 10;
            // maps the integer decoded from bits_per_weight bits onto the interval [0, 1]
            double rangeConst = 1 / (System.Math.Pow(2, bits_per_weight) - 1);

            var candidate_items      = new List<IList<int>>();
            var training_user_matrix = new List<IBooleanMatrix>();
            var test_user_matrix     = new List<IBooleanMatrix>();

            for (int i = 0; i < m_recommenders.Count; i++)
            {
                // candidates are the union of the probe test and probe training items
                candidate_items.Add(new List<int>(test_probe_data[i].AllItems.Union(training_probe_data[i].AllItems)));

                // fall back to all users of the probe test data when no explicit user list was given
                if (test_users[i] == null)
                    test_users[i] = test_probe_data[i].AllUsers;

                training_user_matrix.Add(training_probe_data[i].UserMatrix);
                test_user_matrix.Add(test_probe_data[i].UserMatrix);
            }

            int num_users = 0;

            foreach (int user_id in test_users[0])
            {
                // user_id lives in the ID space of mapping 0; the original ID links it to the other mappings
                string original = user_mapping[0].ToOriginalID(user_id);

                var list_of_predictions = new List<IList<Tuple<int, float>>>();

                var correct_items = new HashSet<int>(test_user_matrix[0][user_id]);
                correct_items.IntersectWith(candidate_items[0]);

                for (int i = 0; i < m_recommenders.Count; i++)
                {
                    int internalId = user_mapping[i].ToInternalID(original);

                    // items the user already has in the training data must not be recommended again
                    var ignore_items = new HashSet<int>(training_user_matrix[i][internalId]);
                    ignore_items.IntersectWith(candidate_items[i]);

                    // query recommender i with the user's ID in that recommender's own ID space
                    var recommendations = m_recommenders[i].Recommend(internalId, candidate_items: candidate_items[i], n: n, ignore_items: ignore_items);

                    // translate the recommended item IDs into the ID space of mapping 0 so that all lists are comparable
                    for (int j = 0; j < recommendations.Count; j++)
                    {
                        string original_item_id = item_mapping[i].ToOriginalID(recommendations[j].Item1);
                        int item_id_in_mapping_zero = item_mapping[0].ToInternalID(original_item_id);

                        recommendations[j] = new Tuple<int, float>(item_id_in_mapping_zero, recommendations[j].Item2);
                    }

                    list_of_predictions.Add(recommendations);
                }

                // find the single recommender with the best average precision for this user
                double best_ap = 0;
                int best_recommender = 0;

                for (int k = 0; k < list_of_predictions.Count; k++)
                {
                    int[] prediction_probe = (from t in list_of_predictions[k] select t.Item1).ToArray();

                    double ap = PrecisionAndRecall.AP(prediction_probe, correct_items);

                    if (ap > best_ap)
                    {
                        best_ap = ap;
                        best_recommender = k;
                    }
                }

                // publish the per-user data through fields so that the fitness function can see it
                list_prediction_probes = list_of_predictions;
                correct_items_global = correct_items;

                // genetic algorithm settings
                const double crossoverProbability = 0.85;
                const double mutationProbability = 0.08;
                const int elitismPercentage = 5;

                // population built with size argument 40 and bits_per_weight chromosome bits per recommender
                // NOTE(review): the meaning of the two boolean arguments depends on the GA library -- confirm
                var population = new Population(40, list_of_predictions.Count * bits_per_weight, false, false);

                // create the genetic operators
                var elite = new Elite(elitismPercentage);
                var crossover = new Crossover(crossoverProbability, true)
                {
                    CrossoverType = CrossoverType.DoublePoint
                };
                var mutation = new BinaryMutate(mutationProbability, true);

                // create the GA itself
                var ga = new GeneticAlgorithm(population, CalculateFitness);

                // add the operators to the GA process pipeline
                ga.Operators.Add(elite);
                ga.Operators.Add(crossover);
                ga.Operators.Add(mutation);

                // run the GA
                ga.Run(Terminate);

                // decode the best chromosome: every bits_per_weight-bit slice becomes one weight in [0, 1]
                var best = population.GetTop(1)[0];
                var weights = new List<double>();

                for (int i = 0; i < list_prediction_probes.Count; i++)
                {
                    string bits = best.ToBinaryString(i * bits_per_weight, bits_per_weight);
                    weights.Add(Convert.ToInt32(bits, 2) * rangeConst);
                }

                ga_weights[original] = weights;
                best_alg[original] = best_recommender;
                num_users++;

                // progress indicator on STDERR
                if (num_users % 10 == 0)
                    Console.Error.Write(".");
                if (num_users % 100 == 0)
                    Console.Error.WriteLine("");
            }
        }
예제 #10
0
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing); repeated events
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// The test events are visited in random order. Every event whose user is a test user and whose item is a
        /// candidate item is evaluated on its own as a one-event test set. After that, the event is handed to the
        /// recommender via AddFeedback, whether or not it was evaluated.
        /// The measures of each user are averaged over that user's evaluated events, and the returned measures
        /// are the mean of those per-user averages.
        /// </remarks>
        /// <param name="recommender">the item recommender to be evaluated</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data (must be connected to the recommender's training data)</param>
        /// <param name="test_users">a list of all test user IDs; if null, use all users in the test cases</param>
        /// <param name="candidate_items">a list of all candidate item IDs; replaced according to candidate_item_mode for the modes TRAINING, TEST, OVERLAP, and UNION</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <returns>
        /// a dictionary containing the evaluation results (averaged by user), plus 'num_users' (users with at least one
        /// evaluated event), 'num_items' (number of candidate items), and 'num_lists' (number of evaluated events)
        /// </returns>
        /// <exception cref="ArgumentException">if the recommender is not an IIncrementalItemRecommender</exception>
        /// <exception cref="ArgumentNullException">if no candidate items are available after applying candidate_item_mode</exception>
        public static ItemRecommendationEvaluationResults EvaluateOnline(
            this IRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback training,
            IList<int> test_users, IList<int> candidate_items,
            CandidateItems candidate_item_mode)
        {
            var incremental_recommender = recommender as IIncrementalItemRecommender;
            if (incremental_recommender == null)
                throw new ArgumentException("recommender must be of type IIncrementalItemRecommender");

            // prepare candidate items once to avoid recreating them
            switch (candidate_item_mode)
            {
                case CandidateItems.TRAINING: candidate_items = training.AllItems; break;
                case CandidateItems.TEST:     candidate_items = test.AllItems; break;
                case CandidateItems.OVERLAP:  candidate_items = new List<int>(test.AllItems.Intersect(training.AllItems)); break;
                case CandidateItems.UNION:    candidate_items = new List<int>(test.AllItems.Union(training.AllItems)); break;
            }
            candidate_item_mode = CandidateItems.EXPLICIT;

            if (candidate_items == null)
                throw new ArgumentNullException("candidate_items");
            if (test_users == null)
                test_users = test.AllUsers;

            // hash sets turn the per-event membership tests below into constant-time lookups
            var test_user_set      = new HashSet<int>(test_users);
            var candidate_item_set = new HashSet<int>(candidate_items);

            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos = 0;
            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];
            for (int index = 0; index < random_index.Length; index++)
                random_index[index] = index;
            random_index.Shuffle();

            var results_by_user = new Dictionary<int, ItemRecommendationEvaluationResults>();

            int num_lists = 0;

            foreach (int index in random_index)
            {
                int event_user = users[index];
                int event_item = items[index];

                if (test_user_set.Contains(event_user) && candidate_item_set.Contains(event_item))
                {
                    // evaluate this single event as its own test set
                    var current_test = new PosOnlyFeedback<SparseBooleanMatrix>();
                    current_test.Add(event_user, event_item);
                    var current_result = Items.Evaluate(recommender, current_test, training, current_test.AllUsers, candidate_items, candidate_item_mode);

                    // num_users is 1 only if the evaluation actually produced a result for the user
                    if (current_result["num_users"] == 1)
                    {
                        // count every evaluated list, including the first one of each user
                        num_lists++;

                        ItemRecommendationEvaluationResults user_result;
                        if (results_by_user.TryGetValue(event_user, out user_result))
                        {
                            foreach (string measure in Items.Measures)
                                user_result[measure] += current_result[measure];
                            user_result["num_items"]++;
                        }
                        else
                        {
                            // first result for this user: reuse the result object as the per-user accumulator;
                            // "num_items" counts the evaluated events of the user from here on
                            current_result["num_items"] = 1;
                            current_result.Remove("num_users");
                            results_by_user[event_user] = current_result;
                        }
                    }
                }

                // update recommender
                incremental_recommender.AddFeedback(event_user, event_item);
            }

            var results = new ItemRecommendationEvaluationResults();

            // sum up the per-user averages ...
            foreach (var user_result in results_by_user.Values)
                foreach (string measure in Items.Measures)
                    results[measure] += user_result[measure] / user_result["num_items"];

            // ... and average them over the users
            foreach (string measure in Items.Measures)
                results[measure] /= results_by_user.Count;

            results["num_users"] = results_by_user.Count;
            results["num_items"] = candidate_items.Count;
            results["num_lists"] = num_lists;

            return results;
        }
예제 #11
0
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// unless the argument repeated_events is set to RepeatedEvents.Yes.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Users without correct items among the candidates, and users for whom every remaining candidate is a
        /// correct item, are skipped and do not count towards 'num_users'.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="test_users">a list of integers with all test users; if null, use all users in the test cases</param>
        /// <param name="candidate_items">a list of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list); default is RepeatedEvents.No</param>
        /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        static public ItemRecommendationEvaluationResults Evaluate(
            this IRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback training,
            IList <int> test_users             = null,
            IList <int> candidate_items        = null,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            RepeatedEvents repeated_events     = RepeatedEvents.No,
            int n = -1)
        {
            if (test_users == null)
            {
                test_users = test.AllUsers;
            }
            candidate_items = Candidates(candidate_items, candidate_item_mode, test, training);

            var result = new ItemRecommendationEvaluationResults();

            // make sure that the user matrix is completely initialized before entering parallel code
            var training_user_matrix = training.UserMatrix;
            var test_user_matrix     = test.UserMatrix;

            int num_users = 0;

            Parallel.ForEach(test_users, user_id => {
                try
                {
                    var correct_items = new HashSet <int>(test_user_matrix[user_id]);
                    correct_items.IntersectWith(candidate_items);
                    if (correct_items.Count == 0)
                    {
                        // nothing to find for this user
                        return;
                    }

                    // known training items are excluded from the ranking unless repeated events are allowed
                    var ignore_items_for_this_user = new HashSet <int>(
                        (repeated_events == RepeatedEvents.Yes || training_user_matrix[user_id] == null) ? new int[0] : training_user_matrix[user_id]
                        );

                    ignore_items_for_this_user.IntersectWith(candidate_items);
                    int num_candidates_for_this_user = candidate_items.Count - ignore_items_for_this_user.Count;
                    if (correct_items.Count == num_candidates_for_this_user)
                    {
                        // every remaining candidate is correct, so any ranking would be perfect
                        return;
                    }

                    var prediction      = recommender.Recommend(user_id, candidate_items: candidate_items, n: n, ignore_items: ignore_items_for_this_user);
                    var prediction_list = (from t in prediction select t.Item1).ToArray();

                    // candidates that the recommender did not return at all
                    int num_dropped_items = num_candidates_for_this_user - prediction.Count;
                    double auc            = AUC.Compute(prediction_list, correct_items, num_dropped_items);
                    double map            = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg           = NDCG.Compute(prediction_list, correct_items);
                    double rr             = ReciprocalRank.Compute(prediction_list, correct_items);
                    var positions         = new int[] { 5, 10 };
                    var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    // thread-safe incrementing; the counter value is captured inside the lock so that
                    // the progress output below sees every count exactly once
                    int users_so_far;
                    lock (result)
                    {
                        users_so_far = ++num_users;
                        result["AUC"]       += (float)auc;
                        result["MAP"]       += (float)map;
                        result["NDCG"]      += (float)ndcg;
                        result["MRR"]       += (float)rr;
                        result["prec@5"]    += (float)prec[5];
                        result["prec@10"]   += (float)prec[10];
                        result["recall@5"]  += (float)recall[5];
                        result["recall@10"] += (float)recall[10];
                    }

                    // progress indicator on STDERR
                    if (users_so_far % 1000 == 0)
                    {
                        Console.Error.Write(".");
                    }
                    if (users_so_far % 60000 == 0)
                    {
                        Console.Error.WriteLine();
                    }
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            // turn the sums into averages over the evaluated users
            foreach (string measure in Measures)
            {
                result[measure] /= num_users;
            }
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = candidate_items.Count;

            return(result);
        }
예제 #12
0
//		/// <summary>
//		/// Gets string for subselecting all id's used from database
//		/// </summary>
//		/// <returns>The all identifiers string for database.</returns>
//		static private string getAllIdsStringForDatabase(IList<int> allItems){
//
//			string all_ids = "(";
//			bool first = true;
//			foreach (int id in allItems) {
//				if (first) {
//					all_ids += id.ToString ();
//					first = false;
//				} else
//					all_ids += "," + id.ToString ();
//			}
//			all_ids += ")";
//			return all_ids;
//		}
//
////		static public void getWeatherVectorLocation(IList<int> items, string connection_string, ref Dictionary<int,IList<double>> venueWeatherVectors){
////			DBConnect conn = new DBConnect (connection_string);
////			List<string>[] res;
////			res = conn.Select ("select * " +
////			" from weather_avgs_per_venue where id_int in "+getAllIdsStringForDatabase(items), 9);
////			List<string> all_ids = res [0];
////			List<string> temperature = res [1];
////			List<string> precip_intensity = res [2];
////			List<string> wind_speed = res [3];
////			List<string> humidity = res [4];
////			List<string> cloud_cover = res [5];
////			List<string> pressure = res [6];
////			List<string> visibility = res [7];
////			List<string> moonphase = res [8];
////			int i = 0;
////			foreach(string id in all_ids){
////				venueWeatherVectors.Add(int.Parse (id),new List<double> { double.Parse(temperature [i]), double.Parse(precip_intensity [i]), double.Parse(wind_speed [i]), double.Parse(humidity [i]),
////					double.Parse(cloud_cover [i])});
////				i++;
////			}
////		}


        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation,
        /// except the boolean argument repeated_events is set.
        ///
        /// The evaluation measures are listed in the Measures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        /// <list type="bullet">
        ///   <item><description>
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        ///   </description></item>
        /// </list>
        ///
        /// On multi-core/multi-processor systems, the routine tries to use as many cores as possible,
        /// which should lead to an almost linear speed-up.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="training">training data</param>
        /// <param name="n">length of the item list to evaluate -- if set to -1 (default), use the complete list, otherwise compute evaluation measures on the top n items</param>
        /// <returns>a dictionary containing the evaluation results (default is false)</returns>
//		static public ItemRecommendationEvaluationResults Evaluate(
//			this IRecommender recommender,
//			ITimedRatings test,
//			ITimedRatings training,
//			string connection_string = "",
//			int n = -1,double alpha = 0.1)
//		{
//
//			var result = new ItemRecommendationEvaluationResults();
//			var candidates = test.AllItems.Intersect(training.AllItems).ToList();
//			int num_users = 0;
//			ThreadPool.SetMinThreads(test.AllUsers.Count, test.AllUsers.Count);
//			Dictionary<int,IList<int>> user_items = test.getItemsUserDict ();
//			ParallelOptions po = new ParallelOptions{
//				MaxDegreeOfParallelism = Environment.ProcessorCount
//			};
//
//			//foreach(int user_id in test.AllUsers){
//			Parallel.ForEach (test.AllUsers, po, user_id => {
//				try {
//					n = user_items [user_id].Count;
//					IList<Tuple<int,float>> prediction;
//					prediction = recommender.Recommend (user_id, candidate_items: candidates, n: n);
//					var prediction_list = (from t in prediction select t.Item1).ToArray ();
//					int num_candidates_for_this_user = candidates.Count ();
//					int num_dropped_items = num_candidates_for_this_user - prediction.Count;
//					var correct_items = user_items [user_id].Intersect (candidates).ToList ();
//					if (correct_items.Count () == 0)
//						return;
//
//					double auc = AUC.Compute (prediction_list, correct_items, num_dropped_items);
//					double map = PrecisionAndRecall.AP (prediction_list, correct_items);
//					double ndcg = NDCG.Compute (prediction_list, correct_items);
//					double rr = ReciprocalRank.Compute (prediction_list, correct_items);
//					var positions = new int[] { 5, 10 };
//					var prec = PrecisionAndRecall.PrecisionAt (prediction_list, correct_items, positions);
//					var recall = PrecisionAndRecall.RecallAt (prediction_list, correct_items, positions);
//
//					// thread-safe incrementing
//					lock (result) {
//						num_users++;
//						result ["AUC"] += (float)auc;
//						result ["MAP"] += (float)map;
//						result ["NDCG"] += (float)ndcg;
//						result ["MRR"] += (float)rr;
//						result ["prec@5"] += (float)prec [5];
//						result ["prec@10"] += (float)prec [10];
//						result ["recall@5"] += (float)recall [5];
//						result ["recall@10"] += (float)recall [10];
//					}
//
//					if (num_users % 1000 == 0)
//						Console.Error.Write (".");
//					if (num_users % 60000 == 0)
//						Console.Error.WriteLine ();
//				} catch (Exception e) {
//					Console.Error.WriteLine ("===> ERROR: " + e.Message + e.StackTrace);
//					throw;
//				}
//			});
//
//			foreach (string measure in Measures)
//				result[measure] /= num_users;
//			result["num_users"] = num_users;
//			result["num_lists"] = num_users;
//			result["num_items"] = candidates.Count();
//
//			return result;
//		}


        /// <summary>Event-wise evaluation of item rankings on time-stamped test data; prints per-user and average results to STDOUT</summary>
        /// <remarks>
        /// Every test event (user, item, time) is evaluated on its own, in parallel. Events are skipped when the item
        /// is already among the user's training items or is not a candidate item (candidates are the items that occur
        /// in both test and training data). For the remaining events a list of 20 items is requested from the recommender
        /// and the ranking measures are computed with the event's item as the only correct item.
        /// </remarks>
        /// <param name="recommender">the recommender to evaluate; it is cast to ITimeAwareRatingPredictor if time_aware is true</param>
        /// <param name="test">time-stamped test events</param>
        /// <param name="training">time-stamped training data</param>
        /// <param name="dataset">label printed in front of the average results</param>
        /// <param name="time_aware">if true, call RecommendTime with the time of the event, otherwise call Recommend</param>
        /// <param name="n">currently not used; lists of length 20 are always requested</param>
        /// <param name="alpha">currently not used</param>
        /// <returns>the "prec@5" entry of the averaged results</returns>
        static public double EvaluateTime(
            this IRecommender recommender,
            ITimedRatings test,
            ITimedRatings training,
            string dataset,
            bool time_aware,
            int n = -1, double alpha = 0.1)
        {
            // one result accumulator per test user
            var userRecommendationResults = new Dictionary <int, ItemRecommendationEvaluationResults>();

            foreach (int user in test.AllUsers)
            {
                userRecommendationResults.Add(user, new ItemRecommendationEvaluationResults());
            }

            var candidates = test.AllItems.Intersect(training.AllItems).ToList();
            var po         = new ParallelOptions {
                MaxDegreeOfParallelism = Environment.ProcessorCount
            };
            bool init = true;
            Dictionary <int, IList <int> > trainingUserItems = training.getItemsUserDict();

            // the upper bound of Parallel.For is exclusive: pass the full count so that the last test event is evaluated, too
            Parallel.For(0, test.Users.Count, po, index => {
                try{
                    DateTime time = test.Times[index];

                    int user = test.Users[index];
                    int item = test.Items[index];

                    // skip events the user already has in the training data
                    if (trainingUserItems[user].Contains(item))
                    {
                        return;
                    }
                    // skip events whose item is not a candidate
                    if (!candidates.Contains(item))
                    {
                        return;
                    }
                    IList <int> correct_items = new List <int> { item };

                    IList <Tuple <int, float> > prediction;
                    if (time_aware)
                    {
                        prediction = ((ITimeAwareRatingPredictor)recommender).RecommendTime(user, time, candidate_items: candidates, n: 20);
                    }
                    else
                    {
                        prediction = recommender.Recommend(user, candidate_items: candidates, n: 20);
                    }
                    var prediction_list = (from t in prediction select t.Item1).ToArray();

                    double auc    = AUC.Compute(prediction_list, correct_items, 0);
                    double map    = PrecisionAndRecall.AP(prediction_list, correct_items);
                    double ndcg   = NDCG.Compute(prediction_list, correct_items);
                    double rr     = ReciprocalRank.Compute(prediction_list, correct_items);
                    var positions = new int[] { 5, 10 };
                    var prec      = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, positions);
                    var recall    = PrecisionAndRecall.RecallAt(prediction_list, correct_items, positions);

                    lock (userRecommendationResults){
                        ItemRecommendationEvaluationResults res = userRecommendationResults[user];
                        res["AUC"]       += (float)auc;
                        res["MAP"]       += (float)map;
                        res["NDCG"]      += (float)ndcg;
                        res["MRR"]       += (float)rr;
                        res["prec@5"]    += (float)prec [5];
                        res["prec@10"]   += (float)prec [10];
                        res["recall@5"]  += (float)recall [5];
                        res["recall@10"] += (float)recall [10];
                        // NOTE(review): `init` is shared by all users and cleared after the first processed event,
                        // so every later update halves the accumulated values instead of forming a per-user mean,
                        // and the outcome depends on the parallel processing order -- confirm the intended averaging
                        if (!init)
                        {
                            res["AUC"]       /= 2;
                            res["MAP"]       /= 2;
                            res["NDCG"]      /= 2;
                            res["MRR"]       /= 2;
                            res["prec@5"]    /= 2;
                            res["prec@10"]   /= 2;
                            res["recall@5"]  /= 2;
                            res["recall@10"] /= 2;
                        }
                        init = false;
                    }
                } catch (Exception e) {
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });
            ItemRecommendationEvaluationResults avg_res = new ItemRecommendationEvaluationResults();
            int num_users = 0;

            // NOTE(review): the body of this report loop was restored from a damaged source line;
            // verify the exact output format and the accumulation into avg_res against the original project
            Console.WriteLine("Detailed user results:");
            foreach (int user in userRecommendationResults.Keys)
            {
                Console.Write("User: " + user);
                foreach (var key in userRecommendationResults[user].Keys)
                {
                    avg_res[key] += userRecommendationResults[user][key];
                    Console.WriteLine("{0}={1}", key, userRecommendationResults[user][key]);
                }
                num_users++;
            }
            // average the summed measures over all test users
            foreach (string measure in Measures)
            {
                avg_res[measure] /= num_users;
            }
            Console.WriteLine(dataset + " Avg results:");
            foreach (var key in avg_res.Keys)
            {
                Console.WriteLine("{0}={1}", key, avg_res[key]);
            }
            return(avg_res["prec@5"]);
        }
        /// <summary>Cross-validate the item ranking quality of a rating predictor on the folds of a dataset split</summary>
        /// <remarks>
        /// The folds are processed in parallel, each on its own clone of the recommender: the clone is trained on the
        /// fold's training ratings, both parts of the fold are converted to positive-only feedback, and the clone is
        /// evaluated with Items.Evaluate for all users of the fold's test part.
        /// </remarks>
        /// <param name="recommender">the rating predictor to evaluate; it is cloned per fold and not modified</param>
        /// <param name="split">a rating dataset split</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="compute_fit">if set to true, store the fit on the training data under the key "fit" as well</param>
        /// <param name="show_results">set to true to print the results of each fold to STDERR</param>
        /// <returns>the results summed over the folds, with the ranking measures, "num_users", and "num_items" divided by the number of folds</returns>
        static public EvaluationResults DoRatingBasedRankingCrossValidation(
            this RatingPredictor recommender,
            ISplit <IRatings> split,
            IList <int> candidate_items,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            bool compute_fit  = false,
            bool show_results = false)
        {
            var summed_results = new ItemRecommendationEvaluationResults();

            Parallel.For(0, (int)split.NumberOfFolds, fold_index =>
            {
                try
                {
                    // work on a private copy so that the caller's recommender stays untouched
                    var fold_recommender = (RatingPredictor)recommender.Clone();
                    fold_recommender.Ratings = split.Train[fold_index];
                    fold_recommender.Train();

                    // ranking evaluation works on positive-only feedback
                    var test_feedback     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[fold_index]);
                    var training_feedback = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[fold_index]);

                    var results_of_fold = Items.Evaluate(
                        fold_recommender,
                        test_feedback,
                        training_feedback,
                        test_feedback.AllUsers,
                        candidate_items,
                        candidate_item_mode);

                    if (compute_fit)
                        results_of_fold["fit"] = (float)fold_recommender.ComputeFit();

                    // several folds may finish at the same time, so guard the shared sums
                    lock (summed_results)
                    {
                        foreach (var result_key in results_of_fold.Keys)
                        {
                            if (!summed_results.ContainsKey(result_key))
                                summed_results[result_key] = results_of_fold[result_key];
                            else
                                summed_results[result_key] += results_of_fold[result_key];
                        }
                    }

                    if (show_results)
                        Console.Error.WriteLine("fold {0} {1}", fold_index, results_of_fold);
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            // turn the sums into averages over the folds
            foreach (var measure in Items.Measures)
                summed_results[measure] /= split.NumberOfFolds;

            summed_results["num_users"] /= split.NumberOfFolds;
            summed_results["num_items"] /= split.NumberOfFolds;

            return summed_results;
        }
        /// <summary>Evaluate an iterative rating predictor as an item ranker on the folds of a dataset split, display results on STDOUT</summary>
        /// <remarks>
        /// Every fold works on its own clone of the recommender, trained on that fold's training ratings.
        /// The ratings of each fold are wrapped into positive-only feedback before being handed to Items.Evaluate.
        /// After the initial training, and after every further iteration, one line with the combined fold results is written to STDOUT.
        /// </remarks>
        /// <param name="recommender">a rating predictor that also implements IIterativeModel</param>
        /// <param name="split">a rating dataset split</param>
        /// <param name="test_users">a collection of integers with all test users</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
        /// <param name="max_iter">the maximum number of iterations</param>
        /// <param name="find_iter">the report interval: folds are re-evaluated whenever the iteration number is a multiple of this value</param>
        /// <param name="show_fold_results">if set to true, print per-fold results to STDOUT</param>
        /// <exception cref="ArgumentException">thrown if recommender does not implement IIterativeModel</exception>
        public static void DoRatingBasedRankingIterativeCrossValidation(
            this RatingPredictor recommender,
            ISplit <IRatings> split,
            IList <int> test_users,
            IList <int> candidate_items,
            CandidateItems candidate_item_mode,
            RepeatedEvents repeated_events,
            uint max_iter,
            uint find_iter         = 1,
            bool show_fold_results = false)
        {
            if (!(recommender is IIterativeModel))
            {
                throw new ArgumentException("recommender must be of type IIterativeModel");
            }

            // one slot per fold; each parallel task only touches its own index
            var split_recommenders     = new RatingPredictor[split.NumberOfFolds];
            var iterative_recommenders = new IIterativeModel[split.NumberOfFolds];
            var fold_results           = new ItemRecommendationEvaluationResults[split.NumberOfFolds];

            // initial training and evaluation
            Parallel.For(0, (int)split.NumberOfFolds, i =>
            {
                try
                {
                    split_recommenders[i]         = (RatingPredictor)recommender.Clone();              // to avoid changes in recommender
                    split_recommenders[i].Ratings = split.Train[i];
                    split_recommenders[i].Train();
                    iterative_recommenders[i] = (IIterativeModel)split_recommenders[i];

                    var test_data_posonly     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[i]);
                    var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[i]);
                    fold_results[i]           = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
                    if (show_fold_results)
                    {
                        // print this fold's own results; passing the whole array would only print its type name
                        Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], iterative_recommenders[i].NumIter);
                    }
                }
                catch (Exception e)
                {
                    // log inside the worker so the message is visible even though Parallel.For wraps the exception
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });
            Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), iterative_recommenders[0].NumIter);

            // iterative training and evaluation
            for (int it = (int)iterative_recommenders[0].NumIter + 1; it <= max_iter; it++)
            {
                Parallel.For(0, (int)split.NumberOfFolds, i =>
                {
                    try
                    {
                        iterative_recommenders[i].Iterate();

                        // only re-evaluate on report iterations
                        if (it % find_iter == 0)
                        {
                            var test_data_posonly     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[i]);
                            var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[i]);

                            fold_results[i] = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
                            if (show_fold_results)
                            {
                                // print this fold's own results, not the array of all folds
                                Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], it);
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                        throw;
                    }
                });
                // NOTE: this line is written after every iteration; on iterations that are not a multiple of
                // find_iter the folds were not re-evaluated, so it repeats the most recent fold results.
                Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), it);
            }
        }
예제 #15
0
파일: Groups.cs 프로젝트: bemde/MyMediaLite
        /// <summary>Evaluation for rankings of items recommended to groups</summary>
        /// <remarks>
        /// For every non-empty group, the test items and the training items of all its members are united and
        /// restricted to the candidate items. Groups without any test item, or for which every item that is left
        /// for evaluation is a test item, are skipped.
        /// The ranking measures in the returned object are accumulated (summed) over the evaluated groups;
        /// this method does not divide them by the number of groups.
        /// NOTE(review): confirm whether callers are expected to normalize by "num_groups".
        /// </remarks>
        /// <param name="recommender">group recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="group_to_user">group to user relation</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="ignore_overlap">if true, ignore items that appear for a group in the training set when evaluating for that group</param>
        /// <returns>a dictionary containing the evaluation results, plus the keys "num_groups", "num_lists" and "num_items"</returns>
        /// <exception cref="InvalidOperationException">thrown if the recommender returns a ranking that does not have one entry per candidate item</exception>
        public static ItemRecommendationEvaluationResults Evaluate(
            this GroupRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback train,
            SparseBooleanMatrix group_to_user,
            ICollection<int> candidate_items,
            bool ignore_overlap = true)
        {
            var result = new ItemRecommendationEvaluationResults();

            // cut-off positions for precision and recall; the same for every group, so created once
            var positions = new int[] { 5, 10 };

            int num_groups = 0;

            foreach (int group_id in group_to_user.NonEmptyRowIDs)
            {
                var users = group_to_user.GetEntriesByRow(group_id);

                // test items of all group members, restricted to the candidates
                var correct_items = new HashSet<int>();
                foreach (int user_id in users)
                    correct_items.UnionWith(test.UserMatrix[user_id]);
                correct_items.IntersectWith(candidate_items);

                // training items of all group members, restricted to the candidates
                var candidate_items_in_train = new HashSet<int>();
                foreach (int user_id in users)
                    candidate_items_in_train.UnionWith(train.UserMatrix[user_id]);
                candidate_items_in_train.IntersectWith(candidate_items);
                int num_eval_items = candidate_items.Count - (ignore_overlap ? candidate_items_in_train.Count : 0);

                // skip all groups that have 0 or #candidate_items test items
                if (correct_items.Count == 0)
                    continue;
                if (num_eval_items - correct_items.Count == 0)
                    continue;

                IList<int> prediction_list = recommender.RankItems(users, candidate_items);
                if (prediction_list.Count != candidate_items.Count)
                    throw new InvalidOperationException("Not all items have been ranked.");

                var ignore_items = ignore_overlap ? candidate_items_in_train : new HashSet<int>();

                double auc  = AUC.Compute(prediction_list, correct_items, ignore_items);
                double map  = PrecisionAndRecall.AP(prediction_list, correct_items, ignore_items);
                double ndcg = NDCG.Compute(prediction_list, correct_items, ignore_items);
                double rr   = ReciprocalRank.Compute(prediction_list, correct_items, ignore_items);
                var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, ignore_items, positions);
                var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, ignore_items, positions);

                // thread-safe incrementing (lock kept from the original; the loop shown here runs sequentially)
                lock(result)
                {
                    num_groups++;
                    result["AUC"]       += (float) auc;
                    result["MAP"]       += (float) map;
                    result["NDCG"]      += (float) ndcg;
                    result["MRR"]       += (float) rr;
                    result["prec@5"]    += (float) prec[5];
                    result["prec@10"]   += (float) prec[10];
                    result["recall@5"]  += (float) recall[5];
                    result["recall@10"] += (float) recall[10];
                }

                // progress indicator on STDERR: a dot every 1000 evaluated groups, a line break every 60000
                if (num_groups % 1000 == 0)
                    Console.Error.Write(".");
                if (num_groups % 60000 == 0)
                    Console.Error.WriteLine();
            }

            result["num_groups"] = num_groups;
            result["num_lists"]  = num_groups;
            result["num_items"]  = candidate_items.Count;

            return result;
        }
예제 #16
0
        /// <summary>Evaluate on the folds of a dataset split</summary>
        /// <remarks>
        /// Each fold is trained and evaluated in parallel on its own clone of the recommender.
        /// The per-fold results are summed key by key and then divided by the number of folds for the
        /// measures in Items.Measures, for "num_users" and "num_items", and (if computed) for "fit".
        /// </remarks>
        /// <param name="recommender">an item recommender</param>
        /// <param name="split">a dataset split</param>
        /// <param name="test_users">a collection of integers with all test users</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="compute_fit">if set to true measure fit on the training data as well</param>
        /// <param name="show_results">set to true to print results to STDERR</param>
        /// <returns>a dictionary containing the average results over the different folds of the split</returns>
        /// <exception cref="ArgumentException">thrown if recommender is not an ItemRecommender</exception>
        public static ItemRecommendationEvaluationResults DoCrossValidation(
            this IRecommender recommender,
            ISplit <IPosOnlyFeedback> split,
            IList <int> test_users,
            IList <int> candidate_items,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            bool compute_fit  = false,
            bool show_results = false)
        {
            var avg_results = new ItemRecommendationEvaluationResults();

            if (!(recommender is ItemRecommender))
            {
                throw new ArgumentException("recommender must be of type ItemRecommender");
            }

            Parallel.For(0, (int)split.NumberOfFolds, fold =>
            {
                try
                {
                    var split_recommender      = (ItemRecommender)recommender.Clone();                 // avoid changes in recommender
                    split_recommender.Feedback = split.Train[fold];
                    split_recommender.Train();
                    var fold_results = Items.Evaluate(split_recommender, split.Test[fold], split.Train[fold], test_users, candidate_items, candidate_item_mode);
                    if (compute_fit)
                    {
                        fold_results["fit"] = (float)split_recommender.ComputeFit();
                    }

                    // thread-safe stats: the folds run in parallel and all add into the same object
                    lock (avg_results)
                    {
                        foreach (var key in fold_results.Keys)
                        {
                            if (avg_results.ContainsKey(key))
                            {
                                avg_results[key] += fold_results[key];
                            }
                            else
                            {
                                avg_results[key] = fold_results[key];
                            }
                        }
                    }

                    if (show_results)
                    {
                        Console.Error.WriteLine("fold {0} {1}", fold, fold_results);
                    }
                }
                catch (Exception e)
                {
                    // log inside the worker so the message is visible even though Parallel.For wraps the exception
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            // turn the per-key sums into averages over the folds
            foreach (var key in Items.Measures)
            {
                avg_results[key] /= split.NumberOfFolds;
            }
            avg_results["num_users"] /= split.NumberOfFolds;
            avg_results["num_items"] /= split.NumberOfFolds;
            // "fit" is summed over the folds like every other key, so it has to be averaged as well;
            // the key only exists if at least one fold was evaluated with compute_fit set
            if (compute_fit && avg_results.ContainsKey("fit"))
            {
                avg_results["fit"] /= split.NumberOfFolds;
            }

            return(avg_results);
        }