Esempio n. 1
0
    /// <summary>Command-line entry point: parses the options, trains or loads an item recommender,
    /// and evaluates it (optionally every few iterations for iterative models).</summary>
    /// <param name="args">command-line arguments; see Usage() for the supported options</param>
    public static void Main(string[] args)
    {
        Assembly assembly = Assembly.GetExecutingAssembly();

        // Path.Combine handles separators correctly, unlike manual concatenation
        // with Path.DirectorySeparatorChar (which can produce doubled separators)
        Assembly.LoadFile(Path.Combine(Path.GetDirectoryName(assembly.Location), "MyMediaLiteExperimental.dll"));

        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(MyMediaLite.Util.Handlers.UnhandledExceptionHandler);
        Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

        // recommender arguments
        string method = "MostPopular";
        string recommender_options = string.Empty;

        // help/version flags
        bool show_help    = false;
        bool show_version = false;

        // variables for iteration search
        int    find_iter    = 0;   // evaluate every find_iter iterations; 0 disables iteration search
        int    max_iter     = 500;
        double auc_cutoff   = 0;   // stop early when AUC falls below this value
        double prec5_cutoff = 0;   // stop early when prec@5 falls below this value

        compute_fit = false;

        // other parameters
        string save_model_file = string.Empty;
        string load_model_file = string.Empty;
        int    random_seed     = -1;           // -1 means: do not seed the random number generator
        string prediction_file = string.Empty;

        test_ratio = 0;

        var p = new OptionSet()
        {
            // string-valued options
            { "training-file=", v => training_file = v },
            { "test-file=", v => test_file = v },
            { "recommender=", v => method = v },
            { "recommender-options=", v => recommender_options += " " + v },
            { "data-dir=", v => data_dir = v },
            { "user-attributes=", v => user_attributes_file = v },
            { "item-attributes=", v => item_attributes_file = v },
            { "user-relations=", v => user_relations_file = v },
            { "item-relations=", v => item_relations_file = v },
            { "save-model=", v => save_model_file = v },
            { "load-model=", v => load_model_file = v },
            { "prediction-file=", v => prediction_file = v },
            { "relevant-users=", v => relevant_users_file = v },
            { "relevant-items=", v => relevant_items_file = v },
            // integer-valued options
            { "find-iter=", (int v) => find_iter = v },
            { "max-iter=", (int v) => max_iter = v },
            { "random-seed=", (int v) => random_seed = v },
            { "predict-items-number=", (int v) => predict_items_number = v },
            // double-valued options
//			{ "epsilon=",             (double v) => epsilon      = v },
            { "auc-cutoff=", (double v) => auc_cutoff = v },
            { "prec5-cutoff=", (double v) => prec5_cutoff = v },
            { "test-ratio=", (double v) => test_ratio = v },
            // enum options
            //   * currently none *
            // boolean options
            { "compute-fit", v => compute_fit = v != null },
            { "online-evaluation", v => online_eval = v != null },
            { "filtered-evaluation", v => filtered_eval = v != null },
            { "help", v => show_help = v != null },
            { "version", v => show_version = v != null },
        };
        IList<string> extra_args = p.Parse(args);

        if (show_version)
        {
            ShowVersion();
        }
        if (show_help)
        {
            Usage(0);
        }

        bool no_eval = test_file == null;

        // validate argument combinations (Usage() prints the message and exits)
        if (training_file == null)
        {
            Usage("Parameter --training-file=FILE is missing.");
        }

        if (extra_args.Count > 0)
        {
            Usage("Did not understand " + extra_args[0]);
        }

        if (online_eval && filtered_eval)
        {
            Usage("Combination of --online-eval and --filtered-eval is not (yet) supported.");
        }

        if (random_seed != -1)
        {
            MyMediaLite.Util.Random.InitInstance(random_seed);
        }

        recommender = Recommender.CreateItemRecommender(method);
        if (recommender == null)
        {
            Usage(string.Format("Unknown method: '{0}'", method));
        }

        Recommender.Configure(recommender, recommender_options, Usage);

        // load all the data
        LoadData();
        Utils.DisplayDataStats(training_data, test_data, recommender);

        TimeSpan time_span;

        if (find_iter != 0)
        {
            // iteration search: evaluate every find_iter iterations up to max_iter
            var iterative_recommender = (IIterativeModel)recommender;
            Console.WriteLine(recommender.ToString() + " ");

            if (load_model_file == string.Empty)
            {
                iterative_recommender.Train();
            }
            else
            {
                Recommender.LoadModel(iterative_recommender, load_model_file);
            }

            if (compute_fit)
            {
                // InvariantCulture so the fit value is machine-parseable regardless of locale
                Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", iterative_recommender.ComputeFit()));
            }

            var result = Evaluate();
            Items.DisplayResults(result);
            Console.WriteLine(" iteration " + iterative_recommender.NumIter);

            for (int i = (int)iterative_recommender.NumIter + 1; i <= max_iter; i++)
            {
                TimeSpan t = Utils.MeasureTime(delegate() {
                    iterative_recommender.Iterate();
                });
                training_time_stats.Add(t.TotalSeconds);

                if (i % find_iter == 0)
                {
                    if (compute_fit)
                    {
                        double fit = 0;
                        t = Utils.MeasureTime(delegate() { fit = iterative_recommender.ComputeFit(); });
                        fit_time_stats.Add(t.TotalSeconds);
                        Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", fit));
                    }

                    t = Utils.MeasureTime(delegate() { result = Evaluate(); });
                    eval_time_stats.Add(t.TotalSeconds);
                    Items.DisplayResults(result);
                    Console.WriteLine(" iteration " + i);

                    Recommender.SaveModel(recommender, save_model_file, i);
                    Predict(prediction_file, relevant_users_file, i);

                    // stop early when the quality drops below the configured cutoffs
                    // (default cutoffs are 0, so this never triggers unless set explicitly)
                    if (result["AUC"] < auc_cutoff || result["prec@5"] < prec5_cutoff)
                    {
                        Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
                        Console.Error.WriteLine("DONE");
                        break;
                    }
                }
            }             // for
            DisplayStats();
        }
        else
        {
            // single train/evaluate run
            if (load_model_file == string.Empty)
            {
                Console.Write(recommender.ToString() + " ");
                time_span = Utils.MeasureTime(delegate() { recommender.Train(); });
                Console.Write("training_time " + time_span + " ");
            }
            else
            {
                Recommender.LoadModel(recommender, load_model_file);
                Console.Write(recommender.ToString() + " ");
                // TODO is this the right time to load the model?
            }

            if (prediction_file != string.Empty)
            {
                Predict(prediction_file, relevant_users_file);
            }
            else if (!no_eval)
            {
                if (online_eval)
                {
                    time_span = Utils.MeasureTime(delegate() {
                        var result = Items.EvaluateOnline(recommender, test_data, training_data, relevant_users, relevant_items);                         // TODO support also for prediction outputs (to allow external evaluation)
                        Items.DisplayResults(result);
                    });
                }
                else
                {
                    time_span = Utils.MeasureTime(delegate() {
                        var result = Evaluate();
                        Items.DisplayResults(result);
                    });
                }
                Console.Write(" testing_time " + time_span);
            }
            Console.WriteLine();
        }
        Recommender.SaveModel(recommender, save_model_file);
    }
Esempio n. 2
0
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing)
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// Each test event is evaluated against the current model state (before the model has seen
        /// the event) and is afterwards fed to the recommender, simulating incremental training.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data (must be connected to the recommender's training data)</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        static public Dictionary <string, double> EvaluateOnline(
            IItemRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback train,
            ICollection <int> relevant_users, ICollection <int> relevant_items)
        {
            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos   = 0;

            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
            {
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }
            }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];

            for (int index = 0; index < random_index.Length; index++)
            {
                random_index[index] = index;
            }
            Util.Utils.Shuffle <int>(random_index);

            // per-user accumulated measures; "num_items" counts the events per user
            var results_by_user = new Dictionary <int, Dictionary <string, double> >();

            foreach (int index in random_index)
            {
                int user_id = users[index];
                int item_id = items[index];

                if (relevant_users.Contains(user_id) && relevant_items.Contains(item_id))
                {
                    // evaluate this single event before the recommender sees it
                    var current_test = new PosOnlyFeedback <SparseBooleanMatrix>();
                    current_test.Add(user_id, item_id);
                    var current_result = Evaluate(recommender, current_test, train, current_test.AllUsers, relevant_items);

                    // only count events for which the user could actually be evaluated
                    if (current_result["num_users"] == 1)
                    {
                        Dictionary <string, double> user_results;
                        if (results_by_user.TryGetValue(user_id, out user_results))                         // single lookup instead of ContainsKey + indexer
                        {
                            foreach (string measure in Measures)
                            {
                                user_results[measure] += current_result[measure];
                            }
                            user_results["num_items"]++;
                        }
                        else
                        {
                            results_by_user[user_id] = current_result;
                            current_result["num_items"] = 1;
                            current_result.Remove("num_users");
                        }
                    }
                }

                // update recommender with the observed event
                recommender.AddFeedback(user_id, item_id);
            }

            var results = new Dictionary <string, double>();

            foreach (string measure in Measures)
            {
                results[measure] = 0;
            }

            // macro-average: first average per user over its events, then over users
            foreach (var user_results in results_by_user.Values)
            {
                foreach (string measure in Measures)
                {
                    results[measure] += user_results[measure] / user_results["num_items"];
                }
            }

            // guard against 0/0 producing NaN when no test event matched the relevant users/items
            if (results_by_user.Count > 0)
            {
                foreach (string measure in Measures)
                {
                    results[measure] /= results_by_user.Count;
                }
            }

            results["num_users"] = results_by_user.Count;
            results["num_items"] = relevant_items.Count;
            results["num_lists"] = test.Count;             // FIXME this is not exact

            return results;
        }
Esempio n. 3
0
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation.
        /// The evaluation measures are listed in the ItemPredictionMeasures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        static public Dictionary <string, double> Evaluate(
            IItemRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback train,
            ICollection <int> relevant_users,
            ICollection <int> relevant_items)
        {
            if (train.Overlap(test) > 0)
            {
                Console.Error.WriteLine("WARNING: Overlapping train and test data");
            }

            // accumulators for the evaluation measures (averaged over users at the end)
            double auc_sum     = 0;
            double map_sum     = 0;
            double prec_5_sum  = 0;
            double prec_10_sum = 0;
            double prec_15_sum = 0;
            double ndcg_sum    = 0;
            int    num_users   = 0;

            foreach (int user_id in relevant_users)
            {
                var correct_items = new HashSet <int>(test.UserMatrix[user_id]);
                correct_items.IntersectWith(relevant_items);

                // the number of items that are really relevant for this user:
                // candidate items minus those already known from training
                var relevant_items_in_train = new HashSet <int>(train.UserMatrix[user_id]);
                relevant_items_in_train.IntersectWith(relevant_items);
                int num_eval_items = relevant_items.Count - relevant_items_in_train.Count;                 // Count property, not LINQ Count()

                // skip all users that have 0 or #relevant_items test items
                // (ranking is meaningless if nothing or everything is correct)
                if (correct_items.Count == 0)
                {
                    continue;
                }
                if (num_eval_items - correct_items.Count == 0)
                {
                    continue;
                }

                num_users++;
                int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_items);

                // sanity check before accumulating any measure (fail fast)
                if (prediction.Length != relevant_items.Count)
                {
                    throw new InvalidOperationException("Not all items have been ranked.");
                }

                auc_sum     += AUC(prediction, correct_items, train.UserMatrix[user_id]);
                map_sum     += MAP(prediction, correct_items, train.UserMatrix[user_id]);
                ndcg_sum    += NDCG(prediction, correct_items, train.UserMatrix[user_id]);
                prec_5_sum  += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 5);
                prec_10_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
                prec_15_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

                // progress indicator on stderr
                if (num_users % 1000 == 0)
                {
                    Console.Error.Write(".");
                }
                if (num_users % 20000 == 0)
                {
                    Console.Error.WriteLine();
                }
            }

            var result = new Dictionary <string, double>();

            // NOTE(review): if num_users is 0, the averages below are NaN (double division);
            // kept for backward compatibility
            result["AUC"]       = auc_sum / num_users;
            result["MAP"]       = map_sum / num_users;
            result["NDCG"]      = ndcg_sum / num_users;
            result["prec@5"]    = prec_5_sum / num_users;
            result["prec@10"]   = prec_10_sum / num_users;
            result["prec@15"]   = prec_15_sum / num_users;
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = relevant_items.Count;

            return result;
        }
 /// <summary>Create a new instance that delegates to the given item recommender.</summary>
 /// <param name="ItemRecommender">the item recommender to use</param>
 public GetRecommendationItems(IItemRecommender ItemRecommender)
 {
     // NOTE(review): the field name 'itemRecommeder' looks like a typo for 'itemRecommender';
     // it is declared elsewhere in the class — confirm before renaming
     itemRecommeder = ItemRecommender;
 }
Esempio n. 5
0
    /// <summary>Command-line entry point: parses the options, trains or loads an item recommender,
    /// and evaluates it (optionally every few iterations for iterative models).</summary>
    /// <param name="args">command-line arguments; see Usage() for the supported options</param>
    public static void Main(string[] args)
    {
        Assembly assembly = Assembly.GetExecutingAssembly();

        // Path.Combine handles separators correctly, unlike manual concatenation
        // with Path.DirectorySeparatorChar (which can produce doubled separators)
        Assembly.LoadFile(Path.Combine(Path.GetDirectoryName(assembly.Location), "MyMediaLiteExperimental.dll"));

        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(MyMediaLite.Util.Handlers.UnhandledExceptionHandler);
        Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

        // recommender arguments
        string method              = "MostPopular";
        string recommender_options = string.Empty;

        // help/version flags
        bool show_help    = false;
        bool show_version = false;

        // variables for iteration search
        int find_iter       = 0;   // evaluate every find_iter iterations; 0 disables iteration search
        int max_iter        = 500;
        double auc_cutoff   = 0;   // stop early when AUC falls below this value
        double prec5_cutoff = 0;   // stop early when prec@5 falls below this value
        compute_fit         = false;

        // other parameters
        string save_model_file = string.Empty;
        string load_model_file = string.Empty;
        int random_seed        = -1;           // -1 means: do not seed the random number generator
        string prediction_file = string.Empty;
        test_ratio             = 0;

        var p = new OptionSet() {
            // string-valued options
            { "training-file=",       v => training_file          = v },
            { "test-file=",           v => test_file              = v },
            { "recommender=",         v => method                 = v },
            { "recommender-options=", v => recommender_options   += " " + v },
            { "data-dir=",            v => data_dir               = v },
            { "user-attributes=",     v => user_attributes_file   = v },
            { "item-attributes=",     v => item_attributes_file   = v },
            { "user-relations=",      v => user_relations_file    = v },
            { "item-relations=",      v => item_relations_file    = v },
            { "save-model=",          v => save_model_file        = v },
            { "load-model=",          v => load_model_file        = v },
            { "prediction-file=",     v => prediction_file        = v },
            { "relevant-users=",      v => relevant_users_file    = v },
            { "relevant-items=",      v => relevant_items_file    = v },
            // integer-valued options
            { "find-iter=",            (int v) => find_iter            = v },
            { "max-iter=",             (int v) => max_iter             = v },
            { "random-seed=",          (int v) => random_seed          = v },
            { "predict-items-number=", (int v) => predict_items_number = v },
            // double-valued options
        //			{ "epsilon=",             (double v) => epsilon      = v },
            { "auc-cutoff=",          (double v) => auc_cutoff   = v },
            { "prec5-cutoff=",        (double v) => prec5_cutoff = v },
            { "test-ratio=",          (double v) => test_ratio   = v },
            // enum options
            //   * currently none *
            // boolean options
            { "compute-fit",          v => compute_fit   = v != null },
            { "online-evaluation",    v => online_eval   = v != null },
            { "filtered-evaluation",  v => filtered_eval = v != null },
            { "help",                 v => show_help     = v != null },
            { "version",              v => show_version  = v != null },
        };
        IList<string> extra_args = p.Parse(args);

        if (show_version)
            ShowVersion();
        if (show_help)
            Usage(0);

        bool no_eval = test_file == null;

        // validate argument combinations (Usage() prints the message and exits)
        if (training_file == null)
            Usage("Parameter --training-file=FILE is missing.");

        if (extra_args.Count > 0)
            Usage("Did not understand " + extra_args[0]);

        if (online_eval && filtered_eval)
            Usage("Combination of --online-eval and --filtered-eval is not (yet) supported.");

        if (random_seed != -1)
            MyMediaLite.Util.Random.InitInstance(random_seed);

        recommender = Recommender.CreateItemRecommender(method);
        if (recommender == null)
            Usage(string.Format("Unknown method: '{0}'", method));

        Recommender.Configure(recommender, recommender_options, Usage);

        // load all the data
        LoadData();
        Utils.DisplayDataStats(training_data, test_data, recommender);

        TimeSpan time_span;

        if (find_iter != 0)
        {
            // iteration search: evaluate every find_iter iterations up to max_iter
            var iterative_recommender = (IIterativeModel) recommender;
            Console.WriteLine(recommender.ToString() + " ");

            if (load_model_file == string.Empty)
                iterative_recommender.Train();
            else
                Recommender.LoadModel(iterative_recommender, load_model_file);

            if (compute_fit)
                // InvariantCulture so the fit value is machine-parseable regardless of locale
                Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", iterative_recommender.ComputeFit()));

            var result = Evaluate();
            Items.DisplayResults(result);
            Console.WriteLine(" iteration " + iterative_recommender.NumIter);

            for (int i = (int) iterative_recommender.NumIter + 1; i <= max_iter; i++)
            {
                TimeSpan t = Utils.MeasureTime(delegate() {
                    iterative_recommender.Iterate();
                });
                training_time_stats.Add(t.TotalSeconds);

                if (i % find_iter == 0)
                {
                    if (compute_fit)
                    {
                        double fit = 0;
                        t = Utils.MeasureTime(delegate() { fit = iterative_recommender.ComputeFit(); });
                        fit_time_stats.Add(t.TotalSeconds);
                        Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", fit));
                    }

                    t = Utils.MeasureTime(delegate() { result = Evaluate(); });
                    eval_time_stats.Add(t.TotalSeconds);
                    Items.DisplayResults(result);
                    Console.WriteLine(" iteration " + i);

                    Recommender.SaveModel(recommender, save_model_file, i);
                    Predict(prediction_file, relevant_users_file, i);

                    // stop early when the quality drops below the configured cutoffs
                    // (default cutoffs are 0, so this never triggers unless set explicitly)
                    if (result["AUC"] < auc_cutoff || result["prec@5"] < prec5_cutoff)
                    {
                        Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
                        Console.Error.WriteLine("DONE");
                        break;
                    }
                }
            } // for
            DisplayStats();
        }
        else
        {
            // single train/evaluate run
            if (load_model_file == string.Empty)
            {
                Console.Write(recommender.ToString() + " ");
                time_span = Utils.MeasureTime( delegate() { recommender.Train(); } );
                Console.Write("training_time " + time_span + " ");
            }
            else
            {
                Recommender.LoadModel(recommender, load_model_file);
                Console.Write(recommender.ToString() + " ");
                // TODO is this the right time to load the model?
            }

            if (prediction_file != string.Empty)
            {
                Predict(prediction_file, relevant_users_file);
            }
            else if (!no_eval)
            {
                if (online_eval)
                    time_span = Utils.MeasureTime( delegate() {
                        var result = Items.EvaluateOnline(recommender, test_data, training_data, relevant_users, relevant_items); // TODO support also for prediction outputs (to allow external evaluation)
                        Items.DisplayResults(result);
                    });
                else
                    time_span = Utils.MeasureTime( delegate() {
                        var result = Evaluate();
                        Items.DisplayResults(result);
                    });
                Console.Write(" testing_time " + time_span);
            }
            Console.WriteLine();
        }
        Recommender.SaveModel(recommender, save_model_file);
    }
Esempio n. 6
0
        // TODO consider micro- (by item) and macro-averaging (by user, the current thing)
        /// <summary>Online evaluation for rankings of items</summary>
        /// <remarks>
        /// Each test event is evaluated against the current model state (before the model has seen
        /// the event) and is afterwards fed to the recommender, simulating incremental training.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data (must be connected to the recommender's training data)</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results (averaged by user)</returns>
        public static Dictionary<string, double> EvaluateOnline(
            IItemRecommender recommender,
            IPosOnlyFeedback test, IPosOnlyFeedback train,
            ICollection<int> relevant_users, ICollection<int> relevant_items)
        {
            // for better handling, move test data points into arrays
            var users = new int[test.Count];
            var items = new int[test.Count];
            int pos = 0;
            foreach (int user_id in test.UserMatrix.NonEmptyRowIDs)
                foreach (int item_id in test.UserMatrix[user_id])
                {
                    users[pos] = user_id;
                    items[pos] = item_id;
                    pos++;
                }

            // random order of the test data points  // TODO chronological order
            var random_index = new int[test.Count];
            for (int index = 0; index < random_index.Length; index++)
                random_index[index] = index;
            Util.Utils.Shuffle<int>(random_index);

            // per-user accumulated measures; "num_items" counts the events per user
            var results_by_user = new Dictionary<int, Dictionary<string, double>>();

            foreach (int index in random_index)
            {
                int user_id = users[index];
                int item_id = items[index];

                if (relevant_users.Contains(user_id) && relevant_items.Contains(item_id))
                {
                    // evaluate this single event before the recommender sees it
                    var current_test = new PosOnlyFeedback<SparseBooleanMatrix>();
                    current_test.Add(user_id, item_id);
                    var current_result = Evaluate(recommender, current_test, train, current_test.AllUsers, relevant_items);

                    // only count events for which the user could actually be evaluated
                    if (current_result["num_users"] == 1)
                    {
                        Dictionary<string, double> user_results;
                        if (results_by_user.TryGetValue(user_id, out user_results)) // single lookup instead of ContainsKey + indexer
                        {
                            foreach (string measure in Measures)
                                user_results[measure] += current_result[measure];
                            user_results["num_items"]++;
                        }
                        else
                        {
                            results_by_user[user_id] = current_result;
                            current_result["num_items"] = 1;
                            current_result.Remove("num_users");
                        }
                    }
                }

                // update recommender with the observed event
                recommender.AddFeedback(user_id, item_id);
            }

            var results = new Dictionary<string, double>();
            foreach (string measure in Measures)
                results[measure] = 0;

            // macro-average: first average per user over its events, then over users
            foreach (var user_results in results_by_user.Values)
                foreach (string measure in Measures)
                    results[measure] += user_results[measure] / user_results["num_items"];

            // guard against 0/0 producing NaN when no test event matched the relevant users/items
            if (results_by_user.Count > 0)
                foreach (string measure in Measures)
                    results[measure] /= results_by_user.Count;

            results["num_users"] = results_by_user.Count;
            results["num_items"] = relevant_items.Count;
            results["num_lists"] = test.Count; // FIXME this is not exact

            return results;
        }
Esempio n. 7
0
        /// <summary>Evaluation for rankings of items</summary>
        /// <remarks>
        /// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation.
        /// The evaluation measures are listed in the ItemPredictionMeasures property.
        /// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
        /// and the number of items that were taken into account.
        ///
        /// Literature:
        ///   C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        public static Dictionary<string, double> Evaluate(
            IItemRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback train,
            ICollection<int> relevant_users,
            ICollection<int> relevant_items)
        {
            if (train.Overlap(test) > 0)
                Console.Error.WriteLine("WARNING: Overlapping train and test data");

            // accumulators for the evaluation measures (averaged over users at the end)
            double auc_sum     = 0;
            double map_sum     = 0;
            double prec_5_sum  = 0;
            double prec_10_sum = 0;
            double prec_15_sum = 0;
            double ndcg_sum    = 0;
            int num_users      = 0;

            foreach (int user_id in relevant_users)
            {
                var correct_items = new HashSet<int>(test.UserMatrix[user_id]);
                correct_items.IntersectWith(relevant_items);

                // the number of items that are really relevant for this user:
                // candidate items minus those already known from training
                var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
                relevant_items_in_train.IntersectWith(relevant_items);
                int num_eval_items = relevant_items.Count - relevant_items_in_train.Count; // Count property, not LINQ Count()

                // skip all users that have 0 or #relevant_items test items
                // (ranking is meaningless if nothing or everything is correct)
                if (correct_items.Count == 0)
                    continue;
                if (num_eval_items - correct_items.Count == 0)
                    continue;

                num_users++;
                int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_items);

                // sanity check before accumulating any measure (fail fast)
                if (prediction.Length != relevant_items.Count)
                    throw new InvalidOperationException("Not all items have been ranked.");

                auc_sum     += AUC(prediction, correct_items, train.UserMatrix[user_id]);
                map_sum     += MAP(prediction, correct_items, train.UserMatrix[user_id]);
                ndcg_sum    += NDCG(prediction, correct_items, train.UserMatrix[user_id]);
                prec_5_sum  += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
                prec_10_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
                prec_15_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

                // progress indicator on stderr
                if (num_users % 1000 == 0)
                    Console.Error.Write(".");
                if (num_users % 20000 == 0)
                    Console.Error.WriteLine();
            }

            var result = new Dictionary<string, double>();
            // NOTE(review): if num_users is 0, the averages below are NaN (double division);
            // kept for backward compatibility
            result["AUC"]       = auc_sum / num_users;
            result["MAP"]       = map_sum / num_users;
            result["NDCG"]      = ndcg_sum / num_users;
            result["prec@5"]    = prec_5_sum / num_users;
            result["prec@10"]   = prec_10_sum / num_users;
            result["prec@15"]   = prec_15_sum / num_users;
            result["num_users"] = num_users;
            result["num_lists"] = num_users;
            result["num_items"] = relevant_items.Count;

            return result;
        }
// ---- Esempio n. 8 ----
		/// <summary>Evaluation for rankings of filtered items</summary>
		/// <remarks>
		/// Each (user, attribute) pair that passes the filter induces one evaluation list;
		/// the measures are averaged over lists, not over users.
		/// </remarks>
		/// <param name="recommender">item recommender</param>
		/// <param name="test">test cases</param>
		/// <param name="train">training data</param>
		/// <param name="item_attributes">the item attributes to be used for filtering</param>
		/// <param name="relevant_users">a collection of integers with all relevant users</param>
		/// <param name="relevant_items">a collection of integers with all relevant items</param>
		/// <returns>a dictionary containing the evaluation results</returns>
		public static Dictionary<string, double> Evaluate(
			IItemRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
			SparseBooleanMatrix item_attributes,
			ICollection<int> relevant_users,
			ICollection<int> relevant_items)
		{
			if (train.Overlap(test) > 0)
				Console.Error.WriteLine("WARNING: Overlapping train and test data");

			// attribute -> items mapping, used to restrict the candidate items per attribute
			SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

			// accumulators for the evaluation measures (averaged over lists below)
			double auc_sum     = 0;
			double map_sum     = 0;
			double prec_5_sum  = 0;
			double prec_10_sum = 0;
			double prec_15_sum = 0;
			double ndcg_sum    = 0;

			// for counting the users and the evaluation lists
			int num_lists = 0;
			int num_users = 0;
			int last_user_id = -1;

			foreach (int user_id in relevant_users)
			{
				var filtered_items = GetFilteredItems(user_id, test, item_attributes);

				foreach (int attribute_id in filtered_items.Keys)
				{
					// TODO optimize this a bit, currently it is quite naive
					var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
					relevant_filtered_items.IntersectWith(relevant_items);

					// test items of this user that carry the attribute and are candidates
					var correct_items = new HashSet<int>(filtered_items[attribute_id]);
					correct_items.IntersectWith(relevant_filtered_items);

					// the number of candidate items not already known from training
					var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
					relevant_items_in_train.IntersectWith(relevant_filtered_items);
					int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count; // Count property, not LINQ Count()

					// skip lists with no correct items, or where every evaluation item is a test item
					if (correct_items.Count == 0)
						continue;
					if (num_eval_items - correct_items.Count == 0)
						continue;

					// counting stats
					num_lists++;
					if (last_user_id != user_id)
					{
						last_user_id = user_id;
						num_users++;
					}

					// evaluation
					int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);

					// sanity check BEFORE any measure is accumulated from a possibly incomplete ranking
					if (prediction.Length != relevant_filtered_items.Count)
						throw new InvalidOperationException("Not all items have been ranked.");

					auc_sum     += Items.AUC(prediction, correct_items, train.UserMatrix[user_id]);
					map_sum     += Items.MAP(prediction, correct_items, train.UserMatrix[user_id]);
					ndcg_sum    += Items.NDCG(prediction, correct_items, train.UserMatrix[user_id]);
					prec_5_sum  += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
					prec_10_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
					prec_15_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

					// progress output on stderr: one dot per 1000 evaluated lists
					if (num_lists % 1000 == 0)
						Console.Error.Write(".");
					if (num_lists % 20000 == 0)
						Console.Error.WriteLine();
				}
			}

			// NOTE: if no list qualifies, num_lists is 0 and the averages below are NaN
			var result = new Dictionary<string, double>();
			result.Add("AUC",     auc_sum / num_lists);
			result.Add("MAP",     map_sum / num_lists);
			result.Add("NDCG",    ndcg_sum / num_lists);
			result.Add("prec@5",  prec_5_sum / num_lists);
			result.Add("prec@10", prec_10_sum / num_lists);
			result.Add("prec@15", prec_15_sum / num_lists);
			result.Add("num_users", num_users);
			result.Add("num_lists", num_lists);
			result.Add("num_items", relevant_items.Count);

			return result;
		}
// ---- Esempio n. 9 ----
        /// <summary>Display data statistics for item recommendation datasets</summary>
        /// <remarks>
        /// Writes user/item/event counts and sparsity for the training (and, if given, test)
        /// data to standard output, plus attribute counts if the recommender uses attributes.
        /// </remarks>
        /// <param name="training_data">the training dataset</param>
        /// <param name="test_data">the test dataset; may be null, in which case its stats are skipped</param>
        /// <param name="recommender">the recommender that will be used</param>
        public static void DisplayDataStats(IPosOnlyFeedback training_data, IPosOnlyFeedback test_data, IItemRecommender recommender)
        {
            // training data stats
            int    num_users   = training_data.AllUsers.Count;
            int    num_items   = training_data.AllItems.Count;
            long   matrix_size = (long)num_users * num_items; // long multiplication to avoid int overflow
            long   empty_size  = matrix_size - training_data.Count;
            double sparsity    = 100.0 * empty_size / matrix_size; // percentage of empty cells; NaN for an empty matrix

            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}", num_users, num_items, training_data.Count, sparsity));

            // test data stats
            if (test_data != null)
            {
                num_users   = test_data.AllUsers.Count;
                num_items   = test_data.AllItems.Count;
                matrix_size = (long)num_users * num_items;
                empty_size  = matrix_size - test_data.Count;
                sparsity    = 100.0 * empty_size / matrix_size;
                Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "test data:     {0} users, {1} items, {2} events, sparsity {3,0:0.#####}", num_users, num_items, test_data.Count, sparsity));
            }

            // attribute stats
            if (recommender is IUserAttributeAwareRecommender)
            {
                Console.WriteLine("{0} user attributes for {1} users",
                                  ((IUserAttributeAwareRecommender)recommender).NumUserAttributes,
                                  ((IUserAttributeAwareRecommender)recommender).UserAttributes.NumberOfRows);
            }
            if (recommender is IItemAttributeAwareRecommender)
            {
                Console.WriteLine("{0} item attributes for {1} items",
                                  ((IItemAttributeAwareRecommender)recommender).NumItemAttributes,
                                  ((IItemAttributeAwareRecommender)recommender).ItemAttributes.NumberOfRows);
            }
        }
// ---- Esempio n. 10 ----
        /// <summary>Evaluation for rankings of filtered items</summary>
        /// <remarks>
        /// Each (user, attribute) pair that passes the filter induces one evaluation list;
        /// the measures are averaged over lists, not over users.
        /// </remarks>
        /// <param name="recommender">item recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="item_attributes">the item attributes to be used for filtering</param>
        /// <param name="relevant_users">a collection of integers with all relevant users</param>
        /// <param name="relevant_items">a collection of integers with all relevant items</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        public static Dictionary<string, double> Evaluate(
            IItemRecommender recommender,
            IPosOnlyFeedback test,
            IPosOnlyFeedback train,
            SparseBooleanMatrix item_attributes,
            ICollection<int> relevant_users,
            ICollection<int> relevant_items)
        {
            if (train.Overlap(test) > 0)
                Console.Error.WriteLine("WARNING: Overlapping train and test data");

            // attribute -> items mapping, used to restrict the candidate items per attribute
            SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

            // accumulators for the evaluation measures (averaged over lists below)
            double auc_sum     = 0;
            double map_sum     = 0;
            double prec_5_sum  = 0;
            double prec_10_sum = 0;
            double prec_15_sum = 0;
            double ndcg_sum    = 0;

            // for counting the users and the evaluation lists
            int num_lists = 0;
            int num_users = 0;
            int last_user_id = -1;

            foreach (int user_id in relevant_users)
            {
                var filtered_items = GetFilteredItems(user_id, test, item_attributes);

                foreach (int attribute_id in filtered_items.Keys)
                {
                    // TODO optimize this a bit, currently it is quite naive
                    var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
                    relevant_filtered_items.IntersectWith(relevant_items);

                    // test items of this user that carry the attribute and are candidates
                    var correct_items = new HashSet<int>(filtered_items[attribute_id]);
                    correct_items.IntersectWith(relevant_filtered_items);

                    // the number of candidate items not already known from training
                    var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
                    relevant_items_in_train.IntersectWith(relevant_filtered_items);
                    int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count; // Count property, not LINQ Count()

                    // skip lists with no correct items, or where every evaluation item is a test item
                    if (correct_items.Count == 0)
                        continue;
                    if (num_eval_items - correct_items.Count == 0)
                        continue;

                    // counting stats
                    num_lists++;
                    if (last_user_id != user_id)
                    {
                        last_user_id = user_id;
                        num_users++;
                    }

                    // evaluation
                    int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);

                    // sanity check BEFORE any measure is accumulated from a possibly incomplete ranking
                    if (prediction.Length != relevant_filtered_items.Count)
                        throw new InvalidOperationException("Not all items have been ranked.");

                    auc_sum     += Items.AUC(prediction, correct_items, train.UserMatrix[user_id]);
                    map_sum     += Items.MAP(prediction, correct_items, train.UserMatrix[user_id]);
                    ndcg_sum    += Items.NDCG(prediction, correct_items, train.UserMatrix[user_id]);
                    prec_5_sum  += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
                    prec_10_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
                    prec_15_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

                    // progress output on stderr: one dot per 1000 evaluated lists
                    if (num_lists % 1000 == 0)
                        Console.Error.Write(".");
                    if (num_lists % 20000 == 0)
                        Console.Error.WriteLine();
                }
            }

            // NOTE: if no list qualifies, num_lists is 0 and the averages below are NaN
            var result = new Dictionary<string, double>();
            result.Add("AUC",     auc_sum / num_lists);
            result.Add("MAP",     map_sum / num_lists);
            result.Add("NDCG",    ndcg_sum / num_lists);
            result.Add("prec@5",  prec_5_sum / num_lists);
            result.Add("prec@10", prec_10_sum / num_lists);
            result.Add("prec@15", prec_15_sum / num_lists);
            result.Add("num_users", num_users);
            result.Add("num_lists", num_lists);
            result.Add("num_items", relevant_items.Count);

            return result;
        }