/// <summary>Perform k-fold cross-validation on the recommender's own rating data</summary>
/// <param name="recommender">a rating predictor</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="compute_fit">if set to true, also measure the fit on the training data</param>
/// <param name="show_fold_results">if set to true, print per-fold results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
public static RatingPredictionEvaluationResults DoCrossValidation(
	this RatingPredictor recommender,
	uint num_folds = 5,
	bool compute_fit = false,
	bool show_fold_results = false)
{
	// build the k-fold split from the recommender's ratings, then delegate to the split-based overload
	var cv_split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	return recommender.DoCrossValidation(cv_split, compute_fit, show_fold_results);
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <param name="recommender">a rating predictor</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_fold_results">if set to true, print per-fold results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
public static RatingPredictionEvaluationResults DoCrossValidation(
	this RatingPredictor recommender,
	uint num_folds = 5,
	bool compute_fit = false,
	bool show_fold_results = false)
{
	// split the recommender's own ratings into num_folds folds and delegate
	var split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	return recommender.DoCrossValidation(split, compute_fit, show_fold_results);
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <param name="recommender">a rating predictor</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval</param>
/// <param name="show_fold_results">if set to true, print per-fold results to STDERR</param>
static public void DoIterativeCrossValidation(
	this RatingPredictor recommender,
	uint num_folds, uint max_iter, uint find_iter = 1, bool show_fold_results = false)
{
	// delegate to the split-based overload on a fresh k-fold split of the recommender's ratings
	recommender.DoIterativeCrossValidation(
		new RatingCrossValidationSplit(recommender.Ratings, num_folds),
		max_iter, find_iter, show_fold_results);
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <param name="recommender">an item recommender</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_results">set to true to print results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public EvaluationResults DoRatingBasedRankingCrossValidation(
	this RatingPredictor recommender,
	uint num_folds,
	IList<int> candidate_items,
	CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
	bool compute_fit = false,
	bool show_results = false)
{
	// construct the k-fold split over the recommender's ratings and forward all arguments
	var cv = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	return recommender.DoRatingBasedRankingCrossValidation(cv, candidate_items, candidate_item_mode, compute_fit, show_results);
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <param name="recommender">an item recommender</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_results">set to true to print results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
public static EvaluationResults DoRatingBasedRankingCrossValidation(
	this RatingPredictor recommender,
	uint num_folds,
	IList<int> candidate_items,
	CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
	bool compute_fit = false,
	bool show_results = false)
{
	// split the recommender's own ratings into num_folds folds and delegate
	var split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	return recommender.DoRatingBasedRankingCrossValidation(split, candidate_items, candidate_item_mode, compute_fit, show_results);
}
/// <summary>Find the parameters resulting in the minimal results for a given evaluation measure using k-fold cross-validation</summary>
/// <remarks>The recommender will be set to the best parameter value after calling this method.</remarks>
/// <param name="evaluation_measure">the name of the evaluation measure</param>
/// <param name="hyperparameter_name">the name of the hyperparameter to optimize</param>
/// <param name="hyperparameter_values">the values of the hyperparameter to try out</param>
/// <param name="recommender">the recommender</param>
/// <param name="k">the number of folds to be used for cross-validation</param>
/// <returns>the best (lowest) average value for the hyperparameter</returns>
public static double FindMinimum(string evaluation_measure, string hyperparameter_name, double[] hyperparameter_values, RatingPrediction.RatingPredictor recommender, int k)
{
	var data = recommender.Ratings;
	var split = new RatingCrossValidationSplit(data, k);
	double result = FindMinimum(evaluation_measure, hyperparameter_name, hyperparameter_values, recommender, split);
	// restore the complete dataset: the split-based search replaces recommender.Ratings fold by fold
	recommender.Ratings = data;
	return result;
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <param name="recommender">an item recommender</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="test_users">a collection of integers with all test users</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval</param>
/// <param name="show_fold_results">if set to true, print per-fold results to STDERR</param>
public static void DoRatingBasedRankingIterativeCrossValidation(
	this RatingPredictor recommender,
	uint num_folds,
	IList<int> test_users,
	IList<int> candidate_items,
	CandidateItems candidate_item_mode,
	RepeatedEvents repeated_events,
	uint max_iter,
	uint find_iter = 1,
	bool show_fold_results = false)
{
	var split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	// BUGFIX: show_fold_results was accepted but silently dropped; forward it to the split-based overload
	recommender.DoRatingBasedRankingIterativeCrossValidation(split, test_users, candidate_items, candidate_item_mode, repeated_events, max_iter, find_iter, show_fold_results);
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <param name="recommender">a rating predictor</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval</param>
/// <param name="show_fold_results">if set to true, print per-fold results to STDERR</param>
public static void DoIterativeCrossValidation(
	this RatingPredictor recommender,
	uint num_folds, int max_iter, int find_iter = 1, bool show_fold_results = false)
{
	// k-fold split over the recommender's own ratings; the split-based overload does the work
	recommender.DoIterativeCrossValidation(
		new RatingCrossValidationSplit(recommender.Ratings, num_folds),
		max_iter, find_iter, show_fold_results);
}
/// <summary>Find the parameters resulting in the minimal results for a given evaluation measure using k-fold cross-validation</summary>
/// <remarks>The recommender will be set to the best parameter value after calling this method.</remarks>
/// <param name="evaluation_measure">the name of the evaluation measure</param>
/// <param name="hyperparameter_name">the name of the hyperparameter to optimize</param>
/// <param name="hyperparameter_values">the values of the hyperparameter to try out</param>
/// <param name="recommender">the recommender</param>
/// <param name="k">the number of folds to be used for cross-validation</param>
/// <returns>the best (lowest) average value for the hyperparameter</returns>
public static double FindMinimum(
	string evaluation_measure,
	string hyperparameter_name,
	double[] hyperparameter_values,
	RatingPrediction.RatingPredictor recommender,
	uint k)
{
	// remember the complete dataset before the split-based search swaps in fold-specific ratings
	var complete_ratings = recommender.Ratings;
	var cv_split = new RatingCrossValidationSplit(complete_ratings, k);
	double best_value = FindMinimum(evaluation_measure, hyperparameter_name, hyperparameter_values, recommender, cv_split);
	// put the full dataset back so the recommender is usable after the search
	recommender.Ratings = complete_ratings;
	return best_value;
}
/// <summary>Command-line entry point: parses options, loads rating data, trains or loads a
/// rating predictor, and evaluates it (optionally per-iteration via --find-iter).</summary>
/// <remarks>NOTE(review): relies on class-level fields declared elsewhere in this file
/// (training_file, test_file, recommender, rating_type, file_format, compute_fit,
/// user_mapping, item_mapping, training_data, test_data, *_time_stats, rmse_eval_stats, ...)
/// — confirm against the full file.</remarks>
static void Main(string[] args)
{
	Assembly assembly = Assembly.GetExecutingAssembly();
	// load the experimental plug-in assembly from the executable's own directory
	Assembly.LoadFile(Path.GetDirectoryName(assembly.Location) + Path.DirectorySeparatorChar + "MyMediaLiteExperimental.dll");
	AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
	Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

	// recommender arguments
	string method = "BiasedMatrixFactorization";
	string recommender_options = string.Empty;

	// help/version
	bool show_help = false;
	bool show_version = false;

	// arguments for iteration search
	int find_iter = 0;
	int max_iter = 500;
	double epsilon = 0;
	double rmse_cutoff = double.MaxValue;
	double mae_cutoff = double.MaxValue;

	// data arguments
	string data_dir = string.Empty;
	string user_attributes_file = string.Empty;
	string item_attributes_file = string.Empty;
	string user_relations_file = string.Empty;
	string item_relations_file = string.Empty;

	// other arguments
	bool online_eval = false;
	bool search_hp = false;
	string save_model_file = string.Empty;
	string load_model_file = string.Empty;
	int random_seed = -1;
	string prediction_file = string.Empty;
	string prediction_line = "{0}\t{1}\t{2}";
	int cross_validation = 0;
	double split_ratio = 0;

	// command-line option table (Mono.Options-style OptionSet)
	var p = new OptionSet() {
		// string-valued options
		{ "training-file=", v => training_file = v },
		{ "test-file=", v => test_file = v },
		{ "recommender=", v => method = v },
		{ "recommender-options=", v => recommender_options += " " + v },
		{ "data-dir=", v => data_dir = v },
		{ "user-attributes=", v => user_attributes_file = v },
		{ "item-attributes=", v => item_attributes_file = v },
		{ "user-relations=", v => user_relations_file = v },
		{ "item-relations=", v => item_relations_file = v },
		{ "save-model=", v => save_model_file = v },
		{ "load-model=", v => load_model_file = v },
		{ "prediction-file=", v => prediction_file = v },
		{ "prediction-line=", v => prediction_line = v },
		// integer-valued options
		{ "find-iter=", (int v) => find_iter = v },
		{ "max-iter=", (int v) => max_iter = v },
		{ "random-seed=", (int v) => random_seed = v },
		{ "cross-validation=", (int v) => cross_validation = v },
		// double-valued options
		{ "epsilon=", (double v) => epsilon = v },
		{ "rmse-cutoff=", (double v) => rmse_cutoff = v },
		{ "mae-cutoff=", (double v) => mae_cutoff = v },
		{ "split-ratio=", (double v) => split_ratio = v },
		// enum options
		{ "rating-type=", (RatingType v) => rating_type = v },
		{ "file-format=", (RatingFileFormat v) => file_format = v },
		// boolean options
		{ "compute-fit", v => compute_fit = v != null },
		{ "online-evaluation", v => online_eval = v != null },
		{ "search-hp", v => search_hp = v != null },
		{ "help", v => show_help = v != null },
		{ "version", v => show_version = v != null },
	};
	IList<string> extra_args = p.Parse(args);

	// TODO make sure interaction of --find-iter and --cross-validation works properly

	bool no_eval = test_file == null;

	// argument validation; Usage() is expected to terminate the process on error
	if (show_version)
		ShowVersion();
	if (show_help)
		Usage(0);
	if (extra_args.Count > 0)
		Usage("Did not understand " + extra_args[0]);
	if (training_file == null)
		Usage("Parameter --training-file=FILE is missing.");
	if (cross_validation != 0 && split_ratio != 0)
		Usage("--cross-validation=K and --split-ratio=NUM are mutually exclusive.");

	if (random_seed != -1)
		MyMediaLite.Util.Random.InitInstance(random_seed);

	recommender = Recommender.CreateRatingPredictor(method);
	if (recommender == null)
		Usage(string.Format("Unknown method: '{0}'", method));

	Recommender.Configure(recommender, recommender_options, Usage);

	// ID mapping objects
	if (file_format == RatingFileFormat.KDDCUP_2011)
	{
		user_mapping = new IdentityMapping();
		item_mapping = new IdentityMapping();
	}

	// load all the data
	LoadData(data_dir, user_attributes_file, item_attributes_file, user_relations_file, item_relations_file, !online_eval);

	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

	// optional simple train/test split of the training data
	if (split_ratio > 0)
	{
		var split = new RatingsSimpleSplit(training_data, split_ratio);
		recommender.Ratings = split.Train[0];
		training_data = split.Train[0];
		test_data = split.Test[0];
	}

	Utils.DisplayDataStats(training_data, test_data, recommender);

	if (find_iter != 0)
	{
		// iteration search: evaluate every find_iter iterations up to max_iter
		if ( !(recommender is IIterativeModel) )
			Usage("Only iterative recommenders support find_iter.");
		var iterative_recommender = (IIterativeModel) recommender;
		Console.WriteLine(recommender.ToString() + " ");

		if (load_model_file == string.Empty)
			recommender.Train();
		else
			Recommender.LoadModel(iterative_recommender, load_model_file);

		if (compute_fit)
			Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", iterative_recommender.ComputeFit()));

		MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data));
		Console.WriteLine(" iteration " + iterative_recommender.NumIter);

		for (int i = (int) iterative_recommender.NumIter + 1; i <= max_iter; i++)
		{
			TimeSpan time = Utils.MeasureTime(delegate() {
				iterative_recommender.Iterate();
			});
			training_time_stats.Add(time.TotalSeconds);

			if (i % find_iter == 0)
			{
				if (compute_fit)
				{
					double fit = 0;
					time = Utils.MeasureTime(delegate() {
						fit = iterative_recommender.ComputeFit();
					});
					fit_time_stats.Add(time.TotalSeconds);
					Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", fit));
				}

				Dictionary<string, double> results = null;
				time = Utils.MeasureTime(delegate() { results = MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data); });
				eval_time_stats.Add(time.TotalSeconds);
				MyMediaLite.Eval.Ratings.DisplayResults(results);
				rmse_eval_stats.Add(results["RMSE"]);
				Console.WriteLine(" iteration " + i);

				Recommender.SaveModel(recommender, save_model_file, i);
				if (prediction_file != string.Empty)
					Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file + "-it-" + i);

				// stop when RMSE moved more than epsilon above the best value seen so far
				if (epsilon > 0.0 && results["RMSE"] - rmse_eval_stats.Min() > epsilon)
				{
					Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], rmse_eval_stats.Min()));
					Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", i);
					break;
				}
				if (results["RMSE"] > rmse_cutoff || results["MAE"] > mae_cutoff)
				{
					Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
					break;
				}
			}
		} // for

		DisplayStats();
	}
	else
	{
		TimeSpan seconds;

		if (load_model_file == string.Empty)
		{
			if (cross_validation > 0)
			{
				Console.Write(recommender.ToString());
				Console.WriteLine();
				var split = new RatingCrossValidationSplit(training_data, cross_validation);
				var results = MyMediaLite.Eval.Ratings.EvaluateOnSplit(recommender, split); // TODO if (search_hp)
				MyMediaLite.Eval.Ratings.DisplayResults(results);
				no_eval = true;
				recommender.Ratings = training_data;
			}
			else
			{
				if (search_hp)
				{
					// TODO --search-hp-criterion=RMSE
					double result = NelderMead.FindMinimum("RMSE", recommender);
					Console.Error.WriteLine("estimated quality (on split) {0}", result.ToString(CultureInfo.InvariantCulture));
					// TODO give out hp search time
				}

				Console.Write(recommender.ToString());
				seconds = Utils.MeasureTime( delegate() { recommender.Train(); } );
				Console.Write(" training_time " + seconds + " ");
			}
		}
		else
		{
			Recommender.LoadModel(recommender, load_model_file);
			Console.Write(recommender.ToString() + " ");
		}

		if (!no_eval)
		{
			if (online_eval) // TODO support also for prediction outputs (to allow external evaluation)
				seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.EvaluateOnline(recommender, test_data)); });
			else
				seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data)); });
			Console.Write(" testing_time " + seconds);
		}

		if (compute_fit)
		{
			Console.Write("fit ");
			seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, training_data)); });
			Console.Write(string.Format(CultureInfo.InvariantCulture, " fit_time {0,0:0.#####} ", seconds));
		}

		if (prediction_file != string.Empty)
		{
			seconds = Utils.MeasureTime(delegate() {
				Console.WriteLine();
				Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file);
			});
			Console.Error.Write("predicting_time " + seconds);
		}

		Console.WriteLine();
		Console.Error.WriteLine("memory {0}", Memory.Usage);
	}

	Recommender.SaveModel(recommender, save_model_file);
}
/// <summary>Command-line entry point (braced-style variant): parses options, loads rating data,
/// trains or loads a rating predictor, and evaluates it (optionally per-iteration via --find-iter).</summary>
/// <remarks>NOTE(review): relies on class-level fields declared elsewhere in this file
/// (training_file, test_file, recommender, rating_type, file_format, compute_fit,
/// user_mapping, item_mapping, training_data, test_data, *_time_stats, rmse_eval_stats, ...)
/// — confirm against the full file.</remarks>
static void Main(string[] args)
{
	Assembly assembly = Assembly.GetExecutingAssembly();
	// load the experimental plug-in assembly from the executable's own directory
	Assembly.LoadFile(Path.GetDirectoryName(assembly.Location) + Path.DirectorySeparatorChar + "MyMediaLiteExperimental.dll");
	AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
	Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

	// recommender arguments
	string method = "BiasedMatrixFactorization";
	string recommender_options = string.Empty;

	// help/version
	bool show_help = false;
	bool show_version = false;

	// arguments for iteration search
	int find_iter = 0;
	int max_iter = 500;
	double epsilon = 0;
	double rmse_cutoff = double.MaxValue;
	double mae_cutoff = double.MaxValue;

	// data arguments
	string data_dir = string.Empty;
	string user_attributes_file = string.Empty;
	string item_attributes_file = string.Empty;
	string user_relations_file = string.Empty;
	string item_relations_file = string.Empty;

	// other arguments
	bool online_eval = false;
	bool search_hp = false;
	string save_model_file = string.Empty;
	string load_model_file = string.Empty;
	int random_seed = -1;
	string prediction_file = string.Empty;
	string prediction_line = "{0}\t{1}\t{2}";
	int cross_validation = 0;
	double split_ratio = 0;

	// command-line option table (Mono.Options-style OptionSet)
	var p = new OptionSet() {
		// string-valued options
		{ "training-file=", v => training_file = v },
		{ "test-file=", v => test_file = v },
		{ "recommender=", v => method = v },
		{ "recommender-options=", v => recommender_options += " " + v },
		{ "data-dir=", v => data_dir = v },
		{ "user-attributes=", v => user_attributes_file = v },
		{ "item-attributes=", v => item_attributes_file = v },
		{ "user-relations=", v => user_relations_file = v },
		{ "item-relations=", v => item_relations_file = v },
		{ "save-model=", v => save_model_file = v },
		{ "load-model=", v => load_model_file = v },
		{ "prediction-file=", v => prediction_file = v },
		{ "prediction-line=", v => prediction_line = v },
		// integer-valued options
		{ "find-iter=", (int v) => find_iter = v },
		{ "max-iter=", (int v) => max_iter = v },
		{ "random-seed=", (int v) => random_seed = v },
		{ "cross-validation=", (int v) => cross_validation = v },
		// double-valued options
		{ "epsilon=", (double v) => epsilon = v },
		{ "rmse-cutoff=", (double v) => rmse_cutoff = v },
		{ "mae-cutoff=", (double v) => mae_cutoff = v },
		{ "split-ratio=", (double v) => split_ratio = v },
		// enum options
		{ "rating-type=", (RatingType v) => rating_type = v },
		{ "file-format=", (RatingFileFormat v) => file_format = v },
		// boolean options
		{ "compute-fit", v => compute_fit = v != null },
		{ "online-evaluation", v => online_eval = v != null },
		{ "search-hp", v => search_hp = v != null },
		{ "help", v => show_help = v != null },
		{ "version", v => show_version = v != null },
	};
	IList<string> extra_args = p.Parse(args);

	// TODO make sure interaction of --find-iter and --cross-validation works properly

	bool no_eval = test_file == null;

	// argument validation; Usage() is expected to terminate the process on error
	if (show_version)
	{
		ShowVersion();
	}
	if (show_help)
	{
		Usage(0);
	}
	if (extra_args.Count > 0)
	{
		Usage("Did not understand " + extra_args[0]);
	}
	if (training_file == null)
	{
		Usage("Parameter --training-file=FILE is missing.");
	}
	if (cross_validation != 0 && split_ratio != 0)
	{
		Usage("--cross-validation=K and --split-ratio=NUM are mutually exclusive.");
	}

	if (random_seed != -1)
	{
		MyMediaLite.Util.Random.InitInstance(random_seed);
	}

	recommender = Recommender.CreateRatingPredictor(method);
	if (recommender == null)
	{
		Usage(string.Format("Unknown method: '{0}'", method));
	}

	Recommender.Configure(recommender, recommender_options, Usage);

	// ID mapping objects
	if (file_format == RatingFileFormat.KDDCUP_2011)
	{
		user_mapping = new IdentityMapping();
		item_mapping = new IdentityMapping();
	}

	// load all the data
	LoadData(data_dir, user_attributes_file, item_attributes_file, user_relations_file, item_relations_file, !online_eval);

	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

	// optional simple train/test split of the training data
	if (split_ratio > 0)
	{
		var split = new RatingsSimpleSplit(training_data, split_ratio);
		recommender.Ratings = split.Train[0];
		training_data = split.Train[0];
		test_data = split.Test[0];
	}

	Utils.DisplayDataStats(training_data, test_data, recommender);

	if (find_iter != 0)
	{
		// iteration search: evaluate every find_iter iterations up to max_iter
		if (!(recommender is IIterativeModel))
		{
			Usage("Only iterative recommenders support find_iter.");
		}
		var iterative_recommender = (IIterativeModel)recommender;
		Console.WriteLine(recommender.ToString() + " ");

		if (load_model_file == string.Empty)
		{
			recommender.Train();
		}
		else
		{
			Recommender.LoadModel(iterative_recommender, load_model_file);
		}

		if (compute_fit)
		{
			Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", iterative_recommender.ComputeFit()));
		}

		MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data));
		Console.WriteLine(" iteration " + iterative_recommender.NumIter);

		for (int i = (int)iterative_recommender.NumIter + 1; i <= max_iter; i++)
		{
			TimeSpan time = Utils.MeasureTime(delegate() {
				iterative_recommender.Iterate();
			});
			training_time_stats.Add(time.TotalSeconds);

			if (i % find_iter == 0)
			{
				if (compute_fit)
				{
					double fit = 0;
					time = Utils.MeasureTime(delegate() {
						fit = iterative_recommender.ComputeFit();
					});
					fit_time_stats.Add(time.TotalSeconds);
					Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", fit));
				}

				Dictionary<string, double> results = null;
				time = Utils.MeasureTime(delegate() { results = MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data); });
				eval_time_stats.Add(time.TotalSeconds);
				MyMediaLite.Eval.Ratings.DisplayResults(results);
				rmse_eval_stats.Add(results["RMSE"]);
				Console.WriteLine(" iteration " + i);

				Recommender.SaveModel(recommender, save_model_file, i);
				if (prediction_file != string.Empty)
				{
					Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file + "-it-" + i);
				}

				// stop when RMSE moved more than epsilon above the best value seen so far
				if (epsilon > 0.0 && results["RMSE"] - rmse_eval_stats.Min() > epsilon)
				{
					Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], rmse_eval_stats.Min()));
					Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", i);
					break;
				}
				if (results["RMSE"] > rmse_cutoff || results["MAE"] > mae_cutoff)
				{
					Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
					break;
				}
			}
		} // for

		DisplayStats();
	}
	else
	{
		TimeSpan seconds;

		if (load_model_file == string.Empty)
		{
			if (cross_validation > 0)
			{
				Console.Write(recommender.ToString());
				Console.WriteLine();
				var split = new RatingCrossValidationSplit(training_data, cross_validation);
				var results = MyMediaLite.Eval.Ratings.EvaluateOnSplit(recommender, split); // TODO if (search_hp)
				MyMediaLite.Eval.Ratings.DisplayResults(results);
				no_eval = true;
				recommender.Ratings = training_data;
			}
			else
			{
				if (search_hp)
				{
					// TODO --search-hp-criterion=RMSE
					double result = NelderMead.FindMinimum("RMSE", recommender);
					Console.Error.WriteLine("estimated quality (on split) {0}", result.ToString(CultureInfo.InvariantCulture));
					// TODO give out hp search time
				}

				Console.Write(recommender.ToString());
				seconds = Utils.MeasureTime(delegate() { recommender.Train(); });
				Console.Write(" training_time " + seconds + " ");
			}
		}
		else
		{
			Recommender.LoadModel(recommender, load_model_file);
			Console.Write(recommender.ToString() + " ");
		}

		if (!no_eval)
		{
			if (online_eval) // TODO support also for prediction outputs (to allow external evaluation)
			{
				seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.EvaluateOnline(recommender, test_data)); });
			}
			else
			{
				seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data)); });
			}
			Console.Write(" testing_time " + seconds);
		}

		if (compute_fit)
		{
			Console.Write("fit ");
			seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, training_data)); });
			Console.Write(string.Format(CultureInfo.InvariantCulture, " fit_time {0,0:0.#####} ", seconds));
		}

		if (prediction_file != string.Empty)
		{
			seconds = Utils.MeasureTime(delegate() {
				Console.WriteLine();
				Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file);
			});
			Console.Error.Write("predicting_time " + seconds);
		}

		Console.WriteLine();
		Console.Error.WriteLine("memory {0}", Memory.Usage);
	}

	Recommender.SaveModel(recommender, save_model_file);
}
/// <summary>Run the KDD Cup 2011 Track 1 experiment: train one recommender on the
/// training split (for validation) and a clone on the complete ratings (for the final
/// test predictions), optionally searching over iterations.</summary>
/// <remarks>NOTE(review): relies on class-level fields declared elsewhere in this file
/// (recommender, complete_ratings, training_ratings, validation_ratings, validation_candidates,
/// test_data, track2, find_iter, max_iter, epsilon, rmse_cutoff, mae_cutoff, no_eval,
/// cross_validation, load_model_file, save_model_file, prediction_file, *_time_stats, ...)
/// — confirm against the full file.</remarks>
static void DoTrack1()
{
	// two models: one trained on the training split (validated against validation_ratings),
	// and a clone trained on the complete data (used for the submitted test predictions)
	var rating_predictor_validate = recommender as RatingPredictor;
	var rating_predictor_final = rating_predictor_validate.Clone() as RatingPredictor;
	rating_predictor_final.Ratings = complete_ratings;

	Console.WriteLine("Validation split:");
	Utils.DisplayDataStats(training_ratings, validation_ratings, rating_predictor_validate);
	Console.WriteLine("Test split:");
	Utils.DisplayDataStats(complete_ratings, test_data, rating_predictor_final);

	if (find_iter != 0)
	{
		// iteration search: both models are advanced in lock-step, every find_iter
		// iterations the validation model is evaluated
		if (!(recommender is IIterativeModel))
		{
			Usage("Only iterative recommenders support find_iter.");
		}
		IIterativeModel iterative_recommender_validate = (MatrixFactorization)rating_predictor_validate;
		IIterativeModel iterative_recommender_final = (MatrixFactorization)rating_predictor_final;
		Console.WriteLine(recommender.ToString() + " ");

		if (load_model_file == string.Empty)
		{
			iterative_recommender_validate.Train();
			iterative_recommender_final.Train();
		}
		else
		{
			Recommender.LoadModel(rating_predictor_final, "final-" + load_model_file);
		}

		if (compute_fit)
		{
			Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0:0.#####} ", iterative_recommender_validate.ComputeFit()));
		}

		MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(rating_predictor_validate, validation_ratings));
		Console.WriteLine(" " + iterative_recommender_validate.NumIter);

		for (int i = (int)iterative_recommender_validate.NumIter + 1; i <= max_iter; i++)
		{
			TimeSpan time = Utils.MeasureTime(delegate() {
				iterative_recommender_validate.Iterate();
				iterative_recommender_final.Iterate(); // TODO parallelize this
			});
			training_time_stats.Add(time.TotalSeconds);

			if (i % find_iter == 0)
			{
				if (compute_fit)
				{
					double fit = 0;
					time = Utils.MeasureTime(delegate() {
						fit = iterative_recommender_validate.ComputeFit();
					});
					fit_time_stats.Add(time.TotalSeconds);
					Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0:0.#####} ", fit));
				}

				// evaluate and save stats
				// TODO parallelize
				Dictionary<string, double> results = null;
				time = Utils.MeasureTime(delegate() {
					results = MyMediaLite.Eval.Ratings.Evaluate(rating_predictor_validate, validation_ratings);
					MyMediaLite.Eval.Ratings.DisplayResults(results);
					rmse_eval_stats.Add(results["RMSE"]);
					Console.WriteLine(" " + i);
				});
				eval_time_stats.Add(time.TotalSeconds);

				// write out model files and predictions
				if (save_model_file != string.Empty)
				{
					Recommender.SaveModel(rating_predictor_validate, save_model_file + "-validate", i);
					Recommender.SaveModel(rating_predictor_final, save_model_file, i);
				}
				if (prediction_file != string.Empty)
				{
					if (track2)
					{
						KDDCup.PredictRatingsDouble(rating_predictor_validate, validation_candidates, prediction_file + "-validate-it-" + i);
						KDDCup.PredictRatingsDouble(rating_predictor_final, test_data, prediction_file + "-it-" + i);
					}
					else
					{
						KDDCup.PredictRatings(rating_predictor_validate, validation_ratings, prediction_file + "-validate-it-" + i);
						KDDCup.PredictRatings(rating_predictor_final, test_data, prediction_file + "-it-" + i);
					}
				}

				// check whether we should abort
				if (epsilon > 0 && results["RMSE"] > rmse_eval_stats.Min() + epsilon)
				{
					Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], rmse_eval_stats.Min()));
					Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", i);
					break;
				}
				if (results["RMSE"] > rmse_cutoff || results["MAE"] > mae_cutoff)
				{
					Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
					break;
				}
			}
		} // for

		DisplayIterationStats();
		Recommender.SaveModel(recommender, save_model_file);
	}
	else
	{
		TimeSpan seconds;

		if (!no_eval)
		{
			if (load_model_file == string.Empty)
			{
				Console.Write(recommender.ToString());
				if (cross_validation > 0) // TODO cross-validation could also be performed on the complete dataset
				{ // TODO support track2
					Console.WriteLine();
					var split = new RatingCrossValidationSplit(training_ratings, cross_validation);
					var results = MyMediaLite.Eval.Ratings.EvaluateOnSplit(rating_predictor_validate, split);
					MyMediaLite.Eval.Ratings.DisplayResults(results);
					no_eval = true;
					rating_predictor_validate.Ratings = training_ratings;
				}
				else
				{
					seconds = Utils.MeasureTime(delegate() { recommender.Train(); });
					Console.Write(" training_time " + seconds + " ");
					Recommender.SaveModel(recommender, save_model_file);
				}
			}

			Console.Write(recommender.ToString() + " ");

			seconds = Utils.MeasureTime(
				delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(rating_predictor_validate, validation_ratings)); }
			);
			Console.Write(" testing_time " + seconds);
		}
		Console.WriteLine();

		if (prediction_file != string.Empty)
		{
			// final-model training + prediction output for the actual submission
			Console.WriteLine("Prediction for KDD Cup Track 1:");
			seconds = Utils.MeasureTime(delegate() { rating_predictor_final.Train(); });
			Console.Write(" training_time " + seconds + " ");

			if (save_model_file != string.Empty)
			{
				Recommender.SaveModel(rating_predictor_validate, save_model_file + "-validate");
				Recommender.SaveModel(rating_predictor_final, save_model_file);
			}

			Console.WriteLine();
			seconds = Utils.MeasureTime(delegate() {
				KDDCup.PredictRatingsDouble(rating_predictor_final, test_data, prediction_file);
				if (track2)
				{
					KDDCup.PredictRatingsDouble(rating_predictor_validate, validation_candidates, prediction_file + "-validate");
				}
				else
				{
					KDDCup.PredictRatings(rating_predictor_validate, validation_ratings, prediction_file + "-validate");
				}
			});
			Console.Error.WriteLine("predicting_time " + seconds);
		}
	}
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <param name="recommender">an item recommender</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="test_users">a collection of integers with all test users</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval</param>
/// <param name="show_fold_results">if set to true, print per-fold results to STDERR</param>
public static void DoRatingBasedRankingIterativeCrossValidation(
	this RatingPredictor recommender,
	uint num_folds,
	IList<int> test_users,
	IList<int> candidate_items,
	CandidateItems candidate_item_mode,
	RepeatedEvents repeated_events,
	uint max_iter,
	uint find_iter = 1,
	bool show_fold_results = false)
{
	var split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	// BUGFIX: show_fold_results was accepted but silently dropped; forward it to the split-based overload
	recommender.DoRatingBasedRankingIterativeCrossValidation(split, test_users, candidate_items, candidate_item_mode, repeated_events, max_iter, find_iter, show_fold_results);
}
/// <summary>Run the KDD Cup Track 1 experiment: train/evaluate on a validation split and,
/// optionally, train on the complete data to write out test-set predictions.</summary>
/// <remarks>
/// Reads many class-level fields (recommender, training/validation/complete/test data,
/// find_iter, max_iter, cutoffs, file names, track2 flag, timing-stats lists) and mutates
/// some of them (no_eval, the stats lists) -- presumably set up by the command-line parser;
/// verify against the enclosing class. Two modes:
/// find_iter != 0: iterate both models in lockstep, reporting/evaluating every find_iter
/// iterations with convergence and cutoff-based early stopping;
/// find_iter == 0: single train + evaluate (or cross-validation), then optional prediction.
/// </remarks>
static void DoTrack1()
{
	// Two model copies: one trained on the training split (evaluated on the validation
	// ratings), one trained on the complete data (used for the final test predictions).
	var rating_predictor_validate = recommender as RatingPredictor;
	var rating_predictor_final = rating_predictor_validate.Clone() as RatingPredictor;
	rating_predictor_final.Ratings = complete_ratings;

	Console.WriteLine("Validation split:");
	Utils.DisplayDataStats(training_ratings, validation_ratings, rating_predictor_validate);
	Console.WriteLine("Test split:");
	Utils.DisplayDataStats(complete_ratings, test_data, rating_predictor_final);

	if (find_iter != 0)
	{
		// Iterative mode: evaluate every find_iter iterations up to max_iter.
		if ( !(recommender is IIterativeModel) )
			Usage("Only iterative recommenders support find_iter.");

		// NOTE(review): casts via MatrixFactorization, not IIterativeModel -- so this mode
		// appears to assume a matrix-factorization recommender; confirm intended restriction.
		IIterativeModel iterative_recommender_validate = (MatrixFactorization) rating_predictor_validate;
		IIterativeModel iterative_recommender_final = (MatrixFactorization) rating_predictor_final;
		Console.WriteLine(recommender.ToString() + " ");

		if (load_model_file == string.Empty)
		{
			// Fresh start: initial training for both copies.
			iterative_recommender_validate.Train();
			iterative_recommender_final.Train();
		}
		else
		{
			// NOTE(review): only the final model is loaded here ("final-" prefix);
			// the validate model is left untrained/unloaded -- confirm this is intended.
			Recommender.LoadModel(rating_predictor_final, "final-" + load_model_file);
		}

		if (compute_fit)
			Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0:0.#####} ", iterative_recommender_validate.ComputeFit()));

		// Baseline evaluation before further iterating.
		MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(rating_predictor_validate, validation_ratings));
		Console.WriteLine(" " + iterative_recommender_validate.NumIter);

		// Continue from the iteration count already performed by Train()/LoadModel().
		for (int i = (int) iterative_recommender_validate.NumIter + 1; i <= max_iter; i++)
		{
			// One epoch on each model copy, timed.
			TimeSpan time = Utils.MeasureTime(delegate() {
				iterative_recommender_validate.Iterate();
				iterative_recommender_final.Iterate(); // TODO parallelize this
			});
			training_time_stats.Add(time.TotalSeconds);

			if (i % find_iter == 0)
			{
				if (compute_fit)
				{
					double fit = 0;
					time = Utils.MeasureTime(delegate() {
						fit = iterative_recommender_validate.ComputeFit();
					});
					fit_time_stats.Add(time.TotalSeconds);
					Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0:0.#####} ", fit));
				}

				// evaluate and save stats
				// TODO parallelize
				Dictionary<string, double> results = null;
				time = Utils.MeasureTime(delegate() {
					results = MyMediaLite.Eval.Ratings.Evaluate(rating_predictor_validate, validation_ratings);
					MyMediaLite.Eval.Ratings.DisplayResults(results);
					rmse_eval_stats.Add(results["RMSE"]);
					Console.WriteLine(" " + i);
				});
				eval_time_stats.Add(time.TotalSeconds);

				// write out model files and predictions
				if (save_model_file != string.Empty)
				{
					Recommender.SaveModel(rating_predictor_validate, save_model_file + "-validate", i);
					Recommender.SaveModel(rating_predictor_final, save_model_file, i);
				}
				if (prediction_file != string.Empty)
					// Track 2 uses double-precision candidate predictions; Track 1 uses plain rating predictions.
					if (track2)
					{
						KDDCup.PredictRatingsDouble(rating_predictor_validate, validation_candidates, prediction_file + "-validate-it-" + i);
						KDDCup.PredictRatingsDouble(rating_predictor_final, test_data, prediction_file + "-it-" + i);
					}
					else
					{
						KDDCup.PredictRatings(rating_predictor_validate, validation_ratings, prediction_file + "-validate-it-" + i);
						KDDCup.PredictRatings(rating_predictor_final, test_data, prediction_file + "-it-" + i);
					}

				// check whether we should abort
				// Stop if RMSE has drifted more than epsilon above the best RMSE seen so far.
				if (epsilon > 0 && results["RMSE"] > rmse_eval_stats.Min() + epsilon)
				{
					Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], rmse_eval_stats.Min()));
					Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", i);
					break;
				}
				// Stop if either error measure exceeds its hard cutoff.
				if (results["RMSE"] > rmse_cutoff || results["MAE"] > mae_cutoff)
				{
					Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
					break;
				}
			}
		} // for

		DisplayIterationStats();
		Recommender.SaveModel(recommender, save_model_file);
	}
	else
	{
		// Non-iterative mode: train once, evaluate, optionally predict.
		TimeSpan seconds;

		if (!no_eval)
		{
			if (load_model_file == string.Empty)
			{
				Console.Write(recommender.ToString());
				if (cross_validation > 0) // TODO cross-validation could also be performed on the complete dataset
				{
					// TODO support track2
					Console.WriteLine();
					var split = new RatingCrossValidationSplit(training_ratings, cross_validation);
					var results = MyMediaLite.Eval.Ratings.EvaluateOnSplit(rating_predictor_validate, split);
					MyMediaLite.Eval.Ratings.DisplayResults(results);
					// Cross-validation already evaluated; skip the hold-out evaluation below.
					no_eval = true;
					rating_predictor_validate.Ratings = training_ratings;
				}
				else
				{
					seconds = Utils.MeasureTime( delegate() { recommender.Train(); } );
					Console.Write(" training_time " + seconds + " ");
					Recommender.SaveModel(recommender, save_model_file);
				}
			}

			Console.Write(recommender.ToString() + " ");

			seconds = Utils.MeasureTime( delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(rating_predictor_validate, validation_ratings)); } );
			Console.Write(" testing_time " + seconds);
		}
		Console.WriteLine();

		if (prediction_file != string.Empty)
		{
			// Final predictions: train on the complete data, then write prediction files.
			Console.WriteLine("Prediction for KDD Cup Track 1:");
			seconds = Utils.MeasureTime( delegate() { rating_predictor_final.Train(); } );
			Console.Write(" training_time " + seconds + " ");
			if (save_model_file != string.Empty)
			{
				Recommender.SaveModel(rating_predictor_validate, save_model_file + "-validate");
				Recommender.SaveModel(rating_predictor_final, save_model_file);
			}
			Console.WriteLine();

			seconds = Utils.MeasureTime( delegate() {
				KDDCup.PredictRatingsDouble(rating_predictor_final, test_data, prediction_file);
				if (track2)
					KDDCup.PredictRatingsDouble(rating_predictor_validate, validation_candidates, prediction_file + "-validate");
				else
					KDDCup.PredictRatings(rating_predictor_validate, validation_ratings, prediction_file + "-validate");
			});
			Console.Error.WriteLine("predicting_time " + seconds);
		}
	}
}