/// <summary>Find the parameters resulting in the minimal results for a given evaluation measure (1D)</summary>
/// <remarks>The recommender will be set to the best parameter value after calling this method.</remarks>
/// <param name="evaluation_measure">the name of the evaluation measure</param>
/// <param name="hyperparameter_name">the name of the hyperparameter to optimize</param>
/// <param name="hyperparameter_values">the values of the hyperparameter to try out; must not be empty</param>
/// <param name="recommender">the recommender</param>
/// <param name="split">the dataset split to use</param>
/// <returns>the best (lowest) average value for the hyperparameter</returns>
/// <exception cref="ArgumentException">if hyperparameter_values is null or empty</exception>
public static double FindMinimum(
	string evaluation_measure,
	string hyperparameter_name,
	double[] hyperparameter_values,
	RatingPredictor recommender,
	ISplit<IRatings> split)
{
	// guard: with an empty candidate list min_i would stay -1 and the final
	// SetProperty below would throw an IndexOutOfRangeException
	if (hyperparameter_values == null || hyperparameter_values.Length == 0)
		throw new ArgumentException("hyperparameter_values must contain at least one value", "hyperparameter_values");

	double min_result = double.MaxValue;
	int min_i = -1;

	// try out each candidate value via cross-validation on the given split
	for (int i = 0; i < hyperparameter_values.Length; i++)
	{
		recommender.SetProperty(hyperparameter_name, hyperparameter_values[i].ToString(CultureInfo.InvariantCulture));
		double result = recommender.DoCrossValidation(split)[evaluation_measure];
		if (result < min_result)
		{
			min_i = i;
			min_result = result;
		}
	}

	// leave the recommender configured with the best value found
	recommender.SetProperty(hyperparameter_name, hyperparameter_values[min_i].ToString(CultureInfo.InvariantCulture));

	return min_result;
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <param name="recommender">a rating predictor</param>
/// <param name="split">a rating dataset split</param>
/// <param name="show_results">set to true to print results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public Dictionary<string, double> EvaluateOnSplit(RatingPredictor recommender, ISplit<IRatings> split, bool show_results)
{
	// accumulate per-measure sums over the folds, then divide by the fold count
	var avg_results = new Dictionary<string, double>();
	foreach (var measure in Measures)
		avg_results[measure] = 0;

	for (int fold = 0; fold < split.NumberOfFolds; fold++)
	{
		// train a clone so the caller's recommender stays untouched
		var fold_recommender = (RatingPredictor) recommender.Clone();
		fold_recommender.Ratings = split.Train[fold];
		fold_recommender.Train();

		var fold_results = Evaluate(fold_recommender, split.Test[fold]);
		foreach (var measure in fold_results.Keys)
			avg_results[measure] += fold_results[measure];

		if (show_results)
			Console.Error.WriteLine("fold {0}, RMSE {1,0:0.#####}, MAE {2,0:0.#####}", fold, fold_results["RMSE"].ToString(CultureInfo.InvariantCulture), fold_results["MAE"].ToString(CultureInfo.InvariantCulture));
	}

	// turn the sums into averages
	foreach (var measure in avg_results.Keys.ToList())
		avg_results[measure] /= split.NumberOfFolds;

	return avg_results;
}
/// <summary>Find the parameters resulting in the minimal results for a given evaluation measure (2D)</summary>
/// <remarks>The recommender will be set to the best parameter value after calling this method.</remarks>
/// <param name="evaluation_measure">the name of the evaluation measure</param>
/// <param name="hp_name1">the name of the first hyperparameter to optimize</param>
/// <param name="hp_name2">the name of the second hyperparameter to optimize</param>
/// <param name="hp_values1">the values of the first hyperparameter to try out; must not be empty</param>
/// <param name="hp_values2">the values of the second hyperparameter to try out; must not be empty</param>
/// <param name="recommender">the recommender</param>
/// <param name="split">the dataset split to use</param>
/// <returns>the best (lowest) average value for the hyperparameter</returns>
/// <exception cref="ArgumentException">if either value array is null or empty</exception>
public static double FindMinimum(
	string evaluation_measure,
	string hp_name1,
	string hp_name2,
	double[] hp_values1,
	double[] hp_values2,
	RatingPredictor recommender,
	ISplit<IRatings> split)
{
	// guard: an empty grid would leave min_i/min_j at -1 and cause an
	// IndexOutOfRangeException in the final SetProperty calls
	if (hp_values1 == null || hp_values1.Length == 0 || hp_values2 == null || hp_values2.Length == 0)
		throw new ArgumentException("hp_values1 and hp_values2 must each contain at least one value");

	double min_result = double.MaxValue;
	int min_i = -1;
	int min_j = -1;

	// exhaustive grid search over the cross product of the two value lists
	for (int i = 0; i < hp_values1.Length; i++)
		for (int j = 0; j < hp_values2.Length; j++)
		{
			recommender.SetProperty(hp_name1, hp_values1[i].ToString(CultureInfo.InvariantCulture));
			recommender.SetProperty(hp_name2, hp_values2[j].ToString(CultureInfo.InvariantCulture));

			Console.Error.WriteLine("reg_u={0} reg_i={1}", hp_values1[i].ToString(CultureInfo.InvariantCulture), hp_values2[j].ToString(CultureInfo.InvariantCulture)); // TODO this is not generic

			double result = recommender.DoCrossValidation(split)[evaluation_measure];
			if (result < min_result)
			{
				min_i = i;
				min_j = j;
				min_result = result;
			}
		}

	// set to best hyperparameter values
	recommender.SetProperty(hp_name1, hp_values1[min_i].ToString(CultureInfo.InvariantCulture));
	recommender.SetProperty(hp_name2, hp_values2[min_j].ToString(CultureInfo.InvariantCulture));

	return min_result;
}
/// <summary>Find best hyperparameter (according to an error measure) using Nelder-Mead search</summary>
/// <param name="error_measure">an error measure (lower is better)</param>
/// <param name="recommender">a rating predictor (will be set to best hyperparameter combination)</param>
/// <returns>the estimated error of the best hyperparameter combination</returns>
/// <exception cref="NotSupportedException">if no hyperparameter set is defined for the recommender's type</exception>
public static double FindMinimum(
	string error_measure,
	RatingPredictor recommender)
{
	var split = new RatingsSimpleSplit(recommender.Ratings, split_ratio);
	//var split = new RatingCrossValidationSplit(recommender.Ratings, 5);

	IList<string> hp_names;
	IList<DenseVector> initial_hp_values;

	// Select hyperparameter names and the initial simplex per recommender type.
	// NOTE(review): the order of checks looks intentional — BiasedMatrixFactorization
	// must be tested before MatrixFactorization (presumably its base class); confirm.
	// TODO manage this via reflection?
	if (recommender is UserItemBaseline)
	{
		hp_names = new string[] { "reg_u", "reg_i" };
		initial_hp_values = new DenseVector[] {
			new DenseVector(new double[] { 25, 10 }),
			new DenseVector(new double[] { 10, 25 }),
			new DenseVector(new double[] { 2, 5 }),
			new DenseVector(new double[] { 5, 2 }),
			new DenseVector(new double[] { 1, 4 }),
			new DenseVector(new double[] { 4, 1 }),
			new DenseVector(new double[] { 3, 3 }),
		};
	}
	else if (recommender is BiasedMatrixFactorization)
	{
		hp_names = new string[] { "regularization", "bias_reg" };
		initial_hp_values = new DenseVector[] {
			// TODO reg_u and reg_i (in a second step?)
			new DenseVector(new double[] { 0.1, 0 }),
			new DenseVector(new double[] { 0.01, 0 }),
			new DenseVector(new double[] { 0.0001, 0 }),
			new DenseVector(new double[] { 0.00001, 0 }),
			new DenseVector(new double[] { 0.1, 0.0001 }),
			new DenseVector(new double[] { 0.01, 0.0001 }),
			new DenseVector(new double[] { 0.0001, 0.0001 }),
			new DenseVector(new double[] { 0.00001, 0.0001 }),
		};
	}
	else if (recommender is MatrixFactorization)
	{
		// TODO normal interval search could be more efficient
		hp_names = new string[] { "regularization", };
		initial_hp_values = new DenseVector[] {
			new DenseVector(new double[] { 0.1 }),
			new DenseVector(new double[] { 0.01 }),
			new DenseVector(new double[] { 0.0001 }),
			new DenseVector(new double[] { 0.00001 }),
		};
	}
	// TODO kNN-based methods
	else
	{
		// NotSupportedException is more specific than the bare Exception thrown
		// before; callers catching Exception still catch it
		throw new NotSupportedException("not prepared for type " + recommender.GetType().ToString());
	}

	return FindMinimum(error_measure, hp_names, initial_hp_values, recommender, split);
}
/// <summary>Configure the recommender from a hyperparameter string and evaluate it on the split</summary>
/// <param name="recommender">the rating predictor to configure and evaluate</param>
/// <param name="split">the dataset split used for evaluation</param>
/// <param name="hp_string">the hyperparameter configuration string</param>
/// <param name="evaluation_measure">the name of the measure to report</param>
/// <returns>the value of the requested evaluation measure</returns>
static double Run(RatingPredictor recommender, ISplit<IRatings> split, string hp_string, string evaluation_measure)
{
	Recommender.Configure(recommender, hp_string);

	var results = Eval.Ratings.EvaluateOnSplit(recommender, split);
	double measured = results[evaluation_measure];

	// log the evaluated point so the Nelder-Mead trajectory can be followed
	Console.Error.WriteLine("Nelder-Mead: {0}: {1}", hp_string, measured.ToString(CultureInfo.InvariantCulture));

	return measured;
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <param name="recommender">a rating predictor</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public RatingPredictionEvaluationResults DoCrossValidation(
	this RatingPredictor recommender,
	uint num_folds = 5,
	bool compute_fit = false,
	bool show_fold_results = false)
{
	// build the k-fold split from the recommender's own ratings and delegate
	// to the split-based overload
	return recommender.DoCrossValidation(
		new RatingCrossValidationSplit(recommender.Ratings, num_folds),
		compute_fit,
		show_fold_results);
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <param name="recommender">a rating predictor</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval</param>
/// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
static public void DoIterativeCrossValidation(
	this RatingPredictor recommender,
	uint num_folds,
	uint max_iter,
	uint find_iter = 1,
	bool show_fold_results = false)
{
	// create the k-fold split from the recommender's ratings, then hand off
	// to the split-based overload
	var cv_split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	recommender.DoIterativeCrossValidation(cv_split, max_iter, find_iter, show_fold_results);
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <param name="recommender">an item recommender</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_results">set to true to print results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public EvaluationResults DoRatingBasedRankingCrossValidation(
	this RatingPredictor recommender,
	uint num_folds,
	IList<int> candidate_items,
	CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
	bool compute_fit = false,
	bool show_results = false)
{
	// build the k-fold split from the recommender's ratings and delegate to
	// the split-based overload
	var cv_split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	return recommender.DoRatingBasedRankingCrossValidation(cv_split, candidate_items, candidate_item_mode, compute_fit, show_results);
}
/// <summary>
/// Read the rating data, build the list of the n_movies most frequently rated movies,
/// and load a pre-trained BiasedMatrixFactorization model; finally predicts all ratings
/// for the current user.
/// </summary>
private void CreateRecommender()
{
	BiasedMatrixFactorization recommender = new BiasedMatrixFactorization();

	Console.Error.Write("Reading in ratings ... ");
	TimeSpan time = Utils.MeasureTime(delegate() {
		recommender.Ratings = RatingPrediction.Read(ratings_file, user_mapping, item_mapping);
	});
	Console.Error.WriteLine("done ({0,0:0.##}).", time.TotalSeconds.ToString(CultureInfo.InvariantCulture));

	//Console.Error.Write("Reading in additional ratings ... ");
	//string[] rating_files = Directory.GetFiles("../../saved_data/", "user-ratings-*");
	//Console.Error.WriteLine("done.");

	// count ratings per item (indices_for_item holds the rating indices of one item);
	// after Sort + Reverse the most frequently rated movies come first
	foreach (var indices_for_item in recommender.Ratings.ByItem)
	{
		if (indices_for_item.Count > 0)
		{
			movies_by_frequency.Add(new WeightedItem(recommender.Ratings.Items[indices_for_item[0]], indices_for_item.Count));
		}
	}
	movies_by_frequency.Sort();
	movies_by_frequency.Reverse();
	// keep only the IDs of the top n_movies most popular movies
	for (int i = 0; i < n_movies; i++)
	{
		top_n_movies.Add(movies_by_frequency[i].item_id);
	}

	Console.Error.Write("Loading prediction model ... ");
	// online updates only adapt the user factors; item factors stay fixed
	recommender.UpdateUsers = true;
	recommender.UpdateItems = false;
	// hyperparameters must match those used when the model file was trained
	// NOTE(review): presumably — confirm against the training setup
	recommender.BiasReg = 0.001;
	recommender.Regularization = 0.045;
	recommender.NumIter = 60;
	time = Utils.MeasureTime(delegate() {
		recommender.LoadModel(model_file);
	});
	Console.Error.WriteLine("done ({0,0:0.##}).", time.TotalSeconds.ToString(CultureInfo.InvariantCulture));

	rating_predictor = recommender;

	current_user_id = user_mapping.ToInternalID(current_user_external_id);
	//rating_predictor.AddUser(current_user_id);

	// add movies that were not in the training set
	//rating_predictor.AddItem( item_mapping.InternalIDs.Count - 1 );

	PredictAllRatings();
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <param name="recommender">an item recommender</param>
/// <param name="num_folds">the number of folds</param>
/// <param name="test_users">a collection of integers with all test users</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval</param>
/// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
static public void DoRatingBasedRankingIterativeCrossValidation(
	this RatingPredictor recommender,
	uint num_folds,
	IList<int> test_users,
	IList<int> candidate_items,
	CandidateItems candidate_item_mode,
	RepeatedEvents repeated_events,
	uint max_iter,
	uint find_iter = 1,
	bool show_fold_results = false)
{
	var split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);
	// BUGFIX: show_fold_results was accepted but never forwarded, so per-fold
	// results were never printed even when the caller requested them
	recommender.DoRatingBasedRankingIterativeCrossValidation(split, test_users, candidate_items, candidate_item_mode, repeated_events, max_iter, find_iter, show_fold_results);
}
// TODO get rid of recommender argument
/// <summary>Display dataset statistics</summary>
/// <param name="train">the training data</param>
/// <param name="test">the test data (may be null)</param>
/// <param name="recommender">the recommender (to get attribute information; may be null)</param>
/// <param name="display_overlap">if set true, display the user/item overlap between train and test</param>
public static void DisplayDataStats(IRatings train, IRatings test, RatingPredictor recommender, bool display_overlap)
{
	// training data stats
	int num_users = train.AllUsers.Count;
	int num_items = train.AllItems.Count;
	long matrix_size = (long)num_users * num_items; // 64-bit product to avoid int overflow on large datasets
	long empty_size = (long)matrix_size - train.Count;
	double sparsity = (double)100L * empty_size / matrix_size; // percentage of empty cells in the user-item matrix
	Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}", num_users, num_items, train.Count, sparsity));

	// test data stats (same computation, reusing the locals)
	if (test != null)
	{
		num_users = test.AllUsers.Count;
		num_items = test.AllItems.Count;
		matrix_size = (long)num_users * num_items;
		empty_size = (long)matrix_size - test.Count;
		sparsity = (double)100L * empty_size / matrix_size;
		Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "test data: {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}", num_users, num_items, test.Count, sparsity));
	}

	// count and display the overlap between train and test
	// (set-difference computation can be slow, hence it is optional and timed)
	if (display_overlap && test != null)
	{
		int num_new_users = 0;
		int num_new_items = 0;
		TimeSpan seconds = Utils.MeasureTime(delegate() {
			num_new_users = test.AllUsers.Except(train.AllUsers).Count();
			num_new_items = test.AllItems.Except(train.AllItems).Count();
		});
		Console.WriteLine("{0} new users, {1} new items ({2} seconds)", num_new_users, num_new_items, seconds);
	}

	// attribute stats, only for attribute-aware recommenders
	if (recommender != null)
	{
		if (recommender is IUserAttributeAwareRecommender)
		{
			Console.WriteLine("{0} user attributes", ((IUserAttributeAwareRecommender)recommender).NumUserAttributes);
		}
		if (recommender is IItemAttributeAwareRecommender)
		{
			Console.WriteLine("{0} item attributes", ((IItemAttributeAwareRecommender)recommender).NumItemAttributes);
		}
	}
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <remarks>
/// Folds are trained and evaluated in parallel; the caller's recommender is never
/// modified, each fold works on its own clone.
/// </remarks>
/// <param name="recommender">a rating predictor</param>
/// <param name="split">a rating dataset split</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_fold_results">set to true to print per-fold results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public RatingPredictionEvaluationResults DoCrossValidation(
	this RatingPredictor recommender,
	ISplit<IRatings> split,
	bool compute_fit = false,
	bool show_fold_results = false)
{
	// one result slot per fold; each parallel iteration writes only its own index
	var fold_results = new RatingPredictionEvaluationResults[split.NumberOfFolds];

	Parallel.For(0, (int)split.NumberOfFolds, i =>
	{
		try
		{
			var split_recommender = (RatingPredictor)recommender.Clone(); // to avoid changes in recommender
			split_recommender.Ratings = split.Train[i];
			// transductive recommenders also see the test events (without ratings)
			// — the type check on 'recommender' is equivalent, since the clone has
			// the same runtime type
			if (recommender is ITransductiveRatingPredictor)
			{
				((ITransductiveRatingPredictor)split_recommender).AdditionalFeedback = split.Test[i];
			}
			split_recommender.Train();
			fold_results[i] = Ratings.Evaluate(split_recommender, split.Test[i]);
			if (compute_fit)
			{
				fold_results[i]["fit"] = (float)split_recommender.ComputeFit();
			}
			if (show_fold_results)
			{
				Console.Error.WriteLine("fold {0} {1}", i, fold_results[i]);
			}
		}
		catch (Exception e)
		{
			// log before rethrowing: Parallel.For wraps exceptions in an AggregateException
			Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
			throw;
		}
	});

	// averaging over the folds happens in the results constructor
	return (new RatingPredictionEvaluationResults(fold_results));
}
/// <summary>Run the user-based collaborative filtering demo: find neighbours of a fixed
/// target user, print their similarities, and show the top predicted ratings.</summary>
static void UserItemStart()
{
	// read the rating data and its file-level metadata
	var reader = new FileReader();
	Dictionary<int, UserPref> users = reader.GetData();
	Dictionary<string, float> metaData = reader.GetMetaData();

	// choose the similarity formula based on the metadata, then find neighbours
	var formulaFactory = new FormulaFactory();
	var finder = new NeighbourFinder(formulaFactory.GetFormula(metaData));

	int targetUserID = 186;
	Dictionary<int, double> neighbours = finder.FindNeighbours(targetUserID, users);

	foreach (KeyValuePair<int, double> neighbour in neighbours)
		Console.WriteLine("Neighbour {0} has a similarity of {1}", neighbour.Key, neighbour.Value);

	// predict ratings for the target user from the neighbour list and show the top 8
	var predictor = new RatingPredictor();
	predictor.PredictRatings(users, targetUserID, neighbours);
	predictor.GetTopPredictedRatings(8);

	Console.Read();
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <remarks>
/// First trains one clone per fold in parallel, then repeatedly calls Iterate() on all
/// folds in lock-step and evaluates every find_iter iterations, up to max_iter.
/// The caller's recommender is never modified.
/// </remarks>
/// <param name="recommender">a rating predictor; must implement IIterativeModel</param>
/// <param name="split">a rating dataset split</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval</param>
/// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
/// <exception cref="ArgumentException">if the recommender is not an IIterativeModel</exception>
static public void DoIterativeCrossValidation(
	this RatingPredictor recommender,
	ISplit<IRatings> split,
	uint max_iter,
	uint find_iter = 1,
	bool show_fold_results = false)
{
	if (!(recommender is IIterativeModel))
	{
		throw new ArgumentException("recommender must be of type IIterativeModel");
	}

	// per-fold state: clones, their iterative view, and the latest results;
	// each parallel iteration writes only its own index
	var split_recommenders = new RatingPredictor[split.NumberOfFolds];
	var iterative_recommenders = new IIterativeModel[split.NumberOfFolds];
	var fold_results = new RatingPredictionEvaluationResults[split.NumberOfFolds];

	// initial training and evaluation
	Parallel.For(0, (int)split.NumberOfFolds, i =>
	{
		try
		{
			split_recommenders[i] = (RatingPredictor)recommender.Clone(); // to avoid changes in recommender
			split_recommenders[i].Ratings = split.Train[i];
			// transductive recommenders also get the fold's test events
			if (recommender is ITransductiveRatingPredictor)
			{
				((ITransductiveRatingPredictor)split_recommenders[i]).AdditionalFeedback = split.Test[i];
			}
			split_recommenders[i].Train();
			iterative_recommenders[i] = (IIterativeModel)split_recommenders[i];
			fold_results[i] = Ratings.Evaluate(split_recommenders[i], split.Test[i]);
			if (show_fold_results)
			{
				Console.Error.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], iterative_recommenders[i].NumIter);
			}
		}
		catch (Exception e)
		{
			// log before rethrowing: Parallel.For wraps exceptions in an AggregateException
			Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
			throw;
		}
	});
	Console.WriteLine("{0} iteration {1}", new RatingPredictionEvaluationResults(fold_results), iterative_recommenders[0].NumIter);

	// iterative training and evaluation; starts after the NumIter iterations
	// already performed by Train() above
	for (int it = (int)iterative_recommenders[0].NumIter + 1; it <= max_iter; it++)
	{
		Parallel.For(0, (int)split.NumberOfFolds, i =>
		{
			try
			{
				iterative_recommenders[i].Iterate();

				// only evaluate at the report interval
				if (it % find_iter == 0)
				{
					fold_results[i] = Ratings.Evaluate(split_recommenders[i], split.Test[i]);
					if (show_fold_results)
					{
						Console.Error.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], it);
					}
				}
			}
			catch (Exception e)
			{
				Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
				throw;
			}
		});
		// NOTE(review): on non-report iterations this prints the previous
		// evaluation's averages — presumably intentional; confirm
		Console.WriteLine("{0} iteration {1}", new RatingPredictionEvaluationResults(fold_results), it);
	}
}
/// <summary>Create a Nelder-Mead hyperparameter search object for the given measure and recommender</summary>
/// <param name="evaluation_measure">the name of the evaluation measure to minimize</param>
/// <param name="recommender">the recommender whose hyperparameters are to be optimized</param>
public NelderMead(string evaluation_measure, RatingPredictor recommender)
{
	this.evaluation_measure = evaluation_measure;
	this.recommender = recommender;
	Init();
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <param name="recommender">a rating predictor</param>
/// <param name="split">a rating dataset split</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public Dictionary<string, double> EvaluateOnSplit(RatingPredictor recommender, ISplit<IRatings> split)
{
	// delegate to the full overload, suppressing per-fold output
	return EvaluateOnSplit(recommender, split, show_results: false);
}
/// <summary>Command-line entry point: parse arguments, load the KDD-Cup data, and run track 1</summary>
static void Main(string[] args)
{
	AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
	Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

	// check number of command line parameters
	if (args.Length < 1)
		Usage("Not enough arguments.");

	// read command line parameters: first argument is the method name,
	// the rest are key=value recommender parameters
	string method = args[0];

	RecommenderParameters parameters = null;
	try
	{
		parameters = new RecommenderParameters(args, 1);
	}
	catch (ArgumentException e)
	{
		Usage(e.Message);
	}

	// arguments for iteration search
	find_iter = parameters.GetRemoveInt32( "find_iter", 0);
	max_iter = parameters.GetRemoveInt32( "max_iter", 500);
	compute_fit = parameters.GetRemoveBool( "compute_fit", false);
	epsilon = parameters.GetRemoveDouble( "epsilon", 0);
	rmse_cutoff = parameters.GetRemoveDouble( "rmse_cutoff", double.MaxValue);
	mae_cutoff = parameters.GetRemoveDouble( "mae_cutoff", double.MaxValue);

	// data arguments: select the track-specific subdirectory
	// NOTE(review): the empty-data_dir default lacks the leading slash
	// ("track1" vs "/track1" above) — looks inconsistent; confirm intent
	string data_dir = parameters.GetRemoveString( "data_dir");
	track2 = parameters.GetRemoveBool( "track2", false);
	if (data_dir != string.Empty)
		data_dir = data_dir + (track2 ? "/mml-track2" : "/track1");
	else
		data_dir = track2 ? "/mml-track2" : "track1";
	sample_data = parameters.GetRemoveBool( "sample_data", false);

	// other arguments
	save_model_file = parameters.GetRemoveString( "save_model");
	load_model_file = parameters.GetRemoveString( "load_model");
	int random_seed = parameters.GetRemoveInt32( "random_seed", -1);
	no_eval = parameters.GetRemoveBool( "no_eval", false);
	prediction_file = parameters.GetRemoveString( "prediction_file");
	cross_validation = parameters.GetRemoveUInt32( "cross_validation", 0);
	good_rating_prob = parameters.GetRemoveBool( "good_rating_prob", false);

	if (random_seed != -1)
		MyMediaLite.Util.Random.Seed = random_seed;

	// create and configure the recommender; any leftover parameters are an error
	recommender = Recommender.CreateRatingPredictor(method);
	if (recommender == null)
		Usage(string.Format("Unknown method: '{0}'", method));
	Recommender.Configure(recommender, parameters, Usage);
	if (parameters.CheckForLeftovers())
		Usage(-1);

	// load all the data
	TimeSpan loading_time = Wrap.MeasureTime(delegate() { LoadData(data_dir); });
	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));
	recommender.Ratings = training_ratings;

	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

	if (load_model_file != string.Empty)
		Model.Load(recommender, load_model_file);

	DoTrack1();

	Console.Error.WriteLine("memory {0}", Memory.Usage);
}
/// <summary>
/// Command-line entry point of the rating prediction tool: parse options, load data,
/// then either run an iteration search (--find-iter), cross-validation, a simple
/// train/test evaluation, or hyperparameter search, and optionally save the model
/// and write predictions.
/// </summary>
static void Main(string[] args)
{
	Assembly assembly = Assembly.GetExecutingAssembly();
	// load the experimental recommenders from the directory of the executable
	Assembly.LoadFile(Path.GetDirectoryName(assembly.Location) + Path.DirectorySeparatorChar + "MyMediaLiteExperimental.dll");

	AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
	Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

	// recommender arguments
	string method = "BiasedMatrixFactorization";
	string recommender_options = string.Empty;

	// help/version
	bool show_help = false;
	bool show_version = false;

	// arguments for iteration search
	int find_iter = 0;
	int max_iter = 500;
	double epsilon = 0;
	double rmse_cutoff = double.MaxValue;
	double mae_cutoff = double.MaxValue;

	// data arguments
	string data_dir = string.Empty;
	string user_attributes_file = string.Empty;
	string item_attributes_file = string.Empty;
	string user_relations_file = string.Empty;
	string item_relations_file = string.Empty;

	// other arguments
	bool online_eval = false;
	bool search_hp = false;
	string save_model_file = string.Empty;
	string load_model_file = string.Empty;
	int random_seed = -1;
	string prediction_file = string.Empty;
	string prediction_line = "{0}\t{1}\t{2}";
	int cross_validation = 0;
	double split_ratio = 0;

	// command-line option definitions (NDesk/Mono Options style)
	var p = new OptionSet() {
		// string-valued options
		{ "training-file=", v => training_file = v },
		{ "test-file=", v => test_file = v },
		{ "recommender=", v => method = v },
		{ "recommender-options=", v => recommender_options += " " + v },
		{ "data-dir=", v => data_dir = v },
		{ "user-attributes=", v => user_attributes_file = v },
		{ "item-attributes=", v => item_attributes_file = v },
		{ "user-relations=", v => user_relations_file = v },
		{ "item-relations=", v => item_relations_file = v },
		{ "save-model=", v => save_model_file = v },
		{ "load-model=", v => load_model_file = v },
		{ "prediction-file=", v => prediction_file = v },
		{ "prediction-line=", v => prediction_line = v },
		// integer-valued options
		{ "find-iter=", (int v) => find_iter = v },
		{ "max-iter=", (int v) => max_iter = v },
		{ "random-seed=", (int v) => random_seed = v },
		{ "cross-validation=", (int v) => cross_validation = v },
		// double-valued options
		{ "epsilon=", (double v) => epsilon = v },
		{ "rmse-cutoff=", (double v) => rmse_cutoff = v },
		{ "mae-cutoff=", (double v) => mae_cutoff = v },
		{ "split-ratio=", (double v) => split_ratio = v },
		// enum options
		{ "rating-type=", (RatingType v) => rating_type = v },
		{ "file-format=", (RatingFileFormat v) => file_format = v },
		// boolean options
		{ "compute-fit", v => compute_fit = v != null },
		{ "online-evaluation", v => online_eval = v != null },
		{ "search-hp", v => search_hp = v != null },
		{ "help", v => show_help = v != null },
		{ "version", v => show_version = v != null },
	};
	IList<string> extra_args = p.Parse(args);

	// TODO make sure interaction of --find-iter and --cross-validation works properly

	// without a test file there is nothing to evaluate against
	bool no_eval = test_file == null;

	if (show_version)
	{
		ShowVersion();
	}
	if (show_help)
	{
		Usage(0);
	}

	// validate the argument combination
	if (extra_args.Count > 0)
	{
		Usage("Did not understand " + extra_args[0]);
	}
	if (training_file == null)
	{
		Usage("Parameter --training-file=FILE is missing.");
	}
	if (cross_validation != 0 && split_ratio != 0)
	{
		Usage("--cross-validation=K and --split-ratio=NUM are mutually exclusive.");
	}

	if (random_seed != -1)
	{
		MyMediaLite.Util.Random.InitInstance(random_seed);
	}

	// create and configure the recommender
	recommender = Recommender.CreateRatingPredictor(method);
	if (recommender == null)
	{
		Usage(string.Format("Unknown method: '{0}'", method));
	}
	Recommender.Configure(recommender, recommender_options, Usage);

	// ID mapping objects: KDD-Cup files already use numeric IDs
	if (file_format == RatingFileFormat.KDDCUP_2011)
	{
		user_mapping = new IdentityMapping();
		item_mapping = new IdentityMapping();
	}

	// load all the data
	LoadData(data_dir, user_attributes_file, item_attributes_file, user_relations_file, item_relations_file, !online_eval);

	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

	// optional train/test split from the training data
	if (split_ratio > 0)
	{
		var split = new RatingsSimpleSplit(training_data, split_ratio);
		recommender.Ratings = split.Train[0];
		training_data = split.Train[0];
		test_data = split.Test[0];
	}

	Utils.DisplayDataStats(training_data, test_data, recommender);

	if (find_iter != 0)
	{
		// iteration search: train once, then iterate and evaluate every find_iter steps
		if (!(recommender is IIterativeModel))
		{
			Usage("Only iterative recommenders support find_iter.");
		}
		var iterative_recommender = (IIterativeModel)recommender;
		Console.WriteLine(recommender.ToString() + " ");

		if (load_model_file == string.Empty)
		{
			recommender.Train();
		}
		else
		{
			Recommender.LoadModel(iterative_recommender, load_model_file);
		}

		if (compute_fit)
		{
			Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", iterative_recommender.ComputeFit()));
		}

		MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data));
		Console.WriteLine(" iteration " + iterative_recommender.NumIter);

		for (int i = (int)iterative_recommender.NumIter + 1; i <= max_iter; i++)
		{
			TimeSpan time = Utils.MeasureTime(delegate() {
				iterative_recommender.Iterate();
			});
			training_time_stats.Add(time.TotalSeconds);

			if (i % find_iter == 0)
			{
				if (compute_fit)
				{
					double fit = 0;
					time = Utils.MeasureTime(delegate() {
						fit = iterative_recommender.ComputeFit();
					});
					fit_time_stats.Add(time.TotalSeconds);
					Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", fit));
				}

				Dictionary<string, double> results = null;
				time = Utils.MeasureTime(delegate() {
					results = MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data);
				});
				eval_time_stats.Add(time.TotalSeconds);
				MyMediaLite.Eval.Ratings.DisplayResults(results);
				rmse_eval_stats.Add(results["RMSE"]);
				Console.WriteLine(" iteration " + i);

				// snapshot the model and predictions at each report point
				Recommender.SaveModel(recommender, save_model_file, i);
				if (prediction_file != string.Empty)
				{
					Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file + "-it-" + i);
				}

				// early stopping: RMSE got worse than the best seen by more than epsilon
				if (epsilon > 0.0 && results["RMSE"] - rmse_eval_stats.Min() > epsilon)
				{
					Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], rmse_eval_stats.Min()));
					Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", i);
					break;
				}
				// hard cutoffs: abort when results are clearly bad
				if (results["RMSE"] > rmse_cutoff || results["MAE"] > mae_cutoff)
				{
					Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
					break;
				}
			}
		} // for

		DisplayStats();
	}
	else
	{
		TimeSpan seconds;

		if (load_model_file == string.Empty)
		{
			if (cross_validation > 0)
			{
				// k-fold cross-validation instead of a single train/test run
				Console.Write(recommender.ToString());
				Console.WriteLine();
				var split = new RatingCrossValidationSplit(training_data, cross_validation);
				var results = MyMediaLite.Eval.Ratings.EvaluateOnSplit(recommender, split); // TODO if (search_hp)
				MyMediaLite.Eval.Ratings.DisplayResults(results);
				no_eval = true;
				recommender.Ratings = training_data;
			}
			else
			{
				if (search_hp)
				{
					// hyperparameter search before the final training run
					// TODO --search-hp-criterion=RMSE
					double result = NelderMead.FindMinimum("RMSE", recommender);
					Console.Error.WriteLine("estimated quality (on split) {0}", result.ToString(CultureInfo.InvariantCulture));
					// TODO give out hp search time
				}

				Console.Write(recommender.ToString());
				seconds = Utils.MeasureTime(delegate() { recommender.Train(); });
				Console.Write(" training_time " + seconds + " ");
			}
		}
		else
		{
			Recommender.LoadModel(recommender, load_model_file);
			Console.Write(recommender.ToString() + " ");
		}

		if (!no_eval)
		{
			if (online_eval) // TODO support also for prediction outputs (to allow external evaluation)
			{
				seconds = Utils.MeasureTime(delegate() {
					MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.EvaluateOnline(recommender, test_data));
				});
			}
			else
			{
				seconds = Utils.MeasureTime(delegate() {
					MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data));
				});
			}
			Console.Write(" testing_time " + seconds);
		}

		if (compute_fit)
		{
			// evaluate on the training data to measure the fit
			Console.Write("fit ");
			seconds = Utils.MeasureTime(delegate() {
				MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, training_data));
			});
			Console.Write(string.Format(CultureInfo.InvariantCulture, " fit_time {0,0:0.#####} ", seconds));
		}

		if (prediction_file != string.Empty)
		{
			seconds = Utils.MeasureTime(delegate() {
				Console.WriteLine();
				Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file);
			});
			Console.Error.Write("predicting_time " + seconds);
		}

		Console.WriteLine();
		Console.Error.WriteLine("memory {0}", Memory.Usage);
	}

	Recommender.SaveModel(recommender, save_model_file);
}
/// <summary>Performs user-wise fold-in evaluation, but instead of folding in perform a complete re-training with the new data</summary>
/// <remarks>
/// This method can be quite slow: for every evaluated user a clone of the recommender
/// is re-trained on the original ratings plus that user's update ratings.
/// </remarks>
/// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
/// <param name='recommender'>a rating predictor capable of performing a user fold-in</param>
/// <param name='update_data'>the rating data used to represent the users</param>
/// <param name='eval_data'>the evaluation data</param>
static public RatingPredictionEvaluationResults EvaluateFoldInCompleteRetraining(this RatingPredictor recommender, IRatings update_data, IRatings eval_data)
{
	double rmse = 0;
	double mae = 0;
	double cbd = 0;
	int rating_count = 0;

	// only users present in both the update and the evaluation data are considered
	foreach (int user_id in update_data.AllUsers)
	{
		if (eval_data.AllUsers.Contains(user_id))
		{
			// complete re-training on the original ratings plus this user's update ratings
			var local_recommender = (RatingPredictor)recommender.Clone();
			var known_ratings = new RatingsProxy(update_data, update_data.ByUser[user_id]);
			local_recommender.Ratings = new CombinedRatings(recommender.Ratings, known_ratings);
			local_recommender.Train();

			var items_to_rate = (from index in eval_data.ByUser[user_id] select eval_data.Items[index]).ToArray();
			// BUGFIX: predict with the re-trained clone, not the original recommender —
			// otherwise the complete re-training had no effect on the evaluation
			var predicted_ratings = local_recommender.Recommend(user_id, candidate_items: items_to_rate);

			// accumulate squared error, absolute error, and CBD over all predictions
			foreach (var pred in predicted_ratings)
			{
				float prediction = pred.Item2;
				float actual_rating = eval_data.Get(user_id, pred.Item1, eval_data.ByUser[user_id]);
				float error = prediction - actual_rating;

				rmse += error * error;
				mae += Math.Abs(error);
				cbd += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
				rating_count++;
			}
			Console.Error.Write("."); // progress indicator, one dot per evaluated user
		}
	}

	mae = mae / rating_count;
	rmse = Math.Sqrt(rmse / rating_count);
	cbd = cbd / rating_count;

	var result = new RatingPredictionEvaluationResults();
	result["RMSE"] = (float)rmse;
	result["MAE"] = (float)mae;
	result["NMAE"] = (float)mae / (recommender.MaxRating - recommender.MinRating);
	result["CBD"] = (float)cbd;
	return result;
}
/// <summary>Predict ratings for the given item plus a set of randomly drawn items the user has not rated</summary>
/// <param name="ratingsMatrix">the user-item rating matrix, used to exclude already-rated items from the random sample</param>
/// <param name="weightsMatrix">the weight matrix, passed through to the underlying predictor</param>
/// <param name="ratingPredictor">the predictor used for the individual predictions</param>
/// <param name="user">the user ID</param>
/// <param name="item">the target item ID; always part of the candidate set</param>
/// <returns>the (item, predicted rating) pairs with a positive prediction, ordered by predicted rating descending</returns>
private List <KeyValuePair <int, double> > PredictRatings(Matrix <int> ratingsMatrix, Matrix <float> weightsMatrix, RatingPredictor ratingPredictor, int user, int item)
{
	List <KeyValuePair <int, double> > predictions = new List <KeyValuePair <int, double> >();
	HashSet <int> randomItems = new HashSet <int>();
	randomItems.Add(item);
	// NOTE(review): a fresh time-seeded Random per call can produce identical sequences when
	// called in quick succession or from parallel loops -- consider a shared RNG; verify callers.
	Random random = new Random();
	// draw randomRatingsCount distinct items the user has not rated
	// NOTE(review): loops forever if the user has rated (nearly) all of moviesCount items --
	// assumes moviesCount is sufficiently larger than randomRatingsCount; confirm.
	while (randomItems.Count < randomRatingsCount + 1)
	{
		int randomMovie = random.Next(this.moviesCount);
		if (!ratingsMatrix[user].Contains(randomMovie))
		{
			randomItems.Add(randomMovie);
		}
	}
	// keep only candidates with a positive predicted rating
	foreach (int i in randomItems)
	{
		double predictedRating = ratingPredictor.PredictRating(ratingsMatrix, weightsMatrix, user, i, false);
		if (predictedRating > 0)
		{
			predictions.Add(new KeyValuePair <int, double>(i, predictedRating));
		}
	}
	List <KeyValuePair <int, double> > orderedPredictions = predictions.OrderByDescending(x => x.Value).ToList();
	return(orderedPredictions);
}
/// <summary>Computes the RMSE fit of a recommender on the training data</summary>
/// <returns>the RMSE on the training data</returns>
/// <param name='recommender'>the rating predictor to evaluate</param>
public static double ComputeFit(this RatingPredictor recommender)
{
	// evaluate against the recommender's own training ratings and extract the RMSE measure
	var training_results = recommender.Evaluate(recommender.Ratings);
	return training_results["RMSE"];
}
/// <summary>Display dataset statistics</summary>
/// <param name="train">the training data</param>
/// <param name="test">the test data</param>
/// <param name="recommender">the recommender (to get attribute information)</param>
public static void DisplayDataStats(IRatings train, IRatings test, RatingPredictor recommender)
{
	// convenience overload: delegate with display_overlap disabled
	const bool display_overlap = false;
	DisplayDataStats(train, test, recommender, display_overlap);
}
/// <summary>Evaluate on the folds of a dataset split</summary>
/// <param name="recommender">an item recommender</param>
/// <param name="split">a dataset split</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="compute_fit">if set to true measure fit on the training data as well</param>
/// <param name="show_results">set to true to print results to STDERR</param>
/// <returns>a dictionary containing the average results over the different folds of the split</returns>
static public EvaluationResults DoRatingBasedRankingCrossValidation(
	this RatingPredictor recommender,
	ISplit <IRatings> split,
	IList <int> candidate_items,
	CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
	bool compute_fit = false,
	bool show_results = false)
{
	var avg_results = new ItemRecommendationEvaluationResults();

	// one task per fold; each fold trains its own clone of the recommender
	Parallel.For(0, (int)split.NumberOfFolds, fold =>
	{
		try
		{
			var split_recommender = (RatingPredictor)recommender.Clone(); // avoid changes in recommender
			split_recommender.Ratings = split.Train[fold];
			split_recommender.Train();

			// evaluation is ranking-based, so convert the rating data to positive-only feedback
			var test_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[fold]);
			var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[fold]);
			IList <int> test_users = test_data_posonly.AllUsers;
			var fold_results = Items.Evaluate(split_recommender, test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode);
			if (compute_fit)
			{
				fold_results["fit"] = (float)split_recommender.ComputeFit();
			}

			// thread-safe stats
			lock (avg_results)
				foreach (var key in fold_results.Keys)
				{
					// first fold to report a measure initializes it; later folds accumulate
					if (avg_results.ContainsKey(key))
					{
						avg_results[key] += fold_results[key];
					}
					else
					{
						avg_results[key] = fold_results[key];
					}
				}
			if (show_results)
			{
				Console.Error.WriteLine("fold {0} {1}", fold, fold_results);
			}
		}
		catch (Exception e)
		{
			Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
			throw;
		}
	});

	// turn the accumulated sums into averages over the folds
	foreach (var key in Items.Measures)
	{
		avg_results[key] /= split.NumberOfFolds;
	}
	avg_results["num_users"] /= split.NumberOfFolds;
	avg_results["num_items"] /= split.NumberOfFolds;
	return(avg_results);
}
/// <summary>Find the parameters resulting in the minimal results for a given evaluation measure, using Nelder-Mead simplex search</summary>
/// <remarks>The recommender will be set to the best parameter value after calling this method.</remarks>
/// <param name="evaluation_measure">the name of the evaluation measure</param>
/// <param name="hp_names">the names of the hyperparameters to optimize</param>
/// <param name="initial_hp_values">the values of the hyperparameters to try out first (the initial simplex vertices)</param>
/// <param name="recommender">the recommender</param>
/// <param name="split">the dataset split to use</param>
/// <returns>the best (lowest) average value for the hyperparameter</returns>
public static double FindMinimum(string evaluation_measure,
                                 IList <string> hp_names,
                                 IList <Vector> initial_hp_values,
                                 RatingPredictor recommender, // TODO make more general?
                                 ISplit <IRatings> split)
{
	// the simplex: one entry per vertex, keyed by its hyperparameter config string
	var results    = new Dictionary <string, double>();
	var hp_vectors = new Dictionary <string, Vector>();

	// initialize
	foreach (var hp_values in initial_hp_values)
	{
		string hp_string = CreateConfigString(hp_names, hp_values);
		results[hp_string] = Run(recommender, split, hp_string, evaluation_measure);
		hp_vectors[hp_string] = hp_values;
	}

	List <string> keys;
	for (int i = 0; i < num_it; i++)
	{
		// sanity check: both dictionaries must describe the same simplex
		if (results.Count != hp_vectors.Count)
		{
			throw new Exception(string.Format("{0} vs. {1}", results.Count, hp_vectors.Count));
		}

		// sort vertices by result, ascending (best first)
		keys = new List <string>(results.Keys);
		keys.Sort(delegate(string k1, string k2) { return(results[k1].CompareTo(results[k2])); });

		var min_key = keys.First();
		var max_key = keys.Last();

		Console.Error.WriteLine("Nelder-Mead: iteration {0} ({1})", i, results[min_key]);

		// temporarily remove the worst vertex from the simplex
		var worst_vector = hp_vectors[max_key];
		var worst_result = results[max_key];
		hp_vectors.Remove(max_key);
		results.Remove(max_key);

		// compute center
		var center = ComputeCenter(results, hp_vectors);

		// reflection: mirror the worst vertex through the centroid
		//Console.Error.WriteLine("ref");
		var reflection = center + alpha * (center - worst_vector);
		string ref_string = CreateConfigString(hp_names, reflection);
		double ref_result = Run(recommender, split, ref_string, evaluation_measure);
		if (results[min_key] <= ref_result && ref_result < results.Values.Max())
		{
			results[ref_string]    = ref_result;
			hp_vectors[ref_string] = reflection;
			continue;
		}

		// expansion: the reflected point is the new best, so try going further
		if (ref_result < results[min_key])
		{
			//Console.Error.WriteLine("exp");

			var expansion = center + gamma * (center - worst_vector);
			string exp_string = CreateConfigString(hp_names, expansion);
			double exp_result = Run(recommender, split, exp_string, evaluation_measure);
			if (exp_result < ref_result)
			{
				results[exp_string]    = exp_result;
				hp_vectors[exp_string] = expansion;
			}
			else
			{
				results[ref_string]    = ref_result;
				hp_vectors[ref_string] = reflection;
			}
			continue;
		}

		// contraction: pull the worst vertex towards the centroid
		//Console.Error.WriteLine("con");
		var contraction = worst_vector + rho * (center - worst_vector);
		string con_string = CreateConfigString(hp_names, contraction);
		double con_result = Run(recommender, split, con_string, evaluation_measure);
		if (con_result < worst_result)
		{
			results[con_string]    = con_result;
			hp_vectors[con_string] = contraction;
			continue;
		}

		// reduction: shrink all remaining vertices towards the best one
		//Console.Error.WriteLine("red");
		var best_vector = hp_vectors[min_key];
		var best_result = results[min_key];
		hp_vectors.Remove(min_key);
		results.Remove(min_key);
		foreach (var key in new List <string>(results.Keys))
		{
			var reduction = hp_vectors[key] + sigma * (hp_vectors[key] - best_vector);
			string red_string = CreateConfigString(hp_names, reduction);
			double red_result = Run(recommender, split, red_string, evaluation_measure);

			// replace by reduced vector
			results.Remove(key);
			hp_vectors.Remove(key);
			results[red_string]    = red_result;
			hp_vectors[red_string] = reduction;
		}
		results[min_key]    = best_result;
		hp_vectors[min_key] = best_vector;
		// NOTE(review): the worst vertex is re-inserted un-reduced here; standard
		// Nelder-Mead shrinks it towards the best vertex as well -- verify this is intended.
		results[max_key]    = worst_result;
		hp_vectors[max_key] = worst_vector;
	}

	keys = new List <string>(results.Keys);
	keys.Sort(delegate(string k1, string k2) { return(results[k1].CompareTo(results[k2])); });

	// set to best hyperparameter values
	Recommender.Configure(recommender, keys.First());

	return(results[keys.First()]);
}
/// <summary>Entry point of the KDD Cup track tool: parses key=value command line parameters,
/// configures a rating predictor, loads the track data, and runs DoTrack1()</summary>
/// <param name='args'>the first argument is the recommender method name; the rest are key=value parameters</param>
static void Main(string[] args)
{
	Assembly assembly = Assembly.GetExecutingAssembly();
	Assembly.LoadFile(Path.GetDirectoryName(assembly.Location) + Path.DirectorySeparatorChar + "MyMediaLiteExperimental.dll");

	// KDD Cup ratings are on a 0..100 scale
	double min_rating = 0;
	double max_rating = 100;

	AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
	Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

	// check number of command line parameters
	if (args.Length < 1)
	{
		Usage("Not enough arguments.");
	}

	// read command line parameters
	string method = args[0];

	RecommenderParameters parameters = null;
	try
	{
		parameters = new RecommenderParameters(args, 1);
	}
	catch (ArgumentException e)
	{
		Usage(e.Message);
	}

	// arguments for iteration search
	find_iter   = parameters.GetRemoveInt32("find_iter", 0);
	max_iter    = parameters.GetRemoveInt32("max_iter", 500);
	compute_fit = parameters.GetRemoveBool("compute_fit", false);
	epsilon     = parameters.GetRemoveDouble("epsilon", 0);
	rmse_cutoff = parameters.GetRemoveDouble("rmse_cutoff", double.MaxValue);
	mae_cutoff  = parameters.GetRemoveDouble("mae_cutoff", double.MaxValue);

	// data arguments
	string data_dir = parameters.GetRemoveString("data_dir");
	track2 = parameters.GetRemoveBool("track2", false);
	if (data_dir != string.Empty)
	{
		data_dir = data_dir + (track2 ? "/mml-track2" : "/track1");
	}
	else
	{
		// NOTE(review): inconsistent with the branch above -- "/mml-track2" has a leading
		// slash (absolute path) while "track1" does not; confirm which form is intended
		data_dir = track2 ? "/mml-track2" : "track1";
	}
	sample_data = parameters.GetRemoveBool("sample_data", false);

	// other arguments
	save_model_file  = parameters.GetRemoveString("save_model");
	load_model_file  = parameters.GetRemoveString("load_model");
	int random_seed  = parameters.GetRemoveInt32("random_seed", -1);
	no_eval          = parameters.GetRemoveBool("no_eval", false);
	prediction_file  = parameters.GetRemoveString("prediction_file");
	cross_validation = parameters.GetRemoveInt32("cross_validation", 0);
	good_rating_prob = parameters.GetRemoveBool("good_rating_prob", false);

	// in good-rating-probability mode predictions are probabilities in [0, 1]
	if (good_rating_prob)
	{
		max_rating = 1;
	}

	if (random_seed != -1)
	{
		MyMediaLite.Util.Random.InitInstance(random_seed);
	}

	recommender = Recommender.CreateRatingPredictor(method);
	if (recommender == null)
	{
		Usage(string.Format("Unknown method: '{0}'", method));
	}

	Recommender.Configure(recommender, parameters, Usage);

	// any leftover parameters are unrecognized -> abort with usage message
	if (parameters.CheckForLeftovers())
	{
		Usage(-1);
	}

	// load all the data
	TimeSpan loading_time = Utils.MeasureTime(delegate() { LoadData(data_dir); });
	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));

	recommender.Ratings   = training_ratings;
	recommender.MinRating = min_rating;
	recommender.MaxRating = max_rating;
	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

	if (load_model_file != string.Empty)
	{
		Recommender.LoadModel(recommender, load_model_file);
	}

	DoTrack1();

	Console.Error.WriteLine("memory {0}", Memory.Usage);
}
/// <summary>Entry point of the rating prediction tool: parses command line options,
/// configures a rating predictor, loads data, then either searches for the best iteration
/// count (--find-iter) or trains/evaluates once, optionally with cross-validation,
/// hyperparameter search, and prediction output</summary>
/// <param name='args'>command line arguments, parsed with Mono.Options</param>
static void Main(string[] args)
{
	Assembly assembly = Assembly.GetExecutingAssembly();
	Assembly.LoadFile(Path.GetDirectoryName(assembly.Location) + Path.DirectorySeparatorChar + "MyMediaLiteExperimental.dll");

	AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
	Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

	// recommender arguments
	string method              = "BiasedMatrixFactorization";
	string recommender_options = string.Empty;

	// help/version
	bool show_help    = false;
	bool show_version = false;

	// arguments for iteration search
	int find_iter      = 0;
	int max_iter       = 500;
	double epsilon     = 0;
	double rmse_cutoff = double.MaxValue;
	double mae_cutoff  = double.MaxValue;

	// data arguments
	string data_dir             = string.Empty;
	string user_attributes_file = string.Empty;
	string item_attributes_file = string.Empty;
	string user_relations_file  = string.Empty;
	string item_relations_file  = string.Empty;

	// other arguments
	bool online_eval        = false;
	bool search_hp          = false;
	string save_model_file  = string.Empty;
	string load_model_file  = string.Empty;
	int random_seed         = -1;
	string prediction_file  = string.Empty;
	string prediction_line  = "{0}\t{1}\t{2}";
	int cross_validation    = 0;
	double split_ratio      = 0;

	// command line option definitions (Mono.Options)
	var p = new OptionSet() {
		// string-valued options
		{ "training-file=",       v => training_file        = v },
		{ "test-file=",           v => test_file            = v },
		{ "recommender=",         v => method               = v },
		{ "recommender-options=", v => recommender_options += " " + v },
		{ "data-dir=",            v => data_dir             = v },
		{ "user-attributes=",     v => user_attributes_file = v },
		{ "item-attributes=",     v => item_attributes_file = v },
		{ "user-relations=",      v => user_relations_file  = v },
		{ "item-relations=",      v => item_relations_file  = v },
		{ "save-model=",          v => save_model_file      = v },
		{ "load-model=",          v => load_model_file      = v },
		{ "prediction-file=",     v => prediction_file      = v },
		{ "prediction-line=",     v => prediction_line      = v },
		// integer-valued options
		{ "find-iter=",        (int v) => find_iter        = v },
		{ "max-iter=",         (int v) => max_iter         = v },
		{ "random-seed=",      (int v) => random_seed      = v },
		{ "cross-validation=", (int v) => cross_validation = v },
		// double-valued options
		{ "epsilon=",     (double v) => epsilon     = v },
		{ "rmse-cutoff=", (double v) => rmse_cutoff = v },
		{ "mae-cutoff=",  (double v) => mae_cutoff  = v },
		{ "split-ratio=", (double v) => split_ratio = v },
		// enum options
		{ "rating-type=", (RatingType v)       => rating_type = v },
		{ "file-format=", (RatingFileFormat v) => file_format = v },
		// boolean options
		{ "compute-fit",       v => compute_fit  = v != null },
		{ "online-evaluation", v => online_eval  = v != null },
		{ "search-hp",         v => search_hp    = v != null },
		{ "help",              v => show_help    = v != null },
		{ "version",           v => show_version = v != null },
	};
	IList<string> extra_args = p.Parse(args);

	// TODO make sure interaction of --find-iter and --cross-validation works properly

	bool no_eval = test_file == null;

	if (show_version)
		ShowVersion();
	if (show_help)
		Usage(0);

	if (extra_args.Count > 0)
		Usage("Did not understand " + extra_args[0]);

	if (training_file == null)
		Usage("Parameter --training-file=FILE is missing.");

	if (cross_validation != 0 && split_ratio != 0)
		Usage("--cross-validation=K and --split-ratio=NUM are mutually exclusive.");

	if (random_seed != -1)
		MyMediaLite.Util.Random.InitInstance(random_seed);

	recommender = Recommender.CreateRatingPredictor(method);
	if (recommender == null)
		Usage(string.Format("Unknown method: '{0}'", method));

	Recommender.Configure(recommender, recommender_options, Usage);

	// ID mapping objects
	if (file_format == RatingFileFormat.KDDCUP_2011)
	{
		user_mapping = new IdentityMapping();
		item_mapping = new IdentityMapping();
	}

	// load all the data
	LoadData(data_dir, user_attributes_file, item_attributes_file, user_relations_file, item_relations_file, !online_eval);

	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

	// optional in-memory train/test split
	if (split_ratio > 0)
	{
		var split = new RatingsSimpleSplit(training_data, split_ratio);
		recommender.Ratings = split.Train[0];
		training_data = split.Train[0];
		test_data     = split.Test[0];
	}

	Utils.DisplayDataStats(training_data, test_data, recommender);

	if (find_iter != 0)
	{
		// iteration search: train once, then iterate and evaluate every find_iter iterations
		if ( !(recommender is IIterativeModel) )
			Usage("Only iterative recommenders support find_iter.");
		var iterative_recommender = (IIterativeModel) recommender;
		Console.WriteLine(recommender.ToString() + " ");

		if (load_model_file == string.Empty)
			recommender.Train();
		else
			Recommender.LoadModel(iterative_recommender, load_model_file);

		if (compute_fit)
			Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", iterative_recommender.ComputeFit()));

		MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data));
		Console.WriteLine(" iteration " + iterative_recommender.NumIter);

		for (int i = (int) iterative_recommender.NumIter + 1; i <= max_iter; i++)
		{
			TimeSpan time = Utils.MeasureTime(delegate() {
				iterative_recommender.Iterate();
			});
			training_time_stats.Add(time.TotalSeconds);

			if (i % find_iter == 0)
			{
				if (compute_fit)
				{
					double fit = 0;
					time = Utils.MeasureTime(delegate() {
						fit = iterative_recommender.ComputeFit();
					});
					fit_time_stats.Add(time.TotalSeconds);
					Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", fit));
				}

				Dictionary<string, double> results = null;
				time = Utils.MeasureTime(delegate() { results = MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data); });
				eval_time_stats.Add(time.TotalSeconds);
				MyMediaLite.Eval.Ratings.DisplayResults(results);
				rmse_eval_stats.Add(results["RMSE"]);
				Console.WriteLine(" iteration " + i);

				Recommender.SaveModel(recommender, save_model_file, i);
				if (prediction_file != string.Empty)
					Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file + "-it-" + i);

				// early stopping: RMSE moved more than epsilon above the best seen so far
				if (epsilon > 0.0 && results["RMSE"] - rmse_eval_stats.Min() > epsilon)
				{
					Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], rmse_eval_stats.Min()));
					Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", i);
					break;
				}
				if (results["RMSE"] > rmse_cutoff || results["MAE"] > mae_cutoff)
				{
					Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
					break;
				}
			}
		} // for

		DisplayStats();
	}
	else
	{
		// single training/evaluation run (optionally cross-validated or with hp search)
		TimeSpan seconds;

		if (load_model_file == string.Empty)
		{
			if (cross_validation > 0)
			{
				Console.Write(recommender.ToString());
				Console.WriteLine();
				var split = new RatingCrossValidationSplit(training_data, cross_validation);
				var results = MyMediaLite.Eval.Ratings.EvaluateOnSplit(recommender, split); // TODO if (search_hp)
				MyMediaLite.Eval.Ratings.DisplayResults(results);
				no_eval = true;
				recommender.Ratings = training_data;
			}
			else
			{
				if (search_hp)
				{
					// TODO --search-hp-criterion=RMSE
					double result = NelderMead.FindMinimum("RMSE", recommender);
					Console.Error.WriteLine("estimated quality (on split) {0}", result.ToString(CultureInfo.InvariantCulture));
					// TODO give out hp search time
				}

				Console.Write(recommender.ToString());

				seconds = Utils.MeasureTime( delegate() { recommender.Train(); } );
				Console.Write(" training_time " + seconds + " ");
			}
		}
		else
		{
			Recommender.LoadModel(recommender, load_model_file);
			Console.Write(recommender.ToString() + " ");
		}

		if (!no_eval)
		{
			if (online_eval)  // TODO support also for prediction outputs (to allow external evaluation)
				seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.EvaluateOnline(recommender, test_data)); });
			else
				seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data)); });
			Console.Write(" testing_time " + seconds);
		}

		if (compute_fit)
		{
			Console.Write("fit ");
			seconds = Utils.MeasureTime(delegate() {
				MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, training_data));
			});
			Console.Write(string.Format(CultureInfo.InvariantCulture, " fit_time {0,0:0.#####} ", seconds));
		}

		if (prediction_file != string.Empty)
		{
			seconds = Utils.MeasureTime(delegate() {
				Console.WriteLine();
				Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file);
			});
			Console.Error.Write("predicting_time " + seconds);
		}

		Console.WriteLine();
		Console.Error.WriteLine("memory {0}", Memory.Usage);
	}
	Recommender.SaveModel(recommender, save_model_file);
}
/// <summary>Entry point of the fold-in experiment: splits the data into training users and
/// test users (all-but-one rating per test user for updating, the last rating for evaluation),
/// trains a rating predictor on the training users, then compares three fold-in strategies</summary>
/// <param name='args'>data file, recommender method name, recommender options, number of test users</param>
public static void Main(string[] args)
{
	// TODO add random seed
	// TODO report per-user times

	string data_file   = args[0];
	string method      = args[1];
	string options     = args[2];
	int num_test_users = int.Parse(args[3]);

	// load the data
	var all_data = RatingData.Read(data_file);

	// TODO randomize
	// the first num_test_users user IDs are held out as test users
	var test_users = new HashSet <int>(Enumerable.Range(0, num_test_users));

	// per test user: all but the last rating index goes to update_indices,
	// the last one to eval_indices; users with a single rating are skipped
	var update_indices = new List <int>();
	var eval_indices   = new List <int>();
	foreach (int user_id in test_users)
	{
		if (all_data.ByUser[user_id].Count > 1)
		{
			var user_indices = all_data.ByUser[user_id];
			for (int i = 0; i < user_indices.Count - 1; i++)
			{
				update_indices.Add(user_indices[i]);
			}
			for (int i = user_indices.Count - 1; i < user_indices.Count; i++)
			{
				eval_indices.Add(user_indices[i]);
			}
		}
	}

	// training data: every rating belonging to a non-test user
	var training_indices = new List <int>();
	for (int i = 0; i < all_data.Count; i++)
	{
		if (!test_users.Contains(all_data.Users[i]))
		{
			training_indices.Add(i);
		}
	}
	var training_data = new MyMediaLite.Data.Ratings();
	foreach (int i in training_indices)
	{
		training_data.Add(all_data.Users[i], all_data.Items[i], all_data[i]);
	}

	var update_data = new RatingsProxy(all_data, update_indices);
	var eval_data   = new RatingsProxy(all_data, eval_indices);

	Console.Write(training_data.Statistics());
	Console.Write(update_data.Statistics());
	Console.Write(eval_data.Statistics());

	// prepare recommender
	RatingPredictor recommender = method.CreateRatingPredictor();
	recommender.Configure(options);
	recommender.Ratings = training_data;
	Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));
	Console.WriteLine("recommender: {0}", recommender);
	recommender.Train();

	// I. complete retraining
	Console.WriteLine( "complete training: {0}", recommender.EvaluateFoldInCompleteRetraining(update_data, eval_data));
	// II. online updates
	// NOTE(review): the two casts below assume the chosen method implements
	// IncrementalRatingPredictor and IFoldInRatingPredictor -- otherwise this throws; confirm
	Console.WriteLine( "incremental training: {0}", ((IncrementalRatingPredictor)recommender).EvaluateFoldInIncrementalTraining(update_data, eval_data));
	// III. fold-in
	Console.WriteLine( "fold-in: {0}", ((IFoldInRatingPredictor)recommender).EvaluateFoldIn(update_data, eval_data));
}
/// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
/// <param name="recommender">an item recommender; must implement IIterativeModel</param>
/// <param name="split">a rating dataset split</param>
/// <param name="test_users">a collection of integers with all test users</param>
/// <param name="candidate_items">a collection of integers with all candidate items</param>
/// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
/// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
/// <param name="max_iter">the maximum number of iterations</param>
/// <param name="find_iter">the report interval</param>
/// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
/// <exception cref="ArgumentException">thrown if recommender does not implement IIterativeModel</exception>
static public void DoRatingBasedRankingIterativeCrossValidation(
	this RatingPredictor recommender,
	ISplit<IRatings> split,
	IList<int> test_users,
	IList<int> candidate_items,
	CandidateItems candidate_item_mode,
	RepeatedEvents repeated_events,
	uint max_iter,
	uint find_iter = 1,
	bool show_fold_results = false)
{
	if (!(recommender is IIterativeModel))
		throw new ArgumentException("recommender must be of type IIterativeModel");

	var split_recommenders     = new RatingPredictor[split.NumberOfFolds];
	var iterative_recommenders = new IIterativeModel[split.NumberOfFolds];
	var fold_results           = new ItemRecommendationEvaluationResults[split.NumberOfFolds];

	// initial training and evaluation, one parallel task per fold
	Parallel.For(0, (int)split.NumberOfFolds, i =>
	{
		try
		{
			split_recommenders[i] = (RatingPredictor)recommender.Clone(); // to avoid changes in recommender
			split_recommenders[i].Ratings = split.Train[i];
			split_recommenders[i].Train();
			iterative_recommenders[i] = (IIterativeModel)split_recommenders[i];

			// ranking-based evaluation needs positive-only feedback
			var test_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Test[i]);
			var training_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Train[i]);
			fold_results[i] = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
			if (show_fold_results)
				// BUGFIX: print this fold's results (fold_results[i]), not the whole array,
				// which would only render the array's type name
				Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], iterative_recommenders[i].NumIter);
		}
		catch (Exception e)
		{
			Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
			throw;
		}
	});
	Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), iterative_recommenders[0].NumIter);

	// iterative training and evaluation: advance all folds in lock-step, report every find_iter iterations
	for (int it = (int)iterative_recommenders[0].NumIter + 1; it <= max_iter; it++)
	{
		Parallel.For(0, (int)split.NumberOfFolds, i =>
		{
			try
			{
				iterative_recommenders[i].Iterate();

				if (it % find_iter == 0)
				{
					var test_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Test[i]);
					var training_data_posonly = new PosOnlyFeedback<SparseBooleanMatrix>(split.Train[i]);
					fold_results[i] = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
					if (show_fold_results)
						// BUGFIX: fold_results[i] instead of the whole array (see above)
						Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], it);
				}
			}
			catch (Exception e)
			{
				Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
				throw;
			}
		});
		Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), it);
	}
}
/// <summary>Entry point of the rating prediction tool: parses command line options,
/// sets up or loads a rating predictor, loads the data and optional ID mappings,
/// performs the requested split, then either searches for the best iteration count
/// (--find-iter) or trains/evaluates once, with optional cross-validation,
/// hyperparameter search, online evaluation, and prediction output</summary>
/// <param name='args'>command line arguments, parsed with Mono.Options</param>
static void Main(string[] args)
{
	AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
	Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

	// recommender arguments
	string method              = null;
	string recommender_options = string.Empty;

	// help/version
	bool show_help    = false;
	bool show_version = false;

	// arguments for iteration search
	int max_iter   = 100;
	string measure = "RMSE";
	double epsilon = 0;
	double cutoff  = double.MaxValue;

	// other arguments
	bool search_hp           = false;
	int random_seed          = -1;
	string prediction_line   = "{0}\t{1}\t{2}";
	string prediction_header = null;

	// command line option definitions (Mono.Options)
	var p = new OptionSet() {
		// string-valued options
		{ "training-file=",       v => training_file          = v },
		{ "test-file=",           v => test_file              = v },
		{ "recommender=",         v => method                 = v },
		{ "recommender-options=", v => recommender_options   += " " + v },
		{ "data-dir=",            v => data_dir               = v },
		{ "user-attributes=",     v => user_attributes_file   = v },
		{ "item-attributes=",     v => item_attributes_file   = v },
		{ "user-relations=",      v => user_relations_file    = v },
		{ "item-relations=",      v => item_relations_file    = v },
		{ "save-model=",          v => save_model_file        = v },
		{ "load-model=",          v => load_model_file        = v },
		{ "save-user-mapping=",   v => save_user_mapping_file = v },
		{ "save-item-mapping=",   v => save_item_mapping_file = v },
		{ "load-user-mapping=",   v => load_user_mapping_file = v },
		{ "load-item-mapping=",   v => load_item_mapping_file = v },
		{ "prediction-file=",     v => prediction_file        = v },
		{ "prediction-line=",     v => prediction_line        = v },
		{ "prediction-header=",   v => prediction_header      = v },
		{ "chronological-split=", v => chronological_split    = v },
		{ "measure=",             v => measure                = v },
		// integer-valued options
		{ "find-iter=",        (int v)  => find_iter        = v },
		{ "max-iter=",         (int v)  => max_iter         = v },
		{ "random-seed=",      (int v)  => random_seed      = v },
		{ "cross-validation=", (uint v) => cross_validation = v },
		// double-valued options
		{ "epsilon=",    (double v) => epsilon    = v },
		{ "cutoff=",     (double v) => cutoff     = v },
		{ "test-ratio=", (double v) => test_ratio = v },
		// enum options
		{ "rating-type=", (RatingType v)       => rating_type = v },
		{ "file-format=", (RatingFileFormat v) => file_format = v },
		// boolean options
		{ "compute-fit",       v => compute_fit       = v != null },
		{ "online-evaluation", v => online_eval       = v != null },
		{ "show-fold-results", v => show_fold_results = v != null },
		{ "search-hp",         v => search_hp         = v != null },
		{ "no-id-mapping",     v => no_id_mapping     = v != null },
		{ "help",              v => show_help         = v != null },
		{ "version",           v => show_version      = v != null },
	};
	IList<string> extra_args = p.Parse(args);

	// ... some more command line parameter actions ...
	// evaluation only happens if some kind of test data is configured
	bool no_eval = true;
	if (test_ratio > 0 || test_file != null || chronological_split != null)
		no_eval = false;

	if (show_version)
		ShowVersion();
	if (show_help)
		Usage(0);

	if (random_seed != -1)
		MyMediaLite.Util.Random.Seed = random_seed;

	// set up recommender
	if (load_model_file != null)
		recommender = (RatingPredictor) Model.Load(load_model_file);
	else if (method != null)
		recommender = Recommender.CreateRatingPredictor(method);
	else
		recommender = Recommender.CreateRatingPredictor("BiasedMatrixFactorization");
	// in case something went wrong ...
	if (recommender == null && method != null)
		Usage(string.Format("Unknown rating prediction method: '{0}'", method));
	if (recommender == null && load_model_file != null)
		Abort(string.Format("Could not load model from file {0}.", load_model_file));

	CheckParameters(extra_args);

	recommender.Configure(recommender_options, (string m) => { Console.Error.WriteLine(m); Environment.Exit(-1); });

	// ID mapping objects
	if (file_format == RatingFileFormat.KDDCUP_2011 || no_id_mapping)
	{
		user_mapping = new IdentityMapping();
		item_mapping = new IdentityMapping();
	}
	if (load_user_mapping_file != null)
		user_mapping = EntityMappingExtensions.LoadMapping(load_user_mapping_file);
	if (load_item_mapping_file != null)
		item_mapping = EntityMappingExtensions.LoadMapping(load_item_mapping_file);

	// load all the data
	LoadData(!online_eval);

	// if requested, save ID mappings
	if (save_user_mapping_file != null)
		user_mapping.SaveMapping(save_user_mapping_file);
	if (save_item_mapping_file != null)
		item_mapping.SaveMapping(save_item_mapping_file);

	Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

	// random train/test split
	if (test_ratio > 0)
	{
		var split = new RatingsSimpleSplit(training_data, test_ratio);
		recommender.Ratings = training_data = split.Train[0];
		test_data = split.Test[0];
		Console.Error.WriteLine(string.Format( CultureInfo.InvariantCulture, "test ratio {0}", test_ratio));
	}
	// chronological train/test split, either by ratio or by split time
	if (chronological_split != null)
	{
		var split = chronological_split_ratio != -1
			? new RatingsChronologicalSplit((ITimedRatings) training_data, chronological_split_ratio)
			: new RatingsChronologicalSplit((ITimedRatings) training_data, chronological_split_time);
		recommender.Ratings = training_data = split.Train[0];
		test_data = split.Test[0];
		if (test_ratio != -1)
			Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "test ratio (chronological) {0}", chronological_split_ratio));
		else
			Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "split time {0}", chronological_split_time));
	}

	Console.Write(training_data.Statistics(test_data, user_attributes, item_attributes));

	if (find_iter != 0)
	{
		// iteration search: iterate and evaluate every find_iter iterations
		if ( !(recommender is IIterativeModel) )
			Abort("Only iterative recommenders (interface IIterativeModel) support --find-iter=N.");

		Console.WriteLine(recommender.ToString());

		if (cross_validation > 1)
		{
			recommender.DoIterativeCrossValidation(cross_validation, max_iter, find_iter);
		}
		else
		{
			var iterative_recommender = (IIterativeModel) recommender;
			var eval_stats = new List<double>();

			if (load_model_file == null)
				recommender.Train();

			if (compute_fit)
				Console.WriteLine("fit {0} iteration {1}", recommender.Evaluate(training_data), iterative_recommender.NumIter);

			Console.WriteLine("{0} iteration {1}", recommender.Evaluate(test_data), iterative_recommender.NumIter);

			for (int it = (int) iterative_recommender.NumIter + 1; it <= max_iter; it++)
			{
				TimeSpan time = Wrap.MeasureTime(delegate() {
					iterative_recommender.Iterate();
				});
				training_time_stats.Add(time.TotalSeconds);

				if (it % find_iter == 0)
				{
					if (compute_fit)
					{
						time = Wrap.MeasureTime(delegate() {
							Console.WriteLine("fit {0} iteration {1}", recommender.Evaluate(training_data), it);
						});
						fit_time_stats.Add(time.TotalSeconds);
					}

					RatingPredictionEvaluationResults results = null;
					time = Wrap.MeasureTime(delegate() { results = recommender.Evaluate(test_data); });
					eval_time_stats.Add(time.TotalSeconds);
					eval_stats.Add(results[measure]);
					Console.WriteLine("{0} iteration {1}", results, it);

					Model.Save(recommender, save_model_file, it);
					if (prediction_file != null)
						recommender.WritePredictions(test_data, prediction_file + "-it-" + it, user_mapping, item_mapping, prediction_line, prediction_header);

					// early stopping: selected measure moved more than epsilon above the best seen so far
					if (epsilon > 0.0 && results[measure] - eval_stats.Min() > epsilon)
					{
						Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], eval_stats.Min()));
						Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", it);
						break;
					}
					if (results[measure] > cutoff)
					{
						Console.Error.WriteLine("Reached cutoff after {0} iterations.", it);
						break;
					}
				}
			} // for
		}
	}
	else
	{
		// single training/evaluation run (optionally cross-validated or with hp search)
		TimeSpan seconds;

		Console.Write(recommender + " ");

		if (load_model_file == null)
		{
			if (cross_validation > 1)
			{
				Console.WriteLine();
				var results = recommender.DoCrossValidation(cross_validation, compute_fit, show_fold_results);
				Console.Write(results);
				no_eval = true;
			}
			else
			{
				if (search_hp)
				{
					double result = NelderMead.FindMinimum("RMSE", recommender);
					Console.Error.WriteLine("estimated quality (on split) {0}", result.ToString(CultureInfo.InvariantCulture));
				}

				seconds = Wrap.MeasureTime( delegate() { recommender.Train(); } );
				Console.Write(" training_time " + seconds + " ");
			}
		}

		if (!no_eval)
		{
			if (online_eval)
				seconds = Wrap.MeasureTime(delegate() { Console.Write(recommender.EvaluateOnline(test_data)); });
			else
				seconds = Wrap.MeasureTime(delegate() { Console.Write(recommender.Evaluate(test_data)); });
			Console.Write(" testing_time " + seconds);

			if (compute_fit)
			{
				Console.Write("\nfit ");
				seconds = Wrap.MeasureTime(delegate() {
					Console.Write(recommender.Evaluate(training_data));
				});
				Console.Write(" fit_time " + seconds);
			}

			if (prediction_file != null)
			{
				Console.WriteLine();
				seconds = Wrap.MeasureTime(delegate() {
					recommender.WritePredictions(test_data, prediction_file, user_mapping, item_mapping, prediction_line, prediction_header);
				});
				Console.Error.Write("prediction_time " + seconds);
			}
		}
		Console.WriteLine();
	}
	Model.Save(recommender, save_model_file);
	DisplayStats();
}
/// <summary>Evaluate the rating predictor in parallel over all users, computing hit-based Recall, Precision, Coverage, and prediction counts.</summary>
/// <remarks>
/// Results are written to the instance properties <c>RatingsPredicted</c>, <c>TotalRatings</c>,
/// <c>Coverage</c>, <c>ItemsPredicted</c>, <c>Recall</c>, and <c>Precision</c>.
/// </remarks>
/// <param name="ratingsMatrix">known ratings, passed through to <c>PredictRatings</c></param>
/// <param name="weightsMatrix">per-rating weights, passed through to <c>PredictRatings</c></param>
/// <param name="maxRatings">per-user list of held-out test items (one row per user) — presumably item IDs; verify against caller</param>
/// <param name="ratingPredictor">the rating predictor under evaluation</param>
public void Evaluate(Matrix <int> ratingsMatrix, Matrix <float> weightsMatrix, Matrix <int> maxRatings, RatingPredictor ratingPredictor)
{
	int testRatings = 0;
	long hits = 0;
	int ratings = 0;
	int ratingsPredicted = 0;
	int itemsPredicted = 0;

	// Parallel loop with thread-local state:
	// first type parameter is the type of the source elements,
	// second is the thread-local partition subtotal (hits, test ratings, ratings predicted, items predicted).
	Parallel.ForEach(
		maxRatings.Rows,                                   // source collection: one row per user
		() => new Tuple <long, int, int, int>(0, 0, 0, 0), // initialize the partition-local subtotal
		(user, loop, tuple) =>                             // invoked by the loop on each iteration
		{
			long localHits = 0;
			int localTestRatings = 0;
			// NOTE(review): localRatingsPredicted is never incremented anywhere in this method,
			// so RatingsPredicted always ends up 0 — confirm whether a counter update is missing.
			int localRatingsPredicted = 0;
			int localItemsPredicted = 0;

			foreach (int item in maxRatings[user])
			{
				List <KeyValuePair <int, double> > predictedRatings = PredictRatings(ratingsMatrix, weightsMatrix, ratingPredictor, user, item);

				// a hit: the held-out item appears among the top-K entries with a non-zero score
				for (int i = 0; i < this.topK && i < predictedRatings.Count; i++)
					if (predictedRatings[i].Key == item && predictedRatings[i].Value != 0)
						localHits++;

				// the item counts as "predicted" if it received a non-zero score anywhere in the list
				foreach (KeyValuePair <int, double> pair in predictedRatings)
					if (pair.Key == item && pair.Value != 0)
						localItemsPredicted++;

				localTestRatings++;
			}

			// subtotal passed on to the next iteration of this partition
			return new Tuple <long, int, int, int>(
				tuple.Item1 + localHits,
				tuple.Item2 + localTestRatings,
				tuple.Item3 + localRatingsPredicted,
				tuple.Item4 + localItemsPredicted);
		},
		// executed once per completed partition: merge its final subtotal into the shared counters
		(pair) =>
		{
			Interlocked.Add(ref hits, pair.Item1);
			Interlocked.Add(ref testRatings, pair.Item2);
			// 101 — presumably 100 candidate items plus the 1 target per test rating; TODO confirm
			Interlocked.Add(ref ratings, 101 * pair.Item2);
			Interlocked.Add(ref ratingsPredicted, pair.Item3);
			Interlocked.Add(ref itemsPredicted, pair.Item4);
		}
	);

	RatingsPredicted = ratingsPredicted;
	TotalRatings = ratings;
	// BUG FIX: was 'RatingsPredicted / TotalRatings' — integer division truncated every
	// partial coverage to 0 and threw DivideByZeroException for an empty test set.
	Coverage = TotalRatings == 0 ? 0 : (double)RatingsPredicted / TotalRatings;
	ItemsPredicted = itemsPredicted;
	Recall = (double)hits / testRatings;
	Precision = Recall / topK;
}
/// <summary>Set up the rating predictor: read the ratings, build the top-N most frequently rated movie list, and load the pre-trained model.</summary>
private void CreateRecommender()
{
	var mf = new BiasedMatrixFactorization();

	Console.Error.Write("Reading in ratings ... ");
	TimeSpan time = Utils.MeasureTime(delegate() {
		mf.Ratings = RatingPrediction.Read(ratings_file, user_mapping, item_mapping);
	});
	Console.Error.WriteLine("done ({0,0:0.##}).", time.TotalSeconds.ToString(CultureInfo.InvariantCulture));

	// collect every item that was rated at least once, weighted by its rating count
	foreach (var item_indices in mf.Ratings.ByItem)
		if (item_indices.Count > 0)
			movies_by_frequency.Add(
				new WeightedItem(mf.Ratings.Items[item_indices[0]], item_indices.Count)
			);

	// order by descending frequency, then remember the IDs of the n_movies most popular ones
	movies_by_frequency.Sort();
	movies_by_frequency.Reverse();
	for (int pos = 0; pos < n_movies; pos++)
		top_n_movies.Add(movies_by_frequency[pos].item_id);

	Console.Error.Write("Loading prediction model ... ");
	// configure before loading: user parameters may be updated, item parameters stay fixed
	mf.UpdateUsers = true;
	mf.UpdateItems = false;
	mf.BiasReg = 0.001;
	mf.Regularization = 0.045;
	mf.NumIter = 60;
	time = Utils.MeasureTime(delegate() {
		mf.LoadModel(model_file);
	});
	Console.Error.WriteLine("done ({0,0:0.##}).", time.TotalSeconds.ToString(CultureInfo.InvariantCulture));

	rating_predictor = mf;

	current_user_id = user_mapping.ToInternalID(current_user_external_id);

	PredictAllRatings();
}