/// <summary>Find the parameters resulting in the minimal results for a given evaluation measure (1D)</summary>
        /// <remarks>The recommender will be set to the best parameter value after calling this method.</remarks>
        /// <param name="evaluation_measure">the name of the evaluation measure</param>
        /// <param name="hyperparameter_name">the name of the hyperparameter to optimize</param>
        /// <param name="hyperparameter_values">the values of the hyperparameter to try out; must contain at least one value</param>
        /// <param name="recommender">the recommender</param>
        /// <param name="split">the dataset split to use</param>
        /// <returns>the best (lowest) average value for the hyperparameter</returns>
        /// <exception cref="ArgumentException">if hyperparameter_values is null or empty</exception>
        public static double FindMinimum(
            string evaluation_measure,
            string hyperparameter_name,
            double[] hyperparameter_values,
            RatingPredictor recommender,
            ISplit <IRatings> split)
        {
            // guard: with no candidate values there is nothing to optimize, and the
            // original code would have indexed the array with min_i == -1 below
            if (hyperparameter_values == null || hyperparameter_values.Length == 0)
                throw new ArgumentException("hyperparameter_values must contain at least one value", "hyperparameter_values");

            double min_result = double.MaxValue;
            int    min_i      = -1;

            for (int i = 0; i < hyperparameter_values.Length; i++)
            {
                // configure the candidate value and estimate its quality via cross-validation
                recommender.SetProperty(hyperparameter_name, hyperparameter_values[i].ToString(CultureInfo.InvariantCulture));
                double result = recommender.DoCrossValidation(split)[evaluation_measure];

                if (result < min_result)
                {
                    min_i      = i;
                    min_result = result;
                }
            }
            // leave the recommender configured with the best value found
            recommender.SetProperty(hyperparameter_name, hyperparameter_values[min_i].ToString(CultureInfo.InvariantCulture));

            return(min_result);
        }
// Exemplo n.º 2
// 0
        /// <summary>Evaluate on the folds of a dataset split</summary>
        /// <param name="recommender">a rating predictor</param>
        /// <param name="split">a rating dataset split</param>
        /// <param name="show_results">set to true to print results to STDERR</param>
        /// <returns>a dictionary containing the average results over the different folds of the split</returns>
        static public Dictionary <string, double> EvaluateOnSplit(RatingPredictor recommender, ISplit <IRatings> split, bool show_results)
        {
            var avg_results = new Dictionary <string, double>();

            // make sure the standard measures appear in the result even if some fold omits them
            foreach (var key in Measures)
            {
                avg_results[key] = 0;
            }

            for (int i = 0; i < split.NumberOfFolds; i++)
            {
                var split_recommender = (RatingPredictor)recommender.Clone();                  // to avoid changes in recommender
                split_recommender.Ratings = split.Train[i];
                split_recommender.Train();
                var fold_results = Evaluate(split_recommender, split.Test[i]);

                foreach (var key in fold_results.Keys)
                {
                    // accumulate, tolerating measures that are not listed in Measures
                    // (the original indexer-increment threw KeyNotFoundException for those)
                    double sum;
                    avg_results.TryGetValue(key, out sum);
                    avg_results[key] = sum + fold_results[key];
                }
                if (show_results)
                {
                    // format the doubles here so the {N,0:0.#####} specifiers actually apply
                    // (the original pre-converted them to strings, making the format a no-op)
                    Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "fold {0}, RMSE {1,0:0.#####}, MAE {2,0:0.#####}", i, fold_results["RMSE"], fold_results["MAE"]));
                }
            }

            // turn the accumulated sums into per-fold averages
            foreach (var key in avg_results.Keys.ToList())
            {
                avg_results[key] /= split.NumberOfFolds;
            }

            return(avg_results);
        }
        /// <summary>Find the parameters resulting in the minimal results for a given evaluation measure (2D)</summary>
        /// <remarks>The recommender will be set to the best parameter value after calling this method.</remarks>
        /// <param name="evaluation_measure">the name of the evaluation measure</param>
        /// <param name="hp_name1">the name of the first hyperparameter to optimize</param>
        /// <param name="hp_values1">the values of the first hyperparameter to try out; must not be empty</param>
        /// <param name="hp_name2">the name of the second hyperparameter to optimize</param>
        /// <param name="hp_values2">the values of the second hyperparameter to try out; must not be empty</param>
        /// <param name="recommender">the recommender</param>
        /// <param name="split">the dataset split to use</param>
        /// <returns>the best (lowest) average value for the hyperparameter</returns>
        /// <exception cref="ArgumentException">if either value array is null or empty</exception>
        public static double FindMinimum(
            string evaluation_measure,
            string hp_name1, string hp_name2,
            double[] hp_values1, double[] hp_values2,
            RatingPredictor recommender,
            ISplit <IRatings> split)
        {
            // guard: empty candidate lists would leave min_i/min_j at -1
            // and crash when setting the "best" values below
            if (hp_values1 == null || hp_values1.Length == 0)
                throw new ArgumentException("hp_values1 must contain at least one value", "hp_values1");
            if (hp_values2 == null || hp_values2.Length == 0)
                throw new ArgumentException("hp_values2 must contain at least one value", "hp_values2");

            double min_result = double.MaxValue;
            int    min_i      = -1;
            int    min_j      = -1;

            // exhaustive grid search over the cross product of both value lists
            for (int i = 0; i < hp_values1.Length; i++)
            {
                for (int j = 0; j < hp_values2.Length; j++)
                {
                    recommender.SetProperty(hp_name1, hp_values1[i].ToString(CultureInfo.InvariantCulture));
                    recommender.SetProperty(hp_name2, hp_values2[j].ToString(CultureInfo.InvariantCulture));

                    Console.Error.WriteLine("reg_u={0} reg_i={1}", hp_values1[i].ToString(CultureInfo.InvariantCulture), hp_values2[j].ToString(CultureInfo.InvariantCulture));                     // TODO this is not generic
                    double result = recommender.DoCrossValidation(split)[evaluation_measure];
                    if (result < min_result)
                    {
                        min_i      = i;
                        min_j      = j;
                        min_result = result;
                    }
                }
            }

            // set to best hyperparameter values
            recommender.SetProperty(hp_name1, hp_values1[min_i].ToString(CultureInfo.InvariantCulture));
            recommender.SetProperty(hp_name2, hp_values2[min_j].ToString(CultureInfo.InvariantCulture));

            return(min_result);
        }
// Exemplo n.º 4
// 0
        /// <summary>Find best hyperparameter (according to an error measure) using Nelder-Mead search</summary>
        /// <remarks>Evaluates candidates on a simple train/validation split (ratio given by the split_ratio field).</remarks>
        /// <param name="error_measure">an error measure (lower is better)</param>
        /// <param name="recommender">a rating predictor (will be set to best hyperparameter combination)</param>
        /// <returns>the estimated error of the best hyperparameter combination</returns>
        /// <exception cref="NotSupportedException">if the recommender type has no predefined hyperparameter setup</exception>
        public static double FindMinimum(
            string error_measure,
            RatingPredictor recommender)
        {
            var split = new RatingsSimpleSplit(recommender.Ratings, split_ratio);
            //var split = new RatingCrossValidationSplit(recommender.Ratings, 5);

            IList <string>      hp_names;
            IList <DenseVector> initial_hp_values;

            // pick the hyperparameter names and initial simplex per recommender type;
            // note: BiasedMatrixFactorization is tested before MatrixFactorization,
            // which matters if the former specializes the latter — keep this order
            // TODO manage this via reflection?
            if (recommender is UserItemBaseline)
            {
                hp_names          = new string[] { "reg_u", "reg_i" };
                initial_hp_values = new DenseVector[] {
                    new DenseVector(new double[] { 25, 10 }),
                    new DenseVector(new double[] { 10, 25 }),
                    new DenseVector(new double[] { 2, 5 }),
                    new DenseVector(new double[] { 5, 2 }),
                    new DenseVector(new double[] { 1, 4 }),
                    new DenseVector(new double[] { 4, 1 }),
                    new DenseVector(new double[] { 3, 3 }),
                };
            }
            else if (recommender is BiasedMatrixFactorization)
            {
                hp_names          = new string[] { "regularization", "bias_reg" };
                initial_hp_values = new DenseVector[] {                 // TODO reg_u and reg_i (in a second step?)
                    new DenseVector(new double[] { 0.1, 0 }),
                    new DenseVector(new double[] { 0.01, 0 }),
                    new DenseVector(new double[] { 0.0001, 0 }),
                    new DenseVector(new double[] { 0.00001, 0 }),
                    new DenseVector(new double[] { 0.1, 0.0001 }),
                    new DenseVector(new double[] { 0.01, 0.0001 }),
                    new DenseVector(new double[] { 0.0001, 0.0001 }),
                    new DenseVector(new double[] { 0.00001, 0.0001 }),
                };
            }
            else if (recommender is MatrixFactorization)
            {             // TODO normal interval search could be more efficient
                hp_names          = new string[] { "regularization", };
                initial_hp_values = new DenseVector[] {
                    new DenseVector(new double[] { 0.1 }),
                    new DenseVector(new double[] { 0.01 }),
                    new DenseVector(new double[] { 0.0001 }),
                    new DenseVector(new double[] { 0.00001 }),
                };
            }
            // TODO kNN-based methods
            else
            {
                // more specific than the bare Exception thrown originally;
                // still caught by any existing catch (Exception) handlers
                throw new NotSupportedException("not prepared for type " + recommender.GetType().ToString());
            }

            return(FindMinimum(
                       error_measure,
                       hp_names, initial_hp_values, recommender, split));
        }
// Exemplo n.º 5
// 0
        /// <summary>Configure the given recommender from a hyperparameter string and evaluate it on a split</summary>
        /// <param name="recommender">the rating predictor to configure and evaluate</param>
        /// <param name="split">the dataset split to evaluate on</param>
        /// <param name="hp_string">the hyperparameter configuration string</param>
        /// <param name="evaluation_measure">the name of the evaluation measure to report</param>
        /// <returns>the value of the requested evaluation measure on the split</returns>
        static double Run(RatingPredictor recommender, ISplit <IRatings> split, string hp_string, string evaluation_measure)
        {
            Recommender.Configure(recommender, hp_string);

            // evaluate on all folds, then pick out the measure we are optimizing
            var split_results = Eval.Ratings.EvaluateOnSplit(recommender, split);
            double measure_value = split_results[evaluation_measure];

            Console.Error.WriteLine("Nelder-Mead: {0}: {1}", hp_string, measure_value.ToString(CultureInfo.InvariantCulture));

            return measure_value;
        }
        /// <summary>Evaluate on the folds of a dataset split</summary>
        /// <param name="recommender">a rating predictor</param>
        /// <param name="num_folds">the number of folds</param>
        /// <param name="compute_fit">if set to true measure fit on the training data as well</param>
        /// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
        /// <returns>a dictionary containing the average results over the different folds of the split</returns>
        static public RatingPredictionEvaluationResults DoCrossValidation(
            this RatingPredictor recommender,
            uint num_folds         = 5,
            bool compute_fit       = false,
            bool show_fold_results = false)
        {
            // build a k-fold split over the recommender's ratings and
            // delegate to the split-based overload
            var cv_split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);

            return recommender.DoCrossValidation(cv_split, compute_fit, show_fold_results);
        }
        /// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
        /// <param name="recommender">a rating predictor</param>
        /// <param name="num_folds">the number of folds</param>
        /// <param name="max_iter">the maximum number of iterations</param>
        /// <param name="find_iter">the report interval</param>
        /// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
        static public void DoIterativeCrossValidation(
            this RatingPredictor recommender,
            uint num_folds,
            uint max_iter,
            uint find_iter         = 1,
            bool show_fold_results = false)
        {
            // create the k-fold split and hand off to the split-based overload
            recommender.DoIterativeCrossValidation(
                new RatingCrossValidationSplit(recommender.Ratings, num_folds),
                max_iter, find_iter, show_fold_results);
        }
        /// <summary>Evaluate on the folds of a dataset split</summary>
        /// <param name="recommender">an item recommender</param>
        /// <param name="num_folds">the number of folds</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="compute_fit">if set to true measure fit on the training data as well</param>
        /// <param name="show_results">set to true to print results to STDERR</param>
        /// <returns>a dictionary containing the average results over the different folds of the split</returns>
        static public EvaluationResults DoRatingBasedRankingCrossValidation(
            this RatingPredictor recommender,
            uint num_folds,
            IList <int> candidate_items,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            bool compute_fit  = false,
            bool show_results = false)
        {
            // build a k-fold split over the recommender's ratings,
            // then delegate to the split-based overload
            var cv_split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);

            return recommender.DoRatingBasedRankingCrossValidation(cv_split, candidate_items, candidate_item_mode, compute_fit, show_results);
        }
// Exemplo n.º 9
// 0
    /// <summary>Create and configure the rating predictor: read the ratings, compute the most
    /// frequently rated movies, load the prediction model, and predict all ratings</summary>
    private void CreateRecommender()
    {
        BiasedMatrixFactorization recommender = new BiasedMatrixFactorization();

        Console.Error.Write("Reading in ratings ... ");
        TimeSpan time = Utils.MeasureTime(delegate() {
            recommender.Ratings = RatingPrediction.Read(ratings_file, user_mapping, item_mapping);
        });

        Console.Error.WriteLine("done ({0,0:0.##}).", time.TotalSeconds.ToString(CultureInfo.InvariantCulture));

        //Console.Error.Write("Reading in additional ratings ... ");
        //string[] rating_files = Directory.GetFiles("../../saved_data/", "user-ratings-*");
        //Console.Error.WriteLine("done.");

        // count how often each item was rated and sort descending by frequency
        foreach (var indices_for_item in recommender.Ratings.ByItem)
        {
            if (indices_for_item.Count > 0)
            {
                movies_by_frequency.Add(new WeightedItem(recommender.Ratings.Items[indices_for_item[0]], indices_for_item.Count));
            }
        }
        movies_by_frequency.Sort();
        movies_by_frequency.Reverse();
        // guard against requesting more top movies than exist in the data
        // (the original loop could index past the end of the list)
        int top_count = Math.Min(n_movies, movies_by_frequency.Count);
        for (int i = 0; i < top_count; i++)
        {
            top_n_movies.Add(movies_by_frequency[i].item_id);
        }

        Console.Error.Write("Loading prediction model ... ");
        // hard-coded model hyperparameters; must match the saved model file
        recommender.UpdateUsers    = true;
        recommender.UpdateItems    = false;
        recommender.BiasReg        = 0.001;
        recommender.Regularization = 0.045;
        recommender.NumIter        = 60;
        time = Utils.MeasureTime(delegate() {
            recommender.LoadModel(model_file);
        });
        Console.Error.WriteLine("done ({0,0:0.##}).", time.TotalSeconds.ToString(CultureInfo.InvariantCulture));

        rating_predictor = recommender;

        current_user_id = user_mapping.ToInternalID(current_user_external_id);
        //rating_predictor.AddUser(current_user_id);

        // add movies that were not in the training set
        //rating_predictor.AddItem( item_mapping.InternalIDs.Count - 1 );

        PredictAllRatings();
    }
        /// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
        /// <param name="recommender">an item recommender</param>
        /// <param name="num_folds">the number of folds</param>
        /// <param name="test_users">a collection of integers with all test users</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
        /// <param name="max_iter">the maximum number of iterations</param>
        /// <param name="find_iter">the report interval</param>
        /// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
        static public void DoRatingBasedRankingIterativeCrossValidation(
            this RatingPredictor recommender,
            uint num_folds,
            IList <int> test_users,
            IList <int> candidate_items,
            CandidateItems candidate_item_mode,
            RepeatedEvents repeated_events,
            uint max_iter,
            uint find_iter         = 1,
            bool show_fold_results = false)
        {
            var split = new RatingCrossValidationSplit(recommender.Ratings, num_folds);

            // BUGFIX: forward show_fold_results to the split-based overload;
            // previously the parameter was accepted but silently ignored
            recommender.DoRatingBasedRankingIterativeCrossValidation(split, test_users, candidate_items, candidate_item_mode, repeated_events, max_iter, find_iter, show_fold_results);
        }
// Exemplo n.º 11
// 0
        // TODO get rid of recommender argument
        /// <summary>Display dataset statistics</summary>
        /// <param name="train">the training data</param>
        /// <param name="test">the test data</param>
        /// <param name="recommender">the recommender (to get attribute information)</param>
        /// <param name="display_overlap">if set true, display the user/item overlap between train and test</param>
        public static void DisplayDataStats(IRatings train, IRatings test, RatingPredictor recommender, bool display_overlap)
        {
            // training data stats
            DisplayRatingsStats("training data: ", train);

            // test data stats
            if (test != null)
            {
                DisplayRatingsStats("test data:     ", test);
            }

            // count and display the overlap between train and test
            if (display_overlap && test != null)
            {
                int      num_new_users = 0;
                int      num_new_items = 0;
                TimeSpan seconds       = Utils.MeasureTime(delegate() {
                    num_new_users = test.AllUsers.Except(train.AllUsers).Count();
                    num_new_items = test.AllItems.Except(train.AllItems).Count();
                });
                Console.WriteLine("{0} new users, {1} new items ({2} seconds)", num_new_users, num_new_items, seconds);
            }

            // attribute stats
            if (recommender != null)
            {
                if (recommender is IUserAttributeAwareRecommender)
                {
                    Console.WriteLine("{0} user attributes", ((IUserAttributeAwareRecommender)recommender).NumUserAttributes);
                }
                if (recommender is IItemAttributeAwareRecommender)
                {
                    Console.WriteLine("{0} item attributes", ((IItemAttributeAwareRecommender)recommender).NumItemAttributes);
                }
            }
        }

        /// <summary>Display the number of users, items, and ratings plus the sparsity of one ratings dataset</summary>
        /// <param name="label">prefix printed before the statistics (carries its own trailing padding)</param>
        /// <param name="ratings">the ratings dataset to summarize</param>
        static void DisplayRatingsStats(string label, IRatings ratings)
        {
            int    num_users   = ratings.AllUsers.Count;
            int    num_items   = ratings.AllItems.Count;
            long   matrix_size = (long)num_users * num_items;
            long   empty_size  = (long)matrix_size - ratings.Count;
            // fraction (in percent) of the user-item matrix without a rating;
            // report 0 for an empty matrix instead of NaN
            double sparsity    = matrix_size == 0 ? 0 : (double)100L * empty_size / matrix_size;

            Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0}{1} users, {2} items, {3} ratings, sparsity {4,0:0.#####}", label, num_users, num_items, ratings.Count, sparsity));
        }
        /// <summary>Evaluate on the folds of a dataset split</summary>
        /// <param name="recommender">a rating predictor</param>
        /// <param name="split">a rating dataset split</param>
        /// <param name="compute_fit">if set to true measure fit on the training data as well</param>
        /// <param name="show_fold_results">set to true to print per-fold results to STDERR</param>
        /// <returns>a dictionary containing the average results over the different folds of the split</returns>
        static public RatingPredictionEvaluationResults DoCrossValidation(
            this RatingPredictor recommender,
            ISplit <IRatings> split,
            bool compute_fit       = false,
            bool show_fold_results = false)
        {
            var fold_results = new RatingPredictionEvaluationResults[split.NumberOfFolds];

            // folds are trained and evaluated in parallel; each iteration writes
            // only its own slot of fold_results, so no locking is needed
            Parallel.For(0, (int)split.NumberOfFolds, i =>
            {
                try
                {
                    var split_recommender     = (RatingPredictor)recommender.Clone();                  // to avoid changes in recommender
                    split_recommender.Ratings = split.Train[i];
                    // transductive predictors may use the test ratings as additional feedback
                    if (recommender is ITransductiveRatingPredictor)
                    {
                        ((ITransductiveRatingPredictor)split_recommender).AdditionalFeedback = split.Test[i];
                    }
                    split_recommender.Train();
                    fold_results[i] = Ratings.Evaluate(split_recommender, split.Test[i]);
                    if (compute_fit)
                    {
                        // training-set error, stored under the extra key "fit"
                        fold_results[i]["fit"] = (float)split_recommender.ComputeFit();
                    }

                    if (show_fold_results)
                    {
                        Console.Error.WriteLine("fold {0} {1}", i, fold_results[i]);
                    }
                }
                catch (Exception e)
                {
                    // log on the worker thread, then rethrow so Parallel.For surfaces the failure
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            // aggregate the per-fold results into averages
            return(new RatingPredictionEvaluationResults(fold_results));
        }
// Exemplo n.º 13
// 0
    /// <summary>Run the user-based rating prediction demo: read the data, find neighbours
    /// for a fixed target user, print their similarities, and show the top predicted ratings</summary>
    static void UserItemStart()
    {
        var reader = new FileReader();
        Dictionary<int, UserPref> user_prefs = reader.GetData();
        Dictionary<string, float> meta_data = reader.GetMetaData();

        // pick the similarity formula based on the file's metadata
        var formula_factory = new FormulaFactory();
        var neighbour_finder = new NeighbourFinder(formula_factory.GetFormula(meta_data));

        int target_user_id = 186;
        Dictionary<int, double> neighbours = neighbour_finder.FindNeighbours(target_user_id, user_prefs);

        foreach (KeyValuePair<int, double> entry in neighbours)
        {
            Console.WriteLine("Neighbour {0} has a similarity of {1}", entry.Key, entry.Value);
        }

        var predictor = new RatingPredictor();
        predictor.PredictRatings(user_prefs, target_user_id, neighbours);
        predictor.GetTopPredictedRatings(8);

        // keep the console window open until a key is pressed
        Console.Read();
    }
        /// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
        /// <param name="recommender">a rating predictor</param>
        /// <param name="split">a rating dataset split</param>
        /// <param name="max_iter">the maximum number of iterations</param>
        /// <param name="find_iter">the report interval</param>
        /// <param name="show_fold_results">if set to true to print per-fold results to STDERR</param>
        /// <exception cref="ArgumentException">if the recommender does not implement IIterativeModel</exception>
        static public void DoIterativeCrossValidation(
            this RatingPredictor recommender,
            ISplit <IRatings> split,
            uint max_iter,
            uint find_iter         = 1,
            bool show_fold_results = false)
        {
            if (!(recommender is IIterativeModel))
            {
                throw new ArgumentException("recommender must be of type IIterativeModel");
            }

            // one clone per fold; split_recommenders and iterative_recommenders
            // reference the same objects, viewed through different interfaces
            var split_recommenders     = new RatingPredictor[split.NumberOfFolds];
            var iterative_recommenders = new IIterativeModel[split.NumberOfFolds];
            var fold_results           = new RatingPredictionEvaluationResults[split.NumberOfFolds];

            // initial training and evaluation
            // (parallel; each iteration writes only its own array slots)
            Parallel.For(0, (int)split.NumberOfFolds, i =>
            {
                try
                {
                    split_recommenders[i]         = (RatingPredictor)recommender.Clone();              // to avoid changes in recommender
                    split_recommenders[i].Ratings = split.Train[i];
                    // transductive predictors may use the test ratings as additional feedback
                    if (recommender is ITransductiveRatingPredictor)
                    {
                        ((ITransductiveRatingPredictor)split_recommenders[i]).AdditionalFeedback = split.Test[i];
                    }
                    split_recommenders[i].Train();
                    iterative_recommenders[i] = (IIterativeModel)split_recommenders[i];
                    fold_results[i]           = Ratings.Evaluate(split_recommenders[i], split.Test[i]);

                    if (show_fold_results)
                    {
                        Console.Error.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], iterative_recommenders[i].NumIter);
                    }
                }
                catch (Exception e)
                {
                    // log on the worker thread, then rethrow so Parallel.For surfaces the failure
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });
            Console.WriteLine("{0} iteration {1}", new RatingPredictionEvaluationResults(fold_results), iterative_recommenders[0].NumIter);

            // iterative training and evaluation:
            // resume after the iterations already performed by Train() above
            for (int it = (int)iterative_recommenders[0].NumIter + 1; it <= max_iter; it++)
            {
                Parallel.For(0, (int)split.NumberOfFolds, i =>
                {
                    try
                    {
                        iterative_recommenders[i].Iterate();

                        // only re-evaluate at the report interval
                        if (it % find_iter == 0)
                        {
                            fold_results[i] = Ratings.Evaluate(split_recommenders[i], split.Test[i]);
                            if (show_fold_results)
                            {
                                Console.Error.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], it);
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                        throw;
                    }
                });
                Console.WriteLine("{0} iteration {1}", new RatingPredictionEvaluationResults(fold_results), it);
            }
        }
// Exemplo n.º 15
// 0
 /// <summary>Create a NelderMead object for hyperparameter search</summary>
 /// <param name="evaluation_measure">the name of the evaluation measure</param>
 /// <param name="recommender">the recommender</param>
 public NelderMead(string evaluation_measure, RatingPredictor recommender)
 {
     this.evaluation_measure = evaluation_measure;
     this.recommender        = recommender;
     // remaining setup is performed by Init() (defined elsewhere in this class)
     Init();
 }
// Exemplo n.º 16
// 0
 /// <summary>Evaluate on the folds of a dataset split</summary>
 /// <param name="recommender">a rating predictor</param>
 /// <param name="split">a rating dataset split</param>
 /// <returns>a dictionary containing the average results over the different folds of the split</returns>
 static public Dictionary <string, double> EvaluateOnSplit(RatingPredictor recommender, ISplit <IRatings> split)
 {
     // convenience overload: delegate with result printing disabled
     return EvaluateOnSplit(recommender, split, /* show_results */ false);
 }
// Exemplo n.º 17
// 0
    /// <summary>Program entry point: parse command line arguments, configure the
    /// rating predictor, load the data, and run the track-1 experiment</summary>
    /// <param name="args">command line arguments; args[0] is the recommender method name</param>
    static void Main(string[] args)
    {
        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
        Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

        // check number of command line parameters
        if (args.Length < 1)
            Usage("Not enough arguments.");

        // read command line parameters
        string method = args[0];

        // NOTE(review): Usage presumably terminates the process — otherwise
        // parameters could still be null below; confirm against Usage's definition
        RecommenderParameters parameters = null;
        try	{ parameters = new RecommenderParameters(args, 1); }
        catch (ArgumentException e) { Usage(e.Message);	}

        // arguments for iteration search
        find_iter   = parameters.GetRemoveInt32(  "find_iter",   0);
        max_iter    = parameters.GetRemoveInt32(  "max_iter",    500);
        compute_fit = parameters.GetRemoveBool(   "compute_fit", false);
        epsilon     = parameters.GetRemoveDouble( "epsilon",     0);
        rmse_cutoff = parameters.GetRemoveDouble( "rmse_cutoff", double.MaxValue);
        mae_cutoff  = parameters.GetRemoveDouble( "mae_cutoff",  double.MaxValue);

        // data arguments: pick the track-specific subdirectory
        string data_dir  = parameters.GetRemoveString( "data_dir");
        track2           = parameters.GetRemoveBool(   "track2", false);
        if (data_dir != string.Empty)
            data_dir = data_dir + (track2 ? "/mml-track2" : "/track1");
        else
            data_dir = track2 ? "/mml-track2" : "track1";
        sample_data      = parameters.GetRemoveBool(   "sample_data", false);

        // other arguments
        save_model_file  = parameters.GetRemoveString( "save_model");
        load_model_file  = parameters.GetRemoveString( "load_model");
        int random_seed  = parameters.GetRemoveInt32(  "random_seed",      -1);
        no_eval          = parameters.GetRemoveBool(   "no_eval",          false);
        prediction_file  = parameters.GetRemoveString( "prediction_file");
        cross_validation = parameters.GetRemoveUInt32( "cross_validation", 0);
        good_rating_prob = parameters.GetRemoveBool(   "good_rating_prob", false);

        // -1 means "no fixed seed requested"
        if (random_seed != -1)
            MyMediaLite.Util.Random.Seed = random_seed;

        recommender = Recommender.CreateRatingPredictor(method);
        if (recommender == null)
            Usage(string.Format("Unknown method: '{0}'", method));

        Recommender.Configure(recommender, parameters, Usage);

        // any unconsumed parameters indicate a user error
        if (parameters.CheckForLeftovers())
            Usage(-1);

        // load all the data
        TimeSpan loading_time = Wrap.MeasureTime(delegate() { LoadData(data_dir); });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));

        recommender.Ratings = training_ratings;

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

        if (load_model_file != string.Empty)
            Model.Load(recommender, load_model_file);

        // run the actual experiment
        DoTrack1();

        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }
// Exemplo n.º 18
// 0
    /// <summary>Command-line entry point: parses options, loads rating data, then trains and evaluates a rating predictor.</summary>
    /// <remarks>
    /// Three mutually related modes are supported:
    /// iteration search (--find-iter, evaluates every N iterations of an iterative model),
    /// k-fold cross-validation (--cross-validation), and a single train/evaluate run
    /// (optionally preceded by Nelder-Mead hyperparameter search and/or a random train/test split).
    /// Relies on file-level state set elsewhere (training_file, test_file, recommender, user_mapping,
    /// item_mapping, training_data, test_data, the *_stats lists, compute_fit, rating_type, file_format).
    /// </remarks>
    static void Main(string[] args)
    {
        Assembly assembly = Assembly.GetExecutingAssembly();

        // load the experimental assembly from the executable's own directory
        Assembly.LoadFile(Path.GetDirectoryName(assembly.Location) + Path.DirectorySeparatorChar + "MyMediaLiteExperimental.dll");

        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
        Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

        // recommender arguments
        string method = "BiasedMatrixFactorization";
        string recommender_options = string.Empty;

        // help/version
        bool show_help    = false;
        bool show_version = false;

        // arguments for iteration search
        int    find_iter   = 0;
        int    max_iter    = 500;
        double epsilon     = 0;
        double rmse_cutoff = double.MaxValue;
        double mae_cutoff  = double.MaxValue;

        // data arguments
        string data_dir             = string.Empty;
        string user_attributes_file = string.Empty;
        string item_attributes_file = string.Empty;
        string user_relations_file  = string.Empty;
        string item_relations_file  = string.Empty;

        // other arguments
        bool   online_eval      = false;
        bool   search_hp        = false;
        string save_model_file  = string.Empty;
        string load_model_file  = string.Empty;
        int    random_seed      = -1;
        string prediction_file  = string.Empty;
        string prediction_line  = "{0}\t{1}\t{2}";
        int    cross_validation = 0;
        double split_ratio      = 0;

        // NDesk.Options-style declarative command-line parsing
        var p = new OptionSet()
        {
            // string-valued options
            { "training-file=", v => training_file = v },
            { "test-file=", v => test_file = v },
            { "recommender=", v => method = v },
            { "recommender-options=", v => recommender_options += " " + v },
            { "data-dir=", v => data_dir = v },
            { "user-attributes=", v => user_attributes_file = v },
            { "item-attributes=", v => item_attributes_file = v },
            { "user-relations=", v => user_relations_file = v },
            { "item-relations=", v => item_relations_file = v },
            { "save-model=", v => save_model_file = v },
            { "load-model=", v => load_model_file = v },
            { "prediction-file=", v => prediction_file = v },
            { "prediction-line=", v => prediction_line = v },
            // integer-valued options
            { "find-iter=", (int v) => find_iter = v },
            { "max-iter=", (int v) => max_iter = v },
            { "random-seed=", (int v) => random_seed = v },
            { "cross-validation=", (int v) => cross_validation = v },
            // double-valued options
            { "epsilon=", (double v) => epsilon = v },
            { "rmse-cutoff=", (double v) => rmse_cutoff = v },
            { "mae-cutoff=", (double v) => mae_cutoff = v },
            { "split-ratio=", (double v) => split_ratio = v },
            // enum options
            { "rating-type=", (RatingType v) => rating_type = v },
            { "file-format=", (RatingFileFormat v) => file_format = v },
            // boolean options
            { "compute-fit", v => compute_fit = v != null },
            { "online-evaluation", v => online_eval = v != null },
            { "search-hp", v => search_hp = v != null },
            { "help", v => show_help = v != null },
            { "version", v => show_version = v != null },
        };
        IList <string> extra_args = p.Parse(args);

        // TODO make sure interaction of --find-iter and --cross-validation works properly

        // without a test file there is nothing to evaluate against (may be overridden by --split-ratio below)
        bool no_eval = test_file == null;

        if (show_version)
        {
            ShowVersion();
        }
        if (show_help)
        {
            Usage(0);
        }

        // argument validation; Usage() terminates the process with a message
        if (extra_args.Count > 0)
        {
            Usage("Did not understand " + extra_args[0]);
        }

        if (training_file == null)
        {
            Usage("Parameter --training-file=FILE is missing.");
        }

        if (cross_validation != 0 && split_ratio != 0)
        {
            Usage("--cross-validation=K and --split-ratio=NUM are mutually exclusive.");
        }

        if (random_seed != -1)
        {
            MyMediaLite.Util.Random.InitInstance(random_seed);
        }

        recommender = Recommender.CreateRatingPredictor(method);
        if (recommender == null)
        {
            Usage(string.Format("Unknown method: '{0}'", method));
        }

        Recommender.Configure(recommender, recommender_options, Usage);

        // ID mapping objects -- KDD Cup 2011 data already uses contiguous integer IDs
        if (file_format == RatingFileFormat.KDDCUP_2011)
        {
            user_mapping = new IdentityMapping();
            item_mapping = new IdentityMapping();
        }

        // load all the data
        LoadData(data_dir, user_attributes_file, item_attributes_file, user_relations_file, item_relations_file, !online_eval);

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

        // optional random split of the training data into train/test parts
        if (split_ratio > 0)
        {
            var split = new RatingsSimpleSplit(training_data, split_ratio);
            recommender.Ratings = split.Train[0];
            training_data       = split.Train[0];
            test_data           = split.Test[0];
        }

        Utils.DisplayDataStats(training_data, test_data, recommender);

        if (find_iter != 0)
        {
            // mode 1: evaluate every find_iter iterations of an iterative model
            if (!(recommender is IIterativeModel))
            {
                Usage("Only iterative recommenders support find_iter.");
            }
            var iterative_recommender = (IIterativeModel)recommender;
            Console.WriteLine(recommender.ToString() + " ");

            if (load_model_file == string.Empty)
            {
                recommender.Train();
            }
            else
            {
                Recommender.LoadModel(iterative_recommender, load_model_file);
            }

            if (compute_fit)
            {
                Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", iterative_recommender.ComputeFit()));
            }

            MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data));
            Console.WriteLine(" iteration " + iterative_recommender.NumIter);

            // continue training one iteration at a time, evaluating every find_iter-th iteration
            for (int i = (int)iterative_recommender.NumIter + 1; i <= max_iter; i++)
            {
                TimeSpan time = Utils.MeasureTime(delegate() {
                    iterative_recommender.Iterate();
                });
                training_time_stats.Add(time.TotalSeconds);

                if (i % find_iter == 0)
                {
                    if (compute_fit)
                    {
                        double fit = 0;
                        time = Utils.MeasureTime(delegate() {
                            fit = iterative_recommender.ComputeFit();
                        });
                        fit_time_stats.Add(time.TotalSeconds);
                        Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", fit));
                    }

                    Dictionary <string, double> results = null;
                    time = Utils.MeasureTime(delegate() { results = MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data); });
                    eval_time_stats.Add(time.TotalSeconds);
                    MyMediaLite.Eval.Ratings.DisplayResults(results);
                    rmse_eval_stats.Add(results["RMSE"]);
                    Console.WriteLine(" iteration " + i);

                    Recommender.SaveModel(recommender, save_model_file, i);
                    if (prediction_file != string.Empty)
                    {
                        Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file + "-it-" + i);
                    }

                    // stop if RMSE moved more than epsilon away from the best RMSE seen so far (divergence)
                    if (epsilon > 0.0 && results["RMSE"] - rmse_eval_stats.Min() > epsilon)
                    {
                        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], rmse_eval_stats.Min()));
                        Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", i);
                        break;
                    }
                    if (results["RMSE"] > rmse_cutoff || results["MAE"] > mae_cutoff)
                    {
                        Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
                        break;
                    }
                }
            }             // for

            DisplayStats();
        }
        else
        {
            TimeSpan seconds;

            if (load_model_file == string.Empty)
            {
                if (cross_validation > 0)
                {
                    // mode 2: k-fold cross-validation on the training data
                    Console.Write(recommender.ToString());
                    Console.WriteLine();
                    var split   = new RatingCrossValidationSplit(training_data, cross_validation);
                    var results = MyMediaLite.Eval.Ratings.EvaluateOnSplit(recommender, split);                     // TODO if (search_hp)
                    MyMediaLite.Eval.Ratings.DisplayResults(results);
                    no_eval             = true;
                    recommender.Ratings = training_data;
                }
                else
                {
                    // mode 3: single train/evaluate run
                    if (search_hp)
                    {
                        // TODO --search-hp-criterion=RMSE
                        double result = NelderMead.FindMinimum("RMSE", recommender);
                        Console.Error.WriteLine("estimated quality (on split) {0}", result.ToString(CultureInfo.InvariantCulture));
                        // TODO give out hp search time
                    }

                    Console.Write(recommender.ToString());
                    seconds = Utils.MeasureTime(delegate() { recommender.Train(); });
                    Console.Write(" training_time " + seconds + " ");
                }
            }
            else
            {
                Recommender.LoadModel(recommender, load_model_file);
                Console.Write(recommender.ToString() + " ");
            }

            if (!no_eval)
            {
                if (online_eval)                  // TODO support also for prediction outputs (to allow external evaluation)
                {
                    seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.EvaluateOnline(recommender, test_data)); });
                }
                else
                {
                    seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data)); });
                }

                Console.Write(" testing_time " + seconds);
            }

            if (compute_fit)
            {
                // fit = evaluation on the training data itself
                Console.Write("fit ");
                seconds = Utils.MeasureTime(delegate() {
                    MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, training_data));
                });
                Console.Write(string.Format(CultureInfo.InvariantCulture, " fit_time {0,0:0.#####} ", seconds));
            }

            if (prediction_file != string.Empty)
            {
                seconds = Utils.MeasureTime(delegate() {
                    Console.WriteLine();
                    Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file);
                });
                Console.Error.Write("predicting_time " + seconds);
            }

            Console.WriteLine();
            Console.Error.WriteLine("memory {0}", Memory.Usage);
        }
        Recommender.SaveModel(recommender, save_model_file);
    }
Exemplo n.º 19
0
        /// <summary>Performs user-wise fold-in evaluation, but instead of folding in perform a complete re-training with the new data</summary>
        /// <remarks>
        /// For every user present in both datasets, the recommender is cloned, the user's update
        /// ratings are added to the clone's training data, and the clone is re-trained from scratch.
        /// This method can be quite slow.
        /// If no user overlaps between update_data and eval_data, rating_count stays 0 and the
        /// returned measures are NaN (division by zero on doubles), matching the original behavior.
        /// </remarks>
        /// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
        /// <param name='recommender'>a rating predictor capable of performing a user fold-in</param>
        /// <param name='update_data'>the rating data used to represent the users</param>
        /// <param name='eval_data'>the evaluation data</param>
        static public RatingPredictionEvaluationResults EvaluateFoldInCompleteRetraining(this RatingPredictor recommender, IRatings update_data, IRatings eval_data)
        {
            double rmse = 0;
            double mae  = 0;
            double cbd  = 0;

            int rating_count = 0;

            foreach (int user_id in update_data.AllUsers)
            {
                if (eval_data.AllUsers.Contains(user_id))
                {
                    // clone so that the per-user re-training does not pollute the original model
                    var local_recommender = (RatingPredictor)recommender.Clone();

                    var known_ratings = new RatingsProxy(update_data, update_data.ByUser[user_id]);
                    local_recommender.Ratings = new CombinedRatings(recommender.Ratings, known_ratings);
                    local_recommender.Train();

                    var items_to_rate     = (from index in eval_data.ByUser[user_id] select eval_data.Items[index]).ToArray();
                    // BUGFIX: predict with the re-trained clone, not the untouched original recommender --
                    // otherwise the complete re-training performed above has no effect on the evaluation
                    var predicted_ratings = local_recommender.Recommend(user_id, candidate_items: items_to_rate);

                    foreach (var pred in predicted_ratings)
                    {
                        float prediction    = pred.Item2;
                        float actual_rating = eval_data.Get(user_id, pred.Item1, eval_data.ByUser[user_id]);
                        float error         = prediction - actual_rating;

                        rmse += error * error;
                        mae  += Math.Abs(error);
                        cbd  += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
                        rating_count++;
                    }
                    // progress indicator: one dot per evaluated user
                    Console.Error.Write(".");
                }
            }

            mae  = mae / rating_count;
            rmse = Math.Sqrt(rmse / rating_count);
            cbd  = cbd / rating_count;

            var result = new RatingPredictionEvaluationResults();

            result["RMSE"] = (float)rmse;
            result["MAE"]  = (float)mae;
            result["NMAE"] = (float)mae / (recommender.MaxRating - recommender.MinRating);
            result["CBD"]  = (float)cbd;
            return(result);
        }
        /// <summary>Predict ratings for the given item plus a random sample of unrated items, ordered by predicted rating</summary>
        /// <remarks>
        /// Samples until randomRatingsCount distinct movies not rated by the user have been drawn
        /// (in addition to the given item); only positive predictions are kept.
        /// NOTE(review): assumes moviesCount is sufficiently larger than randomRatingsCount,
        /// otherwise the sampling loop cannot terminate -- confirm with callers.
        /// </remarks>
        /// <param name='ratingsMatrix'>the user-item rating matrix</param>
        /// <param name='weightsMatrix'>the weight matrix passed through to the predictor</param>
        /// <param name='ratingPredictor'>the rating predictor used for scoring</param>
        /// <param name='user'>the user ID</param>
        /// <param name='item'>the item ID that is always included in the candidate set</param>
        /// <returns>item/rating pairs sorted by descending predicted rating</returns>
        private List <KeyValuePair <int, double> > PredictRatings(Matrix <int> ratingsMatrix, Matrix <float> weightsMatrix, RatingPredictor ratingPredictor, int user, int item)
        {
            // candidate set: the requested item plus random movies the user has not rated
            var candidates = new HashSet <int>();
            candidates.Add(item);

            var rng = new Random();
            while (candidates.Count < randomRatingsCount + 1)
            {
                int candidate_movie = rng.Next(this.moviesCount);
                if (!ratingsMatrix[user].Contains(candidate_movie))
                {
                    candidates.Add(candidate_movie);
                }
            }

            // score each candidate; keep only positive predictions
            var scored = new List <KeyValuePair <int, double> >();
            foreach (int candidate in candidates)
            {
                double predicted = ratingPredictor.PredictRating(ratingsMatrix, weightsMatrix, user, candidate, false);
                if (predicted > 0)
                {
                    scored.Add(new KeyValuePair <int, double>(candidate, predicted));
                }
            }

            return(scored.OrderByDescending(pair => pair.Value).ToList());
        }
Exemplo n.º 21
0
 /// <summary>Computes the RMSE fit of a recommender on the training data</summary>
 /// <remarks>Evaluates the recommender against its own Ratings and reports the RMSE entry.</remarks>
 /// <returns>the RMSE on the training data</returns>
 /// <param name='recommender'>the rating predictor to evaluate</param>
 public static double ComputeFit(this RatingPredictor recommender)
     => recommender.Evaluate(recommender.Ratings)["RMSE"];
Exemplo n.º 22
0
 /// <summary>Display dataset statistics</summary>
 /// <remarks>Convenience overload that forwards to the four-argument variant with the extra flag set to false.</remarks>
 /// <param name="train">the training data</param>
 /// <param name="test">the test data</param>
 /// <param name="recommender">the recommender (to get attribute information)</param>
 public static void DisplayDataStats(IRatings train, IRatings test, RatingPredictor recommender)
     => DisplayDataStats(train, test, recommender, false);
        /// <summary>Evaluate item ranking on the folds of a rating dataset split</summary>
        /// <remarks>
        /// Folds are processed in parallel; each fold trains a clone of the recommender and
        /// evaluates it as a ranker on positive-only feedback derived from the fold's ratings.
        /// Per-fold results are summed under a lock and averaged at the end.
        /// </remarks>
        /// <param name="recommender">a rating predictor whose predictions are used for ranking</param>
        /// <param name="split">a dataset split</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="compute_fit">if set to true measure fit on the training data as well</param>
        /// <param name="show_results">set to true to print results to STDERR</param>
        /// <returns>a dictionary containing the average results over the different folds of the split</returns>
        static public EvaluationResults DoRatingBasedRankingCrossValidation(
            this RatingPredictor recommender,
            ISplit <IRatings> split,
            IList <int> candidate_items,
            CandidateItems candidate_item_mode = CandidateItems.OVERLAP,
            bool compute_fit  = false,
            bool show_results = false)
        {
            var avg_results = new ItemRecommendationEvaluationResults();

            Parallel.For(0, (int)split.NumberOfFolds, fold =>
            {
                try
                {
                    var split_recommender     = (RatingPredictor)recommender.Clone();                  // avoid changes in recommender
                    split_recommender.Ratings = split.Train[fold];
                    split_recommender.Train();

                    // convert the fold's ratings into positive-only feedback for ranking evaluation
                    var test_data_posonly     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[fold]);
                    var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[fold]);
                    IList <int> test_users    = test_data_posonly.AllUsers;
                    var fold_results          = Items.Evaluate(split_recommender, test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode);
                    if (compute_fit)
                    {
                        fold_results["fit"] = (float)split_recommender.ComputeFit();
                    }

                    // thread-safe stats
                    lock (avg_results)
                        foreach (var key in fold_results.Keys)
                        {
                            if (avg_results.ContainsKey(key))
                            {
                                avg_results[key] += fold_results[key];
                            }
                            else
                            {
                                avg_results[key] = fold_results[key];
                            }
                        }

                    if (show_results)
                    {
                        Console.Error.WriteLine("fold {0} {1}", fold, fold_results);
                    }
                }
                catch (Exception e)
                {
                    // log before rethrowing so the failure is visible even if Parallel.For aggregates it
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });

            // turn the summed per-fold numbers into averages
            foreach (var key in Items.Measures)
            {
                avg_results[key] /= split.NumberOfFolds;
            }
            avg_results["num_users"] /= split.NumberOfFolds;
            avg_results["num_items"] /= split.NumberOfFolds;

            return(avg_results);
        }
Exemplo n.º 24
0
        /// <summary>Find the parameters resulting in the minimal results for a given evaluation measure</summary>
        /// <remarks>
        /// Nelder-Mead downhill simplex search over the hyperparameter space.
        /// The recommender will be set to the best parameter value after calling this method.
        /// Relies on class-level state: num_it (iteration budget) and the simplex coefficients
        /// alpha (reflection), gamma (expansion), rho (contraction), sigma (reduction) --
        /// presumably the standard Nelder-Mead values; confirm against the class fields.
        /// </remarks>
        /// <param name="evaluation_measure">the name of the evaluation measure</param>
        /// <param name="hp_names">the names of the hyperparameters to optimize</param>
        /// <param name="initial_hp_values">the values of the hyperparameters to try out first</param>
        /// <param name="recommender">the recommender</param>
        /// <param name="split">the dataset split to use</param>
        /// <returns>the best (lowest) average value for the hyperparameter</returns>
        public static double FindMinimum(string evaluation_measure,
                                         IList <string> hp_names,
                                         IList <Vector> initial_hp_values,
                                         RatingPredictor recommender,         // TODO make more general?
                                         ISplit <IRatings> split)
        {
            // both dictionaries are keyed by the serialized hyperparameter configuration string;
            // results holds the measured quality, hp_vectors the corresponding raw vectors
            var results    = new Dictionary <string, double>();
            var hp_vectors = new Dictionary <string, Vector>();

            // initialize the simplex by evaluating every initial vertex
            foreach (var hp_values in initial_hp_values)
            {
                string hp_string = CreateConfigString(hp_names, hp_values);
                results[hp_string]    = Run(recommender, split, hp_string, evaluation_measure);
                hp_vectors[hp_string] = hp_values;
            }

            List <string> keys;

            for (int i = 0; i < num_it; i++)
            {
                // sanity check: the two dictionaries must stay in lock-step
                if (results.Count != hp_vectors.Count)
                {
                    throw new Exception(string.Format("{0} vs. {1}", results.Count, hp_vectors.Count));
                }

                // order vertices from best (lowest measure) to worst
                keys = new List <string>(results.Keys);
                keys.Sort(delegate(string k1, string k2) { return(results[k1].CompareTo(results[k2])); });

                var min_key = keys.First();
                var max_key = keys.Last();

                Console.Error.WriteLine("Nelder-Mead: iteration {0} ({1})", i, results[min_key]);

                // temporarily remove the worst vertex; the remaining vertices define the centroid
                var worst_vector = hp_vectors[max_key];
                var worst_result = results[max_key];
                hp_vectors.Remove(max_key);
                results.Remove(max_key);

                // compute center
                var center = ComputeCenter(results, hp_vectors);

                // reflection: mirror the worst vertex through the centroid
                //Console.Error.WriteLine("ref");
                var    reflection = center + alpha * (center - worst_vector);
                string ref_string = CreateConfigString(hp_names, reflection);
                double ref_result = Run(recommender, split, ref_string, evaluation_measure);
                // accept reflection if it is between the best and the second-worst vertex
                // (worst was already removed, so results.Values.Max() is the second-worst)
                if (results[min_key] <= ref_result && ref_result < results.Values.Max())
                {
                    results[ref_string]    = ref_result;
                    hp_vectors[ref_string] = reflection;
                    continue;
                }

                // expansion: reflection was the new best, so try moving even further
                if (ref_result < results[min_key])
                {
                    //Console.Error.WriteLine("exp");

                    var    expansion  = center + gamma * (center - worst_vector);
                    string exp_string = CreateConfigString(hp_names, expansion);
                    double exp_result = Run(recommender, split, exp_string, evaluation_measure);
                    if (exp_result < ref_result)
                    {
                        results[exp_string]    = exp_result;
                        hp_vectors[exp_string] = expansion;
                    }
                    else
                    {
                        results[ref_string]    = ref_result;
                        hp_vectors[ref_string] = reflection;
                    }
                    continue;
                }

                // contraction: pull the worst vertex towards the centroid
                //Console.Error.WriteLine("con");
                var    contraction = worst_vector + rho * (center - worst_vector);
                string con_string  = CreateConfigString(hp_names, contraction);
                double con_result  = Run(recommender, split, con_string, evaluation_measure);
                if (con_result < worst_result)
                {
                    results[con_string]    = con_result;
                    hp_vectors[con_string] = contraction;
                    continue;
                }

                // reduction (shrink): move every vertex except the best towards the best
                //Console.Error.WriteLine("red");
                var best_vector = hp_vectors[min_key];
                var best_result = results[min_key];
                hp_vectors.Remove(min_key);
                results.Remove(min_key);
                foreach (var key in new List <string>(results.Keys))
                {
                    var    reduction  = hp_vectors[key] + sigma * (hp_vectors[key] - best_vector);
                    string red_string = CreateConfigString(hp_names, reduction);
                    double red_result = Run(recommender, split, red_string, evaluation_measure);

                    // replace by reduced vector
                    results.Remove(key);
                    hp_vectors.Remove(key);
                    results[red_string]    = red_result;
                    hp_vectors[red_string] = reduction;
                }
                // restore best and (removed above) worst vertex to keep the simplex size constant
                results[min_key]    = best_result;
                hp_vectors[min_key] = best_vector;
                results[max_key]    = worst_result;
                hp_vectors[max_key] = worst_vector;
            }

            keys = new List <string>(results.Keys);
            keys.Sort(delegate(string k1, string k2) { return(results[k1].CompareTo(results[k2])); });

            // set to best hyperparameter values
            Recommender.Configure(recommender, keys.First());

            return(results[keys.First()]);
        }
Exemplo n.º 25
0
    /// <summary>Command-line entry point for the KDD Cup 2011 rating prediction tool.</summary>
    /// <remarks>
    /// Parses key=value style parameters, loads the track1/track2 data, then runs DoTrack1().
    /// Relies on file-level state set here and used elsewhere (find_iter, max_iter, compute_fit,
    /// epsilon, rmse_cutoff, mae_cutoff, track2, sample_data, save_model_file, load_model_file,
    /// no_eval, prediction_file, cross_validation, good_rating_prob, recommender, training_ratings).
    /// </remarks>
    static void Main(string[] args)
    {
        Assembly assembly = Assembly.GetExecutingAssembly();

        // load the experimental assembly from the executable's own directory
        Assembly.LoadFile(Path.GetDirectoryName(assembly.Location) + Path.DirectorySeparatorChar + "MyMediaLiteExperimental.dll");

        // KDD Cup 2011 ratings are on a 0..100 scale
        double min_rating = 0;
        double max_rating = 100;

        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
        Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

        // check number of command line parameters
        if (args.Length < 1)
        {
            Usage("Not enough arguments.");
        }

        // read command line parameters
        string method = args[0];

        RecommenderParameters parameters = null;

        try     { parameters = new RecommenderParameters(args, 1); }
        catch (ArgumentException e) { Usage(e.Message); }

        // arguments for iteration search
        find_iter   = parameters.GetRemoveInt32("find_iter", 0);
        max_iter    = parameters.GetRemoveInt32("max_iter", 500);
        compute_fit = parameters.GetRemoveBool("compute_fit", false);
        epsilon     = parameters.GetRemoveDouble("epsilon", 0);
        rmse_cutoff = parameters.GetRemoveDouble("rmse_cutoff", double.MaxValue);
        mae_cutoff  = parameters.GetRemoveDouble("mae_cutoff", double.MaxValue);

        // data arguments
        string data_dir = parameters.GetRemoveString("data_dir");

        // pick the track-specific data subdirectory
        track2 = parameters.GetRemoveBool("track2", false);
        if (data_dir != string.Empty)
        {
            data_dir = data_dir + (track2 ? "/mml-track2" : "/track1");
        }
        else
        {
            data_dir = track2 ? "/mml-track2" : "track1";
        }
        sample_data = parameters.GetRemoveBool("sample_data", false);

        // other arguments
        save_model_file = parameters.GetRemoveString("save_model");
        load_model_file = parameters.GetRemoveString("load_model");
        int random_seed = parameters.GetRemoveInt32("random_seed", -1);

        no_eval          = parameters.GetRemoveBool("no_eval", false);
        prediction_file  = parameters.GetRemoveString("prediction_file");
        cross_validation = parameters.GetRemoveInt32("cross_validation", 0);
        good_rating_prob = parameters.GetRemoveBool("good_rating_prob", false);

        // when predicting the probability of a good rating, outputs live in [0, 1]
        if (good_rating_prob)
        {
            max_rating = 1;
        }

        if (random_seed != -1)
        {
            MyMediaLite.Util.Random.InitInstance(random_seed);
        }

        recommender = Recommender.CreateRatingPredictor(method);
        if (recommender == null)
        {
            Usage(string.Format("Unknown method: '{0}'", method));
        }

        Recommender.Configure(recommender, parameters, Usage);

        // unknown leftover parameters are a usage error
        if (parameters.CheckForLeftovers())
        {
            Usage(-1);
        }

        // load all the data
        TimeSpan loading_time = Utils.MeasureTime(delegate() { LoadData(data_dir); });

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "loading_time {0,0:0.##}", loading_time.TotalSeconds));

        recommender.Ratings = training_ratings;

        recommender.MinRating = min_rating;
        recommender.MaxRating = max_rating;
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

        if (load_model_file != string.Empty)
        {
            Recommender.LoadModel(recommender, load_model_file);
        }

        DoTrack1();

        Console.Error.WriteLine("memory {0}", Memory.Usage);
    }
Exemplo n.º 26
0
    static void Main(string[] args)
    {
        Assembly assembly = Assembly.GetExecutingAssembly();
        Assembly.LoadFile(Path.GetDirectoryName(assembly.Location) + Path.DirectorySeparatorChar + "MyMediaLiteExperimental.dll");

        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
        Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

        // recommender arguments
        string method              = "BiasedMatrixFactorization";
        string recommender_options = string.Empty;

        // help/version
        bool show_help    = false;
        bool show_version = false;

        // arguments for iteration search
        int find_iter      = 0;
        int max_iter       = 500;
        double epsilon     = 0;
        double rmse_cutoff = double.MaxValue;
        double mae_cutoff  = double.MaxValue;

        // data arguments
        string data_dir             = string.Empty;
        string user_attributes_file = string.Empty;
        string item_attributes_file = string.Empty;
        string user_relations_file  = string.Empty;
        string item_relations_file  = string.Empty;

        // other arguments
        bool online_eval       = false;
        bool search_hp         = false;
        string save_model_file = string.Empty;
        string load_model_file = string.Empty;
        int random_seed        = -1;
        string prediction_file = string.Empty;
        string prediction_line = "{0}\t{1}\t{2}";
        int cross_validation   = 0;
        double split_ratio     = 0;

           	var p = new OptionSet() {
            // string-valued options
            { "training-file=",       v              => training_file        = v },
            { "test-file=",           v              => test_file            = v },
            { "recommender=",         v              => method               = v },
            { "recommender-options=", v              => recommender_options += " " + v },
           			{ "data-dir=",            v              => data_dir             = v },
            { "user-attributes=",     v              => user_attributes_file = v },
            { "item-attributes=",     v              => item_attributes_file = v },
            { "user-relations=",      v              => user_relations_file  = v },
            { "item-relations=",      v              => item_relations_file  = v },
            { "save-model=",          v              => save_model_file      = v },
            { "load-model=",          v              => load_model_file      = v },
            { "prediction-file=",     v              => prediction_file      = v },
            { "prediction-line=",     v              => prediction_line      = v },
            // integer-valued options
           			{ "find-iter=",           (int v)        => find_iter            = v },
            { "max-iter=",            (int v)        => max_iter             = v },
            { "random-seed=",         (int v)        => random_seed          = v },
            { "cross-validation=",    (int v)        => cross_validation     = v },
            // double-valued options
            { "epsilon=",             (double v)     => epsilon              = v },
            { "rmse-cutoff=",         (double v)     => rmse_cutoff          = v },
            { "mae-cutoff=",          (double v)     => mae_cutoff           = v },
            { "split-ratio=",         (double v)     => split_ratio          = v },
            // enum options
            { "rating-type=",         (RatingType v) => rating_type          = v },
            { "file-format=",         (RatingFileFormat v) => file_format    = v },
            // boolean options
            { "compute-fit",          v => compute_fit  = v != null },
            { "online-evaluation",    v => online_eval  = v != null },
            { "search-hp",            v => search_hp    = v != null },
            { "help",                 v => show_help    = v != null },
            { "version",              v => show_version = v != null },
           	  	};
           		IList<string> extra_args = p.Parse(args);

        // TODO make sure interaction of --find-iter and --cross-validation works properly

        bool no_eval = test_file == null;

        if (show_version)
            ShowVersion();
        if (show_help)
            Usage(0);

        if (extra_args.Count > 0)
            Usage("Did not understand " + extra_args[0]);

        if (training_file == null)
            Usage("Parameter --training-file=FILE is missing.");

        if (cross_validation != 0 && split_ratio != 0)
            Usage("--cross-validation=K and --split-ratio=NUM are mutually exclusive.");

        if (random_seed != -1)
            MyMediaLite.Util.Random.InitInstance(random_seed);

        recommender = Recommender.CreateRatingPredictor(method);
        if (recommender == null)
            Usage(string.Format("Unknown method: '{0}'", method));

        Recommender.Configure(recommender, recommender_options, Usage);

        // ID mapping objects
        if (file_format == RatingFileFormat.KDDCUP_2011)
        {
            user_mapping = new IdentityMapping();
            item_mapping = new IdentityMapping();
        }

        // load all the data
        LoadData(data_dir, user_attributes_file, item_attributes_file, user_relations_file, item_relations_file, !online_eval);

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

        if (split_ratio > 0)
        {
            var split = new RatingsSimpleSplit(training_data, split_ratio);
            recommender.Ratings = split.Train[0];
            training_data = split.Train[0];
            test_data     = split.Test[0];
        }

        Utils.DisplayDataStats(training_data, test_data, recommender);

        if (find_iter != 0)
        {
            if ( !(recommender is IIterativeModel) )
                Usage("Only iterative recommenders support find_iter.");
            var iterative_recommender = (IIterativeModel) recommender;
            Console.WriteLine(recommender.ToString() + " ");

            if (load_model_file == string.Empty)
                recommender.Train();
            else
                Recommender.LoadModel(iterative_recommender, load_model_file);

            if (compute_fit)
                Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", iterative_recommender.ComputeFit()));

            MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data));
            Console.WriteLine(" iteration " + iterative_recommender.NumIter);

            for (int i = (int) iterative_recommender.NumIter + 1; i <= max_iter; i++)
            {
                TimeSpan time = Utils.MeasureTime(delegate() {
                    iterative_recommender.Iterate();
                });
                training_time_stats.Add(time.TotalSeconds);

                if (i % find_iter == 0)
                {
                    if (compute_fit)
                    {
                        double fit = 0;
                        time = Utils.MeasureTime(delegate() {
                            fit = iterative_recommender.ComputeFit();
                        });
                        fit_time_stats.Add(time.TotalSeconds);
                        Console.Write(string.Format(CultureInfo.InvariantCulture, "fit {0,0:0.#####} ", fit));
                    }

                    Dictionary<string, double> results = null;
                    time = Utils.MeasureTime(delegate() { results = MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data); });
                    eval_time_stats.Add(time.TotalSeconds);
                    MyMediaLite.Eval.Ratings.DisplayResults(results);
                    rmse_eval_stats.Add(results["RMSE"]);
                    Console.WriteLine(" iteration " + i);

                    Recommender.SaveModel(recommender, save_model_file, i);
                    if (prediction_file != string.Empty)
                        Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file + "-it-" + i);

                    if (epsilon > 0.0 && results["RMSE"] - rmse_eval_stats.Min() > epsilon)
                    {
                        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results["RMSE"], rmse_eval_stats.Min()));
                        Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", i);
                        break;
                    }
                    if (results["RMSE"] > rmse_cutoff || results["MAE"] > mae_cutoff)
                    {
                            Console.Error.WriteLine("Reached cutoff after {0} iterations.", i);
                            break;
                    }
                }
            } // for

            DisplayStats();
        }
        else
        {
            TimeSpan seconds;

            if (load_model_file == string.Empty)
            {
                if (cross_validation > 0)
                {
                    Console.Write(recommender.ToString());
                    Console.WriteLine();
                    var split = new RatingCrossValidationSplit(training_data, cross_validation);
                    var results = MyMediaLite.Eval.Ratings.EvaluateOnSplit(recommender, split); // TODO if (search_hp)
                    MyMediaLite.Eval.Ratings.DisplayResults(results);
                    no_eval = true;
                    recommender.Ratings = training_data;
                }
                else
                {
                    if (search_hp)
                    {
                        // TODO --search-hp-criterion=RMSE
                        double result = NelderMead.FindMinimum("RMSE", recommender);
                        Console.Error.WriteLine("estimated quality (on split) {0}", result.ToString(CultureInfo.InvariantCulture));
                        // TODO give out hp search time
                    }

                    Console.Write(recommender.ToString());
                    seconds = Utils.MeasureTime( delegate() { recommender.Train(); } );
                    Console.Write(" training_time " + seconds + " ");
                }
            }
            else
            {
                Recommender.LoadModel(recommender, load_model_file);
                Console.Write(recommender.ToString() + " ");
            }

            if (!no_eval)
            {
                if (online_eval)  // TODO support also for prediction outputs (to allow external evaluation)
                    seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.EvaluateOnline(recommender, test_data)); });
                else
                    seconds = Utils.MeasureTime(delegate() { MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, test_data)); });

                Console.Write(" testing_time " + seconds);
            }

            if (compute_fit)
            {
                Console.Write("fit ");
                seconds = Utils.MeasureTime(delegate() {
                    MyMediaLite.Eval.Ratings.DisplayResults(MyMediaLite.Eval.Ratings.Evaluate(recommender, training_data));
                });
                Console.Write(string.Format(CultureInfo.InvariantCulture, " fit_time {0,0:0.#####} ", seconds));
            }

            if (prediction_file != string.Empty)
            {
                seconds = Utils.MeasureTime(delegate() {
                        Console.WriteLine();
                        Prediction.WritePredictions(recommender, test_data, user_mapping, item_mapping, prediction_line, prediction_file);
                });
                Console.Error.Write("predicting_time " + seconds);
            }

            Console.WriteLine();
            Console.Error.WriteLine("memory {0}", Memory.Usage);
        }
        Recommender.SaveModel(recommender, save_model_file);
    }
    public static void Main(string[] args)
    {
        // TODO add random seed
        // TODO report per-user times

        // command line: <data file> <recommender> <recommender options> <number of test users>
        string data_file      = args[0];
        string method         = args[1];
        string options        = args[2];
        int    num_test_users = int.Parse(args[3]);

        // read the complete rating dataset
        var all_data = RatingData.Read(data_file);

        // TODO randomize
        // the first num_test_users user IDs serve as test users
        var test_users = new HashSet<int>(Enumerable.Range(0, num_test_users));

        // For every test user with more than one rating, all ratings except the
        // last one are used for model updates; the last one is held out for evaluation.
        var update_indices = new List<int>();
        var eval_indices   = new List<int>();
        foreach (int user_id in test_users)
        {
            var user_indices = all_data.ByUser[user_id];
            if (user_indices.Count > 1)
            {
                for (int pos = 0; pos + 1 < user_indices.Count; pos++)
                    update_indices.Add(user_indices[pos]);
                eval_indices.Add(user_indices[user_indices.Count - 1]);
            }
        }

        // ratings by all non-test users form the training set
        var training_data = new MyMediaLite.Data.Ratings();
        for (int index = 0; index < all_data.Count; index++)
            if (!test_users.Contains(all_data.Users[index]))
                training_data.Add(all_data.Users[index], all_data.Items[index], all_data[index]);

        var update_data = new RatingsProxy(all_data, update_indices);
        var eval_data   = new RatingsProxy(all_data, eval_indices);

        Console.Write(training_data.Statistics());
        Console.Write(update_data.Statistics());
        Console.Write(eval_data.Statistics());

        // set up and train the recommender on the training data
        RatingPredictor recommender = method.CreateRatingPredictor();
        recommender.Configure(options);
        recommender.Ratings = training_data;
        Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));
        Console.WriteLine("recommender: {0}", recommender);
        recommender.Train();

        // Compare three strategies of incorporating the update data:

        // I. complete retraining
        Console.WriteLine(
            "complete training: {0}",
            recommender.EvaluateFoldInCompleteRetraining(update_data, eval_data));

        // II. online updates
        Console.WriteLine(
            "incremental training: {0}",
            ((IncrementalRatingPredictor)recommender).EvaluateFoldInIncrementalTraining(update_data, eval_data));

        // III. fold-in
        Console.WriteLine(
            "fold-in: {0}",
            ((IFoldInRatingPredictor)recommender).EvaluateFoldIn(update_data, eval_data));
    }
        /// <summary>Evaluate an iterative recommender on the folds of a dataset split, display results on STDOUT</summary>
        /// <param name="recommender">a rating predictor; must also implement <see cref="IIterativeModel"/></param>
        /// <param name="split">a rating dataset split</param>
        /// <param name="test_users">a collection of integers with all test users</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="candidate_item_mode">the mode used to determine the candidate items</param>
        /// <param name="repeated_events">allow repeated events in the evaluation (i.e. items accessed by a user before may be in the recommended list)</param>
        /// <param name="max_iter">the maximum number of iterations</param>
        /// <param name="find_iter">the report interval</param>
        /// <param name="show_fold_results">if set to true, print per-fold results in addition to the averaged results</param>
        /// <exception cref="ArgumentException">thrown if the recommender does not implement IIterativeModel</exception>
        static public void DoRatingBasedRankingIterativeCrossValidation(
            this RatingPredictor recommender,
            ISplit <IRatings> split,
            IList <int> test_users,
            IList <int> candidate_items,
            CandidateItems candidate_item_mode,
            RepeatedEvents repeated_events,
            uint max_iter,
            uint find_iter         = 1,
            bool show_fold_results = false)
        {
            if (!(recommender is IIterativeModel))
            {
                throw new ArgumentException("recommender must be of type IIterativeModel");
            }

            var split_recommenders     = new RatingPredictor[split.NumberOfFolds];
            var iterative_recommenders = new IIterativeModel[split.NumberOfFolds];
            var fold_results           = new ItemRecommendationEvaluationResults[split.NumberOfFolds];

            // initial training and evaluation, one fold per task
            Parallel.For(0, (int)split.NumberOfFolds, i =>
            {
                try
                {
                    split_recommenders[i]         = (RatingPredictor)recommender.Clone();              // to avoid changes in recommender
                    split_recommenders[i].Ratings = split.Train[i];
                    split_recommenders[i].Train();
                    iterative_recommenders[i] = (IIterativeModel)split_recommenders[i];

                    var test_data_posonly     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[i]);
                    var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[i]);
                    fold_results[i]           = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
                    if (show_fold_results)
                    {
                        // print this fold's results (was: printed the whole array, which only shows the type name)
                        Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], iterative_recommenders[i].NumIter);
                    }
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                    throw;
                }
            });
            Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), iterative_recommenders[0].NumIter);

            // iterative training and evaluation; results are reported every find_iter iterations
            for (int it = (int)iterative_recommenders[0].NumIter + 1; it <= max_iter; it++)
            {
                Parallel.For(0, (int)split.NumberOfFolds, i =>
                {
                    try
                    {
                        iterative_recommenders[i].Iterate();

                        if (it % find_iter == 0)
                        {
                            var test_data_posonly     = new PosOnlyFeedback <SparseBooleanMatrix>(split.Test[i]);
                            var training_data_posonly = new PosOnlyFeedback <SparseBooleanMatrix>(split.Train[i]);

                            fold_results[i] = Items.Evaluate(split_recommenders[i], test_data_posonly, training_data_posonly, test_users, candidate_items, candidate_item_mode, repeated_events);
                            if (show_fold_results)
                            {
                                // print this fold's results (was: printed the whole array, which only shows the type name)
                                Console.WriteLine("fold {0} {1} iteration {2}", i, fold_results[i], it);
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        Console.Error.WriteLine("===> ERROR: " + e.Message + e.StackTrace);
                        throw;
                    }
                });
                Console.WriteLine("{0} iteration {1}", new ItemRecommendationEvaluationResults(fold_results), it);
            }
        }
Exemplo n.º 29
0
    /// <summary>Entry point of the rating prediction tool: parses command-line options,
    /// sets up the recommender, loads data, trains/evaluates, and saves the model.</summary>
    static void Main(string[] args)
    {
        AppDomain.CurrentDomain.UnhandledException += new UnhandledExceptionEventHandler(Handlers.UnhandledExceptionHandler);
        Console.CancelKeyPress += new ConsoleCancelEventHandler(AbortHandler);

        // recommender arguments
        string method              = null;
        string recommender_options = string.Empty;

        // help/version
        bool show_help    = false;
        bool show_version = false;

        // arguments for iteration search
        int max_iter   = 100;
        string measure = "RMSE";
        double epsilon = 0;
        double cutoff  = double.MaxValue;

        // other arguments
        bool search_hp             = false;
        int random_seed            = -1;
        string prediction_line     = "{0}\t{1}\t{2}";
        string prediction_header   = null;

        var p = new OptionSet() {
            // string-valued options
            { "training-file=",       v              => training_file        = v },
            { "test-file=",           v              => test_file            = v },
            { "recommender=",         v              => method               = v },
            { "recommender-options=", v              => recommender_options += " " + v },
            { "data-dir=",            v              => data_dir             = v },
            { "user-attributes=",     v              => user_attributes_file = v },
            { "item-attributes=",     v              => item_attributes_file = v },
            { "user-relations=",      v              => user_relations_file  = v },
            { "item-relations=",      v              => item_relations_file  = v },
            { "save-model=",          v              => save_model_file      = v },
            { "load-model=",          v              => load_model_file      = v },
            { "save-user-mapping=",   v              => save_user_mapping_file = v },
            { "save-item-mapping=",   v              => save_item_mapping_file = v },
            { "load-user-mapping=",   v              => load_user_mapping_file = v },
            { "load-item-mapping=",   v              => load_item_mapping_file = v },
            { "prediction-file=",     v              => prediction_file      = v },
            { "prediction-line=",     v              => prediction_line      = v },
            { "prediction-header=",   v              => prediction_header    = v },
            { "chronological-split=", v              => chronological_split  = v },
            { "measure=",             v              => measure              = v },
            // integer-valued options
            { "find-iter=",           (int v)        => find_iter            = v },
            { "max-iter=",            (int v)        => max_iter             = v },
            { "random-seed=",         (int v)        => random_seed          = v },
            { "cross-validation=",    (uint v)       => cross_validation     = v },
            // double-valued options
            { "epsilon=",             (double v)     => epsilon              = v },
            { "cutoff=",              (double v)     => cutoff               = v },
            { "test-ratio=",          (double v)     => test_ratio           = v },
            // enum options
            { "rating-type=",         (RatingType v) => rating_type          = v },
            { "file-format=",         (RatingFileFormat v) => file_format    = v },
            // boolean options
            { "compute-fit",          v => compute_fit       = v != null },
            { "online-evaluation",    v => online_eval       = v != null },
            { "show-fold-results",    v => show_fold_results = v != null },
            { "search-hp",            v => search_hp         = v != null },
            { "no-id-mapping",        v => no_id_mapping     = v != null },
            { "help",                 v => show_help         = v != null },
            { "version",              v => show_version      = v != null },
        };
        IList<string> extra_args = p.Parse(args);

        // evaluation is only possible if some kind of test data is available
        bool no_eval = true;
        if (test_ratio > 0 || test_file != null || chronological_split != null)
            no_eval = false;

        if (show_version)
            ShowVersion();
        if (show_help)
            Usage(0);

        if (random_seed != -1)
            MyMediaLite.Util.Random.Seed = random_seed;

        // set up recommender
        if (load_model_file != null)
            recommender = (RatingPredictor) Model.Load(load_model_file);
        else if (method != null)
            recommender = Recommender.CreateRatingPredictor(method);
        else
            recommender = Recommender.CreateRatingPredictor("BiasedMatrixFactorization");
        // in case something went wrong ...
        if (recommender == null && method != null)
            Usage(string.Format("Unknown rating prediction method: '{0}'", method));
        if (recommender == null && load_model_file != null)
            Abort(string.Format("Could not load model from file {0}.", load_model_file));

        CheckParameters(extra_args);

        recommender.Configure(recommender_options, (string m) => { Console.Error.WriteLine(m); Environment.Exit(-1); });

        // ID mapping objects
        if (file_format == RatingFileFormat.KDDCUP_2011 || no_id_mapping)
        {
            user_mapping = new IdentityMapping();
            item_mapping = new IdentityMapping();
        }
        if (load_user_mapping_file != null)
            user_mapping = EntityMappingExtensions.LoadMapping(load_user_mapping_file);
        if (load_item_mapping_file != null)
            item_mapping = EntityMappingExtensions.LoadMapping(load_item_mapping_file);

        // load all the data
        LoadData(!online_eval);

        // if requested, save ID mappings
        if (save_user_mapping_file != null)
            user_mapping.SaveMapping(save_user_mapping_file);
        if (save_item_mapping_file != null)
            item_mapping.SaveMapping(save_item_mapping_file);

        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "ratings range: [{0}, {1}]", recommender.MinRating, recommender.MaxRating));

        // random train/test split
        if (test_ratio > 0)
        {
            var split = new RatingsSimpleSplit(training_data, test_ratio);
            recommender.Ratings = training_data = split.Train[0];
            test_data = split.Test[0];
            Console.Error.WriteLine(string.Format( CultureInfo.InvariantCulture, "test ratio {0}", test_ratio));
        }
        // chronological train/test split, either by ratio or by split time
        if (chronological_split != null)
        {
            var split = chronological_split_ratio != -1
                            ? new RatingsChronologicalSplit((ITimedRatings) training_data, chronological_split_ratio)
                            : new RatingsChronologicalSplit((ITimedRatings) training_data, chronological_split_time);
            recommender.Ratings = training_data = split.Train[0];
            test_data = split.Test[0];
            // report which kind of chronological split was actually performed
            // (was: tested test_ratio != -1, which is unrelated to the condition used to build the split above)
            if (chronological_split_ratio != -1)
                Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "test ratio (chronological) {0}", chronological_split_ratio));
            else
                Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "split time {0}", chronological_split_time));
        }

        Console.Write(training_data.Statistics(test_data, user_attributes, item_attributes));

        if (find_iter != 0)
        {
            // report results every find_iter iterations until max_iter is reached
            if ( !(recommender is IIterativeModel) )
                Abort("Only iterative recommenders (interface IIterativeModel) support --find-iter=N.");

            Console.WriteLine(recommender.ToString());

            if (cross_validation > 1)
            {
                recommender.DoIterativeCrossValidation(cross_validation, max_iter, find_iter);
            }
            else
            {
                var iterative_recommender = (IIterativeModel) recommender;
                var eval_stats = new List<double>();

                if (load_model_file == null)
                    recommender.Train();

                if (compute_fit)
                    Console.WriteLine("fit {0} iteration {1}", recommender.Evaluate(training_data), iterative_recommender.NumIter);

                Console.WriteLine("{0} iteration {1}", recommender.Evaluate(test_data), iterative_recommender.NumIter);

                for (int it = (int) iterative_recommender.NumIter + 1; it <= max_iter; it++)
                {
                    TimeSpan time = Wrap.MeasureTime(delegate() {
                        iterative_recommender.Iterate();
                    });
                    training_time_stats.Add(time.TotalSeconds);

                    if (it % find_iter == 0)
                    {
                        if (compute_fit)
                        {
                            time = Wrap.MeasureTime(delegate() {
                                Console.WriteLine("fit {0} iteration {1}", recommender.Evaluate(training_data), it);
                            });
                            fit_time_stats.Add(time.TotalSeconds);
                        }

                        RatingPredictionEvaluationResults results = null;
                        time = Wrap.MeasureTime(delegate() { results = recommender.Evaluate(test_data); });
                        eval_time_stats.Add(time.TotalSeconds);
                        eval_stats.Add(results[measure]);
                        Console.WriteLine("{0} iteration {1}", results, it);

                        Model.Save(recommender, save_model_file, it);
                        if (prediction_file != null)
                            recommender.WritePredictions(test_data, prediction_file + "-it-" + it, user_mapping, item_mapping, prediction_line, prediction_header);

                        // convergence check on the configured evaluation measure
                        // (was: the message printed results["RMSE"] even when --measure selected another measure)
                        if (epsilon > 0.0 && results[measure] - eval_stats.Min() > epsilon)
                        {
                            Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0} >> {1}", results[measure], eval_stats.Min()));
                            Console.Error.WriteLine("Reached convergence on training/validation data after {0} iterations.", it);
                            break;
                        }
                        if (results[measure] > cutoff)
                        {
                            Console.Error.WriteLine("Reached cutoff after {0} iterations.", it);
                            break;
                        }
                    }
                } // for
            }
        }
        else
        {
            TimeSpan seconds;

            Console.Write(recommender + " ");

            if (load_model_file == null)
            {
                if (cross_validation > 1)
                {
                    Console.WriteLine();
                    var results = recommender.DoCrossValidation(cross_validation, compute_fit, show_fold_results);
                    Console.Write(results);
                    no_eval = true;
                }
                else
                {
                    if (search_hp)
                    {
                        // hyperparameter search before the final training run
                        double result = NelderMead.FindMinimum("RMSE", recommender);
                        Console.Error.WriteLine("estimated quality (on split) {0}", result.ToString(CultureInfo.InvariantCulture));
                    }

                    seconds = Wrap.MeasureTime( delegate() { recommender.Train(); } );
                    Console.Write(" training_time " + seconds + " ");
                }
            }

            if (!no_eval)
            {
                if (online_eval)
                    seconds = Wrap.MeasureTime(delegate() { Console.Write(recommender.EvaluateOnline(test_data)); });
                else
                    seconds = Wrap.MeasureTime(delegate() { Console.Write(recommender.Evaluate(test_data)); });

                Console.Write(" testing_time " + seconds);

                if (compute_fit)
                {
                    Console.Write("\nfit ");
                    seconds = Wrap.MeasureTime(delegate() {
                        Console.Write(recommender.Evaluate(training_data));
                    });
                    Console.Write(" fit_time " + seconds);
                }

                if (prediction_file != null)
                {
                    Console.WriteLine();
                    seconds = Wrap.MeasureTime(delegate() {
                        recommender.WritePredictions(test_data, prediction_file, user_mapping, item_mapping, prediction_line, prediction_header);
                    });
                    Console.Error.Write("prediction_time " + seconds);
                }
            }

            Console.WriteLine();
        }
        Model.Save(recommender, save_model_file);
        DisplayStats();
    }
        /// <summary>Evaluates top-K recommendation quality: counts, over all (user, item) test pairs
        /// in <paramref name="maxRatings"/>, how often the held-out item appears (with a non-zero
        /// predicted value) among the top-K predicted ratings, then derives recall/precision/coverage.</summary>
        /// <param name="ratingsMatrix">rating data passed through to PredictRatings</param>
        /// <param name="weightsMatrix">weight data passed through to PredictRatings</param>
        /// <param name="maxRatings">per-user collections of test items; each entry yields one test rating</param>
        /// <param name="ratingPredictor">the recommender used to produce the predictions</param>
        public void Evaluate(Matrix <int> ratingsMatrix, Matrix <float> weightsMatrix, Matrix <int> maxRatings, RatingPredictor ratingPredictor)
        {
            // global accumulators; updated only in the per-partition finalizer via Interlocked.Add
            int  testRatings      = 0;
            long hits             = 0;
            int  ratings          = 0;
            int  ratingsPredicted = 0;
            int  itemsPredicted   = 0;

            // Parallel loop with thread-local state:
            // first type parameter is the type of the source elements,
            // second type parameter is the type of the thread-local variable (partition subtotal:
            // hits, test ratings, ratings predicted, items predicted).

            Parallel.ForEach(maxRatings.Rows,                                   // source collection
                             () => new Tuple <long, int, int, int>(0, 0, 0, 0), // method to initialize the local variable
                             (user, loop, tuple) =>                             // method invoked by the loop on each iteration
            {
                long localHits            = 0;
                int localTestRatings      = 0;
                // NOTE(review): localRatingsPredicted is never incremented anywhere in this loop,
                // so RatingsPredicted (and therefore Coverage) always ends up 0 — confirm whether intentional.
                int localRatingsPredicted = 0;
                int localItemsPredicted   = 0;
                foreach (int item in maxRatings[user])
                {
                    List <KeyValuePair <int, double> > predictedRatings = PredictRatings(ratingsMatrix, weightsMatrix, ratingPredictor, user, item);

                    // a hit: the held-out item is within the top-K predictions with a non-zero value
                    for (int i = 0; i < this.topK && i < predictedRatings.Count; i++)
                    {
                        if (predictedRatings[i].Key == item && predictedRatings[i].Value != 0)
                        {
                            localHits++;
                        }
                    }

                    // the item was predicted at all (any rank), with a non-zero value
                    foreach (KeyValuePair <int, double> pair in predictedRatings)
                    {
                        if (pair.Key == item && pair.Value != 0)
                        {
                            localItemsPredicted++;
                        }
                    }

                    localTestRatings++;
                }

                return(new Tuple <long, int, int, int>(tuple.Item1 + localHits, tuple.Item2 + localTestRatings, tuple.Item3 + localRatingsPredicted, tuple.Item4 + localItemsPredicted)); // value to be passed to next iteration
            },
                                                                                                                                                                                          // Method to be executed when each partition has completed.
                                                                                                                                                                                          // finalResult is the final value of subtotal for a particular partition.
                             (pair) =>
            {
                Interlocked.Add(ref hits, pair.Item1);
                Interlocked.Add(ref testRatings, pair.Item2);
                // 101 total candidates per test rating, presumably (1 positive + 100 sampled) — TODO confirm
                Interlocked.Add(ref ratings, 101 * pair.Item2);
                Interlocked.Add(ref ratingsPredicted, pair.Item3);
                Interlocked.Add(ref itemsPredicted, pair.Item4);
            }
                             );


            //foreach (int user in maxRatings.Rows)
            //{
            //	Parallel.ForEach(maxRatings.GetRow(user), // source collection
            //							() => 0, // method to initialize the local variable
            //							(userRatings, loop, localHits) => // method invoked by the loop on each iteration
            //							{
            //								List<KeyValuePair<int, double>> predictedRatings = PredictRatings(ratingsMatrix, weightsMatrix, ratingPredictor, user, userRatings.Key);

            //								for (int i = 0; i < this.topK; i++)
            //									if (predictedRatings[i].Key == userRatings.Key)
            //										localHits++;

            //								return localHits; // value to be passed to next iteration
            //							},
            //							// Method to be executed when each partition has completed.
            //							// finalResult is the final value of subtotal for a particular partition.
            //							(finalResult) => Interlocked.Add(ref hits, finalResult)
            //							);
            //}

            // publish the aggregated results as properties
            RatingsPredicted = ratingsPredicted;
            TotalRatings     = ratings;
            // NOTE(review): if RatingsPredicted and TotalRatings are int-typed properties, this is
            // integer division (truncates toward zero) — confirm a (double) cast is not needed,
            // cf. the explicit cast used for Recall below.
            Coverage         = RatingsPredicted / TotalRatings;
            ItemsPredicted   = itemsPredicted;
            Recall           = (double)hits / testRatings;
            Precision        = Recall / topK;
        }
Exemplo n.º 310
    /// <summary>Create and initialize the rating predictor used by this component.</summary>
    /// <remarks>
    /// Reads the ratings file, builds the list of the <c>n_movies</c> most frequently
    /// rated items, loads a pre-trained model, and triggers an initial prediction pass.
    /// Side effects: sets <c>rating_predictor</c>, <c>current_user_id</c>, and fills
    /// <c>movies_by_frequency</c> / <c>top_n_movies</c>.
    /// </remarks>
    private void CreateRecommender()
    {
        BiasedMatrixFactorization recommender = new BiasedMatrixFactorization();

        Console.Error.Write("Reading in ratings ... ");
        TimeSpan time = Utils.MeasureTime(delegate() {
            recommender.Ratings = RatingPrediction.Read(ratings_file, user_mapping, item_mapping);
        });
        // Format the double directly so the "0.##" numeric format actually applies;
        // passing ToString() first produced a string, silently ignoring the format spec.
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "done ({0,0:0.##}).", time.TotalSeconds));

        //Console.Error.Write("Reading in additional ratings ... ");
        //string[] rating_files = Directory.GetFiles("../../saved_data/", "user-ratings-*");
        //Console.Error.WriteLine("done.");

        // rank items by how many ratings they received (most frequent first)
        foreach (var indices_for_item in recommender.Ratings.ByItem)
            if (indices_for_item.Count > 0)
                movies_by_frequency.Add( new WeightedItem(recommender.Ratings.Items[indices_for_item[0]], indices_for_item.Count) );
        movies_by_frequency.Sort();
        movies_by_frequency.Reverse();
        // bound by the list size: fewer than n_movies items may have ratings
        for (int i = 0; i < n_movies && i < movies_by_frequency.Count; i++)
            top_n_movies.Add( movies_by_frequency[i].item_id );

        Console.Error.Write("Loading prediction model ... ");
        // only user factors are updated online; item factors stay fixed after loading
        recommender.UpdateUsers = true;
        recommender.UpdateItems = false;
        recommender.BiasReg = 0.001;
        recommender.Regularization = 0.045;
        recommender.NumIter = 60;
        time = Utils.MeasureTime(delegate() {
            recommender.LoadModel(model_file);
        });
        Console.Error.WriteLine(string.Format(CultureInfo.InvariantCulture, "done ({0,0:0.##}).", time.TotalSeconds));

        rating_predictor = recommender;

        current_user_id = user_mapping.ToInternalID(current_user_external_id);
        //rating_predictor.AddUser(current_user_id);

        // add movies that were not in the training set
        //rating_predictor.AddItem( item_mapping.InternalIDs.Count - 1 );

        PredictAllRatings();
    }