Matrix factorization with explicit user and item bias; learning is performed by stochastic gradient descent.

By default, it optimizes for RMSE. Alternatively, you can set the Loss property to MAE or LogisticLoss. If set to log likelihood and used with binary ratings, the recommender implements a simple version of Menon and Elkan's LFL model, which predicts binary labels, has no advanced regularization, and uses no side information.

This recommender makes use of multi-core machines if requested. Just set MaxThreads to a large enough number (usually multiples of the number of available cores). The parallelization is based on ideas presented in the paper by Gemulla et al.

Literature: Ruslan Salakhutdinov, Andriy Mnih: Probabilistic Matrix Factorization. NIPS 2007. http://www.mit.edu/~rsalakhu/papers/nips07_pmf.pdf Steffen Rendle, Lars Schmidt-Thieme: Online-Updating Regularized Kernel Matrix Factorization Models for Large-Scale Recommender Systems. RecSys 2008. http://www.ismll.uni-hildesheim.de/pub/pdfs/Rendle2008-Online_Updating_Regularized_Kernel_Matrix_Factorization_Models.pdf Aditya Krishna Menon, Charles Elkan: A log-linear model with latent features for dyadic prediction. ICDM 2010. http://cseweb.ucsd.edu/~akmenon/LFL-ICDM10.pdf Rainer Gemulla, Peter J. Haas, Erik Nijkamp, Yannis Sismanis: Large-Scale Matrix Factorization with Distributed Stochastic Gradient Descent. KDD 2011. http://www.mpi-inf.mpg.de/~rgemulla/publications/gemulla11dsgd.pdf

This recommender supports incremental updates. See the paper by Rendle and Schmidt-Thieme.

Наследование: MatrixFactorization
        /// <summary>
        /// Asserts that, after InitModel(), both LearnRate and current_learnrate
        /// equal the learn rate the recommender was configured with.
        /// </summary>
        public void TestCurrentLearnRate()
        {
            const float configured_rate = 1.1f;

            var recommender = new BiasedMatrixFactorization();
            recommender.LearnRate = configured_rate;
            recommender.Ratings = TestUtils.CreateRatings();
            recommender.InitModel();

            // the configured value must be untouched, and the working value must match it
            Assert.AreEqual(configured_rate, recommender.LearnRate);
            Assert.AreEqual(configured_rate, recommender.current_learnrate);
        }
		/// <summary>
		/// Asserts that InitModel() leaves none of the model parameters
		/// (user/item factors, user/item biases) set to null.
		/// </summary>
		public void TestMatrixInit()
		{
			var recommender = new BiasedMatrixFactorization();
			recommender.Ratings = TestUtils.CreateRatings();

			recommender.InitModel();

			// latent factors
			Assert.IsNotNull(recommender.user_factors);
			Assert.IsNotNull(recommender.item_factors);
			// bias terms
			Assert.IsNotNull(recommender.user_bias);
			Assert.IsNotNull(recommender.item_bias);
		}
 /// <summary>
 /// Trains a model, then scores two candidate items from a single supplied rating
 /// and asserts that one result per candidate item comes back.
 /// </summary>
 public void TestFoldIn()
 {
     var recommender = new BiasedMatrixFactorization();
     recommender.Ratings = TestUtils.CreateRatings();
     recommender.Train();

     // the only rating handed to ScoreItems: the pair (0, 4.0f)
     var rated_items = new List<Tuple<int, float>> { new Tuple<int, float>(0, 4.0f) };

     // have a known and an unknown item
     var items_to_score = new List<int>();
     items_to_score.Add(0);
     items_to_score.Add(1);

     var scores = recommender.ScoreItems(rated_items, items_to_score);
     Assert.AreEqual(2, scores.Count);
 }
        /// <summary>
        /// Asserts that with LearnRate = 1.0 and LearnRateDecay = 0.5 the current learn rate
        /// is 0.5 after Train() with one iteration, and 0.25 after one further Iterate() call.
        /// </summary>
        public void TestDecay()
        {
            var recommender = new BiasedMatrixFactorization();
            recommender.LearnRate = 1.0f;
            recommender.LearnRateDecay = 0.5f;
            recommender.NumIter = 1;
            recommender.Ratings = TestUtils.CreateRatings();

            // after training with NumIter = 1: 1.0 * 0.5
            recommender.Train();
            Assert.AreEqual(0.5f, recommender.current_learnrate);

            // after one additional iteration: 0.5 * 0.5
            recommender.Iterate();
            Assert.AreEqual(0.25f, recommender.current_learnrate);
        }
Пример #5
0
	/// <summary>
	/// Example program: trains a UserItemBaseline recommender on the training file, evaluates it
	/// on the test file, prints a single prediction, and finally prints the cross-validation
	/// results of a BiasedMatrixFactorization recommender on the training data.
	/// </summary>
	/// <param name="args">args[0] is the training rating file, args[1] is the test rating file</param>
	public static void Main(string[] args)
	{
		// both file names are required; report a usage error instead of failing
		// with an IndexOutOfRangeException when one of them is missing
		if (args == null || args.Length < 2)
		{
			Console.Error.WriteLine("Usage: <program> TRAINING_FILE TEST_FILE");
			Environment.ExitCode = 1;
			return;
		}

		// load the data
		var training_data = RatingData.Read(args[0]);
		var test_data = RatingData.Read(args[1]);

		// set up the recommender
		var recommender = new UserItemBaseline();
		recommender.Ratings = training_data;
		recommender.Train();

		// measure the accuracy on the test data set
		var results = recommender.Evaluate(test_data);
		Console.WriteLine("RMSE={0} MAE={1}", results["RMSE"], results["MAE"]);
		Console.WriteLine(results);

		// make a prediction for a certain user and item
		Console.WriteLine(recommender.Predict(1, 1));

		// cross-validate a biased matrix factorization model on the same training data
		var bmf = new BiasedMatrixFactorization {Ratings = training_data};
		Console.WriteLine(bmf.DoCrossValidation());
	}
Пример #6
0
        /// <summary>
        /// Trains a BiasedMatrixFactorization recommender on TRAIN_DATA_FILE_NAME, prints its
        /// cross-validation results, and then writes one "user, item, rating" line to
        /// RESULTS_FILE_NAME for every data line of TEST_FILE_NAME.
        /// Timing information for each phase is printed to the console.
        /// </summary>
        /// <param name="args">command-line arguments (not used)</param>
        static void Main(string[] args)
        {
            var t = new Stopwatch();

            //Utils.Converter(@"D:\__testdata\lab10\train.csv");
            //return;

            t.Restart();
            Console.WriteLine("RatingData.Read Begin");
            var trainingData = RatingData.Read(TRAIN_DATA_FILE_NAME);
            Console.WriteLine("RatingData.Read End {0}", t.Elapsed.TotalSeconds);

            //t.Restart();
            //Console.WriteLine("RatingData.Read Begin");
            //var evalData = RatingData.Read(EVAL_DATA_FILE_NAME);
            //Console.WriteLine("RatingData.Read End {0}", t.Elapsed.TotalSeconds);

            //http://www.mymedialite.net/examples/datasets.html
            // num_factors=120
            // bias_reg=0.001
            // regularization=0.055
            // learn_rate=0.07
            // num_iter=100
            // bold_driver=true
            var recommender = new BiasedMatrixFactorization
            {
                Ratings = trainingData,
                NumIter = 10,
                MaxRating = 5,
                MinRating = 1,
            };

            t.Restart();
            Console.WriteLine("Training begin");
            recommender.Train();
            Console.WriteLine("Training end {0}", t.Elapsed.TotalSeconds);

            //t.Restart();
            //Console.WriteLine("Evaluate begin");
            //var resultsb = recommender.Evaluate(evalData);
            //Console.WriteLine("Evaluate end {0}", t.Elapsed.TotalSeconds);

            Console.WriteLine(recommender.DoCrossValidation());

            //Console.WriteLine("RMSE={0} MAE={1}", resultsb["RMSE"], resultsb["MAE"]);

            t.Restart();
            Console.WriteLine("Result Begin");

            // Hash sets give constant-time membership tests; scanning an array for every
            // test line made the prediction loop quadratic in practice.
            var knownUsers = new System.Collections.Generic.HashSet<int>(recommender.Ratings.AllUsers);
            var knownItems = new System.Collections.Generic.HashSet<int>(recommender.Ratings.AllItems);
            var ratingAvg = recommender.Ratings.Average;

            // Disposing the reader and writer at the end of the using blocks flushes and
            // closes both files, so no explicit Close() calls are needed.
            using (var testIn = new StreamReader(TEST_FILE_NAME))
            using (var resultOut = new StreamWriter(RESULTS_FILE_NAME))
            {
                var count = 0;
                string line;

                while ((line = testIn.ReadLine()) != null)
                {
                    // the first line (count == 0) is skipped, i.e. treated as a header row
                    if (count > 0)
                    {
                        var parts = line.Split(new[] {","}, StringSplitOptions.RemoveEmptyEntries);
                        var userId = int.Parse(parts[0]);
                        var itemId = int.Parse(parts[1]);

                        // fall back to the average training rating when the user or the item
                        // does not occur in the training data
                        var rating = !knownUsers.Contains(userId) || !knownItems.Contains(itemId)
                            ? ratingAvg
                            : recommender.Predict(userId, itemId);

                        resultOut.WriteLine("{0}, {1}, {2}", userId, itemId, rating);
                    }

                    // progress indicator and periodic flush every 10000 input lines
                    if (count%10000 == 0)
                    {
                        resultOut.Flush();
                        Console.Write(".");
                    }
                    count++;
                }
            }

            Console.WriteLine();
            Console.WriteLine("Result End {0}", t.Elapsed.TotalSeconds);

            t.Stop();

            Console.WriteLine("End");
            // keep the console window open until the user presses Enter
            Console.ReadLine();
        }
Пример #7
0
    /// <summary>
    /// Sets up the rating predictor: reads the ratings file, fills the frequency-ordered movie
    /// list and the top-N movie list, configures a BiasedMatrixFactorization recommender,
    /// loads its model from model_file, resolves the current user's internal ID,
    /// and finally calls PredictAllRatings().
    /// </summary>
    private void CreateRecommender()
    {
        BiasedMatrixFactorization recommender = new BiasedMatrixFactorization();

        Console.Error.Write("Reading in ratings ... ");
        TimeSpan time = Wrap.MeasureTime(delegate() {
            recommender.Ratings = RatingData.Read(ratings_file, user_mapping, item_mapping);
        });
        // Format the number itself: a numeric format spec inside the placeholder is ignored
        // once the argument has already been converted to a string.
        Console.Error.WriteLine("done ({0}).", time.TotalSeconds.ToString("0.##", CultureInfo.InvariantCulture));

        //Console.Error.Write("Reading in additional ratings ... ");
        //string[] rating_files = Directory.GetFiles("../../saved_data/", "user-ratings-*");
        //Console.Error.WriteLine("done.");

        // one (item ID, number of ratings) entry per item that has at least one rating
        foreach (var indices_for_item in recommender.Ratings.ByItem)
        {
            if (indices_for_item.Count > 0)
            {
                movies_by_frequency.Add(new Tuple<int, float>(recommender.Ratings.Items[indices_for_item[0]], indices_for_item.Count));
            }
        }
        movies_by_frequency = movies_by_frequency.OrderByDescending(x => x.Item2).ToList();

        // never read past the end of the list when fewer than n_movies movies were rated
        var top_count = Math.Min(n_movies, movies_by_frequency.Count);
        for (int i = 0; i < top_count; i++)
        {
            top_n_movies.Add( movies_by_frequency[i].Item1 );
        }

        Console.Error.Write("Loading prediction model ... ");
        recommender.UpdateUsers = true;
        recommender.UpdateItems = false;
        recommender.BiasReg = 0.001f;
        recommender.Regularization = 0.045f;
        recommender.NumIter = 60;
        time = Wrap.MeasureTime(delegate() {
            recommender.LoadModel(model_file);
        });
        Console.Error.WriteLine("done ({0}).", time.TotalSeconds.ToString("0.##", CultureInfo.InvariantCulture));

        rating_predictor = recommender;

        current_user_id = user_mapping.ToInternalID(current_user_external_id.ToString());
        //rating_predictor.AddUser(current_user_id);

        // add movies that were not in the training set
        //rating_predictor.AddItem( item_mapping.InternalIDs.Count - 1 );

        PredictAllRatings();
    }
 /// <summary>
 /// Asserts that, when no LearnRateDecay is configured, the current learn rate
 /// still equals the configured LearnRate after ten training iterations.
 /// </summary>
 public void TestDefaultBehaviorIsNoDecay()
 {
     const float configured_rate = 1.1f;

     var recommender = new BiasedMatrixFactorization();
     recommender.LearnRate = configured_rate;
     recommender.NumIter = 10;
     recommender.Ratings = TestUtils.CreateRatings();

     recommender.Train();

     Assert.AreEqual(configured_rate, recommender.current_learnrate);
 }
		/// <summary>
		/// Trains a model with frequency regularization enabled and then adds one rating whose
		/// user and item IDs are one past MaxUserID and MaxItemID.
		/// There are no assertions; the test passes if AddRatings() does not throw.
		/// </summary>
		public void TestIncrementalUpdates()
		{
			var recommender = new BiasedMatrixFactorization();
			recommender.Ratings = TestUtils.CreateRatings();
			recommender.FrequencyRegularization = true;
			recommender.Train();

			// IDs one past the largest user and item IDs the trained model reports
			int unseen_user_id = recommender.MaxUserID + 1;
			int unseen_item_id = recommender.MaxItemID + 1;

			var extra_ratings = new Ratings();
			extra_ratings.Add(unseen_user_id, unseen_item_id, 3f);

			recommender.AddRatings(extra_ratings);
		}
Пример #10
0
        /// <summary>
        /// Trains a BiasedMatrixFactorization model on the compact training file, evaluates it
        /// on the (modified) testing file, prints the training time, prediction time and RMSE,
        /// and writes the predictions to the ratings output file.
        /// </summary>
        /// <remarks>
        /// The static fields testingFile and testingFileModified are overwritten with their
        /// combined paths as a side effect.
        /// </remarks>
        /// <param name="args">
        /// args[1] is combined with the testing file name; args[2] is combined with the names of
        /// the modified testing file and of the predictions output file.
        /// </param>
        private static void PredictRating(string[] args)
        {
            Console.WriteLine("Predicting ratings for Users...");
            String outputFile = Path.Combine(args[2], outputFileForRatings);
            testingFile = Path.Combine(args[1], testingFile);
            testingFileModified = Path.Combine(args[2], testingFileModified);
            ModifyTestingFileForRating(testingFile, testingFileModified);

            const float minRating = 1;
            const float maxRating = 5;

            // Training and test data share the same mappings so that user and item IDs agree.
            // The former locals holding trainingData.AllUsers / AllItems were never used and
            // have been removed.
            var userMapping = new Mapping();
            var itemMapping = new Mapping();
            var trainingData = StaticRatingData.Read(trainingCompactFileForRatings, userMapping, itemMapping, RatingType.FLOAT, TestRatingFileFormat.WITH_RATINGS, false);
            var testData = StaticRatingData.Read(testingFileModified, userMapping, itemMapping, RatingType.FLOAT, TestRatingFileFormat.WITH_RATINGS, false);

            var recommender = new BiasedMatrixFactorization();
            recommender.MinRating = minRating;
            recommender.MaxRating = maxRating;
            recommender.Ratings = trainingData;

            recommender.NumFactors = 30;
            recommender.NumIter = 100;
            recommender.RegI = 0.04F;
            recommender.RegU = 0.04F;
            //recommender.BiasReg = 0.09F;
            recommender.FrequencyRegularization = true;
            recommender.BoldDriver = true;
            recommender.LearnRate = 0.07F;

            Stopwatch timeKeeper = new Stopwatch();
            timeKeeper.Start();
            recommender.Train();
            timeKeeper.Stop();
            Console.WriteLine("time passed for training rating prediction model: " + timeKeeper.ElapsedMilliseconds);

            // measure the accuracy on the test data set
            // (Restart() resets the elapsed time to zero and starts measuring again)
            timeKeeper.Restart();
            var results = recommender.Evaluate(testData);
            timeKeeper.Stop();
            Console.WriteLine("time passed for rating prediction: " + timeKeeper.ElapsedMilliseconds);
            Console.WriteLine("RMSE={0}", results["RMSE"]);

            recommender.WritePredictions(testData, outputFile, userMapping, itemMapping, "{0}-{1},{2}", "userID-itemID,rating");
        }